Commit c8734e35 authored by Le Roux Erwan

[contrasting project] modify dictionaries related to wp to make them depend on massif names

parent e9e777bc
Showing 41 additions and 20 deletions
@@ -105,24 +105,39 @@ class AbstractStudy(object):
             year_to_wps[year] = self.df_weather_types.loc[days].iloc[:, 0].values
         return year_to_wps
 
-    def wps_for_top_annual_maxima(self, nb_top, massif_ids):
-        d = self.massif_id_to_df_ordered_by_maxima
-        wps = pd.Series(np.concatenate([d[massif_id]['WP'].values[:nb_top]
-                                        for massif_id in massif_ids]))
+    def df_for_top_annual_maxima(self, nb_top=None, massif_names=None):
+        # Replace default arguments
+        if nb_top is None:
+            nb_top = self.nb_years
+        if massif_names is None:
+            massif_names = self.study_massif_names
+        # Load percentages of massifs
+        wps = pd.Series(np.concatenate([self.massif_name_to_df_ordered_by_maxima[massif_name]['WP'].values[:nb_top]
+                                        for massif_name in massif_names]))
         s_normalized = wps.value_counts(normalize=True) * 100
         s_normalized = s_normalized.round()
         s_not_normalized = wps.value_counts()
+        # todo: complete the last column with the mean maxima
+        # Add a column that indicates the mean maxima associated with each weather pattern
+        # f = {}
+        # for wp in s_normalized.index:
+        #     print(wp)
+        #     for massif_id
+        # Concatenate all the results in one dataframe
         df = pd.concat([s_normalized, s_not_normalized], axis=1)
         df.columns = ['Percentage', 'Nb massifs concerned']
         df.index.name = 'Number Top={}'.format(nb_top)
         return df
 
     @cached_property
-    def massif_id_to_df_ordered_by_maxima(self):
+    def massif_name_to_df_ordered_by_maxima(self):
         df_annual_maxima = pd.DataFrame(self.year_to_annual_maxima)
         df_wps = pd.DataFrame(self.year_to_wp_for_annual_maxima)
-        massif_id_to_df_ordered_by_maxima = {}
+        massif_name_to_df_ordered_by_maxima = {}
         for massif_id, s_annual_maxima in df_annual_maxima.iterrows():
+            massif_name = self.study_massif_names[massif_id]
             s_annual_maxima.sort_values(inplace=True, ascending=False)
             d = {
                 'Year': s_annual_maxima.index,
@@ -131,8 +146,9 @@ class AbstractStudy(object):
             }
             df = pd.DataFrame(d)
             df.set_index('Year', inplace=True)
-            massif_id_to_df_ordered_by_maxima[massif_id] = df
-        return massif_id_to_df_ordered_by_maxima
+            massif_name_to_df_ordered_by_maxima[massif_name] = df
+        assert set(self.study_massif_names) == set(massif_name_to_df_ordered_by_maxima.keys())
+        return massif_name_to_df_ordered_by_maxima
 
     @cached_property
     def year_to_wp_for_annual_maxima(self):
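For context, a minimal usage sketch of the renamed, name-keyed accessors. The import path is copied from the truncated hunk header below and the constructor arguments mirror the new test at the bottom of this commit, so treat both as illustrative assumptions rather than verified repository facts:

    from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranPrecipitation1Day  # assumed import path

    # Arguments taken from the new test case below (illustrative).
    study = SafranPrecipitation1Day(altitude=900, year_min=2004, year_max=2008)

    # The cached dictionary is now keyed by massif name instead of massif id.
    df_chablais = study.massif_name_to_df_ordered_by_maxima['Chablais']
    print(df_chablais)  # annual maxima sorted in decreasing order, with the 'WP' weather pattern of each maximum

    # Both arguments of df_for_top_annual_maxima are now optional:
    # the defaults cover every year (nb_top = nb_years) and every massif of the study.
    print(study.df_for_top_annual_maxima())
    print(study.df_for_top_annual_maxima(nb_top=5, massif_names=['Chablais']))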
@@ -9,10 +9,10 @@ from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranP
 def main_spatial_distribution_wps(study_class, year_min=1954, year_max=2008):
     study = study_class(altitude=1800, year_min=year_min, year_max=year_max)
     for region_name in AbstractExtendedStudy.region_names:
-        massifs_ids = AbstractExtendedStudy.region_name_to_massif_ids[region_name]
+        massif_names = AbstractExtendedStudy.region_name_to_massif_names[region_name]
         print('\n \n', region_name, '\n')
         for nb_top in [study.nb_years, 5, 1][1:2]:
-            print(study.wps_for_top_annual_maxima(nb_top=nb_top, massif_ids=massifs_ids), '\n')
+            print(study.df_for_top_annual_maxima(nb_top=nb_top, massif_names=massif_names), '\n')
 
 """
@@ -53,11 +53,11 @@ def main_temporal_distribution_wps(study_class, year_min=1954, year_max=2008):
     study_before = study_class(altitude=altitude, year_min=year_min, year_max=1981)
     study_after = study_class(altitude=altitude, year_min=1981, year_max=2008)
     for region_name in AbstractExtendedStudy.region_names:
-        massifs_ids = AbstractExtendedStudy.region_name_to_massif_ids[region_name]
+        massif_names = AbstractExtendedStudy.region_name_to_massif_names[region_name]
         print('\n \n', region_name, '\n')
-        for nb_top in [study_before.nb_years, 10, 5, 1][1:2]:
-            print(study_before.wps_for_top_annual_maxima(nb_top=nb_top, massif_ids=massifs_ids), '\n')
-            print(study_after.wps_for_top_annual_maxima(nb_top=nb_top, massif_ids=massifs_ids), '\n')
+        for nb_top in [study_before.nb_years, 10, 5, 1][-1:]:
+            print(study_before.df_for_top_annual_maxima(nb_top=nb_top, massif_names=massif_names), '\n')
+            print(study_after.df_for_top_annual_maxima(nb_top=nb_top, massif_names=massif_names), '\n')
 
 """
 There is no real stationarity in the percentage of the kind of storms that are causing extremes.
@@ -71,5 +71,5 @@ even for the local region it is the same.
 if __name__ == '__main__':
     study_class = [CrocusSnowLoad1Day, SafranPrecipitation1Day][-1]
-    # main_spatial_distribution_wps(study_class)
-    main_temporal_distribution_wps(study_class)
+    main_spatial_distribution_wps(study_class)
+    # main_temporal_distribution_wps(study_class)
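The slicing of the nb_top candidate lists above is easy to misread, so here is a small sketch of what each slice actually selects (the nb_years value is illustrative):

    # Illustrative values only: nb_years depends on the study period.
    nb_years = 55  # e.g. 1954-2008
    print([nb_years, 5, 1][1:2])      # [5]  -> main_spatial_distribution_wps keeps only the top 5 maxima per massif
    print([nb_years, 10, 5, 1][-1:])  # [1]  -> main_temporal_distribution_wps now keeps only the single largest maximum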
@@ -63,12 +63,17 @@ class TestWeatherTypes(unittest.TestCase):
     def test_weather_patterns_maxima(self):
         study = SafranPrecipitation1Day(altitude=900, year_min=1954, year_max=2008)
-        s = pd.Series(np.concatenate([v for v in study.year_to_wp_for_annual_maxima.values()]))
-        storms_ranking = s.value_counts()
+        storms_ranking = study.df_for_top_annual_maxima()
         self.assertEqual(storms_ranking.index[0], STEADY_OCEANIC)
         self.assertEqual(storms_ranking.index[-1], ANTICYCLONIC)
-        self.assertEqual(storms_ranking.values[0], 376)
-        self.assertEqual(storms_ranking.values[-1], 9)
+        self.assertEqual(storms_ranking.values[0, 1], 376)
+        self.assertEqual(storms_ranking.values[-1, 1], 9)
+
+    def test_massif_id_to_weather_type_df(self):
+        study = SafranPrecipitation1Day(altitude=900, year_min=2004, year_max=2008)
+        df = study.massif_name_to_df_ordered_by_maxima['Chablais']
+        self.assertAlmostEqual(df.loc[2007].values[0], 57.384655)
+        self.assertEqual(df.loc[2007].values[1], 'Steady Oceanic')
 
 if __name__ == '__main__':
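For reference, the updated test now indexes .values with two coordinates because df_for_top_annual_maxima returns a two-column DataFrame rather than a Series. A rough layout of that DataFrame, reconstructed only from the assertions above (percentages and intermediate rows are not asserted; the index name assumes nb_top falls back to the 55 years of the 1954-2008 study):

                     Percentage  Nb massifs concerned
    Number Top=55
    Steady Oceanic          ...                   376   <- storms_ranking.values[0, 1]
    ...
    Anticyclonic            ...                     9   <- storms_ranking.values[-1, 1]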