From c8734e359aea9d40e13b68494500a28f73bcc96f Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Wed, 25 Mar 2020 19:52:42 +0100
Subject: [PATCH] [contrasting project] modify dictionaries related to wp to make
 them depend on massif names

---
 .../scm_models_data/abstract_study.py         | 32 ++++++++++++++-----
 .../main_distribution_wps.py                  | 16 +++++-----
 .../test_edf_data/test_weather_types.py       | 13 +++++---
 3 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
index 68873376..17bbf6f7 100644
--- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
+++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
@@ -105,24 +105,39 @@ class AbstractStudy(object):
             year_to_wps[year] = self.df_weather_types.loc[days].iloc[:, 0].values
         return year_to_wps
 
-    def wps_for_top_annual_maxima(self, nb_top, massif_ids):
-        d = self.massif_id_to_df_ordered_by_maxima
-        wps = pd.Series(np.concatenate([d[massif_id]['WP'].values[:nb_top]
-                                        for massif_id in massif_ids]))
+    def df_for_top_annual_maxima(self, nb_top=None, massif_names=None):
+        # Replace default arguments
+        if nb_top is None:
+            nb_top = self.nb_years
+        if massif_names is None:
+            massif_names = self.study_massif_names
+        # Collect the weather patterns (WP) of the nb_top largest annual maxima of each massif
+        wps = pd.Series(np.concatenate([self.massif_name_to_df_ordered_by_maxima[massif_name]['WP'].values[:nb_top]
+                                        for massif_name in massif_names]))
         s_normalized = wps.value_counts(normalize=True) * 100
         s_normalized = s_normalized.round()
         s_not_normalized = wps.value_counts()
+        # TODO: complete the dataframe with a last column containing the mean maxima
+        # Add a column that indicates the mean maxima associated with each weather pattern
+        # f = {}
+        # for wp in s_normalized.index:
+        #     print(wp)
+        #     for massif_id
+
+
+        # Concatenate all the results in one dataframe
         df = pd.concat([s_normalized, s_not_normalized], axis=1)
         df.columns = ['Percentage', 'Nb massifs concerned']
         df.index.name = 'Number Top={}'.format(nb_top)
         return df
 
     @cached_property
-    def massif_id_to_df_ordered_by_maxima(self):
+    def massif_name_to_df_ordered_by_maxima(self):
         df_annual_maxima = pd.DataFrame(self.year_to_annual_maxima)
         df_wps = pd.DataFrame(self.year_to_wp_for_annual_maxima)
-        massif_id_to_df_ordered_by_maxima = {}
+        massif_name_to_df_ordered_by_maxima = {}
         for massif_id, s_annual_maxima in df_annual_maxima.iterrows():
+            massif_name = self.study_massif_names[massif_id]
             s_annual_maxima.sort_values(inplace=True, ascending=False)
             d = {
                 'Year': s_annual_maxima.index,
@@ -131,8 +146,9 @@ class AbstractStudy(object):
             }
             df = pd.DataFrame(d)
             df.set_index('Year', inplace=True)
-            massif_id_to_df_ordered_by_maxima[massif_id] = df
-        return massif_id_to_df_ordered_by_maxima
+            massif_name_to_df_ordered_by_maxima[massif_name] = df
+        assert set(self.study_massif_names) == set(massif_name_to_df_ordered_by_maxima.keys())
+        return massif_name_to_df_ordered_by_maxima
 
     @cached_property
     def year_to_wp_for_annual_maxima(self):
diff --git a/projects/contrasting_trends_in_snow_loads/weather_types_analysis/main_distribution_wps.py b/projects/contrasting_trends_in_snow_loads/weather_types_analysis/main_distribution_wps.py
index 8191e44a..5018c1b4 100644
--- a/projects/contrasting_trends_in_snow_loads/weather_types_analysis/main_distribution_wps.py
+++ b/projects/contrasting_trends_in_snow_loads/weather_types_analysis/main_distribution_wps.py
@@ -9,10 +9,10 @@ from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranP
 def main_spatial_distribution_wps(study_class, year_min=1954, year_max=2008):
     study = study_class(altitude=1800, year_min=year_min, year_max=year_max)
     for region_name in AbstractExtendedStudy.region_names:
-        massifs_ids = AbstractExtendedStudy.region_name_to_massif_ids[region_name]
+        massif_names = AbstractExtendedStudy.region_name_to_massif_names[region_name]
         print('\n \n', region_name, '\n')
         for nb_top in [study.nb_years, 5, 1][1:2]:
-            print(study.wps_for_top_annual_maxima(nb_top=nb_top, massif_ids=massifs_ids), '\n')
+            print(study.df_for_top_annual_maxima(nb_top=nb_top, massif_names=massif_names), '\n')
 
 
 """
@@ -53,11 +53,11 @@ def main_temporal_distribution_wps(study_class, year_min=1954, year_max=2008):
     study_before = study_class(altitude=altitude, year_min=year_min, year_max=1981)
     study_after = study_class(altitude=altitude, year_min=1981, year_max=2008)
     for region_name in AbstractExtendedStudy.region_names:
-        massifs_ids = AbstractExtendedStudy.region_name_to_massif_ids[region_name]
+        massif_names = AbstractExtendedStudy.region_name_to_massif_names[region_name]
         print('\n \n', region_name, '\n')
-        for nb_top in [study_before.nb_years, 10, 5, 1][1:2]:
-            print(study_before.wps_for_top_annual_maxima(nb_top=nb_top, massif_ids=massifs_ids), '\n')
-            print(study_after.wps_for_top_annual_maxima(nb_top=nb_top, massif_ids=massifs_ids), '\n')
+        for nb_top in [study_before.nb_years, 10, 5, 1][-1:]:
+            print(study_before.df_for_top_annual_maxima(nb_top=nb_top, massif_names=massif_names), '\n')
+            print(study_after.df_for_top_annual_maxima(nb_top=nb_top, massif_names=massif_names), '\n')
 
 """
 There is no real stationarity in the percentage of the kind of storms that are causing extreme.
@@ -71,5 +71,5 @@ even for the local region it is the same.
 
 if __name__ == '__main__':
     study_class = [CrocusSnowLoad1Day, SafranPrecipitation1Day][-1]
-    # main_spatial_distribution_wps(study_class)
-    main_temporal_distribution_wps(study_class)
+    main_spatial_distribution_wps(study_class)
+    # main_temporal_distribution_wps(study_class)
diff --git a/test/test_extreme_data/test_edf_data/test_weather_types.py b/test/test_extreme_data/test_edf_data/test_weather_types.py
index 78af1c08..740e0d33 100644
--- a/test/test_extreme_data/test_edf_data/test_weather_types.py
+++ b/test/test_extreme_data/test_edf_data/test_weather_types.py
@@ -63,12 +63,17 @@ class TestWeatherTypes(unittest.TestCase):
 
     def test_weather_patterns_maxima(self):
         study = SafranPrecipitation1Day(altitude=900, year_min=1954, year_max=2008)
-        s = pd.Series(np.concatenate([v for v in study.year_to_wp_for_annual_maxima.values()]))
-        storms_ranking = s.value_counts()
+        storms_ranking = study.df_for_top_annual_maxima()
         self.assertEqual(storms_ranking.index[0], STEADY_OCEANIC)
         self.assertEqual(storms_ranking.index[-1], ANTICYCLONIC)
-        self.assertEqual(storms_ranking.values[0], 376)
-        self.assertEqual(storms_ranking.values[-1], 9)
+        self.assertEqual(storms_ranking.values[0, 1], 376)
+        self.assertEqual(storms_ranking.values[-1, 1], 9)
+
+    def test_massif_id_to_weather_type_df(self):
+        study = SafranPrecipitation1Day(altitude=900, year_min=2004, year_max=2008)
+        df = study.massif_name_to_df_ordered_by_maxima['Chablais']
+        self.assertAlmostEqual(df.loc[2007].values[0], 57.384655)
+        self.assertEqual(df.loc[2007].values[1], 'Steady Oceanic')
 
 
 if __name__ == '__main__':
-- 
GitLab