From 89e9e74eb1e190d2087605207e75201b51de3280 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Wed, 25 Mar 2020 17:11:21 +0100
Subject: [PATCH] [contrasting project] replace the integer ids in df_weather_types
 with weather pattern names. add test_weather_patterns_maxima

---
 extreme_data/edf_data/weather_types.py        | 39 ++++++++++++++++---
 .../scm_models_data/abstract_study.py         | 20 +++++-----
 .../test_edf_data/test_weather_types.py       | 25 ++++++++----
 3 files changed, 61 insertions(+), 23 deletions(-)

diff --git a/extreme_data/edf_data/weather_types.py b/extreme_data/edf_data/weather_types.py
index 519b85a8..7dcbb6e0 100644
--- a/extreme_data/edf_data/weather_types.py
+++ b/extreme_data/edf_data/weather_types.py
@@ -5,23 +5,50 @@ import pandas as pd
 from extreme_data.meteo_france_data.scm_models_data.utils import date_to_str
 from root_utils import get_full_path
 
-relative_path = r'local/EDF_data/Weather_types/CatalogueTT_EDF_France0_5308.txt'
-edf_filepath = get_full_path(relative_path=relative_path)
+# Weather types (a.k.a. weather patterns, WP)
+
+ANTICYCLONIC = 'Anticyclonic'
+
+CENTRAL_DEPRESSION = 'Central Depression'
+
+EAST_RETURN = 'East Return'
+
+NORTHEAST_CIRCULATION = 'Northeast Circulation'
+
+SOUTH_CIRCULATION = 'South Circulation'
+
+SOUTHWEST_CIRCULATION = 'Southwest Circulation'
+
+STEADY_OCEANIC = 'Steady Oceanic'
+
+ATLANTIC_WAVE = 'Atlantic Wave'
+
+wp_int_to_wp_str = {
+    1: ATLANTIC_WAVE,
+    2: STEADY_OCEANIC,
+    3: SOUTHWEST_CIRCULATION,
+    4: SOUTH_CIRCULATION,
+    5: NORTHEAST_CIRCULATION,
+    6: EAST_RETURN,
+    7: CENTRAL_DEPRESSION,
+    8: ANTICYCLONIC,
+}
 
 
 def load_df_weather_types() -> pd.DataFrame:
+    relative_path = r'local/EDF_data/Weather_types/CatalogueTT_EDF_France0_5308.txt'
+    edf_filepath = get_full_path(relative_path=relative_path)
     global df
     weather_types = []
     with open(edf_filepath, 'rb') as f:
         for i, l in enumerate(f):
             if i >= 7:
                 l = str(l).split('"')[1:]
-                wp = int(l[1][2])
+                wp_int = int(l[1][2])  # single-character id found at offset 2 after the quoted date
+                wp_str = wp_int_to_wp_str[wp_int]  # id -> name; KeyError if the id is not a key of the mapping
                 day, month, year = [int(e) for e in l[0].split('/')]
                 date_str = date_to_str(datetime(year=year, month=month, day=day))
-                weather_types.append((date_str, wp))
+                weather_types.append((date_str, wp_str))
     df = pd.DataFrame(weather_types, columns=['Date', 'WP'])
     df.set_index('Date', inplace=True)
     return df
-
-
diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
index 64fd8cc2..ddafc5d3 100644
--- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
+++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
@@ -82,7 +82,7 @@ class AbstractStudy(object):
         # Map each year to the 'days since year-08-01 06:00:00'
         year_to_days = OrderedDict()
         for year in self.ordered_years:
-            date = datetime.datetime(year=year-1, month=8, day=1, hour=6, minute=0, second=0)
+            date = datetime.datetime(year=year - 1, month=8, day=1, hour=6, minute=0, second=0)
             days = []
             for i in range(366):
                 days.append(date_to_str(date))
@@ -96,11 +96,19 @@ class AbstractStudy(object):
     def year_to_wps(self):
         assert 1954 <= self.year_min and self.year_max <= WP_PATTERN_MAX_YEAR, \
             'Weather patterns are not available between {} and {}'.format(self.year_min, self.year_max)
-        year_to_wps = {}
+        year_to_wps = OrderedDict()
         for year, days in self.year_to_days.items():
             year_to_wps[year] = self.df_weather_types.loc[days].iloc[:, 0].values
         return year_to_wps
 
+    @cached_property
+    def year_to_wp_for_annual_maxima(self):
+        # For each year, keep the weather patterns located at the annual maxima index
+        wps_at_maxima = OrderedDict()
+        for year, maxima_index in self.year_to_annual_maxima_index.items():
+            wps_at_maxima[year] = self.year_to_wps[year][maxima_index]
+        return wps_at_maxima
+
     @cached_property
     def df_weather_types(self):
         return load_df_weather_types()
@@ -190,8 +198,6 @@ class AbstractStudy(object):
 
     """ Load daily observations """
 
-
-
     @cached_property
     def year_to_daily_time_serie_array(self) -> OrderedDict:
         return self._year_to_daily_time_serie_array
@@ -252,7 +258,7 @@ class AbstractStudy(object):
 
     @cached_property
     def ordered_years_and_path_files(self):
-        nc_files = [(int(f.split('_')[-2][:4])+1, f) for f in os.listdir(self.study_full_path) if f.endswith('.nc')]
+        nc_files = [(int(f.split('_')[-2][:4]) + 1, f) for f in os.listdir(self.study_full_path) if f.endswith('.nc')]
         ordered_years, path_files = zip(*[(year, op.join(self.study_full_path, nc_file))
                                           for year, nc_file in sorted(nc_files, key=lambda t: t[0])
                                           if self.year_min <= year <= self.year_max])
@@ -609,7 +615,3 @@ class AbstractStudy(object):
             mask_massif = np.array(img)
             mask_french_alps += mask_massif
         return ~np.array(mask_french_alps, dtype=bool)
-
-
-
-
diff --git a/test/test_extreme_data/test_edf_data/test_weather_types.py b/test/test_extreme_data/test_edf_data/test_weather_types.py
index aa596645..78af1c08 100644
--- a/test/test_extreme_data/test_edf_data/test_weather_types.py
+++ b/test/test_extreme_data/test_edf_data/test_weather_types.py
@@ -4,7 +4,7 @@ from datetime import datetime
 
 import pandas as pd
 
-from extreme_data.edf_data.weather_types import load_df_weather_types
+from extreme_data.edf_data.weather_types import load_df_weather_types, wp_int_to_wp_str, ANTICYCLONIC, STEADY_OCEANIC
 from extreme_data.meteo_france_data.scm_models_data.crocus.crocus import CrocusSwe3Days
 from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranTemperature, SafranPrecipitation1Day
 from extreme_data.meteo_france_data.scm_models_data.utils import date_to_str
@@ -16,8 +16,8 @@ class TestWeatherTypes(unittest.TestCase):
         df = load_df_weather_types()
         self.assertEqual(len(df), 20354)
         # Assert values
-        self.assertEqual(df.iloc[0, :].values[0], 5)
-        self.assertEqual(df.iloc[-1, :].values[0], 8)
+        self.assertEqual(df.iloc[0, :].values[0], wp_int_to_wp_str[5])
+        self.assertEqual(df.iloc[-1, :].values[0], wp_int_to_wp_str[8])
         # Assert keys
         self.assertEqual(date_to_str(datetime(year=1953, month=1, day=1)), df.index[0])
         self.assertEqual(date_to_str(datetime(year=2008, month=9, day=22)), df.index[-1])
@@ -34,14 +34,14 @@ class TestWeatherTypes(unittest.TestCase):
     def test_yearly_percentages(self):
         # Expected Percentages come from the original paper
         expected_percentages = [7, 23, 8, 18, 7, 6, 3, 28]
-        wp_ids = list(range(1, 9))
-        wp_to_expected_percentages = dict(zip(wp_ids, expected_percentages))
+        wp_str = [wp_int_to_wp_str[wp_int] for wp_int in range(1, 9)]
+        wp_to_expected_percentages = dict(zip(wp_str, expected_percentages))
         # Compute percentages
         df = load_df_weather_types()
         wp_to_found_percentages = 100 * df['WP'].value_counts() / len(df)
-        wp_to_found_percentages = {int(k): round(v) for k, v in wp_to_found_percentages.to_dict().items()}
+        wp_to_found_percentages = {k: round(v) for k, v in wp_to_found_percentages.to_dict().items()}
         # They remove one the wp1 so that the sum of the percentages sum to 100
-        wp_to_found_percentages[1] -= 1
+        wp_to_found_percentages[wp_int_to_wp_str[1]] -= 1
         self.assertEqual(sum(wp_to_found_percentages.values()), 100)
         # wp_to_found_percentages = wp_to_found_percentages.astype(int)
         self.assertEqual(wp_to_expected_percentages, wp_to_found_percentages)
@@ -53,7 +53,7 @@ class TestWeatherTypes(unittest.TestCase):
         for year, wps in study.year_to_wps.items():
             daily_time_serie_array = study.year_to_daily_time_serie_array[year]
             self.assertEqual(len(daily_time_serie_array), len(wps))
-            mask = np.array(wps) == 8
+            mask = np.array(wps) == ANTICYCLONIC
             no_rain.extend(np.max(daily_time_serie_array[mask, :], axis=1))
             rain.extend(np.max(daily_time_serie_array[~mask, :], axis=1))
         # For 90% of the anticyclonic days, the daily max precipitation (snowfall + rainfall) for a massifs is < 0.2mm
@@ -61,6 +61,15 @@ class TestWeatherTypes(unittest.TestCase):
         self.assertLess(np.quantile(no_rain, 0.5), 0.2)
         self.assertLess(5, np.quantile(rain, 0.5))
 
+    def test_weather_patterns_maxima(self):
+        # Count, over all years, how often each weather pattern occurs at an annual maximum
+        study = SafranPrecipitation1Day(altitude=900, year_min=1954, year_max=2008)
+        wp_counts = pd.Series(np.concatenate(list(study.year_to_wp_for_annual_maxima.values()))).value_counts()
+        self.assertEqual(wp_counts.index[0], STEADY_OCEANIC)
+        self.assertEqual(wp_counts.index[-1], ANTICYCLONIC)
+        self.assertEqual(wp_counts.values[0], 376)
+        self.assertEqual(wp_counts.values[-1], 9)
+
 
 if __name__ == '__main__':
     unittest.main()
-- 
GitLab