From 4664516989fcebb1acf6b11a96ee6376f5218bd8 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Mon, 6 Apr 2020 10:59:05 +0200
Subject: [PATCH] [contrasting project] add pyrenees data. add
 refuge_sarradet.py analysis

---
 .../scm_models_data/abstract_study.py         | 75 ++++++++++++++-----
 .../case_studies/refuge_sarradet.py           | 44 +++++++++++
 .../crocus/crocus_variables.py                |  2 +-
 .../scm_models_data/safran/cumulated_study.py |  2 +-
 .../scm_models_data/utils.py                  | 30 ++++++++
 ...dy_visualizer_for_non_stationary_trends.py |  2 +-
 6 files changed, 135 insertions(+), 20 deletions(-)
 create mode 100644 extreme_data/meteo_france_data/scm_models_data/case_studies/refuge_sarradet.py

diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
index 5f6e0ba8..53840c30 100644
--- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
+++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
@@ -24,7 +24,7 @@ from extreme_data.meteo_france_data.scm_models_data.abstract_variable import Abs
 from extreme_data.meteo_france_data.scm_models_data.utils import ALTITUDES, ZS_INT_23, ZS_INT_MASK, LONGITUDES, \
     LATITUDES, ORIENTATIONS, SLOPES, ORDERED_ALLSLOPES_ALTITUDES, ORDERED_ALLSLOPES_ORIENTATIONS, \
     ORDERED_ALLSLOPES_SLOPES, ORDERED_ALLSLOPES_MASSIFNUM, date_to_str, WP_PATTERN_MAX_YEAR, SeasonForTheMaxima, \
-    first_day_and_last_day
+    first_day_and_last_day, FrenchRegion, ZS_INT_MASK_PYRENNES, alps_massif_order, ZS_INT_MASK_PYRENNES_LIST
 from extreme_data.meteo_france_data.scm_models_data.visualization.utils import get_km_formatter
 from extreme_fit.function.margin_function.abstract_margin_function import \
     AbstractMarginFunction
@@ -56,16 +56,19 @@ class AbstractStudy(object):
     The year 2017 represents the nc file that correspond to the winter between the year 2017 and 2018.
     """
     # REANALYSIS_FLAT_FOLDER = 'SAFRAN_montagne-CROCUS_2019/alp_flat/reanalysis'
-    REANALYSIS_FLAT_FOLDER = 'S2M_AERIS_MARS_2020/'
-    REANALYSIS_ALLSLOPES_FOLDER = 'SAFRAN_montagne-CROCUS_2019/alp_allslopes/reanalysis'
+    REANALYSIS_ALPS_FLAT_FOLDER = 'S2M_AERIS_MARS_2020/'
+    REANALYSIS_PYRENEES_FLAT_FOLDER = 'S2M_AERIS_AVRIL_2020/'
+    REANALYSIS_ALPS_ALLSLOPES_FOLDER = 'SAFRAN_montagne-CROCUS_2019/alp_allslopes/reanalysis'
 
     # REANALYSIS_FOLDER = 'SAFRAN_montagne-CROCUS_2019/postes/reanalysis'
 
     def __init__(self, variable_class: type, altitude: int = 1800, year_min=1959, year_max=2019,
                  multiprocessing=True, orientation=None, slope=20.0,
-                 season=SeasonForTheMaxima.annual):
+                 season=SeasonForTheMaxima.annual,
+                 french_region=FrenchRegion.alps):
         assert isinstance(altitude, int), type(altitude)
         assert altitude in ALTITUDES, altitude
+        self.french_region = french_region
         self.altitude = altitude
         self.model_name = None
         self.variable_class = variable_class
@@ -377,7 +380,7 @@ class AbstractStudy(object):
 
     @property
     def missing_massif_name(self):
-        return set(self.all_massif_names) - set(self.altitude_to_massif_names[self.altitude])
+        return set(self.all_massif_names(self.reanalysis_path, self.dbf_filename)) - set(self.altitude_to_massif_names[self.altitude])
 
     @property
     def column_mask(self):
@@ -395,7 +398,12 @@ class AbstractStudy(object):
 
     @cached_property
     def flat_mask(self):
-        altitude_mask = ZS_INT_MASK == self.altitude
+        if self.french_region is FrenchRegion.alps:
+            altitude_mask = ZS_INT_MASK == self.altitude
+        elif self.french_region is FrenchRegion.pyrenees:
+            altitude_mask = ZS_INT_MASK_PYRENNES == self.altitude
+        else:
+            raise ValueError('{}'.format(self.french_region))
         assert np.sum(altitude_mask) == len(self.altitude_to_massif_names[self.altitude])
         return altitude_mask
 
@@ -580,9 +588,28 @@ class AbstractStudy(object):
 
     @property
     def reanalysis_path(self):
-        reanalysis_folder = self.REANALYSIS_ALLSLOPES_FOLDER if self.has_orientation else self.REANALYSIS_FLAT_FOLDER
+        if self.french_region is FrenchRegion.alps:
+            if self.has_orientation:
+                reanalysis_folder = self.REANALYSIS_ALPS_ALLSLOPES_FOLDER
+            else:
+                reanalysis_folder = self.REANALYSIS_ALPS_FLAT_FOLDER
+        elif self.french_region is FrenchRegion.pyrenees and not self.has_orientation:
+            reanalysis_folder = self.REANALYSIS_PYRENEES_FLAT_FOLDER
+        else:
+            raise ValueError(
+                'french_region = {}, has_orientation = {}'.format(self.french_region, self.has_orientation))
+
         return op.join(self.full_path, reanalysis_folder)
 
+    @property
+    def dbf_filename(self) -> str:
+        if self.french_region is FrenchRegion.alps:
+            return 'massifs_alpes'
+        elif self.french_region is FrenchRegion.pyrenees:
+            return 'massifs_pyrenees'
+        else:
+            raise ValueError('{}'.format(self.french_region))
+
     @property
     def has_orientation(self):
         return self.orientation is not None
@@ -593,19 +620,31 @@ class AbstractStudy(object):
     def original_safran_massif_id_to_massif_name(cls) -> Dict[int, str]:
         return {massif_id: massif_name for massif_id, massif_name in enumerate(cls.all_massif_names)}
 
-    @classproperty
-    def all_massif_names(cls) -> List[str]:
+    @classmethod
+    def all_massif_names(cls, reanalysis_path=None, dbf_filename='massifs_alpes') -> List[str]:
         """
         Pour l'identification des massifs, le numéro de la variable massif_num correspond à celui de l'attribut num_opp
         """
-        metadata_path = op.join(cls.full_path, cls.REANALYSIS_FLAT_FOLDER, 'metadata')
-        dbf = Dbf5(op.join(metadata_path, 'massifs_alpes.dbf'))
+        if reanalysis_path is None:
+            # Default case if the french alps
+            reanalysis_path = op.join(cls.full_path, cls.REANALYSIS_ALPS_FLAT_FOLDER)
+        if cls.REANALYSIS_ALPS_FLAT_FOLDER in reanalysis_path or cls.REANALYSIS_ALPS_ALLSLOPES_FOLDER in reanalysis_path:
+            french_region = FrenchRegion.alps
+            key = 'num_opp'
+        else:
+            french_region = FrenchRegion.pyrenees
+            key = 'massif_num'
+
+        metadata_path = op.join(reanalysis_path, 'metadata')
+        dbf = Dbf5(op.join(metadata_path, '{}.dbf'.format(dbf_filename)))
         df = dbf.to_dataframe().copy()  # type: pd.DataFrame
         dbf.f.close()
-        df.sort_values(by='num_opp', inplace=True)
+        # Important part (for the alps & pyrenees all data is order from the smaller massif number to the bigger)
+        df.sort_values(by=key, inplace=True)
         all_massif_names = list(df['nom'])
         # Correct a massif name
-        all_massif_names[all_massif_names.index('Beaufortin')] = 'Beaufortain'
+        if french_region is FrenchRegion.alps:
+            all_massif_names[all_massif_names.index('Beaufortin')] = 'Beaufortain'
         return all_massif_names
 
     @classmethod
@@ -614,17 +653,18 @@ class AbstractStudy(object):
         df_centroid = pd.read_csv(op.join(cls.map_full_path, 'coordonnees_massifs_alpes.csv'))
         df_centroid.set_index('NOM', inplace=True)
         # Check that the names of massifs are the same
-        symmetric_difference = set(df_centroid.index).symmetric_difference(cls.all_massif_names)
+        symmetric_difference = set(df_centroid.index).symmetric_difference(cls.all_massif_names())
         assert len(symmetric_difference) == 0, symmetric_difference
         # Sort the column in the order of the SAFRAN dataset
-        df_centroid = df_centroid.reindex(cls.all_massif_names, axis=0)
+        df_centroid = df_centroid.reindex(cls.all_massif_names(), axis=0)
         for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
             df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float)
         return df_centroid
 
     @cached_property
     def massif_name_to_altitudes(self) -> Dict[str, List[int]]:
-        s = ZS_INT_23 + [0]
+        zs = ZS_INT_23 if self.french_region is FrenchRegion.alps else ZS_INT_MASK_PYRENNES_LIST
+        s = zs + [0]
         zs_list = []
         zs_all_list = []
         for a, b in zip(s[:-1], s[1:]):
@@ -632,7 +672,8 @@ class AbstractStudy(object):
             if a > b:
                 zs_all_list.append(zs_list)
                 zs_list = []
-        return OrderedDict(zip(self.all_massif_names, zs_all_list))
+        all_massif_names = self.all_massif_names(self.reanalysis_path, self.dbf_filename)
+        return OrderedDict(zip(all_massif_names, zs_all_list))
 
     @cached_property
     def altitude_to_massif_names(self) -> Dict[int, List[str]]:
diff --git a/extreme_data/meteo_france_data/scm_models_data/case_studies/refuge_sarradet.py b/extreme_data/meteo_france_data/scm_models_data/case_studies/refuge_sarradet.py
new file mode 100644
index 00000000..5834fb88
--- /dev/null
+++ b/extreme_data/meteo_france_data/scm_models_data/case_studies/refuge_sarradet.py
@@ -0,0 +1,44 @@
+import pandas as pd
+import numpy as np
+import xlsxwriter
+
+
+from extreme_data.meteo_france_data.scm_models_data.abstract_study import AbstractStudy
+from extreme_data.meteo_france_data.scm_models_data.crocus.crocus import CrocusDepth, CrocusSwe3Days
+from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day, SafranSnowfall3Days
+from extreme_data.meteo_france_data.scm_models_data.utils import FrenchRegion
+from extreme_fit.estimator.margin_estimator.utils import fitted_stationary_gev
+
+
+def generate_excel_with_annual_maxima(fit=True):
+    massif_name, french_region, altitudes = 'Beaufortain', FrenchRegion.alps, [900, 1200]
+    # massif_name, french_region, altitudes = 'haute-bigorre', FrenchRegion.pyrenees, [300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300]
+    study_classes = [SafranSnowfall1Day, SafranSnowfall3Days, CrocusSwe3Days, CrocusDepth]
+    writer = pd.ExcelWriter('{}.xlsx'.format(massif_name), engine='xlsxwriter')
+    # writer = pd.ExcelWriter('pandas_multiple.xlsx')
+    for j, study_class in enumerate(study_classes, 1):
+        write_df_with_annual_maxima(massif_name, french_region, writer, study_class, altitudes, fit=fit)
+    writer.save()
+
+
+def write_df_with_annual_maxima(massif_name, french_region, writer, study_class, altitudes, fit=True) -> pd.DataFrame:
+    columns = []
+    for altitude in altitudes:
+        study = study_class(altitude=altitude, french_region=french_region) # type: AbstractStudy
+        df_maxima = study.observations_annual_maxima.df_maxima_gev
+        s = df_maxima.loc[massif_name]
+        # Fit the data and add the parameters as the first columns
+        gev_param = fitted_stationary_gev(s.values)
+        s = pd.concat([gev_param.to_serie(), s])
+        columns.append(s)
+    df = pd.concat(columns, axis=1)
+    altitude_str = [str(a) + ' m' for a in altitudes]
+    df.columns = altitude_str
+    name = study.variable_name
+    short_name = name[:31]
+    df.to_excel(writer, sheet_name=short_name)
+    return df
+
+
+if __name__ == '__main__':
+    generate_excel_with_annual_maxima(fit=True)
diff --git a/extreme_data/meteo_france_data/scm_models_data/crocus/crocus_variables.py b/extreme_data/meteo_france_data/scm_models_data/crocus/crocus_variables.py
index ad044cad..8b4c7c9d 100644
--- a/extreme_data/meteo_france_data/scm_models_data/crocus/crocus_variables.py
+++ b/extreme_data/meteo_france_data/scm_models_data/crocus/crocus_variables.py
@@ -29,7 +29,7 @@ class CrocusRecentSweVariableOneDay(CrocusTotalSweVariable):
 
 
 class CrocusRecentSweVariableThreeDays(CrocusTotalSweVariable):
-    NAME = 'Snow Water Equivalent last 3 days'
+    NAME = 'SWE in 3 days'
 
     @classmethod
     def keyword(cls):
diff --git a/extreme_data/meteo_france_data/scm_models_data/safran/cumulated_study.py b/extreme_data/meteo_france_data/scm_models_data/safran/cumulated_study.py
index 7f08607b..2e55205a 100644
--- a/extreme_data/meteo_france_data/scm_models_data/safran/cumulated_study.py
+++ b/extreme_data/meteo_france_data/scm_models_data/safran/cumulated_study.py
@@ -15,4 +15,4 @@ class CumulatedStudy(AbstractStudy):
 
     @property
     def variable_name(self):
-        return super().variable_name + ' cumulated over {} day(s)'.format(self.nb_consecutive_days)
+        return super().variable_name + ' {} day(s)'.format(self.nb_consecutive_days)
diff --git a/extreme_data/meteo_france_data/scm_models_data/utils.py b/extreme_data/meteo_france_data/scm_models_data/utils.py
index 1dbdfe4b..3f1c57e9 100644
--- a/extreme_data/meteo_france_data/scm_models_data/utils.py
+++ b/extreme_data/meteo_france_data/scm_models_data/utils.py
@@ -8,6 +8,8 @@ import numpy as np
 
 WP_PATTERN_MAX_YEAR = 2008
 
+alps_massif_order = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 30]
+pyrenees_massif_order = [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91]
 
 def date_to_str(date: datetime) -> str:
     return str(date).split()[0]
@@ -19,6 +21,11 @@ class SeasonForTheMaxima(Enum):
     # i could add the classical seasons if needed
 
 
+class FrenchRegion(Enum):
+    alps = 1
+    pyrenees = 2
+
+
 def first_day_and_last_day(season):
     season_to_start_day_and_last_day = {
         SeasonForTheMaxima.annual: ('08-01', '07-31'),
@@ -60,6 +67,29 @@ ZS_INT_23 = ZS_INT[:-10].copy()
 ZS_INT_MASK = np.array(ZS_INT)
 ZS_INT_MASK[-10:] = np.nan
 
+ZS_PYRENNES = """[   0.  300.  600.  900. 1200. 1500. 1800. 2100.    0.  300.  600.  900.
+ 1200. 1500. 1800. 2100. 2400. 2700. 3000.  300.  600.  900. 1200. 1500.
+ 1800. 2100. 2400. 2700. 3000. 3300.  300.  600.  900. 1200. 1500. 1800.
+ 2100. 2400. 2700. 3000. 3300.  300.  600.  900. 1200. 1500. 1800. 2100.
+ 2400. 2700. 3000. 3300.  300.  600.  900. 1200. 1500. 1800. 2100. 2400.
+ 2700. 3000.  300.  600.  900. 1200. 1500. 1800. 2100. 2400. 2700. 3000.
+ 3300.  600.  900. 1200. 1500. 1800. 2100. 2400. 2700. 3000.  300.  600.
+  900. 1200. 1500. 1800. 2100. 2400. 2700. 3000.  300.  600.  900. 1200.
+ 1500. 1800. 2100. 2400. 2700. 3000.    0.  300.  600.  900. 1200. 1500.
+ 1800. 2100. 2400. 2700. 3000.    0.  300.  600.  900. 1200. 1500. 1800.
+ 2100. 2400. 2700.  600.  900. 1200. 1500. 1800. 2100. 2400. 2700. 3000.
+  600.  900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 3300.  300.  600.
+  900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 3300. 3600.  300.  600.
+  900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 3300. 3600.  300.  600.
+  900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 3300.  600.  900. 1200.
+ 1500. 1800. 2100. 2400. 2700. 3000.  300.  600.  900. 1200. 1500. 1800.
+ 2100. 2400. 2700. 3000.  600.  900. 1200. 1500. 1800. 2100. 2400. 2700.
+ 3000.  600.  900. 1200. 1500. 1800. 2100. 2400. 2700. 3000.  300.  600.
+  900. 1200. 1500. 1800. 2100. 2400. 2700.  300.  600.  900. 1200. 1500.
+ 1800. 2100. 2400. 2700.]"""
+ZS_INT_MASK_PYRENNES_LIST = [int(float(e)) for e in ZS_PYRENNES[1:-1].split()]
+ZS_INT_MASK_PYRENNES = np.array(ZS_INT_MASK_PYRENNES_LIST)
+
 # Longitudes and Latitudes in degrees
 LONGITUDES = [6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493,
               6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.82392,
diff --git a/extreme_trend/visualizers/study_visualizer_for_non_stationary_trends.py b/extreme_trend/visualizers/study_visualizer_for_non_stationary_trends.py
index 05bcf688..f9a2c83d 100644
--- a/extreme_trend/visualizers/study_visualizer_for_non_stationary_trends.py
+++ b/extreme_trend/visualizers/study_visualizer_for_non_stationary_trends.py
@@ -356,7 +356,7 @@ class StudyVisualizerForNonStationaryTrends(StudyVisualizer):
             res = [compute_eurocode_confidence_interval(*argument) for argument in arguments]
         massif_name_to_eurocode_return_level_uncertainty = dict(zip(massifs_names, res))
         # For the rest of the massif names. Create a Eurocode Return Level Uncertainty as nan
-        for massif_name in set(self.study.all_massif_names) - set(massifs_names):
+        for massif_name in set(self.study.all_massif_names()) - set(massifs_names):
             massif_name_to_eurocode_return_level_uncertainty[massif_name] = self.default_eurocode_uncertainty
         return massif_name_to_eurocode_return_level_uncertainty
 
-- 
GitLab