From e78eb79fc93faecc3ad70ebc9db1961768483ada Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Thu, 24 Jan 2019 17:26:25 +0100
Subject: [PATCH] [DATA] add safran extended (which include as massifs: the
 original massif + the 4 alps regions + the whole alps)

---
 safran_study/abstract_safran.py | 115 ---------------------------
 safran_study/fit_safran.py      |  25 ++++++
 safran_study/safran.py          | 133 ++++++++++++++++++++++++++------
 safran_study/safran_extended.py |  36 +++++++++
 test/test_utils.py              |   6 +-
 utils.py                        |   2 +
 6 files changed, 174 insertions(+), 143 deletions(-)
 delete mode 100644 safran_study/abstract_safran.py
 create mode 100644 safran_study/fit_safran.py
 create mode 100644 safran_study/safran_extended.py

diff --git a/safran_study/abstract_safran.py b/safran_study/abstract_safran.py
deleted file mode 100644
index 5dce958e..00000000
--- a/safran_study/abstract_safran.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import os
-import os.path as op
-from collections import OrderedDict
-
-import matplotlib.pyplot as plt
-import pandas as pd
-from netCDF4 import Dataset
-
-from extreme_estimator.gev.gevmle_fit import GevMleFit
-from safran_study.massif import safran_massif_names_from_datasets
-from safran_study.snowfall_annual_maxima import SafranSnowfall
-from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
-from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
-    AbstractSpatialCoordinates
-from utils import get_full_path, cached_property
-
-
-class Safran(object):
-
-    def __init__(self, nb_days_of_snowfall=1):
-        self.safran_altitude = None
-        self.nb_days_of_snowfall = nb_days_of_snowfall
-
-    def write_to_file(self, df):
-        if not op.exists(self.result_full_path):
-            os.makedirs(self.result_full_path, exist_ok=True)
-        df.to_csv(op.join(self.result_full_path, 'merged_array_{}_altitude.csv'.format(self.safran_altitude)))
-
-    """ Visualization methods """
-
-    def visualize(self):
-        df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv'))
-        coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X],
-                         row_massif[AbstractCoordinates.COORDINATE_Y])
-                        for _, row_massif in df_massif.iterrows()]
-        for massif_idx in set([tuple[0] for tuple in coord_tuples]):
-            l = [coords for idx, *coords in coord_tuples if idx == massif_idx]
-            l = list(zip(*l))
-            plt.plot(*l, color='black')
-            plt.fill(*l)
-        self.massifs_coordinates.visualization_2D()
-
-    """ Statistics methods """
-
-    @property
-    def df_gev_mle_each_massif(self):
-        # Fit a gev n each massif
-        massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.to_serie()
-                             for massif_name in self.safran_massif_names}
-        return pd.DataFrame(massif_to_gev_mle)
-
-    """ Annual maxima of snowfall """
-
-    @property
-    def df_annual_maxima(self):
-        return pd.DataFrame(self.year_to_annual_maxima, index=self.safran_massif_names).T
-
-    """ Load some attributes only once """
-
-    @cached_property
-    def year_to_annual_maxima(self):
-        year_to_safran_snowfall = {year: SafranSnowfall(dataset) for year, dataset in
-                                   self.year_to_dataset_ordered_dict.items()}
-        year_to_annual_maxima = OrderedDict()
-        for year in self.year_to_dataset_ordered_dict.keys():
-            year_to_annual_maxima[year] = year_to_safran_snowfall[year].annual_maxima_of_snowfall(
-                self.nb_days_of_snowfall)
-        return year_to_annual_maxima
-
-    @cached_property
-    def safran_massif_names(self):
-        # Load the names of the massif as defined by SAFRAN
-        return safran_massif_names_from_datasets(self.year_to_dataset_ordered_dict.values())
-
-    @cached_property
-    def year_to_dataset_ordered_dict(self) -> OrderedDict:
-        # Map each year to the correspond netCDF4 Dataset
-        year_to_dataset = OrderedDict()
-        nc_files = [(int(f.split('_')[1][:4]), f) for f in os.listdir(self.safran_full_path) if f.endswith('.nc')]
-        for year, nc_file in sorted(nc_files, key=lambda t: t[0]):
-            year_to_dataset[year] = Dataset(op.join(self.safran_full_path, nc_file))
-        return year_to_dataset
-
-    @cached_property
-    def massifs_coordinates(self) -> AbstractSpatialCoordinates:
-        # Coordinate object that represents the massif coordinates in Lambert extended
-        df_centroid = pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv'))
-        for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
-            df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float)
-        # Assert that the massif names are the same between SAFRAN and the coordinate file
-        assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM']))
-        # Build coordinate object from df_centroid
-        return AbstractSpatialCoordinates.from_df(df_centroid)
-
-    """ Some properties """
-
-    @property
-    def relative_path(self) -> str:
-        return r'local/spatio_temporal_datasets'
-
-    @property
-    def full_path(self) -> str:
-        return get_full_path(relative_path=self.relative_path)
-
-    @property
-    def safran_full_path(self) -> str:
-        return op.join(self.full_path, 'safran-crocus_{}'.format(self.safran_altitude), 'Safran')
-
-    @property
-    def map_full_path(self) -> str:
-        return op.join(self.full_path, 'map')
-
-    @property
-    def result_full_path(self) -> str:
-        return op.join(self.full_path, 'results')
\ No newline at end of file
diff --git a/safran_study/fit_safran.py b/safran_study/fit_safran.py
new file mode 100644
index 00000000..29c6f9ae
--- /dev/null
+++ b/safran_study/fit_safran.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+from safran_study.safran import Safran
+from safran_study.safran_extended import ExtendedSafran
+from utils import VERSION
+
+
+def fit_mle_gev_for_all_safran_and_different_days():
+    # Dump the result in a csv
+    dfs = []
+    for safran_alti in [1800, 2400][:1]:
+        for nb_day in [1, 3, 7][:]:
+            print('alti: {}, nb_day: {}'.format(safran_alti, nb_day))
+            # safran = Safran(safran_alti, nb_day)
+            safran = ExtendedSafran(safran_alti, nb_day)
+            df = safran.df_gev_mle_each_massif
+            df.index += ' Safran{} with {} days'.format(safran.safran_altitude, safran.nb_days_of_snowfall)
+            dfs.append(df)
+    df = pd.concat(dfs)
+    path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv'
+    df.to_csv(path.format(VERSION))
+
+
+if __name__ == '__main__':
+    fit_mle_gev_for_all_safran_and_different_days()
diff --git a/safran_study/safran.py b/safran_study/safran.py
index cc32c2bf..7b735f0c 100644
--- a/safran_study/safran.py
+++ b/safran_study/safran.py
@@ -1,36 +1,119 @@
+import os
+import os.path as op
+from collections import OrderedDict
+
+import matplotlib.pyplot as plt
 import pandas as pd
+from netCDF4 import Dataset
+
+from extreme_estimator.gev.gevmle_fit import GevMleFit
+from safran_study.massif import safran_massif_names_from_datasets
+from safran_study.snowfall_annual_maxima import SafranSnowfall
+from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
+from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
+    AbstractSpatialCoordinates
+from utils import get_full_path, cached_property
+
+
+class Safran(object):
+
+    def __init__(self, safran_altitude=1800, nb_days_of_snowfall=1):
+        assert safran_altitude in [1800, 2400]
+        self.safran_altitude = safran_altitude
+        self.nb_days_of_snowfall = nb_days_of_snowfall
+
+    def write_to_file(self, df):
+        if not op.exists(self.result_full_path):
+            os.makedirs(self.result_full_path, exist_ok=True)
+        df.to_csv(op.join(self.result_full_path, 'merged_array_{}_altitude.csv'.format(self.safran_altitude)))
+
+    """ Visualization methods """
+
+    def visualize(self):
+        df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv'))
+        coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X],
+                         row_massif[AbstractCoordinates.COORDINATE_Y])
+                        for _, row_massif in df_massif.iterrows()]
+        for massif_idx in set([tuple[0] for tuple in coord_tuples]):
+            l = [coords for idx, *coords in coord_tuples if idx == massif_idx]
+            l = list(zip(*l))
+            plt.plot(*l, color='black')
+            plt.fill(*l)
+        self.massifs_coordinates.visualization_2D()
+
+    """ Statistics methods """
+
+    @property
+    def df_gev_mle_each_massif(self):
+        # Fit a gev n each massif
+        massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.to_serie()
+                             for massif_name in self.safran_massif_names}
+        return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names)
+
+    """ Annual maxima of snowfall """
+
+    @property
+    def df_annual_maxima(self):
+        return pd.DataFrame(self.year_to_annual_maxima, index=self.safran_massif_names).T
+
+    """ Load some attributes only once """
+
+    @cached_property
+    def year_to_annual_maxima(self):
+        year_to_safran_snowfall = {year: SafranSnowfall(dataset) for year, dataset in
+                                   self.year_to_dataset_ordered_dict.items()}
+        year_to_annual_maxima = OrderedDict()
+        for year in self.year_to_dataset_ordered_dict.keys():
+            year_to_annual_maxima[year] = year_to_safran_snowfall[year].annual_maxima_of_snowfall(
+                self.nb_days_of_snowfall)
+        return year_to_annual_maxima
 
-from safran_study.abstract_safran import Safran
-from utils import VERSION
+    @property
+    def safran_massif_names(self):
+        # Load the names of the massif as defined by SAFRAN
+        return safran_massif_names_from_datasets(self.year_to_dataset_ordered_dict.values())
 
+    @cached_property
+    def year_to_dataset_ordered_dict(self) -> OrderedDict:
+        # Map each year to the correspond netCDF4 Dataset
+        year_to_dataset = OrderedDict()
+        nc_files = [(int(f.split('_')[1][:4]), f) for f in os.listdir(self.safran_full_path) if f.endswith('.nc')]
+        for year, nc_file in sorted(nc_files, key=lambda t: t[0]):
+            year_to_dataset[year] = Dataset(op.join(self.safran_full_path, nc_file))
+        return year_to_dataset
 
-class Safran1800(Safran):
+    @cached_property
+    def massifs_coordinates(self) -> AbstractSpatialCoordinates:
+        # Coordinate object that represents the massif coordinates in Lambert extended
+        df_centroid = self.load_df_centroid()
+        for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
+            df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float)
+        # Assert that the massif names are the same between SAFRAN and the coordinate file
+        assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM']))
+        # Build coordinate object from df_centroid
+        return AbstractSpatialCoordinates.from_df(df_centroid)
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.safran_altitude = 1800
+    def load_df_centroid(self):
+        return pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv'))
 
+    """ Some properties """
 
-class Safran2400(Safran):
+    @property
+    def relative_path(self) -> str:
+        return r'local/spatio_temporal_datasets'
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.safran_altitude = 2400
+    @property
+    def full_path(self) -> str:
+        return get_full_path(relative_path=self.relative_path)
 
-def fit_mle_gev_for_all_safran_and_different_days():
-    # Dump the result in a csv
-    dfs = []
-    for safran_class in [Safran1800, Safran2400]:
-        for nb_day in [1, 3, 7]:
-            print('here')
-            safran = safran_class(nb_day)
-            df = safran.df_gev_mle_each_massif
-            df.index += ' Safran{} with {} days'.format(safran.safran_altitude, safran.nb_days_of_snowfall)
-            dfs.append(df)
-    df = pd.concat(dfs)
-    path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv'
-    df.to_csv(path.format(VERSION))
+    @property
+    def safran_full_path(self) -> str:
+        return op.join(self.full_path, 'safran-crocus_{}'.format(self.safran_altitude), 'Safran')
 
+    @property
+    def map_full_path(self) -> str:
+        return op.join(self.full_path, 'map')
 
-if __name__ == '__main__':
-    fit_mle_gev_for_all_safran_and_different_days()
\ No newline at end of file
+    @property
+    def result_full_path(self) -> str:
+        return op.join(self.full_path, 'results')
\ No newline at end of file
diff --git a/safran_study/safran_extended.py b/safran_study/safran_extended.py
new file mode 100644
index 00000000..e3e76208
--- /dev/null
+++ b/safran_study/safran_extended.py
@@ -0,0 +1,36 @@
+from collections import OrderedDict
+
+import pandas as pd
+
+from safran_study.safran import Safran
+from utils import cached_property
+
+
+class ExtendedSafran(Safran):
+
+    @property
+    def safran_massif_names(self):
+        return self.region_names + super().safran_massif_names
+
+    """ Properties """
+
+    @cached_property
+    def df_annual_maxima(self):
+        df_annual_maxima = pd.DataFrame(self.year_to_annual_maxima, index=super().safran_massif_names).T
+        # Add the column corresponding to the aggregated massif
+        for region_name_loop in self.region_names:
+            # We use "is in" so that the "Alps" will automatically regroup all the massif data
+            massif_belong_to_the_group = [massif_name
+                                          for massif_name, region_name in self.massif_name_to_region_name.items()
+                                          if region_name_loop in region_name]
+            df_annual_maxima[region_name_loop] = df_annual_maxima.loc[:, massif_belong_to_the_group].max(axis=1)
+        return df_annual_maxima
+
+    @property
+    def massif_name_to_region_name(self):
+        df_centroid = self.load_df_centroid()
+        return OrderedDict(zip(df_centroid['NOM'], df_centroid['REGION']))
+
+    @property
+    def region_names(self):
+        return ['Alps', 'Northern Alps', 'Central Alps', 'Southern Alps', 'Extreme South Alps']
diff --git a/test/test_utils.py b/test/test_utils.py
index 1c4fa313..146b7713 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -7,7 +7,7 @@ from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model
     AbstractMaxStableModelWithCovarianceFunction, CovarianceFunction
 from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith, BrownResnick, Schlather, \
     Geometric, ExtremalT, ISchlather
-from safran_study.safran import Safran2400, Safran1800
+from safran_study.safran import Safran
 from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \
     AlpsStation3DCoordinatesWithAnisotropy
 from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \
@@ -85,5 +85,5 @@ def load_test_spatiotemporal_coordinates(nb_points, nb_steps, train_split_ratio=
 
 def load_safran_objects():
     nb_days_list = [1, 3, 5][:1]
-    safran_classes = [Safran1800, Safran2400][:1]
-    return [safran_class(nb_days) for safran_class in safran_classes for nb_days in nb_days_list]
\ No newline at end of file
+    safran_altitude_list = [1800, 2400][:1]
+    return [Safran(safran_altitude, nb_days) for safran_altitude in safran_altitude_list for nb_days in nb_days_list]
\ No newline at end of file
diff --git a/utils.py b/utils.py
index c682da11..44b396f6 100644
--- a/utils.py
+++ b/utils.py
@@ -24,6 +24,8 @@ def first(s):
     return next(iter(s))
 
 
+# todo: these cached property have a weird behavior with inheritence,
+#  when we call the super cached_property in the child method
 class cached_property(object):
     """
     Descriptor (non-data) for building an attribute on-demand on first use.
-- 
GitLab