[SCM] add visualization implementation for safran_extended

5ed09100 · Le Roux Erwan · 76936786 · 5ed09100 · 5ed09100 · 5ed09100
Commit 5ed09100 authored 6 years ago by Le Roux Erwan
Hide whitespace changes
Inline Side-by-side

Showing

with 147 additions and 49 deletions
+147 -49
--- a/experiment/meteo_france_SCM_study/abstract_study.py
+++ b/experiment/meteo_france_SCM_study/abstract_study.py
@@ -45,14 +45,6 @@ class AbstractStudy(object):
    """ Load some attributes only once """
-    @cached_property
-    def year_to_annual_maxima(self) -> OrderedDict:
-        # Map each year to an array of size nb_massif
-        year_to_annual_maxima = OrderedDict()
-        for year, time_serie in self.year_to_daily_time_serie.items():
-            year_to_annual_maxima[year] = time_serie.max(axis=0)
-        return year_to_annual_maxima
    @cached_property
    def year_to_dataset_ordered_dict(self) -> OrderedDict:
        # Map each year to the correspond netCDF4 Dataset
@@ -64,6 +56,23 @@ class AbstractStudy(object):
    @cached_property
    def year_to_daily_time_serie(self) -> OrderedDict:
+        return self._year_to_daily_time_serie
+    @cached_property
+    def year_to_annual_maxima(self) -> OrderedDict:
+        # Map each year to an array of size nb_massif
+        year_to_annual_maxima = OrderedDict()
+        for year, time_serie in self._year_to_max_daily_time_serie.items():
+            year_to_annual_maxima[year] = time_serie.max(axis=0)
+        return year_to_annual_maxima
+    def instantiate_variable_object(self, dataset) -> AbstractVariable:
+        return self.variable_class(dataset)
+    """ Private methods to be overwritten """
+    @property
+    def _year_to_daily_time_serie(self) -> OrderedDict:
        # Map each year to a matrix of size 365-nb_days_consecutive+1 x nb_massifs
        year_to_variable = {year: self.instantiate_variable_object(dataset) for year, dataset in
                            self.year_to_dataset_ordered_dict.items()}
@@ -72,19 +81,24 @@ class AbstractStudy(object):
            year_to_daily_time_serie[year] = year_to_variable[year].daily_time_serie
        return year_to_daily_time_serie
-    def instantiate_variable_object(self, dataset) -> AbstractVariable:
+    @property
-        return self.variable_class(dataset)
+    def _year_to_max_daily_time_serie(self):
+        return self._year_to_daily_time_serie
    ##########
    @property
    def safran_massif_names(self) -> List[str]:
+        return self.original_safran_massif_names
+    @property
+    def original_safran_massif_names(self):
        # Load the names of the massif as defined by SAFRAN
        return safran_massif_names_from_datasets(list(self.year_to_dataset_ordered_dict.values()))
    @property
-    def safran_massif_id_to_massif_name(self) -> Dict[int, str]:
+    def original_safran_massif_id_to_massif_name(self) -> Dict[int, str]:
-        return {massif_id: massif_name for massif_id, massif_name in enumerate(self.safran_massif_names)}
+        return {massif_id: massif_name for massif_id, massif_name in enumerate(self.original_safran_massif_names)}
    @cached_property
    def massifs_coordinates(self) -> AbstractSpatialCoordinates:
@@ -97,10 +111,8 @@ class AbstractStudy(object):
    def load_df_centroid(self) -> pd.DataFrame:
        df_centroid = pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv'))
-        # Assert that the massif names are the same between SAFRAN and the coordinate file
-        assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM']))
        df_centroid.set_index('NOM', inplace=True)
-        df_centroid = df_centroid.loc[self.safran_massif_names]
+        df_centroid = df_centroid.loc[self.original_safran_massif_names]
        return df_centroid
    @property

--- a/experiment/meteo_france_SCM_study/main_visualize.py
+++ b/experiment/meteo_france_SCM_study/main_visualize.py
@@ -3,6 +3,7 @@ from experiment.meteo_france_SCM_study.crocus.crocus import CrocusDepth, CrocusS
 from experiment.meteo_france_SCM_study.safran.safran import Safran
 from itertools import product
+from experiment.meteo_france_SCM_study.safran.safran_extended import ExtendedSafran
 from experiment.meteo_france_SCM_study.safran.safran_visualizer import StudyVisualizer
@@ -20,7 +21,7 @@ def load_all_studies(study_class, only_first_one=False):
 if __name__ == '__main__':
-    for study_class in [Safran, CrocusSwe, CrocusDepth][1:2]:
+    for study_class in [ExtendedSafran, Safran, CrocusSwe, CrocusDepth][:1]:
        for study in load_all_studies(study_class, only_first_one=True):
            study_visualizer = StudyVisualizer(study)
            # study_visualizer.visualize_independent_margin_fits(threshold=[None, 20, 40, 60][0])

--- a/experiment/meteo_france_SCM_study/safran/safran.py
+++ b/experiment/meteo_france_SCM_study/safran/safran.py
@@ -15,5 +15,5 @@ class Safran(AbstractStudy):
    @property
    def variable_name(self):
-        return super().variable_name() + 'cumulated over {} days'.format(self.nb_days_of_snowfall)
+        return super().variable_name + ' cumulated over {} days'.format(self.nb_days_of_snowfall)
--- a/experiment/meteo_france_SCM_study/safran/safran_extended.py
+++ b/experiment/meteo_france_SCM_study/safran/safran_extended.py
+import numpy as np
 from collections import OrderedDict
-import pandas as pd
 from experiment.meteo_france_SCM_study.safran.safran import Safran
 from utils import cached_property
 class ExtendedSafran(Safran):
+    @property
+    def region_names(self):
+        return ['Alps', 'Northern Alps', 'Central Alps', 'Southern Alps', 'Extreme South Alps']
+    @property
+    def nb_region_names(self):
+        return len(self.region_names)
    @property
    def safran_massif_names(self):
        return self.region_names + super().safran_massif_names
-    """ Properties """
+    @property
+    def massif_name_to_region_name(self):
+        df_centroid = self.load_df_centroid()
+        return OrderedDict(zip(df_centroid.index, df_centroid['REGION']))
-    @cached_property
+    @property
-    def df_annual_maxima(self):
+    def region_name_to_massif_ids(self):
-        df_annual_maxima = pd.DataFrame(self.year_to_annual_maxima, index=super().safran_massif_names).T
+        region_name_to_massifs_ids = {}
-        # Add the column corresponding to the aggregated massif
        for region_name_loop in self.region_names:
            # We use "is in" so that the "Alps" will automatically regroup all the massif data
-            massif_belong_to_the_group = [massif_name
+            massif_names_belong_to_the_group = [massif_name
-                                          for massif_name, region_name in self.massif_name_to_region_name.items()
+                                                for massif_name, region_name in self.massif_name_to_region_name.items()
-                                          if region_name_loop in region_name]
+                                                if region_name_loop in region_name]
-            df_annual_maxima[region_name_loop] = df_annual_maxima.loc[:, massif_belong_to_the_group].max(axis=1)
+            massif_ids_belong_to_the_group = [massif_id
-        return df_annual_maxima
+                                              for massif_id, massif_name in self.original_safran_massif_id_to_massif_name.items()
+                                              if massif_name in massif_names_belong_to_the_group]
+            region_name_to_massifs_ids[region_name_loop] = massif_ids_belong_to_the_group
+        return region_name_to_massifs_ids
+    """ Properties """
    @property
-    def massif_name_to_region_name(self):
+    def _year_to_daily_time_serie(self) -> OrderedDict:
-        df_centroid = self.load_df_centroid()
+        return self._year_to_extended_time_serie(aggregation_function=np.mean)
-        return OrderedDict(zip(df_centroid['NOM'], df_centroid['REGION']))
    @property
-    def region_names(self):
+    def _year_to_max_daily_time_serie(self):
-        return ['Alps', 'Northern Alps', 'Central Alps', 'Southern Alps', 'Extreme South Alps']
+        return self._year_to_extended_time_serie(aggregation_function=np.max)
+    def _year_to_extended_time_serie(self, aggregation_function) -> OrderedDict:
+        year_to_extended_time_serie = OrderedDict()
+        for year, old_time_serie in super()._year_to_daily_time_serie.items():
+            new_time_serie = np.zeros([len(old_time_serie), len(self.safran_massif_names)])
+            new_time_serie[:, self.nb_region_names:] = old_time_serie
+            for i, region_name in enumerate(self.region_names):
+                massifs_ids_belong_to_region = self.region_name_to_massif_ids[region_name]
+                aggregated_time_serie = aggregation_function(old_time_serie[:, massifs_ids_belong_to_region], axis=1)
+                new_time_serie[:, i] = aggregated_time_serie
+            year_to_extended_time_serie[year] = new_time_serie
+        return year_to_extended_time_serie
--- a/test/test_meteo_france_SCM_study/test_SCM_study.py
+++ b/test/test_meteo_france_SCM_study/test_SCM_study.py
+import os
+import os.path as op
+from collections import OrderedDict
+from typing import List, Dict
+import matplotlib.pyplot as plt
+import pandas as pd
+from netCDF4 import Dataset
+from experiment.meteo_france_SCM_study.abstract_variable import AbstractVariable
+from experiment.meteo_france_SCM_study.massif import safran_massif_names_from_datasets
+from experiment.meteo_france_SCM_study.safran.safran import Safran
+from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
+from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
+    AbstractSpatialCoordinates
+from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima
+from utils import get_full_path, cached_property
+import unittest
+from test.test_utils import load_safran_objects
+class TestSCMStudy(unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        self.study = Safran()
+    def test_massif_safran(self):
+        df_centroid = pd.read_csv(op.join(self.study.map_full_path, 'coordonnees_massifs_alpes.csv'))
+        # Assert that the massif names are the same between SAFRAN and the coordinate file
+        assert not set(self.study.safran_massif_names).symmetric_difference(set(df_centroid['NOM']))
+if __name__ == '__main__':
+    unittest.main()
--- a/test/test_meteo_france_SCM_study/test_safran_gev.py
+++ b/test/test_meteo_france_SCM_study/test_safran_gev.py
+import unittest
+import os
+import os.path as op
+from collections import OrderedDict
+from typing import List, Dict
+import matplotlib.pyplot as plt
+import pandas as pd
+from netCDF4 import Dataset
+from experiment.meteo_france_SCM_study.abstract_variable import AbstractVariable
+from experiment.meteo_france_SCM_study.massif import safran_massif_names_from_datasets
+from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
+from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
+    AbstractSpatialCoordinates
+from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima
+from utils import get_full_path, cached_property
+#
+# from test.test_utils import load_safran_objects
+#
+#
+# class TestFullEstimators(unittest.TestCase):
+#
+#     def test_gev_mle_per_massif(self):
+#         safran_1800_one_day = load_safran_objects()[0]
+#         df = safran_1800_one_day.df_gev_mle_each_massif
+#         self.assertAlmostEqual(df.values.sum(), 1131.4551665871832)
+#
+#
+# if __name__ == '__main__':
+#     unittest.main()
--- a/test/test_safran_study/test_safran_gev.py
+++ b/test/test_safran_study/test_safran_gev.py
-import unittest
-from test.test_utils import load_safran_objects
-class TestFullEstimators(unittest.TestCase):
-    def test_gev_mle_per_massif(self):
-        safran_1800_one_day = load_safran_objects()[0]
-        df = safran_1800_one_day.df_gev_mle_each_massif
-        self.assertAlmostEqual(df.values.sum(), 1131.4551665871832)
-if __name__ == '__main__':
-    unittest.main()