From bc920bf60f4e702628755c5ea18906f203fb0774 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Fri, 26 Feb 2021 15:14:11 +0100
Subject: [PATCH] [refactor] Add annual maxima dates for Adamont v2 and test them.

---
 .../adamont_data/abstract_adamont_study.py    | 73 ++++++++++++-------
 .../adamont_data/adamont/adamont_crocus.py    |  3 +-
 .../adamont_data/adamont/adamont_variables.py | 47 ++++++++----
 .../test_adamont_study.py                     | 10 ++-
 4 files changed, 87 insertions(+), 46 deletions(-)

diff --git a/extreme_data/meteo_france_data/adamont_data/abstract_adamont_study.py b/extreme_data/meteo_france_data/adamont_data/abstract_adamont_study.py
index 00ccb21b..85aa30be 100644
--- a/extreme_data/meteo_france_data/adamont_data/abstract_adamont_study.py
+++ b/extreme_data/meteo_france_data/adamont_data/abstract_adamont_study.py
@@ -60,7 +60,8 @@ class AbstractAdamontStudy(AbstractStudy):
                          multiprocessing=multiprocessing, season=season, french_region=french_region)
         self.adamont_version = adamont_version
         self.gcm_rcm_couple = gcm_rcm_couple
-        self.gcm_rcm_full_name = get_gcm_rcm_couple_adamont_to_full_name(adamont_version=self.adamont_version)[gcm_rcm_couple]
+        self.gcm_rcm_full_name = get_gcm_rcm_couple_adamont_to_full_name(adamont_version=self.adamont_version)[
+            gcm_rcm_couple]
         self.scenario = scenario
         assert issubclass(self.variable_class, AbstractAdamontVariable)
         # Assert the massif_name are in the same order
@@ -77,17 +78,31 @@ class AbstractAdamontStudy(AbstractStudy):
         if self.adamont_version == 1:
             return super().year_to_annual_maxima
         else:
-            return self.load_year_to_annual_maxima_version_2()
+            return self.load_year_to_annual_maxima_data_version_2(maxima_date=False)
+
+    @cached_property
+    def year_to_annual_maxima_index(self) -> OrderedDict:
+        if self.adamont_version == 1:
+            return super().year_to_annual_maxima_index
+        else:
+            return self.load_year_to_annual_maxima_data_version_2(maxima_date=True)
 
     # Loading part for adamont v2
 
-    def load_year_to_annual_maxima_version_2(self):
-        year_to_annual_maxima = OrderedDict()
-        for dataset, real_scenario in zip(self.datasets, self.adamont_real_scenarios):
-            annual_maxima = np.array(dataset.variables[self.variable_class.indicator_name_for_maxima])
-            annual_maxima = self.variable_class.transform_annual_maxima(annual_maxima)
-            assert annual_maxima.shape[1] == len(self.column_mask)
-            annual_maxima = annual_maxima[:, self.column_mask]
+    @cached_property
+    def datasets_for_dates(self):
+        return [self._load_dataset(scenario, maxima_date=True) for scenario in self.adamont_real_scenarios]
+
+    def load_year_to_annual_maxima_data_version_2(self, maxima_date):
+        year_to_annual_maxima_data = OrderedDict()
+        datasets = self.datasets_for_dates if maxima_date else self.datasets
+        for dataset, real_scenario in zip(datasets, self.adamont_real_scenarios):
+            annual_maxima_data = np.array(dataset.variables[self.indicator_name(maxima_date)])
+            # Add potential transformation for the maxima
+            if not maxima_date:
+                annual_maxima_data = self.variable_class.transform_annual_maxima(annual_maxima_data)
+            assert annual_maxima_data.shape[1] == len(self.column_mask)
+            annual_maxima_data = annual_maxima_data[:, self.column_mask]
             year_min, year_max = get_year_min_and_year_max_from_scenario(real_scenario, self.gcm_rcm_couple)
             years = list(range(year_min, year_max + 1))
             time = np.array(dataset.variables['time'])
@@ -99,36 +114,43 @@ class AbstractAdamontStudy(AbstractStudy):
             # dates = [start + timedelta(hours=int(h)) for h in time]
             # print(["{}-{}".format(date.year-1, date.year) for date in dates])
             assert len(years) == len(time), msg
-            for year, maxima in zip(years, annual_maxima):
+            for year, maxima in zip(years, annual_maxima_data):
                 if self.year_min <= year <= self.year_max:
-                    year_to_annual_maxima[year] = maxima
-        return year_to_annual_maxima
+                    year_to_annual_maxima_data[year] = maxima
+        return year_to_annual_maxima_data
 
-    def _load_dataset(self, scenario):
+    def _load_dataset(self, scenario, maxima_date):
         scenario_name = scenario_to_str(scenario)
-        nc_filename = self.nc_filename_adamont_v2(scenario)
-        nc_folder = op.join(ADAMONT_v2_PATH, self.variable_folder_name, scenario_name)
+        nc_filename = self.nc_filename_adamont_v2(scenario, maxima_date)
+        nc_folder = op.join(ADAMONT_v2_PATH, self.variable_folder_name(maxima_date), scenario_name)
         nc_filepath = op.join(nc_folder, nc_filename)
         # Assert that the file is present, otherwise download it
         if not op.exists(nc_filepath):
-            self._download_year_to_annual_maxima_version_2(scenario, nc_folder)
+            self._download_year_to_annual_maxima_version_2(scenario, nc_folder, maxima_date)
         # Load the file
         dataset = Dataset(filename=nc_filepath)
         return dataset
 
-    def _download_year_to_annual_maxima_version_2(self, scenario, path_folder):
+    def _download_year_to_annual_maxima_version_2(self, scenario, path_folder, maxima_date):
         scenario_name = self._scenario_to_str_adamont_v2(scenario)
         directory = self.gcm_rcm_full_name + '_' + scenario_name
-        filename = self.nc_filename_adamont_v2(scenario)
+        filename = self.nc_filename_adamont_v2(scenario, maxima_date)
         full_path = op.join(ADAMONT_v2_WEBPATH, directory, filename)
         # Download file
         request = 'wget {} -P {}'.format(full_path, path_folder)
         print(request)
         subprocess.run(request, shell=True)
 
-    def nc_filename_adamont_v2(self, scenario):
+    def nc_filename_adamont_v2(self, scenario, maxima_date):
         scenario_name = self._scenario_to_str_adamont_v2(scenario)
-        return '_'.join([self.variable_class.indicator_name_for_maxima, self.gcm_rcm_full_name, scenario_name]) + '.nc'
+        indicator_name = self.indicator_name(maxima_date)
+        return '_'.join([indicator_name, self.gcm_rcm_full_name, scenario_name]) + '.nc'
+
+    def indicator_name(self, maxima_date) -> str:
+        if maxima_date:
+            return self.variable_class.indicator_name_for_maxima_date
+        else:
+            return self.variable_class.indicator_name_for_maxima
 
     def _scenario_to_str_adamont_v2(self, scenario):
         scenario_name = scenario_to_str(scenario)
@@ -210,13 +232,12 @@ class AbstractAdamontStudy(AbstractStudy):
         if self.adamont_version == 1:
             return [Dataset(file_path) for file_path in self.nc_file_paths]
         else:
-            return [self._load_dataset(scenario) for scenario in self.adamont_real_scenarios]
+            return [self._load_dataset(scenario, maxima_date=False) for scenario in self.adamont_real_scenarios]
 
     # PATHS
 
-    @property
-    def variable_folder_name(self):
-        return self.variable_class.variable_name_for_folder_and_nc_file()
+    def variable_folder_name(self, annual_maxima_date=False):
+        return self.variable_class.variable_folder_name(annual_maxima_date)
 
     @property
     def region_name(self):
@@ -232,7 +253,7 @@ class AbstractAdamontStudy(AbstractStudy):
 
     @property
     def nc_files_paths(self):
-        return [op.join(ADAMONT_PATH, self.variable_folder_name, name) for name in self.scenario_names]
+        return [op.join(ADAMONT_PATH, self.variable_folder_name(), name) for name in self.scenario_names]
 
     @property
     def nc_file_paths(self):
@@ -240,7 +261,7 @@ class AbstractAdamontStudy(AbstractStudy):
         for scenario, scenario_name, files_path in zip(self.adamont_real_scenarios, self.scenario_names,
                                                        self.nc_files_paths):
             suffix_nc_file = get_suffix_for_the_nc_file(scenario, self.gcm_rcm_couple)
-            nc_file = '{}_FORCING_{}_{}_{}_{}.nc'.format(self.variable_folder_name, self.gcm_rcm_full_name,
+            nc_file = '{}_FORCING_{}_{}_{}_{}.nc'.format(self.variable_folder_name(), self.gcm_rcm_full_name,
                                                          scenario_name,
                                                          self.region_name, suffix_nc_file)
             file_paths.append(op.join(files_path, nc_file))
diff --git a/extreme_data/meteo_france_data/adamont_data/adamont/adamont_crocus.py b/extreme_data/meteo_france_data/adamont_data/adamont/adamont_crocus.py
index 32da20bd..579c2b86 100644
--- a/extreme_data/meteo_france_data/adamont_data/adamont/adamont_crocus.py
+++ b/extreme_data/meteo_france_data/adamont_data/adamont/adamont_crocus.py
@@ -25,4 +25,5 @@ if __name__ == '__main__':
     for study_class in [AdamontSwe, AdamontSnowLoad]:
         study = study_class(altitude=1800, adamont_version=2, gcm_rcm_couple=('HadGEM2-ES', 'RACMO22E'),
                                 scenario=AdamontScenario.rcp85_extended)
-        print(study.year_to_annual_maxima[2000])
\ No newline at end of file
+        print(study.year_to_annual_maxima[2000])
+        print(study.year_to_annual_maxima_index[2000])
\ No newline at end of file
diff --git a/extreme_data/meteo_france_data/adamont_data/adamont/adamont_variables.py b/extreme_data/meteo_france_data/adamont_data/adamont/adamont_variables.py
index ab6a061c..c9273e31 100644
--- a/extreme_data/meteo_france_data/adamont_data/adamont/adamont_variables.py
+++ b/extreme_data/meteo_france_data/adamont_data/adamont/adamont_variables.py
@@ -9,23 +9,43 @@ from root_utils import classproperty
 
 class AbstractAdamontVariable(AbstractVariable):
 
+    # Folder and keyword names (the folder name is used for both Adamont v1 and v2 paths)
+
+    @classmethod
+    def variable_folder_name(cls, annual_maxima_date):
+        raise NotImplementedError
+
     @classmethod
-    def variable_name_for_folder_and_nc_file(cls):
-        return cls.keyword()
+    def keyword(cls):
+        raise NotImplementedError
 
     @classmethod
     def indicator_name_for_maxima(cls):
         raise NotImplementedError
 
+    @classmethod
+    def indicator_name_for_maxima_date(cls):
+        raise NotImplementedError
+
+    @classmethod
+    def get_folder_name_from_indicator_name(cls, indicator_name):
+        return indicator_name.replace('-', '_').capitalize()
+
     @classmethod
     def transform_annual_maxima(cls, annual_maxima):
         return annual_maxima
 
+
 class SafranSnowfallSimulationVariable(AbstractAdamontVariable):
     UNIT = SafranSnowfallVariable.UNIT
     NAME = SafranSnowfallVariable.NAME
 
-    # For adamont v1
+    @classmethod
+    def variable_folder_name(cls, annual_maxima_date):
+        if annual_maxima_date:
+            return cls.get_folder_name_from_indicator_name(cls.indicator_name_for_maxima_date)
+        else:
+            return 'Snow'
 
     @property
     def daily_time_serie_array(self) -> np.ndarray:
@@ -35,12 +55,6 @@ class SafranSnowfallSimulationVariable(AbstractAdamontVariable):
     def keyword(cls):
         return 'SNOW'
 
-    # For adamont v2
-
-    @classmethod
-    def variable_name_for_folder_and_nc_file(cls):
-        return 'Snow'
-
     @classproperty
     def indicator_name_for_maxima(cls):
         return 'max-1day-snowf'
@@ -50,16 +64,21 @@ class CrocusSweSimulationVariable(AbstractAdamontVariable):
     UNIT = CrocusTotalSweVariable.UNIT
     NAME = CrocusTotalSweVariable.NAME
 
-    # For adamont v2
-
     @classmethod
-    def variable_name_for_folder_and_nc_file(cls):
-        return cls.indicator_name_for_maxima.replace('-', '_').capitalize()
+    def variable_folder_name(cls, annual_maxima_date):
+        if annual_maxima_date:
+            return cls.get_folder_name_from_indicator_name(cls.indicator_name_for_maxima_date)
+        else:
+            return cls.get_folder_name_from_indicator_name(cls.indicator_name_for_maxima)
 
     @classproperty
     def indicator_name_for_maxima(cls):
         return 'swe-max-winter-11-04-NN'
 
+    @classproperty
+    def indicator_name_for_maxima_date(cls):
+        return 'date-swe-max-winter-11-04-NN'
+
 
 class CrocusTotalSnowLoadVariable(CrocusSweSimulationVariable):
     NAME = TotalSnowLoadVariable.NAME
@@ -68,5 +87,3 @@ class CrocusTotalSnowLoadVariable(CrocusSweSimulationVariable):
     @classmethod
     def transform_annual_maxima(cls, annual_maxima):
         return AbstractSnowLoadVariable.transform_swe_into_snow_load(annual_maxima)
-
-
diff --git a/test/test_extreme_data/test_meteo_france_data/test_adamont_study.py b/test/test_extreme_data/test_meteo_france_data/test_adamont_study.py
index e7305c84..6c7a94d9 100644
--- a/test/test_extreme_data/test_meteo_france_data/test_adamont_study.py
+++ b/test/test_extreme_data/test_meteo_france_data/test_adamont_study.py
@@ -17,10 +17,10 @@ class TestAdamontStudy(unittest.TestCase):
         self.assertTrue(True)
 
     def test_load_adamont_swe(self):
-        self.load_many_study(AdamontSwe, version=2)
+        self.load_many_study(AdamontSwe, version=2, load_index=True)
         self.assertTrue(True)
 
-    def test_load_adamont_swe(self):
+    def test_load_adamont_snow_load(self):
         maxima = [study_class(altitude=1800, adamont_version=2, gcm_rcm_couple=('HadGEM2-ES', 'RACMO22E'),
                               scenario=AdamontScenario.rcp85_extended).year_to_annual_maxima[2000][0]
                   for study_class in [AdamontSwe, AdamontSnowLoad]]
@@ -28,7 +28,7 @@ class TestAdamontStudy(unittest.TestCase):
         snow_load_from_swe = swe * CrocusVariable.snow_load_multiplication_factor
         self.assertEqual(snow_load_from_swe, snow_load)
 
-    def load_many_study(self, adamont_study_class, version):
+    def load_many_study(self, adamont_study_class, version, load_index=False):
         study_list = [
             adamont_study_class(altitude=900, adamont_version=version),
             adamont_study_class(altitude=1800, adamont_version=version)
@@ -42,7 +42,9 @@ class TestAdamontStudy(unittest.TestCase):
         study_list.extend([adamont_study_class(altitude=900, gcm_rcm_couple=gcm_rcm_couple, adamont_version=version)
                            for gcm_rcm_couple in get_gcm_rcm_couples(adamont_version=version)])
         for study in study_list:
-            annual_maxima_for_year_min = study.year_to_annual_maxima[study.year_min]
+            _ = study.year_to_annual_maxima[study.year_min]
+            if load_index:
+                _ = study.year_to_annual_maxima_index[study.year_min]
 
     def test_massifs_names_adamont_v2(self):
         year_min = 2004
-- 
GitLab