From a8b41514e0e8b3de9aed7e724b13200f2ca67af0 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Thu, 18 Feb 2021 10:53:52 +0100
Subject: [PATCH] [projections] add AnomalyTemperatureTemporalCovariate and
 test. and anomaly computation for cmip5 global temperatures

---
 .../adamont_data/adamont_scenario.py          | 18 ++++++++++++-
 .../cmip5/climate_explorer_cimp5.py           | 25 ++++++++++++++-----
 .../coordinates/abstract_coordinates.py       |  4 ++-
 .../abstract_temporal_covariate_for_fit.py    | 13 +++++-----
 .../temperature_covariate.py                  |  4 ++-
 .../test_one_fold_fit.py                      | 23 +++++++++--------
 .../test_altitudes_studies.py                 |  4 ++-
 7 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/extreme_data/meteo_france_data/adamont_data/adamont_scenario.py b/extreme_data/meteo_france_data/adamont_data/adamont_scenario.py
index 26e87aee..604a3a12 100644
--- a/extreme_data/meteo_france_data/adamont_data/adamont_scenario.py
+++ b/extreme_data/meteo_france_data/adamont_data/adamont_scenario.py
@@ -15,7 +15,8 @@ class AdamontScenario(Enum):
 
 adamont_scenarios_real = [AdamontScenario.histo, AdamontScenario.rcp26, AdamontScenario.rcp45, AdamontScenario.rcp85]
 rcp_scenarios = [AdamontScenario.rcp26, AdamontScenario.rcp45, AdamontScenario.rcp85]
-rcm_scenarios_extended = [AdamontScenario.rcp26_extended, AdamontScenario.rcp45_extended, AdamontScenario.rcp85_extended]
+rcm_scenarios_extended = [AdamontScenario.rcp26_extended, AdamontScenario.rcp45_extended,
+                          AdamontScenario.rcp85_extended]
 
 
 def get_linestyle_from_scenario(adamont_scenario):
@@ -90,10 +91,12 @@ def get_gcm_rcm_couples(adamont_scenario=AdamontScenario.histo, adamont_version=
             gcm_rcm_couples.remove(couple_to_remove)
     return list(gcm_rcm_couples)
 
+
 def get_gcm_list(adamont_version):
     s = set([gcm for gcm, _ in get_gcm_rcm_couples(adamont_version=adamont_version)])
     return list(s)
 
+
 def get_suffix_for_the_nc_file(adamont_scenario, gcm_rcm_couple):
     assert isinstance(adamont_scenario, AdamontScenario)
     year_min, year_max = get_year_min_and_year_max_from_scenario(adamont_scenario, gcm_rcm_couple)
@@ -105,6 +108,19 @@ def scenario_to_str(adamont_scenario):
                      for real_adamont_scenario in scenario_to_real_scenarios(adamont_scenario)])
 
 
+def str_to_scenario(str_scenario):
+    if '26' in str_scenario:
+        return AdamontScenario.rcp26
+    elif '45' in str_scenario:
+        return AdamontScenario.rcp45
+    elif '85' in str_scenario:
+        return AdamontScenario.rcp85
+    elif 'HISTO' in str_scenario:
+        raise NotImplementedError('No global temperatures defined for histo')
+    else:
+        raise NotImplementedError(str_scenario)
+
+
 def scenario_to_real_scenarios(adamont_scenario):
     if adamont_scenario in adamont_scenarios_real:
         return [adamont_scenario]
diff --git a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
index d029c7c7..96779b8b 100644
--- a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
+++ b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
@@ -24,22 +24,24 @@ def get_scenario_name(scenario):
         return str(scenario).split('.')[-1]
 
 
-def year_to_global_mean_temp(gcm, scenario, year_min=None, year_max=None, rolling=30):
+def year_to_global_mean_temp(gcm, scenario, year_min=None, year_max=None, rolling=30, anomaly=False):
     d = OrderedDict()
-    years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min, year_max, rolling=rolling)
+    years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min, year_max, rolling=rolling, anomaly=anomaly)
     for year, global_mean_temp in zip(years, global_mean_temps):
         d[year] = global_mean_temp
     return d
 
 
-def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rolling=30):
+def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rolling=30, anomaly=False):
     # Compute everything
     ensemble_member = 'r{}i1p1'.format(gcm_to_rnumber[gcm])
     scenario_name = get_scenario_name(scenario)
 
     # Standards
     mean_annual_column_name = 'Annual mean'
+    anomaly_annual_column_name = 'Annual anomaly'
     rolling_mean_annual_column_name = 'Rolling annual mean for window={}'.format(rolling)
+    rolling_anomaly_annual_column_name = 'Rolling annual anomaly for window={}'.format(rolling)
     filename = 'global_tas_Amon_{}_{}_{}'.format(gcm, scenario_name, ensemble_member)
     dat_filepath = op.join(GLOBALTEMP_DATA_PATH, filename + '.dat')
     txt_filepath = op.join(GLOBALTEMP_DATA_PATH, filename + '.txt')
@@ -50,6 +52,7 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol
     # Transform nc file into csv file
     if not op.exists(csv_filepath):
         dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name,
+                   anomaly_annual_column_name,  rolling_anomaly_annual_column_name,
                    rolling=rolling)
 
     # Load csv file
@@ -63,13 +66,20 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol
     assert years[0] >= year_min
     assert years[-1] <= year_max
     if rolling:
-        global_mean_temp = list(df[rolling_mean_annual_column_name])
+        if anomaly:
+            global_mean_temp = list(df[rolling_anomaly_annual_column_name])
+        else:
+            global_mean_temp = list(df[rolling_mean_annual_column_name])
     else:
-        global_mean_temp = list(df[mean_annual_column_name])
+        if anomaly:
+            global_mean_temp = list(df[anomaly_annual_column_name])
+        else:
+            global_mean_temp = list(df[mean_annual_column_name])
     return years, global_mean_temp
 
 
-def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name, rolling=30):
+def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name,
+               anomaly_annual_column_name,  rolling_anomaly_annual_column_name, rolling=30):
     d = OrderedDict()
     with open(txt_filepath, 'r') as f:
         for i, l in enumerate(f):
@@ -89,9 +99,12 @@ def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean
     l = [np.nan] + list(l)
     assert len(l) == len(df.index)
     df[mean_annual_column_name] = l
+    mean_for_reference_period_1850_to_1900 = df.loc[1850:1900, mean_annual_column_name].mean()
+    df[anomaly_annual_column_name] = df[mean_annual_column_name] - mean_for_reference_period_1850_to_1900
     # Computing the rolling
     if rolling is not None:
         df[rolling_mean_annual_column_name] = df[mean_annual_column_name].rolling(window=rolling).mean()
+        df[rolling_anomaly_annual_column_name] = df[anomaly_annual_column_name].rolling(window=rolling).mean()
     df.to_csv(csv_filepath)
 
 
diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
index 29315b02..d78bb441 100644
--- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py
+++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
@@ -284,7 +284,9 @@ class AbstractCoordinates(object):
             df = temporal_transformation.transform_df(df_temporal_coordinates)
         # Potentially transform the time covariate into another covariate
         if temporal_covariate_for_fit is not None:
-            df.loc[:, self.COORDINATE_T] = df.apply(temporal_covariate_for_fit.get_temporal_covariate, axis=1)
+            df_climate_model = df_sliced(df=self.df_coordinate_climate_model, split=split, slicer=self.slicer)
+            df_input = pd.concat([df, df_climate_model], axis=1)
+            df.loc[:, self.COORDINATE_T] = df_input.apply(temporal_covariate_for_fit.get_temporal_covariate, axis=1)
         return df
 
     @property
diff --git a/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py
index acd16f0e..b06f94c1 100644
--- a/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py
+++ b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py
@@ -1,5 +1,6 @@
 import pandas as pd
 
+from extreme_data.meteo_france_data.adamont_data.adamont_scenario import str_to_scenario
 from extreme_data.meteo_france_data.adamont_data.cmip5.climate_explorer_cimp5 import year_to_global_mean_temp
 from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
 
@@ -18,18 +19,18 @@ class TimeTemporalCovariate(AbstractTemporalCovariateForFit):
         return row[AbstractCoordinates.COORDINATE_T]
 
 
-class TemperatureTemporalCovariate(AbstractTemporalCovariateForFit):
+class AnomalyTemperatureTemporalCovariate(AbstractTemporalCovariateForFit):
     gcm_and_scenario_to_d = {}
 
     @classmethod
     def get_temporal_covariate(cls, row: pd.Series):
         year = row[AbstractCoordinates.COORDINATE_T]
-        gcm = None
-        scenario = None
+        gcm = row[AbstractCoordinates.COORDINATE_GCM]
+        scenario_str = row[AbstractCoordinates.COORDINATE_RCP]
+        scenario = str_to_scenario(scenario_str)
         if (gcm, scenario) not in cls.gcm_and_scenario_to_d:
-            d = year_to_global_mean_temp(gcm, scenario)
+            d = year_to_global_mean_temp(gcm, scenario, anomaly=True)
             cls.gcm_and_scenario_to_d[(gcm, scenario)] = d
         d = cls.gcm_and_scenario_to_d[(gcm, scenario)]
-        global_mean_temp = d[year]
-        print(type(global_mean_temp))
+        global_mean_temp = d[year] * 1000
         return global_mean_temp
diff --git a/spatio_temporal_dataset/coordinates/temporal_coordinates/temperature_covariate.py b/spatio_temporal_dataset/coordinates/temporal_coordinates/temperature_covariate.py
index ca39e6f5..5b5cd7b5 100644
--- a/spatio_temporal_dataset/coordinates/temporal_coordinates/temperature_covariate.py
+++ b/spatio_temporal_dataset/coordinates/temporal_coordinates/temperature_covariate.py
@@ -1,6 +1,7 @@
 from extreme_data.meteo_france_data.mean_alps_temperature import load_year_to_mean_alps_temperatures
 from extreme_data.nasa_data.global_mean_temperature import load_year_to_mean_global_temperature
 from root_utils import classproperty
+from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
 from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
     AbstractTemporalCovariateForFit
 import pandas as pd
@@ -20,7 +21,8 @@ class AbstractTemperatureCovariate(AbstractTemporalCovariateForFit):
         raise NotImplemented
 
     @classmethod
-    def get_temporal_covariate(cls, t):
+    def get_temporal_covariate(cls, row):
+        t = row[AbstractCoordinates.COORDINATE_T]
         try:
             return pd.Series(cls.year_to_global_mean[t])
         except KeyError:
diff --git a/test/test_projects/test_altitude_spatial/test_one_fold_fit.py b/test/test_projects/test_altitude_spatial/test_one_fold_fit.py
index bb90656b..32ebd32b 100644
--- a/test/test_projects/test_altitude_spatial/test_one_fold_fit.py
+++ b/test/test_projects/test_altitude_spatial/test_one_fold_fit.py
@@ -1,6 +1,7 @@
 import unittest
 
 from extreme_data.meteo_france_data.adamont_data.adamont.adamont_snowfall import AdamontSnowfall
+from extreme_data.meteo_france_data.adamont_data.adamont_scenario import AdamontScenario
 from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day
 from extreme_fit.model.margin_model.polynomial_margin_model.gev_altitudinal_models import StationaryAltitudinal
 from extreme_fit.model.margin_model.polynomial_margin_model.models_based_on_pariwise_analysis.gev_with_constant_shape_wrt_altitude import \
@@ -9,7 +10,7 @@ from extreme_fit.model.margin_model.polynomial_margin_model.models_based_on_pari
 from projects.altitude_spatial_model.altitudes_fit.altitudes_studies import AltitudesStudies
 from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fit import OneFoldFit
 from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
-    TimeTemporalCovariate, TemperatureTemporalCovariate
+    TimeTemporalCovariate, AnomalyTemperatureTemporalCovariate
 
 
 class TestOneFoldFit(unittest.TestCase):
@@ -25,8 +26,8 @@ class TestOneFoldFit(unittest.TestCase):
                               AltitudinalShapeConstantTimeLocScaleLinear
                               ][:]
 
-    def load_dataset(self, study_class):
-        self.studies = AltitudesStudies(study_class, self.altitudes)
+    def load_dataset(self, study_class, **kwargs_study):
+        self.studies = AltitudesStudies(study_class, self.altitudes, **kwargs_study)
         dataset = self.studies.spatio_temporal_dataset(massif_name=self.massif_name)
         return dataset
 
@@ -47,14 +48,14 @@ class TestOneFoldFit(unittest.TestCase):
             _ = one_fold_fit.best_estimator.margin_model
         self.assertTrue(True)
 
-    # def test_with_temporal_covariate_for_temperature(self):
-    #     for study_class in [AdamontSnowfall][:]:
-    #         dataset = self.load_dataset(study_class)
-    #         one_fold_fit = OneFoldFit(self.massif_name, dataset,
-    #                                   models_classes=self.model_classes,
-    #                                   temporal_covariate_for_fit=TemperatureTemporalCovariate)
-    #         print(type(one_fold_fit.best_estimator.margin_model))
-    #     self.assertTrue(True)
+    def test_with_temporal_covariate_for_temperature_anomaly(self):
+        for study_class in [AdamontSnowfall][:]:
+            dataset = self.load_dataset(study_class, scenario=AdamontScenario.rcp85)
+            one_fold_fit = OneFoldFit(self.massif_name, dataset,
+                                      models_classes=self.model_classes,
+                                      temporal_covariate_for_fit=AnomalyTemperatureTemporalCovariate)
+            _ = one_fold_fit.best_estimator.margin_model
+        self.assertTrue(True)
 
 
 if __name__ == '__main__':
diff --git a/test/test_projects/test_contrasting/test_altitudes_studies.py b/test/test_projects/test_contrasting/test_altitudes_studies.py
index a8a0fc0a..6ef79a4f 100644
--- a/test/test_projects/test_contrasting/test_altitudes_studies.py
+++ b/test/test_projects/test_contrasting/test_altitudes_studies.py
@@ -85,13 +85,15 @@ class TestSpatioTemporalDatasetForClimateModels(unittest.TestCase):
         super().setUp()
         altitudes = [900, 1200]
         study_class = AdamontSnowfall
+        self.scenario = AdamontScenario.rcp85
         self.studies = AltitudesStudies(study_class, altitudes,
                                         year_min=2009, year_max=2012,
-                                        scenario=AdamontScenario.rcp85)
+                                        scenario=self.scenario)
         self.massif_name = "Vercors"
 
     def test_dataset(self):
         dataset = self.studies.spatio_temporal_dataset(self.massif_name)
+        self.assertEqual(self.studies.study.scenario, AdamontScenario.rcp85)
         self.assertEqual(len(dataset.coordinates.df_coordinate_climate_model.columns), 3)
 
 if __name__ == '__main__':
-- 
GitLab