From 3388213bf97a40dbe2fb5046646ce1f851945dd3 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Thu, 18 Feb 2021 10:19:35 +0100
Subject: [PATCH] [projections] add TestSpatioTemporalDatasetForClimateModels.
 add TemperatureTemporalCovariate. integrate DatasetForClimateModels into
 altitude_studies.py

---
 .../cmip5/climate_explorer_cimp5.py           |  5 ++-
 .../adamont_data/utils/download_cmip5.py      | 24 -------------
 .../altitudes_fit/altitudes_studies.py        | 34 ++++++++++++++-----
 .../coordinates/abstract_coordinates.py       | 21 +++++++-----
 .../abstract_spatio_temporal_coordinates.py   | 10 ++++--
 ...temporal_coordinates_for_climate_models.py | 27 +++++++++++++++
 .../abstract_temporal_covariate_for_fit.py    | 27 +++++++++++++--
 .../test_one_fold_fit.py                      | 15 ++++++--
 .../test_altitudes_studies.py                 | 17 ++++++++++
 9 files changed, 129 insertions(+), 51 deletions(-)
 delete mode 100644 extreme_data/meteo_france_data/adamont_data/utils/download_cmip5.py
 create mode 100644 spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/spatio_temporal_coordinates_for_climate_models.py

diff --git a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
index 01d1a7c9..d029c7c7 100644
--- a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
+++ b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
@@ -49,7 +49,8 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol
         download_dat(dat_filepath, txt_filepath)
     # Transform nc file into csv file
     if not op.exists(csv_filepath):
-        dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name, rolling=rolling)
+        dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name,
+                   rolling=rolling)
 
     # Load csv file
     df = pd.read_csv(csv_filepath, index_col=0)
@@ -119,12 +120,14 @@ def main_test_cmip5_loader():
             print(years)
             print(temps)
 
+
 def test_rolling():
     df = pd.DataFrame([1, 2, 3, 4, 5])
     print(df)
     df2 = df.rolling(window=3).mean()
     print(df2)
 
+
 if __name__ == '__main__':
     # main_example()
     # test_rolling()
diff --git a/extreme_data/meteo_france_data/adamont_data/utils/download_cmip5.py b/extreme_data/meteo_france_data/adamont_data/utils/download_cmip5.py
deleted file mode 100644
index 71b5c599..00000000
--- a/extreme_data/meteo_france_data/adamont_data/utils/download_cmip5.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import cdsapi
-
-c = cdsapi.Client()
-
-models = ['mpi_esm_lr', 'cnrm_cm5']
-
-
-def get_year_min_and_max(model, experiment):
-    pass
-
-
-def retrieve(model, experiment):
-    year_min, year_max = get_year_min_and_max(model, experiment)
-    c.retrieve(
-        'projections-cmip5-monthly-single-levels',
-        {
-            'experiment': 'historical',
-            'variable': '2m_temperature',
-            'model': 'cnrm_cm5',
-            'ensemble_member': 'r1i1p1',
-            'period': '195001-200512',
-            'format': 'zip',
-        },
-        'download.zip')
diff --git a/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py
index efe1e5cf..b0566196 100644
--- a/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py
+++ b/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py
@@ -4,6 +4,8 @@ from collections import OrderedDict
 
 from cached_property import cached_property
 
+from extreme_data.meteo_france_data.adamont_data.abstract_adamont_study import AbstractAdamontStudy
+from extreme_data.meteo_france_data.adamont_data.adamont_scenario import scenario_to_str
 from extreme_data.meteo_france_data.scm_models_data.abstract_study import AbstractStudy
 from extreme_data.meteo_france_data.scm_models_data.visualization.main_study_visualizer import \
     SCM_STUDY_CLASS_TO_ABBREVIATION, STUDY_CLASS_TO_ABBREVIATION
@@ -14,6 +16,8 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_co
     AbstractSpatialCoordinates
 from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \
     AbstractSpatioTemporalCoordinates
+from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.spatio_temporal_coordinates_for_climate_models import \
+    SpatioTemporalCoordinatesForClimateModels
 from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \
     ConsecutiveTemporalCoordinates
 from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
@@ -78,12 +82,23 @@ class AltitudesStudies(object):
             assert len(massif_altitudes) > 0
             spatial_coordinates = self.spatial_coordinates_for_altitudes(massif_altitudes)
         slicer_class = get_slicer_class_from_s_splits(s_split_spatial, s_split_temporal)
-        return AbstractSpatioTemporalCoordinates(slicer_class=slicer_class,
-                                                 s_split_spatial=s_split_spatial,
-                                                 s_split_temporal=s_split_temporal,
-                                                 transformation_class=self.spatial_transformation_class,
-                                                 spatial_coordinates=spatial_coordinates,
-                                                 temporal_coordinates=self.temporal_coordinates)
+        if isinstance(self.study, AbstractAdamontStudy):
+            return SpatioTemporalCoordinatesForClimateModels(slicer_class=slicer_class,
+                                                             s_split_spatial=s_split_spatial,
+                                                             s_split_temporal=s_split_temporal,
+                                                             transformation_class=self.spatial_transformation_class,
+                                                             spatial_coordinates=spatial_coordinates,
+                                                             temporal_coordinates=self.temporal_coordinates,
+                                                             gcm_rcm_couple=self.study.gcm_rcm_couple,
+                                                             scenario_str=scenario_to_str(self.study.scenario),
+                                                             )
+        else:
+            return AbstractSpatioTemporalCoordinates(slicer_class=slicer_class,
+                                                     s_split_spatial=s_split_spatial,
+                                                     s_split_temporal=s_split_temporal,
+                                                     transformation_class=self.spatial_transformation_class,
+                                                     spatial_coordinates=spatial_coordinates,
+                                                     temporal_coordinates=self.temporal_coordinates)
 
     @cached_property
     def temporal_coordinates(self):
@@ -115,7 +130,8 @@ class AltitudesStudies(object):
     def show_or_save_to_file(self, plot_name, show=False, no_title=False, tight_layout=None):
         study_visualizer = StudyVisualizer(study=self.study, show=show, save_to_file=not show)
         study_visualizer.plot_name = plot_name
-        study_visualizer.show_or_save_to_file(add_classic_title=False, dpi=500, no_title=no_title, tight_layout=tight_layout)
+        study_visualizer.show_or_save_to_file(add_classic_title=False, dpi=500, no_title=no_title,
+                                              tight_layout=tight_layout)
 
     def run_for_each_massif(self, function, massif_names, **kwargs):
         massif_names = massif_names if massif_names is not None else self.study.all_massif_names()
@@ -165,7 +181,7 @@ class AltitudesStudies(object):
             moment += ' change (between two block of 30 years) for'
         moment += 'mean' if not std else 'std'
         # if change is False:
-            # moment += ' (for the 60 years of data)'
+        # moment += ' (for the 60 years of data)'
         plot_name = '{} of {} maxima of {}'.format(moment, self.study.season_name,
                                                    SCM_STUDY_CLASS_TO_ABBREVIATION[self.study_class])
         ax.set_ylabel('{} ({})'.format(plot_name, self.study.variable_unit), fontsize=15)
@@ -196,4 +212,4 @@ class AltitudesStudies(object):
                     moment = function(annual_maxima)
                 mean_moment.append(moment)
                 altitudes.append(altitude)
-        plot_against_altitude(altitudes, ax, massif_id, massif_name, mean_moment)
\ No newline at end of file
+        plot_against_altitude(altitudes, ax, massif_id, massif_name, mean_moment)
diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
index 363063c3..29315b02 100644
--- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py
+++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
@@ -6,8 +6,6 @@ import numpy as np
 import pandas as pd
 from mpl_toolkits.mplot3d import Axes3D
 
-from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
-    AbstractTemporalCovariateForFit
 from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \
     AbstractTransformation, IdentityTransformation
 from spatio_temporal_dataset.coordinates.utils import get_index_without_spatio_temporal_index_suffix
@@ -35,8 +33,13 @@ class AbstractCoordinates(object):
     # Temporal columns
     COORDINATE_T = 'coord_t'
     TEMPORAL_SPLIT = 'temporal_split'
+    # Climate model columns
+    COORDINATE_RCP = 'coord_rcp'
+    COORDINATE_GCM = 'coord_gcm'
+    COORDINATE_RCM = 'coord_rcm'
+    COORDINATE_CLIMATE_MODEL_NAMES = [COORDINATE_RCP, COORDINATE_GCM, COORDINATE_RCM]
     # Coordinates columns
-    COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T]
+    COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T] + COORDINATE_CLIMATE_MODEL_NAMES
     # Coordinate type
     ALL_COORDINATES_ACCEPTED_TYPES = ['int64', 'float64']
     COORDINATE_TYPE = 'float64'
@@ -50,7 +53,10 @@ class AbstractCoordinates(object):
         sorted_coordinates_columns = [c for c in self.COORDINATES_NAMES if c in coordinate_columns]
         self.df_all_coordinates = df.loc[:, sorted_coordinates_columns].copy()  # type: pd.DataFrame
         # Cast coordinates
-        self.df_all_coordinates = self.df_all_coordinates.astype(self.COORDINATE_TYPE)  # type: pd.DataFrame
+        ind = self.df_all_coordinates.columns.isin(self.COORDINATE_CLIMATE_MODEL_NAMES)
+        self.df_coordinate_climate_model = self.df_all_coordinates.loc[:, ind].copy()
+        self.df_all_coordinates = self.df_all_coordinates.loc[:, ~ind] # type: pd.DataFrame
+        self.df_all_coordinates = self.df_all_coordinates.astype(self.COORDINATE_TYPE)
 
         # Slicing attributes
         self.s_split_spatial = s_split_spatial  # type: pd.Series
@@ -64,7 +70,7 @@ class AbstractCoordinates(object):
         # Transformation only works for float coordinates
         accepted_dtypes = [self.COORDINATE_TYPE]
         assert len(self.df_all_coordinates.select_dtypes(include=accepted_dtypes).columns) \
-               == len(coordinate_columns), 'coordinates columns dtypes should belong to {}'.format(accepted_dtypes)
+               == len(coordinate_columns) - sum(ind), 'coordinates columns dtypes should belong to {}'.format(accepted_dtypes)
         # Transformation class is instantiated with all coordinates
         self.transformation = transformation_class(self.df_all_coordinates)  # type: AbstractTransformation
         assert isinstance(self.transformation, AbstractTransformation)
@@ -278,8 +284,7 @@ class AbstractCoordinates(object):
             df = temporal_transformation.transform_df(df_temporal_coordinates)
         # Potentially transform the time covariate into another covariate
         if temporal_covariate_for_fit is not None:
-            assert issubclass(temporal_covariate_for_fit, AbstractTemporalCovariateForFit)
-            df.iloc[:, 0] = df.iloc[:, 0].apply(temporal_covariate_for_fit.get_temporal_covariate)
+            df.loc[:, self.COORDINATE_T] = df.apply(temporal_covariate_for_fit.get_temporal_covariate, axis=1)
         return df
 
     @property
@@ -382,4 +387,4 @@ class AbstractCoordinates(object):
         return self.df_merged.equals(other.df_merged)
 
     def __str__(self):
-        return self.df_coordinates().__str__()
+        return pd.concat([self.df_coordinates(), self.df_coordinate_climate_model], axis=1).__str__()
diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py
index 1d798554..2db7f998 100644
--- a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py
+++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py
@@ -19,9 +19,7 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates):
                  transformation_class: type = None,
                  spatial_coordinates: AbstractSpatialCoordinates = None,
                  temporal_coordinates: AbstractTemporalCoordinates = None):
-        if df is None:
-            assert spatial_coordinates is not None and temporal_coordinates is not None
-            df = self.get_df_from_spatial_and_temporal_coordinates(spatial_coordinates, temporal_coordinates)
+        df = self.load_df_is_needed(df, spatial_coordinates, temporal_coordinates)
         super().__init__(df, slicer_class, s_split_spatial, s_split_temporal, None)
         # Spatial coordinates'
         if spatial_coordinates is None:
@@ -42,6 +40,12 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates):
         self.transformation = MultipleTransformation(transformation_1=self.spatial_coordinates.transformation,
                                                      transformation_2=self.temporal_coordinates.transformation)
 
+    def load_df_is_needed(self, df, spatial_coordinates, temporal_coordinates):
+        if df is None:
+            assert spatial_coordinates is not None and temporal_coordinates is not None
+            df = self.get_df_from_spatial_and_temporal_coordinates(spatial_coordinates, temporal_coordinates)
+        return df
+
     @property
     def spatial_coordinates(self):
         return self._spatial_coordinates
diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/spatio_temporal_coordinates_for_climate_models.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/spatio_temporal_coordinates_for_climate_models.py
new file mode 100644
index 00000000..1f1a238e
--- /dev/null
+++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/spatio_temporal_coordinates_for_climate_models.py
@@ -0,0 +1,27 @@
+import pandas as pd
+
+from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
+    AbstractSpatialCoordinates
+from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \
+    AbstractSpatioTemporalCoordinates
+from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \
+    AbstractTemporalCoordinates
+from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
+
+
+class SpatioTemporalCoordinatesForClimateModels(AbstractSpatioTemporalCoordinates):
+
+    def __init__(self, df: pd.DataFrame = None, slicer_class: type = SpatioTemporalSlicer,
+                 s_split_spatial: pd.Series = None, s_split_temporal: pd.Series = None,
+                 transformation_class: type = None, spatial_coordinates: AbstractSpatialCoordinates = None,
+                 temporal_coordinates: AbstractTemporalCoordinates = None,
+                 gcm_rcm_couple=None,
+                 scenario_str=None):
+        df = self.load_df_is_needed(df, spatial_coordinates, temporal_coordinates)
+        # Assign the climate model coordinates
+        gcm, rcm = gcm_rcm_couple
+        df[self.COORDINATE_RCP] = scenario_str
+        df[self.COORDINATE_GCM] = gcm
+        df[self.COORDINATE_RCM] = rcm
+        super().__init__(df, slicer_class, s_split_spatial, s_split_temporal, transformation_class, spatial_coordinates,
+                         temporal_coordinates)
diff --git a/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py
index d1b3a617..acd16f0e 100644
--- a/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py
+++ b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py
@@ -1,14 +1,35 @@
+import pandas as pd
+
+from extreme_data.meteo_france_data.adamont_data.cmip5.climate_explorer_cimp5 import year_to_global_mean_temp
+from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
+
+
 class AbstractTemporalCovariateForFit(object):
 
     @classmethod
-    def get_temporal_covariate(cls, t):
+    def get_temporal_covariate(cls, row: pd.Series):
         raise NotImplementedError
 
 
 class TimeTemporalCovariate(AbstractTemporalCovariateForFit):
 
     @classmethod
-    def get_temporal_covariate(cls, t):
-        return t
+    def get_temporal_covariate(cls, row: pd.Series):
+        return row[AbstractCoordinates.COORDINATE_T]
 
 
+class TemperatureTemporalCovariate(AbstractTemporalCovariateForFit):
+    gcm_and_scenario_to_d = {}
+
+    @classmethod
+    def get_temporal_covariate(cls, row: pd.Series):
+        year = row[AbstractCoordinates.COORDINATE_T]
+        gcm = None
+        scenario = None
+        if (gcm, scenario) not in cls.gcm_and_scenario_to_d:
+            d = year_to_global_mean_temp(gcm, scenario)
+            cls.gcm_and_scenario_to_d[(gcm, scenario)] = d
+        d = cls.gcm_and_scenario_to_d[(gcm, scenario)]
+        global_mean_temp = d[year]
+        print(type(global_mean_temp))
+        return global_mean_temp
diff --git a/test/test_projects/test_altitude_spatial/test_one_fold_fit.py b/test/test_projects/test_altitude_spatial/test_one_fold_fit.py
index fa4c9161..bb90656b 100644
--- a/test/test_projects/test_altitude_spatial/test_one_fold_fit.py
+++ b/test/test_projects/test_altitude_spatial/test_one_fold_fit.py
@@ -9,7 +9,7 @@ from extreme_fit.model.margin_model.polynomial_margin_model.models_based_on_pari
 from projects.altitude_spatial_model.altitudes_fit.altitudes_studies import AltitudesStudies
 from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fit import OneFoldFit
 from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
-    TimeTemporalCovariate
+    TimeTemporalCovariate, TemperatureTemporalCovariate
 
 
 class TestOneFoldFit(unittest.TestCase):
@@ -35,7 +35,7 @@ class TestOneFoldFit(unittest.TestCase):
             dataset = self.load_dataset(study_class)
             one_fold_fit = OneFoldFit(self.massif_name, dataset,
                                       models_classes=self.model_classes, temporal_covariate_for_fit=None)
-            print(type(one_fold_fit.best_estimator.margin_model))
+            _ = one_fold_fit.best_estimator.margin_model
         self.assertTrue(True)
 
     def test_with_temporal_covariate_for_time(self):
@@ -44,9 +44,18 @@ class TestOneFoldFit(unittest.TestCase):
             one_fold_fit = OneFoldFit(self.massif_name, dataset,
                                       models_classes=self.model_classes,
                                       temporal_covariate_for_fit=TimeTemporalCovariate)
-            print(type(one_fold_fit.best_estimator.margin_model))
+            _ = one_fold_fit.best_estimator.margin_model
         self.assertTrue(True)
 
+    # def test_with_temporal_covariate_for_temperature(self):
+    #     for study_class in [AdamontSnowfall][:]:
+    #         dataset = self.load_dataset(study_class)
+    #         one_fold_fit = OneFoldFit(self.massif_name, dataset,
+    #                                   models_classes=self.model_classes,
+    #                                   temporal_covariate_for_fit=TemperatureTemporalCovariate)
+    #         print(type(one_fold_fit.best_estimator.margin_model))
+    #     self.assertTrue(True)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_projects/test_contrasting/test_altitudes_studies.py b/test/test_projects/test_contrasting/test_altitudes_studies.py
index a7a8c14e..a8a0fc0a 100644
--- a/test/test_projects/test_contrasting/test_altitudes_studies.py
+++ b/test/test_projects/test_contrasting/test_altitudes_studies.py
@@ -1,5 +1,7 @@
 import unittest
 
+from extreme_data.meteo_france_data.adamont_data.adamont.adamont_snowfall import AdamontSnowfall
+from extreme_data.meteo_france_data.adamont_data.adamont_scenario import AdamontScenario
 from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day
 from projects.altitude_spatial_model.altitudes_fit.altitudes_studies import AltitudesStudies
 from spatio_temporal_dataset.slicer.split import Split
@@ -77,5 +79,20 @@ class TestSpatioTemporalDataset(TestAltitudesStudies):
         self.assertEqual(len(dataset.maxima_gev(split=Split.test_spatiotemporal_spatial)), 3)
 
 
+class TestSpatioTemporalDatasetForClimateModels(unittest.TestCase):
+
+    def setUp(self) -> None:
+        super().setUp()
+        altitudes = [900, 1200]
+        study_class = AdamontSnowfall
+        self.studies = AltitudesStudies(study_class, altitudes,
+                                        year_min=2009, year_max=2012,
+                                        scenario=AdamontScenario.rcp85)
+        self.massif_name = "Vercors"
+
+    def test_dataset(self):
+        dataset = self.studies.spatio_temporal_dataset(self.massif_name)
+        self.assertEqual(len(dataset.coordinates.df_coordinate_climate_model.columns), 3)
+
 if __name__ == '__main__':
     unittest.main()
-- 
GitLab