From 3388213bf97a40dbe2fb5046646ce1f851945dd3 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Thu, 18 Feb 2021 10:19:35 +0100 Subject: [PATCH] [projections] add TestSpatioTemporalDatasetForClimateModels. add TemperatureTemporalCovariate. integrate DatasetForClimateModels into altitude_studies.py --- .../cmip5/climate_explorer_cimp5.py | 5 ++- .../adamont_data/utils/download_cmip5.py | 24 ------------- .../altitudes_fit/altitudes_studies.py | 34 ++++++++++++++----- .../coordinates/abstract_coordinates.py | 21 +++++++----- .../abstract_spatio_temporal_coordinates.py | 10 ++++-- ...temporal_coordinates_for_climate_models.py | 27 +++++++++++++++ .../abstract_temporal_covariate_for_fit.py | 27 +++++++++++++-- .../test_one_fold_fit.py | 15 ++++++-- .../test_altitudes_studies.py | 17 ++++++++++ 9 files changed, 129 insertions(+), 51 deletions(-) delete mode 100644 extreme_data/meteo_france_data/adamont_data/utils/download_cmip5.py create mode 100644 spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/spatio_temporal_coordinates_for_climate_models.py diff --git a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py index 01d1a7c9..d029c7c7 100644 --- a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py +++ b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py @@ -49,7 +49,8 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol download_dat(dat_filepath, txt_filepath) # Transform nc file into csv file if not op.exists(csv_filepath): - dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name, rolling=rolling) + dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name, + rolling=rolling) # Load csv file df = pd.read_csv(csv_filepath, index_col=0) @@ -119,12 +120,14 @@ def main_test_cmip5_loader(): print(years) print(temps) + def test_rolling(): df = pd.DataFrame([1, 2, 3, 4, 5]) print(df) df2 = df.rolling(window=3).mean() print(df2) + if __name__ == '__main__': # main_example() # test_rolling() diff --git a/extreme_data/meteo_france_data/adamont_data/utils/download_cmip5.py b/extreme_data/meteo_france_data/adamont_data/utils/download_cmip5.py deleted file mode 100644 index 71b5c599..00000000 --- a/extreme_data/meteo_france_data/adamont_data/utils/download_cmip5.py +++ /dev/null @@ -1,24 +0,0 @@ -import cdsapi - -c = cdsapi.Client() - -models = ['mpi_esm_lr', 'cnrm_cm5'] - - -def get_year_min_and_max(model, experiment): - pass - - -def retrieve(model, experiment): - year_min, year_max = get_year_min_and_max(model, experiment) - c.retrieve( - 'projections-cmip5-monthly-single-levels', - { - 'experiment': 'historical', - 'variable': '2m_temperature', - 'model': 'cnrm_cm5', - 'ensemble_member': 'r1i1p1', - 'period': '195001-200512', - 'format': 'zip', - }, - 'download.zip') diff --git a/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py index efe1e5cf..b0566196 100644 --- a/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py +++ b/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py @@ -4,6 +4,8 @@ from collections import OrderedDict from cached_property import cached_property +from extreme_data.meteo_france_data.adamont_data.abstract_adamont_study import AbstractAdamontStudy +from extreme_data.meteo_france_data.adamont_data.adamont_scenario import scenario_to_str from extreme_data.meteo_france_data.scm_models_data.abstract_study import AbstractStudy from extreme_data.meteo_france_data.scm_models_data.visualization.main_study_visualizer import \ SCM_STUDY_CLASS_TO_ABBREVIATION, STUDY_CLASS_TO_ABBREVIATION @@ -14,6 +16,8 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_co AbstractSpatialCoordinates from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \ AbstractSpatioTemporalCoordinates +from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.spatio_temporal_coordinates_for_climate_models import \ + SpatioTemporalCoordinatesForClimateModels from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \ ConsecutiveTemporalCoordinates from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset @@ -78,12 +82,23 @@ class AltitudesStudies(object): assert len(massif_altitudes) > 0 spatial_coordinates = self.spatial_coordinates_for_altitudes(massif_altitudes) slicer_class = get_slicer_class_from_s_splits(s_split_spatial, s_split_temporal) - return AbstractSpatioTemporalCoordinates(slicer_class=slicer_class, - s_split_spatial=s_split_spatial, - s_split_temporal=s_split_temporal, - transformation_class=self.spatial_transformation_class, - spatial_coordinates=spatial_coordinates, - temporal_coordinates=self.temporal_coordinates) + if isinstance(self.study, AbstractAdamontStudy): + return SpatioTemporalCoordinatesForClimateModels(slicer_class=slicer_class, + s_split_spatial=s_split_spatial, + s_split_temporal=s_split_temporal, + transformation_class=self.spatial_transformation_class, + spatial_coordinates=spatial_coordinates, + temporal_coordinates=self.temporal_coordinates, + gcm_rcm_couple=self.study.gcm_rcm_couple, + scenario_str=scenario_to_str(self.study.scenario), + ) + else: + return AbstractSpatioTemporalCoordinates(slicer_class=slicer_class, + s_split_spatial=s_split_spatial, + s_split_temporal=s_split_temporal, + transformation_class=self.spatial_transformation_class, + spatial_coordinates=spatial_coordinates, + temporal_coordinates=self.temporal_coordinates) @cached_property def temporal_coordinates(self): @@ -115,7 +130,8 @@ class AltitudesStudies(object): def show_or_save_to_file(self, plot_name, show=False, no_title=False, tight_layout=None): study_visualizer = StudyVisualizer(study=self.study, show=show, save_to_file=not show) study_visualizer.plot_name = plot_name - study_visualizer.show_or_save_to_file(add_classic_title=False, dpi=500, no_title=no_title, tight_layout=tight_layout) + study_visualizer.show_or_save_to_file(add_classic_title=False, dpi=500, no_title=no_title, + tight_layout=tight_layout) def run_for_each_massif(self, function, massif_names, **kwargs): massif_names = massif_names if massif_names is not None else self.study.all_massif_names() @@ -165,7 +181,7 @@ class AltitudesStudies(object): moment += ' change (between two block of 30 years) for' moment += 'mean' if not std else 'std' # if change is False: - # moment += ' (for the 60 years of data)' + # moment += ' (for the 60 years of data)' plot_name = '{} of {} maxima of {}'.format(moment, self.study.season_name, SCM_STUDY_CLASS_TO_ABBREVIATION[self.study_class]) ax.set_ylabel('{} ({})'.format(plot_name, self.study.variable_unit), fontsize=15) @@ -196,4 +212,4 @@ class AltitudesStudies(object): moment = function(annual_maxima) mean_moment.append(moment) altitudes.append(altitude) - plot_against_altitude(altitudes, ax, massif_id, massif_name, mean_moment) \ No newline at end of file + plot_against_altitude(altitudes, ax, massif_id, massif_name, mean_moment) diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index 363063c3..29315b02 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -6,8 +6,6 @@ import numpy as np import pandas as pd from mpl_toolkits.mplot3d import Axes3D -from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \ - AbstractTemporalCovariateForFit from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \ AbstractTransformation, IdentityTransformation from spatio_temporal_dataset.coordinates.utils import get_index_without_spatio_temporal_index_suffix @@ -35,8 +33,13 @@ class AbstractCoordinates(object): # Temporal columns COORDINATE_T = 'coord_t' TEMPORAL_SPLIT = 'temporal_split' + # Climate model columns + COORDINATE_RCP = 'coord_rcp' + COORDINATE_GCM = 'coord_gcm' + COORDINATE_RCM = 'coord_rcm' + COORDINATE_CLIMATE_MODEL_NAMES = [COORDINATE_RCP, COORDINATE_GCM, COORDINATE_RCM] # Coordinates columns - COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T] + COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T] + COORDINATE_CLIMATE_MODEL_NAMES # Coordinate type ALL_COORDINATES_ACCEPTED_TYPES = ['int64', 'float64'] COORDINATE_TYPE = 'float64' @@ -50,7 +53,10 @@ class AbstractCoordinates(object): sorted_coordinates_columns = [c for c in self.COORDINATES_NAMES if c in coordinate_columns] self.df_all_coordinates = df.loc[:, sorted_coordinates_columns].copy() # type: pd.DataFrame # Cast coordinates - self.df_all_coordinates = self.df_all_coordinates.astype(self.COORDINATE_TYPE) # type: pd.DataFrame + ind = self.df_all_coordinates.columns.isin(self.COORDINATE_CLIMATE_MODEL_NAMES) + self.df_coordinate_climate_model = self.df_all_coordinates.loc[:, ind].copy() + self.df_all_coordinates = self.df_all_coordinates.loc[:, ~ind] # type: pd.DataFrame + self.df_all_coordinates = self.df_all_coordinates.astype(self.COORDINATE_TYPE) # Slicing attributes self.s_split_spatial = s_split_spatial # type: pd.Series @@ -64,7 +70,7 @@ class AbstractCoordinates(object): # Transformation only works for float coordinates accepted_dtypes = [self.COORDINATE_TYPE] assert len(self.df_all_coordinates.select_dtypes(include=accepted_dtypes).columns) \ - == len(coordinate_columns), 'coordinates columns dtypes should belong to {}'.format(accepted_dtypes) + == len(coordinate_columns) - sum(ind), 'coordinates columns dtypes should belong to {}'.format(accepted_dtypes) # Transformation class is instantiated with all coordinates self.transformation = transformation_class(self.df_all_coordinates) # type: AbstractTransformation assert isinstance(self.transformation, AbstractTransformation) @@ -278,8 +284,7 @@ class AbstractCoordinates(object): df = temporal_transformation.transform_df(df_temporal_coordinates) # Potentially transform the time covariate into another covariate if temporal_covariate_for_fit is not None: - assert issubclass(temporal_covariate_for_fit, AbstractTemporalCovariateForFit) - df.iloc[:, 0] = df.iloc[:, 0].apply(temporal_covariate_for_fit.get_temporal_covariate) + df.loc[:, self.COORDINATE_T] = df.apply(temporal_covariate_for_fit.get_temporal_covariate, axis=1) return df @property @@ -382,4 +387,4 @@ class AbstractCoordinates(object): return self.df_merged.equals(other.df_merged) def __str__(self): - return self.df_coordinates().__str__() + return pd.concat([self.df_coordinates(), self.df_coordinate_climate_model], axis=1).__str__() diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py index 1d798554..2db7f998 100644 --- a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py @@ -19,9 +19,7 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates): transformation_class: type = None, spatial_coordinates: AbstractSpatialCoordinates = None, temporal_coordinates: AbstractTemporalCoordinates = None): - if df is None: - assert spatial_coordinates is not None and temporal_coordinates is not None - df = self.get_df_from_spatial_and_temporal_coordinates(spatial_coordinates, temporal_coordinates) + df = self.load_df_is_needed(df, spatial_coordinates, temporal_coordinates) super().__init__(df, slicer_class, s_split_spatial, s_split_temporal, None) # Spatial coordinates' if spatial_coordinates is None: @@ -42,6 +40,12 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates): self.transformation = MultipleTransformation(transformation_1=self.spatial_coordinates.transformation, transformation_2=self.temporal_coordinates.transformation) + def load_df_is_needed(self, df, spatial_coordinates, temporal_coordinates): + if df is None: + assert spatial_coordinates is not None and temporal_coordinates is not None + df = self.get_df_from_spatial_and_temporal_coordinates(spatial_coordinates, temporal_coordinates) + return df + @property def spatial_coordinates(self): return self._spatial_coordinates diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/spatio_temporal_coordinates_for_climate_models.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/spatio_temporal_coordinates_for_climate_models.py new file mode 100644 index 00000000..1f1a238e --- /dev/null +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/spatio_temporal_coordinates_for_climate_models.py @@ -0,0 +1,27 @@ +import pandas as pd + +from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ + AbstractSpatialCoordinates +from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \ + AbstractSpatioTemporalCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \ + AbstractTemporalCoordinates +from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer + + +class SpatioTemporalCoordinatesForClimateModels(AbstractSpatioTemporalCoordinates): + + def __init__(self, df: pd.DataFrame = None, slicer_class: type = SpatioTemporalSlicer, + s_split_spatial: pd.Series = None, s_split_temporal: pd.Series = None, + transformation_class: type = None, spatial_coordinates: AbstractSpatialCoordinates = None, + temporal_coordinates: AbstractTemporalCoordinates = None, + gcm_rcm_couple=None, + scenario_str=None): + df = self.load_df_is_needed(df, spatial_coordinates, temporal_coordinates) + # Assign the climate model coordinates + gcm, rcm = gcm_rcm_couple + df[self.COORDINATE_RCP] = scenario_str + df[self.COORDINATE_GCM] = gcm + df[self.COORDINATE_RCM] = rcm + super().__init__(df, slicer_class, s_split_spatial, s_split_temporal, transformation_class, spatial_coordinates, + temporal_coordinates) diff --git a/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py index d1b3a617..acd16f0e 100644 --- a/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py +++ b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py @@ -1,14 +1,35 @@ +import pandas as pd + +from extreme_data.meteo_france_data.adamont_data.cmip5.climate_explorer_cimp5 import year_to_global_mean_temp +from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates + + class AbstractTemporalCovariateForFit(object): @classmethod - def get_temporal_covariate(cls, t): + def get_temporal_covariate(cls, row: pd.Series): raise NotImplementedError class TimeTemporalCovariate(AbstractTemporalCovariateForFit): @classmethod - def get_temporal_covariate(cls, t): - return t + def get_temporal_covariate(cls, row: pd.Series): + return row[AbstractCoordinates.COORDINATE_T] +class TemperatureTemporalCovariate(AbstractTemporalCovariateForFit): + gcm_and_scenario_to_d = {} + + @classmethod + def get_temporal_covariate(cls, row: pd.Series): + year = row[AbstractCoordinates.COORDINATE_T] + gcm = None + scenario = None + if (gcm, scenario) not in cls.gcm_and_scenario_to_d: + d = year_to_global_mean_temp(gcm, scenario) + cls.gcm_and_scenario_to_d[(gcm, scenario)] = d + d = cls.gcm_and_scenario_to_d[(gcm, scenario)] + global_mean_temp = d[year] + print(type(global_mean_temp)) + return global_mean_temp diff --git a/test/test_projects/test_altitude_spatial/test_one_fold_fit.py b/test/test_projects/test_altitude_spatial/test_one_fold_fit.py index fa4c9161..bb90656b 100644 --- a/test/test_projects/test_altitude_spatial/test_one_fold_fit.py +++ b/test/test_projects/test_altitude_spatial/test_one_fold_fit.py @@ -9,7 +9,7 @@ from extreme_fit.model.margin_model.polynomial_margin_model.models_based_on_pari from projects.altitude_spatial_model.altitudes_fit.altitudes_studies import AltitudesStudies from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fit import OneFoldFit from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \ - TimeTemporalCovariate + TimeTemporalCovariate, TemperatureTemporalCovariate class TestOneFoldFit(unittest.TestCase): @@ -35,7 +35,7 @@ class TestOneFoldFit(unittest.TestCase): dataset = self.load_dataset(study_class) one_fold_fit = OneFoldFit(self.massif_name, dataset, models_classes=self.model_classes, temporal_covariate_for_fit=None) - print(type(one_fold_fit.best_estimator.margin_model)) + _ = one_fold_fit.best_estimator.margin_model self.assertTrue(True) def test_with_temporal_covariate_for_time(self): @@ -44,9 +44,18 @@ class TestOneFoldFit(unittest.TestCase): one_fold_fit = OneFoldFit(self.massif_name, dataset, models_classes=self.model_classes, temporal_covariate_for_fit=TimeTemporalCovariate) - print(type(one_fold_fit.best_estimator.margin_model)) + _ = one_fold_fit.best_estimator.margin_model self.assertTrue(True) + # def test_with_temporal_covariate_for_temperature(self): + # for study_class in [AdamontSnowfall][:]: + # dataset = self.load_dataset(study_class) + # one_fold_fit = OneFoldFit(self.massif_name, dataset, + # models_classes=self.model_classes, + # temporal_covariate_for_fit=TemperatureTemporalCovariate) + # print(type(one_fold_fit.best_estimator.margin_model)) + # self.assertTrue(True) + if __name__ == '__main__': unittest.main() diff --git a/test/test_projects/test_contrasting/test_altitudes_studies.py b/test/test_projects/test_contrasting/test_altitudes_studies.py index a7a8c14e..a8a0fc0a 100644 --- a/test/test_projects/test_contrasting/test_altitudes_studies.py +++ b/test/test_projects/test_contrasting/test_altitudes_studies.py @@ -1,5 +1,7 @@ import unittest +from extreme_data.meteo_france_data.adamont_data.adamont.adamont_snowfall import AdamontSnowfall +from extreme_data.meteo_france_data.adamont_data.adamont_scenario import AdamontScenario from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day from projects.altitude_spatial_model.altitudes_fit.altitudes_studies import AltitudesStudies from spatio_temporal_dataset.slicer.split import Split @@ -77,5 +79,20 @@ class TestSpatioTemporalDataset(TestAltitudesStudies): self.assertEqual(len(dataset.maxima_gev(split=Split.test_spatiotemporal_spatial)), 3) +class TestSpatioTemporalDatasetForClimateModels(unittest.TestCase): + + def setUp(self) -> None: + super().setUp() + altitudes = [900, 1200] + study_class = AdamontSnowfall + self.studies = AltitudesStudies(study_class, altitudes, + year_min=2009, year_max=2012, + scenario=AdamontScenario.rcp85) + self.massif_name = "Vercors" + + def test_dataset(self): + dataset = self.studies.spatio_temporal_dataset(self.massif_name) + self.assertEqual(len(dataset.coordinates.df_coordinate_climate_model.columns), 3) + if __name__ == '__main__': unittest.main() -- GitLab