diff --git a/extreme_data/meteo_france_data/mean_alps_temperature.py b/extreme_data/meteo_france_data/mean_alps_temperature.py new file mode 100644 index 0000000000000000000000000000000000000000..4256c05468905babced7ea90deb9f160513df699 --- /dev/null +++ b/extreme_data/meteo_france_data/mean_alps_temperature.py @@ -0,0 +1,26 @@ +import pandas as pd +import matplotlib.pyplot as plt + +from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranTemperature + + +def show_year_to_mean_alps_temperatures(): + ax = plt.gca() + for altitude in range(900, 3300, 300): + # for altitude in [900]: + year_to_mean_alps_temperature = load_year_to_mean_alps_temperatures(altitude) + ax.plot(year_to_mean_alps_temperature.keys(), year_to_mean_alps_temperature.values(), label=altitude) + ax.legend() + plt.show() + + +def load_year_to_mean_alps_temperatures(altitude=900): + study = SafranTemperature(altitude=altitude) + df = pd.DataFrame.from_dict(data=study.year_to_annual_mean).transpose().mean(axis=1) # type: pd.Series + df.index = df.index.astype(float) + year_to_mean_alps_temperature = df.to_dict() + return year_to_mean_alps_temperature + + +if __name__ == '__main__': + show_year_to_mean_alps_temperatures() diff --git a/extreme_data/nasa_data/global_mean_temperature.py b/extreme_data/nasa_data/global_mean_temperature.py index 8475b218138769ba5b799f2c23c7a3ce0989403d..2b98d317ef2f171139918244599fbe395126bf59 100644 --- a/extreme_data/nasa_data/global_mean_temperature.py +++ b/extreme_data/nasa_data/global_mean_temperature.py @@ -16,9 +16,9 @@ def load_year_to_mean_global_temperature(): df = pd.read_csv(edf_filepath) df = df.astype({'Year': 'float'}) d = dict(zip(df['Year'], df['Actual Temp'])) - # Cheat for the last years - print('Warning: using fake values for the last few years !!!') - for idx in range(1, 5): - d[2016 + idx] = d[2016] + # # Cheat for the last years + # print('Warning: using fake values for the last few years !!!') + # for idx in range(1, 5): + # d[2016 + idx] = d[2016] return d diff --git a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py index 8a73686eb5e93647c94b6e047762da2074d1e147..62661c2582c33782bee30bb57a8fce6c7bce30a9 100644 --- a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py +++ b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py @@ -2,6 +2,7 @@ from abc import ABC import numpy.testing as npt import numpy as np +import pandas as pd from cached_property import cached_property from extreme_fit.estimator.abstract_estimator import AbstractEstimator @@ -29,26 +30,29 @@ class LinearMarginEstimator(AbstractMarginEstimator): self.margin_model = margin_model def _fit(self) -> AbstractResultFromModelFit: - return self.margin_model.fitmargin_from_maxima_gev(data=self.maxima_gev_train, - df_coordinates_spat=self.df_coordinates_spat, - df_coordinates_temp=self.df_coordinates_temp) + data = self.data(self.train_split) + df_coordinate_temp = self.df_coordinates_temp(self.train_split) + df_coordinate_spat = self.df_coordinates_spat(self.train_split) + return self.margin_model.fitmargin_from_maxima_gev(data=data, + df_coordinates_spat=df_coordinate_spat, + df_coordinates_temp=df_coordinate_temp) - @property - def df_coordinates_spat(self): - return self.dataset.coordinates.df_spatial_coordinates(split=self.train_split, + def data(self, split): + return self._maxima_gev(split) + + def _maxima_gev(self, split): + return self.dataset.maxima_gev_for_spatial_extremes_package(split) + + def df_coordinates_spat(self, split): + return self.dataset.coordinates.df_spatial_coordinates(split=split, drop_duplicates=self.margin_model.drop_duplicates) - @property - def df_coordinates_temp(self): - return self.dataset.coordinates.df_temporal_coordinates_for_fit(split=self.train_split, + def df_coordinates_temp(self, split): + return self.dataset.coordinates.df_temporal_coordinates_for_fit(split=split, temporal_covariate_for_fit=self.margin_model.temporal_covariate_for_fit, starting_point=self.margin_model.starting_point, drop_duplicates=self.margin_model.drop_duplicates) - @property - def maxima_gev_train(self): - return self.dataset.maxima_gev_for_spatial_extremes_package(self.train_split) - @cached_property def function_from_fit(self) -> LinearMarginFunction: return load_margin_function(self, self.margin_model) @@ -56,7 +60,7 @@ class LinearMarginEstimator(AbstractMarginEstimator): def nllh(self, split=Split.all): nllh = 0 maxima_values = self.dataset.maxima_gev(split=split) - coordinate_values = self.dataset.coordinates_values(split=split) + coordinate_values = self.dataset.df_coordinates(split=split).values for maximum, coordinate in zip(maxima_values, coordinate_values): assert len( maximum) == 1, 'So far, only one observation for each coordinate, but code would be easy to change' diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py index b69a6b4375a2c075c74ea24f58287220fd30bc46..c02d1bd98f847699b81861feb758214407bd73f0 100644 --- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py +++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py @@ -35,13 +35,15 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame, df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit: - if len(data) == 1: - data = data[0] - assert len(data) == len(df_coordinates_temp.values), 'len(data)={} != len(temp)={}'.format(len(data), - len( - df_coordinates_temp.values)) - else: - data = data.flatten() + data = data.flatten() + # if len(data) == 1: + # data = data[0] + # assert len(data) == len(df_coordinates_temp.values), 'len(data)={} != len(temp)={}'.format(len(data), + # len( + # df_coordinates_temp.values)) + # else: + # data = data.flatten() + x = ro.FloatVector(data) if self.params_class is GevParams: if self.fit_method == MarginFitMethod.is_mev_gev_fit: diff --git a/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py index 5bd6c2aeeab9af9486f34fd46601d3f9ddfc5244..940d4fa3f1bfd064ca00af9c507a17f49373717f 100644 --- a/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py +++ b/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py @@ -43,7 +43,7 @@ class AltitudesStudies(object): # Dataset Loader def spatio_temporal_dataset(self, massif_name, s_split_spatial: pd.Series = None, - s_split_temporal: pd.Series = None): + s_split_temporal: pd.Series = None, top_n_values_to_remove=None): coordinate_values_to_maxima = {} massif_altitudes = self.massif_name_to_altitudes[massif_name] for altitude in massif_altitudes: diff --git a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py index f22c1703dd40299744944672a84e58e7957f9305..aee48ba6151de3e16ac75dd1a28db49608dc6060 100644 --- a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py +++ b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py @@ -6,8 +6,7 @@ from extreme_fit.model.margin_model.polynomial_margin_model.utils import ALTITUD ALTITUDINAL_GEV_MODELS_LOCATION_QUADRATIC_MINIMUM, ALTITUDINAL_GEV_MODELS_LOCATION_ONLY_SCALE_ALTITUDES, ALTITUDINAL_GEV_MODELS_LOCATION from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.plot_total_aic import plot_total_aic, \ plot_individual_aic -from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \ - MeanGlobalTemperatureCovariate +from spatio_temporal_dataset.coordinates.temporal_coordinates.temperature_covariate import MeanAlpsTemperatureCovariate mpl.rcParams['text.usetex'] = True mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}'] @@ -45,7 +44,10 @@ def plot_altitudinal_fit(studies, massif_names=None): model_classes=model_classes, massif_names=massif_names, show=False, - temporal_covariate_for_fit=None) + temporal_covariate_for_fit=None, + # temporal_covariate_for_fit=MeanAlpsTemperatureCovariate, + top_n_values_to_remove=None, + ) # Plot the results for the model that minimizes the individual aic plot_individual_aic(visualizer) # Plot the results for the model that minimizes the total aic diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py index 1bce4578aae56629bb8f06edba7d433603b81b78..b4aca0e71a9092491936a506d77b2f1ea0d9fbd2 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py @@ -29,7 +29,8 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): massif_names=None, fit_method=MarginFitMethod.extremes_fevd_mle, temporal_covariate_for_fit=None, - display_only_model_that_pass_anderson_test=True): + display_only_model_that_pass_anderson_test=True, + top_n_values_to_remove=None): super().__init__(studies.study, show=show, save_to_file=not show) self.studies = studies self.non_stationary_models = model_classes @@ -41,7 +42,8 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): # Load one fold fit self._massif_name_to_one_fold_fit = {} for massif_name in self.massif_names: - dataset = studies.spatio_temporal_dataset(massif_name=massif_name) + assert top_n_values_to_remove is None + dataset = studies.spatio_temporal_dataset(massif_name=massif_name, top_n_values_to_remove=top_n_values_to_remove) old_fold_fit = OneFoldFit(massif_name, dataset, model_classes, self.fit_method, self.temporal_covariate_for_fit) self._massif_name_to_one_fold_fit[massif_name] = old_fold_fit diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index 8d1539856dd06597bea6798f01daeec1c1996674..363063c3b9cd4a8b5c58a105ff9856e9d9569899 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -230,12 +230,6 @@ class AbstractCoordinates(object): # Temporal attributes - @property - def temporal_dims(self): - start = self.nb_spatial_coordinates - end = start + self.nb_temporal_coordinates - return list(range(start, end)) - @property def temporal_coordinates_names(self) -> List[str]: return [self.COORDINATE_T] if self.COORDINATE_T in self.df_all_coordinates else [] diff --git a/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py index 02777561c9ee68aa9f521f60712f8f718c5f508e..d1b3a6179f1101744a5e59fefb27069f3a1826c4 100644 --- a/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py +++ b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py @@ -1,9 +1,3 @@ -import pandas as pd - -from extreme_data.nasa_data.global_mean_temperature import load_year_to_mean_global_temperature -from root_utils import classproperty - - class AbstractTemporalCovariateForFit(object): @classmethod @@ -18,19 +12,3 @@ class TimeTemporalCovariate(AbstractTemporalCovariateForFit): return t -class MeanGlobalTemperatureCovariate(AbstractTemporalCovariateForFit): - - _d = None - - @classproperty - def year_to_global_mean(cls): - if cls._d is None: - cls._d = load_year_to_mean_global_temperature() - return cls._d - - @classmethod - def get_temporal_covariate(cls, t): - try: - return pd.Series(cls.year_to_global_mean[t]) - except KeyError: - raise KeyError('Global mean temperature is not known for Year t={}'.format(t)) diff --git a/spatio_temporal_dataset/coordinates/temporal_coordinates/temperature_covariate.py b/spatio_temporal_dataset/coordinates/temporal_coordinates/temperature_covariate.py new file mode 100644 index 0000000000000000000000000000000000000000..ca39e6f554955ee53eff380f1cb91cd59143928d --- /dev/null +++ b/spatio_temporal_dataset/coordinates/temporal_coordinates/temperature_covariate.py @@ -0,0 +1,41 @@ +from extreme_data.meteo_france_data.mean_alps_temperature import load_year_to_mean_alps_temperatures +from extreme_data.nasa_data.global_mean_temperature import load_year_to_mean_global_temperature +from root_utils import classproperty +from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \ + AbstractTemporalCovariateForFit +import pandas as pd + + +class AbstractTemperatureCovariate(AbstractTemporalCovariateForFit): + _d = None + + @classproperty + def year_to_global_mean(cls): + if cls._d is None: + cls._d = cls.load_year_to_temperature_covariate() + return cls._d + + @classmethod + def load_year_to_temperature_covariate(cls): + raise NotImplemented + + @classmethod + def get_temporal_covariate(cls, t): + try: + return pd.Series(cls.year_to_global_mean[t]) + except KeyError: + raise KeyError('Global mean temperature is not known for Year t={}'.format(t)) + + +class MeanGlobalTemperatureCovariate(AbstractTemperatureCovariate): + + @classmethod + def load_year_to_temperature_covariate(cls): + return load_year_to_mean_global_temperature() + + +class MeanAlpsTemperatureCovariate(AbstractTemperatureCovariate): + + @classmethod + def load_year_to_temperature_covariate(cls): + return load_year_to_mean_alps_temperatures() diff --git a/test/test_extreme_data/test_nasa_data/test_mean_global_temperature.py b/test/test_extreme_data/test_nasa_data/test_mean_global_temperature.py index c91196b4fdb0c09c1e6dba4335ba8f37db7bed5f..45abe11af269cc084b03e6b5a19dfbb850f156ac 100644 --- a/test/test_extreme_data/test_nasa_data/test_mean_global_temperature.py +++ b/test/test_extreme_data/test_nasa_data/test_mean_global_temperature.py @@ -1,5 +1,6 @@ import unittest +from extreme_data.meteo_france_data.mean_alps_temperature import load_year_to_mean_alps_temperatures from extreme_data.nasa_data.global_mean_temperature import load_year_to_mean_global_temperature @@ -14,6 +15,16 @@ class TestMeanGlobalTemperatures(unittest.TestCase): value = list(d.values())[0] self.assertIsInstance(value, float) + def test_year_to_mean_alps_temperatures(self): + d = load_year_to_mean_alps_temperatures() + self.assertIn(2019, d) + self.assertNotIn(2020, d) + self.assertIn(1959, d) + key = list(d.keys())[0] + self.assertIsInstance(key, float) + value = list(d.values())[0] + self.assertIsInstance(value, float) + if __name__ == '__main__': unittest.main() diff --git a/test/test_spatio_temporal_dataset/test_coordinates.py b/test/test_spatio_temporal_dataset/test_coordinates.py index bd5a883d566eb8f94e3aced52e2e6419029e942f..ce5365378835b0ded31d011ac946ef03803c5e35 100644 --- a/test/test_spatio_temporal_dataset/test_coordinates.py +++ b/test/test_spatio_temporal_dataset/test_coordinates.py @@ -18,9 +18,11 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coo from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \ CircleSpatialCoordinates from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \ - AbstractTemporalCovariateForFit, TimeTemporalCovariate, MeanGlobalTemperatureCovariate + AbstractTemporalCovariateForFit, TimeTemporalCovariate from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \ ConsecutiveTemporalCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.temperature_covariate import \ + MeanGlobalTemperatureCovariate from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \ CenteredScaledNormalization from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.uniform_normalization import \