diff --git a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py index 12e0169101c0ee91e94e4b1af3d5375c7d219ad5..93f7a7f4fae2dfc97e8ff7f7eb28975b001c0c0c 100644 --- a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py +++ b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py @@ -75,6 +75,19 @@ class LinearMarginEstimator(AbstractMarginEstimator): assert not np.isinf(nllh) return nllh + def sorted_empirical_standard_gumbel_quantiles(self, split=Split.all): + sorted_empirical_quantiles = [] + maxima_values = self.dataset.maxima_gev(split=split) + coordinate_values = self.dataset.df_coordinates(split=split).values + for maximum, coordinate in zip(maxima_values, coordinate_values): + gev_param = self.function_from_fit.get_params( + coordinate=coordinate, + is_transformed=False) + maximum_standardized = gev_param.gumbel_standardization(maximum[0]) + sorted_empirical_quantiles.append(maximum_standardized) + sorted_empirical_quantiles = sorted(sorted_empirical_quantiles) + return sorted_empirical_quantiles + def deviance(self, split=Split.all): return 2 * self.nllh(split=split) diff --git a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py index a0573a7d2e26b3ce01eb5a2041c2a90c901194d0..9c47c027a83dfbf7d3a3458d81f7ba69717ca66c 100644 --- a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py +++ b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py @@ -3,26 +3,19 @@ from typing import List import matplotlib as mpl -from projects.altitude_spatial_model.altitudes_fit.plot_histogram_altitude_studies import \ - plot_histogram_all_trends_against_altitudes, plot_histogram_all_models_against_altitudes +from projects.altitude_spatial_model.altitudes_fit.plots.plot_coherence_curves import plot_coherence_curves mpl.rcParams['text.usetex'] = True mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}'] from projects.altitude_spatial_model.altitudes_fit.utils_altitude_studies_visualizer import load_visualizer_list -from extreme_fit.model.margin_model.polynomial_margin_model.utils import \ - ALTITUDINAL_GEV_MODELS_BASED_ON_POINTWISE_ANALYSIS from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitude_group import altitudes_for_groups from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.plot_total_aic import plot_individual_aic from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day, SafranSnowfall3Days, \ - SafranSnowfall5Days, SafranSnowfall7Days, SafranPrecipitation1Day, SafranPrecipitation3Days, \ - SafranPrecipitation5Days, SafranPrecipitation7Days + SafranSnowfall5Days, SafranSnowfall7Days from extreme_data.meteo_france_data.scm_models_data.utils import Season -from projects.altitude_spatial_model.altitudes_fit.altitudes_studies import AltitudesStudies -from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitudes_studies_visualizer_for_non_stationary_models import \ - AltitudesStudiesVisualizerForNonStationaryModels def main(): @@ -59,13 +52,15 @@ def main_loop(altitudes_list, massif_names, seasons, study_classes): def plot_visualizers(massif_names, visualizer_list): - plot_histogram_all_trends_against_altitudes(massif_names, visualizer_list) - plot_histogram_all_models_against_altitudes(massif_names, visualizer_list) + # plot_histogram_all_trends_against_altitudes(massif_names, visualizer_list) + # plot_histogram_all_models_against_altitudes(massif_names, visualizer_list) + # plot_coherence_curves(massif_names, visualizer_list) + pass def plot_visualizer(massif_names, visualizer): # Plot time series - visualizer.studies.plot_maxima_time_series(massif_names=massif_names) + # visualizer.studies.plot_maxima_time_series(massif_names=massif_names) # Plot moments against altitude # for std in [True, False][:]: # for change in [True, False, None]: diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py index 74ad0ebbe4163ebc33a9821925f7b3e3f38a9e46..f5d7c97ab0bd230f215be7bc71d95fb8b72bfb25 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py @@ -1,4 +1,5 @@ from collections import Counter +from math import ceil, floor from typing import List, Dict import matplotlib @@ -49,11 +50,14 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): self.massif_name_to_massif_id = {m: i for i, m in enumerate(self.massif_names)} self.altitude_group = get_altitude_group_from_altitudes(self.studies.altitudes) # Load one fold fit + self.massif_name_to_massif_altitudes = {} self._massif_name_to_one_fold_fit = {} for massif_name in self.massif_names: # Load valid massif altitudes massif_altitudes = self.get_massif_altitudes(massif_name) if self.load_condition(massif_altitudes): + # Save the massif altitudes only for those who pass the condition + self.massif_name_to_massif_altitudes[massif_name] = massif_altitudes # Load dataset dataset = studies.spatio_temporal_dataset(massif_name=massif_name, massif_altitudes=massif_altitudes) old_fold_fit = OneFoldFit(massif_name, dataset, model_classes, self.fit_method, @@ -79,8 +83,8 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): percentage_of_non_zeros = 100 * np.count_nonzero(annual_maxima) / len(annual_maxima) if percentage_of_non_zeros > 90: massif_altitudes.append(altitude) - else: - print(massif_name, altitude, percentage_of_non_zeros) + # else: + # print(massif_name, altitude, percentage_of_non_zeros) return massif_altitudes def load_condition(self, massif_altitudes): @@ -435,3 +439,34 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): def model_names(self): massif_name = list(self.massif_name_to_one_fold_fit.keys())[0] return self.massif_name_to_one_fold_fit[massif_name].model_names + + def plot_qqplots(self): + for massif_name, one_fold_fit in self.massif_name_to_one_fold_fit.items(): + ax = plt.gca() + standard_gumbel_quantiles = one_fold_fit.standard_gumbel_quantiles() + unconstrained_empirical_quantiles = one_fold_fit.best_estimator.sorted_empirical_standard_gumbel_quantiles() + all_quantiles = standard_gumbel_quantiles + unconstrained_empirical_quantiles + epsilon = 0.1 + ax_lim = [min(all_quantiles) - epsilon, max(all_quantiles) + epsilon] + + model_name = self.massif_name_to_best_name[massif_name] + altitudes = self.massif_name_to_massif_altitudes[massif_name] + massif_name_corrected = massif_name.replace('_', ' ') + label = '{} for altitudes {}'.format(massif_name_corrected, ' & '.join([str(a) + 'm' for a in altitudes])) + ax.plot(standard_gumbel_quantiles, unconstrained_empirical_quantiles, linestyle='None', + label=label + '\n(selected model is ${}$)'.format(model_name), marker='o') + + size_label = 20 + ax.set_xlabel("Theoretical quantile", fontsize=size_label) + ax.set_ylabel("Empirical quantile", fontsize=size_label) + ax.set_xlim(ax_lim) + ax.set_ylim(ax_lim) + + ax.plot(ax_lim, ax_lim, color='k') + ticks = [i for i in range(ceil(ax_lim[0]), floor(ax_lim[1]) + 1)] + ax.set_xticks(ticks) + ax.set_yticks(ticks) + ax.tick_params(labelsize=15) + plot_name = 'qqplot/{}'.format(massif_name_corrected) + self.studies.show_or_save_to_file(plot_name=plot_name, show=self.show) + plt.close() diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py index 59daa9b9963daed6939fae01f549eaa6a2dc2f0a..a84e1258f30e17a2fca302ff2e76e8ee3d598bd4 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py @@ -4,6 +4,7 @@ import rpy2 from cached_property import cached_property from scipy.stats import chi2 +from extreme_fit.distribution.gev.gev_params import GevParams from extreme_fit.distribution.gumbel.gumbel_gof import goodness_of_fit_anderson from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import AbstractMarginEstimator, \ LinearMarginEstimator @@ -128,7 +129,9 @@ class OneFoldFit(object): def sorted_estimators_with_stationary(self): if self.only_models_that_pass_anderson_test: return [e for e in self.sorted_estimators - if self.goodness_of_fit_test(e) and self.sensitivity_of_fit_test(e)] + if self.goodness_of_fit_test(e) + and self.sensitivity_of_fit_test_top_maxima(e) + and self.sensitivity_of_fit_test_last_years(e)] else: return self._sorted_estimators_without_stationary @@ -256,7 +259,7 @@ class OneFoldFit(object): # self._folder_to_goodness[self.folder_for_plots] = goodness_of_fit_anderson_test # return goodness_of_fit_anderson_test - def sensitivity_of_fit_test(self, estimator: LinearMarginEstimator): + def sensitivity_of_fit_test_top_maxima(self, estimator: LinearMarginEstimator): # Build the dataset without the maxima for each altitude new_dataset = AbstractDataset.remove_top_maxima(self.dataset.observations, self.dataset.coordinates) @@ -266,10 +269,27 @@ class OneFoldFit(object): fit_method=self.fit_method, temporal_covariate_for_fit=self.temporal_covariate_for_fit) # Compare sign of change - print(self.massif_name, self.sign_of_change(estimator), self.sign_of_change(new_estimator)) has_not_opposite_sign = self.sign_of_change(estimator) * self.sign_of_change(new_estimator) >= 0 return has_not_opposite_sign + + def sensitivity_of_fit_test_last_years(self, estimator: LinearMarginEstimator): + # Build the dataset without the maxima for each altitude + new_dataset = AbstractDataset.remove_last_maxima(self.dataset.observations, + self.dataset.coordinates, + nb_years=10) + # Fit the new estimator + model_class = type(estimator.margin_model) + new_estimator = fitted_linear_margin_estimator_short(model_class=model_class, + dataset=new_dataset, + fit_method=self.fit_method, + temporal_covariate_for_fit=self.temporal_covariate_for_fit) + # Compare sign of change + has_not_opposite_sign = self.sign_of_change(estimator) * self.sign_of_change(new_estimator) >= 0 + # if not has_not_opposite_sign: + # print('Last years', self.massif_name, model_class, self.sign_of_change(estimator), self.sign_of_change(new_estimator)) + return has_not_opposite_sign + def sign_of_change(self, estimator): return_levels = [] for year in [2019 - 50, 2019]: @@ -281,25 +301,19 @@ class OneFoldFit(object): return 100 * (return_levels[1] - return_levels[0]) / return_levels[0] def goodness_of_fit_test(self, estimator): - quantiles = self.compute_empirical_quantiles(estimator=estimator) + quantiles = estimator.sorted_empirical_standard_gumbel_quantiles() goodness_of_fit_anderson_test = goodness_of_fit_anderson(quantiles, self.SIGNIFICANCE_LEVEL) return goodness_of_fit_anderson_test - def compute_empirical_quantiles(self, estimator): - empirical_quantiles = [] - df_maxima_gev = self.dataset.observations.df_maxima_gev - df_coordinates = self.dataset.coordinates.df_coordinates() - for coordinate, maximum in zip(df_coordinates.values.copy(), df_maxima_gev.values.copy()): - gev_param = estimator.function_from_fit.get_params( - coordinate=coordinate, - is_transformed=False) - assert len(maximum) == 1 - maximum_standardized = gev_param.gumbel_standardization(maximum[0]) - empirical_quantiles.append(maximum_standardized) - empirical_quantiles = sorted(empirical_quantiles) - return empirical_quantiles + def standard_gumbel_quantiles(self): + standard_gumbel_distribution = GevParams(loc=0, scale=1, shape=0) + n = len(self.dataset.coordinates) + standard_gumbel_quantiles = [standard_gumbel_distribution.quantile(i / (n + 1)) for i in range(1, n + 1)] + return standard_gumbel_quantiles + # def best_confidence_interval(self): # EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(self.best_estimator, # ci_method=ConfidenceIntervalMethodFromExtremes.ci_mle, # temporal_covariate=np.array([2019, self.altitude_plot]),) + diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/plot_total_aic.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/plot_total_aic.py index af5bdaefef9cb5f7365520e228bcef27fcbc37e6..92cc046bce1130292765090e655f8b1a61d251d3 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/plot_total_aic.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/plot_total_aic.py @@ -12,9 +12,12 @@ from projects.exceeding_snow_loads.utils import dpi_paper1_figure def plots(visualizer): visualizer.plot_shape_map() + # visualizer.plot_moments() + visualizer.plot_qqplots() + + # for plot_mean in [True, False]: # visualizer.plot_year_for_the_peak(plot_mean=plot_mean) - visualizer.plot_moments() # visualizer.plot_best_coef_maps() # visualizer.plot_peak_year_against_altitude() # visualizer.plot_altitude_switch_against_peak_year() diff --git a/projects/altitude_spatial_model/altitudes_fit/plots/__init__.py b/projects/altitude_spatial_model/altitudes_fit/plots/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/projects/altitude_spatial_model/altitudes_fit/plots/plot_coherence_curves.py b/projects/altitude_spatial_model/altitudes_fit/plots/plot_coherence_curves.py new file mode 100644 index 0000000000000000000000000000000000000000..5ac1eb919340c87e3ff747bfd8c5469ad90a968b --- /dev/null +++ b/projects/altitude_spatial_model/altitudes_fit/plots/plot_coherence_curves.py @@ -0,0 +1,47 @@ +from typing import List +import matplotlib.pyplot as plt + +from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitudes_studies_visualizer_for_non_stationary_models import \ + AltitudesStudiesVisualizerForNonStationaryModels +from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fit import OneFoldFit + + +def plot_coherence_curves(massif_names, visualizer_list: List[ + AltitudesStudiesVisualizerForNonStationaryModels]): + folder = 'Coherence' + visualizer = visualizer_list[0] + names = visualizer.get_valid_names(massif_names) + all_valid_names = set.union(*[v.get_valid_names(massif_names) for v in visualizer_list]) + for massif_name in all_valid_names: + plot_coherence_curve(massif_name, visualizer_list) + visualizer.plot_name = '{}/{}'.format(folder, massif_name.replace('_', '-')) + visualizer.show_or_save_to_file(add_classic_title=False, no_title=True) + plt.close() + + +def plot_coherence_curve(massif_name, visualizer_list: List[AltitudesStudiesVisualizerForNonStationaryModels]): + x, gev_params_list = load_all_list(massif_name, visualizer_list) + values = [(gev_params.location, gev_params.scale, gev_params.shape, gev_params.return_level(return_period=OneFoldFit.return_period)) + for gev_params in gev_params_list] + values = list(zip(*values)) + labels = ['loc', 'scale', 'shape', 'return level'] + _, axes = plt.subplots(2, 2) + for label, value, ax in zip(labels, values, axes.flatten()): + ax.plot(x, value) + ax.set_ylabel(label) + ax.set_xlabel('Altitude') + +def load_all_list(massif_name, visualizer_list): + all_altitudes_list = [] + all_gev_params_list = [] + for visualizer in visualizer_list: + if massif_name in visualizer.massif_name_to_one_fold_fit: + min_altitude, *_, max_altitude = visualizer.massif_name_to_massif_altitudes[massif_name] + altitudes_list = list(range(min_altitude, max_altitude, 10)) + gev_params_list = [] + for altitude in altitudes_list: + gev_params = visualizer.massif_name_to_one_fold_fit[massif_name].get_gev_params(altitude, 2019) + gev_params_list.append(gev_params) + all_gev_params_list.extend(gev_params_list) + all_altitudes_list.extend(altitudes_list) + return all_altitudes_list, all_gev_params_list diff --git a/projects/altitude_spatial_model/altitudes_fit/plot_histogram_altitude_studies.py b/projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py similarity index 100% rename from projects/altitude_spatial_model/altitudes_fit/plot_histogram_altitude_studies.py rename to projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py diff --git a/spatio_temporal_dataset/dataset/abstract_dataset.py b/spatio_temporal_dataset/dataset/abstract_dataset.py index e4a213423de21eedf7821aa01a6c8433a1764163..8c319931bf1dad3da623a1365e32a80022eeac2b 100644 --- a/spatio_temporal_dataset/dataset/abstract_dataset.py +++ b/spatio_temporal_dataset/dataset/abstract_dataset.py @@ -17,8 +17,9 @@ from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_tempor class AbstractDataset(object): - def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates,): - assert pd.Index.equals(observations.index, coordinates.index), '\n{}\n{}'.format(observations.index, coordinates.index) + def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates, ): + assert pd.Index.equals(observations.index, coordinates.index), '\n{}\n{}'.format(observations.index, + coordinates.index) self.observations = observations # type: AbstractSpatioTemporalObservations self.coordinates = coordinates # type: AbstractCoordinates @@ -41,7 +42,7 @@ class AbstractDataset(object): @classmethod def remove_top_maxima(cls, observations: AbstractSpatioTemporalObservations, - coordinates: AbstractSpatioTemporalCoordinates): + coordinates: AbstractSpatioTemporalCoordinates): """ We remove the top maxima w.r.t. each spatial coordinates""" assert isinstance(coordinates, AbstractSpatioTemporalCoordinates) idxs_top = [] @@ -52,6 +53,16 @@ class AbstractDataset(object): ind = ~coordinates.index.isin(idxs_top) return cls.create_new_dataset(coordinates, ind, observations) + @classmethod + def remove_last_maxima(cls, observations: AbstractSpatioTemporalObservations, + coordinates: AbstractSpatioTemporalCoordinates, + nb_years=1): + """ We remove the top maxima w.r.t. each spatial coordinates""" + assert isinstance(coordinates, AbstractSpatioTemporalCoordinates) + years = list(coordinates.temporal_coordinates.coordinates_values()[:, 0]) + last_years = sorted(years)[-nb_years:] + ind = ~coordinates.df_all_coordinates[coordinates.COORDINATE_T].isin(last_years) + return cls.create_new_dataset(coordinates, ind, observations) @property def df_dataset(self) -> pd.DataFrame: @@ -111,4 +122,3 @@ class AbstractDataset(object): def __str__(self) -> str: return 'coordinates:\n{}\nobservations:\n{}'.format(self.coordinates.__str__(), self.observations.__str__()) - diff --git a/test/test_spatio_temporal_dataset/test_dataset.py b/test/test_spatio_temporal_dataset/test_dataset.py index abcfd9145d3b39fc23f94c072ba6596bf1a2cd01..e480e29442d44611c9104f33399218c9af665178 100644 --- a/test/test_spatio_temporal_dataset/test_dataset.py +++ b/test/test_spatio_temporal_dataset/test_dataset.py @@ -3,7 +3,8 @@ from itertools import product import numpy as np -from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model import LinearNonStationaryLocationMarginModel +from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model import \ + LinearNonStationaryLocationMarginModel from extreme_fit.model.utils import set_seed_for_test, SafeRunException from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ @@ -36,10 +37,21 @@ class TestDataset(unittest.TestCase): def test_remove_top_maxima_from_dataset(self): coordinates, dataset_initial, observations = self.build_initial_dataset() dataset_without_top = AbstractDataset.remove_top_maxima(observations, - coordinates) + coordinates) self.assertEqual(4, len(dataset_initial.coordinates), 4) self.assertEqual(2, len(dataset_without_top.coordinates)) - self.assertEqual(set(dataset_without_top.coordinates.index.values), {0, 3}) + maxima = list(dataset_without_top.observations.df_maxima_gev.values[:, 0]) + self.assertEqual(set(maxima), {0}) + + def test_remove_last_maxima_from_dataset(self): + coordinates, dataset_initial, observations = self.build_initial_dataset() + dataset_without_last = AbstractDataset.remove_last_maxima(observations, + coordinates, + nb_years=1) + self.assertEqual(4, len(dataset_initial.coordinates), 4) + self.assertEqual(2, len(dataset_without_last.coordinates)) + maxima = list(dataset_without_last.observations.df_maxima_gev.values[:, 0]) + self.assertEqual(set(maxima), {0, 2}) def build_initial_dataset(self): temporal_coordinates = ConsecutiveTemporalCoordinates.from_nb_temporal_steps(nb_temporal_steps=2) @@ -56,7 +68,6 @@ class TestDataset(unittest.TestCase): dataset_initial = AbstractDataset(observations, coordinates) return coordinates, dataset_initial, observations - def test_max_stable_dataset_R1_and_R2(self): max_stable_models = load_test_max_stable_models()[:] coordinates = load_test_1D_and_2D_spatial_coordinates(self.nb_points) @@ -79,8 +90,9 @@ class TestDataset(unittest.TestCase): def test_max_stable_dataset_R3_with_R2_spatial_structure(self): """Test to create a dataset in R3, but where the spatial structure is only with respect to the 2 fisrt coordinates""" coordinates = \ - load_test_3D_spatial_coordinates(nb_points=self.nb_points, transformation_class=BetweenZeroAndOneNormalization)[ - 0] + load_test_3D_spatial_coordinates(nb_points=self.nb_points, + transformation_class=BetweenZeroAndOneNormalization)[ + 0] smith_process = load_test_max_stable_models()[0] MaxStableDataset.from_sampling(nb_obs=self.nb_obs, max_stable_model=smith_process,