diff --git a/extreme_fit/distribution/gumbel/gnfit_fixed.R b/extreme_fit/distribution/gumbel/gnfit_fixed.R index 6822300aa8717bbb9ac3c54a01f605b84faf0bed..2a638eefa91b2f0fea867edc26f3fc19a46a5c8e 100644 --- a/extreme_fit/distribution/gumbel/gnfit_fixed.R +++ b/extreme_fit/distribution/gumbel/gnfit_fixed.R @@ -77,6 +77,7 @@ gnfit_fixed <- function (dat, dist, df = NULL, pr = NULL, threshold = NULL) pval <- exp(1.111 - 34.242 * w + 12.832 * w^2) } else { + pval <- 0 # I added that to avoid the code to crash warning("p-value is smaller than 7.37e-10") } z$Wpval <- pval @@ -96,6 +97,7 @@ gnfit_fixed <- function (dat, dist, df = NULL, pr = NULL, threshold = NULL) pval <- exp(1.2937 - 5.709 * A + 0.0186 * A^2) } else { + pval <- 0 # I added that to avoid the code to crash warning("p-value is smaller than 7.37e-10") } z$Apval <- pval diff --git a/extreme_fit/distribution/gumbel/gumbel_gof.py b/extreme_fit/distribution/gumbel/gumbel_gof.py index 35900799fe8a39fdf394990341d22be6ba138545..416651097539bcdef8609624d9352c63055d5f7a 100644 --- a/extreme_fit/distribution/gumbel/gumbel_gof.py +++ b/extreme_fit/distribution/gumbel/gumbel_gof.py @@ -11,5 +11,11 @@ def cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution( return res['Wpval'], res['Apval'] +def goodness_of_fit_anderson(quantiles, significance_level=0.05): + test = cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution(quantiles) + _, ander_darling_test_pvalue = test + return ander_darling_test_pvalue > significance_level + + if __name__ == '__main__': cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution(np.arange(50)) diff --git a/extreme_trend/abstract_gev_trend_test.py b/extreme_trend/abstract_gev_trend_test.py index c18c003cc076bd81bd3763801f3ad68d57e2ffbc..43a1e800e5dbfea7dca7bab8be8ed67594a6eb41 100644 --- a/extreme_trend/abstract_gev_trend_test.py +++ b/extreme_trend/abstract_gev_trend_test.py @@ -11,7 +11,7 @@ from extreme_data.eurocode_data.utils import EUROCODE_QUANTILE, YEAR_OF_INTEREST from extreme_data.meteo_france_data.scm_models_data.crocus.crocus_variables import AbstractSnowLoadVariable from extreme_fit.distribution.gev.gev_params import GevParams from extreme_fit.distribution.gumbel.gumbel_gof import \ - cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution + cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution, goodness_of_fit_anderson from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import \ StationaryTemporalModel, GumbelTemporalModel @@ -70,18 +70,8 @@ class AbstractGevTrendTest(object): @property def goodness_of_fit_anderson_test(self): - # significance_level_to_index = dict(zip([0.25, 0.1, 0.05, 0.025, 0.01], list(range(5)))) - # print(significance_level_to_index) - # assert self.SIGNIFICANCE_LEVEL in significance_level_to_index - # # significance_level=array([25. , 10. , 5. , 2.5, 1. ])) - # index_for_significance_level_5_percent = 2 quantiles = self.compute_empirical_quantiles(estimator=self.unconstrained_estimator) - # test_res = anderson(quantiles, dist='gumbel_r') # type: AndersonResult - # return test_res.statistic < test_res.critical_values[index_for_significance_level_5_percent] - # print(quantiles) - test = cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution(quantiles) - _, ander_darling_test_pvalue = test - return ander_darling_test_pvalue > self.SIGNIFICANCE_LEVEL + return goodness_of_fit_anderson(quantiles, self.SIGNIFICANCE_LEVEL) @property def name(self): diff --git a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py index 097490a56bc3d8f5e501d2b0d2e8dbcc2be2d383..5b013a5f0f73b1cfd35f5374dd73d8d2426a6aa0 100644 --- a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py +++ b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py @@ -2,7 +2,8 @@ import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np -from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.plot_total_aic import plot_total_aic +from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.plot_total_aic import plot_total_aic, \ + plot_individual_aic mpl.rcParams['text.usetex'] = True mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}'] @@ -22,21 +23,6 @@ from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitudes_s AltitudesStudiesVisualizerForNonStationaryModels -def plot_altitudinal_fit(studies, massif_names=None): - model_classes = ALTITUDINAL_GEV_MODELS + ALTITUDINAL_GUMBEL_MODELS - visualizer = AltitudesStudiesVisualizerForNonStationaryModels(studies=studies, - model_classes=model_classes, - massif_names=massif_names, - show=False) - # Plot the results for the model that minimizes the individual aic - OneFoldFit.best_estimator_minimizes_total_aic = False - visualizer.plot_moments() - # visualizer.plot_best_coef_maps() - visualizer.plot_shape_map() - - plot_total_aic(model_classes, visualizer) - - def plot_time_series(studies, massif_names=None): studies.plot_maxima_time_series(massif_names=massif_names) @@ -47,19 +33,33 @@ def plot_moments(studies, massif_names=None): studies.plot_mean_maxima_against_altitude(massif_names=massif_names, std=std, change=change) +def plot_altitudinal_fit(studies, massif_names=None): + model_classes = ALTITUDINAL_GEV_MODELS + ALTITUDINAL_GUMBEL_MODELS + visualizer = AltitudesStudiesVisualizerForNonStationaryModels(studies=studies, + model_classes=model_classes, + massif_names=massif_names, + show=False) + # Plot the results for the model that minimizes the individual aic + plot_individual_aic(visualizer) + # Plot the results for the model that minimizes the total aic + plot_total_aic(model_classes, visualizer) + + def main(): - # altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][4:7] + altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][4:7] # altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][:] # todo: l ecart pour les saisons de l automne ou de sprint # vient probablement de certains zéros pas pris en compte pour le fit, # mais pris en compte pour le calcul de mon aic - altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900][:] + # altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900][:] study_classes = [SafranSnowfall1Day, SafranSnowfall3Days, SafranSnowfall5Days, SafranSnowfall7Days][:2] study_classes = [SafranPrecipitation1Day, SafranPrecipitation3Days, SafranPrecipitation5Days, SafranPrecipitation7Days][:] study_classes = [SafranSnowfall1Day, SafranPrecipitation1Day, - SafranSnowfall3Days, SafranPrecipitation3Days][:] - seasons = [Season.automn, Season.winter, Season.spring][::-1] + SafranSnowfall3Days, SafranPrecipitation3Days][:1] + # seasons = [Season.automn, Season.winter, Season.spring][::-1] + seasons = [Season.winter] + massif_names = None # massif_names = ['Aravis'] # massif_names = ['Chartreuse', 'Belledonne'] @@ -68,8 +68,8 @@ def main(): for study_class in study_classes: studies = AltitudesStudies(study_class, altitudes, season=season) print('inner loop', season, study_class) - plot_time_series(studies, massif_names) - plot_moments(studies, massif_names) + # plot_time_series(studies, massif_names) + # plot_moments(studies, massif_names) plot_altitudinal_fit(studies, massif_names) diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py index 387a5acff4a374851a90ed1417a624ab474a3b4d..859e661f4a907f922fd53a1856423232915304f4 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Dict import matplotlib.pyplot as plt import numpy as np @@ -25,17 +25,26 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): model_classes: List[AbstractSpatioTemporalPolynomialModel], show=False, massif_names=None, - fit_method=MarginFitMethod.extremes_fevd_mle): + fit_method=MarginFitMethod.extremes_fevd_mle, + display_only_model_that_pass_anderson_test=True): super().__init__(studies.study, show=show, save_to_file=not show) - self.massif_names = massif_names if massif_names is not None else self.study.study_massif_names self.studies = studies self.non_stationary_models = model_classes self.fit_method = fit_method - self.massif_name_to_one_fold_fit = {} + self.display_only_model_that_pass_anderson_test = display_only_model_that_pass_anderson_test + self.massif_names = massif_names if massif_names is not None else self.study.study_massif_names + self.massif_name_to_massif_id = {m: i for i, m in enumerate(self.massif_names)} + # Load one fold fit + self._massif_name_to_one_fold_fit = {} for massif_name in self.massif_names: dataset = studies.spatio_temporal_dataset(massif_name=massif_name) old_fold_fit = OneFoldFit(massif_name, dataset, model_classes, self.fit_method) - self.massif_name_to_one_fold_fit[massif_name] = old_fold_fit + self._massif_name_to_one_fold_fit[massif_name] = old_fold_fit + + @property + def massif_name_to_one_fold_fit(self) -> Dict[str, OneFoldFit]: + return {massif_name: old_fold_fit for massif_name, old_fold_fit in self._massif_name_to_one_fold_fit.items() + if not self.display_only_model_that_pass_anderson_test or old_fold_fit.goodness_of_fit_anderson_test} def plot_moments(self): for method_name in ['moment', 'changes_in_the_moment', 'relative_changes_in_the_moment']: @@ -46,12 +55,12 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): ax = plt.gca() min_altitude, *_, max_altitude = self.studies.altitudes altitudes_plot = np.linspace(min_altitude, max_altitude, num=50) - for massif_id, massif_name in enumerate(self.massif_names): + for massif_name, one_fold_fit in self.massif_name_to_one_fold_fit.items(): massif_altitudes = self.studies.massif_name_to_altitudes[massif_name] ind = (min(massif_altitudes) <= altitudes_plot) & (altitudes_plot <= max(massif_altitudes)) massif_altitudes_plot = altitudes_plot[ind] - one_fold_fit = self.massif_name_to_one_fold_fit[massif_name] values = one_fold_fit.__getattribute__(method_name)(massif_altitudes_plot, order=order) + massif_id = self.massif_name_to_massif_id[massif_name] plot_against_altitude(massif_altitudes_plot, ax, massif_id, massif_name, values) # Plot settings ax.legend(prop={'size': 7}, ncol=3) @@ -128,3 +137,6 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): label='Shape plot for {} {}'.format(SCM_STUDY_CLASS_TO_ABBREVIATION[type(self.study)], self.study.variable_unit), add_x_label=False, graduation=0.1, massif_name_to_text=self.massif_name_to_name) + + def plot_altitude_change_of_sign(self): + pass diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py index d288900c7603cd60962b0b4cf6e67f2c6f938b18..1a64a7a3e48dcad892a8d9f00e527043f2222eaf 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py @@ -4,6 +4,7 @@ import rpy2 from cached_property import cached_property from scipy.stats import chi2 +from extreme_fit.distribution.gumbel.gumbel_gof import goodness_of_fit_anderson from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator_short from extreme_fit.function.param_function.polynomial_coef import PolynomialAllCoef, PolynomialCoef from extreme_fit.model.margin_model.polynomial_margin_model.gev_altitudinal_models import StationaryAltitudinal @@ -11,13 +12,14 @@ from extreme_fit.model.margin_model.polynomial_margin_model.gumbel_altitudinal_m StationaryGumbelAltitudinal, AbstractGumbelAltitudinalModel from extreme_fit.model.margin_model.utils import MarginFitMethod from root_utils import classproperty +from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset class OneFoldFit(object): SIGNIFICANCE_LEVEL = 0.05 best_estimator_minimizes_total_aic = False - def __init__(self, massif_name, dataset, models_classes, fit_method=MarginFitMethod.extremes_fevd_mle): + def __init__(self, massif_name: str, dataset: AbstractDataset, models_classes, fit_method=MarginFitMethod.extremes_fevd_mle): self.massif_name = massif_name self.dataset = dataset self.models_classes = models_classes @@ -88,20 +90,7 @@ class OneFoldFit(object): @cached_property def sorted_estimators(self): estimators = list(self.model_class_to_estimator.values()) - # estimators_with_finite_aic = [] - # for estimator in estimators: - # # try: - # aic = estimator.aic() - # npt.assert_almost_equal(estimator.result_from_model_fit.aic, aic, decimal=5) - # print(self.massif_name, estimator.margin_model.name_str, aic) - # estimators_with_finite_aic.append(estimator) - # # except (AssertionError, rpy2.rinterface.RRuntimeError) as e: - # # raise e - # # print(self.massif_name, estimator.margin_model.name_str, 'infinite aic') - # # print(e) - # print('Summary {}: {}/{} fitted'.format(self.massif_name, len(estimators_with_finite_aic), len(estimators))) - sorted_estimators = sorted([estimator for estimator in estimators], - key=lambda e: e.aic()) + sorted_estimators = sorted([estimator for estimator in estimators], key=lambda e: e.aic()) return sorted_estimators @property @@ -168,3 +157,25 @@ class OneFoldFit(object): return False else: return self.likelihood_ratio > chi2.ppf(q=1 - self.SIGNIFICANCE_LEVEL, df=self.degree_freedom_chi2) + + @property + def goodness_of_fit_anderson_test(self): + quantiles = self.compute_empirical_quantiles(estimator=self.best_estimator) + goodness_of_fit_anderson_test = goodness_of_fit_anderson(quantiles, self.SIGNIFICANCE_LEVEL) + if not goodness_of_fit_anderson_test: + print('{} with {} does not pass the anderson test'.format(self.massif_name, self.folder_for_plots)) + return goodness_of_fit_anderson_test + + def compute_empirical_quantiles(self, estimator): + empirical_quantiles = [] + df_maxima_gev = self.dataset.observations.df_maxima_gev + df_coordinates = self.dataset.coordinates.df_coordinates() + for coordinate, maximum in zip(df_coordinates.values.copy(), df_maxima_gev.values.copy()): + gev_param = estimator.function_from_fit.get_params( + coordinate=coordinate, + is_transformed=False) + assert len(maximum) == 1 + maximum_standardized = gev_param.gumbel_standardization(maximum[0]) + empirical_quantiles.append(maximum_standardized) + empirical_quantiles = sorted(empirical_quantiles) + return empirical_quantiles diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/plot_total_aic.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/plot_total_aic.py index bd28d16ec76cb96ed838f5a622e793959d1bd35a..0c9bba51c4f652c914691bea6b454f4c9c30d37a 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/plot_total_aic.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/plot_total_aic.py @@ -10,6 +10,13 @@ from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fi from projects.exceeding_snow_loads.utils import dpi_paper1_figure +def plot_individual_aic(visualizer): + OneFoldFit.best_estimator_minimizes_total_aic = False + visualizer.plot_moments() + # visualizer.plot_best_coef_maps() + visualizer.plot_shape_map() + + def plot_total_aic(model_classes, visualizer): # Compute the mean AIC for each model_class OneFoldFit.best_estimator_minimizes_total_aic = True @@ -35,7 +42,7 @@ def plot_total_aic(model_classes, visualizer): best_model_class_for_total_aic = sorted_model_class[0] for one_fold_fit in visualizer.massif_name_to_one_fold_fit.values(): one_fold_fit.best_estimator_class_for_total_aic = best_model_class_for_total_aic - # Plot the ranking of the + # Plot the ranking of the model based on their total aic plot_total_aic_repartition(visualizer, sorted_labels, sorted_scores) # Plot the results for the model that minimizes the mean aic visualizer.plot_moments() @@ -43,7 +50,6 @@ def plot_total_aic(model_classes, visualizer): visualizer.plot_best_coef_maps() - def plot_total_aic_repartition(visualizer, labels, scores): """ Plot a single trend curves