From 2078ac21e97e49fa1c1a906ea8b190a7d6f92b88 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Tue, 19 Jan 2021 18:53:56 +0100 Subject: [PATCH] [contrasting] add plot to compare the size of confidence interval between the pointwise approach and the piecewise elevational-temporal approach. bootstrap level is set to 100 --- .../scm_models_data/abstract_study.py | 2 +- .../scm_models_data/utils_function.py | 21 +++++-- .../abstract_extract_eurocode_return_level.py | 2 +- .../eurocode_return_level_uncertainties.py | 4 ++ .../altitudes_fit/main_altitudes_studies.py | 16 ++--- ...es_visualizer_for_non_stationary_models.py | 20 +++++++ .../plots/plot_histogram_altitude_studies.py | 60 ++++++++++++++++++- 7 files changed, 110 insertions(+), 15 deletions(-) diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py index feaef969..d3fe96f6 100644 --- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py +++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py @@ -879,7 +879,7 @@ class AbstractStudy(object): return d def massif_name_to_stationary_gev_params_and_confidence(self, quantile_level, - confidence_interval_based_on_delta_method): + confidence_interval_based_on_delta_method) -> Tuple[Dict]: t = (quantile_level, confidence_interval_based_on_delta_method) if t in self._cache_for_pointwise_fit: return self._cache_for_pointwise_fit[t] diff --git a/extreme_data/meteo_france_data/scm_models_data/utils_function.py b/extreme_data/meteo_france_data/scm_models_data/utils_function.py index 58b25540..a4d98068 100644 --- a/extreme_data/meteo_france_data/scm_models_data/utils_function.py +++ b/extreme_data/meteo_france_data/scm_models_data/utils_function.py @@ -1,8 +1,10 @@ from multiprocessing import Pool +from typing import Tuple, Dict import numpy as np from sklearn.utils import resample +from extreme_fit.distribution.gev.gev_params import GevParams from extreme_fit.estimator.margin_estimator.utils import _fitted_stationary_gev from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel from extreme_fit.model.margin_model.utils import MarginFitMethod @@ -19,14 +21,15 @@ def fitted_stationary_gev_with_uncertainty_interval(x_gev, fit_method=MarginFitM model_class=StationaryTemporalModel, starting_year=None, quantile_level=0.98, - confidence_interval_based_on_delta_method=True): + confidence_interval_based_on_delta_method=True) -> Tuple[Dict[str, GevParams], Dict[str, EurocodeConfidenceIntervalFromExtremes]]: estimator, gev_param = _fitted_stationary_gev(fit_method, model_class, starting_year, x_gev) if quantile_level is not None: EurocodeConfidenceIntervalFromExtremes.quantile_level = quantile_level coordinate = estimator.dataset.coordinates.df_all_coordinates.iloc[0].values if confidence_interval_based_on_delta_method: - confidence_interval = EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(estimator, ConfidenceIntervalMethodFromExtremes.ci_mle, - coordinate) + confidence_interval = EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(estimator, + ConfidenceIntervalMethodFromExtremes.ci_mle, + coordinate) else: # Bootstrap method nb_bootstrap = AbstractExtractEurocodeReturnLevel.NB_BOOTSTRAP @@ -38,6 +41,14 @@ def fitted_stationary_gev_with_uncertainty_interval(x_gev, fit_method=MarginFitM return_level_list = p.map(_compute_return_level, arguments) else: return_level_list = [_compute_return_level(argument) for argument in arguments] + # Remove infinite return levels and return level + len_before_remove = len(return_level_list) + return_level_list = [r for r in return_level_list if not np.isinf(r)] + threshold = 2000 + return_level_list = [r for r in return_level_list if r < threshold] + len_after_remove = len(return_level_list) + if len_after_remove < len_before_remove: + print('Nb of fit removed (inf or > {}:'.format(threshold), len_before_remove - len_after_remove) confidence_interval = tuple([np.quantile(return_level_list, q) for q in AbstractExtractEurocodeReturnLevel.bottom_and_upper_quantile]) mean_estimate = gev_param.quantile(quantile_level) @@ -46,6 +57,8 @@ def fitted_stationary_gev_with_uncertainty_interval(x_gev, fit_method=MarginFitM confidence_interval = None return gev_param, confidence_interval + def _compute_return_level(t): fit_method, model_class, starting_year, x, quantile_level = t - return _fitted_stationary_gev(fit_method, model_class, starting_year, x)[1].quantile(quantile_level) \ No newline at end of file + gev_params = _fitted_stationary_gev(fit_method, model_class, starting_year, x)[1] + return gev_params.quantile(quantile_level) diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes/abstract_extract_eurocode_return_level.py b/extreme_fit/model/result_from_model_fit/result_from_extremes/abstract_extract_eurocode_return_level.py index ac2c6d9d..db45b26a 100644 --- a/extreme_fit/model/result_from_model_fit/result_from_extremes/abstract_extract_eurocode_return_level.py +++ b/extreme_fit/model/result_from_model_fit/result_from_extremes/abstract_extract_eurocode_return_level.py @@ -16,7 +16,7 @@ from root_utils import classproperty class AbstractExtractEurocodeReturnLevel(object): ALPHA_CONFIDENCE_INTERVAL_UNCERTAINTY = 0.05 - NB_BOOTSTRAP = 1000 + NB_BOOTSTRAP = 100 @classproperty def bottom_and_upper_quantile(cls): diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py b/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py index 1d45d575..34877299 100644 --- a/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py +++ b/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py @@ -26,6 +26,10 @@ class EurocodeConfidenceIntervalFromExtremes(object): self.mean_estimate = mean_estimate self.confidence_interval = confidence_interval + @property + def interval_size(self): + return self.confidence_interval[1] - self.confidence_interval[0] + @property def triplet(self): return self.confidence_interval[0], self.mean_estimate, self.confidence_interval[1] diff --git a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py index f0b704df..ddb6cc3a 100644 --- a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py +++ b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py @@ -8,7 +8,8 @@ from extreme_fit.model.utils import set_seed_for_test from projects.altitude_spatial_model.altitudes_fit.plots.plot_coherence_curves import plot_coherence_curves from projects.altitude_spatial_model.altitudes_fit.plots.plot_histogram_altitude_studies import \ plot_histogram_all_models_against_altitudes, plot_histogram_all_trends_against_altitudes, \ - plot_shoe_plot_changes_against_altitude, plot_shoe_plot_changes_against_altitude_for_maxima_and_total + plot_shoe_plot_changes_against_altitude, plot_shoe_plot_changes_against_altitude_for_maxima_and_total, \ + plot_shoe_plot_ratio_interval_size_against_altitude mpl.rcParams['text.usetex'] = True mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}'] @@ -29,13 +30,13 @@ def main(): set_seed_for_test() - fast = None + fast = False if fast is None: massif_names = ['Vanoise', 'Haute-Maurienne', 'Vercors'][:1] altitudes_list = altitudes_for_groups[:] elif fast: - massif_names = ['Vanoise', 'Haute-Maurienne', 'Vercors'][:1] - altitudes_list = altitudes_for_groups[3:] + massif_names = ['Vanoise', 'Haute-Maurienne', 'Vercors'][:] + altitudes_list = altitudes_for_groups[:2] else: massif_names = None altitudes_list = altitudes_for_groups[:] @@ -60,11 +61,12 @@ def main_loop(altitudes_list, massif_names, seasons, study_classes): def plot_visualizers(massif_names, visualizer_list): # plot_histogram_all_trends_against_altitudes(massif_names, visualizer_list) + plot_shoe_plot_ratio_interval_size_against_altitude(massif_names, visualizer_list) # for relative in [True, False]: - # plot_shoe_plot_changes_against_altitude(massif_names, visualizer_list, relative=relative) + # plot_shoe_plot_changes_against_altitude(massif_names, visualizer_list, relative=relative) # plot_shoe_plot_changes_against_altitude_for_maxima_and_total(massif_names, visualizer_list, relative=relative) - # plot_coherence_curves(massif_names, visualizer_list) - plot_coherence_curves(['Vanoise'], visualizer_list) + plot_coherence_curves(massif_names, visualizer_list) + # plot_coherence_curves(['Vanoise'], visualizer_list) def plot_visualizer(massif_names, visualizer): diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py index f41da81e..d5ba71cc 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py @@ -140,6 +140,26 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): self._method_name_and_order_to_massif_name_to_value[c] = massif_name_to_value return self._method_name_and_order_to_massif_name_to_value[c] + def ratio_groups(self): + return [self.ratio_uncertainty_interval_size(altitude, 2019) for altitude in self.studies.altitudes] + + def ratio_uncertainty_interval_size(self, altitude, year): + study = self.studies.altitude_to_study[altitude] + massif_name_to_interval = study.massif_name_to_stationary_gev_params_and_confidence(OneFoldFit.quantile_level, + self.confidence_interval_based_on_delta_method)[ + 1] + massif_names_with_pointwise_interval = set(massif_name_to_interval) + valid_massif_names = set(self.massif_name_to_one_fold_fit.keys()) + intersection_massif_names = valid_massif_names.intersection(massif_names_with_pointwise_interval) + ratios = [] + for massif_name in intersection_massif_names: + one_fold_fit = self.massif_name_to_one_fold_fit[massif_name] + new_interval_size = one_fold_fit.best_confidence_interval(altitude, year).interval_size + old_interval_size = massif_name_to_interval[massif_name].interval_size + ratio = new_interval_size / old_interval_size + ratios.append(ratio) + return ratios + def plot_map_moment(self, method_name, order): massif_name_to_value = self.method_name_and_order_to_d(method_name, order) # Plot settings diff --git a/projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py b/projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py index 30a3635e..77d85d41 100644 --- a/projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py +++ b/projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py @@ -6,6 +6,8 @@ import numpy as np import matplotlib.pyplot as plt from matplotlib.lines import Line2D +from extreme_fit.model.result_from_model_fit.result_from_extremes.abstract_extract_eurocode_return_level import \ + AbstractExtractEurocodeReturnLevel from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitudes_studies_visualizer_for_non_stationary_models import \ AltitudesStudiesVisualizerForNonStationaryModels from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fit import OneFoldFit @@ -120,6 +122,57 @@ def plot_histogram_all_trends_against_altitudes(massif_names, visualizer_list: L plt.close() +def plot_shoe_plot_ratio_interval_size_against_altitude(massif_names, visualizer_list: List[ + AltitudesStudiesVisualizerForNonStationaryModels]): + visualizer = visualizer_list[0] + + ratio_groups = [] + for v in visualizer_list: + ratio_groups.extend(v.ratio_groups()) + print(len(ratio_groups)) + print(ratio_groups) + + + nb_massifs = [len(l) for l in ratio_groups] + + plt.close() + ax = plt.gca() + width = 5 + size = 8 + legend_fontsize = 10 + labelsize = 10 + linewidth = 3 + + x = np.array([2 * width * (i + 1) for i in range(len(ratio_groups))]) + ax.boxplot(ratio_groups, positions=x, widths=width, patch_artist=True, showmeans=True) + + ax.legend(prop={'size': 8}) + + ylabel = "Ratio for the size of {}\% confidence intervals, i.e. size for the\n" \ + " elevational-temporal model divided by the size for the pointwise model".format(AbstractExtractEurocodeReturnLevel.percentage_confidence_interval) + ax.set_ylabel(ylabel, + fontsize=legend_fontsize) + ax.set_xlabel('Elevation (m)', fontsize=legend_fontsize + 5) + ax.tick_params(axis='both', which='major', labelsize=labelsize) + ax.set_xticks(x) + ax.yaxis.grid() + + altitudes = [] + for v in visualizer_list: + altitudes.extend(v.studies.altitudes) + ax.set_xticklabels([str(a) for a in altitudes]) + + shift = 2 * width + ax.set_xlim((min(x) - shift, max(x) + shift)) + + # I could display the number of massif used to build each box plot. + plot_nb_massif_on_upper_axis(ax, labelsize, legend_fontsize, nb_massifs, x, add_for_percentage=False) + + visualizer.plot_name = 'All ' + "uncertainty size comparison for bootstrap size {}".format(AbstractExtractEurocodeReturnLevel.NB_BOOTSTRAP) + visualizer.show_or_save_to_file(add_classic_title=False, no_title=True) + + plt.close() + def plot_shoe_plot_changes_against_altitude(massif_names, visualizer_list: List[ AltitudesStudiesVisualizerForNonStationaryModels], @@ -250,7 +303,7 @@ def plot_shoe_plot_changes_against_altitude_for_maxima_and_total(massif_names, v plt.close() -def plot_nb_massif_on_upper_axis(ax, labelsize, legend_fontsize, nb_massifs, x): +def plot_nb_massif_on_upper_axis(ax, labelsize, legend_fontsize, nb_massifs, x, add_for_percentage=True): # Plot number of massifs on the upper axis ax_twiny = ax.twiny() ax_twiny.plot(x, [0 for _ in x], linewidth=0) @@ -258,4 +311,7 @@ def plot_nb_massif_on_upper_axis(ax, labelsize, legend_fontsize, nb_massifs, x): ax_twiny.tick_params(labelsize=labelsize) ax_twiny.set_xticklabels(nb_massifs) ax_twiny.set_xlim(ax.get_xlim()) - ax_twiny.set_xlabel('Total number of massifs at each range (for the percentage)', fontsize=legend_fontsize) + xlabel = 'Total number of massifs at each range' + if add_for_percentage: + xlabel += ' (for the percentage)' + ax_twiny.set_xlabel(xlabel, fontsize=legend_fontsize) -- GitLab