diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py index cdc61c4c3e2d9966e94b83f15de8a9ccc05ea50f..1f89b79e0e1b9e2b0bd9a5e487c751a4153ce0bd 100644 --- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py +++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py @@ -27,6 +27,8 @@ from extreme_data.meteo_france_data.scm_models_data.utils import ALTITUDES, ZS_I ORDERED_ALLSLOPES_SLOPES, ORDERED_ALLSLOPES_MASSIFNUM, date_to_str, WP_PATTERN_MAX_YEAR, Season, \ first_day_and_last_day, FrenchRegion, ZS_INT_MASK_PYRENNES, alps_massif_order, ZS_INT_MASK_PYRENNES_LIST, \ season_to_str +from extreme_data.meteo_france_data.scm_models_data.utils_function import \ + fitted_stationary_gev_with_uncertainty_interval from extreme_data.meteo_france_data.scm_models_data.visualization.utils import get_km_formatter from extreme_fit.estimator.margin_estimator.utils import fitted_stationary_gev from extreme_fit.function.margin_function.abstract_margin_function import \ @@ -34,6 +36,8 @@ from extreme_fit.function.margin_function.abstract_margin_function import \ from extreme_data.meteo_france_data.scm_models_data.visualization.create_shifted_cmap import create_colorbase_axis, \ get_shifted_map, get_colors from extreme_fit.model.margin_model.utils import MarginFitMethod +from extreme_fit.model.result_from_model_fit.result_from_extremes.eurocode_return_level_uncertainties import \ + EurocodeConfidenceIntervalFromExtremes from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ AbstractSpatialCoordinates @@ -803,13 +807,28 @@ class AbstractStudy(object): return ~np.array(mask_french_alps, dtype=bool) @cached_property - def massif_name_to_stationary_gev_params_for_non_zero_annual_maxima(self): + def massif_name_to_stationary_gev_params(self): + d, _ = self._massif_name_to_stationary_gev_params_and_confidence(quantile_level=None) + return d + + @cached_property + def massif_name_to_stationary_gev_params_and_confidence_for_return_level_100(self): + return self._massif_name_to_stationary_gev_params_and_confidence(quantile_level=0.99) + + def _massif_name_to_stationary_gev_params_and_confidence(self, quantile_level=None): + """ at least 90% of values must be above zero""" massif_name_to_stationary_gev_params = {} + massif_name_to_confidence = {} for massif_name, annual_maxima in self.massif_name_to_annual_maxima.items(): - non_zero_annual_maxima = annual_maxima[annual_maxima > 0] - gev_params = fitted_stationary_gev(non_zero_annual_maxima, fit_method=self.fit_method) - massif_name_to_stationary_gev_params[massif_name] = gev_params - return massif_name_to_stationary_gev_params + percentage_of_non_zeros = 100 * np.count_nonzero(annual_maxima) / len(annual_maxima) + if percentage_of_non_zeros > 90: + gev_params, confidence = fitted_stationary_gev_with_uncertainty_interval(annual_maxima, + fit_method=MarginFitMethod.extremes_fevd_mle, + quantile_level=quantile_level) + if -0.5 <= gev_params.shape <= 0.5: + massif_name_to_stationary_gev_params[massif_name] = gev_params + massif_name_to_confidence[massif_name] = confidence + return massif_name_to_stationary_gev_params, massif_name_to_confidence def massif_name_to_gev_param_list(self, year_min_and_max_list): year_to_index = {y: i for i, y in enumerate(self.ordered_years)} diff --git a/extreme_data/meteo_france_data/scm_models_data/utils_function.py b/extreme_data/meteo_france_data/scm_models_data/utils_function.py new file mode 100644 index 0000000000000000000000000000000000000000..ed545b2f6c1fc7351e05f62ee78b787d7046ae0f --- /dev/null +++ b/extreme_data/meteo_france_data/scm_models_data/utils_function.py @@ -0,0 +1,22 @@ +from extreme_fit.estimator.margin_estimator.utils import _fitted_stationary_gev +from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel +from extreme_fit.model.margin_model.utils import MarginFitMethod +from extreme_fit.model.result_from_model_fit.result_from_extremes.confidence_interval_method import \ + ConfidenceIntervalMethodFromExtremes +from extreme_fit.model.result_from_model_fit.result_from_extremes.eurocode_return_level_uncertainties import \ + EurocodeConfidenceIntervalFromExtremes + + +def fitted_stationary_gev_with_uncertainty_interval(x_gev, fit_method=MarginFitMethod.is_mev_gev_fit, + model_class=StationaryTemporalModel, + starting_year=None, + quantile_level=0.98): + estimator, gev_param = _fitted_stationary_gev(fit_method, model_class, starting_year, x_gev) + if quantile_level is not None: + EurocodeConfidenceIntervalFromExtremes.quantile_level = quantile_level + coordinate = estimator.dataset.coordinates.df_all_coordinates.iloc[0].values + confidence_interval = EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(estimator, ConfidenceIntervalMethodFromExtremes.ci_mle, + coordinate) + else: + confidence_interval = None + return gev_param, confidence_interval diff --git a/extreme_fit/estimator/margin_estimator/utils.py b/extreme_fit/estimator/margin_estimator/utils.py index 7d5962e70319df8c886ffa2540ade8104ebccaed..4742aa0cd9c471d26c60f8f0f3fd33024c9fa2ed 100644 --- a/extreme_fit/estimator/margin_estimator/utils.py +++ b/extreme_fit/estimator/margin_estimator/utils.py @@ -28,6 +28,11 @@ def fitted_linear_margin_estimator(model_class, coordinates, dataset, starting_y def fitted_stationary_gev(x_gev, fit_method=MarginFitMethod.is_mev_gev_fit, model_class=StationaryTemporalModel, starting_year=None, transformation_class=CenteredScaledNormalization) -> GevParams: + _, gev_param = _fitted_stationary_gev(fit_method, model_class, starting_year, x_gev) + return gev_param + + +def _fitted_stationary_gev(fit_method, model_class, starting_year, x_gev): coordinates = ConsecutiveTemporalCoordinates.from_nb_temporal_steps(nb_temporal_steps=len(x_gev), transformation_class=CenteredScaledNormalization) df = pd.DataFrame([pd.Series(dict(zip(coordinates.index, x_gev)))]).transpose() @@ -36,18 +41,17 @@ def fitted_stationary_gev(x_gev, fit_method=MarginFitMethod.is_mev_gev_fit, mode estimator = fitted_linear_margin_estimator(model_class, coordinates, dataset, starting_year, fit_method) first_coordinate = coordinates.coordinates_values()[0] gev_param = estimator.function_from_fit.get_params(first_coordinate) - # Build confidence interval fit_method_with_confidence_interval_implemented = [MarginFitMethod.is_mev_gev_fit] if fit_method in fit_method_with_confidence_interval_implemented: param_name_to_confidence_interval = {} for i, (param_name, param_value) in enumerate(gev_param.to_dict().items()): confidence_interval_half_size = estimator.result_from_model_fit.confidence_interval_half_sizes[i] - confidence_interval = (param_value - confidence_interval_half_size, param_value + confidence_interval_half_size) + confidence_interval = ( + param_value - confidence_interval_half_size, param_value + confidence_interval_half_size) param_name_to_confidence_interval[param_name] = confidence_interval gev_param.param_name_to_confidence_interval = param_name_to_confidence_interval - # Warning if not -0.5 < gev_param.shape < 0.5: warnings.warn('fitted shape parameter is outside physical bounds {}'.format(gev_param.shape)) - return gev_param + return estimator, gev_param diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py b/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py index 545e9784238862baf7548443d1bdfaa9c5508c31..1d45d5759afada6916d73188d85f21aa72400ae9 100644 --- a/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py +++ b/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py @@ -33,8 +33,8 @@ class EurocodeConfidenceIntervalFromExtremes(object): @classmethod def from_estimator_extremes(cls, estimator_extremes: LinearMarginEstimator, ci_method: ConfidenceIntervalMethodFromExtremes, - coordinate: Union[int, np.ndarray], - ): + coordinate: Union[int, np.ndarray]): + if ci_method == ConfidenceIntervalMethodFromExtremes.my_bayes: extractor = ExtractEurocodeReturnLevelFromMyBayesianExtremes(estimator_extremes, ci_method, coordinate, cls.quantile_level) else: diff --git a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py index 461ca605cd7470400b5ac54d9ecaffd7194c63d4..cdc112819713da0768c95b215757382b789160de 100644 --- a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py +++ b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py @@ -54,9 +54,9 @@ def main_loop(altitudes_list, massif_names, seasons, study_classes): def plot_visualizers(massif_names, visualizer_list): - plot_histogram_all_trends_against_altitudes(massif_names, visualizer_list) - plot_histogram_all_models_against_altitudes(massif_names, visualizer_list) - # plot_coherence_curves(massif_names, visualizer_list) + # plot_histogram_all_trends_against_altitudes(massif_names, visualizer_list) + # plot_histogram_all_models_against_altitudes(massif_names, visualizer_list) + plot_coherence_curves(massif_names, visualizer_list) pass @@ -68,7 +68,7 @@ def plot_visualizer(massif_names, visualizer): # for change in [True, False, None]: # studies.plot_mean_maxima_against_altitude(massif_names=massif_names, std=std, change=change) # Plot the results for the model that minimizes the individual aic - plot_individual_aic(visualizer) + # plot_individual_aic(visualizer) # Plot the results for the model that minimizes the total aic # plot_total_aic(model_classes, visualizer) pass diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py index c73cc0890444be47a6b1168ade69d7cf09579f70..c9b590607ca81282674966fe8005be5a93a2cd8d 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py @@ -32,6 +32,7 @@ class OneFoldFit(object): SIGNIFICANCE_LEVEL = 0.05 best_estimator_minimizes_total_aic = False return_period = 100 + quantile_level = 1 - (1 / return_period) def __init__(self, massif_name: str, dataset: AbstractDataset, models_classes, fit_method=MarginFitMethod.extremes_fevd_mle, temporal_covariate_for_fit=None, @@ -311,9 +312,8 @@ class OneFoldFit(object): standard_gumbel_quantiles = [standard_gumbel_distribution.quantile(i / (n + 1)) for i in range(1, n + 1)] return standard_gumbel_quantiles - @property - def best_confidence_interval(self) -> EurocodeConfidenceIntervalFromExtremes: - return EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(self.best_estimator, - ci_method=ConfidenceIntervalMethodFromExtremes.ci_mle, - coordinate=np.array( - [2019, self.altitude_plot]), ) + def best_confidence_interval(self, altitude) -> EurocodeConfidenceIntervalFromExtremes: + EurocodeConfidenceIntervalFromExtremes.quantile_level = 1 - (1 / self.return_period) + return EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(estimator_extremes=self.best_estimator, + ci_method=ConfidenceIntervalMethodFromExtremes.ci_mle, + coordinate=np.array([altitude, 2019])) diff --git a/projects/altitude_spatial_model/altitudes_fit/plots/plot_coherence_curves.py b/projects/altitude_spatial_model/altitudes_fit/plots/plot_coherence_curves.py index 3042b18905acb2e06f03bb1e4b70800fa7b219f4..5298b24043b2b87fce570dfc24eacc7cdb479dd7 100644 --- a/projects/altitude_spatial_model/altitudes_fit/plots/plot_coherence_curves.py +++ b/projects/altitude_spatial_model/altitudes_fit/plots/plot_coherence_curves.py @@ -1,5 +1,6 @@ from typing import List import matplotlib.pyplot as plt +import numpy as np from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitudes_studies_visualizer_for_non_stationary_models import \ AltitudesStudiesVisualizerForNonStationaryModels @@ -13,50 +14,91 @@ def plot_coherence_curves(massif_names, visualizer_list: List[ names = visualizer.get_valid_names(massif_names) all_valid_names = set.union(*[v.get_valid_names(massif_names) for v in visualizer_list]) for massif_name in all_valid_names: - plot_coherence_curve(massif_name, visualizer_list) + _, axes = plt.subplots(2, 2) + axes = axes.flatten() + colors = ['blue', 'green'] + labels = ['Altitudinal model', 'Pointwise model'] + altitudinal_model = [True, False] + for color, global_label, boolean in list(zip(colors, labels, altitudinal_model))[:]: + plot_coherence_curve(axes, massif_name, visualizer_list, boolean, color, global_label) visualizer.plot_name = '{}/{}'.format(folder, massif_name.replace('_', '-')) visualizer.show_or_save_to_file(add_classic_title=False, no_title=True) plt.close() -def plot_coherence_curve(massif_name, visualizer_list: List[AltitudesStudiesVisualizerForNonStationaryModels]): - x_all_list, values_all_list, labels = load_all_list(massif_name, visualizer_list) - _, axes = plt.subplots(2, 2) - axes = axes.flatten() +def plot_coherence_curve(axes, massif_name, visualizer_list: List[AltitudesStudiesVisualizerForNonStationaryModels], + is_altitudinal, color, global_label): + x_all_list, values_all_list, labels, all_bound_list = load_all_list(massif_name, visualizer_list, is_altitudinal) for i, label in enumerate(labels): ax = axes[i] # Plot with complete line - for x_list, value_list in zip(x_all_list, values_all_list): - value_list = value_list[i] - ax.plot(x_list, value_list, linestyle='solid') + for j, (x_list, value_list) in enumerate(list(zip(x_all_list, values_all_list))): + value_list_i = value_list[i] + label_plot = global_label if j == 0 else None + if is_altitudinal: + ax.plot(x_list, value_list_i, linestyle='solid', color=color, label=label_plot) + else: + ax.plot(x_list, value_list_i, linestyle='None', color=color, label=label_plot, marker='x') + ax.plot(x_list, value_list_i, linestyle='dotted', color=color) + # Plot with dotted line for x_list_before, value_list_before, x_list_after, value_list_after in zip(x_all_list, values_all_list, x_all_list[1:], values_all_list[1:]): x_list = [x_list_before[-1], x_list_after[0]] - value_list = [value_list_before[i][-1], value_list_after[i][0]] - ax.plot(x_list, value_list, linestyle='dotted') + value_list_dotted = [value_list_before[i][-1], value_list_after[i][0]] + ax.plot(x_list, value_list_dotted, linestyle='dotted', color=color) + + # Plot confidence interval + if i == 3: + for x_list, bounds in zip(x_all_list, all_bound_list): + if len(bounds) > 0: + lower_bound, upper_bound = bounds + ax.fill_between(x_list, lower_bound, upper_bound, color=color, alpha=0.2) ax.set_ylabel(label) - ax.set_xlabel('Altitude') + ax.legend(prop={'size': 10}) + if i >= 2: + ax.set_xlabel('Altitude') -def load_all_list(massif_name, visualizer_list): + +def load_all_list(massif_name, visualizer_list, altitudinal_model=True): all_altitudes_list = [] all_values_list = [] + all_bound_list = [] for visualizer in visualizer_list: + if massif_name in visualizer.massif_name_to_one_fold_fit: - min_altitude, *_, max_altitude = visualizer.massif_name_to_massif_altitudes[massif_name] - altitudes_list = list(range(min_altitude, max_altitude, 10)) - gev_params_list = [] - for altitude in altitudes_list: - gev_params = visualizer.massif_name_to_one_fold_fit[massif_name].get_gev_params(altitude, 2019) - gev_params_list.append(gev_params) + if altitudinal_model: + min_altitude, *_, max_altitude = visualizer.massif_name_to_massif_altitudes[massif_name] + one_fold_fit = visualizer.massif_name_to_one_fold_fit[massif_name] + altitudes_list = list(range(min_altitude, max_altitude, 10)) + gev_params_list = [one_fold_fit.get_gev_params(altitude, 2019) for altitude in altitudes_list] + confidence_interval_values = [one_fold_fit.best_confidence_interval(altitude) for altitude in altitudes_list] + else: + assert OneFoldFit.return_period == 100, 'change the call below' + altitudes_list, study_list_valid = zip(*[(a, s) for a, s in visualizer.studies.altitude_to_study.items() + if massif_name in s.massif_name_to_stationary_gev_params_and_confidence_for_return_level_100[0]]) + gev_params_list = [study.massif_name_to_stationary_gev_params_and_confidence_for_return_level_100[0][massif_name] + for study in study_list_valid] + confidence_interval_values = [study.massif_name_to_stationary_gev_params_and_confidence_for_return_level_100[1][massif_name] + for study in study_list_valid] + + # Checks values = [(gev_params.location, gev_params.scale, gev_params.shape, gev_params.return_level(return_period=OneFoldFit.return_period)) for gev_params in gev_params_list] + for a, b in zip(values, confidence_interval_values): + if not np.isnan(b.mean_estimate): + assert np.isclose(a[-1], b.mean_estimate) + bound_list = [c.confidence_interval for c in confidence_interval_values if not np.isnan(c.mean_estimate)] values_list = list(zip(*values)) all_values_list.append(values_list) all_altitudes_list.append(altitudes_list) - labels = ['loc', 'scale', 'shape', 'return level'] - return all_altitudes_list, all_values_list, labels + all_bound_list.append(list(zip(*bound_list))) + labels = ['location parameter', 'scale parameter', 'shape pameter', '{}-year return level'.format(OneFoldFit.return_period)] + labels = [l + ' in 2019' for l in labels] + return all_altitudes_list, all_values_list, labels, all_bound_list + + diff --git a/projects/altitude_spatial_model/preliminary_analysis.py b/projects/altitude_spatial_model/preliminary_analysis.py index ad4b16b6535ecb83ffd4df8b14e36d3c07caf8e1..bef0bf888382ba46a51e1652b9ee13184c5716c4 100644 --- a/projects/altitude_spatial_model/preliminary_analysis.py +++ b/projects/altitude_spatial_model/preliminary_analysis.py @@ -2,7 +2,6 @@ import matplotlib.pyplot as plt import numpy as np from cached_property import cached_property -from extreme_data.meteo_france_data.scm_models_data.abstract_study import AbstractStudy from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day from extreme_data.meteo_france_data.scm_models_data.visualization.plot_utils import plot_against_altitude from extreme_data.meteo_france_data.scm_models_data.visualization.study_visualizer import StudyVisualizer @@ -60,13 +59,13 @@ class PointwiseGevStudyVisualizer(AltitudesStudies): def _plot_gev_params_against_altitude_one_massif(self, ax, massif_name, param_name): altitudes = [] params = [] - confidence_intervals = [] + # confidence_intervals = [] for altitude, study in self.altitude_to_study.items(): - if massif_name in study.study_massif_names: + if massif_name in study.massif_name_to_stationary_gev_params: + gev_params = study.massif_name_to_stationary_gev_params[massif_name] altitudes.append(altitude) - gev_params = study.massif_name_to_stationary_gev_params_for_non_zero_annual_maxima[massif_name] params.append(gev_params.to_dict()[param_name]) - confidence_intervals.append(gev_params.param_name_to_confidence_interval[param_name]) + # confidence_intervals.append(gev_params.param_name_to_confidence_interval[param_name]) massif_id = self.study.all_massif_names().index(massif_name) plot_against_altitude(altitudes, ax, massif_id, massif_name, params, fill=False) @@ -196,7 +195,7 @@ if __name__ == '__main__': altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300] altitudes = [600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900] # altitudes = paper_altitudes - # altitudes = [1800, 2100] + # altitudes = [1500, 1800] visualizer = PointwiseGevStudyVisualizer(SafranSnowfall1Day, altitudes=altitudes) visualizer.plot_gev_params_against_altitude() # visualizer.plot_gev_params_against_time_for_all_altitudes()