From 2078ac21e97e49fa1c1a906ea8b190a7d6f92b88 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Tue, 19 Jan 2021 18:53:56 +0100
Subject: [PATCH] [contrasting] add plot to compare the size of confidence
 interval between the pointwise approach and the piecewise
 elevational-temporal approach. bootstrap level is set to 100

---
 .../scm_models_data/abstract_study.py         |  2 +-
 .../scm_models_data/utils_function.py         | 21 +++++--
 .../abstract_extract_eurocode_return_level.py |  2 +-
 .../eurocode_return_level_uncertainties.py    |  4 ++
 .../altitudes_fit/main_altitudes_studies.py   | 16 ++---
 ...es_visualizer_for_non_stationary_models.py | 20 +++++++
 .../plots/plot_histogram_altitude_studies.py  | 60 ++++++++++++++++++-
 7 files changed, 110 insertions(+), 15 deletions(-)

diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
index feaef969..d3fe96f6 100644
--- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
+++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
@@ -879,7 +879,7 @@ class AbstractStudy(object):
         return d
 
     def massif_name_to_stationary_gev_params_and_confidence(self, quantile_level,
-                                                            confidence_interval_based_on_delta_method):
+                                                            confidence_interval_based_on_delta_method) -> Tuple[Dict]:
         t = (quantile_level, confidence_interval_based_on_delta_method)
         if t in self._cache_for_pointwise_fit:
             return self._cache_for_pointwise_fit[t]
diff --git a/extreme_data/meteo_france_data/scm_models_data/utils_function.py b/extreme_data/meteo_france_data/scm_models_data/utils_function.py
index 58b25540..a4d98068 100644
--- a/extreme_data/meteo_france_data/scm_models_data/utils_function.py
+++ b/extreme_data/meteo_france_data/scm_models_data/utils_function.py
@@ -1,8 +1,10 @@
 from multiprocessing import Pool
+from typing import Tuple, Dict
 
 import numpy as np
 from sklearn.utils import resample
 
+from extreme_fit.distribution.gev.gev_params import GevParams
 from extreme_fit.estimator.margin_estimator.utils import _fitted_stationary_gev
 from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel
 from extreme_fit.model.margin_model.utils import MarginFitMethod
@@ -19,14 +21,15 @@ def fitted_stationary_gev_with_uncertainty_interval(x_gev, fit_method=MarginFitM
                                                     model_class=StationaryTemporalModel,
                                                     starting_year=None,
                                                     quantile_level=0.98,
-                                                    confidence_interval_based_on_delta_method=True):
+                                                    confidence_interval_based_on_delta_method=True) -> Tuple[Dict[str, GevParams], Dict[str, EurocodeConfidenceIntervalFromExtremes]]:
     estimator, gev_param = _fitted_stationary_gev(fit_method, model_class, starting_year, x_gev)
     if quantile_level is not None:
         EurocodeConfidenceIntervalFromExtremes.quantile_level = quantile_level
         coordinate = estimator.dataset.coordinates.df_all_coordinates.iloc[0].values
         if confidence_interval_based_on_delta_method:
-            confidence_interval = EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(estimator, ConfidenceIntervalMethodFromExtremes.ci_mle,
-                                                                           coordinate)
+            confidence_interval = EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(estimator,
+                                                                                                 ConfidenceIntervalMethodFromExtremes.ci_mle,
+                                                                                                 coordinate)
         else:
             # Bootstrap method
             nb_bootstrap = AbstractExtractEurocodeReturnLevel.NB_BOOTSTRAP
@@ -38,6 +41,14 @@ def fitted_stationary_gev_with_uncertainty_interval(x_gev, fit_method=MarginFitM
                     return_level_list = p.map(_compute_return_level, arguments)
             else:
                 return_level_list = [_compute_return_level(argument) for argument in arguments]
+            # Remove infinite return levels and return level
+            len_before_remove = len(return_level_list)
+            return_level_list = [r for r in return_level_list if not np.isinf(r)]
+            threshold = 2000
+            return_level_list = [r for r in return_level_list if r < threshold]
+            len_after_remove = len(return_level_list)
+            if len_after_remove < len_before_remove:
+                print('Nb of fit removed (inf or > {}:'.format(threshold), len_before_remove - len_after_remove)
             confidence_interval = tuple([np.quantile(return_level_list, q)
                                          for q in AbstractExtractEurocodeReturnLevel.bottom_and_upper_quantile])
             mean_estimate = gev_param.quantile(quantile_level)
@@ -46,6 +57,8 @@ def fitted_stationary_gev_with_uncertainty_interval(x_gev, fit_method=MarginFitM
         confidence_interval = None
     return gev_param, confidence_interval
 
+
 def _compute_return_level(t):
     fit_method, model_class, starting_year, x, quantile_level = t
-    return _fitted_stationary_gev(fit_method, model_class, starting_year, x)[1].quantile(quantile_level)
\ No newline at end of file
+    gev_params = _fitted_stationary_gev(fit_method, model_class, starting_year, x)[1]
+    return gev_params.quantile(quantile_level)
diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes/abstract_extract_eurocode_return_level.py b/extreme_fit/model/result_from_model_fit/result_from_extremes/abstract_extract_eurocode_return_level.py
index ac2c6d9d..db45b26a 100644
--- a/extreme_fit/model/result_from_model_fit/result_from_extremes/abstract_extract_eurocode_return_level.py
+++ b/extreme_fit/model/result_from_model_fit/result_from_extremes/abstract_extract_eurocode_return_level.py
@@ -16,7 +16,7 @@ from root_utils import classproperty
 
 class AbstractExtractEurocodeReturnLevel(object):
     ALPHA_CONFIDENCE_INTERVAL_UNCERTAINTY = 0.05
-    NB_BOOTSTRAP = 1000
+    NB_BOOTSTRAP = 100
 
     @classproperty
     def bottom_and_upper_quantile(cls):
diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py b/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py
index 1d45d575..34877299 100644
--- a/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py
+++ b/extreme_fit/model/result_from_model_fit/result_from_extremes/eurocode_return_level_uncertainties.py
@@ -26,6 +26,10 @@ class EurocodeConfidenceIntervalFromExtremes(object):
         self.mean_estimate = mean_estimate
         self.confidence_interval = confidence_interval
 
+    @property
+    def interval_size(self):
+        return self.confidence_interval[1] - self.confidence_interval[0]
+
     @property
     def triplet(self):
         return self.confidence_interval[0], self.mean_estimate, self.confidence_interval[1]
diff --git a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py
index f0b704df..ddb6cc3a 100644
--- a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py
+++ b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py
@@ -8,7 +8,8 @@ from extreme_fit.model.utils import set_seed_for_test
 from projects.altitude_spatial_model.altitudes_fit.plots.plot_coherence_curves import plot_coherence_curves
 from projects.altitude_spatial_model.altitudes_fit.plots.plot_histogram_altitude_studies import \
     plot_histogram_all_models_against_altitudes, plot_histogram_all_trends_against_altitudes, \
-    plot_shoe_plot_changes_against_altitude, plot_shoe_plot_changes_against_altitude_for_maxima_and_total
+    plot_shoe_plot_changes_against_altitude, plot_shoe_plot_changes_against_altitude_for_maxima_and_total, \
+    plot_shoe_plot_ratio_interval_size_against_altitude
 
 mpl.rcParams['text.usetex'] = True
 mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']
@@ -29,13 +30,13 @@ def main():
 
     set_seed_for_test()
 
-    fast = None
+    fast = False
     if fast is None:
         massif_names = ['Vanoise', 'Haute-Maurienne', 'Vercors'][:1]
         altitudes_list = altitudes_for_groups[:]
     elif fast:
-        massif_names = ['Vanoise', 'Haute-Maurienne', 'Vercors'][:1]
-        altitudes_list = altitudes_for_groups[3:]
+        massif_names = ['Vanoise', 'Haute-Maurienne', 'Vercors'][:]
+        altitudes_list = altitudes_for_groups[:2]
     else:
         massif_names = None
         altitudes_list = altitudes_for_groups[:]
@@ -60,11 +61,12 @@ def main_loop(altitudes_list, massif_names, seasons, study_classes):
 
 def plot_visualizers(massif_names, visualizer_list):
     # plot_histogram_all_trends_against_altitudes(massif_names, visualizer_list)
+    plot_shoe_plot_ratio_interval_size_against_altitude(massif_names, visualizer_list)
     # for relative in [True, False]:
-        # plot_shoe_plot_changes_against_altitude(massif_names, visualizer_list, relative=relative)
+    #     plot_shoe_plot_changes_against_altitude(massif_names, visualizer_list, relative=relative)
         # plot_shoe_plot_changes_against_altitude_for_maxima_and_total(massif_names, visualizer_list, relative=relative)
-    # plot_coherence_curves(massif_names, visualizer_list)
-    plot_coherence_curves(['Vanoise'], visualizer_list)
+    plot_coherence_curves(massif_names, visualizer_list)
+    # plot_coherence_curves(['Vanoise'], visualizer_list)
 
 
 def plot_visualizer(massif_names, visualizer):
diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py
index f41da81e..d5ba71cc 100644
--- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py
+++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py
@@ -140,6 +140,26 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer):
             self._method_name_and_order_to_massif_name_to_value[c] = massif_name_to_value
         return self._method_name_and_order_to_massif_name_to_value[c]
 
+    def ratio_groups(self):
+        return [self.ratio_uncertainty_interval_size(altitude, 2019) for altitude in self.studies.altitudes]
+
+    def ratio_uncertainty_interval_size(self, altitude, year):
+        study = self.studies.altitude_to_study[altitude]
+        massif_name_to_interval = study.massif_name_to_stationary_gev_params_and_confidence(OneFoldFit.quantile_level,
+                                                                                            self.confidence_interval_based_on_delta_method)[
+            1]
+        massif_names_with_pointwise_interval = set(massif_name_to_interval)
+        valid_massif_names = set(self.massif_name_to_one_fold_fit.keys())
+        intersection_massif_names = valid_massif_names.intersection(massif_names_with_pointwise_interval)
+        ratios = []
+        for massif_name in intersection_massif_names:
+            one_fold_fit = self.massif_name_to_one_fold_fit[massif_name]
+            new_interval_size = one_fold_fit.best_confidence_interval(altitude, year).interval_size
+            old_interval_size = massif_name_to_interval[massif_name].interval_size
+            ratio = new_interval_size / old_interval_size
+            ratios.append(ratio)
+        return ratios
+
     def plot_map_moment(self, method_name, order):
         massif_name_to_value = self.method_name_and_order_to_d(method_name, order)
         # Plot settings
diff --git a/projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py b/projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py
index 30a3635e..77d85d41 100644
--- a/projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py
+++ b/projects/altitude_spatial_model/altitudes_fit/plots/plot_histogram_altitude_studies.py
@@ -6,6 +6,8 @@ import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.lines import Line2D
 
+from extreme_fit.model.result_from_model_fit.result_from_extremes.abstract_extract_eurocode_return_level import \
+    AbstractExtractEurocodeReturnLevel
 from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitudes_studies_visualizer_for_non_stationary_models import \
     AltitudesStudiesVisualizerForNonStationaryModels
 from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fit import OneFoldFit
@@ -120,6 +122,57 @@ def plot_histogram_all_trends_against_altitudes(massif_names, visualizer_list: L
 
     plt.close()
 
+def plot_shoe_plot_ratio_interval_size_against_altitude(massif_names, visualizer_list: List[
+    AltitudesStudiesVisualizerForNonStationaryModels]):
+    visualizer = visualizer_list[0]
+
+    ratio_groups = []
+    for v in visualizer_list:
+        ratio_groups.extend(v.ratio_groups())
+    print(len(ratio_groups))
+    print(ratio_groups)
+
+
+    nb_massifs = [len(l) for l in ratio_groups]
+
+    plt.close()
+    ax = plt.gca()
+    width = 5
+    size = 8
+    legend_fontsize = 10
+    labelsize = 10
+    linewidth = 3
+
+    x = np.array([2 * width * (i + 1) for i in range(len(ratio_groups))])
+    ax.boxplot(ratio_groups, positions=x, widths=width, patch_artist=True, showmeans=True)
+
+    ax.legend(prop={'size': 8})
+
+    ylabel = "Ratio for the size of {}\% confidence intervals, i.e. size for the\n" \
+    " elevational-temporal model divided by the size for the pointwise model".format(AbstractExtractEurocodeReturnLevel.percentage_confidence_interval)
+    ax.set_ylabel(ylabel,
+                  fontsize=legend_fontsize)
+    ax.set_xlabel('Elevation (m)', fontsize=legend_fontsize + 5)
+    ax.tick_params(axis='both', which='major', labelsize=labelsize)
+    ax.set_xticks(x)
+    ax.yaxis.grid()
+
+    altitudes = []
+    for v in visualizer_list:
+        altitudes.extend(v.studies.altitudes)
+    ax.set_xticklabels([str(a) for a in altitudes])
+
+    shift = 2 * width
+    ax.set_xlim((min(x) - shift, max(x) + shift))
+
+    # I could display the number of massif used to build each box plot.
+    plot_nb_massif_on_upper_axis(ax, labelsize, legend_fontsize, nb_massifs, x, add_for_percentage=False)
+
+    visualizer.plot_name = 'All ' + "uncertainty size comparison for bootstrap size {}".format(AbstractExtractEurocodeReturnLevel.NB_BOOTSTRAP)
+    visualizer.show_or_save_to_file(add_classic_title=False, no_title=True)
+
+    plt.close()
+
 
 def plot_shoe_plot_changes_against_altitude(massif_names, visualizer_list: List[
     AltitudesStudiesVisualizerForNonStationaryModels],
@@ -250,7 +303,7 @@ def plot_shoe_plot_changes_against_altitude_for_maxima_and_total(massif_names, v
     plt.close()
 
 
-def plot_nb_massif_on_upper_axis(ax, labelsize, legend_fontsize, nb_massifs, x):
+def plot_nb_massif_on_upper_axis(ax, labelsize, legend_fontsize, nb_massifs, x, add_for_percentage=True):
     # Plot number of massifs on the upper axis
     ax_twiny = ax.twiny()
     ax_twiny.plot(x, [0 for _ in x], linewidth=0)
@@ -258,4 +311,7 @@ def plot_nb_massif_on_upper_axis(ax, labelsize, legend_fontsize, nb_massifs, x):
     ax_twiny.tick_params(labelsize=labelsize)
     ax_twiny.set_xticklabels(nb_massifs)
     ax_twiny.set_xlim(ax.get_xlim())
-    ax_twiny.set_xlabel('Total number of massifs at each range (for the percentage)', fontsize=legend_fontsize)
+    xlabel = 'Total number of massifs at each range'
+    if add_for_percentage:
+        xlabel += ' (for the percentage)'
+    ax_twiny.set_xlabel(xlabel, fontsize=legend_fontsize)
-- 
GitLab