From 56324216afdd0b90da70d90ba2d6b1703435dbbb Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Tue, 30 Jun 2020 11:13:48 +0200
Subject: [PATCH] [contrasting] add return level plots & add plot with model
 that minimizes the mean aic

---
 .../abstract_margin_estimator.py              |  6 ++-
 .../altitudes_fit/altitudes_studies.py        |  5 ++-
 .../altitudes_fit/main_altitudes_studies.py   | 29 +++++++++++++-
 ...es_visualizer_for_non_stationary_models.py | 23 ++++++-----
 .../one_fold_analysis/one_fold_fit.py         | 40 ++++++++++++++++++-
 5 files changed, 87 insertions(+), 16 deletions(-)

diff --git a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py
index d739cadc..0b18e721 100644
--- a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py
+++ b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py
@@ -74,6 +74,8 @@ class LinearMarginEstimator(AbstractMarginEstimator):
         npt.assert_almost_equal(self.result_from_model_fit.aic, aic, decimal=5)
         return aic
 
+    def n(self, split=Split.all):
+        return len(self.dataset.maxima_gev(split=split))
+
     def bic(self, split=Split.all):
-        n = len(self.dataset.maxima_gev(split=split))
-        return np.log(n) * self.margin_model.nb_params + 2 * self.nllh(split=split)
+        return np.log(self.n(split=split)) * self.margin_model.nb_params + 2 * self.nllh(split=split)
diff --git a/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py
index 545bac0e..6f87bf5d 100644
--- a/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py
+++ b/projects/altitude_spatial_model/altitudes_fit/altitudes_studies.py
@@ -132,7 +132,8 @@ class AltitudesStudies(object):
         ax.xaxis.set_ticks(x[1::10])
         ax.tick_params(axis='both', which='major', labelsize=13)
         ax.legend()
-        plot_name = 'Annual maxima of {} in {}'.format(SCM_STUDY_CLASS_TO_ABBREVIATION[self.study_class], massif_name)
+        plot_name = 'Annual maxima of {} in {}'.format(SCM_STUDY_CLASS_TO_ABBREVIATION[self.study_class],
+                                                       massif_name.replace('_', ' '))
         ax.set_ylabel('{} ({})'.format(plot_name, self.study.variable_unit), fontsize=15)
         ax.set_xlabel('years', fontsize=15)
         self.show_or_save_to_file(plot_name=plot_name, show=show)
@@ -148,6 +149,8 @@ class AltitudesStudies(object):
         if change is True or change is None:
             moment += ' change (between two block of 30 years) for'
         moment += 'mean' if not std else 'std'
+        if change is False:
+            moment += ' (for the 60 years of data)'
         plot_name = '{} of annual maxima of {}'.format(moment, SCM_STUDY_CLASS_TO_ABBREVIATION[self.study_class])
         ax.set_ylabel('{} ({})'.format(plot_name, self.study.variable_unit), fontsize=15)
         ax.set_xlabel('altitudes', fontsize=15)
diff --git a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py
index ff604f44..99e903a5 100644
--- a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py
+++ b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py
@@ -1,4 +1,8 @@
 import matplotlib as mpl
+import numpy as np
+
+from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fit import OneFoldFit
+
 mpl.rcParams['text.usetex'] = True
 mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']
 
@@ -13,10 +17,31 @@ from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitudes_s
 
 
 def plot_altitudinal_fit(studies, massif_names=None):
+    model_classes = ALTITUDINAL_MODELS
     visualizer = AltitudesStudiesVisualizerForNonStationaryModels(studies=studies,
-                                                                  model_classes=ALTITUDINAL_MODELS,
+                                                                  model_classes=model_classes,
                                                                   massif_names=massif_names,
                                                                   show=False)
+    # Plot the results for the model that minimizes the individual aic
+    visualizer.plot_moments()
+    visualizer.plot_shape_map()
+
+    # Compute the mean AIC for each model_class
+    OneFoldFit.best_estimator_minimizes_mean_aic = True
+    model_class_to_aic_scores = {model_class: [] for model_class in model_classes}
+    for one_fold_fit in visualizer.massif_name_to_one_fold_fit.values():
+        for model_class, estimator in one_fold_fit.model_class_to_estimator_with_finite_aic.items():
+            print(estimator.n())
+            aic_score_normalized = estimator.aic() / estimator.n()
+            model_class_to_aic_scores[model_class].append(aic_score_normalized)
+    model_class_to_mean_aic_score = {model_class: np.array(aic_scores).mean()
+                                     for model_class, aic_scores in model_class_to_aic_scores.items()}
+    sorted_model_class = sorted(model_classes, key=lambda m: model_class_to_mean_aic_score[m])
+    best_model_class_for_mean_aic = sorted_model_class[0]
+    for one_fold_fit in visualizer.massif_name_to_one_fold_fit.values():
+        one_fold_fit.best_estimator_class_for_mean_aic = best_model_class_for_mean_aic
+
+    # Plot the results for the model that minimizes the mean aic
     visualizer.plot_moments()
     visualizer.plot_shape_map()
 
@@ -34,7 +59,7 @@ def plot_moments(studies, massif_names=None):
 def main():
     altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][4:7]
     altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][:]
-    # altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900]
+    altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900]
     study_classes = [SafranSnowfall1Day, SafranSnowfall3Days, SafranSnowfall5Days, SafranSnowfall7Days][:2]
     study_classes = [SafranPrecipitation1Day, SafranPrecipitation3Days, SafranPrecipitation5Days,
                      SafranPrecipitation7Days][:]
diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py
index bad9c767..e378f384 100644
--- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py
+++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py
@@ -36,7 +36,7 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer):
 
     def plot_moments(self):
         for method_name in ['moment', 'changes_in_the_moment', 'relative_changes_in_the_moment']:
-            for order in [1, 2]:
+            for order in [1, 2, None]:
                 self.plot_general(method_name, order)
 
     def plot_general(self, method_name, order):
@@ -47,14 +47,16 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer):
             massif_altitudes = self.studies.massif_name_to_altitudes[massif_name]
             ind = (min(massif_altitudes) <= altitudes_plot) & (altitudes_plot <= max(massif_altitudes))
             massif_altitudes_plot = altitudes_plot[ind]
-            old_fold_fit = self.massif_name_to_one_fold_fit[massif_name]
-            values = old_fold_fit.__getattribute__(method_name)(massif_altitudes_plot, order=order)
-            plot_against_altitude(massif_altitudes_plot, ax, massif_id, massif_name, values)
+            one_fold_fit = self.massif_name_to_one_fold_fit[massif_name]
+            if one_fold_fit.best_estimator_has_finite_aic:
+                values = one_fold_fit.__getattribute__(method_name)(massif_altitudes_plot, order=order)
+                plot_against_altitude(massif_altitudes_plot, ax, massif_id, massif_name, values)
         # Plot settings
         ax.legend(prop={'size': 7}, ncol=3)
         moment = ' '.join(method_name.split('_'))
-        moment = moment.replace('moment', 'mean' if order == 1 else 'std')
-        plot_name = '{} annual maxima of {}'.format(moment, SCM_STUDY_CLASS_TO_ABBREVIATION[self.studies.study_class])
+        moment = moment.replace('moment', '{} in 2019'.format(OneFoldFit.get_moment_str(order=order)))
+        plot_name = '{}/Model {} annual maxima of {}'.format(OneFoldFit.folder_for_plots, moment,
+                                                             SCM_STUDY_CLASS_TO_ABBREVIATION[self.studies.study_class])
         ax.set_ylabel('{} ({})'.format(plot_name, self.study.variable_unit), fontsize=15)
         ax.set_xlabel('altitudes', fontsize=15)
         # lim_down, lim_up = ax.get_ylim()
@@ -66,13 +68,16 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer):
 
     def plot_abstract_fast(self, massif_name_to_value, label, graduation=10.0, cmap=plt.cm.coolwarm, add_x_label=True,
                            negative_and_positive_values=True, massif_name_to_text=None):
-        super().plot_abstract(massif_name_to_value, label, label, self.fit_method, graduation, cmap, add_x_label,
-                              negative_and_positive_values, massif_name_to_text)
+        plot_name = '{}/{}'.format(OneFoldFit.folder_for_plots, label)
+        self.plot_map(cmap, self.fit_method, graduation, label, massif_name_to_value, plot_name, add_x_label,
+                      negative_and_positive_values,
+                      massif_name_to_text)
 
     @property
     def massif_name_to_shape(self):
         return {massif_name: one_fold_fit.best_shape
-                for massif_name, one_fold_fit in self.massif_name_to_one_fold_fit.items()}
+                for massif_name, one_fold_fit in self.massif_name_to_one_fold_fit.items()
+                if one_fold_fit.best_estimator_has_finite_aic}
 
     @property
     def massif_name_to_name(self):
diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py
index cb90188b..113e1329 100644
--- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py
+++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/one_fold_fit.py
@@ -7,10 +7,12 @@ from scipy.stats import chi2
 from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator_short
 from extreme_fit.model.margin_model.polynomial_margin_model.altitudinal_models import StationaryAltitudinal
 from extreme_fit.model.margin_model.utils import MarginFitMethod
+from root_utils import classproperty
 
 
 class OneFoldFit(object):
     SIGNIFICANCE_LEVEL = 0.05
+    best_estimator_minimizes_mean_aic = False
 
     def __init__(self, massif_name, dataset, models_classes, fit_method=MarginFitMethod.extremes_fevd_mle):
         self.massif_name = massif_name
@@ -25,12 +27,30 @@ class OneFoldFit(object):
                                                                                               dataset=self.dataset,
                                                                                               fit_method=self.fit_method)
 
+        # Best estimator definition
+        self.best_estimator_class_for_mean_aic = None
+
+    @classproperty
+    def folder_for_plots(cls):
+        return 'Mean aic' if cls.best_estimator_minimizes_mean_aic else 'Individual aic'
+
+    @classmethod
+    def get_moment_str(cls, order):
+        if order == 1:
+            return 'Mean'
+        elif order == 2:
+            return 'Std'
+        elif order is None:
+            return 'Return level'
+
     def get_moment(self, altitude, year, order=1):
         gev_params = self.get_gev_params(altitude, year)
         if order == 1:
             return gev_params.mean
         elif order == 2:
             return gev_params.std
+        elif order is None:
+            return gev_params.return_level(return_period=2019)
         else:
             raise NotImplementedError
 
@@ -79,13 +99,29 @@ class OneFoldFit(object):
                                                    key=lambda e: e.aic())
         return sorted_estimators_with_finite_aic
 
+    @property
+    def model_class_to_estimator_with_finite_aic(self):
+        return {type(estimator.margin_model): estimator for estimator in self.sorted_estimators_with_finite_aic}
+
     @cached_property
+    def set_estimators_with_finite_aic(self):
+        return set(self.sorted_estimators_with_finite_aic)
+
+    @property
     def best_estimator(self):
-        return self.sorted_estimators_with_finite_aic[0]
+        if self.best_estimator_minimizes_mean_aic and self.best_estimator_class_for_mean_aic is not None:
+            return self.model_class_to_estimator[self.best_estimator_class_for_mean_aic]
+        else:
+            return self.sorted_estimators_with_finite_aic[0]
+
+    @property
+    def best_estimator_has_finite_aic(self):
+        return self.best_estimator in self.set_estimators_with_finite_aic
 
     @property
     def best_shape(self):
-        return self.get_gev_params(1000, year=2019).shape
+        # We take any altitude (altitude=1000 for instance) as the shape is constant w.r.t the altitude
+        return self.get_gev_params(altitude=1000, year=2019).shape
 
     @property
     def best_function_from_fit(self):
-- 
GitLab