From c86652d5ef55af1e4305e8ba39ad29aea123d4c9 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Wed, 8 Jan 2020 17:33:25 +0100
Subject: [PATCH] [paper 1] add qqplots and standard gumbel transformation

---
 .../__init__.py}                              |  0
 .../qqplot/plot_qqplot.py                     | 48 +++++++++++++++++++
 .../shape/__init__.py                         |  0
 .../{ => shape}/main_shape_repartition.py     |  2 +-
 .../study_visualizer_for_shape_repartition.py |  0
 .../data/main_example_swe_total_plot.py       | 14 +++---
 ...dy_visualizer_for_non_stationary_trends.py | 17 ++++++-
 .../abstract_gev_trend_test.py                | 30 +++++++++++-
 extreme_fit/distribution/gev/gev_params.py    | 10 +++-
 .../abstract_temporal_linear_margin_model.py  |  8 ++--
 10 files changed, 114 insertions(+), 15 deletions(-)
 rename experiment/paper_past_snow_loads/check_mle_convergence_for_trends/{main_mle_diagnosis.py => qqplot/__init__.py} (100%)
 create mode 100644 experiment/paper_past_snow_loads/check_mle_convergence_for_trends/qqplot/plot_qqplot.py
 create mode 100644 experiment/paper_past_snow_loads/check_mle_convergence_for_trends/shape/__init__.py
 rename experiment/paper_past_snow_loads/check_mle_convergence_for_trends/{ => shape}/main_shape_repartition.py (97%)
 rename experiment/paper_past_snow_loads/check_mle_convergence_for_trends/{ => shape}/study_visualizer_for_shape_repartition.py (100%)

diff --git a/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/main_mle_diagnosis.py b/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/qqplot/__init__.py
similarity index 100%
rename from experiment/paper_past_snow_loads/check_mle_convergence_for_trends/main_mle_diagnosis.py
rename to experiment/paper_past_snow_loads/check_mle_convergence_for_trends/qqplot/__init__.py
diff --git a/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/qqplot/plot_qqplot.py b/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/qqplot/plot_qqplot.py
new file mode 100644
index 00000000..db7b5e6c
--- /dev/null
+++ b/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/qqplot/plot_qqplot.py
@@ -0,0 +1,48 @@
+from typing import Dict
+
+from experiment.meteo_france_data.scm_models_data.crocus.crocus import CrocusSnowLoadTotal
+from experiment.paper_past_snow_loads.data.main_example_swe_total_plot import marker_altitude_massif_name_for_paper1
+from experiment.paper_past_snow_loads.paper_main_utils import load_altitude_to_visualizer
+from experiment.paper_past_snow_loads.study_visualizer_for_non_stationary_trends import \
+    StudyVisualizerForNonStationaryTrends
+from experiment.trend_analysis.univariate_test.extreme_trend_test.abstract_gev_trend_test import AbstractGevTrendTest
+
+
+def plot_qqplot_wrt_standard_gumbel(altitude_to_visualizer: Dict[int, StudyVisualizerForNonStationaryTrends],
+                                    plot_all=False):
+    if plot_all:
+        pass
+    else:
+        # Plot only some examples
+        plot_qqplot_for_time_series_examples(altitude_to_visualizer)
+        plot_qqplot_for_time_series_with_missing_zeros(altitude_to_visualizer)
+
+
+def plot_qqplot_for_time_series_with_missing_zeros(altitude_to_visualizer: Dict[int, StudyVisualizerForNonStationaryTrends],
+                                                   nb_worst_examples=3):
+    # Extract all the values
+    l = []
+    for a, v in altitude_to_visualizer.items():
+        l.extend([(a, v, m, p) for m, p in v.massif_name_to_psnow.items()])
+    # Sort them and keep the worst examples
+    l = sorted(l, key=lambda t: t[-1])[:nb_worst_examples]
+    print('Worst examples:')
+    for a, v, m, p in l:
+        print(a, m, p)
+        v.qqplot(m)
+
+
+def plot_qqplot_for_time_series_examples(altitude_to_visualizer: Dict[int, StudyVisualizerForNonStationaryTrends]):
+    for color, a, m in marker_altitude_massif_name_for_paper1:
+        v = altitude_to_visualizer[a]
+        v.qqplot(m, color)
+
+
+if __name__ == '__main__':
+    # for the five worst, 300 is interesti
+    altitudes = [300, 900, 1800, 2700]
+    altitude_to_visualizer = {altitude:  StudyVisualizerForNonStationaryTrends(CrocusSnowLoadTotal(altitude=altitude),
+                                                                               multiprocessing=True)
+                              for altitude in altitudes}
+    plot_qqplot_wrt_standard_gumbel(altitude_to_visualizer)
+
diff --git a/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/shape/__init__.py b/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/shape/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/main_shape_repartition.py b/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/shape/main_shape_repartition.py
similarity index 97%
rename from experiment/paper_past_snow_loads/check_mle_convergence_for_trends/main_shape_repartition.py
rename to experiment/paper_past_snow_loads/check_mle_convergence_for_trends/shape/main_shape_repartition.py
index e7c9f80b..f9cb04c4 100644
--- a/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/main_shape_repartition.py
+++ b/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/shape/main_shape_repartition.py
@@ -1,5 +1,5 @@
 from experiment.meteo_france_data.scm_models_data.crocus.crocus import CrocusSnowLoadTotal
-from experiment.paper_past_snow_loads.check_mle_convergence_for_trends.study_visualizer_for_shape_repartition import \
+from experiment.paper_past_snow_loads.check_mle_convergence_for_trends.shape.study_visualizer_for_shape_repartition import \
     StudyVisualizerForShape
 from experiment.paper_past_snow_loads.paper_main_utils import load_altitude_to_visualizer
 
diff --git a/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/study_visualizer_for_shape_repartition.py b/experiment/paper_past_snow_loads/check_mle_convergence_for_trends/shape/study_visualizer_for_shape_repartition.py
similarity index 100%
rename from experiment/paper_past_snow_loads/check_mle_convergence_for_trends/study_visualizer_for_shape_repartition.py
rename to experiment/paper_past_snow_loads/check_mle_convergence_for_trends/shape/study_visualizer_for_shape_repartition.py
diff --git a/experiment/paper_past_snow_loads/data/main_example_swe_total_plot.py b/experiment/paper_past_snow_loads/data/main_example_swe_total_plot.py
index 0e3c1a38..4d7313fd 100644
--- a/experiment/paper_past_snow_loads/data/main_example_swe_total_plot.py
+++ b/experiment/paper_past_snow_loads/data/main_example_swe_total_plot.py
@@ -7,6 +7,12 @@ from experiment.meteo_france_data.scm_models_data.visualization.study_visualizat
     StudyVisualizer
 from experiment.paper_past_snow_loads.paper_utils import dpi_paper1_figure
 
+marker_altitude_massif_name_for_paper1 = [
+    ('magenta', 900, 'Ubaye'),
+    ('darkmagenta', 1800, 'Vercors'),
+    ('mediumpurple', 2700, 'Beaufortain'),
+]
+
 
 def max_graph_annual_maxima_poster():
     """
@@ -16,15 +22,11 @@ def max_graph_annual_maxima_poster():
     """
     save_to_file = True
     study_class = CrocusSnowLoadTotal
-    marker_altitude_massif_name = [
-        ('magenta', 900, 'Ubaye'),
-        ('darkmagenta', 1800, 'Vercors'),
-        ('mediumpurple', 2700, 'Beaufortain'),
-    ]
+
     ax = plt.gca()
     ax.set_ylim([0, 20])
     ax.set_yticks(list(range(0, 21, 2)))
-    for color, altitude, massif_name in marker_altitude_massif_name:
+    for color, altitude, massif_name in marker_altitude_massif_name_for_paper1:
         for study in study_iterator_global([study_class], altitudes=[altitude]):
             study_visualizer = StudyVisualizer(study, save_to_file=save_to_file,
                                                verbose=True,
diff --git a/experiment/paper_past_snow_loads/study_visualizer_for_non_stationary_trends.py b/experiment/paper_past_snow_loads/study_visualizer_for_non_stationary_trends.py
index 7406dfff..bce1e2fc 100644
--- a/experiment/paper_past_snow_loads/study_visualizer_for_non_stationary_trends.py
+++ b/experiment/paper_past_snow_loads/study_visualizer_for_non_stationary_trends.py
@@ -25,6 +25,8 @@ from experiment.trend_analysis.univariate_test.extreme_trend_test.trend_test_two
     GevLocationAgainstGumbel, GevScaleAgainstGumbel
 from experiment.trend_analysis.univariate_test.extreme_trend_test.trend_test_two_parameters.gumbel_test_two_parameters import \
     GumbelLocationAndScaleTrendTest
+from extreme_fit.model.margin_model.linear_margin_model.abstract_temporal_linear_margin_model import \
+    TemporalMarginFitMethod
 from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import GumbelTemporalModel
 from extreme_fit.model.result_from_model_fit.result_from_extremes.confidence_interval_method import \
     ConfidenceIntervalMethodFromExtremes
@@ -46,7 +48,7 @@ class StudyVisualizerForNonStationaryTrends(StudyVisualizer):
                  effective_temporal_covariate=2017,
                  relative_change_trend_plot=True,
                  non_stationary_trend_test_to_marker=None,
-                 fit_method=None,
+                 fit_method=TemporalMarginFitMethod.extremes_fevd_mle,
                  select_only_acceptable_shape_parameter=False):
         super().__init__(study, show, save_to_file, only_one_graph, only_first_row, vertical_kde_plot,
                          year_for_kde_plot, plot_block_maxima_quantiles, temporal_non_stationarity,
@@ -218,7 +220,9 @@ class StudyVisualizerForNonStationaryTrends(StudyVisualizer):
 
     @property
     def label_tdrl_bar(self):
-        return 'Change in {} years'.format(AbstractGevTrendTest.nb_years_for_quantile_evolution)
+        nb_years = AbstractGevTrendTest.nb_years_for_quantile_evolution
+        suffix = 'per decade' if nb_years == 10 else 'in {} years'.format(nb_years)
+        return 'Change {}'.format(suffix)
 
     @property
     def ticks_values_and_labels(self):
@@ -361,3 +365,12 @@ class StudyVisualizerForNonStationaryTrends(StudyVisualizer):
                        uncertainty.confidence_interval[1] > eurocode)
                       for eurocode, uncertainty in eurocode_and_uncertainties])
         return 100 * np.mean(a, axis=0)
+
+    # Part 3 - Zeros
+
+    # Part 4 - QQPLOT
+
+    def qqplot(self, massif_name, color=None):
+        trend_test = self.massif_name_to_trend_test_that_minimized_aic[massif_name]
+        marker = self.massif_name_to_marker_style[massif_name]
+        trend_test.qqplot_wrt_standard_gumbel(marker, color)
\ No newline at end of file
diff --git a/experiment/trend_analysis/univariate_test/extreme_trend_test/abstract_gev_trend_test.py b/experiment/trend_analysis/univariate_test/extreme_trend_test/abstract_gev_trend_test.py
index b9047dc1..0eea9ea0 100644
--- a/experiment/trend_analysis/univariate_test/extreme_trend_test/abstract_gev_trend_test.py
+++ b/experiment/trend_analysis/univariate_test/extreme_trend_test/abstract_gev_trend_test.py
@@ -1,4 +1,5 @@
 import numpy as np
+import matplotlib.pyplot as plt
 import pandas as pd
 from cached_property import cached_property
 from scipy.stats import chi2
@@ -161,8 +162,9 @@ class AbstractGevTrendTest(AbstractUnivariateTest):
     def relative_change_in_return_level(self, initial_year, final_year):
         return_level_values = []
         for year in [initial_year, final_year]:
-            gev_params = self.unconstrained_estimator.margin_function_from_fit.get_gev_params(coordinate=np.array([year]),
-                                                                                            is_transformed=False)
+            gev_params = self.unconstrained_estimator.margin_function_from_fit.get_gev_params(
+                coordinate=np.array([year]),
+                is_transformed=False)
             return_level_values.append(gev_params.quantile(self.quantile_level))
         change_until_final_year = self.time_derivative_times_years(nb_years=final_year - initial_year)
         change_in_between = (return_level_values[1] - return_level_values[0])
@@ -204,3 +206,27 @@ class AbstractGevTrendTest(AbstractUnivariateTest):
     @classproperty
     def label(self):
         return '\\mathcal{M}_{%s}'
+
+    # Some display function
+
+    def qqplot_wrt_standard_gumbel(self, marker, color=None):
+        # Standard Gumbel quantiles
+        standard_gumbel_distribution = GevParams(loc=0, scale=1, shape=0)
+        n = len(self.years)
+        standard_gumbel_quantiles = [standard_gumbel_distribution.quantile(i / (n + 1)) for i in range(1, n + 1)]
+        unconstrained_empirical_quantiles = self.compute_empirical_quantiles(self.unconstrained_estimator)
+        constrained_empirical_quantiles = self.compute_empirical_quantiles(self.constrained_estimator)
+        plt.plot(standard_gumbel_quantiles, standard_gumbel_quantiles, color=color)
+        plt.plot(standard_gumbel_quantiles, constrained_empirical_quantiles, 'x')
+        plt.plot(standard_gumbel_quantiles, unconstrained_empirical_quantiles, linestyle='None', **marker)
+        plt.show()
+
+    def compute_empirical_quantiles(self, estimator):
+        empirical_quantiles = []
+        for year, maximum in sorted(zip(self.years, self.maxima), key=lambda t: t[1]):
+            gev_param = estimator.margin_function_from_fit.get_gev_params(
+                coordinate=np.array([year]),
+                is_transformed=False)
+            maximum_standardized = gev_param.gumbel_standardization(maximum)
+            empirical_quantiles.append(maximum_standardized)
+        return empirical_quantiles
diff --git a/extreme_fit/distribution/gev/gev_params.py b/extreme_fit/distribution/gev/gev_params.py
index 5e957005..f19b54d2 100644
--- a/extreme_fit/distribution/gev/gev_params.py
+++ b/extreme_fit/distribution/gev/gev_params.py
@@ -122,4 +122,12 @@ class GevParams(AbstractParams):
             cls.LOC: 'mu',
             cls.SCALE: 'sigma',
             cls.SHAPE: 'zeta',
-        }[gev_param_name]
\ No newline at end of file
+        }[gev_param_name]
+
+    def gumbel_standardization(self, x):
+        x -= self.location
+        x /= self.scale
+        if self.shape == 0:
+            return x
+        else:
+            return np.log(1 + self.shape * x) / self.shape
\ No newline at end of file
diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py
index 94011d95..be160d1e 100644
--- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py
+++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py
@@ -25,14 +25,16 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
     """Linearity only with respect to the temporal coordinates"""
 
     def __init__(self, coordinates: AbstractCoordinates, use_start_value=False, params_start_fit=None,
-                 params_sample=None, starting_point=None, fit_method=TemporalMarginFitMethod.is_mev_gev_fit,
-                 nb_iterations_for_bayesian_fit=5000, params_start_fit_bayesian=None,
+                 params_sample=None, starting_point=None,
+                 fit_method=TemporalMarginFitMethod.is_mev_gev_fit,
+                 nb_iterations_for_bayesian_fit=5000,
+                 params_start_fit_bayesian=None,
                  type_for_MLE="GEV"):
         super().__init__(coordinates, use_start_value, params_start_fit, params_sample, starting_point)
         self.type_for_mle = type_for_MLE
         self.params_start_fit_bayesian = params_start_fit_bayesian
         self.nb_iterations_for_bayesian_fit = nb_iterations_for_bayesian_fit
-        assert isinstance(fit_method, TemporalMarginFitMethod)
+        assert isinstance(fit_method, TemporalMarginFitMethod), fit_method
         self.fit_method = fit_method
 
     def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame,
-- 
GitLab