From d5e81395482ca6026879fcf4d1161af2abb32fbd Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Tue, 12 Feb 2019 18:37:15 +0100
Subject: [PATCH] [EXTREME ESTIMATOR] add safran visualization for gpd (with a
 threshold fixed by hand)

---
 .../margin_fits/abstract_params.py            |  3 +-
 .../margin_fits/extreme_params.py             | 10 +--
 .../margin_fits/gev/gev_params.py             |  9 +++
 .../margin_fits/gpd/gpd_params.py             | 16 ++++-
 .../margin_fits/gpd/gpdmle_fit.py             |  2 +-
 safran_study/safran.py                        | 62 ++++++++++++++-----
 safran_study/snowfall_annual_maxima.py        |  9 ++-
 safran_study/visualize_safran.py              |  7 ++-
 8 files changed, 84 insertions(+), 34 deletions(-)

diff --git a/extreme_estimator/margin_fits/abstract_params.py b/extreme_estimator/margin_fits/abstract_params.py
index 9f180d18..fc5b80e3 100644
--- a/extreme_estimator/margin_fits/abstract_params.py
+++ b/extreme_estimator/margin_fits/abstract_params.py
@@ -3,8 +3,6 @@ from typing import List
 import numpy as np
 import pandas as pd
 
-from extreme_estimator.extreme_models.utils import r
-
 
 class AbstractParams(object):
     # Parameters
@@ -29,6 +27,7 @@ class AbstractParams(object):
         raise NotImplementedError
 
     def to_dict(self) -> dict:
+        assert len(self.PARAM_NAMES) == len(self.param_values)
         return dict(zip(self.PARAM_NAMES, self.param_values))
 
     def to_serie(self) -> pd.Series:
diff --git a/extreme_estimator/margin_fits/extreme_params.py b/extreme_estimator/margin_fits/extreme_params.py
index 60bbeca3..01a39d3f 100644
--- a/extreme_estimator/margin_fits/extreme_params.py
+++ b/extreme_estimator/margin_fits/extreme_params.py
@@ -1,20 +1,16 @@
-from extreme_estimator.margin_fits.abstract_params import AbstractParams
+from abc import ABC
 
+from extreme_estimator.margin_fits.abstract_params import AbstractParams
 
-class ExtremeParams(AbstractParams):
 
+class ExtremeParams(AbstractParams, ABC):
     # Extreme parameters
     SCALE = 'scale'
     LOC = 'loc'
     SHAPE = 'shape'
-    PARAM_NAMES = [LOC, SCALE, SHAPE]
 
     def __init__(self, loc: float, scale: float, shape: float):
         self.location = loc
         self.scale = scale
         self.shape = shape
         assert self.scale > 0
-
-    @property
-    def param_values(self):
-        return [self.location, self.scale, self.shape]
\ No newline at end of file
diff --git a/extreme_estimator/margin_fits/gev/gev_params.py b/extreme_estimator/margin_fits/gev/gev_params.py
index ba743f8a..b4d9cbc2 100644
--- a/extreme_estimator/margin_fits/gev/gev_params.py
+++ b/extreme_estimator/margin_fits/gev/gev_params.py
@@ -4,9 +4,18 @@ from extreme_estimator.margin_fits.extreme_params import ExtremeParams
 
 class GevParams(ExtremeParams):
 
+    # Parameters
+    PARAM_NAMES = [ExtremeParams.LOC, ExtremeParams.SCALE, ExtremeParams.SHAPE]
+    # Summary
+    SUMMARY_NAMES = PARAM_NAMES + ExtremeParams.QUANTILE_NAMES
+
     def __init__(self, loc: float, scale: float, shape: float, block_size: int = None):
         super().__init__(loc, scale, shape)
         self.block_size = block_size
 
     def quantile(self, p) -> float:
         return r.qgev(p, self.location, self.scale, self.shape)[0]
+
+    @property
+    def param_values(self):
+        return [self.location, self.scale, self.shape]
\ No newline at end of file
diff --git a/extreme_estimator/margin_fits/gpd/gpd_params.py b/extreme_estimator/margin_fits/gpd/gpd_params.py
index 3eb42331..0c8cc815 100644
--- a/extreme_estimator/margin_fits/gpd/gpd_params.py
+++ b/extreme_estimator/margin_fits/gpd/gpd_params.py
@@ -3,10 +3,20 @@ from extreme_estimator.margin_fits.extreme_params import ExtremeParams
 
 
 class GpdParams(ExtremeParams):
+    # TODO: understand better why the gpdfit return 2 parameters, alors que d'un autre cote d autres definitions de la distribution parlent d un parametre location
 
-    def __init__(self, loc: float, scale: float, shape: float, threshold: float = None):
-        super().__init__(loc, scale, shape)
+    # Parameters
+    PARAM_NAMES = [ExtremeParams.SCALE, ExtremeParams.SHAPE]
+    # Summary
+    SUMMARY_NAMES = PARAM_NAMES + ExtremeParams.QUANTILE_NAMES
+
+    def __init__(self, scale: float, shape: float, threshold: float = None):
+        super().__init__(loc=0.0, scale=scale, shape=shape)
         self.threshold = threshold
 
     def quantile(self, p) -> float:
-        return r.qgpd(p, self.location, self.scale, self.shape)[0]
+        return r.qgpd(p, self.location, self.scale, self.shape)[0] + self.threshold
+
+    @property
+    def param_values(self):
+        return [self.scale, self.shape]
diff --git a/extreme_estimator/margin_fits/gpd/gpdmle_fit.py b/extreme_estimator/margin_fits/gpd/gpdmle_fit.py
index da57af36..c26d924e 100644
--- a/extreme_estimator/margin_fits/gpd/gpdmle_fit.py
+++ b/extreme_estimator/margin_fits/gpd/gpdmle_fit.py
@@ -12,4 +12,4 @@ class GpdMleFit(object):
         self.x_gev = x_gev
         self.threshold = threshold
         self.mle_params = spatial_extreme_gpdmle_fit(x_gev, threshold)
-        self.gev_params = GpdParams.from_dict({**self.mle_params, 'threshold': threshold})
\ No newline at end of file
+        self.gpd_params = GpdParams.from_dict({**self.mle_params, 'threshold': threshold})
\ No newline at end of file
diff --git a/safran_study/safran.py b/safran_study/safran.py
index 2009ac91..43348aab 100644
--- a/safran_study/safran.py
+++ b/safran_study/safran.py
@@ -10,8 +10,11 @@ from matplotlib import cm
 from mpl_toolkits.axes_grid1 import make_axes_locatable
 from netCDF4 import Dataset
 
+from extreme_estimator.margin_fits.extreme_params import ExtremeParams
 from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit
 from extreme_estimator.margin_fits.gev.gev_params import GevParams
+from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams
+from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit
 from safran_study.massif import safran_massif_names_from_datasets
 from safran_study.shifted_color_map import shiftedColorMap
 from safran_study.snowfall_annual_maxima import SafranSnowfall
@@ -56,27 +59,34 @@ class Safran(object):
         if show:
             plt.show()
 
-    def visualize_gev_fit_with_cmap(self, show=True, axes=None):
-        params_names = GevParams.SUMMARY_NAMES
+    def visualize_margin_fits_with_cmap(self, threshold=None, show=True, axes=None):
+        if threshold is None:
+            params_names = GevParams.SUMMARY_NAMES
+            df = self.df_gev_mle_each_massif
+            # todo: understand how Maurienne could be negative
+            # print(df.head())
+        else:
+            params_names = GpdParams.SUMMARY_NAMES
+            df = self.df_gpd_mle_each_massif(threshold)
+
         if axes is None:
             fig, axes = plt.subplots(1, len(params_names))
             fig.subplots_adjust(hspace=1.0, wspace=1.0)
 
         for i, gev_param_name in enumerate(params_names):
             ax = axes[i]
-
-            massif_name_to_value = self.df_gev_mle_each_massif.loc[gev_param_name, :].to_dict()
+            massif_name_to_value = df.loc[gev_param_name, :].to_dict()
             # Compute the middle point of the values for the color map
             values = list(massif_name_to_value.values())
             vmin, vmax = min(values), max(values)
-            midpoint = 1 - vmax / (vmax + abs(vmin))
-            maxmax = max(vmax, -vmin)
-            scaling_factor = 2 * maxmax
-            # print(gev_param_name, midpoint, vmin, vmax, scaling_factor)
+            try:
+                midpoint = 1 - vmax / (vmax + abs(vmin))
+            except ZeroDivisionError:
+                pass
             # Load the shifted cmap to center on a middle point
 
             cmap = [plt.cm.coolwarm, plt.cm.bwr, plt.cm.seismic][1]
-            if gev_param_name == GevParams.SHAPE:
+            if gev_param_name == ExtremeParams.SHAPE:
                 shifted_cmap = shiftedColorMap(cmap, midpoint=midpoint, name='shifted')
                 norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
             else:
@@ -112,12 +122,24 @@ class Safran(object):
 
     @property
     def df_gev_mle_each_massif(self):
-        # Fit a margin_fits n each massif
+        # Fit a margin_fits on each massif
         massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.summary_serie
                              for massif_name in self.safran_massif_names}
         return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names)
 
-    """ Annual maxima of snowfall """
+    def df_gpd_mle_each_massif(self, threshold):
+        # Fit a margin fit on each massif
+        massif_to_gev_mle = {massif_name: GpdMleFit(self.df_all_snowfall_concatenated[massif_name], threshold=threshold).gpd_params.summary_serie
+                             for massif_name in self.safran_massif_names}
+        return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names)
+
+    """ Data """
+
+    @property
+    def df_all_snowfall_concatenated(self):
+        df_list = [pd.DataFrame(snowfall, columns=self.safran_massif_names) for snowfall in self.year_to_snowfall.values()]
+        df_concatenated = pd.concat(df_list)
+        return df_concatenated
 
     @property
     def df_annual_maxima(self):
@@ -126,14 +148,22 @@ class Safran(object):
     """ Load some attributes only once """
 
     @cached_property
-    def year_to_annual_maxima(self):
+    def year_to_annual_maxima(self) -> OrderedDict:
+        # Map each year to an array of size nb_massif
+        year_to_annual_maxima = OrderedDict()
+        for year, snowfall in self.year_to_snowfall.items():
+            year_to_annual_maxima[year] = snowfall.max(axis=0)
+        return year_to_annual_maxima
+
+    @cached_property
+    def year_to_snowfall(self) -> OrderedDict:
+        # Map each year to a matrix of size 365-nb_days_consecutive+1 x nb_massifs
         year_to_safran_snowfall = {year: SafranSnowfall(dataset) for year, dataset in
                                    self.year_to_dataset_ordered_dict.items()}
-        year_to_annual_maxima = OrderedDict()
+        year_to_snowfall = OrderedDict()
         for year in self.year_to_dataset_ordered_dict.keys():
-            year_to_annual_maxima[year] = year_to_safran_snowfall[year].annual_maxima_of_snowfall(
-                self.nb_days_of_snowfall)
-        return year_to_annual_maxima
+            year_to_snowfall[year] = year_to_safran_snowfall[year].annual_snowfall(self.nb_days_of_snowfall)
+        return year_to_snowfall
 
     @property
     def safran_massif_names(self):
diff --git a/safran_study/snowfall_annual_maxima.py b/safran_study/snowfall_annual_maxima.py
index a92c992f..f2bce13b 100644
--- a/safran_study/snowfall_annual_maxima.py
+++ b/safran_study/snowfall_annual_maxima.py
@@ -31,12 +31,17 @@ class SafranSnowfall(object):
         nb_days = len(hourly_snowfall) // 24
         self.daily_snowfall = [sum(hourly_snowfall[24 * i:24 * (i+1)]) for i in range(nb_days)]
 
-    def annual_maxima_of_snowfall(self, nb_days_of_snowfall=1):
+    def annual_snowfall(self, nb_days_of_snowfall=1):
         # Aggregate the daily snowfall by the number of consecutive days
         shifted_list = [self.daily_snowfall[i:] for i in range(nb_days_of_snowfall)]
+        # First element of shifted_list is of length n, Second element of length n-1, Third element n-2....
+        # The zip is done with respect to the shortest list
         snowfall_in_consecutive_days = [sum(e) for e in zip(*shifted_list)]
         # Return the maximum of the aggregated list
-        return np.array(snowfall_in_consecutive_days).max(axis=0)
+        # The returned array is of size n-nb_days+1 x nb_massif
+        return np.array(snowfall_in_consecutive_days)
+
+
 
 
 
diff --git a/safran_study/visualize_safran.py b/safran_study/visualize_safran.py
index 8fbcdaa6..2b921725 100644
--- a/safran_study/visualize_safran.py
+++ b/safran_study/visualize_safran.py
@@ -12,11 +12,11 @@ def load_all_safran(only_first_one=False):
     return all_safran
 
 
-def fit_mle_gev_independent():
+def fit_mle_gev_independent(threshold=None):
     # Dump the result in a csv
     dfs = []
     for safran in load_all_safran(only_first_one=True):
-        safran.visualize_gev_fit_with_cmap()
+        safran.visualize_margin_fits_with_cmap(threshold=threshold)
         # path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv'
         # df.to_csv(path.format(VERSION))
 
@@ -26,4 +26,5 @@ def fit_max_stab():
 
 
 if __name__ == '__main__':
-    fit_mle_gev_independent()
+    threshold = [None, 20, 40, 60][1]
+    fit_mle_gev_independent(threshold=threshold)
-- 
GitLab