From c32972cce4628e22d7bc10d5bfa0166478db5f4b Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Tue, 4 Dec 2018 16:48:22 +0100
Subject: [PATCH] [GEV] add quantile computation in GevParams. add display of
 those quantile in split_curve

---
 experiment/fit_diagnosis/main_split.py        | 12 ++---
 experiment/fit_diagnosis/split_curve.py       | 30 ++++++-----
 .../abstract_margin_function.py               | 53 ++++++++++++-------
 .../margin_model/margin_function/utils.py     | 24 +++++----
 extreme_estimator/gev_params.py               | 36 +++++++++++++
 5 files changed, 106 insertions(+), 49 deletions(-)

diff --git a/experiment/fit_diagnosis/main_split.py b/experiment/fit_diagnosis/main_split.py
index a12220bb..c6c32619 100644
--- a/experiment/fit_diagnosis/main_split.py
+++ b/experiment/fit_diagnosis/main_split.py
@@ -2,6 +2,7 @@ import random
 
 from experiment.fit_diagnosis.split_curve import SplitCurve, LocFunction
 from extreme_estimator.estimator.full_estimator import FullEstimatorInASingleStepWithSmoothMargin
+from extreme_estimator.estimator.margin_estimator import SmoothMarginEstimator
 from extreme_estimator.extreme_models.margin_model.smooth_margin_model import ConstantMarginModel, \
     LinearAllParametersAllDimsMarginModel
 from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith
@@ -14,6 +15,7 @@ from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporal
 
 random.seed(42)
 
+
 def load_dataset():
     nb_points = 50
     nb_obs = 60
@@ -30,20 +32,18 @@ def load_dataset():
 
     return FullSimulatedDataset.from_double_sampling(nb_obs=nb_obs, margin_model=margin_model,
                                                      coordinates=coordinates,
-                                                     max_stable_model=max_stable_model,
-                                                     train_split_ratio=0.8,
-                                                     slicer_class=SpatialSlicer)
+                                                     max_stable_model=max_stable_model)
 
 
 def full_estimator(dataset):
     max_stable_model = Smith()
     margin_model_for_estimator = LinearAllParametersAllDimsMarginModel(dataset.coordinates)
-    full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model_for_estimator, max_stable_model)
-    return full_estimator
+    # full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model_for_estimator, max_stable_model)
+    fast_estimator = SmoothMarginEstimator(dataset, margin_model_for_estimator)
+    return fast_estimator
 
 
 if __name__ == '__main__':
-
     dataset = load_dataset()
     dataset.slicer.summary()
     full_estimator = full_estimator(dataset)
diff --git a/experiment/fit_diagnosis/split_curve.py b/experiment/fit_diagnosis/split_curve.py
index 78c48bd4..bc408c5e 100644
--- a/experiment/fit_diagnosis/split_curve.py
+++ b/experiment/fit_diagnosis/split_curve.py
@@ -43,36 +43,42 @@ class SplitCurve(object):
         self.margin_function_fitted = estimator.margin_function_fitted
 
         self.error_dict = error_dict_between_margin_functions(self.margin_function_sample, self.margin_function_fitted)
-        # todo: add quantile curve, additionally to the marginal curve
 
     @property
     def main_title(self):
         return self.dataset.slicer.summary(show=False)
 
     def visualize(self):
-        fig, axes = plt.subplots(3, 2)
+        fig, axes = plt.subplots(len(GevParams.GEV_VALUE_NAMES), 2)
         fig.subplots_adjust(hspace=0.4, wspace=0.4, )
-        for i, gev_param_name in enumerate(GevParams.GEV_PARAM_NAMES):
-            self.margin_graph(axes[i, 0], gev_param_name)
-            self.score_graph(axes[i, 1], gev_param_name)
+        for i, gev_value_name in enumerate(GevParams.GEV_VALUE_NAMES):
+            self.margin_graph(axes[i, 0], gev_value_name)
+            self.score_graph(axes[i, 1], gev_value_name)
         fig.suptitle(self.main_title)
         plt.show()
 
-    def margin_graph(self, ax, gev_param_name):
+    def margin_graph(self, ax, gev_value_name):
         # Display the fitted curve
-        self.margin_function_fitted.visualize_single_param(gev_param_name, ax, show=False)
+        self.margin_function_fitted.visualize_single_param(gev_value_name, ax, show=False)
         # Display train/test points
         for split, marker in [(self.dataset.train_split, 'o'), (self.dataset.test_split, 'x')]:
             self.margin_function_sample.set_datapoint_display_parameters(split, datapoint_marker=marker)
-            self.margin_function_sample.visualize_single_param(gev_param_name, ax, show=False)
-        title_str = gev_param_name
-        ax.set_title(title_str)
+            self.margin_function_sample.visualize_single_param(gev_value_name, ax, show=False)
 
-    def score_graph(self, ax, gev_param_name):
+    def score_graph(self, ax, gev_value_name):
         # todo: for the moment only the train/test are interresting (the spatio temporal isn"t working yet)
         sns.set_style('whitegrid')
-        s = self.error_dict[gev_param_name]
+        s = self.error_dict[gev_value_name]
         for split in self.dataset.splits:
             ind = self.dataset.coordinates_index(split)
             data = s.loc[ind].values
             sns.kdeplot(data, bw=0.5, ax=ax, label=split.name).set(xlim=0)
+        ax.legend()
+        # X axis
+        ax.set_xlabel('Absolute error in percentage')
+        plt.setp(ax.get_xticklabels(), visible=True)
+        ax.xaxis.set_tick_params(labelbottom=True)
+        # Y axis
+        ax.set_ylabel(gev_value_name)
+        plt.setp(ax.get_yticklabels(), visible=True)
+        ax.yaxis.set_tick_params(labelbottom=True)
diff --git a/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py b/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py
index 5fc08a3a..0d65ba0e 100644
--- a/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py
+++ b/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py
@@ -1,3 +1,5 @@
+from typing import Dict
+
 import matplotlib.cm as cm
 import matplotlib.pyplot as plt
 import numpy as np
@@ -24,16 +26,17 @@ class AbstractMarginFunction(object):
         """Main method that maps each coordinate to its GEV parameters"""
         pass
 
-    # Extraction function
-
     @property
-    def gev_params_for_coordinates(self):
-        gev_params = [self.get_gev_params(coordinate).to_dict() for coordinate in self.coordinates.coordinates_values()]
-        gev_param_name_to_serie = {}
-        for gev_param_name in GevParams.GEV_PARAM_NAMES:
-            s = pd.Series(data=[p[gev_param_name] for p in gev_params], index=self.coordinates.index)
-            gev_param_name_to_serie[gev_param_name] = s
-        return gev_param_name_to_serie
+    def gev_value_name_to_serie(self) -> Dict[str, pd.Series]:
+        # Load the gev_params
+        gev_params = [self.get_gev_params(coordinate) for coordinate in self.coordinates.coordinates_values()]
+        # Load the dictionary of values (gev parameters + the quantiles)
+        value_dicts = [gev_param.value_dict for gev_param in gev_params]
+        gev_value_name_to_serie = {}
+        for value_name in GevParams.GEV_VALUE_NAMES:
+            s = pd.Series(data=[d[value_name] for d in value_dicts], index=self.coordinates.index)
+            gev_value_name_to_serie[value_name] = s
+        return gev_value_name_to_serie
 
     # Visualization function
 
@@ -56,24 +59,32 @@ class AbstractMarginFunction(object):
         if show:
             plt.show()
 
-    def visualize_single_param(self, gev_param_name=GevParams.GEV_LOC, ax=None, show=True):
+    def visualize_single_param(self, gev_value_name=GevParams.GEV_LOC, ax=None, show=True):
+        assert gev_value_name in GevParams.GEV_VALUE_NAMES
         if self.coordinates.nb_coordinates_spatial == 1:
-            self.visualize_1D(gev_param_name, ax, show)
+            self.visualize_1D(gev_value_name, ax, show)
         elif self.coordinates.nb_coordinates_spatial == 2:
-            self.visualize_2D(gev_param_name, ax, show)
+            self.visualize_2D(gev_value_name, ax, show)
         else:
             raise NotImplementedError('3D Margin visualization not yet implemented')
 
-    def visualize_1D(self, gev_param_name=GevParams.GEV_LOC, ax=None, show=True):
+    def visualize_1D(self, gev_value_name=GevParams.GEV_LOC, ax=None, show=True):
         x = self.coordinates.x_coordinates
-        grid, linspace = self.get_grid_1D(x)
-        gev_param_idx = GevParams.GEV_PARAM_NAMES.index(gev_param_name)
+        grid, linspace = self.get_grid_values_1D(x)
         if ax is None:
             ax = plt.gca()
         if self.datapoint_display:
-            ax.plot(linspace, grid[:, gev_param_idx], self.datapoint_marker)
+            ax.plot(linspace, grid[gev_value_name], self.datapoint_marker)
         else:
-            ax.plot(linspace, grid[:, gev_param_idx])
+            ax.plot(linspace, grid[gev_value_name])
+        # X axis
+        ax.set_xlabel('coordinate')
+        plt.setp(ax.get_xticklabels(), visible=True)
+        ax.xaxis.set_tick_params(labelbottom=True)
+        # Y axis
+        ax.set_ylabel(gev_value_name)
+        plt.setp(ax.get_yticklabels(), visible=True)
+        ax.yaxis.set_tick_params(labelbottom=True)
 
         if show:
             plt.show()
@@ -92,7 +103,7 @@ class AbstractMarginFunction(object):
         if show:
             plt.show()
 
-    def get_grid_1D(self, x):
+    def get_grid_values_1D(self, x):
         # TODO: to avoid getting the value several times, I could cache the results
         if self.datapoint_display:
             linspace = self.coordinates.coordinates_values(self.spatio_temporal_split)[:, 0]
@@ -101,9 +112,11 @@ class AbstractMarginFunction(object):
             resolution = 100
             linspace = np.linspace(x.min(), x.max(), resolution)
 
-        grid = np.zeros([resolution, 3])
+        grid = []
         for i, xi in enumerate(linspace):
-            grid[i] = self.get_gev_params(np.array([xi])).to_array()
+            gev_param = self.get_gev_params(np.array([xi]))
+            grid.append(gev_param.value_dict)
+        grid = {gev_param: [g[gev_param] for g in grid] for gev_param in GevParams.GEV_VALUE_NAMES}
         return grid, linspace
 
     def get_grid_2D(self, x, y):
diff --git a/extreme_estimator/extreme_models/margin_model/margin_function/utils.py b/extreme_estimator/extreme_models/margin_model/margin_function/utils.py
index 441ef09a..2ea29cc1 100644
--- a/extreme_estimator/extreme_models/margin_model/margin_function/utils.py
+++ b/extreme_estimator/extreme_models/margin_model/margin_function/utils.py
@@ -5,23 +5,25 @@ from extreme_estimator.extreme_models.margin_model.margin_function.abstract_marg
 from extreme_estimator.gev_params import GevParams
 
 
-def abs_error(s1, s2):
-    return (s1 - s2).abs().pow(2)
+def relative_abs_error(reference_value, fitted_value):
+    return (reference_value - fitted_value).abs() / reference_value
 
 
-def error_dict_between_margin_functions(margin1: AbstractMarginFunction, margin2: AbstractMarginFunction):
+def error_dict_between_margin_functions(reference: AbstractMarginFunction, fitted: AbstractMarginFunction):
     """
     Return a serie, indexed by the same index as the coordinates
     Each value correspond to the error for this coordinate
-    :param margin1:
-    :param margin2:
+    :param reference:
+    :param fitted:
     :return:
     """
-    assert margin1.coordinates == margin2.coordinates
-    margin1_gev_params, margin2_gev_params = margin1.gev_params_for_coordinates, margin2.gev_params_for_coordinates
+    assert reference.coordinates == fitted.coordinates
+    reference_values = reference.gev_value_name_to_serie
+    fitted_values = fitted.gev_value_name_to_serie
+
     gev_param_name_to_error_serie = {}
-    for gev_param_name in GevParams.GEV_PARAM_NAMES:
-        serie1, serie2 = margin1_gev_params[gev_param_name], margin2_gev_params[gev_param_name]
-        error = abs_error(serie1, serie2)
-        gev_param_name_to_error_serie[gev_param_name] = error
+    for value_name in GevParams.GEV_VALUE_NAMES:
+        print(value_name)
+        error = relative_abs_error(reference_values[value_name], fitted_values[value_name])
+        gev_param_name_to_error_serie[value_name] = error
     return gev_param_name_to_error_serie
diff --git a/extreme_estimator/gev_params.py b/extreme_estimator/gev_params.py
index 5ee8656f..78a6fa2e 100644
--- a/extreme_estimator/gev_params.py
+++ b/extreme_estimator/gev_params.py
@@ -1,11 +1,23 @@
 import numpy as np
 
+from extreme_estimator.extreme_models.utils import r
+
 
 class GevParams(object):
+    # GEV parameters
     GEV_SCALE = 'scale'
     GEV_LOC = 'loc'
     GEV_SHAPE = 'shape'
     GEV_PARAM_NAMES = [GEV_LOC, GEV_SCALE, GEV_SHAPE]
+    # GEV quantiles
+    GEV_QUANTILE_10 = 'quantile 10'
+    GEV_QUANTILE_100 = 'quantile 100'
+    GEV_QUANTILE_1000 = 'quantile 1000'
+    GEV_QUANTILE_NAMES = [GEV_QUANTILE_10, GEV_QUANTILE_100, GEV_QUANTILE_1000]
+    # GEV values
+    GEV_VALUE_NAMES = GEV_PARAM_NAMES + GEV_QUANTILE_NAMES[:-1]
+
+    # GEV parameters
 
     def __init__(self, loc: float, scale: float, shape: float):
         self.location = loc
@@ -28,3 +40,27 @@ class GevParams(object):
         gev_param_name_to_value = self.to_dict()
         return np.array([gev_param_name_to_value[gev_param_name] for gev_param_name in self.GEV_PARAM_NAMES])
 
+    # GEV quantiles
+
+    def qgev(self, p):
+        return r.qgev(p, self.location, self.scale, self.shape)[0]
+
+    @property
+    def quantile_name_to_p(self) -> dict:
+        return {
+            self.GEV_QUANTILE_10: 0.1,
+            self.GEV_QUANTILE_100: 0.01,
+            self.GEV_QUANTILE_1000: 0.001,
+        }
+
+    @property
+    def quantile_dict(self) -> dict:
+        return {quantile_name: self.qgev(p) for quantile_name, p in self.quantile_name_to_p.items()}
+
+    # GEV values
+
+    @property
+    def value_dict(self) -> dict:
+        return {**self.to_dict(), **self.quantile_dict}
+
+
-- 
GitLab