[GEV] add quantile computation in GevParams. add display of those quantile in split_curve

c32972cc · Le Roux Erwan · 8ab8b0c1 · c32972cc · c32972cc · c32972cc
Commit c32972cc authored 6 years ago by Le Roux Erwan
Hide whitespace changes
Inline Side-by-side

Showing

with 106 additions and 49 deletions
+106 -49
--- a/experiment/fit_diagnosis/main_split.py
+++ b/experiment/fit_diagnosis/main_split.py
@@ -2,6 +2,7 @@ import random

 from experiment.fit_diagnosis.split_curve import SplitCurve, LocFunction
 from extreme_estimator.estimator.full_estimator import FullEstimatorInASingleStepWithSmoothMargin
+from extreme_estimator.estimator.margin_estimator import SmoothMarginEstimator
 from extreme_estimator.extreme_models.margin_model.smooth_margin_model import ConstantMarginModel, \
    LinearAllParametersAllDimsMarginModel
 from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith
@@ -14,6 +15,7 @@ from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporal

 random.seed(42)

+
 def load_dataset():
    nb_points = 50
    nb_obs = 60
@@ -30,20 +32,18 @@ def load_dataset():

    return FullSimulatedDataset.from_double_sampling(nb_obs=nb_obs, margin_model=margin_model,
                                                     coordinates=coordinates,
-                                                     max_stable_model=max_stable_model,
-                                                     train_split_ratio=0.8,
-                                                     slicer_class=SpatialSlicer)
+                                                     max_stable_model=max_stable_model)


 def full_estimator(dataset):
    max_stable_model = Smith()
    margin_model_for_estimator = LinearAllParametersAllDimsMarginModel(dataset.coordinates)
-    full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model_for_estimator, max_stable_model)
-    return full_estimator
+    # full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model_for_estimator, max_stable_model)
+    fast_estimator = SmoothMarginEstimator(dataset, margin_model_for_estimator)
+    return fast_estimator


 if __name__ == '__main__':
-
    dataset = load_dataset()
    dataset.slicer.summary()
    full_estimator = full_estimator(dataset)

--- a/experiment/fit_diagnosis/split_curve.py
+++ b/experiment/fit_diagnosis/split_curve.py
@@ -43,36 +43,42 @@ class SplitCurve(object):
        self.margin_function_fitted = estimator.margin_function_fitted

        self.error_dict = error_dict_between_margin_functions(self.margin_function_sample, self.margin_function_fitted)
-        # todo: add quantile curve, additionally to the marginal curve

    @property
    def main_title(self):
        return self.dataset.slicer.summary(show=False)

    def visualize(self):
-        fig, axes = plt.subplots(3, 2)
+        fig, axes = plt.subplots(len(GevParams.GEV_VALUE_NAMES), 2)
        fig.subplots_adjust(hspace=0.4, wspace=0.4, )
-        for i, gev_param_name in enumerate(GevParams.GEV_PARAM_NAMES):
-            self.margin_graph(axes[i, 0], gev_param_name)
-            self.score_graph(axes[i, 1], gev_param_name)
+        for i, gev_value_name in enumerate(GevParams.GEV_VALUE_NAMES):
+            self.margin_graph(axes[i, 0], gev_value_name)
+            self.score_graph(axes[i, 1], gev_value_name)
        fig.suptitle(self.main_title)
        plt.show()

-    def margin_graph(self, ax, gev_param_name):
+    def margin_graph(self, ax, gev_value_name):
        # Display the fitted curve
-        self.margin_function_fitted.visualize_single_param(gev_param_name, ax, show=False)
+        self.margin_function_fitted.visualize_single_param(gev_value_name, ax, show=False)
        # Display train/test points
        for split, marker in [(self.dataset.train_split, 'o'), (self.dataset.test_split, 'x')]:
            self.margin_function_sample.set_datapoint_display_parameters(split, datapoint_marker=marker)
-            self.margin_function_sample.visualize_single_param(gev_param_name, ax, show=False)
-        title_str = gev_param_name
-        ax.set_title(title_str)
+            self.margin_function_sample.visualize_single_param(gev_value_name, ax, show=False)

-    def score_graph(self, ax, gev_param_name):
+    def score_graph(self, ax, gev_value_name):
        # todo: for the moment only the train/test are interresting (the spatio temporal isn"t working yet)
        sns.set_style('whitegrid')
-        s = self.error_dict[gev_param_name]
+        s = self.error_dict[gev_value_name]
        for split in self.dataset.splits:
            ind = self.dataset.coordinates_index(split)
            data = s.loc[ind].values
            sns.kdeplot(data, bw=0.5, ax=ax, label=split.name).set(xlim=0)
+        ax.legend()
+        # X axis
+        ax.set_xlabel('Absolute error in percentage')
+        plt.setp(ax.get_xticklabels(), visible=True)
+        ax.xaxis.set_tick_params(labelbottom=True)
+        # Y axis
+        ax.set_ylabel(gev_value_name)
+        plt.setp(ax.get_yticklabels(), visible=True)
+        ax.yaxis.set_tick_params(labelbottom=True)
--- a/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py
+++ b/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py
+from typing import Dict
+
 import matplotlib.cm as cm
 import matplotlib.pyplot as plt
 import numpy as np
@@ -24,16 +26,17 @@ class AbstractMarginFunction(object):
        """Main method that maps each coordinate to its GEV parameters"""
        pass

-    # Extraction function
-
    @property
-    def gev_params_for_coordinates(self):
-        gev_params = [self.get_gev_params(coordinate).to_dict() for coordinate in self.coordinates.coordinates_values()]
-        gev_param_name_to_serie = {}
-        for gev_param_name in GevParams.GEV_PARAM_NAMES:
-            s = pd.Series(data=[p[gev_param_name] for p in gev_params], index=self.coordinates.index)
-            gev_param_name_to_serie[gev_param_name] = s
-        return gev_param_name_to_serie
+    def gev_value_name_to_serie(self) -> Dict[str, pd.Series]:
+        # Load the gev_params
+        gev_params = [self.get_gev_params(coordinate) for coordinate in self.coordinates.coordinates_values()]
+        # Load the dictionary of values (gev parameters + the quantiles)
+        value_dicts = [gev_param.value_dict for gev_param in gev_params]
+        gev_value_name_to_serie = {}
+        for value_name in GevParams.GEV_VALUE_NAMES:
+            s = pd.Series(data=[d[value_name] for d in value_dicts], index=self.coordinates.index)
+            gev_value_name_to_serie[value_name] = s
+        return gev_value_name_to_serie

    # Visualization function

@@ -56,24 +59,32 @@ class AbstractMarginFunction(object):
        if show:
            plt.show()

-    def visualize_single_param(self, gev_param_name=GevParams.GEV_LOC, ax=None, show=True):
+    def visualize_single_param(self, gev_value_name=GevParams.GEV_LOC, ax=None, show=True):
+        assert gev_value_name in GevParams.GEV_VALUE_NAMES
        if self.coordinates.nb_coordinates_spatial == 1:
-            self.visualize_1D(gev_param_name, ax, show)
+            self.visualize_1D(gev_value_name, ax, show)
        elif self.coordinates.nb_coordinates_spatial == 2:
-            self.visualize_2D(gev_param_name, ax, show)
+            self.visualize_2D(gev_value_name, ax, show)
        else:
            raise NotImplementedError('3D Margin visualization not yet implemented')

-    def visualize_1D(self, gev_param_name=GevParams.GEV_LOC, ax=None, show=True):
+    def visualize_1D(self, gev_value_name=GevParams.GEV_LOC, ax=None, show=True):
        x = self.coordinates.x_coordinates
-        grid, linspace = self.get_grid_1D(x)
-        gev_param_idx = GevParams.GEV_PARAM_NAMES.index(gev_param_name)
+        grid, linspace = self.get_grid_values_1D(x)
        if ax is None:
            ax = plt.gca()
        if self.datapoint_display:
-            ax.plot(linspace, grid[:, gev_param_idx], self.datapoint_marker)
+            ax.plot(linspace, grid[gev_value_name], self.datapoint_marker)
        else:
-            ax.plot(linspace, grid[:, gev_param_idx])
+            ax.plot(linspace, grid[gev_value_name])
+        # X axis
+        ax.set_xlabel('coordinate')
+        plt.setp(ax.get_xticklabels(), visible=True)
+        ax.xaxis.set_tick_params(labelbottom=True)
+        # Y axis
+        ax.set_ylabel(gev_value_name)
+        plt.setp(ax.get_yticklabels(), visible=True)
+        ax.yaxis.set_tick_params(labelbottom=True)

        if show:
            plt.show()
@@ -92,7 +103,7 @@ class AbstractMarginFunction(object):
        if show:
            plt.show()

-    def get_grid_1D(self, x):
+    def get_grid_values_1D(self, x):
        # TODO: to avoid getting the value several times, I could cache the results
        if self.datapoint_display:
            linspace = self.coordinates.coordinates_values(self.spatio_temporal_split)[:, 0]
@@ -101,9 +112,11 @@ class AbstractMarginFunction(object):
            resolution = 100
            linspace = np.linspace(x.min(), x.max(), resolution)

-        grid = np.zeros([resolution, 3])
+        grid = []
        for i, xi in enumerate(linspace):
-            grid[i] = self.get_gev_params(np.array([xi])).to_array()
+            gev_param = self.get_gev_params(np.array([xi]))
+            grid.append(gev_param.value_dict)
+        grid = {gev_param: [g[gev_param] for g in grid] for gev_param in GevParams.GEV_VALUE_NAMES}
        return grid, linspace

    def get_grid_2D(self, x, y):

--- a/extreme_estimator/extreme_models/margin_model/margin_function/utils.py
+++ b/extreme_estimator/extreme_models/margin_model/margin_function/utils.py
@@ -5,23 +5,25 @@ from extreme_estimator.extreme_models.margin_model.margin_function.abstract_marg
 from extreme_estimator.gev_params import GevParams


-def abs_error(s1, s2):
-    return (s1 - s2).abs().pow(2)
+def relative_abs_error(reference_value, fitted_value):
+    return (reference_value - fitted_value).abs() / reference_value


-def error_dict_between_margin_functions(margin1: AbstractMarginFunction, margin2: AbstractMarginFunction):
+def error_dict_between_margin_functions(reference: AbstractMarginFunction, fitted: AbstractMarginFunction):
    """
    Return a serie, indexed by the same index as the coordinates
    Each value correspond to the error for this coordinate
-    :param margin1:
-    :param margin2:
+    :param reference:
+    :param fitted:
    :return:
    """
-    assert margin1.coordinates == margin2.coordinates
-    margin1_gev_params, margin2_gev_params = margin1.gev_params_for_coordinates, margin2.gev_params_for_coordinates
+    assert reference.coordinates == fitted.coordinates
+    reference_values = reference.gev_value_name_to_serie
+    fitted_values = fitted.gev_value_name_to_serie
+
    gev_param_name_to_error_serie = {}
-    for gev_param_name in GevParams.GEV_PARAM_NAMES:
-        serie1, serie2 = margin1_gev_params[gev_param_name], margin2_gev_params[gev_param_name]
-        error = abs_error(serie1, serie2)
-        gev_param_name_to_error_serie[gev_param_name] = error
+    for value_name in GevParams.GEV_VALUE_NAMES:
+        print(value_name)
+        error = relative_abs_error(reference_values[value_name], fitted_values[value_name])
+        gev_param_name_to_error_serie[value_name] = error
    return gev_param_name_to_error_serie
--- a/extreme_estimator/gev_params.py
+++ b/extreme_estimator/gev_params.py
 import numpy as np

+from extreme_estimator.extreme_models.utils import r
+

 class GevParams(object):
+    # GEV parameters
    GEV_SCALE = 'scale'
    GEV_LOC = 'loc'
    GEV_SHAPE = 'shape'
    GEV_PARAM_NAMES = [GEV_LOC, GEV_SCALE, GEV_SHAPE]
+    # GEV quantiles
+    GEV_QUANTILE_10 = 'quantile 10'
+    GEV_QUANTILE_100 = 'quantile 100'
+    GEV_QUANTILE_1000 = 'quantile 1000'
+    GEV_QUANTILE_NAMES = [GEV_QUANTILE_10, GEV_QUANTILE_100, GEV_QUANTILE_1000]
+    # GEV values
+    GEV_VALUE_NAMES = GEV_PARAM_NAMES + GEV_QUANTILE_NAMES[:-1]
+
+    # GEV parameters

    def __init__(self, loc: float, scale: float, shape: float):
        self.location = loc
@@ -28,3 +40,27 @@ class GevParams(object):
        gev_param_name_to_value = self.to_dict()
        return np.array([gev_param_name_to_value[gev_param_name] for gev_param_name in self.GEV_PARAM_NAMES])

+    # GEV quantiles
+
+    def qgev(self, p):
+        return r.qgev(p, self.location, self.scale, self.shape)[0]
+
+    @property
+    def quantile_name_to_p(self) -> dict:
+        return {
+            self.GEV_QUANTILE_10: 0.1,
+            self.GEV_QUANTILE_100: 0.01,
+            self.GEV_QUANTILE_1000: 0.001,
+        }
+
+    @property
+    def quantile_dict(self) -> dict:
+        return {quantile_name: self.qgev(p) for quantile_name, p in self.quantile_name_to_p.items()}
+
+    # GEV values
+
+    @property
+    def value_dict(self) -> dict:
+        return {**self.to_dict(), **self.quantile_dict}
+
+