From c32972cce4628e22d7bc10d5bfa0166478db5f4b Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Tue, 4 Dec 2018 16:48:22 +0100 Subject: [PATCH] [GEV] add quantile computation in GevParams. add display of those quantile in split_curve --- experiment/fit_diagnosis/main_split.py | 12 ++--- experiment/fit_diagnosis/split_curve.py | 30 ++++++----- .../abstract_margin_function.py | 53 ++++++++++++------- .../margin_model/margin_function/utils.py | 24 +++++---- extreme_estimator/gev_params.py | 36 +++++++++++++ 5 files changed, 106 insertions(+), 49 deletions(-) diff --git a/experiment/fit_diagnosis/main_split.py b/experiment/fit_diagnosis/main_split.py index a12220bb..c6c32619 100644 --- a/experiment/fit_diagnosis/main_split.py +++ b/experiment/fit_diagnosis/main_split.py @@ -2,6 +2,7 @@ import random from experiment.fit_diagnosis.split_curve import SplitCurve, LocFunction from extreme_estimator.estimator.full_estimator import FullEstimatorInASingleStepWithSmoothMargin +from extreme_estimator.estimator.margin_estimator import SmoothMarginEstimator from extreme_estimator.extreme_models.margin_model.smooth_margin_model import ConstantMarginModel, \ LinearAllParametersAllDimsMarginModel from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith @@ -14,6 +15,7 @@ from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporal random.seed(42) + def load_dataset(): nb_points = 50 nb_obs = 60 @@ -30,20 +32,18 @@ def load_dataset(): return FullSimulatedDataset.from_double_sampling(nb_obs=nb_obs, margin_model=margin_model, coordinates=coordinates, - max_stable_model=max_stable_model, - train_split_ratio=0.8, - slicer_class=SpatialSlicer) + max_stable_model=max_stable_model) def full_estimator(dataset): max_stable_model = Smith() margin_model_for_estimator = LinearAllParametersAllDimsMarginModel(dataset.coordinates) - full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model_for_estimator, max_stable_model) - return full_estimator + # full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model_for_estimator, max_stable_model) + fast_estimator = SmoothMarginEstimator(dataset, margin_model_for_estimator) + return fast_estimator if __name__ == '__main__': - dataset = load_dataset() dataset.slicer.summary() full_estimator = full_estimator(dataset) diff --git a/experiment/fit_diagnosis/split_curve.py b/experiment/fit_diagnosis/split_curve.py index 78c48bd4..bc408c5e 100644 --- a/experiment/fit_diagnosis/split_curve.py +++ b/experiment/fit_diagnosis/split_curve.py @@ -43,36 +43,42 @@ class SplitCurve(object): self.margin_function_fitted = estimator.margin_function_fitted self.error_dict = error_dict_between_margin_functions(self.margin_function_sample, self.margin_function_fitted) - # todo: add quantile curve, additionally to the marginal curve @property def main_title(self): return self.dataset.slicer.summary(show=False) def visualize(self): - fig, axes = plt.subplots(3, 2) + fig, axes = plt.subplots(len(GevParams.GEV_VALUE_NAMES), 2) fig.subplots_adjust(hspace=0.4, wspace=0.4, ) - for i, gev_param_name in enumerate(GevParams.GEV_PARAM_NAMES): - self.margin_graph(axes[i, 0], gev_param_name) - self.score_graph(axes[i, 1], gev_param_name) + for i, gev_value_name in enumerate(GevParams.GEV_VALUE_NAMES): + self.margin_graph(axes[i, 0], gev_value_name) + self.score_graph(axes[i, 1], gev_value_name) fig.suptitle(self.main_title) plt.show() - def margin_graph(self, ax, gev_param_name): + def margin_graph(self, ax, gev_value_name): # Display the fitted curve - self.margin_function_fitted.visualize_single_param(gev_param_name, ax, show=False) + self.margin_function_fitted.visualize_single_param(gev_value_name, ax, show=False) # Display train/test points for split, marker in [(self.dataset.train_split, 'o'), (self.dataset.test_split, 'x')]: self.margin_function_sample.set_datapoint_display_parameters(split, datapoint_marker=marker) - self.margin_function_sample.visualize_single_param(gev_param_name, ax, show=False) - title_str = gev_param_name - ax.set_title(title_str) + self.margin_function_sample.visualize_single_param(gev_value_name, ax, show=False) - def score_graph(self, ax, gev_param_name): + def score_graph(self, ax, gev_value_name): # todo: for the moment only the train/test are interresting (the spatio temporal isn"t working yet) sns.set_style('whitegrid') - s = self.error_dict[gev_param_name] + s = self.error_dict[gev_value_name] for split in self.dataset.splits: ind = self.dataset.coordinates_index(split) data = s.loc[ind].values sns.kdeplot(data, bw=0.5, ax=ax, label=split.name).set(xlim=0) + ax.legend() + # X axis + ax.set_xlabel('Absolute error in percentage') + plt.setp(ax.get_xticklabels(), visible=True) + ax.xaxis.set_tick_params(labelbottom=True) + # Y axis + ax.set_ylabel(gev_value_name) + plt.setp(ax.get_yticklabels(), visible=True) + ax.yaxis.set_tick_params(labelbottom=True) diff --git a/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py b/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py index 5fc08a3a..0d65ba0e 100644 --- a/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py +++ b/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py @@ -1,3 +1,5 @@ +from typing import Dict + import matplotlib.cm as cm import matplotlib.pyplot as plt import numpy as np @@ -24,16 +26,17 @@ class AbstractMarginFunction(object): """Main method that maps each coordinate to its GEV parameters""" pass - # Extraction function - @property - def gev_params_for_coordinates(self): - gev_params = [self.get_gev_params(coordinate).to_dict() for coordinate in self.coordinates.coordinates_values()] - gev_param_name_to_serie = {} - for gev_param_name in GevParams.GEV_PARAM_NAMES: - s = pd.Series(data=[p[gev_param_name] for p in gev_params], index=self.coordinates.index) - gev_param_name_to_serie[gev_param_name] = s - return gev_param_name_to_serie + def gev_value_name_to_serie(self) -> Dict[str, pd.Series]: + # Load the gev_params + gev_params = [self.get_gev_params(coordinate) for coordinate in self.coordinates.coordinates_values()] + # Load the dictionary of values (gev parameters + the quantiles) + value_dicts = [gev_param.value_dict for gev_param in gev_params] + gev_value_name_to_serie = {} + for value_name in GevParams.GEV_VALUE_NAMES: + s = pd.Series(data=[d[value_name] for d in value_dicts], index=self.coordinates.index) + gev_value_name_to_serie[value_name] = s + return gev_value_name_to_serie # Visualization function @@ -56,24 +59,32 @@ class AbstractMarginFunction(object): if show: plt.show() - def visualize_single_param(self, gev_param_name=GevParams.GEV_LOC, ax=None, show=True): + def visualize_single_param(self, gev_value_name=GevParams.GEV_LOC, ax=None, show=True): + assert gev_value_name in GevParams.GEV_VALUE_NAMES if self.coordinates.nb_coordinates_spatial == 1: - self.visualize_1D(gev_param_name, ax, show) + self.visualize_1D(gev_value_name, ax, show) elif self.coordinates.nb_coordinates_spatial == 2: - self.visualize_2D(gev_param_name, ax, show) + self.visualize_2D(gev_value_name, ax, show) else: raise NotImplementedError('3D Margin visualization not yet implemented') - def visualize_1D(self, gev_param_name=GevParams.GEV_LOC, ax=None, show=True): + def visualize_1D(self, gev_value_name=GevParams.GEV_LOC, ax=None, show=True): x = self.coordinates.x_coordinates - grid, linspace = self.get_grid_1D(x) - gev_param_idx = GevParams.GEV_PARAM_NAMES.index(gev_param_name) + grid, linspace = self.get_grid_values_1D(x) if ax is None: ax = plt.gca() if self.datapoint_display: - ax.plot(linspace, grid[:, gev_param_idx], self.datapoint_marker) + ax.plot(linspace, grid[gev_value_name], self.datapoint_marker) else: - ax.plot(linspace, grid[:, gev_param_idx]) + ax.plot(linspace, grid[gev_value_name]) + # X axis + ax.set_xlabel('coordinate') + plt.setp(ax.get_xticklabels(), visible=True) + ax.xaxis.set_tick_params(labelbottom=True) + # Y axis + ax.set_ylabel(gev_value_name) + plt.setp(ax.get_yticklabels(), visible=True) + ax.yaxis.set_tick_params(labelbottom=True) if show: plt.show() @@ -92,7 +103,7 @@ class AbstractMarginFunction(object): if show: plt.show() - def get_grid_1D(self, x): + def get_grid_values_1D(self, x): # TODO: to avoid getting the value several times, I could cache the results if self.datapoint_display: linspace = self.coordinates.coordinates_values(self.spatio_temporal_split)[:, 0] @@ -101,9 +112,11 @@ class AbstractMarginFunction(object): resolution = 100 linspace = np.linspace(x.min(), x.max(), resolution) - grid = np.zeros([resolution, 3]) + grid = [] for i, xi in enumerate(linspace): - grid[i] = self.get_gev_params(np.array([xi])).to_array() + gev_param = self.get_gev_params(np.array([xi])) + grid.append(gev_param.value_dict) + grid = {gev_param: [g[gev_param] for g in grid] for gev_param in GevParams.GEV_VALUE_NAMES} return grid, linspace def get_grid_2D(self, x, y): diff --git a/extreme_estimator/extreme_models/margin_model/margin_function/utils.py b/extreme_estimator/extreme_models/margin_model/margin_function/utils.py index 441ef09a..2ea29cc1 100644 --- a/extreme_estimator/extreme_models/margin_model/margin_function/utils.py +++ b/extreme_estimator/extreme_models/margin_model/margin_function/utils.py @@ -5,23 +5,25 @@ from extreme_estimator.extreme_models.margin_model.margin_function.abstract_marg from extreme_estimator.gev_params import GevParams -def abs_error(s1, s2): - return (s1 - s2).abs().pow(2) +def relative_abs_error(reference_value, fitted_value): + return (reference_value - fitted_value).abs() / reference_value -def error_dict_between_margin_functions(margin1: AbstractMarginFunction, margin2: AbstractMarginFunction): +def error_dict_between_margin_functions(reference: AbstractMarginFunction, fitted: AbstractMarginFunction): """ Return a serie, indexed by the same index as the coordinates Each value correspond to the error for this coordinate - :param margin1: - :param margin2: + :param reference: + :param fitted: :return: """ - assert margin1.coordinates == margin2.coordinates - margin1_gev_params, margin2_gev_params = margin1.gev_params_for_coordinates, margin2.gev_params_for_coordinates + assert reference.coordinates == fitted.coordinates + reference_values = reference.gev_value_name_to_serie + fitted_values = fitted.gev_value_name_to_serie + gev_param_name_to_error_serie = {} - for gev_param_name in GevParams.GEV_PARAM_NAMES: - serie1, serie2 = margin1_gev_params[gev_param_name], margin2_gev_params[gev_param_name] - error = abs_error(serie1, serie2) - gev_param_name_to_error_serie[gev_param_name] = error + for value_name in GevParams.GEV_VALUE_NAMES: + print(value_name) + error = relative_abs_error(reference_values[value_name], fitted_values[value_name]) + gev_param_name_to_error_serie[value_name] = error return gev_param_name_to_error_serie diff --git a/extreme_estimator/gev_params.py b/extreme_estimator/gev_params.py index 5ee8656f..78a6fa2e 100644 --- a/extreme_estimator/gev_params.py +++ b/extreme_estimator/gev_params.py @@ -1,11 +1,23 @@ import numpy as np +from extreme_estimator.extreme_models.utils import r + class GevParams(object): + # GEV parameters GEV_SCALE = 'scale' GEV_LOC = 'loc' GEV_SHAPE = 'shape' GEV_PARAM_NAMES = [GEV_LOC, GEV_SCALE, GEV_SHAPE] + # GEV quantiles + GEV_QUANTILE_10 = 'quantile 10' + GEV_QUANTILE_100 = 'quantile 100' + GEV_QUANTILE_1000 = 'quantile 1000' + GEV_QUANTILE_NAMES = [GEV_QUANTILE_10, GEV_QUANTILE_100, GEV_QUANTILE_1000] + # GEV values + GEV_VALUE_NAMES = GEV_PARAM_NAMES + GEV_QUANTILE_NAMES[:-1] + + # GEV parameters def __init__(self, loc: float, scale: float, shape: float): self.location = loc @@ -28,3 +40,27 @@ class GevParams(object): gev_param_name_to_value = self.to_dict() return np.array([gev_param_name_to_value[gev_param_name] for gev_param_name in self.GEV_PARAM_NAMES]) + # GEV quantiles + + def qgev(self, p): + return r.qgev(p, self.location, self.scale, self.shape)[0] + + @property + def quantile_name_to_p(self) -> dict: + return { + self.GEV_QUANTILE_10: 0.1, + self.GEV_QUANTILE_100: 0.01, + self.GEV_QUANTILE_1000: 0.001, + } + + @property + def quantile_dict(self) -> dict: + return {quantile_name: self.qgev(p) for quantile_name, p in self.quantile_name_to_p.items()} + + # GEV values + + @property + def value_dict(self) -> dict: + return {**self.to_dict(), **self.quantile_dict} + + -- GitLab