Commit c32972cc authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[GEV] add quantile computation in GevParams. add display of those quantile in split_curve

parent 8ab8b0c1
No related merge requests found
Showing with 106 additions and 49 deletions
+106 -49
......@@ -2,6 +2,7 @@ import random
from experiment.fit_diagnosis.split_curve import SplitCurve, LocFunction
from extreme_estimator.estimator.full_estimator import FullEstimatorInASingleStepWithSmoothMargin
from extreme_estimator.estimator.margin_estimator import SmoothMarginEstimator
from extreme_estimator.extreme_models.margin_model.smooth_margin_model import ConstantMarginModel, \
LinearAllParametersAllDimsMarginModel
from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith
......@@ -14,6 +15,7 @@ from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporal
random.seed(42)
def load_dataset():
nb_points = 50
nb_obs = 60
......@@ -30,20 +32,18 @@ def load_dataset():
return FullSimulatedDataset.from_double_sampling(nb_obs=nb_obs, margin_model=margin_model,
coordinates=coordinates,
max_stable_model=max_stable_model,
train_split_ratio=0.8,
slicer_class=SpatialSlicer)
max_stable_model=max_stable_model)
def full_estimator(dataset):
max_stable_model = Smith()
margin_model_for_estimator = LinearAllParametersAllDimsMarginModel(dataset.coordinates)
full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model_for_estimator, max_stable_model)
return full_estimator
# full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model_for_estimator, max_stable_model)
fast_estimator = SmoothMarginEstimator(dataset, margin_model_for_estimator)
return fast_estimator
if __name__ == '__main__':
dataset = load_dataset()
dataset.slicer.summary()
full_estimator = full_estimator(dataset)
......
......@@ -43,36 +43,42 @@ class SplitCurve(object):
self.margin_function_fitted = estimator.margin_function_fitted
self.error_dict = error_dict_between_margin_functions(self.margin_function_sample, self.margin_function_fitted)
# todo: add quantile curve, additionally to the marginal curve
@property
def main_title(self):
return self.dataset.slicer.summary(show=False)
def visualize(self):
fig, axes = plt.subplots(3, 2)
fig, axes = plt.subplots(len(GevParams.GEV_VALUE_NAMES), 2)
fig.subplots_adjust(hspace=0.4, wspace=0.4, )
for i, gev_param_name in enumerate(GevParams.GEV_PARAM_NAMES):
self.margin_graph(axes[i, 0], gev_param_name)
self.score_graph(axes[i, 1], gev_param_name)
for i, gev_value_name in enumerate(GevParams.GEV_VALUE_NAMES):
self.margin_graph(axes[i, 0], gev_value_name)
self.score_graph(axes[i, 1], gev_value_name)
fig.suptitle(self.main_title)
plt.show()
def margin_graph(self, ax, gev_param_name):
def margin_graph(self, ax, gev_value_name):
# Display the fitted curve
self.margin_function_fitted.visualize_single_param(gev_param_name, ax, show=False)
self.margin_function_fitted.visualize_single_param(gev_value_name, ax, show=False)
# Display train/test points
for split, marker in [(self.dataset.train_split, 'o'), (self.dataset.test_split, 'x')]:
self.margin_function_sample.set_datapoint_display_parameters(split, datapoint_marker=marker)
self.margin_function_sample.visualize_single_param(gev_param_name, ax, show=False)
title_str = gev_param_name
ax.set_title(title_str)
self.margin_function_sample.visualize_single_param(gev_value_name, ax, show=False)
def score_graph(self, ax, gev_param_name):
def score_graph(self, ax, gev_value_name):
# todo: for the moment only the train/test are interresting (the spatio temporal isn"t working yet)
sns.set_style('whitegrid')
s = self.error_dict[gev_param_name]
s = self.error_dict[gev_value_name]
for split in self.dataset.splits:
ind = self.dataset.coordinates_index(split)
data = s.loc[ind].values
sns.kdeplot(data, bw=0.5, ax=ax, label=split.name).set(xlim=0)
ax.legend()
# X axis
ax.set_xlabel('Absolute error in percentage')
plt.setp(ax.get_xticklabels(), visible=True)
ax.xaxis.set_tick_params(labelbottom=True)
# Y axis
ax.set_ylabel(gev_value_name)
plt.setp(ax.get_yticklabels(), visible=True)
ax.yaxis.set_tick_params(labelbottom=True)
from typing import Dict
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
......@@ -24,16 +26,17 @@ class AbstractMarginFunction(object):
"""Main method that maps each coordinate to its GEV parameters"""
pass
# Extraction function
@property
def gev_params_for_coordinates(self):
gev_params = [self.get_gev_params(coordinate).to_dict() for coordinate in self.coordinates.coordinates_values()]
gev_param_name_to_serie = {}
for gev_param_name in GevParams.GEV_PARAM_NAMES:
s = pd.Series(data=[p[gev_param_name] for p in gev_params], index=self.coordinates.index)
gev_param_name_to_serie[gev_param_name] = s
return gev_param_name_to_serie
def gev_value_name_to_serie(self) -> Dict[str, pd.Series]:
# Load the gev_params
gev_params = [self.get_gev_params(coordinate) for coordinate in self.coordinates.coordinates_values()]
# Load the dictionary of values (gev parameters + the quantiles)
value_dicts = [gev_param.value_dict for gev_param in gev_params]
gev_value_name_to_serie = {}
for value_name in GevParams.GEV_VALUE_NAMES:
s = pd.Series(data=[d[value_name] for d in value_dicts], index=self.coordinates.index)
gev_value_name_to_serie[value_name] = s
return gev_value_name_to_serie
# Visualization function
......@@ -56,24 +59,32 @@ class AbstractMarginFunction(object):
if show:
plt.show()
def visualize_single_param(self, gev_param_name=GevParams.GEV_LOC, ax=None, show=True):
def visualize_single_param(self, gev_value_name=GevParams.GEV_LOC, ax=None, show=True):
assert gev_value_name in GevParams.GEV_VALUE_NAMES
if self.coordinates.nb_coordinates_spatial == 1:
self.visualize_1D(gev_param_name, ax, show)
self.visualize_1D(gev_value_name, ax, show)
elif self.coordinates.nb_coordinates_spatial == 2:
self.visualize_2D(gev_param_name, ax, show)
self.visualize_2D(gev_value_name, ax, show)
else:
raise NotImplementedError('3D Margin visualization not yet implemented')
def visualize_1D(self, gev_param_name=GevParams.GEV_LOC, ax=None, show=True):
def visualize_1D(self, gev_value_name=GevParams.GEV_LOC, ax=None, show=True):
x = self.coordinates.x_coordinates
grid, linspace = self.get_grid_1D(x)
gev_param_idx = GevParams.GEV_PARAM_NAMES.index(gev_param_name)
grid, linspace = self.get_grid_values_1D(x)
if ax is None:
ax = plt.gca()
if self.datapoint_display:
ax.plot(linspace, grid[:, gev_param_idx], self.datapoint_marker)
ax.plot(linspace, grid[gev_value_name], self.datapoint_marker)
else:
ax.plot(linspace, grid[:, gev_param_idx])
ax.plot(linspace, grid[gev_value_name])
# X axis
ax.set_xlabel('coordinate')
plt.setp(ax.get_xticklabels(), visible=True)
ax.xaxis.set_tick_params(labelbottom=True)
# Y axis
ax.set_ylabel(gev_value_name)
plt.setp(ax.get_yticklabels(), visible=True)
ax.yaxis.set_tick_params(labelbottom=True)
if show:
plt.show()
......@@ -92,7 +103,7 @@ class AbstractMarginFunction(object):
if show:
plt.show()
def get_grid_1D(self, x):
def get_grid_values_1D(self, x):
# TODO: to avoid getting the value several times, I could cache the results
if self.datapoint_display:
linspace = self.coordinates.coordinates_values(self.spatio_temporal_split)[:, 0]
......@@ -101,9 +112,11 @@ class AbstractMarginFunction(object):
resolution = 100
linspace = np.linspace(x.min(), x.max(), resolution)
grid = np.zeros([resolution, 3])
grid = []
for i, xi in enumerate(linspace):
grid[i] = self.get_gev_params(np.array([xi])).to_array()
gev_param = self.get_gev_params(np.array([xi]))
grid.append(gev_param.value_dict)
grid = {gev_param: [g[gev_param] for g in grid] for gev_param in GevParams.GEV_VALUE_NAMES}
return grid, linspace
def get_grid_2D(self, x, y):
......
......@@ -5,23 +5,25 @@ from extreme_estimator.extreme_models.margin_model.margin_function.abstract_marg
from extreme_estimator.gev_params import GevParams
def abs_error(s1, s2):
return (s1 - s2).abs().pow(2)
def relative_abs_error(reference_value, fitted_value):
return (reference_value - fitted_value).abs() / reference_value
def error_dict_between_margin_functions(margin1: AbstractMarginFunction, margin2: AbstractMarginFunction):
def error_dict_between_margin_functions(reference: AbstractMarginFunction, fitted: AbstractMarginFunction):
"""
Return a serie, indexed by the same index as the coordinates
Each value correspond to the error for this coordinate
:param margin1:
:param margin2:
:param reference:
:param fitted:
:return:
"""
assert margin1.coordinates == margin2.coordinates
margin1_gev_params, margin2_gev_params = margin1.gev_params_for_coordinates, margin2.gev_params_for_coordinates
assert reference.coordinates == fitted.coordinates
reference_values = reference.gev_value_name_to_serie
fitted_values = fitted.gev_value_name_to_serie
gev_param_name_to_error_serie = {}
for gev_param_name in GevParams.GEV_PARAM_NAMES:
serie1, serie2 = margin1_gev_params[gev_param_name], margin2_gev_params[gev_param_name]
error = abs_error(serie1, serie2)
gev_param_name_to_error_serie[gev_param_name] = error
for value_name in GevParams.GEV_VALUE_NAMES:
print(value_name)
error = relative_abs_error(reference_values[value_name], fitted_values[value_name])
gev_param_name_to_error_serie[value_name] = error
return gev_param_name_to_error_serie
import numpy as np
from extreme_estimator.extreme_models.utils import r
class GevParams(object):
# GEV parameters
GEV_SCALE = 'scale'
GEV_LOC = 'loc'
GEV_SHAPE = 'shape'
GEV_PARAM_NAMES = [GEV_LOC, GEV_SCALE, GEV_SHAPE]
# GEV quantiles
GEV_QUANTILE_10 = 'quantile 10'
GEV_QUANTILE_100 = 'quantile 100'
GEV_QUANTILE_1000 = 'quantile 1000'
GEV_QUANTILE_NAMES = [GEV_QUANTILE_10, GEV_QUANTILE_100, GEV_QUANTILE_1000]
# GEV values
GEV_VALUE_NAMES = GEV_PARAM_NAMES + GEV_QUANTILE_NAMES[:-1]
# GEV parameters
def __init__(self, loc: float, scale: float, shape: float):
self.location = loc
......@@ -28,3 +40,27 @@ class GevParams(object):
gev_param_name_to_value = self.to_dict()
return np.array([gev_param_name_to_value[gev_param_name] for gev_param_name in self.GEV_PARAM_NAMES])
# GEV quantiles
def qgev(self, p):
return r.qgev(p, self.location, self.scale, self.shape)[0]
@property
def quantile_name_to_p(self) -> dict:
return {
self.GEV_QUANTILE_10: 0.1,
self.GEV_QUANTILE_100: 0.01,
self.GEV_QUANTILE_1000: 0.001,
}
@property
def quantile_dict(self) -> dict:
return {quantile_name: self.qgev(p) for quantile_name, p in self.quantile_name_to_p.items()}
# GEV values
@property
def value_dict(self) -> dict:
return {**self.to_dict(), **self.quantile_dict}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment