Commit 2a846717 authored by Le Roux Erwan
Browse files

[SIMULATION] add use of start value by default for any model. improve display for simulation

parent 33f31bc8
No related merge requests found
Showing with 152 additions and 39 deletions
+152 -39
import os
from typing import List
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
import numpy as np
import os.path as op
import pickle
......@@ -8,12 +15,15 @@ import numpy as np
import seaborn as sns
from extreme_estimator.estimator.abstract_estimator import AbstractEstimator
from extreme_estimator.extreme_models.margin_model.margin_function.abstract_margin_function import \
AbstractMarginFunction
from extreme_estimator.extreme_models.margin_model.margin_function.combined_margin_function import \
CombinedMarginFunction
from extreme_estimator.extreme_models.margin_model.margin_function.utils import error_dict_between_margin_functions
from extreme_estimator.gev_params import GevParams
from spatio_temporal_dataset.dataset.abstract_dataset import get_subset_dataset
from spatio_temporal_dataset.dataset.simulation_dataset import SimulatedDataset
from spatio_temporal_dataset.slicer.split import split_to_display_kwargs
from utils import get_full_path, get_display_name_from_object_type
SIMULATION_RELATIVE_PATH = op.join('local', 'simulation')
......@@ -21,14 +31,15 @@ SIMULATION_RELATIVE_PATH = op.join('local', 'simulation')
class AbstractSimulation(object):
def __init__(self):
self.nb_fit = 1
self.margin_function_fitted_list = None
def __init__(self, nb_fit=1):
    """Create a simulation whose dataset and fit results are all still unset (None).

    :param nb_fit: stored as ``self.nb_fit``; presumably the number of fits to run -- TODO confirm against ``fit``
    """
    self.nb_fit = nb_fit
    self.margin_function_fitted_list = None # type: List[AbstractMarginFunction]
    self.full_dataset = None
    self.error_dict_all = None
    self.margin_function_sample = None
    self.mean_error_dict = None
    # NOTE(review): the next two assignments are the pre- and post-commit versions of the same line,
    # left side by side by the flattened diff; the duplicate assignment is harmless.
    self.mean_margin_function_fitted = None
    self.mean_margin_function_fitted = None # type: AbstractMarginFunction
    # Name of the estimator currently being processed; empty until one is selected
    self.estimator_name = ''
def fit(self, estimator_class, show=True):
assert estimator_class not in self.already_fitted_estimator_names, \
......@@ -75,7 +86,18 @@ class AbstractSimulation(object):
self.margin_function_sample = self.full_dataset.margin_model.margin_function_sample
fig, axes = self.load_fig_and_axes()
for estimator_name in estimator_names:
# Binary color should
values = np.linspace(0, 1, len(estimator_names))
jet = plt.get_cmap('jet')
cNorm = matplotlib.colors.Normalize(vmin=0, vmax=values[-1])
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
colors = [scalarMap.to_rgba(value) for value in values]
for j, (estimator_name, color) in enumerate(zip(estimator_names, colors)):
self.j = j
self.color = color
self.estimator_name = estimator_name
self.margin_function_fitted_list = self.load_fitted_margins_pickles(estimator_name)
self.error_dict_all = [error_dict_between_margin_functions(reference=self.margin_function_sample,
......@@ -89,6 +111,11 @@ class AbstractSimulation(object):
self.mean_error_dict = error_dict_between_margin_functions(self.margin_function_sample,
self.mean_margin_function_fitted)
self.visualize(fig, axes, show=False)
title = self.main_title
for j, estimator_name in enumerate(estimator_names):
title += '\n y{}: {}'.format(j, estimator_name)
fig.suptitle(title)
plt.show()
@property
......@@ -137,15 +164,17 @@ class AbstractSimulation(object):
# todo: fix binary color problem
self.margin_function_sample.set_datapoint_display_parameters(split, datapoint_marker=marker,
filter=data_filter)
filter=data_filter,
color='black',
datapoint_display=True)
self.margin_function_sample.visualize_single_param(gev_value_name, ax, show=False)
# Display the individual fitted curve
self.mean_margin_function_fitted.color = 'lightskyblue'
for m in self.margin_function_fitted_list:
m.set_datapoint_display_parameters(linewidth=0.1, color=self.color)
m.visualize_single_param(gev_value_name, ax, show=False)
# Display the mean fitted curve
self.mean_margin_function_fitted.color = 'blue'
self.mean_margin_function_fitted.set_datapoint_display_parameters(color=self.color, linewidth=2)
self.mean_margin_function_fitted.visualize_single_param(gev_value_name, ax, show=False)
def score_graph(self, ax, gev_value_name):
......@@ -156,10 +185,19 @@ class AbstractSimulation(object):
for split in self.full_dataset.splits:
ind = self.full_dataset.coordinates_index(split)
data = s.loc[ind].values
sns.kdeplot(data, bw=0.5, ax=ax, label=split.name).set(xlim=0)
display_kwargs = split_to_display_kwargs(split)
print(split, 'train' in split.name)
if 'train' in split.name:
display_kwargs.update({"label": 'y' + str(self.j)})
markersize=3
else:
markersize = 10
ax.plot([data.mean()], [0], color=self.color, marker='o', markersize=markersize)
sns.kdeplot(data, bw=1, ax=ax, color=self.color, **display_kwargs).set(xlim=0)
ax.legend()
# X axis
ax.set_xlabel('Absolute error in percentage')
ax.set_xlabel('Mean absolute error in %')
plt.setp(ax.get_xticklabels(), visible=True)
ax.xaxis.set_tick_params(labelbottom=True)
# Y axis
......
from experiment.simulation.abstract_simulation import AbstractSimulation
from extreme_estimator.estimator.full_estimator.full_estimator_for_simulation import FULL_ESTIMATORS_FOR_SIMULATION
from extreme_estimator.estimator.margin_estimator.margin_estimator_for_simulation import \
MARGIN_ESTIMATORS_FOR_SIMULATION
from extreme_estimator.extreme_models.margin_model.smooth_margin_model import ConstantMarginModel
from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith
from extreme_estimator.gev_params import GevParams
from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import LinSpaceSpatialCoordinates
from spatio_temporal_dataset.dataset.simulation_dataset import FullSimulatedDataset
class LinSpace3Simulation(AbstractSimulation):
    """Simulation pairing a ConstantMarginModel with a Smith max-stable model.

    Coordinates come from LinSpaceSpatialCoordinates.from_nb_points (100 points, train split ratio 0.75).
    """
    FITTED_ESTIMATORS = []

    def __init__(self, nb_fit=1):
        """Store the simulation parameters and build the two models used for sampling.

        :param nb_fit: forwarded unchanged to ``AbstractSimulation.__init__``
        """
        super().__init__(nb_fit)
        # Simulation parameters
        self.nb_obs = 60
        self.coordinates = LinSpaceSpatialCoordinates.from_nb_points(nb_points=100, train_split_ratio=0.75)
        # Constant margin model: the (gev_param_name, 0) entry of every GEV parameter is sampled at 1.0
        gev_param_names = [GevParams.GEV_LOC, GevParams.GEV_SHAPE, GevParams.GEV_SCALE]
        params_sample = {(gev_param_name, 0): 1.0 for gev_param_name in gev_param_names}
        self.margin_model = ConstantMarginModel(coordinates=self.coordinates, params_sample=params_sample)
        self.max_stable_model = Smith()

    def dump(self):
        """Build a FullSimulatedDataset by double sampling from both models and pass it to ``self._dump``."""
        sampling_kwargs = {
            'nb_obs': self.nb_obs,
            'margin_model': self.margin_model,
            'coordinates': self.coordinates,
            'max_stable_model': self.max_stable_model,
        }
        sampled_dataset = FullSimulatedDataset.from_double_sampling(**sampling_kwargs)
        self._dump(dataset=sampled_dataset)
if __name__ == '__main__':
    # Script entry point: dump a dataset, fit every margin and full estimator on it, then plot the comparison.
    # NOTE(review): the final call is placed after the loop; the flattened diff lost the indentation -- confirm.
    simulation = LinSpace3Simulation(nb_fit=7)
    simulation.dump()
    estimator_classes = MARGIN_ESTIMATORS_FOR_SIMULATION + FULL_ESTIMATORS_FOR_SIMULATION
    for estimator_class in estimator_classes:
        simulation.fit(estimator_class, show=False)
    simulation.visualize_comparison_graph()
......@@ -12,9 +12,8 @@ from spatio_temporal_dataset.dataset.simulation_dataset import FullSimulatedData
class LinSpaceSimulation(AbstractSimulation):
FITTED_ESTIMATORS = []
def __init__(self):
super().__init__()
self.nb_fit = 2
def __init__(self, nb_fit=1):
super().__init__(nb_fit)
# Simulation parameters
self.nb_obs = 60
self.coordinates = LinSpaceSpatialCoordinates.from_nb_points(nb_points=21, train_split_ratio=0.75)
......@@ -35,7 +34,7 @@ class LinSpaceSimulation(AbstractSimulation):
if __name__ == '__main__':
simu = LinSpaceSimulation()
simu = LinSpaceSimulation(nb_fit=3)
simu.dump()
# for estimator_class in MARGIN_ESTIMATORS_FOR_SIMULATION + FULL_ESTIMATORS_FOR_SIMULATION:
# simu.fit(estimator_class, show=False)
......
class AbstractModel(object):
def __init__(self, use_start_value=True, params_start_fit=None, params_sample=None):
def __init__(self, use_start_value=False, params_start_fit=None, params_sample=None):
self.default_params_start_fit = None
self.default_params_sample = None
self.use_start_value = use_start_value
......
......@@ -11,7 +11,7 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo
class AbstractMarginModel(AbstractModel):
def __init__(self, coordinates: AbstractCoordinates, use_start_value=True, params_start_fit=None, params_sample=None):
def __init__(self, coordinates: AbstractCoordinates, use_start_value=False, params_start_fit=None, params_sample=None):
super().__init__(use_start_value, params_start_fit, params_sample)
assert isinstance(coordinates, AbstractCoordinates), type(coordinates)
self.coordinates = coordinates
......
......@@ -23,6 +23,7 @@ class AbstractMarginFunction(object):
self.datapoint_marker = 'o'
self.color = 'skyblue'
self.filter = None
self.linewidth = 1
self._grid_2D = None
self._grid_1D = None
......@@ -45,10 +46,12 @@ class AbstractMarginFunction(object):
# Visualization function
def set_datapoint_display_parameters(self, spatio_temporal_split, datapoint_marker, filter=None, color=None):
self.datapoint_display = True
def set_datapoint_display_parameters(self, spatio_temporal_split=Split.all, datapoint_marker=None, filter=None, color=None,
linewidth=1, datapoint_display=False):
self.datapoint_display = datapoint_display
self.spatio_temporal_split = spatio_temporal_split
self.datapoint_marker = datapoint_marker
self.linewidth = linewidth
self.filter = filter
self.color = color
......@@ -83,9 +86,10 @@ class AbstractMarginFunction(object):
if ax is None:
ax = plt.gca()
if self.datapoint_display:
ax.plot(linspace, grid[gev_value_name], self.datapoint_marker, color=self.color)
ax.plot(linspace, grid[gev_value_name], marker=self.datapoint_marker, color=self.color)
else:
ax.plot(linspace, grid[gev_value_name], color=self.color)
print('here')
ax.plot(linspace, grid[gev_value_name], color=self.color, linewidth=self.linewidth)
# X axis
ax.set_xlabel('coordinate X')
plt.setp(ax.get_xticklabels(), visible=True)
......
......@@ -65,9 +65,8 @@ class LinearMarginModel(AbstractMarginModel):
data = np.transpose(maxima_gev)
covariables = get_coord(df_coordinates)
fit_params = get_margin_formula(self.margin_function_start_fit.form_dict)
if self.use_start_value:
fit_params['start'] = r.list(**self.margin_function_start_fit.coef_dict)
res = safe_run_r_estimator(function=r.fitspatgev, data=data, covariables=covariables, **fit_params)
fit_params['start'] = r.list(**self.margin_function_start_fit.coef_dict)
res = safe_run_r_estimator(function=r.fitspatgev, use_start=self.use_start_value, data=data, covariables=covariables, **fit_params)
return retrieve_fitted_values(res)
......
......@@ -12,7 +12,7 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo
class AbstractMaxStableModel(AbstractModel):
def __init__(self, use_start_value=True, params_start_fit=None, params_sample=None):
def __init__(self, use_start_value=False, params_start_fit=None, params_sample=None):
super().__init__(use_start_value, params_start_fit, params_sample)
self.cov_mod = None
......@@ -59,12 +59,11 @@ class AbstractMaxStableModel(AbstractModel):
fit_params.update(margin_formulas)
if fitmaxstab_with_one_dimensional_data:
fit_params['iso'] = True
if self.use_start_value:
fit_params['start'] = r.list(**start_dict)
fit_params['start'] = r.list(**start_dict)
fit_params['fit.marge'] = fit_marge
# Run the fitmaxstab in R
res = safe_run_r_estimator(function=r.fitmaxstab, data=data, coord=coord, **fit_params)
res = safe_run_r_estimator(function=r.fitmaxstab, use_start=self.use_start_value, data=data, coord=coord, **fit_params)
return retrieve_fitted_values(res)
def rmaxstab(self, nb_obs: int, coordinates_values: np.ndarray) -> np.ndarray:
......@@ -89,7 +88,7 @@ class CovarianceFunction(Enum):
class AbstractMaxStableModelWithCovarianceFunction(AbstractMaxStableModel):
def __init__(self, use_start_value=True, params_start_fit=None, params_sample=None, covariance_function: CovarianceFunction = None):
def __init__(self, use_start_value=False, params_start_fit=None, params_sample=None, covariance_function: CovarianceFunction = None):
super().__init__(use_start_value, params_start_fit, params_sample)
assert covariance_function is not None
self.covariance_function = covariance_function
......
......@@ -29,15 +29,26 @@ def get_associated_r_file(python_filepath: str) -> str:
return r_filepath
def safe_run_r_estimator(function, **parameters):
try:
res = function(**parameters) # type:
except (RRuntimeError, RRuntimeWarning) as e:
if isinstance(e, RRuntimeError):
raise Exception('Some R exception have been launched at RunTime: \n {}'.format(e.__repr__()))
if isinstance(e, RRuntimeWarning):
print(e.__repr__())
print('WARNING')
def safe_run_r_estimator(function, use_start=False, **parameters):
    """Run an R estimator, falling back on the start values only when needed.

    When ``use_start`` is False and a 'start' entry is present in ``parameters``, the estimator is first
    called without that entry; if R raises, it is called a second time with the full parameters.
    When ``use_start`` is True, or when there is no 'start' entry to fall back on, a single call is made.

    :param function: callable invoked with the keyword parameters (e.g. ``r.fitmaxstab``)
    :param use_start: if True, pass the 'start' entry from the very first call
    :param parameters: keyword arguments forwarded to ``function``
    :return: whatever ``function`` returns on its first successful call
    :raises Exception: if the last attempt raises an RRuntimeError or an RRuntimeWarning
    """
    if use_start or 'start' not in parameters:
        # Nothing to strip (or nothing to fall back on): a retry would repeat the exact same call
        attempts = [parameters]
    else:
        without_start = {name: value for name, value in parameters.items() if name != 'start'}
        attempts = [without_start, parameters]

    last_attempt = len(attempts) - 1
    for attempt, current_parameters in enumerate(attempts):
        try:
            return function(**current_parameters)
        except (RRuntimeError, RRuntimeWarning) as e:
            if attempt < last_attempt:
                # First run (without start values) crashed: try again using the start values
                continue
            if isinstance(e, RRuntimeWarning):
                print(e.__repr__())
                print('WARNING')
                # The call was aborted so there is no result to return; re-running the same call
                # would raise the same warning again, hence fail explicitly instead of looping.
                raise Exception('Some R warning have been launched at RunTime: \n {}'.format(e.__repr__())) from e
            raise Exception('Some R exception have been launched at RunTime: \n {}'.format(e.__repr__())) from e
......
......@@ -23,6 +23,7 @@ class GevParams(object):
self.location = loc
self.scale = scale
self.shape = shape
# self.scale = max(self.scale, 1e-4)
assert self.scale > 0
@classmethod
......
......@@ -19,6 +19,24 @@ class Split(Enum):
test_temporal = 8
def split_to_display_kwargs(split: Split):
    """Build the display keyword arguments associated with a split.

    Every choice is driven by substrings of ``split.name``.

    NOTE(review): this function was recovered from a diff whose indentation was lost, so the nesting
    below (gridsize and marker only customised for non-train splits) is an assumption -- confirm
    against the repository.

    :param split: the split to display
    :return: dict with the keys 'marker', 'linewidth' and 'gridsize'
    """
    marker = None
    gridsize = 1000
    if 'train' in split.name:
        # Train splits get a thin line and keep the default marker/gridsize
        linewidth = 0.5
    else:
        linewidth = 2
        if 'spatiotemporal' in split.name:
            gridsize = 20
        # One marker per kind of non-train split name
        if 'spatial' in split.name and 'temporal' in split.name:
            marker = '*'
        elif 'spatial' in split.name:
            marker = '^'
        else:
            marker = '>'
    return {'marker': marker, 'linewidth': linewidth, 'gridsize':gridsize}
ALL_SPLITS_EXCEPT_ALL = [split for split in Split if split is not Split.all]
SPLIT_NAME = 'split'
......@@ -46,7 +64,7 @@ def small_s_split_from_ratio(index: pd.Index, train_split_ratio):
def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, spatial_split) -> Union[None, pd.Series]:
df = df.copy() # type: pd.DataFrame
df = df.copy() # type: pd.DataFrame
# Extract the index
if train_split_ratio is None:
return None
......@@ -69,4 +87,3 @@ def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, s
s_split.iloc[i] = small_s_split.iloc[i // multiplication_factor]
s_split.index = df.index
return s_split
......@@ -20,10 +20,14 @@ class TestFullEstimators(unittest.TestCase):
for coordinates in self.spatial_coordinates:
smooth_margin_models = load_smooth_margin_models(coordinates=coordinates)
for margin_model, max_stable_model in product(smooth_margin_models, self.max_stable_models):
dataset = FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, margin_model=margin_model,
coordinates=coordinates,
max_stable_model=max_stable_model)
margin_model.use_start_value = True
# todo: understand why it is crashing without specifying that (when not using start value was passed by default this test started crashing)
for full_estimator in load_test_full_estimators(dataset, margin_model, max_stable_model):
full_estimator.fit()
if self.DISPLAY:
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment