diff --git a/experiment/simulation/abstract_simulation.py b/experiment/simulation/abstract_simulation.py index 4319f7842c346a6db8145aa9e4cabd449e6e9637..3e99de35bc1a743721fae71f5d918fc3bdcac190 100644 --- a/experiment/simulation/abstract_simulation.py +++ b/experiment/simulation/abstract_simulation.py @@ -1,4 +1,11 @@ import os +from typing import List + +import matplotlib +import matplotlib.pyplot as plt +import matplotlib.colors as colors +import matplotlib.cm as cmx +import numpy as np import os.path as op import pickle @@ -8,12 +15,15 @@ import numpy as np import seaborn as sns from extreme_estimator.estimator.abstract_estimator import AbstractEstimator +from extreme_estimator.extreme_models.margin_model.margin_function.abstract_margin_function import \ + AbstractMarginFunction from extreme_estimator.extreme_models.margin_model.margin_function.combined_margin_function import \ CombinedMarginFunction from extreme_estimator.extreme_models.margin_model.margin_function.utils import error_dict_between_margin_functions from extreme_estimator.gev_params import GevParams from spatio_temporal_dataset.dataset.abstract_dataset import get_subset_dataset from spatio_temporal_dataset.dataset.simulation_dataset import SimulatedDataset +from spatio_temporal_dataset.slicer.split import split_to_display_kwargs from utils import get_full_path, get_display_name_from_object_type SIMULATION_RELATIVE_PATH = op.join('local', 'simulation') @@ -21,14 +31,15 @@ SIMULATION_RELATIVE_PATH = op.join('local', 'simulation') class AbstractSimulation(object): - def __init__(self): - self.nb_fit = 1 - self.margin_function_fitted_list = None + def __init__(self, nb_fit=1): + self.nb_fit = nb_fit + self.margin_function_fitted_list = None # type: List[AbstractMarginFunction] self.full_dataset = None self.error_dict_all = None self.margin_function_sample = None self.mean_error_dict = None - self.mean_margin_function_fitted = None + self.mean_margin_function_fitted = None # type: AbstractMarginFunction + self.estimator_name = '' def fit(self, estimator_class, show=True): assert estimator_class not in self.already_fitted_estimator_names, \ @@ -75,7 +86,18 @@ class AbstractSimulation(object): self.margin_function_sample = self.full_dataset.margin_model.margin_function_sample fig, axes = self.load_fig_and_axes() - for estimator_name in estimator_names: + + # Binary color should + values = np.linspace(0, 1, len(estimator_names)) + jet = plt.get_cmap('jet') + cNorm = matplotlib.colors.Normalize(vmin=0, vmax=values[-1]) + scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet) + colors = [scalarMap.to_rgba(value) for value in values] + + for j, (estimator_name, color) in enumerate(zip(estimator_names, colors)): + self.j = j + self.color = color + self.estimator_name = estimator_name self.margin_function_fitted_list = self.load_fitted_margins_pickles(estimator_name) self.error_dict_all = [error_dict_between_margin_functions(reference=self.margin_function_sample, @@ -89,6 +111,11 @@ class AbstractSimulation(object): self.mean_error_dict = error_dict_between_margin_functions(self.margin_function_sample, self.mean_margin_function_fitted) self.visualize(fig, axes, show=False) + + title = self.main_title + for j, estimator_name in enumerate(estimator_names): + title += '\n y{}: {}'.format(j, estimator_name) + fig.suptitle(title) plt.show() @property @@ -137,15 +164,17 @@ class AbstractSimulation(object): # todo: fix binary color problem self.margin_function_sample.set_datapoint_display_parameters(split, datapoint_marker=marker, - filter=data_filter) + filter=data_filter, + color='black', + datapoint_display=True) self.margin_function_sample.visualize_single_param(gev_value_name, ax, show=False) # Display the individual fitted curve - self.mean_margin_function_fitted.color = 'lightskyblue' for m in self.margin_function_fitted_list: + m.set_datapoint_display_parameters(linewidth=0.1, color=self.color) m.visualize_single_param(gev_value_name, ax, show=False) # Display the mean fitted curve - self.mean_margin_function_fitted.color = 'blue' + self.mean_margin_function_fitted.set_datapoint_display_parameters(color=self.color, linewidth=2) self.mean_margin_function_fitted.visualize_single_param(gev_value_name, ax, show=False) def score_graph(self, ax, gev_value_name): @@ -156,10 +185,19 @@ class AbstractSimulation(object): for split in self.full_dataset.splits: ind = self.full_dataset.coordinates_index(split) data = s.loc[ind].values - sns.kdeplot(data, bw=0.5, ax=ax, label=split.name).set(xlim=0) + display_kwargs = split_to_display_kwargs(split) + print(split, 'train' in split.name) + if 'train' in split.name: + display_kwargs.update({"label": 'y' + str(self.j)}) + markersize=3 + else: + markersize = 10 + ax.plot([data.mean()], [0], color=self.color, marker='o', markersize=markersize) + sns.kdeplot(data, bw=1, ax=ax, color=self.color, **display_kwargs).set(xlim=0) ax.legend() + # X axis - ax.set_xlabel('Absolute error in percentage') + ax.set_xlabel('Mean absolute error in %') plt.setp(ax.get_xticklabels(), visible=True) ax.xaxis.set_tick_params(labelbottom=True) # Y axis diff --git a/experiment/simulation/lin_space2_simulation.py b/experiment/simulation/lin_space2_simulation.py new file mode 100644 index 0000000000000000000000000000000000000000..c28f8ce23a2c241378c9b508c9b426f23d54ee9d --- /dev/null +++ b/experiment/simulation/lin_space2_simulation.py @@ -0,0 +1,41 @@ +from experiment.simulation.abstract_simulation import AbstractSimulation +from extreme_estimator.estimator.full_estimator.full_estimator_for_simulation import FULL_ESTIMATORS_FOR_SIMULATION +from extreme_estimator.estimator.margin_estimator.margin_estimator_for_simulation import \ + MARGIN_ESTIMATORS_FOR_SIMULATION +from extreme_estimator.extreme_models.margin_model.smooth_margin_model import ConstantMarginModel +from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith +from extreme_estimator.gev_params import GevParams +from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import LinSpaceSpatialCoordinates +from spatio_temporal_dataset.dataset.simulation_dataset import FullSimulatedDataset + + +class LinSpace3Simulation(AbstractSimulation): + FITTED_ESTIMATORS = [] + + def __init__(self, nb_fit=1): + super().__init__(nb_fit) + # Simulation parameters + self.nb_obs = 60 + self.coordinates = LinSpaceSpatialCoordinates.from_nb_points(nb_points=100, train_split_ratio=0.75) + # MarginModel Linear with respect to the shape (from 0.01 to 0.02) + params_sample = { + (GevParams.GEV_LOC, 0): 1.0, + (GevParams.GEV_SHAPE, 0): 1.0, + (GevParams.GEV_SCALE, 0): 1.0, + } + self.margin_model = ConstantMarginModel(coordinates=self.coordinates, params_sample=params_sample) + self.max_stable_model = Smith() + + def dump(self): + dataset = FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, margin_model=self.margin_model, + coordinates=self.coordinates, + max_stable_model=self.max_stable_model) + self._dump(dataset=dataset) + + +if __name__ == '__main__': + simu = LinSpace3Simulation(nb_fit=7) + simu.dump() + for estimator_class in MARGIN_ESTIMATORS_FOR_SIMULATION + FULL_ESTIMATORS_FOR_SIMULATION: + simu.fit(estimator_class, show=False) + simu.visualize_comparison_graph() diff --git a/experiment/simulation/lin_space_simulation.py b/experiment/simulation/lin_space_simulation.py index 910eb3a3c3492713513951be1240018f1f471887..3d11e2dd9b6c1f48487291129de322507266c735 100644 --- a/experiment/simulation/lin_space_simulation.py +++ b/experiment/simulation/lin_space_simulation.py @@ -12,9 +12,8 @@ from spatio_temporal_dataset.dataset.simulation_dataset import FullSimulatedData class LinSpaceSimulation(AbstractSimulation): FITTED_ESTIMATORS = [] - def __init__(self): - super().__init__() - self.nb_fit = 2 + def __init__(self, nb_fit=1): + super().__init__(nb_fit) # Simulation parameters self.nb_obs = 60 self.coordinates = LinSpaceSpatialCoordinates.from_nb_points(nb_points=21, train_split_ratio=0.75) @@ -35,7 +34,7 @@ class LinSpaceSimulation(AbstractSimulation): if __name__ == '__main__': - simu = LinSpaceSimulation() + simu = LinSpaceSimulation(nb_fit=3) simu.dump() # for estimator_class in MARGIN_ESTIMATORS_FOR_SIMULATION + FULL_ESTIMATORS_FOR_SIMULATION: # simu.fit(estimator_class, show=False) diff --git a/extreme_estimator/extreme_models/abstract_model.py b/extreme_estimator/extreme_models/abstract_model.py index 016362d0cd73c3bd81957346cca850108e9a0101..28e19653d23939caf41b5dd849e67c946752ff75 100644 --- a/extreme_estimator/extreme_models/abstract_model.py +++ b/extreme_estimator/extreme_models/abstract_model.py @@ -1,6 +1,6 @@ class AbstractModel(object): - def __init__(self, use_start_value=True, params_start_fit=None, params_sample=None): + def __init__(self, use_start_value=False, params_start_fit=None, params_sample=None): self.default_params_start_fit = None self.default_params_sample = None self.use_start_value = use_start_value diff --git a/extreme_estimator/extreme_models/margin_model/abstract_margin_model.py b/extreme_estimator/extreme_models/margin_model/abstract_margin_model.py index e604abc6e5019b17235f8d01f3eb1047f7976935..26f2a98f5e6ba40c820b6f496438433218d65aca 100644 --- a/extreme_estimator/extreme_models/margin_model/abstract_margin_model.py +++ b/extreme_estimator/extreme_models/margin_model/abstract_margin_model.py @@ -11,7 +11,7 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo class AbstractMarginModel(AbstractModel): - def __init__(self, coordinates: AbstractCoordinates, use_start_value=True, params_start_fit=None, params_sample=None): + def __init__(self, coordinates: AbstractCoordinates, use_start_value=False, params_start_fit=None, params_sample=None): super().__init__(use_start_value, params_start_fit, params_sample) assert isinstance(coordinates, AbstractCoordinates), type(coordinates) self.coordinates = coordinates diff --git a/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py b/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py index a2475ef6780d449b147c53d5091fde7f1a91b453..1a75950badc61a2d5df40bfd6dc335ce6b7dc860 100644 --- a/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py +++ b/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py @@ -23,6 +23,7 @@ class AbstractMarginFunction(object): self.datapoint_marker = 'o' self.color = 'skyblue' self.filter = None + self.linewidth = 1 self._grid_2D = None self._grid_1D = None @@ -45,10 +46,12 @@ class AbstractMarginFunction(object): # Visualization function - def set_datapoint_display_parameters(self, spatio_temporal_split, datapoint_marker, filter=None, color=None): - self.datapoint_display = True + def set_datapoint_display_parameters(self, spatio_temporal_split=Split.all, datapoint_marker=None, filter=None, color=None, + linewidth=1, datapoint_display=False): + self.datapoint_display = datapoint_display self.spatio_temporal_split = spatio_temporal_split self.datapoint_marker = datapoint_marker + self.linewidth = linewidth self.filter = filter self.color = color @@ -83,9 +86,10 @@ class AbstractMarginFunction(object): if ax is None: ax = plt.gca() if self.datapoint_display: - ax.plot(linspace, grid[gev_value_name], self.datapoint_marker, color=self.color) + ax.plot(linspace, grid[gev_value_name], marker=self.datapoint_marker, color=self.color) else: - ax.plot(linspace, grid[gev_value_name], color=self.color) + print('here') + ax.plot(linspace, grid[gev_value_name], color=self.color, linewidth=self.linewidth) # X axis ax.set_xlabel('coordinate X') plt.setp(ax.get_xticklabels(), visible=True) diff --git a/extreme_estimator/extreme_models/margin_model/smooth_margin_model.py b/extreme_estimator/extreme_models/margin_model/smooth_margin_model.py index 1655c1f0550a57cbd87c48e70e54a771ad4d7f33..b2089aa11935f80ea45d832cfa37052dadcc4ff9 100644 --- a/extreme_estimator/extreme_models/margin_model/smooth_margin_model.py +++ b/extreme_estimator/extreme_models/margin_model/smooth_margin_model.py @@ -65,9 +65,8 @@ class LinearMarginModel(AbstractMarginModel): data = np.transpose(maxima_gev) covariables = get_coord(df_coordinates) fit_params = get_margin_formula(self.margin_function_start_fit.form_dict) - if self.use_start_value: - fit_params['start'] = r.list(**self.margin_function_start_fit.coef_dict) - res = safe_run_r_estimator(function=r.fitspatgev, data=data, covariables=covariables, **fit_params) + fit_params['start'] = r.list(**self.margin_function_start_fit.coef_dict) + res = safe_run_r_estimator(function=r.fitspatgev, use_start=self.use_start_value, data=data, covariables=covariables, **fit_params) return retrieve_fitted_values(res) diff --git a/extreme_estimator/extreme_models/max_stable_model/abstract_max_stable_model.py b/extreme_estimator/extreme_models/max_stable_model/abstract_max_stable_model.py index 2c3b4314b0a9743f760c97f7c1a123dd6fd3c33d..78ae219a174b9cd846fbc218196066789af4723d 100644 --- a/extreme_estimator/extreme_models/max_stable_model/abstract_max_stable_model.py +++ b/extreme_estimator/extreme_models/max_stable_model/abstract_max_stable_model.py @@ -12,7 +12,7 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo class AbstractMaxStableModel(AbstractModel): - def __init__(self, use_start_value=True, params_start_fit=None, params_sample=None): + def __init__(self, use_start_value=False, params_start_fit=None, params_sample=None): super().__init__(use_start_value, params_start_fit, params_sample) self.cov_mod = None @@ -59,12 +59,11 @@ class AbstractMaxStableModel(AbstractModel): fit_params.update(margin_formulas) if fitmaxstab_with_one_dimensional_data: fit_params['iso'] = True - if self.use_start_value: - fit_params['start'] = r.list(**start_dict) + fit_params['start'] = r.list(**start_dict) fit_params['fit.marge'] = fit_marge # Run the fitmaxstab in R - res = safe_run_r_estimator(function=r.fitmaxstab, data=data, coord=coord, **fit_params) + res = safe_run_r_estimator(function=r.fitmaxstab, use_start=self.use_start_value, data=data, coord=coord, **fit_params) return retrieve_fitted_values(res) def rmaxstab(self, nb_obs: int, coordinates_values: np.ndarray) -> np.ndarray: @@ -89,7 +88,7 @@ class CovarianceFunction(Enum): class AbstractMaxStableModelWithCovarianceFunction(AbstractMaxStableModel): - def __init__(self, use_start_value=True, params_start_fit=None, params_sample=None, covariance_function: CovarianceFunction = None): + def __init__(self, use_start_value=False, params_start_fit=None, params_sample=None, covariance_function: CovarianceFunction = None): super().__init__(use_start_value, params_start_fit, params_sample) assert covariance_function is not None self.covariance_function = covariance_function diff --git a/extreme_estimator/extreme_models/utils.py b/extreme_estimator/extreme_models/utils.py index 0ea8f3283edc5938917a0560727a97d0bd3040a5..2d84f995292750b39c1bfc1411b7b5e08fb3ae80 100644 --- a/extreme_estimator/extreme_models/utils.py +++ b/extreme_estimator/extreme_models/utils.py @@ -29,15 +29,26 @@ def get_associated_r_file(python_filepath: str) -> str: return r_filepath -def safe_run_r_estimator(function, **parameters): - try: - res = function(**parameters) # type: - except (RRuntimeError, RRuntimeWarning) as e: - if isinstance(e, RRuntimeError): - raise Exception('Some R exception have been launched at RunTime: \n {}'.format(e.__repr__())) - if isinstance(e, RRuntimeWarning): - print(e.__repr__()) - print('WARNING') +def safe_run_r_estimator(function, use_start=False, **parameters): + # First run without using start value + # Then if it crashes, use start value + run_successful = False + while not run_successful: + current_parameter = parameters.copy() + if not use_start: + current_parameter.pop('start') + try: + res = function(**current_parameter) # type: + run_successful = True + except (RRuntimeError, RRuntimeWarning) as e: + if not use_start: + use_start = True + continue + elif isinstance(e, RRuntimeError): + raise Exception('Some R exception have been launched at RunTime: \n {}'.format(e.__repr__())) + if isinstance(e, RRuntimeWarning): + print(e.__repr__()) + print('WARNING') return res diff --git a/extreme_estimator/gev_params.py b/extreme_estimator/gev_params.py index 78a6fa2e3a766971f84dd4be6157d00b9bcfe144..3fe0a213669a8de5d942cef63d552cbda233aa4e 100644 --- a/extreme_estimator/gev_params.py +++ b/extreme_estimator/gev_params.py @@ -23,6 +23,7 @@ class GevParams(object): self.location = loc self.scale = scale self.shape = shape + # self.scale = max(self.scale, 1e-4) assert self.scale > 0 @classmethod diff --git a/spatio_temporal_dataset/slicer/split.py b/spatio_temporal_dataset/slicer/split.py index 564657e0905957d7a699cb834cad75106859eef2..6558fbdf133b227803f541e9d62ac259d5dac04f 100644 --- a/spatio_temporal_dataset/slicer/split.py +++ b/spatio_temporal_dataset/slicer/split.py @@ -19,6 +19,24 @@ class Split(Enum): test_temporal = 8 +def split_to_display_kwargs(split: Split): + marker = None + gridsize = 1000 + if 'train' in split.name: + linewidth = 0.5 + else: + linewidth = 2 + if 'spatiotemporal' in split.name: + gridsize = 20 + if 'spatial' in split.name and 'temporal' in split.name: + marker = '*' + elif 'spatial' in split.name: + marker = '^' + else: + marker = '>' + return {'marker': marker, 'linewidth': linewidth, 'gridsize':gridsize} + + ALL_SPLITS_EXCEPT_ALL = [split for split in Split if split is not Split.all] SPLIT_NAME = 'split' @@ -46,7 +64,7 @@ def small_s_split_from_ratio(index: pd.Index, train_split_ratio): def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, spatial_split) -> Union[None, pd.Series]: - df = df.copy() # type: pd.DataFrame + df = df.copy() # type: pd.DataFrame # Extract the index if train_split_ratio is None: return None @@ -69,4 +87,3 @@ def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, s s_split.iloc[i] = small_s_split.iloc[i // multiplication_factor] s_split.index = df.index return s_split - diff --git a/test/test_extreme_estimator/test_estimator/test_full_estimators.py b/test/test_extreme_estimator/test_estimator/test_full_estimators.py index 25ceec23cec4b27413082c9dbd9ad36498076a78..2925c2736dc6ae9db259458e0224ede8b6f3f71d 100644 --- a/test/test_extreme_estimator/test_estimator/test_full_estimators.py +++ b/test/test_extreme_estimator/test_estimator/test_full_estimators.py @@ -20,10 +20,14 @@ class TestFullEstimators(unittest.TestCase): for coordinates in self.spatial_coordinates: smooth_margin_models = load_smooth_margin_models(coordinates=coordinates) for margin_model, max_stable_model in product(smooth_margin_models, self.max_stable_models): + dataset = FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, margin_model=margin_model, coordinates=coordinates, max_stable_model=max_stable_model) + margin_model.use_start_value = True + # todo: understand why it is crashing without specifying that (when not using start value was passed by default this test started crashing) + for full_estimator in load_test_full_estimators(dataset, margin_model, max_stable_model): full_estimator.fit() if self.DISPLAY: