From d45d0f4394955fb5c4774b947ce8202badda6191 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Mon, 6 May 2019 18:19:13 +0200 Subject: [PATCH] [EXTREME ESTIMATOR][MARGIN MODEL] add ismev gev fit. and refactor. add result_from_fit abstract class. --- .../study_visualization/study_visualizer.py | 4 +- .../estimator/abstract_estimator.py | 33 +++-- .../full_estimator/abstract_full_estimator.py | 17 +-- .../abstract_margin_estimator.py | 16 +-- .../margin_estimator_for_simulation.py | 10 +- .../abstract_max_stable_estimator.py | 2 +- .../margin_function/linear_margin_function.py | 5 + .../margin_model/parametric_margin_model.py | 20 ++- .../abstract_max_stable_model.py | 7 +- .../extreme_models/result_from_fit.py | 42 +++++- extreme_estimator/extreme_models/utils.py | 36 +++-- extreme_estimator/margin_fits/gev/gev_fit.py | 16 +++ .../margin_fits/gev/gevmle_fit.py | 17 ++- .../margin_fits/gev/ismev_gev_fit.py | 23 ++++ .../margin_fits/gev/wrapper_ismev_gev_fit.R | 128 ++++++++++++++++++ .../margin_fits/margin_fits_utils.py | 30 +++- .../test_estimator/test_margin_estimators.py | 4 +- .../test_gev/test_gevmle_fit.py | 19 ++- .../test_fitmaxstab_with_margin.py | 4 +- 19 files changed, 339 insertions(+), 94 deletions(-) create mode 100644 extreme_estimator/margin_fits/gev/gev_fit.py create mode 100644 extreme_estimator/margin_fits/gev/ismev_gev_fit.py create mode 100644 extreme_estimator/margin_fits/gev/wrapper_ismev_gev_fit.R diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py index a57c41b2..94e46bcd 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py @@ -13,7 +13,7 @@ from experiment.meteo_france_SCM_study.visualization.utils import create_adjuste from experiment.utils import average_smoothing_with_sliding_window from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \ FullEstimatorInASingleStepWithSmoothMargin -from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import SmoothMarginEstimator +from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import LinearMarginEstimator from extreme_estimator.extreme_models.margin_model.linear_margin_model import LinearAllParametersAllDimsMarginModel, \ LinearNonStationaryMarginModel, LinearStationaryMarginModel from extreme_estimator.extreme_models.margin_model.margin_function.abstract_margin_function import \ @@ -283,7 +283,7 @@ class StudyVisualizer(object): # Plot the smooth margin only margin_model = margin_class(coordinates=self.coordinates, starting_point=None) - estimator = SmoothMarginEstimator(dataset=self.dataset, margin_model=margin_model) + estimator = LinearMarginEstimator(dataset=self.dataset, margin_model=margin_model) self.fit_and_visualize_estimator(estimator, axes[0], title='without max stable') # Plot the smooth margin fitted with a max stable diff --git a/extreme_estimator/estimator/abstract_estimator.py b/extreme_estimator/estimator/abstract_estimator.py index a5da6543..0dfb1643 100644 --- a/extreme_estimator/estimator/abstract_estimator.py +++ b/extreme_estimator/estimator/abstract_estimator.py @@ -37,30 +37,29 @@ class AbstractEstimator(object): self.additional_information[self.DURATION] = int((te - ts) * 1000) @property - def fitted_values(self): - assert self.is_fitted - return self._result_from_fit.fitted_values - - # @property - # def max_stable_fitted(self) -> AbstractMarginFunction: - # assert self._margin_function_fitted is not None, 'Error: estimator has not been fitted' - # return self._margin_function_fitted + def result_from_fit(self) -> ResultFromFit: + assert self._result_from_fit is not None, 'Fit has not be done' + return self._result_from_fit @property def margin_function_fitted(self) -> AbstractMarginFunction: - assert self.is_fitted + if self._margin_function_fitted is None: + self._margin_function_fitted = self.extract_function_fitted() assert self._margin_function_fitted is not None, 'No margin function has been fitted' return self._margin_function_fitted - def extract_fitted_models_from_fitted_params(self, margin_function_to_fit: ParametricMarginFunction, full_params_fitted): - coef_dict = {k: v for k, v in full_params_fitted.items() if LinearCoef.COEFF_STR in k} - self._margin_function_fitted = LinearMarginFunction.from_coef_dict(coordinates=self.dataset.coordinates, - gev_param_name_to_dims=margin_function_to_fit.gev_param_name_to_dims, - coef_dict=coef_dict) + def extract_function_fitted(self) -> AbstractMarginFunction: + raise NotImplementedError + + def extract_function_fitted_from_function_to_fit(self, margin_function_to_fit: ParametricMarginFunction): + return LinearMarginFunction.from_coef_dict(coordinates=self.dataset.coordinates, + gev_param_name_to_dims=margin_function_to_fit.gev_param_name_to_dims, + coef_dict=self.result_from_fit.margin_coef_dict) - @property - def is_fitted(self): - return self._result_from_fit is not None + # @property + # def max_stable_fitted(self) -> AbstractMarginFunction: + # assert self._margin_function_fitted is not None, 'Error: estimator has not been fitted' + # return self._margin_function_fitted @property def train_split(self): diff --git a/extreme_estimator/estimator/full_estimator/abstract_full_estimator.py b/extreme_estimator/estimator/full_estimator/abstract_full_estimator.py index 07ed06c1..547b54f6 100644 --- a/extreme_estimator/estimator/full_estimator/abstract_full_estimator.py +++ b/extreme_estimator/estimator/full_estimator/abstract_full_estimator.py @@ -1,5 +1,5 @@ from extreme_estimator.estimator.abstract_estimator import AbstractEstimator -from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import SmoothMarginEstimator +from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import LinearMarginEstimator from extreme_estimator.estimator.max_stable_estimator.abstract_max_stable_estimator import MaxStableEstimator from extreme_estimator.extreme_models.margin_model.abstract_margin_model import AbstractMarginModel from extreme_estimator.extreme_models.margin_model.margin_function.linear_margin_function import LinearMarginFunction @@ -20,7 +20,7 @@ class SmoothMarginalsThenUnitaryMsp(AbstractFullEstimator): max_stable_model: AbstractMaxStableModel): super().__init__(dataset) # Instantiate the two associated estimators - self.margin_estimator = SmoothMarginEstimator(dataset=dataset, margin_model=margin_model) + self.margin_estimator = LinearMarginEstimator(dataset=dataset, margin_model=margin_model) self.max_stable_estimator = MaxStableEstimator(dataset=dataset, max_stable_model=max_stable_model) def _fit(self): @@ -50,10 +50,10 @@ class FullEstimatorInASingleStepWithSmoothMargin(AbstractFullEstimator): super().__init__(dataset) self.max_stable_model = max_stable_model self.linear_margin_model = margin_model - assert isinstance(self.linear_margin_function_to_fit, LinearMarginFunction) + assert isinstance(self.margin_function_start_fit, LinearMarginFunction) @property - def linear_margin_function_to_fit(self): + def margin_function_start_fit(self): return self.linear_margin_model.margin_function_start_fit def _fit(self): @@ -63,11 +63,12 @@ class FullEstimatorInASingleStepWithSmoothMargin(AbstractFullEstimator): df_coordinates_spat=self.dataset.coordinates.df_spatial_coordinates(self.train_split), df_coordinates_temp=self.dataset.coordinates.df_temporal_coordinates(self.train_split), fit_marge=True, - fit_marge_form_dict=self.linear_margin_function_to_fit.form_dict, - margin_start_dict=self.linear_margin_function_to_fit.coef_dict + fit_marge_form_dict=self.margin_function_start_fit.form_dict, + margin_start_dict=self.margin_function_start_fit.coef_dict ) - # Create the fitted margin function - self.extract_fitted_models_from_fitted_params(self.linear_margin_function_to_fit, self.fitted_values) + + def extract_function_fitted(self): + return self.extract_function_fitted_from_function_to_fit(self.margin_function_start_fit) class PointwiseAndThenUnitaryMsp(AbstractFullEstimator): diff --git a/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py index fc6025ec..7acdf0ce 100644 --- a/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py +++ b/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py @@ -15,18 +15,8 @@ class AbstractMarginEstimator(AbstractEstimator, ABC): assert self.dataset.maxima_gev() is not None self._margin_function_fitted = None - @property - def margin_function_fitted(self) -> AbstractMarginFunction: - assert self._margin_function_fitted is not None, 'Error: estimator has not been fitted' - assert isinstance(self._margin_function_fitted, AbstractMarginFunction) - return self._margin_function_fitted - -class PointWiseMarginEstimator(AbstractMarginEstimator): - pass - - -class SmoothMarginEstimator(AbstractMarginEstimator): +class LinearMarginEstimator(AbstractMarginEstimator): """# with different type of marginals: cosntant, linear....""" def _error(self, true_max_stable_params: dict): @@ -44,5 +34,7 @@ class SmoothMarginEstimator(AbstractMarginEstimator): self._result_from_fit = self.margin_model.fitmargin_from_maxima_gev(data=maxima_gev_specialized, df_coordinates_spat=df_coordinates_spat, df_coordinates_temp=df_coordinates_temp) - self.extract_fitted_models_from_fitted_params(self.margin_model.margin_function_start_fit, self.fitted_values) + + def extract_function_fitted(self): + return self.extract_function_fitted_from_function_to_fit(self.margin_model.margin_function_start_fit) diff --git a/extreme_estimator/estimator/margin_estimator/margin_estimator_for_simulation.py b/extreme_estimator/estimator/margin_estimator/margin_estimator_for_simulation.py index 3ce73ecb..0737759e 100644 --- a/extreme_estimator/estimator/margin_estimator/margin_estimator_for_simulation.py +++ b/extreme_estimator/estimator/margin_estimator/margin_estimator_for_simulation.py @@ -1,17 +1,17 @@ -from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import SmoothMarginEstimator +from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import LinearMarginEstimator from extreme_estimator.extreme_models.margin_model.linear_margin_model import LinearAllParametersAllDimsMarginModel, \ ConstantMarginModel from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset -class SmoothMarginEstimator_LinearAllParametersAllDims(SmoothMarginEstimator): +class LinearMarginEstimator_LinearAllParametersAllDims(LinearMarginEstimator): @classmethod def from_dataset(cls, dataset: AbstractDataset): return cls(dataset, LinearAllParametersAllDimsMarginModel(dataset.coordinates)) -class SmoothMarginEstimator_Constant(SmoothMarginEstimator): +class LinearMarginEstimator_Constant(LinearMarginEstimator): @classmethod def from_dataset(cls, dataset: AbstractDataset): @@ -19,6 +19,6 @@ class SmoothMarginEstimator_Constant(SmoothMarginEstimator): MARGIN_ESTIMATORS_FOR_SIMULATION = [ - SmoothMarginEstimator_LinearAllParametersAllDims, - SmoothMarginEstimator_Constant, + LinearMarginEstimator_LinearAllParametersAllDims, + LinearMarginEstimator_Constant, ] diff --git a/extreme_estimator/estimator/max_stable_estimator/abstract_max_stable_estimator.py b/extreme_estimator/estimator/max_stable_estimator/abstract_max_stable_estimator.py index 3468d859..868c7c07 100644 --- a/extreme_estimator/estimator/max_stable_estimator/abstract_max_stable_estimator.py +++ b/extreme_estimator/estimator/max_stable_estimator/abstract_max_stable_estimator.py @@ -21,7 +21,7 @@ class MaxStableEstimator(AbstractMaxStableEstimator): self._result_from_fit = self.max_stable_model.fitmaxstab( data_frech=self.dataset.maxima_frech_for_spatial_extremes_package(split=self.train_split), df_coordinates_spat=self.dataset.df_coordinates(split=self.train_split)) - self.max_stable_params_fitted = self.fitted_values + self.max_stable_params_fitted = self.result_from_fit.all_parameters def scalars(self, true_max_stable_params: dict): error = self._error(true_max_stable_params) diff --git a/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py b/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py index 103d6d38..7545bc06 100644 --- a/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py +++ b/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py @@ -36,6 +36,11 @@ class LinearMarginFunction(ParametricMarginFunction): self.gev_param_name_to_coef = None # type: Dict[str, LinearCoef] super().__init__(coordinates, gev_param_name_to_dims, gev_param_name_to_coef) + # @classmethod + # def from_coef_dict(cls, coordinates: AbstractCoordinates, gev_param_name_to_dims: Dict[str, List[int]], + # coef_dict: Dict[str, float]): + # return super().from_coef_dict(coordinates, gev_param_name_to_dims, coef_dict) + def load_specific_param_function(self, gev_param_name) -> AbstractParamFunction: return LinearParamFunction(dims=self.gev_param_name_to_dims[gev_param_name], coordinates=self.coordinates.coordinates_values(), diff --git a/extreme_estimator/extreme_models/margin_model/parametric_margin_model.py b/extreme_estimator/extreme_models/margin_model/parametric_margin_model.py index 2a933e9d..81abad9c 100644 --- a/extreme_estimator/extreme_models/margin_model/parametric_margin_model.py +++ b/extreme_estimator/extreme_models/margin_model/parametric_margin_model.py @@ -5,7 +5,7 @@ import pandas as pd from extreme_estimator.extreme_models.margin_model.margin_function.parametric_margin_function import \ ParametricMarginFunction -from extreme_estimator.extreme_models.result_from_fit import ResultFromFit +from extreme_estimator.extreme_models.result_from_fit import ResultFromFit, ResultFromSpatialExtreme from extreme_estimator.extreme_models.margin_model.abstract_margin_model import AbstractMarginModel from extreme_estimator.extreme_models.utils import safe_run_r_estimator, r, get_coord, \ get_margin_formula @@ -24,11 +24,7 @@ class ParametricMarginModel(AbstractMarginModel, ABC): self.margin_function_start_fit = None # type: ParametricMarginFunction super().__init__(coordinates, use_start_value, params_start_fit, params_sample) - def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame, - df_coordinates_temp: pd.DataFrame) -> ResultFromFit: - assert data.shape[1] == len(df_coordinates_spat) - # assert data.shape[0] == len(df_coordinates_temp) - + def add_starting_temporal_point(self, df_coordinates_temp: pd.DataFrame): # Enforce a starting point for the temporal trend if self.starting_point is not None: ind_to_modify = df_coordinates_temp.iloc[:, 0] <= self.starting_point # type: pd.Series @@ -36,7 +32,16 @@ class ParametricMarginModel(AbstractMarginModel, ABC): assert 0 < sum(ind_to_modify) < len(ind_to_modify) - 20 # Modify the temporal coordinates to enforce the stationarity df_coordinates_temp.loc[ind_to_modify] = self.starting_point + return df_coordinates_temp + + def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame, + df_coordinates_temp: pd.DataFrame) -> ResultFromFit: + assert data.shape[1] == len(df_coordinates_spat) + + # Modify df_coordinates_temp + df_coordinates_temp = self.add_starting_temporal_point(df_coordinates_temp) + # Margin formula for fitspatgev fit_params = get_margin_formula(self.margin_function_start_fit.form_dict) # Covariables @@ -47,5 +52,6 @@ class ParametricMarginModel(AbstractMarginModel, ABC): coef_dict = self.margin_function_start_fit.coef_dict fit_params['start'] = r.list(**coef_dict) - return safe_run_r_estimator(function=r.fitspatgev, use_start=self.use_start_value, data=data, + res = safe_run_r_estimator(function=r.fitspatgev, use_start=self.use_start_value, data=data, covariables=covariables, **fit_params) + return ResultFromSpatialExtreme(res) diff --git a/extreme_estimator/extreme_models/max_stable_model/abstract_max_stable_model.py b/extreme_estimator/extreme_models/max_stable_model/abstract_max_stable_model.py index a3b136ee..56add58d 100644 --- a/extreme_estimator/extreme_models/max_stable_model/abstract_max_stable_model.py +++ b/extreme_estimator/extreme_models/max_stable_model/abstract_max_stable_model.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd from extreme_estimator.extreme_models.abstract_model import AbstractModel -from extreme_estimator.extreme_models.result_from_fit import ResultFromFit +from extreme_estimator.extreme_models.result_from_fit import ResultFromFit, ResultFromSpatialExtreme from extreme_estimator.extreme_models.utils import r, safe_run_r_estimator, get_coord, \ get_margin_formula from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates @@ -67,8 +67,9 @@ class AbstractMaxStableModel(AbstractModel): fit_params['temp.cov'] = get_coord(df_coordinates_temp) # Run the fitmaxstab in R - return safe_run_r_estimator(function=r.fitmaxstab, use_start=self.use_start_value, data=data, coord=coord, - **fit_params) + res = safe_run_r_estimator(function=r.fitmaxstab, use_start=self.use_start_value, data=data, coord=coord, + **fit_params) + return ResultFromSpatialExtreme(res) def rmaxstab(self, nb_obs: int, coordinates_values: np.ndarray) -> np.ndarray: """ diff --git a/extreme_estimator/extreme_models/result_from_fit.py b/extreme_estimator/extreme_models/result_from_fit.py index f2a41d50..9ddcf476 100644 --- a/extreme_estimator/extreme_models/result_from_fit.py +++ b/extreme_estimator/extreme_models/result_from_fit.py @@ -2,13 +2,10 @@ from typing import Dict from rpy2 import robjects +from extreme_estimator.extreme_models.margin_model.param_function.linear_coef import LinearCoef + class ResultFromFit(object): - """ - Handler from any result with the result of a fit functions from the package Spatial Extreme - """ - FITTED_VALUES_NAME = 'fitted.values' - CONVERGENCE_NAME = 'convergence' def __init__(self, result_from_fit: robjects.ListVector) -> None: if hasattr(result_from_fit, 'names'): @@ -20,6 +17,35 @@ class ResultFromFit(object): def names(self): return self.name_to_value.keys() + @property + def all_parameters(self): + raise NotImplementedError + + @property + def margin_coef_dict(self): + raise NotImplementedError + + +class ResultFromIsmev(ResultFromFit): + pass + + @property + def mle(self): + return self.res['mle'] + + + @property + def nllh(self): + return self.res['nllh'] + +class ResultFromSpatialExtreme(ResultFromFit): + """ + Handler from any result with the result of a fit functions from the package Spatial Extreme + """ + FITTED_VALUES_NAME = 'fitted.values' + CONVERGENCE_NAME = 'convergence' + + @property def convergence(self) -> str: convergence_value = self.name_to_value[self.CONVERGENCE_NAME] @@ -30,6 +56,10 @@ class ResultFromFit(object): return self.convergence == "successful" @property - def fitted_values(self) -> Dict[str, float]: + def all_parameters(self) -> Dict[str, float]: fitted_values = self.name_to_value[self.FITTED_VALUES_NAME] return {key: fitted_values.rx2(key)[0] for key in fitted_values.names} + + @property + def margin_coef_dict(self): + return {k: v for k, v in self.all_parameters.items() if LinearCoef.COEFF_STR in k} diff --git a/extreme_estimator/extreme_models/utils.py b/extreme_estimator/extreme_models/utils.py index 327ea0d6..afeaf6fb 100644 --- a/extreme_estimator/extreme_models/utils.py +++ b/extreme_estimator/extreme_models/utils.py @@ -16,12 +16,13 @@ from rpy2.rinterface._rinterface import RRuntimeError from rpy2.robjects import numpy2ri from rpy2.robjects import pandas2ri -from extreme_estimator.extreme_models.result_from_fit import ResultFromFit +from extreme_estimator.extreme_models.result_from_fit import ResultFromFit, ResultFromSpatialExtreme r = ro.R() numpy2ri.activate() pandas2ri.activate() r.library('SpatialExtremes') +r.library('ismev') # Notice: R is not reloading all the time, the SpatialExtremes, so it's quite hard to debug or print in the code... @@ -43,16 +44,18 @@ class WarningMaximumAbsoluteValueTooHigh(Warning): pass -def safe_run_r_estimator(function, data, use_start=False, threshold_max_abs_value=100, **parameters) -> ResultFromFit: - # Raise warning if the maximum absolute value is above a threshold - assert isinstance(data, np.ndarray) - maximum_absolute_value = np.max(np.abs(data)) - if maximum_absolute_value > threshold_max_abs_value: - msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}"\ - .format(maximum_absolute_value, threshold_max_abs_value) - msg += '\nPotentially in that case, data should be re-normalized' - warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh) - parameters['data'] = data +def safe_run_r_estimator(function, data=None, use_start=False, threshold_max_abs_value=100, **parameters) -> robjects.ListVector: + # Some checks for Spatial Extremes + if data is not None: + # Raise warning if the maximum absolute value is above a threshold + assert isinstance(data, np.ndarray) + maximum_absolute_value = np.max(np.abs(data)) + if maximum_absolute_value > threshold_max_abs_value: + msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \ + .format(maximum_absolute_value, threshold_max_abs_value) + msg += '\nPotentially in that case, data should be re-normalized' + warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh) + parameters['data'] = data # First run without using start value # Then if it crashes, use start value run_successful = False @@ -73,7 +76,7 @@ def safe_run_r_estimator(function, data, use_start=False, threshold_max_abs_valu if isinstance(e, RRuntimeWarning): print(e.__repr__()) print('WARNING') - return ResultFromFit(res) + return res def get_coord(df_coordinates: pd.DataFrame): @@ -82,9 +85,13 @@ def get_coord(df_coordinates: pd.DataFrame): return coord -def get_margin_formula(fit_marge_form_dict) -> Dict: +def get_null(): as_null = r['as.null'] - margin_formula = {k: robjects.Formula(v) if v != 'NULL' else as_null(1.0) for k, v in fit_marge_form_dict.items()} + return as_null(1.0) + + +def get_margin_formula(fit_marge_form_dict) -> Dict: + margin_formula = {k: robjects.Formula(v) if v != 'NULL' else get_null() for k, v in fit_marge_form_dict.items()} return margin_formula # def conversion_to_FloatVector(data): @@ -93,4 +100,3 @@ def get_margin_formula(fit_marge_form_dict) -> Dict: # data = data.values # assert isinstance(data, np.ndarray) # return npr.numpy2ri(data) - diff --git a/extreme_estimator/margin_fits/gev/gev_fit.py b/extreme_estimator/margin_fits/gev/gev_fit.py new file mode 100644 index 00000000..16865750 --- /dev/null +++ b/extreme_estimator/margin_fits/gev/gev_fit.py @@ -0,0 +1,16 @@ +import numpy as np + +from extreme_estimator.margin_fits.gev.gev_params import GevParams +from extreme_estimator.margin_fits.margin_fits_utils import spatial_extreme_gevmle_fit + + +class GevFit(object): + + def __init__(self, x_gev: np.ndarray, block_size=None): + assert np.ndim(x_gev) == 1 + assert len(x_gev) > 1 + self.x_gev = x_gev + + @property + def gev_params(self): + raise NotImplementedError \ No newline at end of file diff --git a/extreme_estimator/margin_fits/gev/gevmle_fit.py b/extreme_estimator/margin_fits/gev/gevmle_fit.py index 8ad75d18..fff6dad1 100644 --- a/extreme_estimator/margin_fits/gev/gevmle_fit.py +++ b/extreme_estimator/margin_fits/gev/gevmle_fit.py @@ -1,14 +1,19 @@ import numpy as np +from extreme_estimator.margin_fits.gev.gev_fit import GevFit from extreme_estimator.margin_fits.gev.gev_params import GevParams from extreme_estimator.margin_fits.margin_fits_utils import spatial_extreme_gevmle_fit -class GevMleFit(object): +class GevMleFit(GevFit): def __init__(self, x_gev: np.ndarray, block_size=None): - assert np.ndim(x_gev) == 1 - assert len(x_gev) > 1 - self.x_gev = x_gev - self.mle_params = spatial_extreme_gevmle_fit(x_gev) - self.gev_params = GevParams.from_dict({**self.mle_params, 'block_size': block_size}) + super().__init__(x_gev, block_size) + self._gev_params = spatial_extreme_gevmle_fit(x_gev) + self.gev_params_object = GevParams.from_dict({**self._gev_params, 'block_size': block_size}) + + @property + def gev_params(self): + return self._gev_params + + diff --git a/extreme_estimator/margin_fits/gev/ismev_gev_fit.py b/extreme_estimator/margin_fits/gev/ismev_gev_fit.py new file mode 100644 index 00000000..42eba04a --- /dev/null +++ b/extreme_estimator/margin_fits/gev/ismev_gev_fit.py @@ -0,0 +1,23 @@ +import numpy as np + +from extreme_estimator.margin_fits.gev.gev_fit import GevFit +from extreme_estimator.margin_fits.gev.gev_params import GevParams +from extreme_estimator.margin_fits.margin_fits_utils import spatial_extreme_gevmle_fit, ismev_gev_fit + + +class IsmevGevFit(GevFit): + # todo: this could be modeled with a margin_function depending only on time + + def __init__(self, x_gev: np.ndarray, y=None, mul=None): + super().__init__(x_gev) + self.y = y + self.mul = mul + self.res = ismev_gev_fit(x_gev, y, mul) + + @property + def gev_params(self): + assert self.y is None + return dict(zip(GevParams.PARAM_NAMES, self.res['mle'])) + + + diff --git a/extreme_estimator/margin_fits/gev/wrapper_ismev_gev_fit.R b/extreme_estimator/margin_fits/gev/wrapper_ismev_gev_fit.R new file mode 100644 index 00000000..3eed3a0f --- /dev/null +++ b/extreme_estimator/margin_fits/gev/wrapper_ismev_gev_fit.R @@ -0,0 +1,128 @@ +# Title : TODO +# Objective : TODO +# Created by: erwan +# Created on: 06/05/2019 + +library(stats4) +library(ismev) +library(SpatialExtremes) + +# Boolean for python call +call_main = !exists("python_wrapping") + +if (call_main) { + set.seed(42) + N <- 50 + loc = 0; scale = 1; shape <- 1 + x_gev <- rgev(N, loc = loc, scale = scale, shape = shape) + start_loc = 0; start_scale = 1; start_shape = 1 + # mul = NULL + # y = NULL + mul = 1 + y =matrix(ncol=1,nrow=50) + y[,1]=seq(1,50,1) +} + + + + +gev_fit_copy <- function (xdat, ydat = NULL, mul = NULL, sigl = NULL, shl = NULL, + mulink = identity, siglink = identity, shlink = identity, + muinit = NULL, siginit = NULL, shinit = NULL, show = TRUE, + method = "Nelder-Mead", maxit = 10000, ...) +{ + z <- list() + npmu <- length(mul) + 1 + npsc <- length(sigl) + 1 + npsh <- length(shl) + 1 + z$trans <- FALSE + in2 <- sqrt(6 * var(xdat))/pi + in1 <- mean(xdat) - 0.57722 * in2 + if (is.null(mul)) { + mumat <- as.matrix(rep(1, length(xdat))) + if (is.null(muinit)) + muinit <- in1 + } + else { + z$trans <- TRUE + mumat <- cbind(rep(1, length(xdat)), ydat[, mul]) + if (is.null(muinit)) + muinit <- c(in1, rep(0, length(mul))) + } + if (is.null(sigl)) { + sigmat <- as.matrix(rep(1, length(xdat))) + if (is.null(siginit)) + siginit <- in2 + } + else { + z$trans <- TRUE + sigmat <- cbind(rep(1, length(xdat)), ydat[, sigl]) + if (is.null(siginit)) + siginit <- c(in2, rep(0, length(sigl))) + } + if (is.null(shl)) { + shmat <- as.matrix(rep(1, length(xdat))) + if (is.null(shinit)) + shinit <- 0.1 + } + else { + z$trans <- TRUE + shmat <- cbind(rep(1, length(xdat)), ydat[, shl]) + if (is.null(shinit)) + shinit <- c(0.1, rep(0, length(shl))) + } + z$model <- list(mul, sigl, shl) + z$link <- deparse(substitute(c(mulink, siglink, shlink))) + init <- c(muinit, siginit, shinit) + gev.lik <- function(a) { + mu <- mulink(mumat %*% (a[1:npmu])) + sc <- siglink(sigmat %*% (a[seq(npmu + 1, length = npsc)])) + xi <- shlink(shmat %*% (a[seq(npmu + npsc + 1, length = npsh)])) + print('here') + print(class(xdat)) + print(class(mu)) + y <- (xdat - mu)/sc + y <- 1 + xi * y + if (any(y <= 0) || any(sc <= 0)) + return(10^6) + sum(log(sc)) + sum(y^(-1/xi)) + sum(log(y) * (1/xi + + 1)) + } + x <- optim(init, gev.lik, hessian = TRUE, method = method, + control = list(maxit = maxit, ...)) + z$conv <- x$convergence + mu <- mulink(mumat %*% (x$par[1:npmu])) + sc <- siglink(sigmat %*% (x$par[seq(npmu + 1, length = npsc)])) + xi <- shlink(shmat %*% (x$par[seq(npmu + npsc + 1, length = npsh)])) + z$nllh <- x$value + z$data <- xdat + if (z$trans) { + z$data <- -log(as.vector((1 + (xi * (xdat - mu))/sc)^(-1/xi))) + } + z$mle <- x$par + z$cov <- solve(x$hessian) + z$se <- sqrt(diag(z$cov)) + z$vals <- cbind(mu, sc, xi) + if (show) { + if (z$trans) + print(z[c(2, 3, 4)]) + else print(z[4]) + if (!z$conv) + print(z[c(5, 7, 9)]) + } + class(z) <- "gev.fit" + invisible(z) +} + +gev_fit <- function(data, y = NULL, trend=NULL){ + params = gev_fit_copy(data, y, mul=trend) + return(params) +} + +main <- function(){ + param = gev_fit(x_gev, y, mul) +} + +if (call_main) { + main() +} diff --git a/extreme_estimator/margin_fits/margin_fits_utils.py b/extreme_estimator/margin_fits/margin_fits_utils.py index c630955c..c22a9ca3 100644 --- a/extreme_estimator/margin_fits/margin_fits_utils.py +++ b/extreme_estimator/margin_fits/margin_fits_utils.py @@ -1,4 +1,7 @@ -from extreme_estimator.extreme_models.utils import r +import numpy as np +import rpy2.robjects as ro + +from extreme_estimator.extreme_models.utils import r, get_null """ These two functions are “extremely light†functions to fit the GEV/GPD. These functions are mainlyuseful @@ -17,4 +20,29 @@ def spatial_extreme_gpdmle_fit(x_gev, threshold): res = r.gpdmle(x_gev, threshold, method="Nelder") return dict(zip(res.names, res)) + # todo: define more robust function gevmle_fit/gpdmle_fit + +""" +IsMev fit functions +""" + + +def ismev_gev_fit(x_gev, y, mul): + xdat = ro.FloatVector(x_gev) + gev_fit = r('gev.fit') + y = y if y is not None else get_null() + mul = mul if mul is not None else get_null() + res = gev_fit(xdat, y, mul) + + # r.assign('python_wrapping', True) + # r.source(file="""/home/erwan/Documents/projects/spatiotemporalextremes/extreme_estimator/margin_fits/gev/wrapper_ismev_gev_fit.R""") + # y = np.arange(1, len(x_gev), 1).reshape((-11, 1)) + # res = r.gev_fit(data=xdat, y=y, trend=1) + return dict(zip(res.names, res)) + +# if __name__ == '__main__': +# a = np.array([2, 2]) +# v = ro.vectors.FloatVector((1, 2, 3, 4, 5)) +# ro.globalenv['a'] = a +# print(r('class(a)')) \ No newline at end of file diff --git a/test/test_extreme_estimator/test_estimator/test_margin_estimators.py b/test/test_extreme_estimator/test_estimator/test_margin_estimators.py index 97c4a63c..309d8701 100644 --- a/test/test_extreme_estimator/test_estimator/test_margin_estimators.py +++ b/test/test_extreme_estimator/test_estimator/test_margin_estimators.py @@ -1,6 +1,6 @@ import unittest -from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import SmoothMarginEstimator +from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import LinearMarginEstimator from spatio_temporal_dataset.dataset.simulation_dataset import MarginDataset from test.test_utils import load_smooth_margin_models, load_test_1D_and_2D_spatial_coordinates, \ load_test_spatiotemporal_coordinates @@ -28,7 +28,7 @@ class TestSmoothMarginEstimator(unittest.TestCase): margin_model=margin_model, coordinates=coordinates) # Fit estimator - estimator = SmoothMarginEstimator(dataset=dataset, margin_model=margin_model) + estimator = LinearMarginEstimator(dataset=dataset, margin_model=margin_model) estimator.fit() # Plot if self.DISPLAY: diff --git a/test/test_extreme_estimator/test_margin_fits/test_gev/test_gevmle_fit.py b/test/test_extreme_estimator/test_margin_fits/test_gev/test_gevmle_fit.py index 99a56cb3..27b97271 100644 --- a/test/test_extreme_estimator/test_margin_fits/test_gev/test_gevmle_fit.py +++ b/test/test_extreme_estimator/test_margin_fits/test_gev/test_gevmle_fit.py @@ -4,6 +4,7 @@ import numpy as np from extreme_estimator.extreme_models.utils import r, set_seed_r from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit +from extreme_estimator.margin_fits.gev.ismev_gev_fit import IsmevGevFit class TestGevMleFit(unittest.TestCase): @@ -18,16 +19,20 @@ class TestGevMleFit(unittest.TestCase): """) def test_gevmle_fit(self): - # Get the MLE estimator estimator = GevMleFit(x_gev=np.array(r['x_gev'])) - self.fit_estimator(estimator) + mle_params_ref = {'loc': 0.0219, 'scale': 1.0347, 'shape': 0.8290} + self.fit_estimator(estimator, ref=mle_params_ref) + + def test_ismev_gev_fit(self): + estimator = IsmevGevFit(x_gev=np.array(r['x_gev'])) + ismev_ref = {'loc': 0.0219, 'scale': 1.0347, 'shape': 0.8295} + self.fit_estimator(estimator, ismev_ref) - def fit_estimator(self, estimator): + def fit_estimator(self, estimator, ref): # Compare the MLE estimated parameters to the reference - mle_params_estimated = estimator.mle_params - mle_params_ref = {'loc': 0.0219, 'scale': 1.0347, 'shape': 0.8290} - for key in mle_params_ref.keys(): - self.assertAlmostEqual(mle_params_ref[key], mle_params_estimated[key], places=3) + mle_params_estimated = estimator.gev_params + for key in ref.keys(): + self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3) if __name__ == '__main__': diff --git a/test/test_unitary/test_fitmaxstab/test_fitmaxstab_with_margin.py b/test/test_unitary/test_fitmaxstab/test_fitmaxstab_with_margin.py index 45ccd462..60aa9998 100644 --- a/test/test_unitary/test_fitmaxstab/test_fitmaxstab_with_margin.py +++ b/test/test_unitary/test_fitmaxstab/test_fitmaxstab_with_margin.py @@ -32,7 +32,7 @@ class TestMaxStableFitWithConstantMargin(TestUnitaryAbstract): full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model, max_stable_model) full_estimator.fit() - return full_estimator.fitted_values + return full_estimator.result_from_fit.all_parameters def test_max_stable_fit_with_constant_margin(self): self.compare() @@ -60,7 +60,7 @@ class TestMaxStableFitWithLinearMargin(TestUnitaryAbstract): full_estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset, margin_model, max_stable_model) full_estimator.fit() - return full_estimator.fitted_values + return full_estimator.result_from_fit.all_parameters def test_max_stable_fit_with_linear_margin(self): self.compare() -- GitLab