diff --git a/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py index 64082a398d0062e587a7898e91055089e8d63489..0b3ebd762ff3d89474e8e9ce99970fae7fd6ef3e 100644 --- a/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py +++ b/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py @@ -1,3 +1,5 @@ +from abc import ABC + from extreme_estimator.estimator.abstract_estimator import AbstractEstimator from extreme_estimator.extreme_models.margin_model.margin_function.abstract_margin_function import \ AbstractMarginFunction @@ -6,7 +8,7 @@ from extreme_estimator.extreme_models.margin_model.smooth_margin_model import Li from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset -class AbstractMarginEstimator(AbstractEstimator): +class AbstractMarginEstimator(AbstractEstimator, ABC): def __init__(self, dataset: AbstractDataset): super().__init__(dataset) @@ -34,7 +36,10 @@ class SmoothMarginEstimator(AbstractMarginEstimator): def _fit(self): maxima_gev = self.dataset.maxima_gev(split=self.train_split) df_coordinates = self.dataset.df_coordinates(self.train_split) + df_coordinates_spatial = df_coordinates.loc[:, self.dataset.coordinates.coordinates_spatial_names] + df_coordinates_temporal = df_coordinates.loc[:, self.dataset.coordinates.coordinates_temporal_names] self._params_fitted = self.margin_model.fitmargin_from_maxima_gev(maxima_gev=maxima_gev, - df_coordinates=df_coordinates) + df_coordinates_spatial=df_coordinates_spatial, + df_coordinates_temporal=df_coordinates_temporal) self.extract_fitted_models_from_fitted_params(self.margin_model.margin_function_start_fit, self._params_fitted) assert isinstance(self.margin_function_fitted, AbstractMarginFunction) diff --git a/extreme_estimator/extreme_models/margin_model/abstract_margin_model.py b/extreme_estimator/extreme_models/margin_model/abstract_margin_model.py index df51314ac37f5765a73a2c4b0269dbcb63512b71..d4ca048ff05cee419c60c8397101cac85855465e 100644 --- a/extreme_estimator/extreme_models/margin_model/abstract_margin_model.py +++ b/extreme_estimator/extreme_models/margin_model/abstract_margin_model.py @@ -69,8 +69,8 @@ class AbstractMarginModel(AbstractModel): # Fitting methods needs to be defined in child classes - def fitmargin_from_maxima_gev(self, maxima_gev: np.ndarray, df_coordinates: pd.DataFrame) \ - -> AbstractMarginFunction: + def fitmargin_from_maxima_gev(self, maxima_gev: np.ndarray, df_coordinates_spatial: pd.DataFrame, + df_coordinates_temporal: pd.DataFrame) -> AbstractMarginFunction: raise NotImplementedError diff --git a/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py b/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py index 65749ca67407fd87f7b5b77e28baf4528f686a92..c9c988823a86e4644e3a05fa5184ca24df1d9f05 100644 --- a/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py +++ b/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py @@ -13,9 +13,9 @@ class LinearMarginFunction(IndependentMarginFunction): """ Margin Function, where each parameter can augment linearly along any dimension. dim = 0 correspond to the intercept - dim = 1 correspond to the coordinate X - dim = 2 correspond to the coordinate Y - dim = 3 correspond to the coordinate Z + dim = 1 correspond to the first coordinate + dim = 2 correspond to the second coordinate + dim = 3 correspond to the third coordinate.... gev_param_name_to_linear_dims maps each parameter of the GEV distribution to its linear dimensions @@ -37,7 +37,8 @@ class LinearMarginFunction(IndependentMarginFunction): # Check the linear_dim are well-defined with respect to the coordinates for linear_dims in self.gev_param_name_to_linear_dims.values(): for dim in linear_dims: - assert 0 < dim <= coordinates.nb_coordinates, "dim={}, nb_columns={}".format(dim, coordinates.nb_coordinates) + assert 0 < dim <= coordinates.nb_coordinates, "dim={}, nb_columns={}".format(dim, + coordinates.nb_coordinates) # Map each gev_param_name to its corresponding param_function for gev_param_name in GevParams.PARAM_NAMES: @@ -60,16 +61,41 @@ class LinearMarginFunction(IndependentMarginFunction): for gev_param_name in GevParams.PARAM_NAMES: linear_dims = gev_param_name_to_linear_dims.get(gev_param_name, []) linear_coef = LinearCoef.from_coef_dict(coef_dict=coef_dict, gev_param_name=gev_param_name, - linear_dims=linear_dims) + linear_dims=linear_dims, + dim_to_coefficient_name=cls.dim_to_coefficient_name(coordinates)) gev_param_name_to_linear_coef[gev_param_name] = linear_coef return cls(coordinates, gev_param_name_to_linear_dims, gev_param_name_to_linear_coef) + @classmethod + def dim_to_coefficient_name(cls, coordinates: AbstractCoordinates) -> Dict[int, str]: + # Intercept correspond to the dimension 0 + dim_to_coefficient_name = {0: LinearCoef.INTERCEPT_NAME} + # Coordinates correspond to the dimension starting from 1 + for i, coordinate_name in enumerate(coordinates.coordinates_names, 1): + dim_to_coefficient_name[i] = coordinate_name + return dim_to_coefficient_name + + @classmethod + def coefficient_name_to_dim(cls, coordinates: AbstractCoordinates) -> Dict[int, str]: + return {v: k for k, v in cls.dim_to_coefficient_name(coordinates).items()} + @property def form_dict(self) -> Dict[str, str]: form_dict = {} for gev_param_name in GevParams.PARAM_NAMES: linear_dims = self.gev_param_name_to_linear_dims.get(gev_param_name, []) - form_dict.update(self.gev_param_name_to_linear_coef[gev_param_name].form_dict(linear_dims=linear_dims)) + # Load spatial form_dict (only if we have some spatial coordinates) + if self.coordinates.coordinates_spatial_names: + spatial_names = [name for name in self.coordinates.coordinates_spatial_names + if self.coefficient_name_to_dim(self.coordinates)[name] in linear_dims] + spatial_form = self.gev_param_name_to_linear_coef[gev_param_name].spatial_form_dict(spatial_names) + form_dict.update(spatial_form) + # Load temporal form dict (only if we have some temporal coordinates) + if self.coordinates.coordinates_temporal_names: + temporal_names = [name for name in self.coordinates.coordinates_temporal_names + if self.coefficient_name_to_dim(self.coordinates)[name] in linear_dims] + temporal_form = self.gev_param_name_to_linear_coef[gev_param_name].temporal_form_dict(temporal_names) + form_dict.update(temporal_form) return form_dict @property @@ -77,5 +103,6 @@ class LinearMarginFunction(IndependentMarginFunction): coef_dict = {} for gev_param_name in GevParams.PARAM_NAMES: linear_dims = self.gev_param_name_to_linear_dims.get(gev_param_name, []) - coef_dict.update(self.gev_param_name_to_linear_coef[gev_param_name].coef_dict(linear_dims=linear_dims)) + linear_coef = self.gev_param_name_to_linear_coef[gev_param_name] + coef_dict.update(linear_coef.coef_dict(linear_dims, self.dim_to_coefficient_name(self.coordinates))) return coef_dict diff --git a/extreme_estimator/extreme_models/margin_model/param_function/linear_coef.py b/extreme_estimator/extreme_models/margin_model/param_function/linear_coef.py index d025a3d1f875e28b972e13ae4711b84cbb2ebd4e..66e4067183446d086362455f4605eddcd8ffd786 100644 --- a/extreme_estimator/extreme_models/margin_model/param_function/linear_coef.py +++ b/extreme_estimator/extreme_models/margin_model/param_function/linear_coef.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, List from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates @@ -7,10 +7,11 @@ class LinearCoef(object): """ Object that maps each dimension to its corresponding coefficient. dim = 0 correspond to the intercept - dim = 1 correspond to the coordinate X - dim = 2 correspond to the coordinate Y - dim = 3 correspond to the coordinate Z + dim = 1 correspond to the first coordinate + dim = 2 correspond to the second coordinate + dim = 3 correspond to the third coordinate... """ + INTERCEPT_NAME = 'intercept' def __init__(self, gev_param_name: str, dim_to_coef: Dict[int, float] = None, default_value: float = 0.0): self.gev_param_name = gev_param_name @@ -24,31 +25,50 @@ class LinearCoef(object): return self.dim_to_coef.get(dim, self.default_value) @property - def intercept(self): + def intercept(self) -> float: return self.get_coef(dim=0) - @staticmethod - def coef_template_str(gev_param_name): - return gev_param_name + 'Coeff{}' + @classmethod + def coef_template_str(cls, gev_param_name: str, coefficient_name: str) -> str: + """ + Example of coef that can be specified + -for the spatial covariates: locCoeff1 + -for the temporal covariates: tempCoeffLoc + :param gev_param_name: + :param coefficient_name: + :return: + """ + assert coefficient_name == cls.INTERCEPT_NAME or coefficient_name in AbstractCoordinates.COORDINATES_NAMES + if coefficient_name == cls.INTERCEPT_NAME or coefficient_name in AbstractCoordinates.COORDINATE_SPATIAL_NAMES: + return gev_param_name + 'Coeff{}' + else: + return 'tempCoeff' + gev_param_name.title() + '{}' @classmethod - def from_coef_dict(cls, coef_dict: Dict[str, float], gev_param_name: str, linear_dims): + def from_coef_dict(cls, coef_dict: Dict[str, float], gev_param_name: str, linear_dims: List[int], + dim_to_coefficient_name): dims = [0] + linear_dims dim_to_coef = {} for j, dim in enumerate(dims, 1): - coef = coef_dict[cls.coef_template_str(gev_param_name).format(j)] + coefficient_name = dim_to_coefficient_name[dim] + coef = coef_dict[cls.coef_template_str(gev_param_name, coefficient_name).format(j)] dim_to_coef[dim] = coef return cls(gev_param_name, dim_to_coef) - def coef_dict(self, linear_dims) -> Dict[str, float]: - # Constant param must be specified for all the parameters - coef_dict = {self.coef_template_str(self.gev_param_name).format(1): self.intercept} - # Specify only the param that belongs to dim_to_coef - for j, dim in enumerate(linear_dims, 2): - coef_dict[self.coef_template_str(self.gev_param_name).format(j)] = self.dim_to_coef[dim] + def coef_dict(self, linear_dims, dim_to_coefficient_name) -> Dict[str, float]: + dims = [0] + linear_dims + coef_dict = {} + for j, dim in enumerate(dims, 1): + coefficient_name = dim_to_coefficient_name[dim] + coef = self.dim_to_coef[dim] + coef_dict[self.coef_template_str(self.gev_param_name, coefficient_name).format(j)] = coef return coef_dict - def form_dict(self, linear_dims) -> Dict[str, str]: + def form_dict(self, names: List[str]) -> Dict[str, str]: + formula_str = '1' if not names else '+'.join(names) + return {self.gev_param_name + '.form': self.gev_param_name + ' ~ ' + formula_str} + + def spatial_form_dict(self, coordinate_spatial_names: List[str]) -> Dict[str, str]: """ Example of formula that could be specified: loc.form = loc ~ coord_x @@ -56,6 +76,16 @@ class LinearCoef(object): shape.form = shape ~ coord_x+coord_y :return: """ - dim_to_name = {i: name for i, name in enumerate(AbstractCoordinates.COORDINATES_NAMES, 1)} - formula_str = '1' if not linear_dims else '+'.join([dim_to_name[dim] for dim in linear_dims]) - return {self.gev_param_name + '.form': self.gev_param_name + ' ~ ' + formula_str} \ No newline at end of file + assert all([name in AbstractCoordinates.COORDINATE_SPATIAL_NAMES for name in coordinate_spatial_names]) + return self.form_dict(coordinate_spatial_names) + + def temporal_form_dict(self, coordinate_temporal_names: List[str]) -> Dict[str, str]: + """ + Example of formula that could be specified: + temp.loc.form = loc ~ coord_t + temp.scale.form = scale ~ coord_t + temp.shape.form = shape ~ 1 + :return: + """ + assert all([name in [AbstractCoordinates.COORDINATE_T] for name in coordinate_temporal_names]) + return {'temp.' + k: v for k, v in self.form_dict(coordinate_temporal_names).items()} diff --git a/extreme_estimator/extreme_models/max_stable_model/max_stable_fit.R b/extreme_estimator/extreme_models/max_stable_model/max_stable_fit.R index 0de2193bd8a64c50dccf311fab6a5c18cc7234d2..744235c88eba7d8d0a8357d565e184f169bd8e4d 100644 --- a/extreme_estimator/extreme_models/max_stable_model/max_stable_fit.R +++ b/extreme_estimator/extreme_models/max_stable_model/max_stable_fit.R @@ -29,6 +29,49 @@ rmaxstab2D <- function (n.obs){ print(res['fitted.values']) } + +# rmaxstab with 3D data +# rmaxstab3D <- function (n.obs){ +# # todo: problem this function is currently not available in dimensions 3 +# n.site = 2 +# dimension = 3 +# ar = rnorm(3*n.obs*n.site, sd = sqrt(.2)) +# print(ar) +# coord <- array(ar, dim = c(4,3)) +# print(coord) +# colnames(coord) = c("E", "N", "T") +# print(colnames(coord)) +# data <- coord +# +# # Generate the data +# # data <- rmaxstab(n.obs, coord, "gauss", cov11 = 100, cov12 = 25, cov22 = 220) +# # data <- rmaxstab(n.obs, coord, "brown", range = 3, smooth = 0.5) +# # data <- rmaxstab(n.obs, coord, "whitmat", nugget = 0.0, range = 3, smooth = 0.5) +# # Fit back the data +# # print(data)n +# # res = fitmaxstab(data, coord, "gauss", fit.marge=FALSE, ) +# # res = fitmaxstab(data, coord, "brown") +# # res = fitmaxstab(data, coord, "whitmat", start=) +# print(class(coord)) +# print(colnames(coord)) +# +# loc.form = loc ~ N +# scale.form = scale ~ 1 +# shape.form = shape ~ 1 +# +# temp_loc.form = loc ~ T +# temp_scale.form = scale ~ 1 +# temp_shape.form = shape ~ 1 +# +# namedlist = list(cov11 = 1.0, cov12 = 1.2, cov22 = 2.2, locCoeff1=1.0, locCoeff2=1.0, scaleCoeff1=1.0, shapeCoeff1=1.0) +# res = fitmaxstab(data=data, coord=coord, cov.mod="gauss", start=namedlist, fit.marge=TRUE, +# loc.form=loc.form, scale.form=scale.form,shape.form=shape.form, +# temp.loc.form = temp_loc.form, temp.scale.form = temp_scale.form, temp.shape.form = temp_shape.form) +# print(res['fitted.values']) +# } + +fitspatgev() + # rmaxstab with 1D data rmaxstab1D <- function (n.obs){ @@ -62,7 +105,8 @@ call_main = !exists("python_wrapping") if (call_main) { set.seed(42) n.obs = 500 - rmaxstab2D(n.obs) + # rmaxstab2D(n.obs) + # rmaxstab3D(n.obs) # rmaxstab1D(n.obs) # namedlist = list(cov11 = 1.0, cov12 = 1.2, cov22 = 2.2) diff --git a/extreme_estimator/extreme_models/utils.py b/extreme_estimator/extreme_models/utils.py index 2d84f995292750b39c1bfc1411b7b5e08fb3ae80..97c0c7eeb026426d15719e9d19808dba2e9dd63d 100644 --- a/extreme_estimator/extreme_models/utils.py +++ b/extreme_estimator/extreme_models/utils.py @@ -16,7 +16,7 @@ r = ro.R() numpy2ri.activate() pandas2ri.activate() r.library('SpatialExtremes') - +# todo: R is not reloading all the time, the SpatialExtremes, so it's quite hard to debug or print in the code... def set_seed_r(seed=42): r("set.seed({})".format(seed)) @@ -35,7 +35,7 @@ def safe_run_r_estimator(function, use_start=False, **parameters): run_successful = False while not run_successful: current_parameter = parameters.copy() - if not use_start: + if not use_start and 'start' in current_parameter: current_parameter.pop('start') try: res = function(**current_parameter) # type: @@ -52,7 +52,7 @@ def safe_run_r_estimator(function, use_start=False, **parameters): return res -def retrieve_fitted_values(res: robjects.ListVector): +def retrieve_fitted_values(res: robjects.ListVector) -> Dict[str, float]: # todo: maybe if the convergence was not successful I could try other starting point several times # Retrieve the resulting fitted values fitted_values = res.rx2('fitted.values')