Commit 79e4ebc3 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[MARGIN MODEL] handle spatio_temporal form & coef. first version

parent 011a1e15
No related merge requests found
Showing with 141 additions and 35 deletions
+141 -35
from abc import ABC
from extreme_estimator.estimator.abstract_estimator import AbstractEstimator
from extreme_estimator.extreme_models.margin_model.margin_function.abstract_margin_function import \
AbstractMarginFunction
......@@ -6,7 +8,7 @@ from extreme_estimator.extreme_models.margin_model.smooth_margin_model import Li
from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
class AbstractMarginEstimator(AbstractEstimator):
class AbstractMarginEstimator(AbstractEstimator, ABC):
def __init__(self, dataset: AbstractDataset):
super().__init__(dataset)
......@@ -34,7 +36,10 @@ class SmoothMarginEstimator(AbstractMarginEstimator):
def _fit(self):
maxima_gev = self.dataset.maxima_gev(split=self.train_split)
df_coordinates = self.dataset.df_coordinates(self.train_split)
df_coordinates_spatial = df_coordinates.loc[:, self.dataset.coordinates.coordinates_spatial_names]
df_coordinates_temporal = df_coordinates.loc[:, self.dataset.coordinates.coordinates_temporal_names]
self._params_fitted = self.margin_model.fitmargin_from_maxima_gev(maxima_gev=maxima_gev,
df_coordinates=df_coordinates)
df_coordinates_spatial=df_coordinates_spatial,
df_coordinates_temporal=df_coordinates_temporal)
self.extract_fitted_models_from_fitted_params(self.margin_model.margin_function_start_fit, self._params_fitted)
assert isinstance(self.margin_function_fitted, AbstractMarginFunction)
......@@ -69,8 +69,8 @@ class AbstractMarginModel(AbstractModel):
# Fitting methods needs to be defined in child classes
def fitmargin_from_maxima_gev(self, maxima_gev: np.ndarray, df_coordinates: pd.DataFrame) \
-> AbstractMarginFunction:
def fitmargin_from_maxima_gev(self, maxima_gev: np.ndarray, df_coordinates_spatial: pd.DataFrame,
df_coordinates_temporal: pd.DataFrame) -> AbstractMarginFunction:
raise NotImplementedError
......@@ -13,9 +13,9 @@ class LinearMarginFunction(IndependentMarginFunction):
""" Margin Function, where each parameter can augment linearly along any dimension.
dim = 0 correspond to the intercept
dim = 1 correspond to the coordinate X
dim = 2 correspond to the coordinate Y
dim = 3 correspond to the coordinate Z
dim = 1 correspond to the first coordinate
dim = 2 correspond to the second coordinate
dim = 3 correspond to the third coordinate....
gev_param_name_to_linear_dims maps each parameter of the GEV distribution to its linear dimensions
......@@ -37,7 +37,8 @@ class LinearMarginFunction(IndependentMarginFunction):
# Check the linear_dim are well-defined with respect to the coordinates
for linear_dims in self.gev_param_name_to_linear_dims.values():
for dim in linear_dims:
assert 0 < dim <= coordinates.nb_coordinates, "dim={}, nb_columns={}".format(dim, coordinates.nb_coordinates)
assert 0 < dim <= coordinates.nb_coordinates, "dim={}, nb_columns={}".format(dim,
coordinates.nb_coordinates)
# Map each gev_param_name to its corresponding param_function
for gev_param_name in GevParams.PARAM_NAMES:
......@@ -60,16 +61,41 @@ class LinearMarginFunction(IndependentMarginFunction):
for gev_param_name in GevParams.PARAM_NAMES:
linear_dims = gev_param_name_to_linear_dims.get(gev_param_name, [])
linear_coef = LinearCoef.from_coef_dict(coef_dict=coef_dict, gev_param_name=gev_param_name,
linear_dims=linear_dims)
linear_dims=linear_dims,
dim_to_coefficient_name=cls.dim_to_coefficient_name(coordinates))
gev_param_name_to_linear_coef[gev_param_name] = linear_coef
return cls(coordinates, gev_param_name_to_linear_dims, gev_param_name_to_linear_coef)
@classmethod
def dim_to_coefficient_name(cls, coordinates: AbstractCoordinates) -> Dict[int, str]:
# Intercept correspond to the dimension 0
dim_to_coefficient_name = {0: LinearCoef.INTERCEPT_NAME}
# Coordinates correspond to the dimension starting from 1
for i, coordinate_name in enumerate(coordinates.coordinates_names, 1):
dim_to_coefficient_name[i] = coordinate_name
return dim_to_coefficient_name
@classmethod
def coefficient_name_to_dim(cls, coordinates: AbstractCoordinates) -> Dict[int, str]:
return {v: k for k, v in cls.dim_to_coefficient_name(coordinates).items()}
@property
def form_dict(self) -> Dict[str, str]:
form_dict = {}
for gev_param_name in GevParams.PARAM_NAMES:
linear_dims = self.gev_param_name_to_linear_dims.get(gev_param_name, [])
form_dict.update(self.gev_param_name_to_linear_coef[gev_param_name].form_dict(linear_dims=linear_dims))
# Load spatial form_dict (only if we have some spatial coordinates)
if self.coordinates.coordinates_spatial_names:
spatial_names = [name for name in self.coordinates.coordinates_spatial_names
if self.coefficient_name_to_dim(self.coordinates)[name] in linear_dims]
spatial_form = self.gev_param_name_to_linear_coef[gev_param_name].spatial_form_dict(spatial_names)
form_dict.update(spatial_form)
# Load temporal form dict (only if we have some temporal coordinates)
if self.coordinates.coordinates_temporal_names:
temporal_names = [name for name in self.coordinates.coordinates_temporal_names
if self.coefficient_name_to_dim(self.coordinates)[name] in linear_dims]
temporal_form = self.gev_param_name_to_linear_coef[gev_param_name].temporal_form_dict(temporal_names)
form_dict.update(temporal_form)
return form_dict
@property
......@@ -77,5 +103,6 @@ class LinearMarginFunction(IndependentMarginFunction):
coef_dict = {}
for gev_param_name in GevParams.PARAM_NAMES:
linear_dims = self.gev_param_name_to_linear_dims.get(gev_param_name, [])
coef_dict.update(self.gev_param_name_to_linear_coef[gev_param_name].coef_dict(linear_dims=linear_dims))
linear_coef = self.gev_param_name_to_linear_coef[gev_param_name]
coef_dict.update(linear_coef.coef_dict(linear_dims, self.dim_to_coefficient_name(self.coordinates)))
return coef_dict
from typing import Dict
from typing import Dict, List
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
......@@ -7,10 +7,11 @@ class LinearCoef(object):
"""
Object that maps each dimension to its corresponding coefficient.
dim = 0 correspond to the intercept
dim = 1 correspond to the coordinate X
dim = 2 correspond to the coordinate Y
dim = 3 correspond to the coordinate Z
dim = 1 correspond to the first coordinate
dim = 2 correspond to the second coordinate
dim = 3 correspond to the third coordinate...
"""
INTERCEPT_NAME = 'intercept'
def __init__(self, gev_param_name: str, dim_to_coef: Dict[int, float] = None, default_value: float = 0.0):
self.gev_param_name = gev_param_name
......@@ -24,31 +25,50 @@ class LinearCoef(object):
return self.dim_to_coef.get(dim, self.default_value)
@property
def intercept(self):
def intercept(self) -> float:
return self.get_coef(dim=0)
@staticmethod
def coef_template_str(gev_param_name):
return gev_param_name + 'Coeff{}'
@classmethod
def coef_template_str(cls, gev_param_name: str, coefficient_name: str) -> str:
"""
Example of coef that can be specified
-for the spatial covariates: locCoeff1
-for the temporal covariates: tempCoeffLoc
:param gev_param_name:
:param coefficient_name:
:return:
"""
assert coefficient_name == cls.INTERCEPT_NAME or coefficient_name in AbstractCoordinates.COORDINATES_NAMES
if coefficient_name == cls.INTERCEPT_NAME or coefficient_name in AbstractCoordinates.COORDINATE_SPATIAL_NAMES:
return gev_param_name + 'Coeff{}'
else:
return 'tempCoeff' + gev_param_name.title() + '{}'
@classmethod
def from_coef_dict(cls, coef_dict: Dict[str, float], gev_param_name: str, linear_dims):
def from_coef_dict(cls, coef_dict: Dict[str, float], gev_param_name: str, linear_dims: List[int],
dim_to_coefficient_name):
dims = [0] + linear_dims
dim_to_coef = {}
for j, dim in enumerate(dims, 1):
coef = coef_dict[cls.coef_template_str(gev_param_name).format(j)]
coefficient_name = dim_to_coefficient_name[dim]
coef = coef_dict[cls.coef_template_str(gev_param_name, coefficient_name).format(j)]
dim_to_coef[dim] = coef
return cls(gev_param_name, dim_to_coef)
def coef_dict(self, linear_dims) -> Dict[str, float]:
# Constant param must be specified for all the parameters
coef_dict = {self.coef_template_str(self.gev_param_name).format(1): self.intercept}
# Specify only the param that belongs to dim_to_coef
for j, dim in enumerate(linear_dims, 2):
coef_dict[self.coef_template_str(self.gev_param_name).format(j)] = self.dim_to_coef[dim]
def coef_dict(self, linear_dims, dim_to_coefficient_name) -> Dict[str, float]:
dims = [0] + linear_dims
coef_dict = {}
for j, dim in enumerate(dims, 1):
coefficient_name = dim_to_coefficient_name[dim]
coef = self.dim_to_coef[dim]
coef_dict[self.coef_template_str(self.gev_param_name, coefficient_name).format(j)] = coef
return coef_dict
def form_dict(self, linear_dims) -> Dict[str, str]:
def form_dict(self, names: List[str]) -> Dict[str, str]:
formula_str = '1' if not names else '+'.join(names)
return {self.gev_param_name + '.form': self.gev_param_name + ' ~ ' + formula_str}
def spatial_form_dict(self, coordinate_spatial_names: List[str]) -> Dict[str, str]:
"""
Example of formula that could be specified:
loc.form = loc ~ coord_x
......@@ -56,6 +76,16 @@ class LinearCoef(object):
shape.form = shape ~ coord_x+coord_y
:return:
"""
dim_to_name = {i: name for i, name in enumerate(AbstractCoordinates.COORDINATES_NAMES, 1)}
formula_str = '1' if not linear_dims else '+'.join([dim_to_name[dim] for dim in linear_dims])
return {self.gev_param_name + '.form': self.gev_param_name + ' ~ ' + formula_str}
\ No newline at end of file
assert all([name in AbstractCoordinates.COORDINATE_SPATIAL_NAMES for name in coordinate_spatial_names])
return self.form_dict(coordinate_spatial_names)
def temporal_form_dict(self, coordinate_temporal_names: List[str]) -> Dict[str, str]:
"""
Example of formula that could be specified:
temp.loc.form = loc ~ coord_t
temp.scale.form = scale ~ coord_t
temp.shape.form = shape ~ 1
:return:
"""
assert all([name in [AbstractCoordinates.COORDINATE_T] for name in coordinate_temporal_names])
return {'temp.' + k: v for k, v in self.form_dict(coordinate_temporal_names).items()}
......@@ -29,6 +29,49 @@ rmaxstab2D <- function (n.obs){
print(res['fitted.values'])
}
# rmaxstab with 3D data
# rmaxstab3D <- function (n.obs){
# # todo: problem this function is currently not available in dimensions 3
# n.site = 2
# dimension = 3
# ar = rnorm(3*n.obs*n.site, sd = sqrt(.2))
# print(ar)
# coord <- array(ar, dim = c(4,3))
# print(coord)
# colnames(coord) = c("E", "N", "T")
# print(colnames(coord))
# data <- coord
#
# # Generate the data
# # data <- rmaxstab(n.obs, coord, "gauss", cov11 = 100, cov12 = 25, cov22 = 220)
# # data <- rmaxstab(n.obs, coord, "brown", range = 3, smooth = 0.5)
# # data <- rmaxstab(n.obs, coord, "whitmat", nugget = 0.0, range = 3, smooth = 0.5)
# # Fit back the data
# # print(data)n
# # res = fitmaxstab(data, coord, "gauss", fit.marge=FALSE, )
# # res = fitmaxstab(data, coord, "brown")
# # res = fitmaxstab(data, coord, "whitmat", start=)
# print(class(coord))
# print(colnames(coord))
#
# loc.form = loc ~ N
# scale.form = scale ~ 1
# shape.form = shape ~ 1
#
# temp_loc.form = loc ~ T
# temp_scale.form = scale ~ 1
# temp_shape.form = shape ~ 1
#
# namedlist = list(cov11 = 1.0, cov12 = 1.2, cov22 = 2.2, locCoeff1=1.0, locCoeff2=1.0, scaleCoeff1=1.0, shapeCoeff1=1.0)
# res = fitmaxstab(data=data, coord=coord, cov.mod="gauss", start=namedlist, fit.marge=TRUE,
# loc.form=loc.form, scale.form=scale.form,shape.form=shape.form,
# temp.loc.form = temp_loc.form, temp.scale.form = temp_scale.form, temp.shape.form = temp_shape.form)
# print(res['fitted.values'])
# }
fitspatgev()
# rmaxstab with 1D data
rmaxstab1D <- function (n.obs){
......@@ -62,7 +105,8 @@ call_main = !exists("python_wrapping")
if (call_main) {
set.seed(42)
n.obs = 500
rmaxstab2D(n.obs)
# rmaxstab2D(n.obs)
# rmaxstab3D(n.obs)
# rmaxstab1D(n.obs)
# namedlist = list(cov11 = 1.0, cov12 = 1.2, cov22 = 2.2)
......
......@@ -16,7 +16,7 @@ r = ro.R()
numpy2ri.activate()
pandas2ri.activate()
r.library('SpatialExtremes')
# todo: R is not reloading all the time, the SpatialExtremes, so it's quite hard to debug or print in the code...
def set_seed_r(seed=42):
r("set.seed({})".format(seed))
......@@ -35,7 +35,7 @@ def safe_run_r_estimator(function, use_start=False, **parameters):
run_successful = False
while not run_successful:
current_parameter = parameters.copy()
if not use_start:
if not use_start and 'start' in current_parameter:
current_parameter.pop('start')
try:
res = function(**current_parameter) # type:
......@@ -52,7 +52,7 @@ def safe_run_r_estimator(function, use_start=False, **parameters):
return res
def retrieve_fitted_values(res: robjects.ListVector):
def retrieve_fitted_values(res: robjects.ListVector) -> Dict[str, float]:
# todo: maybe if the convergence was not successful I could try other starting point several times
# Retrieve the resulting fitted values
fitted_values = res.rx2('fitted.values')
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment