Commit 857c3b1d authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

refactor temporal_observations. add abstract_margin_model

parent 665ca20f
No related merge requests found
Showing with 266 additions and 88 deletions
+266 -88
from R.gev_fit.gev_mle_fit import GevMleFit
from extreme_estimator.R_fit.gev_fit.gev_mle_fit import GevMleFit, mle_gev
from extreme_estimator.R_fit.utils import get_loaded_r
def frechet_unitary_transformation(data, location, scale, shape):
......@@ -6,6 +7,7 @@ def frechet_unitary_transformation(data, location, scale, shape):
Compute the unitary Frechet transformed data
(1 + \zeta \frac{z - \mu}{\sigma}) ^ {\frac{1}{\zeta}}
"""
assert False
# todo: there is already a function doing that in R
return (1 + shape * (data - location) / scale) ** (1 / shape)
......@@ -22,30 +24,54 @@ def frechet_unitary_transformation_from_gev_parameters(data, gev_parameters: Gev
return frechet_unitary_transformation(data, gev_parameters.location)
class GevMarginal(object):
class AbstractMarginModel(object):
GEV_SCALE = GevMleFit.GEV_SCALE
GEV_LOCATION = GevMleFit.GEV_LOCATION
GEV_SHAPE = GevMleFit.GEV_SHAPE
GEV_PARAMETERS = [GEV_LOCATION, GEV_SCALE, GEV_SHAPE]
def __init__(self, coordinate, data, estimator=GevMleFit):
def __init__(self):
"""
Class to fit a GEV a list of data. Compute also the corresponding unitary data
:param coordinate: Represents the spatio-temporal spatial_coordinates of the marginals
:param data: array of data corresponding to this position (and potentially its neighborhood)
"""
self.coordinate = coordinate
self.data = data
self.gev_estimator = estimator(x_gev=data)
self.gev_parameters_estimated = [self.location, self.scale, self.shape]
self.frechet_unitary_data = frechet_unitary_transformation(data=data, location=self.location,
scale=self.scale, shape=self.shape)
@property
def location(self):
return self.gev_estimator.location
@property
def scale(self):
return self.gev_estimator.scale
@property
def shape(self):
return self.gev_estimator.shape
self.default_params = {gev_param: 1.0 for gev_param in self.GEV_PARAMETERS}
self.r = get_loaded_r()
# Define the method to sample/fit a single gev
def rgev(self, nb_obs, loc, scale, shape):
gev_params = {
self.GEV_LOCATION: loc,
self.GEV_SCALE: scale,
self.GEV_SHAPE: shape,
}
return self.r.rgev(nb_obs, **gev_params)
def fitgev(self, x_gev, estimator=GevMleFit):
mle_params = mle_gev(x_gev=x_gev)
# Define the method to sample/fit all marginals globally in the child classes
def fitmargin(self, maxima, coord):
df_gev_params = None
return df_gev_params
def rmargin(self, nb_obs, coord):
pass
def get_maxima(self, maxima_normalized, coord):
pass
def get_maxima_normalized(self, maxima, df_gev_params):
pass
class SmoothMarginModel(AbstractMarginModel):
pass
class ConstantMarginModel(SmoothMarginModel):
pass
......@@ -6,10 +6,6 @@ import os.path as op
# Defining some constants
from extreme_estimator.R_fit.utils import get_associated_r_file
GEV_SCALE = 'scale'
GEV_LOCATION = 'loc'
GEV_SHAPE = 'shape'
def mle_gev(x_gev: np.ndarray, start_loc=0, start_scale=1, start_shape=0):
assert np.ndim(x_gev) == 1
......@@ -25,10 +21,13 @@ def mle_gev(x_gev: np.ndarray, start_loc=0, start_scale=1, start_shape=0):
class GevMleFit(object):
GEV_SCALE = 'scale'
GEV_LOCATION = 'loc'
GEV_SHAPE = 'shape'
def __init__(self, x_gev: np.ndarray, start_loc=0, start_scale=1, start_shape=0):
self.x_gev = x_gev
self.mle_params = mle_gev(x_gev, start_loc, start_scale, start_shape)
self.shape = self.mle_params[GEV_SHAPE]
self.scale = self.mle_params[GEV_SCALE]
self.location = self.mle_params[GEV_LOCATION]
\ No newline at end of file
self.shape = self.mle_params[self.GEV_SHAPE]
self.scale = self.mle_params[self.GEV_SCALE]
self.location = self.mle_params[self.GEV_LOCATION]
......@@ -16,7 +16,8 @@ class AbstractMaxStableModel(object):
self.user_params_sample = params_sample
self.r = get_loaded_r()
def fitmaxstab(self, maxima: np.ndarray, coord: np.ndarray, fit_marge=False):
def fitmaxstab(self, maxima_normalized: np.ndarray, coord: np.ndarray, fit_marge=False):
assert all([isinstance(arr, np.ndarray) for arr in [maxima_normalized, coord]])
# Specify the fit params
fit_params = {
'fit.marge': fit_marge,
......@@ -25,7 +26,7 @@ class AbstractMaxStableModel(object):
# Run the fitmaxstab in R
# todo: find how to specify the optim function to use
try:
res = self.r.fitmaxstab(np.transpose(maxima), coord, **self.cov_mod_param, **fit_params) # type: ListVector
res = self.r.fitmaxstab(np.transpose(maxima_normalized), coord, **self.cov_mod_param, **fit_params) # type: ListVector
except rpy2.rinterface.RRuntimeError as error:
raise Exception('Some R exception have been launched at RunTime: {}'.format(error.__repr__()))
# todo: maybe if the convergence was not successful I could try other starting point several times
......@@ -81,10 +82,4 @@ class AbstractMaxStableModelWithCovarianceFunction(AbstractMaxStableModel):
'range': 3,
'smooth': 0.5,
'nugget': 0.5
}
if __name__ == '__main__':
print([c.name for c in CovarianceFunction])
for covariance_function in CovarianceFunction:
print(covariance_function)
\ No newline at end of file
}
\ No newline at end of file
from extreme_estimator.R_fit.max_stable_fit.abstract_max_stable_model import AbstractMaxStableModel, CovarianceFunction
from extreme_estimator.R_fit.max_stable_fit.max_stable_models import Smith, BrownResnick, Schlather, ExtremalT
from extreme_estimator.estimator.abstract_estimator import AbstractEstimator
from extreme_estimator.estimator.unitary_msp_estimator import MaxStableEstimator
from extreme_estimator.estimator.max_stable_estimator import MaxStableEstimator
from extreme_estimator.robustness_plot.multiple_plot import MultiplePlot
from extreme_estimator.robustness_plot.single_plot import SinglePlot
from spatio_temporal_dataset.dataset.simulation_dataset import SimulatedDataset
from spatio_temporal_dataset.spatial_coordinates.abstract_coordinates import AbstractSpatialCoordinates
from spatio_temporal_dataset.spatial_coordinates.alps_station_coordinates import \
AlpsStationCoordinatesBetweenZeroAndOne, AlpsStationCoordinatesBetweenZeroAndTwo
from spatio_temporal_dataset.spatial_coordinates.abstract_spatial_coordinates import AbstractSpatialCoordinates
from spatio_temporal_dataset.spatial_coordinates.alps_station_2D_coordinates import \
AlpsStation2DCoordinatesBetweenZeroAndOne, AlpsStationCoordinatesBetweenZeroAndTwo
from spatio_temporal_dataset.spatial_coordinates.generated_coordinates import CircleCoordinatesRadius1, \
CircleCoordinatesRadius2
from extreme_estimator.robustness_plot.display_item import DisplayItem
......@@ -102,7 +102,7 @@ def multiple_spatial_robustness_alps():
MspSpatial.MaxStableModelItem.name: msp_models,
MspSpatial.SpatialCoordinateClassItem.name: [CircleCoordinatesRadius1,
CircleCoordinatesRadius2,
AlpsStationCoordinatesBetweenZeroAndOne,
AlpsStation2DCoordinatesBetweenZeroAndOne,
AlpsStationCoordinatesBetweenZeroAndTwo][:],
})
......
import os
import numpy as np
import os.path as op
import pandas as pd
from spatio_temporal_dataset.temporal_maxima.temporal_maxima import TemporalMaxima
from spatio_temporal_dataset.spatial_coordinates.abstract_coordinates import AbstractSpatialCoordinates
from spatio_temporal_dataset.temporal_observations.abstract_temporal_observations import AbstractTemporalObservations
from spatio_temporal_dataset.spatial_coordinates.abstract_spatial_coordinates import AbstractSpatialCoordinates
class AbstractDataset(object):
def __init__(self, temporal_maxima: TemporalMaxima, spatial_coordinates: AbstractSpatialCoordinates):
assert (temporal_maxima.index == spatial_coordinates.index).all()
self.temporal_maxima = temporal_maxima
def __init__(self, temporal_observations: AbstractTemporalObservations, spatial_coordinates: AbstractSpatialCoordinates):
# assert
# is_same_index = temporal_observations.index == spatial_coordinates.index # type: pd.Series
# assert is_same_index.all()
self.temporal_observations = temporal_observations
self.spatial_coordinates = spatial_coordinates
@classmethod
def from_csv(cls, csv_path: str):
assert op.exists(csv_path)
df = pd.read_csv(csv_path)
temporal_maxima = TemporalMaxima.from_df(df)
temporal_maxima = AbstractTemporalObservations.from_df(df)
spatial_coordinates = AbstractSpatialCoordinates.from_df(df)
return cls(temporal_maxima=temporal_maxima, spatial_coordinates=spatial_coordinates)
return cls(temporal_maxima, spatial_coordinates)
def to_csv(self, csv_path: str):
dirname = op.dirname(csv_path)
......@@ -29,16 +32,21 @@ class AbstractDataset(object):
@property
def df_dataset(self) -> pd.DataFrame:
# Merge dataframes with the maxima and with the coordinates
return self.temporal_maxima.df_maxima.join(self.spatial_coordinates.df_coord)
return self.temporal_observations.df_maxima.join(self.spatial_coordinates.df_coord)
@property
def coord(self):
return self.spatial_coordinates.coord
@property
def maxima(self):
return self.temporal_maxima.maxima
def maxima(self) -> np.ndarray:
return self.temporal_observations.maxima
@property
def maxima_normalized(self):
return self.temporal_observations.maxima_normalized
@maxima_normalized.setter
def maxima_normalized(self, maxima_normalized_to_set):
self.temporal_observations.maxima_normalized = maxima_normalized_to_set
class RealDataset(AbstractDataset):
pass
from extreme_estimator.R_fit.gev_fit.abstract_margin_model import AbstractMarginModel
from extreme_estimator.R_fit.max_stable_fit.abstract_max_stable_model import AbstractMaxStableModel
import pandas as pd
from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
from spatio_temporal_dataset.temporal_maxima.temporal_maxima import TemporalMaxima
from spatio_temporal_dataset.spatial_coordinates.abstract_coordinates import AbstractSpatialCoordinates
from spatio_temporal_dataset.spatial_coordinates.generated_coordinates import CircleCoordinatesRadius1
from spatio_temporal_dataset.spatial_coordinates.abstract_spatial_coordinates import AbstractSpatialCoordinates
from spatio_temporal_dataset.temporal_observations.abstract_temporal_observations import AbstractTemporalObservations
from spatio_temporal_dataset.temporal_observations.annual_maxima_observations import \
MaxStableAnnualMaxima, AnnualMaxima, MarginAnnualMaxima, FullAnnualMaxima
class SimulatedDataset(AbstractDataset):
def __init__(self, temporal_maxima: TemporalMaxima, spatial_coordinates: AbstractSpatialCoordinates,
max_stable_model: AbstractMaxStableModel):
super().__init__(temporal_maxima, spatial_coordinates)
"""
Class SimulatedDataset gives access to:
-the max_stable_model AND/OR marginal_model that was used for sampling
"""
def __init__(self, temporal_observations: AbstractTemporalObservations,
spatial_coordinates: AbstractSpatialCoordinates,
max_stable_model: AbstractMaxStableModel = None,
margin_model: AbstractMarginModel = None):
super().__init__(temporal_observations, spatial_coordinates)
assert margin_model is not None or max_stable_model is not None
self.margin_model = margin_model
self.max_stable_model = max_stable_model
class MaxStableDataset(SimulatedDataset):
@classmethod
def from_max_stable_sampling(cls, nb_obs: int, max_stable_model:AbstractMaxStableModel, spatial_coordinates: AbstractSpatialCoordinates):
maxima = max_stable_model.rmaxstab(nb_obs=nb_obs, coord=spatial_coordinates.coord)
df_maxima = pd.DataFrame(data=maxima, index=spatial_coordinates.index)
temporal_maxima = TemporalMaxima(df_maxima=df_maxima)
return cls(temporal_maxima=temporal_maxima, spatial_coordinates=spatial_coordinates, max_stable_model=max_stable_model)
def from_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel,
spatial_coordinates: AbstractSpatialCoordinates):
temporal_obs = MaxStableAnnualMaxima.from_sampling(nb_obs, max_stable_model, spatial_coordinates)
return cls(temporal_observations=temporal_obs,
spatial_coordinates=spatial_coordinates,
max_stable_model=max_stable_model)
class MarginDataset(SimulatedDataset):
@classmethod
def from_sampling(cls, nb_obs: int, margin_model: AbstractMarginModel,
spatial_coordinates: AbstractSpatialCoordinates):
temporal_obs = MarginAnnualMaxima.from_sampling(nb_obs, spatial_coordinates, margin_model)
return cls(temporal_observations=temporal_obs,
spatial_coordinates=spatial_coordinates,
margin_model=margin_model)
class FullSimulatedDataset(SimulatedDataset):
@classmethod
def from_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel,
spatial_coordinates: AbstractSpatialCoordinates):
temporal_obs = FullAnnualMaxima.from_double_sampling(nb_obs, max_stable_model,
spatial_coordinates)
return cls(temporal_obs, spatial_coordinates, max_stable_model)
from typing import List
from spatio_temporal_dataset.marginals.abstract_marginals import AbstractMarginals
from R.gev_fit.gev_marginal import GevMarginal
from spatio_temporal_dataset.stations.station import Station
from spatio_temporal_dataset.spatial_coordinates.abstract_spatial_coordinates import AbstractSpatialCoordinates
class SpatialMarginal(AbstractMarginals):
"""The main idea is to have on marginal per station"""
def __init__(self, spatial_coordinates: AbstractSpatialCoordinates):
self.spatial_coordinates = spatial_coordinates
def __init__(self, stations: List[Station]):
super().__init__(stations)
for station in self.stations:
self.gev_marginals.append(GevMarginal(coordinate=station, data=station.annual_maxima.values))
\ No newline at end of file
class SimulatedSpatialMarginal(SpatialMarginal):
pass
class AbstractMaxStableOptimization(object):
def __init__(self, marginals):
pass
"""
We can conclude on the dimension of the max stable
"""
import pandas as pd
class TemporalMaxima(object):
def __init__(self, df_maxima: pd.DataFrame):
"""
Main attribute of the class is the DataFrame df_maxima
Index are stations index, Columns are the year of the maxima
"""
self.df_maxima = df_maxima
@classmethod
def from_df(cls, df):
pass
@property
def index(self):
return self.df_maxima.index
@property
def maxima(self):
return self.df_maxima.values
# todo: add the transformed_maxima and the not-trasnformed maxima
import pandas as pd
class AbstractTemporalObservations(object):
def __init__(self, df_maxima_normalized: pd.DataFrame = None, df_maxima: pd.DataFrame = None):
"""
Main attribute of the class is the DataFrame df_maxima
Index are stations index
Columns are the temporal moment of the maxima
"""
self.df_maxima_normalized = df_maxima_normalized
self.df_maxima = df_maxima
@classmethod
def from_df(cls, df):
pass
@property
def maxima(self):
return self.df_maxima.values
@property
def maxima_normalized(self):
return self.df_maxima_normalized.values
@maxima_normalized.setter
def maxima_normalized(self, maxima_normalized_to_set):
assert self.df_maxima_normalized is None
assert maxima_normalized_to_set is not None
assert maxima_normalized_to_set.shape == self.maxima.shape
self.df_maxima_normalized = pd.DataFrame(data=maxima_normalized_to_set, index=self.df_maxima.index,
columns=self.df_maxima.columns)
@property
def column_to_time_index(self):
pass
@property
def index(self):
return self.df_maxima.index
class RealTemporalObservations(object):
def __init__(self):
pass
class NormalizedTemporalObservations(object):
pass
from spatio_temporal_dataset.temporal_observations.abstract_temporal_observations import AbstractTemporalObservations
class AlpsPrecipitationObservations(AbstractTemporalObservations):
pass
\ No newline at end of file
import pandas as pd
from extreme_estimator.R_fit.gev_fit.abstract_margin_model import AbstractMarginModel
from extreme_estimator.R_fit.max_stable_fit.abstract_max_stable_model import AbstractMaxStableModel
from spatio_temporal_dataset.spatial_coordinates.abstract_spatial_coordinates import AbstractSpatialCoordinates
from spatio_temporal_dataset.temporal_observations.abstract_temporal_observations import AbstractTemporalObservations
class AnnualMaxima(AbstractTemporalObservations):
"""
Index are stations index
Columns are the annual of the maxima
"""
pass
class MarginAnnualMaxima(AnnualMaxima):
@classmethod
def from_sampling(cls, nb_obs: int, spatial_coordinates: AbstractSpatialCoordinates,
margin_model: AbstractMarginModel):
maxima = margin_model.rmargin(nb_obs=nb_obs, coord=spatial_coordinates.coord)
df_maxima = pd.DataFrame(data=maxima, index=spatial_coordinates.index)
return cls(df_maxima=df_maxima)
class MaxStableAnnualMaxima(AbstractTemporalObservations):
@classmethod
def from_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel,
spatial_coordinates: AbstractSpatialCoordinates):
maxima_normalized = max_stable_model.rmaxstab(nb_obs=nb_obs, coord=spatial_coordinates.coord)
df_maxima_normalized = pd.DataFrame(data=maxima_normalized, index=spatial_coordinates.index)
return cls(df_maxima_normalized=df_maxima_normalized)
class FullAnnualMaxima(MaxStableAnnualMaxima):
@classmethod
def from_double_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel,
spatial_coordinates: AbstractSpatialCoordinates,
margin_model: AbstractMarginModel):
max_stable_annual_maxima = super().from_sampling(nb_obs, max_stable_model, spatial_coordinates)
# Compute df_maxima from df_maxima_normalized
maxima = margin_model.get_maxima(max_stable_annual_maxima.maxima_normalized, spatial_coordinates.coord)
max_stable_annual_maxima.df_maxima = pd.DataFrame(data=maxima, index=spatial_coordinates.index)
return max_stable_annual_maxima
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment