Commit 38b2bd8a authored by Le Roux Erwan's avatar Le Roux Erwan

Initial main commit

parent ac99ffcc
Showing with 553 additions and 3 deletions
__init__.py 0 → 100644
File moved
@@ -9,16 +9,28 @@ def frechet_unitary_transformation(data, location, scale, shape):
    return (1 + shape * (data - location) / scale) ** (1 / shape)


class GevParameters(object):

    def __init__(self, location, scale, shape):
        self.location = location
        self.scale = scale
        self.shape = shape


def frechet_unitary_transformation_from_gev_parameters(data, gev_parameters: GevParameters):
    return frechet_unitary_transformation(data, gev_parameters.location, gev_parameters.scale,
                                          gev_parameters.shape)


class GevMarginal(object):

-   def __init__(self, position, data, estimator=GevMleFit):
+   def __init__(self, coordinate, data, estimator=GevMleFit):
        """
        Class to fit a GEV on a list of data. Also computes the corresponding unit Frechet (unitary) data.

-       :param position: Represents the spatio-temporal position of the marginals
+       :param coordinate: Represents the spatio-temporal coordinate of the marginal
        :param data: array of data corresponding to this coordinate (and potentially its neighborhood)
        """
-       self.position = position
+       self.coordinate = coordinate
        self.data = data
        self.gev_estimator = estimator(x_gev=data)
        self.gev_parameters_estimated = [self.location, self.scale, self.shape]
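For reference, a minimal hedged sketch (not part of the commit) of the unit Frechet transformation defined above; it assumes the import path R.gev_fit.gev_marginal used later in this commit, and the parameter values are purely illustrative.

import numpy as np
from R.gev_fit.gev_marginal import frechet_unitary_transformation

# A few GEV observations and assumed marginal parameters, for illustration only
data = np.array([1.2, 2.5, 3.1])
unit_frechet = frechet_unitary_transformation(data, location=1.0, scale=0.5, shape=0.2)
print(unit_frechet)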
library(SpatialExtremes)
# Boolean for python call
call_main = !exists("python_wrapping")
if (call_main) {
set.seed(42)
n.obs = 50
n.site = 2
coord <- matrix(rnorm(2*n.site, sd = sqrt(.2)), ncol = 2)
print(coord)
# Generate the data
# data <- rmaxstab(n.obs, coord, "gauss", cov11 = 100, cov12 = 25, cov22 = 220)
data <- rmaxstab(n.obs, coord, "brown", range = 3, smooth = 0.5)
# Fit back the data
print(data)
# res = fitmaxstab(data, coord, "gauss", fit.marge=FALSE, )
res = fitmaxstab(data, coord, "brown")
# res = fitmaxstab(data, coord, "gauss", start=list(0,0,0))
print(res)
print(class(res))
print(names(res))
print(res['fitted.values'])
print(res['convergence'])
}
import rpy2
from rpy2.robjects import ListVector
from extreme_estimator.R_fit.utils import get_loaded_r
import numpy as np
def optimize_fit(max_stable_model_list, params_start_fit_list):
pass
class MaxStableModel(object):
def __init__(self, params_start_fit=None, params_sample=None):
self.cov_mod = None
self.default_params_start_fit = None
self.default_params_sample = None
self.user_params_start_fit = params_start_fit
self.user_params_sample = params_sample
self.r = get_loaded_r()
def fitmaxstab(self, maxima: np.ndarray, coord: np.ndarray):
# todo: find how to specify a starting point, and understand how it is set by default
# res = None
# tries = 5
# nb_tries = 0
# while res is None and nb_tries < tries:
# try:
# res = self.r.fitmaxstab(maxima, coord, **self.cov_mod_param) # type: ListVector
# except rpy2.rinterface.RRuntimeError:
# pass
# nb_tries += 1
# print(nb_tries)
res = self.r.fitmaxstab(np.transpose(maxima), coord, **self.cov_mod_param) # type: ListVector
fitted_values = res.rx2('fitted.values')
fitted_values = {key: fitted_values.rx2(key)[0] for key in fitted_values.names}
return fitted_values
def rmaxstab(self, nb_obs: int, coord: np.ndarray, ) -> np.ndarray:
"""
Return a numpy array of maxima, with rows being the stations and columns being the years of maxima
"""
maxima = np.array(self.r.rmaxstab(nb_obs, coord, **self.cov_mod_param, **self.params_sample))
return np.transpose(maxima)
@property
def cov_mod_param(self):
return {'cov.mod': self.cov_mod}
@property
def params_start_fit(self):
return self.merge_params(default_params=self.default_params_start_fit, input_params=self.user_params_start_fit)
@property
def params_sample(self):
return self.merge_params(default_params=self.default_params_sample, input_params=self.user_params_sample)
@staticmethod
def merge_params(default_params, input_params):
merged_params = default_params.copy()
if input_params is not None:
assert isinstance(default_params, dict) and isinstance(input_params, dict)
assert set(input_params.keys()).issubset(set(default_params.keys()))
merged_params.update(input_params)
return merged_params
class GaussianMSP(MaxStableModel):
def __init__(self, params_start_fit=None, params_sample=None):
super().__init__(params_start_fit=params_start_fit, params_sample=params_sample)
self.cov_mod = 'gauss'
self.default_params_start_fit = {}
self.default_params_sample = {
'cov11': 100,
'cov12': 25,
'cov22': 220,
}
class BrownResick(MaxStableModel):
def __init__(self, params_start_fit=None, params_sample=None):
super().__init__(params_start_fit=params_start_fit, params_sample=params_sample)
self.cov_mod = 'brown'
self.default_params_start_fit = {}
self.default_params_sample = {
"range": 3,
"smooth": 0.5,
}
class ExtremalT(MaxStableModel):
pass
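A minimal usage sketch for the models above (not part of the commit). It assumes R, the SpatialExtremes package and rpy2 are installed and that the import path matches this repository layout; the Brown-Resnick fit mirrors the R script above.

import numpy as np
from extreme_estimator.R_fit.max_stable_fit.max_stable_models import BrownResick

np.random.seed(42)
coord = np.random.uniform(0, 10, size=(30, 2))   # 30 stations with 2D coordinates
model = BrownResick()
maxima = model.rmaxstab(nb_obs=50, coord=coord)  # shape (30 stations, 50 yearly maxima)
fitted = model.fitmaxstab(maxima=maxima, coord=coord)
print(fitted)  # e.g. {'range': ..., 'smooth': ...}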
library(SpatialExtremes)
##Define the coordinates of each location
n.site <- 30
locations <- matrix(runif(2*n.site, 0, 10), ncol = 2)
colnames(locations) <- c("lon", "lat")
##Simulate a max-stable process - with unit Frechet margins
data <- rmaxstab(40, locations, cov.mod = "whitmat", nugget = 0, range = 3,
smooth = 0.5)
##Now define the spatial model for the GEV parameters
param.loc <- -10 + 2 * locations[,2]
param.scale <- 5 + 2 * locations[,1] + locations[,2]^2
param.shape <- rep(0.2, n.site)
##Transform the unit Frechet margins to GEV
for (i in 1:n.site)
data[,i] <- frech2gev(data[,i], param.loc[i], param.scale[i],
param.shape[i])
##Define a model for the GEV margins to be fitted
##shape ~ 1 means that the GEV shape parameter is constant
##over the region
loc.form <- loc ~ lat
scale.form <- scale ~ lon + I(lat^2)
shape.form <- shape ~ 1
##Fit a max-stable process using Schlather's model
fitmaxstab(data, locations, "whitmat", loc.form, scale.form,
shape.form)
## Model without any spatial structure for the GEV parameters
## Be careful this could be *REALLY* time consuming
fitmaxstab(data, locations, "whitmat")
## Fixing the smooth parameter of the Whittle-Matern family
## to 0.5 - e.g. considering exponential family. We suppose the data
## are unit Frechet here.
fitmaxstab(data, locations, "whitmat", smooth = 0.5, fit.marge = FALSE)
## Fitting a penalized smoothing splines for the margins with the
## Smith's model
data <- rmaxstab(40, locations, cov.mod = "gauss", cov11 = 100, cov12 =
25, cov22 = 220)
## And transform it to ordinary GEV margins with a non-linear
## function
fun <- function(x)
2 * sin(pi * x / 4) + 10
fun2 <- function(x)
(fun(x) - 7 ) / 15
param.loc <- fun(locations[,2])
param.scale <- fun(locations[,2])
param.shape <- fun2(locations[,1])
##Transformation from unit Frechet to common GEV margins
for (i in 1:n.site)
data[,i] <- frech2gev(data[,i], param.loc[i], param.scale[i],
param.shape[i])
##Defining the knots, penalty, degree for the splines
n.knots <- 5
knots <- quantile(locations[,2], prob = 1:n.knots/(n.knots+1))
knots2 <- quantile(locations[,1], prob = 1:n.knots/(n.knots+1))
##Be careful: the choice of the penalty (i.e. the smoothing parameter)
##may strongly affect the result. Here we use p-splines for each GEV
##parameter - so it's really CPU demanding - but one could use 1 p-spline
##and 2 linear models.
##A simple linear model would clearly be faster...
loc.form <- y ~ rb(lat, knots = knots, degree = 3, penalty = .5)
scale.form <- y ~ rb(lat, knots = knots, degree = 3, penalty = .5)
shape.form <- y ~ rb(lon, knots = knots2, degree = 3, penalty = .5)
fitted <- fitmaxstab(data, locations, "gauss", loc.form, scale.form, shape.form,
control = list(ndeps = rep(1e-6, 24), trace = 10),
method = "BFGS")
fitted
op <- par(mfrow=c(1,3))
plot(locations[,2], param.loc, col = 2, ylim = c(7, 14),
ylab = "location parameter", xlab = "latitude")
plot(fun, from = 0, to = 10, add = TRUE, col = 2)
points(locations[,2], predict(fitted)[,"loc"], col = "blue", pch = 5)
new.data <- cbind(lon = seq(0, 10, length = 100), lat = seq(0, 10, length = 100))
lines(new.data[,1], predict(fitted, new.data)[,"loc"], col = "blue")
legend("topleft", c("true values", "predict. values", "true curve", "predict. curve"),
col = c("red", "blue", "red", "blue"), pch = c(1, 5, NA, NA), inset = 0.05,
lty = c(0, 0, 1, 1), ncol = 2)
plot(locations[,2], param.scale, col = 2, ylim = c(7, 14),
ylab = "scale parameter", xlab = "latitude")
plot(fun, from = 0, to = 10, add = TRUE, col = 2)
points(locations[,2], predict(fitted)[,"scale"], col = "blue", pch = 5)
lines(new.data[,1], predict(fitted, new.data)[,"scale"], col = "blue")
legend("topleft", c("true values", "predict. values", "true curve", "predict. curve"),
col = c("red", "blue", "red", "blue"), pch = c(1, 5, NA, NA), inset = 0.05,
lty = c(0, 0, 1, 1), ncol = 2)
plot(locations[,1], param.shape, col = 2,
ylab = "shape parameter", xlab = "longitude")
plot(fun2, from = 0, to = 10, add = TRUE, col = 2)
points(locations[,1], predict(fitted)[,"shape"], col = "blue", pch = 5)
lines(new.data[,1], predict(fitted, new.data)[,"shape"], col = "blue")
legend("topleft", c("true values", "predict. values", "true curve", "predict. curve"),
col = c("red", "blue", "red", "blue"), pch = c(1, 5, NA, NA), inset = 0.05,
lty = c(0, 0, 1, 1), ncol = 2)
par(op)
## End(Not run)
import rpy2.robjects as ro
import pandas as pd
import numpy as np
import rpy2.robjects.numpy2ri as npr
def get_loaded_r():
r = ro.r
ro.numpy2ri.activate()
r.library('SpatialExtremes')
return r
# def conversion_to_FloatVector(data):
# """Convert DataFrame or numpy array into FloatVector for r"""
# if isinstance(data, pd.DataFrame):
# data = data.values
# assert isinstance(data, np.ndarray)
# return npr.numpy2ri(data)
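A small hedged check (not part of the commit) that the R bridge above works; it mirrors the way rmaxstab is called in the max-stable models module, where keyword names containing dots are passed straight through to R.

import numpy as np
from extreme_estimator.R_fit.utils import get_loaded_r

r = get_loaded_r()
coord = np.random.uniform(0, 10, size=(5, 2))
# Same calling convention as MaxStableModel.rmaxstab: 'cov.mod' and the sampling parameters
maxima = np.array(r.rmaxstab(10, coord, **{'cov.mod': 'brown', 'range': 3, 'smooth': 0.5}))
print(maxima.shape)  # (10 observations, 5 sites)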
class AbstractEstimator(object):
pass
from extreme_estimator.estimator.abstract_estimator import AbstractEstimator
from extreme_estimator.R_fit.max_stable_fit.max_stable_models import MaxStableModel
from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
import numpy as np
class MaxStableEstimator(AbstractEstimator):
def __init__(self, dataset: AbstractDataset, max_stable_model: MaxStableModel):
self.dataset = dataset
self.max_stable_model = max_stable_model
self.max_stable_params_fitted = None
def fit(self):
self.max_stable_params_fitted = self.max_stable_model.fitmaxstab(maxima=self.dataset.maxima, coord=self.dataset.coord)
def error(self, true_max_stable_params: dict):
absolute_errors = {param_name: np.abs(param_true_value - self.max_stable_params_fitted[param_name])
for param_name, param_true_value in true_max_stable_params.items()}
mean_absolute_error = np.mean(np.array(list(absolute_errors.values())))
# return {**absolute_errors, **{'mae': mean_absolute_error}}
return mean_absolute_error
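A hedged end-to-end sketch (not part of the commit) tying the pieces together: simulate maxima from a model, fit them back with MaxStableEstimator, and report the mean absolute error on the sampling parameters. It only reuses signatures that appear elsewhere in this commit.

from extreme_estimator.estimator.msp_estimator import MaxStableEstimator
from extreme_estimator.R_fit.max_stable_fit.max_stable_models import BrownResick
from spatio_temporal_dataset.dataset.simulation_dataset import SimulatedDataset
from spatio_temporal_dataset.spatial_coordinates.generated_coordinate import CircleCoordinates

coord = CircleCoordinates.from_nb_points(nb_points=20, max_radius=1)
model = BrownResick()
dataset = SimulatedDataset.from_max_stable_sampling(nb_obs=50, max_stable_model=model,
                                                    spatial_coordinates=coord)
estimator = MaxStableEstimator(dataset, model)
estimator.fit()
print(estimator.error(model.params_sample))  # mean absolute error over 'range' and 'smooth'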
from typing import List
from extreme_estimator.estimator.msp_estimator import MaxStableEstimator
from extreme_estimator.R_fit.max_stable_fit.max_stable_models import GaussianMSP, MaxStableModel
from itertools import product
from spatio_temporal_dataset.dataset.simulation_dataset import SimulatedDataset
from spatio_temporal_dataset.spatial_coordinates.generated_coordinate import CircleCoordinates
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
class DisplayItem(object):
def __init__(self, argument_name, default_value, display_name=None):
self.argument_name = argument_name
self.default_value = default_value
self.display_name = display_name if display_name is not None else self.argument_name
def values_from_kwargs(self, **kwargs):
return kwargs.get(self.argument_name, [self.default_value])
def value_from_kwargs(self, **kwargs):
return kwargs.get(self.argument_name, self.default_value)
MaxStableModelItem = DisplayItem('max_stable_model', GaussianMSP)
SpatialCoordinateClassItem = DisplayItem('spatial_coordinate_class', CircleCoordinates)
SpatialParamsItem = DisplayItem('spatial_params', {"r": 1})
NbStationItem = DisplayItem('nb_station', None)
NbObservationItem = DisplayItem('nb_obs', 50)
class AbstractRobustnessPlot(object):
def __init__(self, grid_row_item, grid_column_item, plot_row_item, plot_label_item):
self.grid_row_item = grid_row_item # type: DisplayItem
self.grid_column_item = grid_column_item # type: DisplayItem
self.plot_row_item = plot_row_item # type: DisplayItem
self.plot_label_item = plot_label_item # type: DisplayItem
self.estimation_error = self.estimation_error_max_stable_unitary_frechet
def robustness_grid_plot(self, **kwargs):
grid_row_values = self.grid_row_item.values_from_kwargs(**kwargs)
grid_column_values = self.grid_column_item.values_from_kwargs(**kwargs)
nb_grid_rows, nb_grid_columns = len(grid_row_values), len(grid_column_values)
fig = plt.figure()
fig.subplots_adjust(hspace=0.4, wspace=0.4)
for i, (grid_row_value, grid_column_value) in enumerate(product(grid_row_values, grid_column_values), 1):
print('Grid plot: {}={} {}={}'.format(self.grid_row_item.display_name, grid_row_value,
self.grid_column_item.display_name, grid_column_value))
ax = fig.add_subplot(nb_grid_rows, nb_grid_columns, i)
kwargs_single_plot = kwargs.copy()
kwargs_single_plot.update({self.grid_row_item.argument_name: grid_row_value,
self.grid_column_item.argument_name: grid_column_value})
self.robustness_single_plot(ax, **kwargs_single_plot)
plt.show()
def robustness_single_plot(self, ax, **kwargs_single_plot):
plot_row_values = self.plot_row_item.values_from_kwargs(**kwargs_single_plot)
plot_label_values = self.plot_label_item.values_from_kwargs(**kwargs_single_plot)
colors = ['blue', 'red', 'green', 'black']
assert isinstance(plot_label_values, list), plot_label_values
assert isinstance(plot_row_values, list), plot_row_values
for j, plot_label_value in enumerate(plot_label_values):
plot_row_value_to_error = {}
# todo: do some parallelization here
for plot_row_value in plot_row_values:
kwargs_single_point = kwargs_single_plot.copy()
kwargs_single_point.update({self.plot_row_item.argument_name: plot_row_value,
self.plot_label_item.argument_name: plot_label_value})
plot_row_value_to_error[plot_row_value] = self.estimation_error(**kwargs_single_point)
plot_column_values = [plot_row_value_to_error[plot_row_value] for plot_row_value in plot_row_values]
ax.plot(plot_row_values, plot_column_values, color=colors[j % len(colors)], label=str(j))
ax.legend()
ax.set_xlabel(self.plot_row_item.display_name)
ax.set_ylabel('Absolute error')
ax.set_title('Title (display all the other parameters)')
@staticmethod
def estimation_error_max_stable_unitary_frechet(**kwargs_single_points):
# Get the argument from kwargs
print(kwargs_single_points)
max_stable_model = MaxStableModelItem.value_from_kwargs(**kwargs_single_points)
spatial_coordinate_class = SpatialCoordinateClassItem.value_from_kwargs(**kwargs_single_points)
nb_station = NbStationItem.value_from_kwargs(**kwargs_single_points)
spatial_params = SpatialParamsItem.value_from_kwargs(**kwargs_single_points)
nb_obs = NbObservationItem.value_from_kwargs(**kwargs_single_points)
# Run the estimation
spatial_coordinate = spatial_coordinate_class.from_nb_points(nb_points=nb_station, **spatial_params)
dataset = SimulatedDataset.from_max_stable_sampling(nb_obs=nb_obs, max_stable_model=max_stable_model,
spatial_coordinates=spatial_coordinate)
estimator = MaxStableEstimator(dataset, max_stable_model)
estimator.fit()
errors = estimator.error(max_stable_model.params_sample)
return errors
from extreme_estimator.R_fit.max_stable_fit.max_stable_models import GaussianMSP, BrownResick
from extreme_estimator.robustness_plot.abstract_robustness import DisplayItem, AbstractRobustnessPlot, \
SpatialCoordinateClassItem, NbObservationItem, NbStationItem, MaxStableModelItem, SpatialParamsItem
from spatio_temporal_dataset.spatial_coordinates.generated_coordinate import CircleCoordinates
spatial_robustness = AbstractRobustnessPlot(grid_row_item=SpatialCoordinateClassItem,
grid_column_item=NbObservationItem,
plot_row_item=NbStationItem,
plot_label_item=MaxStableModelItem)
# Pass only the parameters that will vary
spatial_robustness.robustness_grid_plot(**{
NbStationItem.argument_name: [10, 20, 30, 40, 50],
MaxStableModelItem.argument_name: [GaussianMSP(), BrownResick()][:]
})
import os
import os.path as op
import pandas as pd
from spatio_temporal_dataset.temporal_maxima.temporal_maxima import TemporalMaxima
from spatio_temporal_dataset.spatial_coordinates.abstract_coordinate import AbstractSpatialCoordinates
class AbstractDataset(object):
def __init__(self, temporal_maxima: TemporalMaxima, spatial_coordinates: AbstractSpatialCoordinates):
assert (temporal_maxima.index == spatial_coordinates.index).all()
self.temporal_maxima = temporal_maxima
self.spatial_coordinates = spatial_coordinates
@classmethod
def from_csv(cls, csv_path: str):
assert op.exists(csv_path)
df = pd.read_csv(csv_path)
temporal_maxima = TemporalMaxima.from_df(df)
spatial_coordinates = AbstractSpatialCoordinates.from_df(df)
return cls(temporal_maxima=temporal_maxima, spatial_coordinates=spatial_coordinates)
def to_csv(self, csv_path: str):
dirname = op.dirname(csv_path)
if not op.exists(dirname):
os.makedirs(dirname)
self.df_dataset.to_csv(csv_path)
@property
def df_dataset(self) -> pd.DataFrame:
# Merge dataframes with the maxima and with the coordinates
return self.temporal_maxima.df_maxima.join(self.spatial_coordinates.df_coord)
@property
def coord(self):
return self.spatial_coordinates.coord
@property
def maxima(self):
return self.temporal_maxima.maxima
class RealDataset(AbstractDataset):
pass
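A small illustrative sketch (not part of the commit) of the CSV round trip offered by AbstractDataset; the file names are hypothetical and the CSV is assumed to contain the maxima columns plus the spatial coordinate columns expected by TemporalMaxima.from_df and AbstractSpatialCoordinates.from_df.

from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset

dataset = AbstractDataset.from_csv('dataset.csv')   # hypothetical path
print(dataset.maxima.shape, dataset.coord.shape)
dataset.to_csv('copy_of_dataset.csv')               # writes the joined maxima/coordinates dataframe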
from extreme_estimator.R_fit.max_stable_fit.max_stable_models import MaxStableModel, GaussianMSP
import pandas as pd
from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
from spatio_temporal_dataset.temporal_maxima.temporal_maxima import TemporalMaxima
from spatio_temporal_dataset.spatial_coordinates.abstract_coordinate import AbstractSpatialCoordinates
from spatio_temporal_dataset.spatial_coordinates.generated_coordinate import CircleCoordinates
class SimulatedDataset(AbstractDataset):
def __init__(self, temporal_maxima: TemporalMaxima, spatial_coordinates: AbstractSpatialCoordinates,
max_stable_model: MaxStableModel):
super().__init__(temporal_maxima, spatial_coordinates)
self.max_stable_model = max_stable_model
@classmethod
def from_max_stable_sampling(cls, nb_obs: int, max_stable_model: MaxStableModel, spatial_coordinates: AbstractSpatialCoordinates):
maxima = max_stable_model.rmaxstab(nb_obs=nb_obs, coord=spatial_coordinates.coord)
df_maxima = pd.DataFrame(data=maxima, index=spatial_coordinates.index)
temporal_maxima = TemporalMaxima(df_maxima=df_maxima)
return cls(temporal_maxima=temporal_maxima, spatial_coordinates=spatial_coordinates, max_stable_model=max_stable_model)
if __name__ == '__main__':
coord = CircleCoordinates.from_nb_points(nb_points=5, max_radius=1)
max_stable_model = GaussianMSP()
dataset = SimulatedDataset.from_max_stable_sampling(nb_obs=50, max_stable_model=max_stable_model, spatial_coordinates=coord)
print(dataset.df_dataset)
from typing import List
from R.gev_fit.gev_marginal import GevMarginal, frechet_unitary_transformation
from spatio_temporal_dataset.stations.station import Station
class AbstractMarginals(object):
def __init__(self, stations: List[Station]):
self.stations = stations
self.gev_marginals = [] # type: List[GevMarginal]
# Compute some temporal arguments
self.years_of_study = self.stations[0].year_of_study
self.temporal_window_size = 20
self.smoothing_function = None
def sample_marginals(self, smoothing_function):
pass
@property
def gev_parameters(self):
return [gev_marginal.gev_parameters_estimated for gev_marginal in self.gev_marginals]
class FrechetUnitaryTransformationFunction(object):
pass
class NoSmoothing(object):
def gev_parameters(self, coordinate):
pass
class ContinuousSmoothing(object):

    def frechet_unitary_transformation(self, data, coordinate):
        gev_parameters = self.gev_parameters(coordinate)
        transformed_data = frechet_unitary_transformation(data, *gev_parameters)
        return transformed_data

    def gev_parameters(self, coordinate):
        return ()
if __name__ == '__main__':
pass
from typing import List
from spatio_temporal_dataset.marginals.abstract_marginals import AbstractMarginals
from R.gev_fit.gev_marginal import GevMarginal
from spatio_temporal_dataset.stations.station import Station
class SpatialMarginal(AbstractMarginals):
def __init__(self, stations: List[Station]):
super().__init__(stations)
for station in self.stations:
self.gev_marginals.append(GevMarginal(coordinate=station, data=station.annual_maxima.values))
class AbstractMaxStableOptimization(object):
def __init__(self, marginals):
pass
"""
We can conclude on the dimension of the max stable
"""