Commit 3df67583 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[TEMPORAL LINEAR MARGIN][BAYESIAN ESTIMATION] add fevdPriorCustom as prior for...

[TEMPORAL LINEAR MARGIN][BAYESIAN ESTIMATION] add fevdPriorCustom as prior for the fit. update test accordingly. add non stationary version of fevD both in R and also in python. add test for that in python. modify upper and bottom quantile for eurocode
parent 7fea48fa
No related merge requests found
Showing with 95 additions and 72 deletions
+95 -72
...@@ -49,7 +49,8 @@ class ExtractEurocodeReturnLevelFromExtremes(object): ...@@ -49,7 +49,8 @@ class ExtractEurocodeReturnLevelFromExtremes(object):
@property @property
def posterior_eurocode_return_level_uncertainty_interval_for_the_year_of_interest(self): def posterior_eurocode_return_level_uncertainty_interval_for_the_year_of_interest(self):
bottom_and_upper_quantile = (0.05, 0.95) # Bottom and upper quantile correspond to the quantile
bottom_and_upper_quantile = (0.250, 0.975)
return [np.quantile(self.posterior_eurocode_return_level_samples_for_year_of_interest, q=q) return [np.quantile(self.posterior_eurocode_return_level_samples_for_year_of_interest, q=q)
for q in bottom_and_upper_quantile] for q in bottom_and_upper_quantile]
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
# Created by: erwan # Created by: erwan
# Created on: 04/10/2019 # Created on: 04/10/2019
library(extRemes) library(extRemes)
library(data.table)
library(stats4) library(stats4)
library(SpatialExtremes) library(SpatialExtremes)
source('fevd_fixed.R') source('fevd_fixed.R')
...@@ -34,10 +35,18 @@ print(fevdPriorCustom(2.0, 0.0, 0.0)) ...@@ -34,10 +35,18 @@ print(fevdPriorCustom(2.0, 0.0, 0.0))
# res = fevd(x_gev, method='Bayesian', priorFun="fevdPriorMyMy", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE) # res = fevd(x_gev, method='Bayesian', priorFun="fevdPriorMyMy", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
res = fevd_fixed(x_gev, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
# res = fevd(x_gev, method='GMLE', iter=5000, verbose=TRUE, use.phi=FALSE) # res = fevd(x_gev, method='GMLE', iter=5000, verbose=TRUE, use.phi=FALSE)
# Without covariate
# res = fevd_fixed(x_gev, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
# Add covariate
coord <- matrix(ncol=1, nrow = N)
coord[,1]=seq(1,N,1)
colnames(coord) = c("T")
coord = data.frame(coord, stringsAsFactors = TRUE)
res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
print(res) print(res)
print('here') print('here')
...@@ -47,31 +56,10 @@ print(res$method) ...@@ -47,31 +56,10 @@ print(res$method)
print(res$priorFun) print(res$priorFun)
print(res$priorParams) print(res$priorParams)
m = res$results m = res$results
print(class(res$chain.info))
print(dim(m)) print(dim(m))
print(m[1,]) print(m[1,])
print(m[1,1]) print(m[1,1])
print(res$chain.info[1,]) print(res$chain.info[1,])
# print(class(res$chain.info))
# ch = res$chain.info
# print(dim(ch))
# print(ch)
# # summary(res)
print(attributes(res))
# print('here')
# print(attr(res, 'chain.info'))
# print(attr(res, "method"))
# print(attr(res, "x"))
# print(attr(res, "priorParams"))
# print(res.method)
# Bayesian method is using a normal distribution functions for the shape parameter
# GMLE distribution is using a Beta distribution for the shape parameter
...@@ -5,7 +5,7 @@ from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model impo ...@@ -5,7 +5,7 @@ from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model impo
from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import AbstractResultFromModelFit from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import AbstractResultFromModelFit
from extreme_fit.model.result_from_model_fit.result_from_extremes import ResultFromExtremes from extreme_fit.model.result_from_model_fit.result_from_extremes import ResultFromExtremes
from extreme_fit.model.result_from_model_fit.result_from_ismev import ResultFromIsmev from extreme_fit.model.result_from_model_fit.result_from_ismev import ResultFromIsmev
from extreme_fit.model.utils import r, ro, get_null, get_margin_formula_extremes from extreme_fit.model.utils import r, ro, get_null, get_margin_formula_extremes, get_coord, get_coord_df
from extreme_fit.model.utils import safe_run_r_estimator from extreme_fit.model.utils import safe_run_r_estimator
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
...@@ -25,15 +25,15 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): ...@@ -25,15 +25,15 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit: df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit:
assert data.shape[1] == len(df_coordinates_temp.values) assert data.shape[1] == len(df_coordinates_temp.values)
x = ro.FloatVector(data[0]) x = ro.FloatVector(data[0])
y = df_coordinates_temp.values
if self.fit_method == self.ISMEV_GEV_FIT_METHOD_STR: if self.fit_method == self.ISMEV_GEV_FIT_METHOD_STR:
return self.ismev_gev_fit(x, y) return self.ismev_gev_fit(x, df_coordinates_temp)
if self.fit_method == self.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR: if self.fit_method == self.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR:
return self.extremes_fevd_bayesian_fit(x, y) return self.extremes_fevd_bayesian_fit(x, df_coordinates_temp)
# Gev Fit with isMev package # Gev Fit with isMev package
def ismev_gev_fit(self, x, y) -> ResultFromIsmev: def ismev_gev_fit(self, x, df_coordinates_temp) -> ResultFromIsmev:
y = df_coordinates_temp.values
res = safe_run_r_estimator(function=r('gev.fit'), use_start=self.use_start_value, res = safe_run_r_estimator(function=r('gev.fit'), use_start=self.use_start_value,
xdat=x, y=y, mul=self.mul, xdat=x, y=y, mul=self.mul,
sigl=self.sigl, shl=self.shl) sigl=self.sigl, shl=self.shl)
...@@ -41,20 +41,18 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): ...@@ -41,20 +41,18 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
# Gev fit with extRemes package # Gev fit with extRemes package
def extremes_fevd_bayesian_fit(self, x, y) -> ResultFromExtremes: def extremes_fevd_bayesian_fit(self, x, df_coordinates_temp) -> ResultFromExtremes:
# (x_gev, method='Bayesian', priorFun="fevdPriorMyMy", priorParams=list(q=c(6), p = c( # Disable the use of log sigma parametrization
# 9)), iter = 5000, verbose = TRUE, use.phi = FALSE) r_type_argument_kwargs = {'use.phi': False,
'verbose': False}
r_type_argument_kwargs = {'use.phi': False}
r_type_argument_kwargs.update(get_margin_formula_extremes(self.margin_function_start_fit.form_dict)) r_type_argument_kwargs.update(get_margin_formula_extremes(self.margin_function_start_fit.form_dict))
# location.fun = ~1, y = get_coord_df(df_coordinates_temp)
# scale.fun = ~1, shape.fun = ~1
# , priorParams = list(q=c(6), p=c(9))
res = safe_run_r_estimator(function=r('fevd_fixed'), res = safe_run_r_estimator(function=r('fevd_fixed'),
x=x, x=x,
data=y, data=y,
method='Bayesian', method='Bayesian',
# priorFun="fevdPriorCustom", priorFun="fevdPriorCustom",
priorParams=r.list(q=r.c(6), p=r.c(9)),
iter=5000, iter=5000,
**r_type_argument_kwargs **r_type_argument_kwargs
) )
......
...@@ -6,6 +6,7 @@ from extreme_fit.distribution.gev.gev_params import GevParams ...@@ -6,6 +6,7 @@ from extreme_fit.distribution.gev.gev_params import GevParams
from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import \ from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import \
AbstractResultFromModelFit AbstractResultFromModelFit
from extreme_fit.model.result_from_model_fit.utils import get_margin_coef_dict from extreme_fit.model.result_from_model_fit.utils import get_margin_coef_dict
from extreme_fit.model.utils import r
class ResultFromExtremes(AbstractResultFromModelFit): class ResultFromExtremes(AbstractResultFromModelFit):
...@@ -16,20 +17,21 @@ class ResultFromExtremes(AbstractResultFromModelFit): ...@@ -16,20 +17,21 @@ class ResultFromExtremes(AbstractResultFromModelFit):
self.burn_in_percentage = burn_in_percentage self.burn_in_percentage = burn_in_percentage
self.gev_param_name_to_dim = gev_param_name_to_dim self.gev_param_name_to_dim = gev_param_name_to_dim
@property def load_dataframe_from_r_matrix(self, k):
def results(self): r_matrix = self.name_to_value[k]
return np.array(self.name_to_value['results']) return pd.DataFrame(np.array(r_matrix), columns=r.colnames(r_matrix))
@property @property
def chain_info(self): def chain_info(self):
return np.array(self.name_to_value['chain.info']) return self.load_dataframe_from_r_matrix('chain.info')
@property
def results(self):
return self.load_dataframe_from_r_matrix('results')
@property @property
def df_posterior_samples(self) -> pd.DataFrame: def df_posterior_samples(self) -> pd.DataFrame:
d = dict(zip(GevParams.PARAM_NAMES, self.results.transpose())) df_all_samples = pd.concat([self.results.iloc[:, :-1], self.chain_info.iloc[:, -2:]], axis=1)
d['loglik'] = self.chain_info[:, -2]
d['prior'] = self.chain_info[:, -1]
df_all_samples = pd.DataFrame(d)
# Remove the burn in period # Remove the burn in period
burn_in_last_index = int(self.burn_in_percentage * len(df_all_samples)) burn_in_last_index = int(self.burn_in_percentage * len(df_all_samples))
df_posterior_samples = df_all_samples.iloc[burn_in_last_index:, :] df_posterior_samples = df_all_samples.iloc[burn_in_last_index:, :]
......
import io import io
import os.path as op import os.path as op
import random import random
import warnings import warnings
...@@ -14,7 +15,6 @@ from rpy2.rinterface._rinterface import RRuntimeError ...@@ -14,7 +15,6 @@ from rpy2.rinterface._rinterface import RRuntimeError
from rpy2.robjects import numpy2ri from rpy2.robjects import numpy2ri
from rpy2.robjects import pandas2ri from rpy2.robjects import pandas2ri
# Load R variables # Load R variables
from root_utils import get_root_path from root_utils import get_root_path
...@@ -22,6 +22,7 @@ r = ro.R() ...@@ -22,6 +22,7 @@ r = ro.R()
numpy2ri.activate() numpy2ri.activate()
pandas2ri.activate() pandas2ri.activate()
r.library('SpatialExtremes') r.library('SpatialExtremes')
r.library('data.table')
# Desactivate temporarily warnings # Desactivate temporarily warnings
default_filters = warnings.filters.copy() default_filters = warnings.filters.copy()
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
...@@ -81,13 +82,13 @@ def safe_run_r_estimator(function, data=None, use_start=False, threshold_max_abs ...@@ -81,13 +82,13 @@ def safe_run_r_estimator(function, data=None, use_start=False, threshold_max_abs
# Some checks for Spatial Extremes # Some checks for Spatial Extremes
if data is not None: if data is not None:
# Raise warning if the maximum absolute value is above a threshold # Raise warning if the maximum absolute value is above a threshold
assert isinstance(data, np.ndarray) if isinstance(data, np.ndarray):
maximum_absolute_value = np.max(np.abs(data)) maximum_absolute_value = np.max(np.abs(data))
if maximum_absolute_value > threshold_max_abs_value: if maximum_absolute_value > threshold_max_abs_value:
msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \ msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \
.format(maximum_absolute_value, threshold_max_abs_value) .format(maximum_absolute_value, threshold_max_abs_value)
msg += '\nPotentially in that case, data should be re-normalized' msg += '\nPotentially in that case, data should be re-normalized'
warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh) warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh)
parameters['data'] = data parameters['data'] = data
# First run without using start value # First run without using start value
# Then if it crashes, use start value # Then if it crashes, use start value
...@@ -120,6 +121,15 @@ def get_coord(df_coordinates: pd.DataFrame): ...@@ -120,6 +121,15 @@ def get_coord(df_coordinates: pd.DataFrame):
return coord return coord
def get_coord_df(df_coordinates: pd.DataFrame):
coord = pandas2ri.py2ri_pandasdataframe(df_coordinates)
# coord = r.transpose(coord)
colname = df_coordinates.columns[0]
coord.colnames = r.c(colname)
coord = r('data.frame')(coord, stringsAsFactors=True)
return coord
def get_null(): def get_null():
as_null = r['as.null'] as_null = r['as.null']
return as_null(1.0) return as_null(1.0)
...@@ -141,11 +151,9 @@ def old_coef_name_to_new_coef_name(): ...@@ -141,11 +151,9 @@ def old_coef_name_to_new_coef_name():
def get_margin_formula_extremes(fit_marge_form_dict) -> Dict: def get_margin_formula_extremes(fit_marge_form_dict) -> Dict:
d = old_coef_name_to_new_coef_name() d = old_coef_name_to_new_coef_name()
form_dict = {d[k]: v if v != 'NULL' else ' ~1' for k, v in fit_marge_form_dict.items()} form_dict = {d[k]: ' '.join(v.split()[1:]) if v != 'NULL' else ' ~1' for k, v in fit_marge_form_dict.items()}
return {k: robjects.Formula(v) for k, v in form_dict.items()} return {k: robjects.Formula(v) for k, v in form_dict.items()}
# def conversion_to_FloatVector(data): # def conversion_to_FloatVector(data):
# """Convert DataFrame or numpy array into FloatVector for r""" # """Convert DataFrame or numpy array into FloatVector for r"""
# if isinstance(data, pd.DataFrame): # if isinstance(data, pd.DataFrame):
......
...@@ -45,26 +45,52 @@ class TestGevTemporalBayesian(unittest.TestCase): ...@@ -45,26 +45,52 @@ class TestGevTemporalBayesian(unittest.TestCase):
estimator_fitted = fitted_linear_margin_estimator(StationaryStationModel, self.coordinates, self.dataset, estimator_fitted = fitted_linear_margin_estimator(StationaryStationModel, self.coordinates, self.dataset,
starting_year=0, starting_year=0,
fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR) fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR)
ref = {'loc': 0.082261, 'scale': 1.183703, 'shape': 0.882750} ref = {'loc': 0.30440183718071845, 'scale': 1.301639258861012, 'shape': 0.303652401064773}
for year in range(1, 3): for year in range(1, 3):
mle_params_estimated = estimator_fitted.margin_function_from_fit.get_gev_params(np.array([year])).to_dict() mle_params_estimated = estimator_fitted.margin_function_from_fit.get_gev_params(np.array([year])).to_dict()
for key in ref.keys(): for key in ref.keys():
self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3) self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3)
# def test_gev_temporal_margin_fit_non_stationary(self): def test_gev_temporal_margin_fit_non_stationary(self):
# # Create estimator # Create estimator
# estimator_fitted = fitted_linear_margin_estimator(NonStationaryLocationStationModel, self.coordinates, self.dataset, estimator = fitted_linear_margin_estimator(NonStationaryLocationStationModel, self.coordinates, self.dataset,
# starting_year=0, starting_year=0,
# fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR) fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR)
# result_from_model_fit = estimator_fitted.result_from_model_fit # type: ResultFromExtremes mu1_values = estimator.result_from_model_fit.df_posterior_samples.iloc[:, 1]
# print(result_from_model_fit.posterior_samples) self.assertTrue((mu1_values != 0).any())
# Checks that parameters returned are indeed different
mle_params_estimated_year1 = estimator.margin_function_from_fit.get_gev_params(np.array([1])).to_dict()
mle_params_estimated_year3 = estimator.margin_function_from_fit.get_gev_params(np.array([3])).to_dict()
self.assertNotEqual(mle_params_estimated_year1, mle_params_estimated_year3)
# print(estimator.result_from_model_fit.r) #
# ref = {'loc': 0.0219, 'scale': 1.0347, 'shape': 0.8295} # def test_gev_temporal_margin_fit_nonstationary_with_start_point(self):
# for year in range(1, 3): # # Create estimator
# mle_params_estimated = estimator.margin_function_from_fit.get_gev_params(np.array([year])).to_dict() # estimator = self.fit_non_stationary_estimator(starting_point=3)
# for key in ref.keys(): # self.assertNotEqual(estimator.margin_function_from_fit.mu1_temporal_trend, 0.0)
# self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3) # # Checks starting point parameter are well passed
# self.assertEqual(3, estimator.margin_function_from_fit.starting_point)
# # Checks that parameters returned are indeed different
# mle_params_estimated_year1 = estimator.margin_function_from_fit.get_gev_params(np.array([1])).to_dict()
# mle_params_estimated_year3 = estimator.margin_function_from_fit.get_gev_params(np.array([3])).to_dict()
# self.assertEqual(mle_params_estimated_year1, mle_params_estimated_year3)
# mle_params_estimated_year5 = estimator.margin_function_from_fit.get_gev_params(np.array([5])).to_dict()
# self.assertNotEqual(mle_params_estimated_year5, mle_params_estimated_year3)
#
# def fit_non_stationary_estimator(self, starting_point):
# margin_model = NonStationaryLocationStationModel(self.coordinates,
# starting_point=starting_point + self.start_year)
# estimator = LinearMarginEstimator(self.dataset, margin_model)
# estimator.fit()
# return estimator
#
# def test_two_different_starting_points(self):
# # Create two different estimators
# estimator1 = self.fit_non_stationary_estimator(starting_point=3)
# estimator2 = self.fit_non_stationary_estimator(starting_point=28)
# mu1_estimator1 = estimator1.margin_function_from_fit.mu1_temporal_trend
# mu1_estimator2 = estimator2.margin_function_from_fit.mu1_temporal_trend
# self.assertNotEqual(mu1_estimator1, mu1_estimator2)
if __name__ == '__main__': if __name__ == '__main__':
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment