Commit 3df67583 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[TEMPORAL LINEAR MARGIN][BAYESIAN ESTIMATION] add fevdPriorCustom as prior for...

[TEMPORAL LINEAR MARGIN][BAYESIAN ESTIMATION] add fevdPriorCustom as prior for the fit. update test accordingly. add non stationary version of fevD both in R and also in python. add test for that in python. modify upper and bottom quantile for eurocode
parent 7fea48fa
No related merge requests found
Showing with 95 additions and 72 deletions
+95 -72
......@@ -49,7 +49,8 @@ class ExtractEurocodeReturnLevelFromExtremes(object):
@property
def posterior_eurocode_return_level_uncertainty_interval_for_the_year_of_interest(self):
bottom_and_upper_quantile = (0.05, 0.95)
# Bottom and upper quantile correspond to the quantile
bottom_and_upper_quantile = (0.250, 0.975)
return [np.quantile(self.posterior_eurocode_return_level_samples_for_year_of_interest, q=q)
for q in bottom_and_upper_quantile]
......
......@@ -3,6 +3,7 @@
# Created by: erwan
# Created on: 04/10/2019
library(extRemes)
library(data.table)
library(stats4)
library(SpatialExtremes)
source('fevd_fixed.R')
......@@ -34,10 +35,18 @@ print(fevdPriorCustom(2.0, 0.0, 0.0))
# res = fevd(x_gev, method='Bayesian', priorFun="fevdPriorMyMy", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
res = fevd_fixed(x_gev, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
# res = fevd(x_gev, method='GMLE', iter=5000, verbose=TRUE, use.phi=FALSE)
# Without covariate
# res = fevd_fixed(x_gev, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
# Add covariate
coord <- matrix(ncol=1, nrow = N)
coord[,1]=seq(1,N,1)
colnames(coord) = c("T")
coord = data.frame(coord, stringsAsFactors = TRUE)
res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
print(res)
print('here')
......@@ -47,31 +56,10 @@ print(res$method)
print(res$priorFun)
print(res$priorParams)
m = res$results
print(class(res$chain.info))
print(dim(m))
print(m[1,])
print(m[1,1])
print(res$chain.info[1,])
# print(class(res$chain.info))
# ch = res$chain.info
# print(dim(ch))
# print(ch)
# # summary(res)
print(attributes(res))
# print('here')
# print(attr(res, 'chain.info'))
# print(attr(res, "method"))
# print(attr(res, "x"))
# print(attr(res, "priorParams"))
# print(res.method)
# Bayesian method is using a normal distribution functions for the shape parameter
# GMLE distribution is using a Beta distribution for the shape parameter
......@@ -5,7 +5,7 @@ from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model impo
from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import AbstractResultFromModelFit
from extreme_fit.model.result_from_model_fit.result_from_extremes import ResultFromExtremes
from extreme_fit.model.result_from_model_fit.result_from_ismev import ResultFromIsmev
from extreme_fit.model.utils import r, ro, get_null, get_margin_formula_extremes
from extreme_fit.model.utils import r, ro, get_null, get_margin_formula_extremes, get_coord, get_coord_df
from extreme_fit.model.utils import safe_run_r_estimator
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
......@@ -25,15 +25,15 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit:
assert data.shape[1] == len(df_coordinates_temp.values)
x = ro.FloatVector(data[0])
y = df_coordinates_temp.values
if self.fit_method == self.ISMEV_GEV_FIT_METHOD_STR:
return self.ismev_gev_fit(x, y)
return self.ismev_gev_fit(x, df_coordinates_temp)
if self.fit_method == self.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR:
return self.extremes_fevd_bayesian_fit(x, y)
return self.extremes_fevd_bayesian_fit(x, df_coordinates_temp)
# Gev Fit with isMev package
def ismev_gev_fit(self, x, y) -> ResultFromIsmev:
def ismev_gev_fit(self, x, df_coordinates_temp) -> ResultFromIsmev:
y = df_coordinates_temp.values
res = safe_run_r_estimator(function=r('gev.fit'), use_start=self.use_start_value,
xdat=x, y=y, mul=self.mul,
sigl=self.sigl, shl=self.shl)
......@@ -41,20 +41,18 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
# Gev fit with extRemes package
def extremes_fevd_bayesian_fit(self, x, y) -> ResultFromExtremes:
# (x_gev, method='Bayesian', priorFun="fevdPriorMyMy", priorParams=list(q=c(6), p = c(
# 9)), iter = 5000, verbose = TRUE, use.phi = FALSE)
r_type_argument_kwargs = {'use.phi': False}
def extremes_fevd_bayesian_fit(self, x, df_coordinates_temp) -> ResultFromExtremes:
# Disable the use of log sigma parametrization
r_type_argument_kwargs = {'use.phi': False,
'verbose': False}
r_type_argument_kwargs.update(get_margin_formula_extremes(self.margin_function_start_fit.form_dict))
# location.fun = ~1,
# scale.fun = ~1, shape.fun = ~1
# , priorParams = list(q=c(6), p=c(9))
y = get_coord_df(df_coordinates_temp)
res = safe_run_r_estimator(function=r('fevd_fixed'),
x=x,
data=y,
method='Bayesian',
# priorFun="fevdPriorCustom",
priorFun="fevdPriorCustom",
priorParams=r.list(q=r.c(6), p=r.c(9)),
iter=5000,
**r_type_argument_kwargs
)
......
......@@ -6,6 +6,7 @@ from extreme_fit.distribution.gev.gev_params import GevParams
from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import \
AbstractResultFromModelFit
from extreme_fit.model.result_from_model_fit.utils import get_margin_coef_dict
from extreme_fit.model.utils import r
class ResultFromExtremes(AbstractResultFromModelFit):
......@@ -16,20 +17,21 @@ class ResultFromExtremes(AbstractResultFromModelFit):
self.burn_in_percentage = burn_in_percentage
self.gev_param_name_to_dim = gev_param_name_to_dim
@property
def results(self):
return np.array(self.name_to_value['results'])
def load_dataframe_from_r_matrix(self, k):
r_matrix = self.name_to_value[k]
return pd.DataFrame(np.array(r_matrix), columns=r.colnames(r_matrix))
@property
def chain_info(self):
return np.array(self.name_to_value['chain.info'])
return self.load_dataframe_from_r_matrix('chain.info')
@property
def results(self):
return self.load_dataframe_from_r_matrix('results')
@property
def df_posterior_samples(self) -> pd.DataFrame:
d = dict(zip(GevParams.PARAM_NAMES, self.results.transpose()))
d['loglik'] = self.chain_info[:, -2]
d['prior'] = self.chain_info[:, -1]
df_all_samples = pd.DataFrame(d)
df_all_samples = pd.concat([self.results.iloc[:, :-1], self.chain_info.iloc[:, -2:]], axis=1)
# Remove the burn in period
burn_in_last_index = int(self.burn_in_percentage * len(df_all_samples))
df_posterior_samples = df_all_samples.iloc[burn_in_last_index:, :]
......
import io
import os.path as op
import random
import warnings
......@@ -14,7 +15,6 @@ from rpy2.rinterface._rinterface import RRuntimeError
from rpy2.robjects import numpy2ri
from rpy2.robjects import pandas2ri
# Load R variables
from root_utils import get_root_path
......@@ -22,6 +22,7 @@ r = ro.R()
numpy2ri.activate()
pandas2ri.activate()
r.library('SpatialExtremes')
r.library('data.table')
# Desactivate temporarily warnings
default_filters = warnings.filters.copy()
warnings.filterwarnings("ignore")
......@@ -81,13 +82,13 @@ def safe_run_r_estimator(function, data=None, use_start=False, threshold_max_abs
# Some checks for Spatial Extremes
if data is not None:
# Raise warning if the maximum absolute value is above a threshold
assert isinstance(data, np.ndarray)
maximum_absolute_value = np.max(np.abs(data))
if maximum_absolute_value > threshold_max_abs_value:
msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \
.format(maximum_absolute_value, threshold_max_abs_value)
msg += '\nPotentially in that case, data should be re-normalized'
warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh)
if isinstance(data, np.ndarray):
maximum_absolute_value = np.max(np.abs(data))
if maximum_absolute_value > threshold_max_abs_value:
msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \
.format(maximum_absolute_value, threshold_max_abs_value)
msg += '\nPotentially in that case, data should be re-normalized'
warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh)
parameters['data'] = data
# First run without using start value
# Then if it crashes, use start value
......@@ -120,6 +121,15 @@ def get_coord(df_coordinates: pd.DataFrame):
return coord
def get_coord_df(df_coordinates: pd.DataFrame):
coord = pandas2ri.py2ri_pandasdataframe(df_coordinates)
# coord = r.transpose(coord)
colname = df_coordinates.columns[0]
coord.colnames = r.c(colname)
coord = r('data.frame')(coord, stringsAsFactors=True)
return coord
def get_null():
as_null = r['as.null']
return as_null(1.0)
......@@ -141,11 +151,9 @@ def old_coef_name_to_new_coef_name():
def get_margin_formula_extremes(fit_marge_form_dict) -> Dict:
d = old_coef_name_to_new_coef_name()
form_dict = {d[k]: v if v != 'NULL' else ' ~1' for k, v in fit_marge_form_dict.items()}
form_dict = {d[k]: ' '.join(v.split()[1:]) if v != 'NULL' else ' ~1' for k, v in fit_marge_form_dict.items()}
return {k: robjects.Formula(v) for k, v in form_dict.items()}
# def conversion_to_FloatVector(data):
# """Convert DataFrame or numpy array into FloatVector for r"""
# if isinstance(data, pd.DataFrame):
......
......@@ -45,26 +45,52 @@ class TestGevTemporalBayesian(unittest.TestCase):
estimator_fitted = fitted_linear_margin_estimator(StationaryStationModel, self.coordinates, self.dataset,
starting_year=0,
fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR)
ref = {'loc': 0.082261, 'scale': 1.183703, 'shape': 0.882750}
ref = {'loc': 0.30440183718071845, 'scale': 1.301639258861012, 'shape': 0.303652401064773}
for year in range(1, 3):
mle_params_estimated = estimator_fitted.margin_function_from_fit.get_gev_params(np.array([year])).to_dict()
for key in ref.keys():
self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3)
# def test_gev_temporal_margin_fit_non_stationary(self):
# # Create estimator
# estimator_fitted = fitted_linear_margin_estimator(NonStationaryLocationStationModel, self.coordinates, self.dataset,
# starting_year=0,
# fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR)
# result_from_model_fit = estimator_fitted.result_from_model_fit # type: ResultFromExtremes
# print(result_from_model_fit.posterior_samples)
def test_gev_temporal_margin_fit_non_stationary(self):
# Create estimator
estimator = fitted_linear_margin_estimator(NonStationaryLocationStationModel, self.coordinates, self.dataset,
starting_year=0,
fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR)
mu1_values = estimator.result_from_model_fit.df_posterior_samples.iloc[:, 1]
self.assertTrue((mu1_values != 0).any())
# Checks that parameters returned are indeed different
mle_params_estimated_year1 = estimator.margin_function_from_fit.get_gev_params(np.array([1])).to_dict()
mle_params_estimated_year3 = estimator.margin_function_from_fit.get_gev_params(np.array([3])).to_dict()
self.assertNotEqual(mle_params_estimated_year1, mle_params_estimated_year3)
# print(estimator.result_from_model_fit.r)
# ref = {'loc': 0.0219, 'scale': 1.0347, 'shape': 0.8295}
# for year in range(1, 3):
# mle_params_estimated = estimator.margin_function_from_fit.get_gev_params(np.array([year])).to_dict()
# for key in ref.keys():
# self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3)
#
# def test_gev_temporal_margin_fit_nonstationary_with_start_point(self):
# # Create estimator
# estimator = self.fit_non_stationary_estimator(starting_point=3)
# self.assertNotEqual(estimator.margin_function_from_fit.mu1_temporal_trend, 0.0)
# # Checks starting point parameter are well passed
# self.assertEqual(3, estimator.margin_function_from_fit.starting_point)
# # Checks that parameters returned are indeed different
# mle_params_estimated_year1 = estimator.margin_function_from_fit.get_gev_params(np.array([1])).to_dict()
# mle_params_estimated_year3 = estimator.margin_function_from_fit.get_gev_params(np.array([3])).to_dict()
# self.assertEqual(mle_params_estimated_year1, mle_params_estimated_year3)
# mle_params_estimated_year5 = estimator.margin_function_from_fit.get_gev_params(np.array([5])).to_dict()
# self.assertNotEqual(mle_params_estimated_year5, mle_params_estimated_year3)
#
# def fit_non_stationary_estimator(self, starting_point):
# margin_model = NonStationaryLocationStationModel(self.coordinates,
# starting_point=starting_point + self.start_year)
# estimator = LinearMarginEstimator(self.dataset, margin_model)
# estimator.fit()
# return estimator
#
# def test_two_different_starting_points(self):
# # Create two different estimators
# estimator1 = self.fit_non_stationary_estimator(starting_point=3)
# estimator2 = self.fit_non_stationary_estimator(starting_point=28)
# mu1_estimator1 = estimator1.margin_function_from_fit.mu1_temporal_trend
# mu1_estimator2 = estimator2.margin_function_from_fit.mu1_temporal_trend
# self.assertNotEqual(mu1_estimator1, mu1_estimator2)
if __name__ == '__main__':
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment