diff --git a/experiment/eurocode_data/eurocode_return_level_uncertainties.py b/experiment/eurocode_data/eurocode_return_level_uncertainties.py index 1e416a8a78583d35b7887833ea1f96a9b734ed1e..750adf6445a4e7c450f370b5751aba3bdb6fe4ac 100644 --- a/experiment/eurocode_data/eurocode_return_level_uncertainties.py +++ b/experiment/eurocode_data/eurocode_return_level_uncertainties.py @@ -49,7 +49,8 @@ class ExtractEurocodeReturnLevelFromExtremes(object): @property def posterior_eurocode_return_level_uncertainty_interval_for_the_year_of_interest(self): - bottom_and_upper_quantile = (0.05, 0.95) + # Bottom and upper quantile correspond to the quantile + bottom_and_upper_quantile = (0.250, 0.975) return [np.quantile(self.posterior_eurocode_return_level_samples_for_year_of_interest, q=q) for q in bottom_and_upper_quantile] diff --git a/extreme_fit/distribution/gev/main_fevd_fixed.R b/extreme_fit/distribution/gev/main_fevd_fixed.R index 6c9dca5b8f69b070845383df4ceb2f1d2834e80f..a49ec49bcad9d4d9a7e1c630e9c6e376fcc799aa 100644 --- a/extreme_fit/distribution/gev/main_fevd_fixed.R +++ b/extreme_fit/distribution/gev/main_fevd_fixed.R @@ -3,6 +3,7 @@ # Created by: erwan # Created on: 04/10/2019 library(extRemes) +library(data.table) library(stats4) library(SpatialExtremes) source('fevd_fixed.R') @@ -34,10 +35,18 @@ print(fevdPriorCustom(2.0, 0.0, 0.0)) - # res = fevd(x_gev, method='Bayesian', priorFun="fevdPriorMyMy", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE) -res = fevd_fixed(x_gev, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE) # res = fevd(x_gev, method='GMLE', iter=5000, verbose=TRUE, use.phi=FALSE) + +# Without covariate +# res = fevd_fixed(x_gev, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE) + +# Add covariate +coord <- matrix(ncol=1, nrow = N) +coord[,1]=seq(1,N,1) +colnames(coord) = c("T") +coord = data.frame(coord, stringsAsFactors = TRUE) +res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE) print(res) print('here') @@ -47,31 +56,10 @@ print(res$method) print(res$priorFun) print(res$priorParams) m = res$results +print(class(res$chain.info)) print(dim(m)) print(m[1,]) print(m[1,1]) print(res$chain.info[1,]) - - -# print(class(res$chain.info)) -# ch = res$chain.info -# print(dim(ch)) -# print(ch) - -# # summary(res) -print(attributes(res)) -# print('here') -# print(attr(res, 'chain.info')) -# print(attr(res, "method")) -# print(attr(res, "x")) -# print(attr(res, "priorParams")) - -# print(res.method) - - - - -# Bayesian method is using a normal distribution functions for the shape parameter -# GMLE distribution is using a Beta distribution for the shape parameter diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py index 1b96be1dfcd07848f22a24dba38e316ea8c99f7c..f3002d3bf16b36545cb600f3800d90f8e2f0078b 100644 --- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py +++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py @@ -5,7 +5,7 @@ from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model impo from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import AbstractResultFromModelFit from extreme_fit.model.result_from_model_fit.result_from_extremes import ResultFromExtremes from extreme_fit.model.result_from_model_fit.result_from_ismev import ResultFromIsmev -from extreme_fit.model.utils import r, ro, get_null, get_margin_formula_extremes +from extreme_fit.model.utils import r, ro, get_null, get_margin_formula_extremes, get_coord, get_coord_df from extreme_fit.model.utils import safe_run_r_estimator from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates @@ -25,15 +25,15 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit: assert data.shape[1] == len(df_coordinates_temp.values) x = ro.FloatVector(data[0]) - y = df_coordinates_temp.values if self.fit_method == self.ISMEV_GEV_FIT_METHOD_STR: - return self.ismev_gev_fit(x, y) + return self.ismev_gev_fit(x, df_coordinates_temp) if self.fit_method == self.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR: - return self.extremes_fevd_bayesian_fit(x, y) + return self.extremes_fevd_bayesian_fit(x, df_coordinates_temp) # Gev Fit with isMev package - def ismev_gev_fit(self, x, y) -> ResultFromIsmev: + def ismev_gev_fit(self, x, df_coordinates_temp) -> ResultFromIsmev: + y = df_coordinates_temp.values res = safe_run_r_estimator(function=r('gev.fit'), use_start=self.use_start_value, xdat=x, y=y, mul=self.mul, sigl=self.sigl, shl=self.shl) @@ -41,20 +41,18 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): # Gev fit with extRemes package - def extremes_fevd_bayesian_fit(self, x, y) -> ResultFromExtremes: - # (x_gev, method='Bayesian', priorFun="fevdPriorMyMy", priorParams=list(q=c(6), p = c( - # 9)), iter = 5000, verbose = TRUE, use.phi = FALSE) - - r_type_argument_kwargs = {'use.phi': False} + def extremes_fevd_bayesian_fit(self, x, df_coordinates_temp) -> ResultFromExtremes: + # Disable the use of log sigma parametrization + r_type_argument_kwargs = {'use.phi': False, + 'verbose': False} r_type_argument_kwargs.update(get_margin_formula_extremes(self.margin_function_start_fit.form_dict)) - # location.fun = ~1, - # scale.fun = ~1, shape.fun = ~1 - # , priorParams = list(q=c(6), p=c(9)) + y = get_coord_df(df_coordinates_temp) res = safe_run_r_estimator(function=r('fevd_fixed'), x=x, data=y, method='Bayesian', - # priorFun="fevdPriorCustom", + priorFun="fevdPriorCustom", + priorParams=r.list(q=r.c(6), p=r.c(9)), iter=5000, **r_type_argument_kwargs ) diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes.py b/extreme_fit/model/result_from_model_fit/result_from_extremes.py index 162fe62861cd2c31f2c1f36f8c372b3514601089..9bbb1e22f9aa425bf3c890e8f9df2f95e2fd0977 100644 --- a/extreme_fit/model/result_from_model_fit/result_from_extremes.py +++ b/extreme_fit/model/result_from_model_fit/result_from_extremes.py @@ -6,6 +6,7 @@ from extreme_fit.distribution.gev.gev_params import GevParams from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import \ AbstractResultFromModelFit from extreme_fit.model.result_from_model_fit.utils import get_margin_coef_dict +from extreme_fit.model.utils import r class ResultFromExtremes(AbstractResultFromModelFit): @@ -16,20 +17,21 @@ class ResultFromExtremes(AbstractResultFromModelFit): self.burn_in_percentage = burn_in_percentage self.gev_param_name_to_dim = gev_param_name_to_dim - @property - def results(self): - return np.array(self.name_to_value['results']) + def load_dataframe_from_r_matrix(self, k): + r_matrix = self.name_to_value[k] + return pd.DataFrame(np.array(r_matrix), columns=r.colnames(r_matrix)) @property def chain_info(self): - return np.array(self.name_to_value['chain.info']) + return self.load_dataframe_from_r_matrix('chain.info') + + @property + def results(self): + return self.load_dataframe_from_r_matrix('results') @property def df_posterior_samples(self) -> pd.DataFrame: - d = dict(zip(GevParams.PARAM_NAMES, self.results.transpose())) - d['loglik'] = self.chain_info[:, -2] - d['prior'] = self.chain_info[:, -1] - df_all_samples = pd.DataFrame(d) + df_all_samples = pd.concat([self.results.iloc[:, :-1], self.chain_info.iloc[:, -2:]], axis=1) # Remove the burn in period burn_in_last_index = int(self.burn_in_percentage * len(df_all_samples)) df_posterior_samples = df_all_samples.iloc[burn_in_last_index:, :] diff --git a/extreme_fit/model/utils.py b/extreme_fit/model/utils.py index aec823bf7131b2462c4c7bac8ca4e41e87b4e5e3..90dfd27877ed7d2702b4bcbd45134e61c6a554cd 100644 --- a/extreme_fit/model/utils.py +++ b/extreme_fit/model/utils.py @@ -1,4 +1,5 @@ import io + import os.path as op import random import warnings @@ -14,7 +15,6 @@ from rpy2.rinterface._rinterface import RRuntimeError from rpy2.robjects import numpy2ri from rpy2.robjects import pandas2ri - # Load R variables from root_utils import get_root_path @@ -22,6 +22,7 @@ r = ro.R() numpy2ri.activate() pandas2ri.activate() r.library('SpatialExtremes') +r.library('data.table') # Desactivate temporarily warnings default_filters = warnings.filters.copy() warnings.filterwarnings("ignore") @@ -81,13 +82,13 @@ def safe_run_r_estimator(function, data=None, use_start=False, threshold_max_abs # Some checks for Spatial Extremes if data is not None: # Raise warning if the maximum absolute value is above a threshold - assert isinstance(data, np.ndarray) - maximum_absolute_value = np.max(np.abs(data)) - if maximum_absolute_value > threshold_max_abs_value: - msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \ - .format(maximum_absolute_value, threshold_max_abs_value) - msg += '\nPotentially in that case, data should be re-normalized' - warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh) + if isinstance(data, np.ndarray): + maximum_absolute_value = np.max(np.abs(data)) + if maximum_absolute_value > threshold_max_abs_value: + msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \ + .format(maximum_absolute_value, threshold_max_abs_value) + msg += '\nPotentially in that case, data should be re-normalized' + warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh) parameters['data'] = data # First run without using start value # Then if it crashes, use start value @@ -120,6 +121,15 @@ def get_coord(df_coordinates: pd.DataFrame): return coord +def get_coord_df(df_coordinates: pd.DataFrame): + coord = pandas2ri.py2ri_pandasdataframe(df_coordinates) + # coord = r.transpose(coord) + colname = df_coordinates.columns[0] + coord.colnames = r.c(colname) + coord = r('data.frame')(coord, stringsAsFactors=True) + return coord + + def get_null(): as_null = r['as.null'] return as_null(1.0) @@ -141,11 +151,9 @@ def old_coef_name_to_new_coef_name(): def get_margin_formula_extremes(fit_marge_form_dict) -> Dict: d = old_coef_name_to_new_coef_name() - form_dict = {d[k]: v if v != 'NULL' else ' ~1' for k, v in fit_marge_form_dict.items()} + form_dict = {d[k]: ' '.join(v.split()[1:]) if v != 'NULL' else ' ~1' for k, v in fit_marge_form_dict.items()} return {k: robjects.Formula(v) for k, v in form_dict.items()} - - # def conversion_to_FloatVector(data): # """Convert DataFrame or numpy array into FloatVector for r""" # if isinstance(data, pd.DataFrame): diff --git a/test/test_extreme_estimator/test_margin_fits/test_gev/test_gev_temporal_bayesian.py b/test/test_extreme_estimator/test_margin_fits/test_gev/test_gev_temporal_bayesian.py index 2b9aad9aef6871dc19c5de00fc6746293c3427ca..36aeac8168b1ccae46ad5b2431273d37d78bbff6 100644 --- a/test/test_extreme_estimator/test_margin_fits/test_gev/test_gev_temporal_bayesian.py +++ b/test/test_extreme_estimator/test_margin_fits/test_gev/test_gev_temporal_bayesian.py @@ -45,26 +45,52 @@ class TestGevTemporalBayesian(unittest.TestCase): estimator_fitted = fitted_linear_margin_estimator(StationaryStationModel, self.coordinates, self.dataset, starting_year=0, fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR) - ref = {'loc': 0.082261, 'scale': 1.183703, 'shape': 0.882750} + ref = {'loc': 0.30440183718071845, 'scale': 1.301639258861012, 'shape': 0.303652401064773} for year in range(1, 3): mle_params_estimated = estimator_fitted.margin_function_from_fit.get_gev_params(np.array([year])).to_dict() for key in ref.keys(): self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3) - # def test_gev_temporal_margin_fit_non_stationary(self): - # # Create estimator - # estimator_fitted = fitted_linear_margin_estimator(NonStationaryLocationStationModel, self.coordinates, self.dataset, - # starting_year=0, - # fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR) - # result_from_model_fit = estimator_fitted.result_from_model_fit # type: ResultFromExtremes - # print(result_from_model_fit.posterior_samples) + def test_gev_temporal_margin_fit_non_stationary(self): + # Create estimator + estimator = fitted_linear_margin_estimator(NonStationaryLocationStationModel, self.coordinates, self.dataset, + starting_year=0, + fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR) + mu1_values = estimator.result_from_model_fit.df_posterior_samples.iloc[:, 1] + self.assertTrue((mu1_values != 0).any()) + # Checks that parameters returned are indeed different + mle_params_estimated_year1 = estimator.margin_function_from_fit.get_gev_params(np.array([1])).to_dict() + mle_params_estimated_year3 = estimator.margin_function_from_fit.get_gev_params(np.array([3])).to_dict() + self.assertNotEqual(mle_params_estimated_year1, mle_params_estimated_year3) - # print(estimator.result_from_model_fit.r) - # ref = {'loc': 0.0219, 'scale': 1.0347, 'shape': 0.8295} - # for year in range(1, 3): - # mle_params_estimated = estimator.margin_function_from_fit.get_gev_params(np.array([year])).to_dict() - # for key in ref.keys(): - # self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3) + # + # def test_gev_temporal_margin_fit_nonstationary_with_start_point(self): + # # Create estimator + # estimator = self.fit_non_stationary_estimator(starting_point=3) + # self.assertNotEqual(estimator.margin_function_from_fit.mu1_temporal_trend, 0.0) + # # Checks starting point parameter are well passed + # self.assertEqual(3, estimator.margin_function_from_fit.starting_point) + # # Checks that parameters returned are indeed different + # mle_params_estimated_year1 = estimator.margin_function_from_fit.get_gev_params(np.array([1])).to_dict() + # mle_params_estimated_year3 = estimator.margin_function_from_fit.get_gev_params(np.array([3])).to_dict() + # self.assertEqual(mle_params_estimated_year1, mle_params_estimated_year3) + # mle_params_estimated_year5 = estimator.margin_function_from_fit.get_gev_params(np.array([5])).to_dict() + # self.assertNotEqual(mle_params_estimated_year5, mle_params_estimated_year3) + # + # def fit_non_stationary_estimator(self, starting_point): + # margin_model = NonStationaryLocationStationModel(self.coordinates, + # starting_point=starting_point + self.start_year) + # estimator = LinearMarginEstimator(self.dataset, margin_model) + # estimator.fit() + # return estimator + # + # def test_two_different_starting_points(self): + # # Create two different estimators + # estimator1 = self.fit_non_stationary_estimator(starting_point=3) + # estimator2 = self.fit_non_stationary_estimator(starting_point=28) + # mu1_estimator1 = estimator1.margin_function_from_fit.mu1_temporal_trend + # mu1_estimator2 = estimator2.margin_function_from_fit.mu1_temporal_trend + # self.assertNotEqual(mu1_estimator1, mu1_estimator2) if __name__ == '__main__':