From 3df67583ef2505647fcc47134bb28bf93e6d2e68 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Mon, 21 Oct 2019 18:38:17 +0200
Subject: [PATCH] [TEMPORAL LINEAR MARGIN][BAYESIAN ESTIMATION] add
 fevdPriorCustom as prior for the fit. update test accordingly. add non
 stationary version of fevD both in R and also in python. add test for that in
 python. modify upper and bottom quantile for eurocode

---
 .../eurocode_return_level_uncertainties.py    |  3 +-
 .../distribution/gev/main_fevd_fixed.R        | 36 +++++--------
 .../abstract_temporal_linear_margin_model.py  | 26 +++++----
 .../result_from_extremes.py                   | 18 ++++---
 extreme_fit/model/utils.py                    | 30 +++++++----
 .../test_gev/test_gev_temporal_bayesian.py    | 54 ++++++++++++++-----
 6 files changed, 95 insertions(+), 72 deletions(-)

diff --git a/experiment/eurocode_data/eurocode_return_level_uncertainties.py b/experiment/eurocode_data/eurocode_return_level_uncertainties.py
index 1e416a8a..750adf64 100644
--- a/experiment/eurocode_data/eurocode_return_level_uncertainties.py
+++ b/experiment/eurocode_data/eurocode_return_level_uncertainties.py
@@ -49,7 +49,8 @@ class ExtractEurocodeReturnLevelFromExtremes(object):
 
     @property
     def posterior_eurocode_return_level_uncertainty_interval_for_the_year_of_interest(self):
-        bottom_and_upper_quantile = (0.05, 0.95)
+        # Bottom and upper quantile correspond to the quantile
+        bottom_and_upper_quantile = (0.250, 0.975)
         return [np.quantile(self.posterior_eurocode_return_level_samples_for_year_of_interest, q=q)
                 for q in bottom_and_upper_quantile]
 
diff --git a/extreme_fit/distribution/gev/main_fevd_fixed.R b/extreme_fit/distribution/gev/main_fevd_fixed.R
index 6c9dca5b..a49ec49b 100644
--- a/extreme_fit/distribution/gev/main_fevd_fixed.R
+++ b/extreme_fit/distribution/gev/main_fevd_fixed.R
@@ -3,6 +3,7 @@
 # Created by: erwan
 # Created on: 04/10/2019
 library(extRemes)
+library(data.table)
 library(stats4)
 library(SpatialExtremes)
 source('fevd_fixed.R')
@@ -34,10 +35,18 @@ print(fevdPriorCustom(2.0, 0.0, 0.0))
 
 
 
-
 # res = fevd(x_gev, method='Bayesian', priorFun="fevdPriorMyMy", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
-res = fevd_fixed(x_gev, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
 # res = fevd(x_gev, method='GMLE', iter=5000, verbose=TRUE, use.phi=FALSE)
+
+# Without covariate
+# res = fevd_fixed(x_gev, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
+
+# Add covariate
+coord <- matrix(ncol=1, nrow = N)
+coord[,1]=seq(1,N,1)
+colnames(coord) = c("T")
+coord = data.frame(coord, stringsAsFactors = TRUE)
+res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE)
 print(res)
 
 print('here')
@@ -47,31 +56,10 @@ print(res$method)
 print(res$priorFun)
 print(res$priorParams)
 m = res$results
+print(class(res$chain.info))
 print(dim(m))
 print(m[1,])
 print(m[1,1])
 print(res$chain.info[1,])
 
 
-
-
-# print(class(res$chain.info))
-# ch = res$chain.info
-# print(dim(ch))
-# print(ch)
-
-# # summary(res)
-print(attributes(res))
-# print('here')
-# print(attr(res, 'chain.info'))
-# print(attr(res, "method"))
-# print(attr(res, "x"))
-# print(attr(res, "priorParams"))
-
-# print(res.method)
-
-
-
-
-# Bayesian method is using a normal distribution functions for the shape parameter
-# GMLE distribution is using a Beta distribution for the shape parameter
diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py
index 1b96be1d..f3002d3b 100644
--- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py
+++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py
@@ -5,7 +5,7 @@ from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model impo
 from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import AbstractResultFromModelFit
 from extreme_fit.model.result_from_model_fit.result_from_extremes import ResultFromExtremes
 from extreme_fit.model.result_from_model_fit.result_from_ismev import ResultFromIsmev
-from extreme_fit.model.utils import r, ro, get_null, get_margin_formula_extremes
+from extreme_fit.model.utils import r, ro, get_null, get_margin_formula_extremes, get_coord, get_coord_df
 from extreme_fit.model.utils import safe_run_r_estimator
 from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
 
@@ -25,15 +25,15 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
                                   df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit:
         assert data.shape[1] == len(df_coordinates_temp.values)
         x = ro.FloatVector(data[0])
-        y = df_coordinates_temp.values
         if self.fit_method == self.ISMEV_GEV_FIT_METHOD_STR:
-            return self.ismev_gev_fit(x, y)
+            return self.ismev_gev_fit(x, df_coordinates_temp)
         if self.fit_method == self.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR:
-            return self.extremes_fevd_bayesian_fit(x, y)
+            return self.extremes_fevd_bayesian_fit(x, df_coordinates_temp)
 
     # Gev Fit with isMev package
 
-    def ismev_gev_fit(self, x, y) -> ResultFromIsmev:
+    def ismev_gev_fit(self, x, df_coordinates_temp) -> ResultFromIsmev:
+        y = df_coordinates_temp.values
         res = safe_run_r_estimator(function=r('gev.fit'), use_start=self.use_start_value,
                                    xdat=x, y=y, mul=self.mul,
                                    sigl=self.sigl, shl=self.shl)
@@ -41,20 +41,18 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
 
     # Gev fit with extRemes package
 
-    def extremes_fevd_bayesian_fit(self, x, y) -> ResultFromExtremes:
-        # (x_gev, method='Bayesian', priorFun="fevdPriorMyMy", priorParams=list(q=c(6), p = c(
-        #     9)), iter = 5000, verbose = TRUE, use.phi = FALSE)
-
-        r_type_argument_kwargs = {'use.phi': False}
+    def extremes_fevd_bayesian_fit(self, x, df_coordinates_temp) -> ResultFromExtremes:
+        # Disable the use of log sigma parametrization
+        r_type_argument_kwargs = {'use.phi': False,
+                                  'verbose': False}
         r_type_argument_kwargs.update(get_margin_formula_extremes(self.margin_function_start_fit.form_dict))
-        # location.fun = ~1,
-        # scale.fun = ~1, shape.fun = ~1
-        # , priorParams = list(q=c(6), p=c(9))
+        y = get_coord_df(df_coordinates_temp)
         res = safe_run_r_estimator(function=r('fevd_fixed'),
                                    x=x,
                                    data=y,
                                    method='Bayesian',
-                                   # priorFun="fevdPriorCustom",
+                                   priorFun="fevdPriorCustom",
+                                   priorParams=r.list(q=r.c(6), p=r.c(9)),
                                    iter=5000,
                                    **r_type_argument_kwargs
                                    )
diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes.py b/extreme_fit/model/result_from_model_fit/result_from_extremes.py
index 162fe628..9bbb1e22 100644
--- a/extreme_fit/model/result_from_model_fit/result_from_extremes.py
+++ b/extreme_fit/model/result_from_model_fit/result_from_extremes.py
@@ -6,6 +6,7 @@ from extreme_fit.distribution.gev.gev_params import GevParams
 from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import \
     AbstractResultFromModelFit
 from extreme_fit.model.result_from_model_fit.utils import get_margin_coef_dict
+from extreme_fit.model.utils import r
 
 
 class ResultFromExtremes(AbstractResultFromModelFit):
@@ -16,20 +17,21 @@ class ResultFromExtremes(AbstractResultFromModelFit):
         self.burn_in_percentage = burn_in_percentage
         self.gev_param_name_to_dim = gev_param_name_to_dim
 
-    @property
-    def results(self):
-        return np.array(self.name_to_value['results'])
+    def load_dataframe_from_r_matrix(self, k):
+        r_matrix = self.name_to_value[k]
+        return pd.DataFrame(np.array(r_matrix), columns=r.colnames(r_matrix))
 
     @property
     def chain_info(self):
-        return np.array(self.name_to_value['chain.info'])
+        return self.load_dataframe_from_r_matrix('chain.info')
+
+    @property
+    def results(self):
+        return self.load_dataframe_from_r_matrix('results')
 
     @property
     def df_posterior_samples(self) -> pd.DataFrame:
-        d = dict(zip(GevParams.PARAM_NAMES, self.results.transpose()))
-        d['loglik'] = self.chain_info[:, -2]
-        d['prior'] = self.chain_info[:, -1]
-        df_all_samples = pd.DataFrame(d)
+        df_all_samples = pd.concat([self.results.iloc[:, :-1], self.chain_info.iloc[:, -2:]], axis=1)
         # Remove the burn in period
         burn_in_last_index = int(self.burn_in_percentage * len(df_all_samples))
         df_posterior_samples = df_all_samples.iloc[burn_in_last_index:, :]
diff --git a/extreme_fit/model/utils.py b/extreme_fit/model/utils.py
index aec823bf..90dfd278 100644
--- a/extreme_fit/model/utils.py
+++ b/extreme_fit/model/utils.py
@@ -1,4 +1,5 @@
 import io
+
 import os.path as op
 import random
 import warnings
@@ -14,7 +15,6 @@ from rpy2.rinterface._rinterface import RRuntimeError
 from rpy2.robjects import numpy2ri
 from rpy2.robjects import pandas2ri
 
-
 # Load R variables
 from root_utils import get_root_path
 
@@ -22,6 +22,7 @@ r = ro.R()
 numpy2ri.activate()
 pandas2ri.activate()
 r.library('SpatialExtremes')
+r.library('data.table')
 # Desactivate temporarily warnings
 default_filters = warnings.filters.copy()
 warnings.filterwarnings("ignore")
@@ -81,13 +82,13 @@ def safe_run_r_estimator(function, data=None, use_start=False, threshold_max_abs
     # Some checks for Spatial Extremes
     if data is not None:
         # Raise warning if the maximum absolute value is above a threshold
-        assert isinstance(data, np.ndarray)
-        maximum_absolute_value = np.max(np.abs(data))
-        if maximum_absolute_value > threshold_max_abs_value:
-            msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \
-                .format(maximum_absolute_value, threshold_max_abs_value)
-            msg += '\nPotentially in that case, data should be re-normalized'
-            warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh)
+        if isinstance(data, np.ndarray):
+            maximum_absolute_value = np.max(np.abs(data))
+            if maximum_absolute_value > threshold_max_abs_value:
+                msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \
+                    .format(maximum_absolute_value, threshold_max_abs_value)
+                msg += '\nPotentially in that case, data should be re-normalized'
+                warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh)
         parameters['data'] = data
     # First run without using start value
     # Then if it crashes, use start value
@@ -120,6 +121,15 @@ def get_coord(df_coordinates: pd.DataFrame):
     return coord
 
 
+def get_coord_df(df_coordinates: pd.DataFrame):
+    coord = pandas2ri.py2ri_pandasdataframe(df_coordinates)
+    # coord = r.transpose(coord)
+    colname = df_coordinates.columns[0]
+    coord.colnames = r.c(colname)
+    coord = r('data.frame')(coord, stringsAsFactors=True)
+    return coord
+
+
 def get_null():
     as_null = r['as.null']
     return as_null(1.0)
@@ -141,11 +151,9 @@ def old_coef_name_to_new_coef_name():
 
 def get_margin_formula_extremes(fit_marge_form_dict) -> Dict:
     d = old_coef_name_to_new_coef_name()
-    form_dict = {d[k]: v if v != 'NULL' else ' ~1' for k, v in fit_marge_form_dict.items()}
+    form_dict = {d[k]: ' '.join(v.split()[1:]) if v != 'NULL' else ' ~1' for k, v in fit_marge_form_dict.items()}
     return {k: robjects.Formula(v) for k, v in form_dict.items()}
 
-
-
 # def conversion_to_FloatVector(data):
 #     """Convert DataFrame or numpy array into FloatVector for r"""
 #     if isinstance(data, pd.DataFrame):
diff --git a/test/test_extreme_estimator/test_margin_fits/test_gev/test_gev_temporal_bayesian.py b/test/test_extreme_estimator/test_margin_fits/test_gev/test_gev_temporal_bayesian.py
index 2b9aad9a..36aeac81 100644
--- a/test/test_extreme_estimator/test_margin_fits/test_gev/test_gev_temporal_bayesian.py
+++ b/test/test_extreme_estimator/test_margin_fits/test_gev/test_gev_temporal_bayesian.py
@@ -45,26 +45,52 @@ class TestGevTemporalBayesian(unittest.TestCase):
         estimator_fitted = fitted_linear_margin_estimator(StationaryStationModel, self.coordinates, self.dataset,
                                                           starting_year=0,
                                                           fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR)
-        ref = {'loc': 0.082261, 'scale': 1.183703, 'shape': 0.882750}
+        ref = {'loc': 0.30440183718071845, 'scale': 1.301639258861012, 'shape': 0.303652401064773}
         for year in range(1, 3):
             mle_params_estimated = estimator_fitted.margin_function_from_fit.get_gev_params(np.array([year])).to_dict()
             for key in ref.keys():
                 self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3)
 
-    # def test_gev_temporal_margin_fit_non_stationary(self):
-    #     # Create estimator
-    #     estimator_fitted = fitted_linear_margin_estimator(NonStationaryLocationStationModel, self.coordinates, self.dataset,
-    #                                                       starting_year=0,
-    #                                                       fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR)
-    #     result_from_model_fit = estimator_fitted.result_from_model_fit  # type: ResultFromExtremes
-    #     print(result_from_model_fit.posterior_samples)
+    def test_gev_temporal_margin_fit_non_stationary(self):
+        # Create estimator
+        estimator = fitted_linear_margin_estimator(NonStationaryLocationStationModel, self.coordinates, self.dataset,
+                                                   starting_year=0,
+                                                   fit_method=AbstractTemporalLinearMarginModel.EXTREMES_FEVD_BAYESIAN_FIT_METHOD_STR)
+        mu1_values = estimator.result_from_model_fit.df_posterior_samples.iloc[:, 1]
+        self.assertTrue((mu1_values != 0).any())
+        # Checks that parameters returned are indeed different
+        mle_params_estimated_year1 = estimator.margin_function_from_fit.get_gev_params(np.array([1])).to_dict()
+        mle_params_estimated_year3 = estimator.margin_function_from_fit.get_gev_params(np.array([3])).to_dict()
+        self.assertNotEqual(mle_params_estimated_year1, mle_params_estimated_year3)
 
-    # print(estimator.result_from_model_fit.r)
-    # ref = {'loc': 0.0219, 'scale': 1.0347, 'shape': 0.8295}
-    # for year in range(1, 3):
-    #     mle_params_estimated = estimator.margin_function_from_fit.get_gev_params(np.array([year])).to_dict()
-    #     for key in ref.keys():
-    #         self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3)
+    #
+    # def test_gev_temporal_margin_fit_nonstationary_with_start_point(self):
+    #     # Create estimator
+    #     estimator = self.fit_non_stationary_estimator(starting_point=3)
+    #     self.assertNotEqual(estimator.margin_function_from_fit.mu1_temporal_trend, 0.0)
+    #     # Checks starting point parameter are well passed
+    #     self.assertEqual(3, estimator.margin_function_from_fit.starting_point)
+    #     # Checks that parameters returned are indeed different
+    #     mle_params_estimated_year1 = estimator.margin_function_from_fit.get_gev_params(np.array([1])).to_dict()
+    #     mle_params_estimated_year3 = estimator.margin_function_from_fit.get_gev_params(np.array([3])).to_dict()
+    #     self.assertEqual(mle_params_estimated_year1, mle_params_estimated_year3)
+    #     mle_params_estimated_year5 = estimator.margin_function_from_fit.get_gev_params(np.array([5])).to_dict()
+    #     self.assertNotEqual(mle_params_estimated_year5, mle_params_estimated_year3)
+    #
+    # def fit_non_stationary_estimator(self, starting_point):
+    #     margin_model = NonStationaryLocationStationModel(self.coordinates,
+    #                                                      starting_point=starting_point + self.start_year)
+    #     estimator = LinearMarginEstimator(self.dataset, margin_model)
+    #     estimator.fit()
+    #     return estimator
+    #
+    # def test_two_different_starting_points(self):
+    #     # Create two different estimators
+    #     estimator1 = self.fit_non_stationary_estimator(starting_point=3)
+    #     estimator2 = self.fit_non_stationary_estimator(starting_point=28)
+    #     mu1_estimator1 = estimator1.margin_function_from_fit.mu1_temporal_trend
+    #     mu1_estimator2 = estimator2.margin_function_from_fit.mu1_temporal_trend
+    #     self.assertNotEqual(mu1_estimator1, mu1_estimator2)
 
 
 if __name__ == '__main__':
-- 
GitLab