From ac48aac0dffdf1ebd0d9f779a22c35724a146b3c Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Thu, 8 Apr 2021 16:34:27 +0200
Subject: [PATCH] [projection snowfall] add spline simulation. implement
 splines for k=1. pass test_gev_temporal_spline.py

---
 extreme_fit/distribution/gev/evgam_example.R  | 36 +++++---
 .../result_from_extremes/result_from_evgam.py | 14 ++-
 .../simulation/__init__.py                    |  0
 .../simulation/spline_simulation.py           | 90 +++++++++++++++++++
 .../test_gev_temporal_spline.py               | 32 +++----
 5 files changed, 142 insertions(+), 30 deletions(-)
 create mode 100644 projects/projected_extreme_snowfall/simulation/__init__.py
 create mode 100644 projects/projected_extreme_snowfall/simulation/spline_simulation.py

diff --git a/extreme_fit/distribution/gev/evgam_example.R b/extreme_fit/distribution/gev/evgam_example.R
index 5c9b143b..65fa9e0a 100644
--- a/extreme_fit/distribution/gev/evgam_example.R
+++ b/extreme_fit/distribution/gev/evgam_example.R
@@ -4,26 +4,36 @@
 # Created on: 30/03/2021
 source('evgam_fixed.R')
 library(evgam)
+library(mgcv)
+library(SpatialExtremes)
 data(COprcp)
-COprcp$year <- format(COprcp$date, "%Y")
-COprcp_gev <- aggregate(prcp ~ year + meta_row, COprcp, max)
-COprcp_gev <- cbind(COprcp_gev, COprcp_meta[COprcp_gev$meta_row,])
+set.seed(42)
+N <- 101
+loc = 0; scale = 1; shape <- 1
+x_gev <- rgev(N, loc = loc, scale = scale, shape = shape)
+years = seq(0, 100) / 100
+df <- data.frame(x_gev, years)
+colnames(df) <- c("prcp", "year")
+print(length(years))
 # print(COprcp_gev)
 print('before call')
-# fmla_gev2 <- list(prcp ~ s(elev, k=3, m=1), ~ 1, ~ 1)
-fmla_gev2 <- list(prcp ~ s(elev, bs="bs", k=4, m=2), ~ 1, ~ 1)
-m_gev2 <- evgam_fixed(fmla_gev2, data=COprcp_gev, family="gev")
+fmla_gev2 <- list(prcp ~ s(year, k=3, m=1, bs="bs"), ~ 1, ~ 1)
+# fmla_gev2 <- list(prcp ~ s(elev, bs="bs", k=4, m=2), ~ 1, ~ 1)
+m_gev2 <- evgam_fixed(fmla_gev2, data=df, family="gev")
+# summary(m_gev2)
 print('print results')
-print(m_gev2)
+# print(m_gev2)
 # print(m_gev2$coefficients)
-location <- m_gev2$location
-print(location)
-# print(location$coefficients)
-# smooth <- m_gev2$location$smooth
+# location <- m_gev2$location
+# print(location)
+# # print(location)
+# smooth <- m_gev2$location$smooth[[1]]
+# # summary(location)
 # print(smooth)
+
+# print(smooth[1])
 # print(attr(smooth, "qrc"))
-# print(m_gev2.knots)
 # m_gev2 <- evgam(fmla_gev2, data=COprcp_gev, family="gev")
 # summary(m_gev2)
-# print(attributes(m_gev2))
+print(attributes(m_gev2))
 print('good finish')
diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_evgam.py b/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_evgam.py
index 9ff50268..f852fdd6 100644
--- a/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_evgam.py
+++ b/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_evgam.py
@@ -1,6 +1,8 @@
 import numpy as np
 from rpy2 import robjects
 from rpy2.robjects.pandas2ri import ri2py_dataframe
+from scipy.interpolate import make_interp_spline
+
 
 from extreme_fit.distribution.gev.gev_params import GevParams
 from extreme_fit.model.result_from_model_fit.result_from_extremes.abstract_result_from_extremes import \
@@ -39,7 +41,7 @@ class ResultFromEvgam(AbstractResultFromExtremes):
             nllh += -np.log(p)
         return nllh
 
-    def get_gev_params(self, idx):
+    def get_gev_params_from_result(self, idx):
         param_names = ['location', 'logscale', 'shape']
         loc, log_scale, shape = [np.array(self.get_python_dictionary(self.name_to_value[param_name])['fitted'])[idx]
                                  for param_name in param_names]
@@ -106,9 +108,17 @@ class ResultFromEvgam(AbstractResultFromExtremes):
         else:
             dim, max_degree = dims[0]
             d = self.get_python_dictionary(self.name_to_value[r_param_name])
-            coefficients = np.array(d["coefficients"])
             smooth = list(d['smooth'])[0]
             knots = np.array(self.get_python_dictionary(smooth)['knots'])
+            x = np.array(self.name_to_value["data"])[1]
+            y = np.array(self.get_python_dictionary(self.name_to_value[r_param_name])['fitted'])
+            assert len(knots) == 5
+            x_for_interpolation = [int(knots[1]+1), int((knots[1] + knots[3])/2), int(knots[3]-1)]
+            index = [i for i, e in enumerate(x) if e in x_for_interpolation][:len(x_for_interpolation)]
+            x = [x[i] for i in index]
+            y = [y[i] for i in index]
+            spline = make_interp_spline(x, y, k=1, t=knots)
+            coefficients = spline.c
             assert len(knots) == len(coefficients) + 1 + max_degree
             dim_knots_and_coefficient[dim] = (knots, coefficients)
         return dim_knots_and_coefficient
diff --git a/projects/projected_extreme_snowfall/simulation/__init__.py b/projects/projected_extreme_snowfall/simulation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/projects/projected_extreme_snowfall/simulation/spline_simulation.py b/projects/projected_extreme_snowfall/simulation/spline_simulation.py
new file mode 100644
index 00000000..1b7c6540
--- /dev/null
+++ b/projects/projected_extreme_snowfall/simulation/spline_simulation.py
@@ -0,0 +1,90 @@
+from extreme_fit.distribution.gev.gev_params import GevParams
+import numpy as np
+from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator
+from extreme_fit.function.param_function.param_function import SplineParamFunction
+from extreme_fit.function.param_function.spline_coef import SplineAllCoef, SplineCoef
+from extreme_fit.model.margin_model.spline_margin_model.temporal_spline_model_degree_1 import \
+    NonStationaryTwoLinearLocationModel
+from extreme_fit.model.margin_model.utils import MarginFitMethod
+from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
+from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \
+    AbstractTemporalCoordinates
+from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \
+    ConsecutiveTemporalCoordinates
+import matplotlib.pyplot as plt
+
+from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
+from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \
+    AbstractSpatioTemporalObservations
+import pandas as pd
+
+nb_temporal_steps = 20
+
+
+def generate_df_coordinates():
+    return ConsecutiveTemporalCoordinates.from_nb_temporal_steps(nb_temporal_steps=nb_temporal_steps)
+
+
+coordinates = generate_df_coordinates()
+
+
+def get_margin_function_from_fit(model_class, gev_parameter):
+    ground_truth_model = load_ground_truth_model(gev_parameter, model_class)
+    df = pd.DataFrame()
+    # nb_samples should remain equal to 1
+    nb_samples = 1
+    for _ in range(nb_samples):
+        maxima = [ground_truth_model.margin_function.get_params(c).sample(1)[0]
+                  for c in coordinates.df_all_coordinates.values]
+        df2 = pd.DataFrame(data=maxima, index=coordinates.index)
+        df = pd.concat([df, df2], axis=0)
+    index = pd.RangeIndex(0, nb_samples * nb_temporal_steps)
+    df.index = index
+    observations = AbstractSpatioTemporalObservations(df_maxima_gev=df)
+
+    df = pd.concat([generate_df_coordinates().df_all_coordinates for _ in range(nb_samples)], axis=0)
+    df.index= index
+    dataset = AbstractDataset(observations=observations, coordinates=AbstractTemporalCoordinates.from_df(df))
+    estimator = fitted_linear_margin_estimator(model_class,
+                                               coordinates, dataset,
+                                               starting_year=None,
+                                               fit_method=MarginFitMethod.evgam)
+    return estimator.function_from_fit
+
+
+def get_params_from_margin(margin_function, gev_parameter, x):
+    return [margin_function.get_params(np.array([e])).to_dict()[gev_parameter]
+            for e in x]
+
+
+def plot_model_against_estimated_models(model_class, gev_parameter):
+    x = coordinates.t_coordinates
+    x = np.linspace(x[0], x[-1], num=100)
+
+    ground_truth_model = load_ground_truth_model(gev_parameter, model_class)
+    ground_truth_params = get_params_from_margin(ground_truth_model.margin_function, gev_parameter, x)
+    plt.plot(x, ground_truth_params)
+
+    for _ in range(10):
+        params = get_params_from_margin(get_margin_function_from_fit(model_class, gev_parameter), gev_parameter, x)
+        plt.plot(x, params)
+
+    plt.grid()
+    plt.show()
+
+
+def load_ground_truth_model(gev_parameter, model_class):
+    # knots = [-75.3, -0.15, 75, 150.15, 225.3]
+    # knots = [-4.96980e+01, -9.90000e-02,  4.95000e+01,  9.90990e+01,  1.48698e+02]
+    shift = 10
+    knots = [-shift, 0, nb_temporal_steps // 2, nb_temporal_steps, nb_temporal_steps + shift]
+    spline_coef = SplineCoef(param_name=gev_parameter, knots=knots, coefficients=[10, 30, -50])
+    spline_all_coef = SplineAllCoef(gev_parameter, {0: spline_coef})
+    spline_param_function = SplineParamFunction(dim_and_degree=[(0, 1)], coef=spline_all_coef)
+    model = model_class(coordinates=coordinates)
+    model.margin_function.param_name_to_param_function[gev_parameter] = spline_param_function
+    return model
+
+
+if __name__ == '__main__':
+    plot_model_against_estimated_models(NonStationaryTwoLinearLocationModel, GevParams.LOC)
diff --git a/test/test_extreme_fit/test_estimator/test_temporal_estimator/test_gev_temporal_spline.py b/test/test_extreme_fit/test_estimator/test_temporal_estimator/test_gev_temporal_spline.py
index 10120567..9df9ca10 100644
--- a/test/test_extreme_fit/test_estimator/test_temporal_estimator/test_gev_temporal_spline.py
+++ b/test/test_extreme_fit/test_estimator/test_temporal_estimator/test_gev_temporal_spline.py
@@ -26,16 +26,17 @@ class TestGevTemporalSpline(unittest.TestCase):
     def setUp(self) -> None:
         set_seed_r()
         r("""
-        N <- 50
+        N <- 51
         loc = 0; scale = 1; shape <- 1
         x_gev <- rgev(N, loc = loc, scale = scale, shape = shape)
         start_loc = 0; start_scale = 1; start_shape = 1
         """)
         # Compute the stationary temporal margin with isMev
-        self.start_year = 0
-        nb_years = 50
+        self.start_year = -25
+        nb_years = 51
         self.last_year = self.start_year + nb_years - 1
-        df = pd.DataFrame({AbstractCoordinates.COORDINATE_T: range(self.start_year, self.start_year + nb_years)})
+        years = np.array(range(self.start_year, self.start_year + nb_years))
+        df = pd.DataFrame({AbstractCoordinates.COORDINATE_T: years})
         self.coordinates = AbstractTemporalCoordinates.from_df(df)
         df2 = pd.DataFrame(data=np.array(r['x_gev']), index=df.index)
         observations = AbstractSpatioTemporalObservations(df_maxima_gev=df2)
@@ -46,7 +47,7 @@ class TestGevTemporalSpline(unittest.TestCase):
         # Create estimator
         estimator = fitted_linear_margin_estimator(model_class,
                                                    self.coordinates, self.dataset,
-                                                   starting_year=0,
+                                                   starting_year=None,
                                                    fit_method=self.fit_method)
         # Checks that parameters returned are indeed different
         mle_params_estimated_year1 = estimator.function_from_fit.get_params(np.array([0])).to_dict()
@@ -54,19 +55,21 @@ class TestGevTemporalSpline(unittest.TestCase):
         mle_params_estimated_year5 = estimator.function_from_fit.get_params(np.array([self.last_year])).to_dict()
         self.assertNotEqual(mle_params_estimated_year1, mle_params_estimated_year3)
         self.assertNotEqual(mle_params_estimated_year3, mle_params_estimated_year5)
-        # Assert the relationship for the location is different between the beginning and the end
+        # # Assert the relationship for the location is different between the beginning and the end
         diff1 = mle_params_estimated_year1[param_to_test] - mle_params_estimated_year3[param_to_test]
         diff2 = mle_params_estimated_year3[param_to_test] - mle_params_estimated_year5[param_to_test]
         self.assertNotAlmostEqual(diff1, diff2)
-        # Assert that the computed parameters are the same
-        # for year in range(self.start_year, self.start_year+5):
-        #     gev_params_from_result = estimator.result_from_model_fit.get_gev_params(year).to_dict()
-        #     my_gev_params = estimator.function_from_fit.get_params(np.array([year])).to_dict()
-        #     for param_name in GevParams.PARAM_NAMES:
-        #         self.assertAlmostEqual(gev_params_from_result[param_name], my_gev_params[param_name],
-        #                                msg='for the {} parameter at year={}'.format(param_name, year))
+
+        for idx, nb_year in enumerate(range(5)):
+            year = self.start_year + nb_year
+            gev_params_from_result = estimator.result_from_model_fit.get_gev_params_from_result(idx).to_dict()
+            my_gev_params = estimator.function_from_fit.get_params(np.array([year])).to_dict()
+            for param_name in GevParams.PARAM_NAMES:
+                self.assertAlmostEqual(gev_params_from_result[param_name], my_gev_params[param_name],
+                                       msg='for the {} parameter at year={}'.format(param_name, year),
+                                       places=2)
         # Assert that indicators are correctly computed
-        self.assertAlmostEqual(estimator.result_from_model_fit.nllh, estimator.nllh(), places=0)
+        self.assertAlmostEqual(estimator.result_from_model_fit.nllh, estimator.nllh())
         # self.assertAlmostEqual(estimator.result_from_model_fit.aic, estimator.aic())
         # self.assertAlmostEqual(estimator.result_from_model_fit.bic, estimator.bic())
 
@@ -74,7 +77,6 @@ class TestGevTemporalSpline(unittest.TestCase):
         self.function_test_gev_temporal_margin_fit_non_stationary_spline(NonStationaryTwoLinearLocationModel,
                                                                          param_to_test=GevParams.LOC)
 
-    #
     def test_gev_temporal_margin_fit_spline_two_linear_scale(self):
         self.function_test_gev_temporal_margin_fit_non_stationary_spline(NonStationaryTwoLinearScaleModel,
                                                                          param_to_test=GevParams.SCALE)
-- 
GitLab