From ac48aac0dffdf1ebd0d9f779a22c35724a146b3c Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Thu, 8 Apr 2021 16:34:27 +0200 Subject: [PATCH] [projection snowfall] add spline simulation. implement splines for k=1. pass test_gev_temporal_spline.py --- extreme_fit/distribution/gev/evgam_example.R | 36 +++++--- .../result_from_extremes/result_from_evgam.py | 14 ++- .../simulation/__init__.py | 0 .../simulation/spline_simulation.py | 90 +++++++++++++++++++ .../test_gev_temporal_spline.py | 32 +++---- 5 files changed, 142 insertions(+), 30 deletions(-) create mode 100644 projects/projected_extreme_snowfall/simulation/__init__.py create mode 100644 projects/projected_extreme_snowfall/simulation/spline_simulation.py diff --git a/extreme_fit/distribution/gev/evgam_example.R b/extreme_fit/distribution/gev/evgam_example.R index 5c9b143b..65fa9e0a 100644 --- a/extreme_fit/distribution/gev/evgam_example.R +++ b/extreme_fit/distribution/gev/evgam_example.R @@ -4,26 +4,36 @@ # Created on: 30/03/2021 source('evgam_fixed.R') library(evgam) +library(mgcv) +library(SpatialExtremes) data(COprcp) -COprcp$year <- format(COprcp$date, "%Y") -COprcp_gev <- aggregate(prcp ~ year + meta_row, COprcp, max) -COprcp_gev <- cbind(COprcp_gev, COprcp_meta[COprcp_gev$meta_row,]) +set.seed(42) +N <- 101 +loc = 0; scale = 1; shape <- 1 +x_gev <- rgev(N, loc = loc, scale = scale, shape = shape) +years = seq(0, 100) / 100 +df <- data.frame(x_gev, years) +colnames(df) <- c("prcp", "year") +print(length(years)) # print(COprcp_gev) print('before call') -# fmla_gev2 <- list(prcp ~ s(elev, k=3, m=1), ~ 1, ~ 1) -fmla_gev2 <- list(prcp ~ s(elev, bs="bs", k=4, m=2), ~ 1, ~ 1) -m_gev2 <- evgam_fixed(fmla_gev2, data=COprcp_gev, family="gev") +fmla_gev2 <- list(prcp ~ s(year, k=3, m=1, bs="bs"), ~ 1, ~ 1) +# fmla_gev2 <- list(prcp ~ s(elev, bs="bs", k=4, m=2), ~ 1, ~ 1) +m_gev2 <- evgam_fixed(fmla_gev2, data=df, family="gev") +# summary(m_gev2) print('print results') -print(m_gev2) +# print(m_gev2) # print(m_gev2$coefficients) -location <- m_gev2$location -print(location) -# print(location$coefficients) -# smooth <- m_gev2$location$smooth +# location <- m_gev2$location +# print(location) +# # print(location) +# smooth <- m_gev2$location$smooth[[1]] +# # summary(location) # print(smooth) + +# print(smooth[1]) # print(attr(smooth, "qrc")) -# print(m_gev2.knots) # m_gev2 <- evgam(fmla_gev2, data=COprcp_gev, family="gev") # summary(m_gev2) -# print(attributes(m_gev2)) +print(attributes(m_gev2)) print('good finish') diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_evgam.py b/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_evgam.py index 9ff50268..f852fdd6 100644 --- a/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_evgam.py +++ b/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_evgam.py @@ -1,6 +1,8 @@ import numpy as np from rpy2 import robjects from rpy2.robjects.pandas2ri import ri2py_dataframe +from scipy.interpolate import make_interp_spline + from extreme_fit.distribution.gev.gev_params import GevParams from extreme_fit.model.result_from_model_fit.result_from_extremes.abstract_result_from_extremes import \ @@ -39,7 +41,7 @@ class ResultFromEvgam(AbstractResultFromExtremes): nllh += -np.log(p) return nllh - def get_gev_params(self, idx): + def get_gev_params_from_result(self, idx): param_names = ['location', 'logscale', 'shape'] loc, log_scale, shape = [np.array(self.get_python_dictionary(self.name_to_value[param_name])['fitted'])[idx] for param_name in param_names] @@ -106,9 +108,17 @@ class ResultFromEvgam(AbstractResultFromExtremes): else: dim, max_degree = dims[0] d = self.get_python_dictionary(self.name_to_value[r_param_name]) - coefficients = np.array(d["coefficients"]) smooth = list(d['smooth'])[0] knots = np.array(self.get_python_dictionary(smooth)['knots']) + x = np.array(self.name_to_value["data"])[1] + y = np.array(self.get_python_dictionary(self.name_to_value[r_param_name])['fitted']) + assert len(knots) == 5 + x_for_interpolation = [int(knots[1]+1), int((knots[1] + knots[3])/2), int(knots[3]-1)] + index = [i for i, e in enumerate(x) if e in x_for_interpolation][:len(x_for_interpolation)] + x = [x[i] for i in index] + y = [y[i] for i in index] + spline = make_interp_spline(x, y, k=1, t=knots) + coefficients = spline.c assert len(knots) == len(coefficients) + 1 + max_degree dim_knots_and_coefficient[dim] = (knots, coefficients) return dim_knots_and_coefficient diff --git a/projects/projected_extreme_snowfall/simulation/__init__.py b/projects/projected_extreme_snowfall/simulation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/projects/projected_extreme_snowfall/simulation/spline_simulation.py b/projects/projected_extreme_snowfall/simulation/spline_simulation.py new file mode 100644 index 00000000..1b7c6540 --- /dev/null +++ b/projects/projected_extreme_snowfall/simulation/spline_simulation.py @@ -0,0 +1,90 @@ +from extreme_fit.distribution.gev.gev_params import GevParams +import numpy as np +from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator +from extreme_fit.function.param_function.param_function import SplineParamFunction +from extreme_fit.function.param_function.spline_coef import SplineAllCoef, SplineCoef +from extreme_fit.model.margin_model.spline_margin_model.temporal_spline_model_degree_1 import \ + NonStationaryTwoLinearLocationModel +from extreme_fit.model.margin_model.utils import MarginFitMethod +from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \ + AbstractTemporalCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \ + ConsecutiveTemporalCoordinates +import matplotlib.pyplot as plt + +from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset +from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ + AbstractSpatioTemporalObservations +import pandas as pd + +nb_temporal_steps = 20 + + +def generate_df_coordinates(): + return ConsecutiveTemporalCoordinates.from_nb_temporal_steps(nb_temporal_steps=nb_temporal_steps) + + +coordinates = generate_df_coordinates() + + +def get_margin_function_from_fit(model_class, gev_parameter): + ground_truth_model = load_ground_truth_model(gev_parameter, model_class) + df = pd.DataFrame() + # nb_samples should remain equal to 1 + nb_samples = 1 + for _ in range(nb_samples): + maxima = [ground_truth_model.margin_function.get_params(c).sample(1)[0] + for c in coordinates.df_all_coordinates.values] + df2 = pd.DataFrame(data=maxima, index=coordinates.index) + df = pd.concat([df, df2], axis=0) + index = pd.RangeIndex(0, nb_samples * nb_temporal_steps) + df.index = index + observations = AbstractSpatioTemporalObservations(df_maxima_gev=df) + + df = pd.concat([generate_df_coordinates().df_all_coordinates for _ in range(nb_samples)], axis=0) + df.index= index + dataset = AbstractDataset(observations=observations, coordinates=AbstractTemporalCoordinates.from_df(df)) + estimator = fitted_linear_margin_estimator(model_class, + coordinates, dataset, + starting_year=None, + fit_method=MarginFitMethod.evgam) + return estimator.function_from_fit + + +def get_params_from_margin(margin_function, gev_parameter, x): + return [margin_function.get_params(np.array([e])).to_dict()[gev_parameter] + for e in x] + + +def plot_model_against_estimated_models(model_class, gev_parameter): + x = coordinates.t_coordinates + x = np.linspace(x[0], x[-1], num=100) + + ground_truth_model = load_ground_truth_model(gev_parameter, model_class) + ground_truth_params = get_params_from_margin(ground_truth_model.margin_function, gev_parameter, x) + plt.plot(x, ground_truth_params) + + for _ in range(10): + params = get_params_from_margin(get_margin_function_from_fit(model_class, gev_parameter), gev_parameter, x) + plt.plot(x, params) + + plt.grid() + plt.show() + + +def load_ground_truth_model(gev_parameter, model_class): + # knots = [-75.3, -0.15, 75, 150.15, 225.3] + # knots = [-4.96980e+01, -9.90000e-02, 4.95000e+01, 9.90990e+01, 1.48698e+02] + shift = 10 + knots = [-shift, 0, nb_temporal_steps // 2, nb_temporal_steps, nb_temporal_steps + shift] + spline_coef = SplineCoef(param_name=gev_parameter, knots=knots, coefficients=[10, 30, -50]) + spline_all_coef = SplineAllCoef(gev_parameter, {0: spline_coef}) + spline_param_function = SplineParamFunction(dim_and_degree=[(0, 1)], coef=spline_all_coef) + model = model_class(coordinates=coordinates) + model.margin_function.param_name_to_param_function[gev_parameter] = spline_param_function + return model + + +if __name__ == '__main__': + plot_model_against_estimated_models(NonStationaryTwoLinearLocationModel, GevParams.LOC) diff --git a/test/test_extreme_fit/test_estimator/test_temporal_estimator/test_gev_temporal_spline.py b/test/test_extreme_fit/test_estimator/test_temporal_estimator/test_gev_temporal_spline.py index 10120567..9df9ca10 100644 --- a/test/test_extreme_fit/test_estimator/test_temporal_estimator/test_gev_temporal_spline.py +++ b/test/test_extreme_fit/test_estimator/test_temporal_estimator/test_gev_temporal_spline.py @@ -26,16 +26,17 @@ class TestGevTemporalSpline(unittest.TestCase): def setUp(self) -> None: set_seed_r() r(""" - N <- 50 + N <- 51 loc = 0; scale = 1; shape <- 1 x_gev <- rgev(N, loc = loc, scale = scale, shape = shape) start_loc = 0; start_scale = 1; start_shape = 1 """) # Compute the stationary temporal margin with isMev - self.start_year = 0 - nb_years = 50 + self.start_year = -25 + nb_years = 51 self.last_year = self.start_year + nb_years - 1 - df = pd.DataFrame({AbstractCoordinates.COORDINATE_T: range(self.start_year, self.start_year + nb_years)}) + years = np.array(range(self.start_year, self.start_year + nb_years)) + df = pd.DataFrame({AbstractCoordinates.COORDINATE_T: years}) self.coordinates = AbstractTemporalCoordinates.from_df(df) df2 = pd.DataFrame(data=np.array(r['x_gev']), index=df.index) observations = AbstractSpatioTemporalObservations(df_maxima_gev=df2) @@ -46,7 +47,7 @@ class TestGevTemporalSpline(unittest.TestCase): # Create estimator estimator = fitted_linear_margin_estimator(model_class, self.coordinates, self.dataset, - starting_year=0, + starting_year=None, fit_method=self.fit_method) # Checks that parameters returned are indeed different mle_params_estimated_year1 = estimator.function_from_fit.get_params(np.array([0])).to_dict() @@ -54,19 +55,21 @@ class TestGevTemporalSpline(unittest.TestCase): mle_params_estimated_year5 = estimator.function_from_fit.get_params(np.array([self.last_year])).to_dict() self.assertNotEqual(mle_params_estimated_year1, mle_params_estimated_year3) self.assertNotEqual(mle_params_estimated_year3, mle_params_estimated_year5) - # Assert the relationship for the location is different between the beginning and the end + # # Assert the relationship for the location is different between the beginning and the end diff1 = mle_params_estimated_year1[param_to_test] - mle_params_estimated_year3[param_to_test] diff2 = mle_params_estimated_year3[param_to_test] - mle_params_estimated_year5[param_to_test] self.assertNotAlmostEqual(diff1, diff2) - # Assert that the computed parameters are the same - # for year in range(self.start_year, self.start_year+5): - # gev_params_from_result = estimator.result_from_model_fit.get_gev_params(year).to_dict() - # my_gev_params = estimator.function_from_fit.get_params(np.array([year])).to_dict() - # for param_name in GevParams.PARAM_NAMES: - # self.assertAlmostEqual(gev_params_from_result[param_name], my_gev_params[param_name], - # msg='for the {} parameter at year={}'.format(param_name, year)) + + for idx, nb_year in enumerate(range(5)): + year = self.start_year + nb_year + gev_params_from_result = estimator.result_from_model_fit.get_gev_params_from_result(idx).to_dict() + my_gev_params = estimator.function_from_fit.get_params(np.array([year])).to_dict() + for param_name in GevParams.PARAM_NAMES: + self.assertAlmostEqual(gev_params_from_result[param_name], my_gev_params[param_name], + msg='for the {} parameter at year={}'.format(param_name, year), + places=2) # Assert that indicators are correctly computed - self.assertAlmostEqual(estimator.result_from_model_fit.nllh, estimator.nllh(), places=0) + self.assertAlmostEqual(estimator.result_from_model_fit.nllh, estimator.nllh()) # self.assertAlmostEqual(estimator.result_from_model_fit.aic, estimator.aic()) # self.assertAlmostEqual(estimator.result_from_model_fit.bic, estimator.bic()) @@ -74,7 +77,6 @@ class TestGevTemporalSpline(unittest.TestCase): self.function_test_gev_temporal_margin_fit_non_stationary_spline(NonStationaryTwoLinearLocationModel, param_to_test=GevParams.LOC) - # def test_gev_temporal_margin_fit_spline_two_linear_scale(self): self.function_test_gev_temporal_margin_fit_non_stationary_spline(NonStationaryTwoLinearScaleModel, param_to_test=GevParams.SCALE) -- GitLab