diff --git a/extreme_fit/distribution/gev/main_fevd_mle_two_covariates.R b/extreme_fit/distribution/gev/main_fevd_mle_two_covariates.R index 3ab74777cbe548007fd62564a6696ca926be14b4..adeab5ac7a2a1f321e5da5a13e063dc74feb19bd 100644 --- a/extreme_fit/distribution/gev/main_fevd_mle_two_covariates.R +++ b/extreme_fit/distribution/gev/main_fevd_mle_two_covariates.R @@ -26,7 +26,8 @@ colnames(coord) = c("X", "T") coord = data.frame(coord, stringsAsFactors = TRUE) # res = fevd_fixed(x_gev, data=coord, method='MLE', verbose=TRUE, use.phi=FALSE) # res = fevd_fixed(x_gev, data=coord, location.fun= ~T, scale.fun= ~T, method='MLE', type="GEV", verbose=FALSE, use.phi=FALSE) -res = fevd_fixed(x_gev, data=coord, location.fun= ~sin(X) + cos(T), method='MLE', type="GEV", verbose=FALSE, use.phi=FALSE) +# res = fevd_fixed(x_gev, data=coord, location.fun= ~sin(X) + cos(T), method='MLE', type="GEV", verbose=FALSE, use.phi=FALSE) +res = fevd_fixed(x_gev, data=coord, location.fun= ~T * X + X +T , method='MLE', type="GEV", verbose=FALSE, use.phi=FALSE) print(res) # Some display for the results diff --git a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py index 5e36bfcd6f6a54b88efcafed09bcb4442801bb66..a68f6588ecb255baae3d5fae26e678c65e5160ab 100644 --- a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py +++ b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py @@ -28,13 +28,18 @@ class LinearMarginEstimator(AbstractMarginEstimator): self.margin_model = margin_model def _fit(self) -> AbstractResultFromModelFit: - df_coordinates_spat = self.dataset.coordinates.df_spatial_coordinates(self.train_split) return self.margin_model.fitmargin_from_maxima_gev(data=self.maxima_gev_train, - df_coordinates_spat=df_coordinates_spat, - df_coordinates_temp=self.coordinate_temp) + df_coordinates_spat=self.df_coordinates_spat, + df_coordinates_temp=self.df_coordinates_temp) @property - def coordinate_temp(self): + def df_coordinates_spat(self): + return self.dataset.coordinates.df_spatial_coordinates(split=self.train_split, + drop_duplicates=self.margin_model.drop_duplicates) + + + @property + def df_coordinates_temp(self): return self.dataset.coordinates.df_temporal_coordinates_for_fit(split=self.train_split, starting_point=self.margin_model.starting_point, drop_duplicates=self.margin_model.drop_duplicates) diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py index 5db6961bbbe0003cead10b20ca02621d50d2f82d..33a1d3b6bb087b7b92cffb469d78f80c6d295510 100644 --- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py +++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py @@ -35,20 +35,23 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame, df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit: - data = data[0] - assert len(data) == len(df_coordinates_temp.values), 'len(data)={} != len(temp)={}'.format(len(data), - len( - df_coordinates_temp.values)) - x = ro.FloatVector(data) + if len(data) == 1: + data = data[0] + assert len(data) == len(df_coordinates_temp.values), 'len(data)={} != len(temp)={}'.format(len(data), + len( + df_coordinates_temp.values)) + x = ro.FloatVector(data) + else: + x = ro.Matrix(data) if self.params_class is GevParams: if self.fit_method == MarginFitMethod.is_mev_gev_fit: return self.ismev_gev_fit(x, df_coordinates_temp) elif self.fit_method in FEVD_MARGIN_FIT_METHODS: - return self.extremes_fevd_fit(x, df_coordinates_temp) + return self.extremes_fevd_fit(x, df_coordinates_temp, df_coordinates_spat) else: raise NotImplementedError elif self.params_class is ExpParams: - return self.extreme_fevd_mle_exp_fit(x, df_coordinates_temp) + return self.extreme_fevd_mle_exp_fit(x, df_coordinates_temp, df_coordinates_spat) else: raise NotImplementedError @@ -63,21 +66,22 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): # Gev fit with extRemes package - def extremes_fevd_fit(self, x, df_coordinates_temp) -> AbstractResultFromExtremes: + def extremes_fevd_fit(self, x, df_coordinates_temp, df_coordinates_spat) -> AbstractResultFromExtremes: assert self.fit_method in FEVD_MARGIN_FIT_METHODS if self.fit_method == MarginFitMethod.extremes_fevd_bayesian: return self.extremes_fevd_bayesian_fit(x, df_coordinates_temp) else: return self.run_fevd_fixed(df_coordinates_temp=df_coordinates_temp, + df_coordinates_spat=df_coordinates_spat, method=FEVD_MARGIN_FIT_METHOD_TO_ARGUMENT_STR[self.fit_method], x=x) - def extreme_fevd_mle_exp_fit(self, x, df_coordinates_temp): - return self.run_fevd_fixed(df_coordinates_temp, "Exponential", x) + def extreme_fevd_mle_exp_fit(self, x, df_coordinates_temp, df_coordinates_spat): + return self.run_fevd_fixed(df_coordinates_temp, df_coordinates_spat, "Exponential", x) - def run_fevd_fixed(self, df_coordinates_temp, method, x): + def run_fevd_fixed(self, df_coordinates_temp, df_coordinates_spat, method, x): if self.fit_method == MarginFitMethod.extremes_fevd_l_moments: assert self.margin_function.is_a_stationary_model - r_type_argument_kwargs, y = self.extreme_arguments(df_coordinates_temp) + r_type_argument_kwargs, y = self.extreme_arguments(df_coordinates_temp, df_coordinates_spat) res = safe_run_r_estimator(function=r('fevd_fixed'), x=x, data=y, @@ -107,13 +111,22 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): ) return ResultFromBayesianExtremes(res, self.param_name_to_list_for_result) - def extreme_arguments(self, df_coordinates_temp): + def extreme_arguments(self, df_coordinates_temp, df_coordinates_spat=None): + + # Load parameters + + if df_coordinates_spat is None or df_coordinates_spat.empty: + df = df_coordinates_temp + else: + df = pd.concat([df_coordinates_spat, df_coordinates_temp], axis=1) + print(df.shape) + print(df.head()) + y = get_coord_df(df) + # Disable the use of log sigma parametrization r_type_argument_kwargs = {'use.phi': False, 'verbose': False} - # Load parameters r_type_argument_kwargs.update(get_margin_formula_extremes(self.margin_function.form_dict)) - y = get_coord_df(df_coordinates_temp) return r_type_argument_kwargs, y # Default arguments for all methods diff --git a/extreme_fit/model/margin_model/polynomial_margin_model/spatio_temporal_polynomial_model.py b/extreme_fit/model/margin_model/polynomial_margin_model/spatio_temporal_polynomial_model.py new file mode 100644 index 0000000000000000000000000000000000000000..4f9cde3d24d1b35d0030f7542b5b02757d44e81f --- /dev/null +++ b/extreme_fit/model/margin_model/polynomial_margin_model/spatio_temporal_polynomial_model.py @@ -0,0 +1,23 @@ +from extreme_fit.distribution.gev.gev_params import GevParams +from extreme_fit.model.margin_model.polynomial_margin_model.polynomial_margin_model import PolynomialMarginModel +from extreme_fit.model.margin_model.utils import MarginFitMethod +from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates + + +class AbstractSpatioTemporalPolynomialModel(PolynomialMarginModel): + + def __init__(self, coordinates: AbstractCoordinates, params_user=None, starting_point=None, + fit_method=MarginFitMethod.extremes_fevd_mle, nb_iterations_for_bayesian_fit=5000, + params_initial_fit_bayesian=None, type_for_MLE="GEV", params_class=GevParams, max_degree=2): + super().__init__(coordinates, params_user, starting_point, fit_method, nb_iterations_for_bayesian_fit, + params_initial_fit_bayesian, type_for_MLE, params_class, max_degree) + self.drop_duplicates = False + + +class NonStationaryLocationSpatioTemporalLinearityModel(AbstractSpatioTemporalPolynomialModel): + + def load_margin_function(self, param_name_to_dims=None): + return super().load_margin_function({GevParams.LOC: [ + (self.coordinates.idx_temporal_coordinates, 1), + (self.coordinates.idx_x_coordinates, 1), + ]}) diff --git a/projects/altitude_spatial_model/altitudes_fit/two_fold_detail_fit.py b/projects/altitude_spatial_model/altitudes_fit/two_fold_detail_fit.py index cc45a94f645ef1f29d9be3bb747a8f61f4f9c2ee..9e7166080fb19ae0eecd991f37e8845ca96104e7 100644 --- a/projects/altitude_spatial_model/altitudes_fit/two_fold_detail_fit.py +++ b/projects/altitude_spatial_model/altitudes_fit/two_fold_detail_fit.py @@ -69,7 +69,8 @@ class TwoFoldModelFit(object): self.model_class = model_class self.fit_method = fit_method self.estimators = [fitted_linear_margin_estimator_short(model_class=self.model_class, dataset=dataset, - fit_method=self.fit_method) for dataset in two_fold_datasets] # type: List[LinearMarginEstimator] + fit_method=self.fit_method) + for dataset in two_fold_datasets] # type: List[LinearMarginEstimator] self.estimator_fold_1 = self.estimators[0] self.estimator_fold_2 = self.estimators[1] diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index 10b5e563f3aa344f924e9a88f1671fe9e19a56e4..443a557d425e18a25655a4380c581d8fb1ac85a9 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -210,11 +210,12 @@ class AbstractCoordinates(object): def has_spatial_coordinates(self) -> bool: return self.nb_spatial_coordinates > 0 - def df_spatial_coordinates(self, split: Split = Split.all, transformed=True) -> pd.DataFrame: + def df_spatial_coordinates(self, split: Split = Split.all, transformed=True, drop_duplicates=True) -> pd.DataFrame: if self.nb_spatial_coordinates == 0: return pd.DataFrame() else: - return self.df_coordinates(split, transformed).loc[:, self.spatial_coordinates_names].drop_duplicates() + df = self.df_coordinates(split, transformed).loc[:, self.spatial_coordinates_names] + return df.drop_duplicates() if drop_duplicates else df def nb_stations(self, split: Split = Split.all) -> int: return len(self.df_spatial_coordinates(split)) @@ -302,7 +303,11 @@ class AbstractCoordinates(object): def idx_temporal_coordinates(self): return self.coordinates_names.index(self.COORDINATE_T) - # Spatio temporal attributes + @property + def idx_x_coordinates(self): + return self.coordinates_names.index(self.COORDINATE_X) + +# Spatio temporal attributes @property def has_spatio_temporal_coordinates(self) -> bool: diff --git a/test/test_extreme_fit/test_estimator/test_gev_spatio_temporal_extremes_mle.py b/test/test_extreme_fit/test_estimator/test_gev_spatio_temporal_extremes_mle.py new file mode 100644 index 0000000000000000000000000000000000000000..e5ac3c51719c589e21aec5f0e59bcf406cc939ba --- /dev/null +++ b/test/test_extreme_fit/test_estimator/test_gev_spatio_temporal_extremes_mle.py @@ -0,0 +1,71 @@ +import unittest + +import numpy as np +import pandas as pd + +from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day +from extreme_fit.distribution.gev.gev_params import GevParams +from extreme_fit.model.margin_model.polynomial_margin_model.polynomial_margin_model import \ + NonStationaryQuadraticLocationModel, \ + NonStationaryQuadraticScaleModel, NonStationaryQuadraticLocationGumbelModel, NonStationaryQuadraticScaleGumbelModel +from extreme_fit.model.margin_model.polynomial_margin_model.spatio_temporal_polynomial_model import \ + NonStationaryLocationSpatioTemporalLinearityModel +from extreme_trend.abstract_gev_trend_test import fitted_linear_margin_estimator +from extreme_fit.model.margin_model.utils import \ + MarginFitMethod +from extreme_fit.model.utils import r, set_seed_r +from projects.altitude_spatial_model.altitudes_fit.altitudes_studies import AltitudesStudies +from projects.altitude_spatial_model.altitudes_fit.two_fold_datasets_generator import TwoFoldDatasetsGenerator +from projects.altitude_spatial_model.altitudes_fit.two_fold_fit import TwoFoldFit +from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \ + AbstractTemporalCoordinates +from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset +from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ + AbstractSpatioTemporalObservations +from test.test_projects.test_contrasting.test_two_fold_fit import load_two_fold_fit + + +class TestGevTemporalQuadraticExtremesMle(unittest.TestCase): + + def get_estimator_fitted(self, model_class): + altitudes = [900, 1200] + study_class = SafranSnowfall1Day + studies = AltitudesStudies(study_class, altitudes, year_max=2019) + two_fold_datasets_generator = TwoFoldDatasetsGenerator(studies, nb_samples=1, massif_names=['Vercors']) + model_family_name_to_model_class = {'Non stationary': [model_class]} + two_fold_fit = TwoFoldFit(two_fold_datasets_generator=two_fold_datasets_generator, + model_family_name_to_model_classes=model_family_name_to_model_class, + fit_method=MarginFitMethod.extremes_fevd_mle) + massif_fit = two_fold_fit.massif_name_to_massif_fit['Vercors'] + sample_fit = massif_fit.sample_id_to_sample_fit[0] + model_fit = sample_fit.model_class_to_model_fit[model_class] # type: TwoFoldModelFit + estimator = model_fit.estimator_fold_1 + print(estimator.dataset) + return estimator + + # def test_location_spatio_temporal_linearity(self): + # # Create estimator + # # estimator = fitted_linear_margin_estimator(model_class, + # # self.coordinates, self.dataset, + # # starting_year=0, + # # fit_method=self.fit_method) + # # # Checks that parameters returned are indeed different + # # mle_params_estimated_year1 = estimator.function_from_fit.get_params(np.array([1])).to_dict() + # # mle_params_estimated_year3 = estimator.function_from_fit.get_params(np.array([21])).to_dict() + # # mle_params_estimated_year5 = estimator.function_from_fit.get_params(np.array([41])).to_dict() + # # self.assertNotEqual(mle_params_estimated_year1, mle_params_estimated_year3) + # # self.assertNotEqual(mle_params_estimated_year3, mle_params_estimated_year5) + # # # Assert the relationship for the location is indeed quadratic + # # diff1 = mle_params_estimated_year1[quadratic_param] - mle_params_estimated_year3[quadratic_param] + # # diff2 = mle_params_estimated_year3[quadratic_param] - mle_params_estimated_year5[quadratic_param] + # # self.assertNotAlmostEqual(diff1, diff2) + # estimator = self.get_estimator_fitted(NonStationaryLocationSpatioTemporalLinearityModel) + # # Assert that indicators are correctly computed + # self.assertAlmostEqual(estimator.result_from_model_fit.nllh, estimator.nllh()) + # self.assertAlmostEqual(estimator.result_from_model_fit.aic, estimator.aic()) + # self.assertAlmostEqual(estimator.result_from_model_fit.bic, estimator.bic()) + + +if __name__ == '__main__': + unittest.main()