From 08c20607777e77b24656d06344ee34d89731e108 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Tue, 24 Mar 2020 15:41:06 +0100 Subject: [PATCH] [quantile regression project] add test exp simulations (stationary and non stationary) for models on annual maxima. Prepare the code to fit with non stationary exponential models --- extreme_fit/distribution/abstract_params.py | 3 ++ extreme_fit/distribution/exp_params.py | 5 +++ extreme_fit/distribution/gev/gev_params.py | 3 -- .../margin_model/abstract_margin_model.py | 1 + .../abstract_temporal_linear_margin_model.py | 32 +++++++++---- .../temporal_linear_margin_exp_models.py | 13 ++++++ .../temporal_linear_margin_models.py | 1 + extreme_fit/model/utils.py | 5 +++ .../AbstractSimulation.py | 36 ++++++++++----- .../abstract_annual_maxima_simulation.py | 14 +++++- .../daily_exp_simulation.py | 45 ++++++++++++++++--- .../annual_maxima_observations.py | 1 + .../test_annual_maxima_simulations.py | 25 +++++++---- 13 files changed, 147 insertions(+), 37 deletions(-) create mode 100644 extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py diff --git a/extreme_fit/distribution/abstract_params.py b/extreme_fit/distribution/abstract_params.py index 631f8656..56852523 100644 --- a/extreme_fit/distribution/abstract_params.py +++ b/extreme_fit/distribution/abstract_params.py @@ -25,6 +25,9 @@ class AbstractParams(object): LOC = 'loc' SHAPE = 'shape' + def __str__(self): + return self.to_dict().__str__() + @classmethod def from_dict(cls, params: dict): return cls(**params) diff --git a/extreme_fit/distribution/exp_params.py b/extreme_fit/distribution/exp_params.py index 5ffb82f9..3637e4a0 100644 --- a/extreme_fit/distribution/exp_params.py +++ b/extreme_fit/distribution/exp_params.py @@ -1,6 +1,7 @@ import numpy as np from extreme_fit.distribution.abstract_params import AbstractParams +from extreme_fit.model.utils import r class ExpParams(AbstractParams): @@ -8,8 +9,12 @@ class ExpParams(AbstractParams): def __init__(self, rate) -> None: self.rate = rate + # todo: is this really the best solution, it might be best to raise an assert self.has_undefined_parameters = self.rate < 0 + def quantile(self, p) -> float: + return r.qexp(p, self.rate) + @property def param_values(self): if self.has_undefined_parameters: diff --git a/extreme_fit/distribution/gev/gev_params.py b/extreme_fit/distribution/gev/gev_params.py index a116f245..79dbe892 100644 --- a/extreme_fit/distribution/gev/gev_params.py +++ b/extreme_fit/distribution/gev/gev_params.py @@ -54,9 +54,6 @@ class GevParams(AbstractExtremeParams): else: return [self.location, self.scale, self.shape] - def __str__(self): - return self.to_dict().__str__() - def time_derivative_of_return_level(self, p=0.99, mu1=0.0, sigma1=0.0): """ Compute the variation of some quantile with respect to time. diff --git a/extreme_fit/model/margin_model/abstract_margin_model.py b/extreme_fit/model/margin_model/abstract_margin_model.py index 79638873..9fe18c9d 100644 --- a/extreme_fit/model/margin_model/abstract_margin_model.py +++ b/extreme_fit/model/margin_model/abstract_margin_model.py @@ -77,6 +77,7 @@ class AbstractMarginModel(AbstractModel, ABC): for coordinate in coordinates_values: gev_params = self.margin_function_sample.get_gev_params(coordinate) x_gev = r(sample_r_function)(nb_obs, **gev_params.to_dict()) + assert not np.isnan(x_gev).any(), 'params={} generated Nan values'.format(gev_params.__str__()) maxima_gev.append(x_gev) return np.array(maxima_gev) diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py index 69f6531e..b441211c 100644 --- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py +++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py @@ -3,10 +3,12 @@ from enum import Enum import numpy as np import pandas as pd +from extreme_fit.distribution.exp_params import ExpParams from extreme_fit.distribution.gev.gev_params import GevParams from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model import LinearMarginModel from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import AbstractResultFromModelFit -from extreme_fit.model.result_from_model_fit.result_from_extremes.result_from_bayesian_extremes import AbstractResultFromExtremes, ResultFromBayesianExtremes +from extreme_fit.model.result_from_model_fit.result_from_extremes.result_from_bayesian_extremes import \ + AbstractResultFromExtremes, ResultFromBayesianExtremes from extreme_fit.model.result_from_model_fit.result_from_extremes.result_from_mle_extremes import ResultFromMleExtremes from extreme_fit.model.result_from_model_fit.result_from_ismev import ResultFromIsmev from extreme_fit.model.utils import r, ro, get_null, get_margin_formula_extremes, get_coord_df @@ -44,12 +46,20 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): data = data[0] assert len(data) == len(df_coordinates_temp.values) x = ro.FloatVector(data) - if self.fit_method == TemporalMarginFitMethod.is_mev_gev_fit: - return self.ismev_gev_fit(x, df_coordinates_temp) - if self.fit_method == TemporalMarginFitMethod.extremes_fevd_bayesian: - return self.extremes_fevd_bayesian_fit(x, df_coordinates_temp) - if self.fit_method in [TemporalMarginFitMethod.extremes_fevd_mle, TemporalMarginFitMethod.extremes_fevd_gmle]: - return self.extremes_fevd_mle_related_fit(x, df_coordinates_temp) + if self.params_class is GevParams: + if self.fit_method == TemporalMarginFitMethod.is_mev_gev_fit: + return self.ismev_gev_fit(x, df_coordinates_temp) + elif self.fit_method == TemporalMarginFitMethod.extremes_fevd_bayesian: + return self.extremes_fevd_bayesian_fit(x, df_coordinates_temp) + elif self.fit_method in [TemporalMarginFitMethod.extremes_fevd_mle, + TemporalMarginFitMethod.extremes_fevd_gmle]: + return self.extremes_fevd_mle_related_fit(x, df_coordinates_temp) + else: + raise NotImplementedError + elif self.params_class is ExpParams: + return self.extreme_fevd_mle_exp_fit(x, df_coordinates_temp) + else: + raise NotImplementedError # Gev Fit with isMev package @@ -63,13 +73,19 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): # Gev fit with extRemes package def extremes_fevd_mle_related_fit(self, x, df_coordinates_temp) -> AbstractResultFromExtremes: - r_type_argument_kwargs, y = self.extreme_arguments(df_coordinates_temp) if self.fit_method == TemporalMarginFitMethod.extremes_fevd_mle: method = "MLE" elif self.fit_method == TemporalMarginFitMethod.extremes_fevd_gmle: method = "GMLE" else: raise ValueError('wrong method') + return self.run_fevd_fixed(df_coordinates_temp, method, x) + + def extreme_fevd_mle_exp_fit(self, x, df_coordinates_temp): + return self.run_fevd_fixed(df_coordinates_temp, "Exponential", x) + + def run_fevd_fixed(self, df_coordinates_temp, method, x): + r_type_argument_kwargs, y = self.extreme_arguments(df_coordinates_temp) res = safe_run_r_estimator(function=r('fevd_fixed'), x=x, data=y, diff --git a/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py new file mode 100644 index 00000000..80ffbb53 --- /dev/null +++ b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py @@ -0,0 +1,13 @@ +from extreme_fit.distribution.exp_params import ExpParams +from extreme_fit.model.margin_model.linear_margin_model.abstract_temporal_linear_margin_model import \ + AbstractTemporalLinearMarginModel + + +class NonStationaryRateTemporalModel(AbstractTemporalLinearMarginModel): + + def __init__(self, *arg, **kwargs): + kwargs['params_class'] = ExpParams + super().__init__(*arg, **kwargs) + + def load_margin_functions(self, gev_param_name_to_dims=None): + super().load_margin_functions({ExpParams.RATE: [self.coordinates.idx_temporal_coordinates]}) diff --git a/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_models.py b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_models.py index 9a5ae312..5e69a015 100644 --- a/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_models.py +++ b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_models.py @@ -1,3 +1,4 @@ +from extreme_fit.distribution.exp_params import ExpParams from extreme_fit.model.margin_model.linear_margin_model.abstract_temporal_linear_margin_model import \ AbstractTemporalLinearMarginModel, TemporalMarginFitMethod from extreme_fit.model.utils import r diff --git a/extreme_fit/model/utils.py b/extreme_fit/model/utils.py index 227f328f..7de527c0 100644 --- a/extreme_fit/model/utils.py +++ b/extreme_fit/model/utils.py @@ -85,8 +85,13 @@ def safe_run_r_estimator(function, data=None, use_start=False, max_ratio_between optim_dict = {'maxit': maxit} parameters['control'] = r.list(**optim_dict) + # Raise error if needed + if 'x' in parameters and np.isnan(parameters['x']).any(): + raise ValueError('x contains NaN values') + # Some checks for Spatial Extremes if data is not None: + # Raise some warnings if isinstance(data, np.ndarray): # Raise warning if the gap is too important between the two biggest values of data sorted_data = sorted(data.flatten()) diff --git a/projects/quantile_regression_vs_evt/AbstractSimulation.py b/projects/quantile_regression_vs_evt/AbstractSimulation.py index f17d7b1c..2ffd4851 100644 --- a/projects/quantile_regression_vs_evt/AbstractSimulation.py +++ b/projects/quantile_regression_vs_evt/AbstractSimulation.py @@ -29,11 +29,19 @@ class AbstractSimulation(object): self.transformation_class = transformation_class self.models_classes = model_classes self.multiprocessing = multiprocessing - self.quantile = quantile + self._quantile = quantile self.time_series_lengths = time_series_lengths self.nb_time_series = nb_time_series self.uncertainty_interval_size = 0.5 + @property + def quantile_estimator(self): + raise NotImplementedError + + @property + def quantile_data(self): + raise NotImplementedError + def generate_all_observation(self, nb_time_series, length) -> List[AbstractSpatioTemporalObservations]: raise NotImplementedError @@ -41,14 +49,15 @@ class AbstractSimulation(object): def time_series_length_to_observation_list(self) -> Dict[int, List[AbstractSpatioTemporalObservations]]: d = OrderedDict() for length in self.time_series_lengths: - d[length] = self.generate_all_observation(self.nb_time_series, length) + observation_list = self.generate_all_observation(self.nb_time_series, length) + d[length] = observation_list return d @cached_property def time_series_length_to_coordinates(self) -> Dict[int, AbstractCoordinates]: d = OrderedDict() for length in self.time_series_lengths: - d[length] = ConsecutiveTemporalCoordinates.\ + d[length] = ConsecutiveTemporalCoordinates. \ from_nb_temporal_steps(length, transformation_class=self.transformation_class) return d @@ -61,17 +70,18 @@ class AbstractSimulation(object): coordinates = self.time_series_length_to_coordinates[time_series_length] estimators = [] for observations in observation_list: - estimators.append(self.get_fitted_quantile_estimator(model_class, observations, coordinates)) + estimators.append(self.get_fitted_quantile_estimator(model_class, observations, coordinates, + self.quantile_estimator)) d_sub[time_series_length] = estimators d[model_class] = d_sub return d - def get_fitted_quantile_estimator(self, model_class, observations, coordinates): + def get_fitted_quantile_estimator(self, model_class, observations, coordinates, quantile_estimator): dataset = AbstractDataset(observations, coordinates) if issubclass(model_class, AbstractTemporalLinearMarginModel): - estimator = QuantileEstimatorFromMargin(dataset, self.quantile, model_class) + estimator = QuantileEstimatorFromMargin(dataset, quantile_estimator, model_class) elif issubclass(model_class, AbstractQuantileRegressionModel): - estimator = QuantileRegressionEstimator(dataset, self.quantile, model_class) + estimator = QuantileRegressionEstimator(dataset, quantile_estimator, model_class) else: raise NotImplementedError estimator.fit() @@ -85,7 +95,7 @@ class AbstractSimulation(object): for length, estimators_fitted in d_sub.items(): errors = self.compute_errors(length, estimators_fitted) leftover = (1 - self.uncertainty_interval_size) / 2 - error_values = [np.quantile(errors, q=leftover), np.mean(errors), np.quantile(errors, q=1-leftover)] + error_values = [np.quantile(errors, q=leftover), np.mean(errors), np.quantile(errors, q=1 - leftover)] length_to_error_values[length] = error_values d[model_class] = length_to_error_values return d @@ -98,15 +108,19 @@ class AbstractSimulation(object): alpha = 0.1 colors = ['green', 'orange', 'blue', 'red'] ax = plt.gca() - for color, (model_class, length_to_error_values) in zip(colors, self.model_class_to_error_last_year_quantile.items()): + for color, (model_class, length_to_error_values) in zip(colors, + self.model_class_to_error_last_year_quantile.items()): lengths = list(length_to_error_values.keys()) errors_values = np.array(list(length_to_error_values.values())) mean_error = errors_values[:, 1] label = get_display_name_from_object_type(model_class) ax.plot(lengths, mean_error, label=label) ax.set_xlabel('# Data') - ax.set_ylabel('Average (out of {} samples) relative error\nfor the {} quantile at the last coordinate (%)'.format(self.nb_time_series, - self.quantile)) + ax.set_ylabel( + 'Average (out of {} samples) relative error\nfor the {} estimated ' + 'quantile at the last coordinate (%)'.format( + self.nb_time_series, + self.quantile_estimator)) lower_bound = errors_values[:, 0] upper_bound = errors_values[:, 2] diff --git a/projects/quantile_regression_vs_evt/annual_maxima_simulation/abstract_annual_maxima_simulation.py b/projects/quantile_regression_vs_evt/annual_maxima_simulation/abstract_annual_maxima_simulation.py index 74b35ea0..850353e3 100644 --- a/projects/quantile_regression_vs_evt/annual_maxima_simulation/abstract_annual_maxima_simulation.py +++ b/projects/quantile_regression_vs_evt/annual_maxima_simulation/abstract_annual_maxima_simulation.py @@ -13,6 +13,14 @@ from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_tempor class AnnualMaximaSimulation(AbstractSimulation): + @property + def quantile_estimator(self): + return self._quantile + + @property + def quantile_data(self): + return self._quantile + @property def observations_class(self): raise NotImplementedError @@ -39,7 +47,11 @@ class AnnualMaximaSimulation(AbstractSimulation): last_coordinate = coordinates.coordinates_values()[-1] # Compute true value margin_model = self.time_series_lengths_to_margin_model[length] - true_quantile = margin_model.margin_function_sample.get_gev_params(last_coordinate).quantile(self.quantile) + true_gev_params = margin_model.margin_function_sample.get_gev_params(last_coordinate) + print(true_gev_params) + true_quantile = true_gev_params.quantile(self.quantile_data) + print(self.quantile_data) + print(true_quantile) # Compute estimated values estimated_quantiles = [estimator.function_from_fit.get_quantile(last_coordinate) for estimator in estimators] return 100 * np.abs(np.array(estimated_quantiles) - true_quantile) / true_quantile diff --git a/projects/quantile_regression_vs_evt/annual_maxima_simulation/daily_exp_simulation.py b/projects/quantile_regression_vs_evt/annual_maxima_simulation/daily_exp_simulation.py index fd6ee11f..8d64eae6 100644 --- a/projects/quantile_regression_vs_evt/annual_maxima_simulation/daily_exp_simulation.py +++ b/projects/quantile_regression_vs_evt/annual_maxima_simulation/daily_exp_simulation.py @@ -1,26 +1,59 @@ -from extreme_fit.distribution.gev.gev_params import GevParams +from abc import ABC + +from extreme_fit.distribution.abstract_params import AbstractParams +from extreme_fit.distribution.exp_params import ExpParams from extreme_fit.model.margin_model.linear_margin_model.abstract_temporal_linear_margin_model import \ TemporalMarginFitMethod +from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_exp_models import \ + NonStationaryRateTemporalModel from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel from projects.quantile_regression_vs_evt.annual_maxima_simulation.abstract_annual_maxima_simulation import \ - AnnualMaximaSimulation + AnnualMaximaSimulation +from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \ + CenteredScaledNormalization from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import DailyExpAnnualMaxima -class DailyExpSimulation(AnnualMaximaSimulation): +class AbstractDailyExpSimulation(AnnualMaximaSimulation, ABC): + + def __init__(self, nb_time_series, quantile, time_series_lengths=None, multiprocessing=False, model_classes=None, + transformation_class=CenteredScaledNormalization): + super().__init__(nb_time_series, quantile, time_series_lengths, multiprocessing, model_classes, + transformation_class) + + @property + def quantile_data(self): + return 1 - ((1 - self._quantile) / 365) @property def observations_class(self): return DailyExpAnnualMaxima + def get_fitted_quantile_estimator(self, model_class, observations, coordinates, quantile_estimator): + if model_class in [NonStationaryRateTemporalModel]: + quantile_estimator = self.quantile_data + # todo: i should give other observatations, not the annual maxima + raise NotImplementedError + return super().get_fitted_quantile_estimator(model_class, observations, coordinates, quantile_estimator) + -class StationaryExpSimulation(DailyExpSimulation): +class StationaryExpSimulation(AbstractDailyExpSimulation): def create_model(self, coordinates): gev_param_name_to_coef_list = { - GevParams.SCALE: [1], + AbstractParams.RATE: [1], } return StationaryTemporalModel.from_coef_list(coordinates, gev_param_name_to_coef_list, - fit_method=TemporalMarginFitMethod.extremes_fevd_mle) + fit_method=TemporalMarginFitMethod.extremes_fevd_mle, + params_class=ExpParams) +class NonStationaryExpSimulation(AbstractDailyExpSimulation): + + def create_model(self, coordinates): + gev_param_name_to_coef_list = { + AbstractParams.RATE: [0.1, 0.01], + } + return NonStationaryRateTemporalModel.from_coef_list(coordinates, gev_param_name_to_coef_list, + fit_method=TemporalMarginFitMethod.extremes_fevd_mle, + params_class=ExpParams) diff --git a/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py index a4dbcde9..a1ed5b66 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py @@ -44,6 +44,7 @@ class DailyExpAnnualMaxima(AnnualMaxima): return cls(df_maxima_gev=df_maxima_gev) + class MaxStableAnnualMaxima(AnnualMaxima): @classmethod diff --git a/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py b/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py index ab3de724..cf0de5f4 100644 --- a/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py +++ b/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py @@ -1,10 +1,13 @@ import unittest +from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_exp_models import \ + NonStationaryRateTemporalModel from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel, \ NonStationaryLocationTemporalModel from extreme_fit.model.quantile_model.quantile_regression_model import ConstantQuantileRegressionModel, \ TemporalCoordinatesQuantileRegressionModel -from projects.quantile_regression_vs_evt.annual_maxima_simulation.daily_exp_simulation import StationaryExpSimulation +from projects.quantile_regression_vs_evt.annual_maxima_simulation.daily_exp_simulation import StationaryExpSimulation, \ + NonStationaryExpSimulation from projects.quantile_regression_vs_evt.annual_maxima_simulation.gev_simulation import StationarySimulation, \ NonStationaryLocationGumbelSimulation @@ -24,13 +27,19 @@ class TestGevSimulations(unittest.TestCase): simulation.plot_error_for_last_year_quantile(self.DISPLAY) -# class TestExpSimulations(unittest.TestCase): -# DISPLAY = True -# -# def test_stationary_run(self): -# simulation = StationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[50, 60], -# model_classes=[StationaryTemporalModel, ConstantQuantileRegressionModel]) -# simulation.plot_error_for_last_year_quantile(self.DISPLAY) +class TestExpSimulations(unittest.TestCase): + DISPLAY = False + + def test_stationary_run(self): + simulation = StationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[50, 60], + model_classes=[StationaryTemporalModel, ConstantQuantileRegressionModel]) + simulation.plot_error_for_last_year_quantile(self.DISPLAY) + + def test_non_stationary_run(self): + simulation = NonStationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[50, 60], + model_classes=[NonStationaryLocationTemporalModel, + TemporalCoordinatesQuantileRegressionModel]) + simulation.plot_error_for_last_year_quantile(self.DISPLAY) if __name__ == '__main__': -- GitLab