From 13f506a343c219dbafbe34e45f0f8b88858760c6 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Tue, 24 Mar 2020 16:47:53 +0100 Subject: [PATCH] [quantile regression project] add quantile regression on daily values --- extreme_fit/model/daily_data_model.py | 17 ++++++++++++ .../temporal_linear_margin_exp_models.py | 3 ++- .../quantile_regression_model.py | 2 ++ .../daily_exp_simulation.py | 13 ++++----- ...main_non_stationary_quantile_regression.py | 27 +++++++++++++++++++ .../main_quantile_regression.py | 21 --------------- .../abstract_spatio_temporal_observations.py | 1 + .../annual_maxima_observations.py | 16 +++++++---- .../daily_observations.py | 14 +++++----- .../test_annual_maxima_simulations.py | 7 +++++ 10 files changed, 81 insertions(+), 40 deletions(-) create mode 100644 extreme_fit/model/daily_data_model.py create mode 100644 projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py delete mode 100644 projects/quantile_regression_vs_evt/main_quantile_regression.py diff --git a/extreme_fit/model/daily_data_model.py b/extreme_fit/model/daily_data_model.py new file mode 100644 index 00000000..ffdab5c8 --- /dev/null +++ b/extreme_fit/model/daily_data_model.py @@ -0,0 +1,17 @@ +from extreme_fit.model.quantile_model.quantile_regression_model import ConstantQuantileRegressionModel, \ + TemporalCoordinatesQuantileRegressionModel + + +class AbstractModelOnDailyData(object): + pass + + +class ConstantQuantileRegressionModelOnDailyData(ConstantQuantileRegressionModel, AbstractModelOnDailyData): + pass + + +class TemporalCoordinatesQuantileRegressionModelOnDailyData(TemporalCoordinatesQuantileRegressionModel, + AbstractModelOnDailyData): + pass + + diff --git a/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py index 80ffbb53..b10c0277 100644 --- 
a/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py +++ b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py @@ -1,9 +1,10 @@ from extreme_fit.distribution.exp_params import ExpParams +from extreme_fit.model.daily_data_model import AbstractModelOnDailyData from extreme_fit.model.margin_model.linear_margin_model.abstract_temporal_linear_margin_model import \ AbstractTemporalLinearMarginModel -class NonStationaryRateTemporalModel(AbstractTemporalLinearMarginModel): +class NonStationaryRateTemporalModel(AbstractTemporalLinearMarginModel, AbstractModelOnDailyData): def __init__(self, *arg, **kwargs): kwargs['params_class'] = ExpParams diff --git a/extreme_fit/model/quantile_model/quantile_regression_model.py b/extreme_fit/model/quantile_model/quantile_regression_model.py index a9e377c6..7bf51b02 100644 --- a/extreme_fit/model/quantile_model/quantile_regression_model.py +++ b/extreme_fit/model/quantile_model/quantile_regression_model.py @@ -54,3 +54,5 @@ class TemporalCoordinatesQuantileRegressionModel(AbstractQuantileRegressionModel assert self.dataset.coordinates.has_temporal_coordinates \ and not self.dataset.coordinates.has_spatial_coordinates return AbstractCoordinates.COORDINATE_T + + diff --git a/projects/quantile_regression_vs_evt/annual_maxima_simulation/daily_exp_simulation.py b/projects/quantile_regression_vs_evt/annual_maxima_simulation/daily_exp_simulation.py index 8d64eae6..f33e4f62 100644 --- a/projects/quantile_regression_vs_evt/annual_maxima_simulation/daily_exp_simulation.py +++ b/projects/quantile_regression_vs_evt/annual_maxima_simulation/daily_exp_simulation.py @@ -2,13 +2,14 @@ from abc import ABC from extreme_fit.distribution.abstract_params import AbstractParams from extreme_fit.distribution.exp_params import ExpParams +from extreme_fit.model.daily_data_model import AbstractModelOnDailyData from 
extreme_fit.model.margin_model.linear_margin_model.abstract_temporal_linear_margin_model import \ TemporalMarginFitMethod from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_exp_models import \ NonStationaryRateTemporalModel from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel from projects.quantile_regression_vs_evt.annual_maxima_simulation.abstract_annual_maxima_simulation import \ - AnnualMaximaSimulation + AnnualMaximaSimulation from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \ CenteredScaledNormalization from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import DailyExpAnnualMaxima @@ -29,11 +30,11 @@ class AbstractDailyExpSimulation(AnnualMaximaSimulation, ABC): def observations_class(self): return DailyExpAnnualMaxima - def get_fitted_quantile_estimator(self, model_class, observations, coordinates, quantile_estimator): - if model_class in [NonStationaryRateTemporalModel]: + def get_fitted_quantile_estimator(self, model_class, observations: DailyExpAnnualMaxima, coordinates, + quantile_estimator): + if issubclass(model_class, AbstractModelOnDailyData): quantile_estimator = self.quantile_data - # todo: i should give other observatations, not the annual maxima - raise NotImplementedError + observations, coordinates = observations.daily_observations.transform_to_standard_shape(coordinates) return super().get_fitted_quantile_estimator(model_class, observations, coordinates, quantile_estimator) @@ -41,7 +42,7 @@ class StationaryExpSimulation(AbstractDailyExpSimulation): def create_model(self, coordinates): gev_param_name_to_coef_list = { - AbstractParams.RATE: [1], + AbstractParams.RATE: [10], } return StationaryTemporalModel.from_coef_list(coordinates, gev_param_name_to_coef_list, fit_method=TemporalMarginFitMethod.extremes_fevd_mle, diff --git 
a/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py b/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py new file mode 100644 index 00000000..ca70617a --- /dev/null +++ b/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py @@ -0,0 +1,27 @@ +from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import \ + NonStationaryLocationTemporalModel, NonStationaryLocationGumbelModel +from extreme_fit.model.quantile_model.quantile_regression_model import TemporalCoordinatesQuantileRegressionModel +from projects.quantile_regression_vs_evt.annual_maxima_simulation.daily_exp_simulation import \ + NonStationaryExpSimulation, StationaryExpSimulation +from projects.quantile_regression_vs_evt.annual_maxima_simulation.gev_simulation import \ + NonStationaryLocationGumbelSimulation, NonStationaryLocationGevSimulation +from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \ + CenteredScaledNormalization, IdentityTransformation + +nb_time_series = 20 +quantile = 0.98 +time_series_lengths = [50, 100, 200] +transformation_class = [IdentityTransformation, CenteredScaledNormalization][1] +model_classes = [NonStationaryLocationTemporalModel, + TemporalCoordinatesQuantileRegressionModel, + NonStationaryLocationGumbelModel] +simulation_class = [NonStationaryLocationGumbelSimulation, + NonStationaryLocationGevSimulation, + NonStationaryExpSimulation][-2] + +simulation = simulation_class(nb_time_series=nb_time_series, + quantile=quantile, + time_series_lengths=time_series_lengths, + model_classes=model_classes, + transformation_class=transformation_class) +simulation.plot_error_for_last_year_quantile() diff --git a/projects/quantile_regression_vs_evt/main_quantile_regression.py b/projects/quantile_regression_vs_evt/main_quantile_regression.py deleted file mode 100644 index b163ad3d..00000000 --- 
a/projects/quantile_regression_vs_evt/main_quantile_regression.py +++ /dev/null @@ -1,21 +0,0 @@ -from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import \ - NonStationaryLocationTemporalModel, NonStationaryLocationGumbelModel -from extreme_fit.model.quantile_model.quantile_regression_model import TemporalCoordinatesQuantileRegressionModel -from projects.quantile_regression_vs_evt.annual_maxima_simulation.gev_simulation import \ - NonStationaryLocationGumbelSimulation, NonStationaryLocationGevSimulation -from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \ - CenteredScaledNormalization, IdentityTransformation - -nb_time_series = 10 -quantile = 0.98 -time_series_lengths = [50, 100, 200] -transformation_class = [IdentityTransformation, CenteredScaledNormalization][0] -model_classes = [NonStationaryLocationTemporalModel, TemporalCoordinatesQuantileRegressionModel, NonStationaryLocationGumbelModel] -simulation_class = [NonStationaryLocationGumbelSimulation, NonStationaryLocationGevSimulation][0] - -simulation = NonStationaryLocationGumbelSimulation(nb_time_series=nb_time_series, - quantile=quantile, - time_series_lengths=time_series_lengths, - model_classes=model_classes, - transformation_class=transformation_class) -simulation.plot_error_for_last_year_quantile() diff --git a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py index 0b009da9..a5845546 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py @@ -21,6 +21,7 @@ class AbstractSpatioTemporalObservations(object): Columns are independent observations from the same coordinates index """ assert df_maxima_gev is not None or 
df_maxima_frech is not None + assert isinstance(df_maxima_gev, pd.DataFrame) or isinstance(df_maxima_frech, pd.DataFrame) if df_maxima_gev is not None and df_maxima_frech is not None: assert pd.Index.equals(df_maxima_gev.index, df_maxima_frech.index) self.df_maxima_gev = df_maxima_gev # type: pd.DataFrame diff --git a/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py index a1ed5b66..98122f23 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py @@ -1,3 +1,5 @@ +from typing import Union + import pandas as pd from extreme_fit.model.margin_model.abstract_margin_model import AbstractMarginModel @@ -9,7 +11,7 @@ from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_sp AbstractSpatioTemporalCoordinates from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations \ import AbstractSpatioTemporalObservations -from spatio_temporal_dataset.spatio_temporal_observations.daily_observations import DailyExp +from spatio_temporal_dataset.spatio_temporal_observations.daily_observations import DailyExp, DailyObservations class AnnualMaxima(AbstractSpatioTemporalObservations): @@ -34,15 +36,19 @@ class MarginAnnualMaxima(AnnualMaxima): class DailyExpAnnualMaxima(AnnualMaxima): + def __init__(self, df_maxima_gev: pd.DataFrame = None, df_maxima_frech: pd.DataFrame = None, + daily_observations: Union[None, DailyObservations] = None): + super().__init__(df_maxima_gev, df_maxima_frech) + self.daily_observations = daily_observations + @classmethod def from_sampling(cls, nb_obs: int, coordinates: AbstractCoordinates, margin_model: AbstractMarginModel): # todo: to take nb_obs into accoutn i could generate nb_obs * 365 observations - observations = DailyExp.from_sampling(nb_obs=365, 
coordinates=coordinates, margin_model=margin_model) - df_daily_values = observations.df_maxima_gev + daily_observations = DailyExp.from_sampling(nb_obs=365, coordinates=coordinates, margin_model=margin_model) + df_daily_values = daily_observations.df_maxima_gev df_maxima_gev = pd.DataFrame({'0': df_daily_values.max(axis=1)}, index=df_daily_values.index) - return cls(df_maxima_gev=df_maxima_gev) - + return cls(df_maxima_gev=df_maxima_gev, daily_observations=daily_observations) class MaxStableAnnualMaxima(AnnualMaxima): diff --git a/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py index b0044f33..00ca0ba9 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py @@ -1,21 +1,21 @@ import pandas as pd -from extreme_fit.distribution.abstract_params import AbstractParams -from extreme_fit.distribution.gev.gev_params import GevParams from extreme_fit.model.margin_model.abstract_margin_model import AbstractMarginModel -from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates -from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \ - ConsecutiveTemporalCoordinates from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ AbstractSpatioTemporalObservations class DailyObservations(AbstractSpatioTemporalObservations): - pass + def transform_to_standard_shape(self, coordinates: AbstractCoordinates): + coordinates.df_all_coordinates = pd.concat([coordinates.df_all_coordinates for _ in range(self.nb_obs)]) + df = pd.DataFrame(pd.concat([self.df_maxima_gev[c] for c in self.columns]), index=coordinates.index) + observation = 
AbstractSpatioTemporalObservations(df_maxima_gev=df) + return observation, coordinates -class DailyExp(AbstractSpatioTemporalObservations): +class DailyExp(DailyObservations): @classmethod def from_sampling(cls, nb_obs: int, coordinates: AbstractCoordinates, diff --git a/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py b/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py index cf0de5f4..377164f8 100644 --- a/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py +++ b/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py @@ -1,5 +1,6 @@ import unittest +from extreme_fit.model.daily_data_model import ConstantQuantileRegressionModelOnDailyData from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_exp_models import \ NonStationaryRateTemporalModel from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel, \ @@ -41,6 +42,12 @@ class TestExpSimulations(unittest.TestCase): TemporalCoordinatesQuantileRegressionModel]) simulation.plot_error_for_last_year_quantile(self.DISPLAY) + # Fit is way too long.... Probably the regression quantile estimator does not scale well at all... + # def test_stationary_run_daily_data_model(self): + # simulation = StationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[1, 2], + # model_classes=[ConstantQuantileRegressionModelOnDailyData]) + # simulation.plot_error_for_last_year_quantile(self.DISPLAY) + if __name__ == '__main__': unittest.main() -- GitLab