From 6c753d0dce453b92c799bd649ce17848a2b47b0d Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Tue, 24 Mar 2020 17:48:22 +0100 Subject: [PATCH] [quantile regression project] fix bugs in daily_observations.py for the creation of the coordinates for the daily fit. some fix to prepare for some potential non stationary exponential model one day --- .../abstract_margin_estimator.py | 3 +- .../margin_function/linear_margin_function.py | 2 +- .../abstract_temporal_linear_margin_model.py | 3 +- .../temporal_linear_margin_exp_models.py | 1 + .../margin_model/parametric_margin_model.py | 1 + ...main_non_stationary_quantile_regression.py | 14 ++++++---- .../coordinates/abstract_coordinates.py | 15 ++++++---- .../daily_observations.py | 13 +++++++-- .../test_annual_maxima_simulations.py | 28 ++++++++++++++++--- 9 files changed, 60 insertions(+), 20 deletions(-) diff --git a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py index 00285f89..8286e8d5 100644 --- a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py +++ b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py @@ -35,7 +35,8 @@ class LinearMarginEstimator(AbstractMarginEstimator): @property def coordinate_temp(self): return self.dataset.coordinates.df_temporal_coordinates_for_fit(split=self.train_split, - starting_point=self.margin_model.starting_point) + starting_point=self.margin_model.starting_point, + drop_duplicates=self.margin_model.drop_duplicates) @property def maxima_gev_train(self): diff --git a/extreme_fit/function/margin_function/linear_margin_function.py b/extreme_fit/function/margin_function/linear_margin_function.py index 3cdc1f84..7deb1ab3 100644 --- a/extreme_fit/function/margin_function/linear_margin_function.py +++ b/extreme_fit/function/margin_function/linear_margin_function.py @@ -64,7 +64,7 @@ class LinearMarginFunction(ParametricMarginFunction): 
@property def form_dict(self) -> Dict[str, str]: form_dict = {} - for gev_param_name in GevParams.PARAM_NAMES: + for gev_param_name in self.params_class.PARAM_NAMES: linear_dims = self.gev_param_name_to_dims.get(gev_param_name, []) # Load spatial form_dict (only if we have some spatial coordinates) if self.coordinates.has_spatial_coordinates: diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py index b441211c..eb8ba9cf 100644 --- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py +++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py @@ -44,7 +44,8 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame, df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit: data = data[0] - assert len(data) == len(df_coordinates_temp.values) + assert len(data) == len(df_coordinates_temp.values), 'len(data)={} != len(temp)={}'.format(len(data), + len(df_coordinates_temp.values)) x = ro.FloatVector(data) if self.params_class is GevParams: if self.fit_method == TemporalMarginFitMethod.is_mev_gev_fit: diff --git a/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py index b10c0277..677f1865 100644 --- a/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py +++ b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py @@ -9,6 +9,7 @@ class NonStationaryRateTemporalModel(AbstractTemporalLinearMarginModel, Abstract def __init__(self, *arg, **kwargs): kwargs['params_class'] = ExpParams super().__init__(*arg, **kwargs) + self.drop_duplicates = False def 
load_margin_functions(self, gev_param_name_to_dims=None): super().load_margin_functions({ExpParams.RATE: [self.coordinates.idx_temporal_coordinates]}) diff --git a/extreme_fit/model/margin_model/parametric_margin_model.py b/extreme_fit/model/margin_model/parametric_margin_model.py index 724366c4..49f1995c 100644 --- a/extreme_fit/model/margin_model/parametric_margin_model.py +++ b/extreme_fit/model/margin_model/parametric_margin_model.py @@ -23,6 +23,7 @@ class ParametricMarginModel(AbstractMarginModel, ABC): self.starting_point = starting_point self.margin_function_sample = None # type: ParametricMarginFunction self.margin_function_start_fit = None # type: ParametricMarginFunction + self.drop_duplicates = True super().__init__(coordinates, use_start_value, params_start_fit, params_sample, params_class) def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame, diff --git a/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py b/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py index ca70617a..c9a81296 100644 --- a/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py +++ b/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py @@ -1,3 +1,4 @@ +from extreme_fit.model.daily_data_model import TemporalCoordinatesQuantileRegressionModelOnDailyData from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import \ NonStationaryLocationTemporalModel, NonStationaryLocationGumbelModel from extreme_fit.model.quantile_model.quantile_regression_model import TemporalCoordinatesQuantileRegressionModel @@ -8,16 +9,19 @@ from projects.quantile_regression_vs_evt.annual_maxima_simulation.gev_simulation from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \ CenteredScaledNormalization, IdentityTransformation -nb_time_series = 20 +nb_time_series = 10 quantile = 0.98 
time_series_lengths = [50, 100, 200] transformation_class = [IdentityTransformation, CenteredScaledNormalization][1] -model_classes = [NonStationaryLocationTemporalModel, - TemporalCoordinatesQuantileRegressionModel, - NonStationaryLocationGumbelModel] +model_classes = [ + NonStationaryLocationTemporalModel, + TemporalCoordinatesQuantileRegressionModel, + NonStationaryLocationGumbelModel, + TemporalCoordinatesQuantileRegressionModelOnDailyData +] simulation_class = [NonStationaryLocationGumbelSimulation, NonStationaryLocationGevSimulation, - NonStationaryExpSimulation][-2] + NonStationaryExpSimulation][-1] simulation = simulation_class(nb_time_series=nb_time_series, quantile=quantile, diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index f98974e9..76a211f1 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -235,18 +235,23 @@ class AbstractCoordinates(object): def has_temporal_coordinates(self) -> bool: return self.nb_temporal_coordinates > 0 - def df_temporal_coordinates(self, split: Split = Split.all, transformed=True) -> pd.DataFrame: + def df_temporal_coordinates(self, split: Split = Split.all, transformed=True, + drop_duplicates=True) -> pd.DataFrame: if self.nb_temporal_coordinates == 0: return pd.DataFrame() else: - return self.df_coordinates(split, transformed=transformed).loc[:, self.temporal_coordinates_names] \ - .drop_duplicates() + df = self.df_coordinates(split, transformed=transformed).loc[:, self.temporal_coordinates_names] + if drop_duplicates: + return df.drop_duplicates() + else: + return df def df_temporal_coordinates_for_fit(self, split=Split.all, starting_point=None, - temporal_covariate_for_fit: Union[None, type] = None) -> pd.DataFrame: + temporal_covariate_for_fit: Union[None, type] = None, + drop_duplicates=True) -> pd.DataFrame: # Load time covariate if 
starting_point is None: - df = self.df_temporal_coordinates(split=split, transformed=True) + df = self.df_temporal_coordinates(split=split, transformed=True, drop_duplicates=drop_duplicates) else: # Load the un transformed coordinates df_temporal_coordinates = self.df_temporal_coordinates(split=split, transformed=False) diff --git a/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py index 00ca0ba9..e89b3bad 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py @@ -2,15 +2,22 @@ import pandas as pd from extreme_fit.model.margin_model.abstract_margin_model import AbstractMarginModel from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \ + AbstractTemporalCoordinates from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ AbstractSpatioTemporalObservations class DailyObservations(AbstractSpatioTemporalObservations): - def transform_to_standard_shape(self, coordinates: AbstractCoordinates): - coordinates.df_all_coordinates = pd.concat([coordinates.df_all_coordinates for _ in range(self.nb_obs)]) - df = pd.DataFrame(pd.concat([self.df_maxima_gev[c] for c in self.columns]), index=coordinates.index) + def transform_to_standard_shape(self, coordinates: AbstractTemporalCoordinates): + assert isinstance(coordinates, AbstractTemporalCoordinates) + df_coordinates = pd.concat([coordinates.df_all_coordinates for _ in range(self.nb_obs)]) + df_coordinates.index = pd.Index(range(self.nb_obs * coordinates.nb_steps)) + coordinates = AbstractTemporalCoordinates.from_df(df_coordinates, train_split_ratio=None, + transformation_class=coordinates.transformation_class) + df = 
pd.DataFrame(pd.concat([self.df_maxima_gev[c] for c in self.columns])) + df.index = coordinates.index observation = AbstractSpatioTemporalObservations(df_maxima_gev=df) return observation, coordinates diff --git a/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py b/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py index 377164f8..1998818d 100644 --- a/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py +++ b/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py @@ -1,6 +1,7 @@ import unittest -from extreme_fit.model.daily_data_model import ConstantQuantileRegressionModelOnDailyData +from extreme_fit.model.daily_data_model import ConstantQuantileRegressionModelOnDailyData, \ + TemporalCoordinatesQuantileRegressionModelOnDailyData from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_exp_models import \ NonStationaryRateTemporalModel from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel, \ @@ -42,10 +43,29 @@ class TestExpSimulations(unittest.TestCase): TemporalCoordinatesQuantileRegressionModel]) simulation.plot_error_for_last_year_quantile(self.DISPLAY) - # Fit is way too long.... Probability the regression quantile estimator does not scale well at all... 
- # def test_stationary_run_daily_data_model(self): + +class TestExpSimulationsDailyDataModels(unittest.TestCase): + DISPLAY = False + + def test_stationary_run_daily_data_quantile_regression_model(self): + simulation = StationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[50, 60], + model_classes=[ConstantQuantileRegressionModelOnDailyData]) + simulation.plot_error_for_last_year_quantile(self.DISPLAY) + + def test_non_stationary_run_daily_data_quantile_regression_model(self): + simulation = NonStationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[50, 60], + model_classes=[TemporalCoordinatesQuantileRegressionModelOnDailyData]) + first_estimator = simulation.model_class_to_time_series_length_to_estimators[ + TemporalCoordinatesQuantileRegressionModelOnDailyData][50][0] + self.assertEqual(len(first_estimator.dataset.df_dataset), 50 * 365) + simulation.plot_error_for_last_year_quantile(self.DISPLAY) + + # WARNING: It does not work yet; read the fevd manual to understand how it expects the parameters + # probably the formula to provide should be w.r.t. the scale parameter + # & there seems to be a need to provide a threshold parameter... # def test_stationary_run_daily_data_exponential_model(self): # simulation = StationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[1, 2], - # model_classes=[ConstantQuantileRegressionModelOnDailyData]) + # model_classes=[NonStationaryRateTemporalModel]) # simulation.plot_error_for_last_year_quantile(self.DISPLAY) -- GitLab