From 6c753d0dce453b92c799bd649ce17848a2b47b0d Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Tue, 24 Mar 2020 17:48:22 +0100
Subject: [PATCH] [quantile regression project] fix bugs in
 daily_observations.py for the creation of the coordinates for the daily fit.
 Add some fixes to prepare for a potential non-stationary exponential model
 one day

---
 .../abstract_margin_estimator.py              |  3 +-
 .../margin_function/linear_margin_function.py |  2 +-
 .../abstract_temporal_linear_margin_model.py  |  3 +-
 .../temporal_linear_margin_exp_models.py      |  1 +
 .../margin_model/parametric_margin_model.py   |  1 +
 ...main_non_stationary_quantile_regression.py | 14 ++++++----
 .../coordinates/abstract_coordinates.py       | 15 ++++++----
 .../daily_observations.py                     | 13 +++++++--
 .../test_annual_maxima_simulations.py         | 28 ++++++++++++++++---
 9 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py
index 00285f89..8286e8d5 100644
--- a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py
+++ b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py
@@ -35,7 +35,8 @@ class LinearMarginEstimator(AbstractMarginEstimator):
     @property
     def coordinate_temp(self):
         return self.dataset.coordinates.df_temporal_coordinates_for_fit(split=self.train_split,
-                                                                        starting_point=self.margin_model.starting_point)
+                                                                        starting_point=self.margin_model.starting_point,
+                                                                        drop_duplicates=self.margin_model.drop_duplicates)
 
     @property
     def maxima_gev_train(self):
diff --git a/extreme_fit/function/margin_function/linear_margin_function.py b/extreme_fit/function/margin_function/linear_margin_function.py
index 3cdc1f84..7deb1ab3 100644
--- a/extreme_fit/function/margin_function/linear_margin_function.py
+++ b/extreme_fit/function/margin_function/linear_margin_function.py
@@ -64,7 +64,7 @@ class LinearMarginFunction(ParametricMarginFunction):
     @property
     def form_dict(self) -> Dict[str, str]:
         form_dict = {}
-        for gev_param_name in GevParams.PARAM_NAMES:
+        for gev_param_name in self.params_class.PARAM_NAMES:
             linear_dims = self.gev_param_name_to_dims.get(gev_param_name, [])
             # Load spatial form_dict (only if we have some spatial coordinates)
             if self.coordinates.has_spatial_coordinates:
diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py
index b441211c..eb8ba9cf 100644
--- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py
+++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py
@@ -44,7 +44,8 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
     def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame,
                                   df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit:
         data = data[0]
-        assert len(data) == len(df_coordinates_temp.values)
+        assert len(data) == len(df_coordinates_temp.values), 'len(data)={} != len(temp)={}'.format(len(data),
+                                                                                                   len(df_coordinates_temp.values))
         x = ro.FloatVector(data)
         if self.params_class is GevParams:
             if self.fit_method == TemporalMarginFitMethod.is_mev_gev_fit:
diff --git a/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py
index b10c0277..677f1865 100644
--- a/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py
+++ b/extreme_fit/model/margin_model/linear_margin_model/temporal_linear_margin_exp_models.py
@@ -9,6 +9,7 @@ class NonStationaryRateTemporalModel(AbstractTemporalLinearMarginModel, Abstract
     def __init__(self, *arg, **kwargs):
         kwargs['params_class'] = ExpParams
         super().__init__(*arg, **kwargs)
+        self.drop_duplicates = False
 
     def load_margin_functions(self, gev_param_name_to_dims=None):
         super().load_margin_functions({ExpParams.RATE: [self.coordinates.idx_temporal_coordinates]})
diff --git a/extreme_fit/model/margin_model/parametric_margin_model.py b/extreme_fit/model/margin_model/parametric_margin_model.py
index 724366c4..49f1995c 100644
--- a/extreme_fit/model/margin_model/parametric_margin_model.py
+++ b/extreme_fit/model/margin_model/parametric_margin_model.py
@@ -23,6 +23,7 @@ class ParametricMarginModel(AbstractMarginModel, ABC):
         self.starting_point = starting_point
         self.margin_function_sample = None  # type: ParametricMarginFunction
         self.margin_function_start_fit = None  # type: ParametricMarginFunction
+        self.drop_duplicates = True
         super().__init__(coordinates, use_start_value, params_start_fit, params_sample, params_class)
 
     def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame,
diff --git a/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py b/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py
index ca70617a..c9a81296 100644
--- a/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py
+++ b/projects/quantile_regression_vs_evt/main_non_stationary_quantile_regression.py
@@ -1,3 +1,4 @@
+from extreme_fit.model.daily_data_model import TemporalCoordinatesQuantileRegressionModelOnDailyData
 from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import \
     NonStationaryLocationTemporalModel, NonStationaryLocationGumbelModel
 from extreme_fit.model.quantile_model.quantile_regression_model import TemporalCoordinatesQuantileRegressionModel
@@ -8,16 +9,19 @@ from projects.quantile_regression_vs_evt.annual_maxima_simulation.gev_simulation
 from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \
     CenteredScaledNormalization, IdentityTransformation
 
-nb_time_series = 20
+nb_time_series = 10
 quantile = 0.98
 time_series_lengths = [50, 100, 200]
 transformation_class = [IdentityTransformation, CenteredScaledNormalization][1]
-model_classes = [NonStationaryLocationTemporalModel,
-                 TemporalCoordinatesQuantileRegressionModel,
-                 NonStationaryLocationGumbelModel]
+model_classes = [
+    NonStationaryLocationTemporalModel,
+    TemporalCoordinatesQuantileRegressionModel,
+    NonStationaryLocationGumbelModel,
+    TemporalCoordinatesQuantileRegressionModelOnDailyData
+]
 simulation_class = [NonStationaryLocationGumbelSimulation,
                     NonStationaryLocationGevSimulation,
-                    NonStationaryExpSimulation][-2]
+                    NonStationaryExpSimulation][-1]
 
 simulation = simulation_class(nb_time_series=nb_time_series,
                               quantile=quantile,
diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
index f98974e9..76a211f1 100644
--- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py
+++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
@@ -235,18 +235,23 @@ class AbstractCoordinates(object):
     def has_temporal_coordinates(self) -> bool:
         return self.nb_temporal_coordinates > 0
 
-    def df_temporal_coordinates(self, split: Split = Split.all, transformed=True) -> pd.DataFrame:
+    def df_temporal_coordinates(self, split: Split = Split.all, transformed=True,
+                                drop_duplicates=True) -> pd.DataFrame:
         if self.nb_temporal_coordinates == 0:
             return pd.DataFrame()
         else:
-            return self.df_coordinates(split, transformed=transformed).loc[:, self.temporal_coordinates_names] \
-                .drop_duplicates()
+            df = self.df_coordinates(split, transformed=transformed).loc[:, self.temporal_coordinates_names]
+            if drop_duplicates:
+                return df.drop_duplicates()
+            else:
+                return df
 
     def df_temporal_coordinates_for_fit(self, split=Split.all, starting_point=None,
-                                        temporal_covariate_for_fit: Union[None, type] = None) -> pd.DataFrame:
+                                        temporal_covariate_for_fit: Union[None, type] = None,
+                                        drop_duplicates=True) -> pd.DataFrame:
         # Load time covariate
         if starting_point is None:
-            df = self.df_temporal_coordinates(split=split, transformed=True)
+            df = self.df_temporal_coordinates(split=split, transformed=True, drop_duplicates=drop_duplicates)
         else:
             # Load the un transformed coordinates
             df_temporal_coordinates = self.df_temporal_coordinates(split=split, transformed=False)
diff --git a/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py
index 00ca0ba9..e89b3bad 100644
--- a/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py
+++ b/spatio_temporal_dataset/spatio_temporal_observations/daily_observations.py
@@ -2,15 +2,22 @@ import pandas as pd
 
 from extreme_fit.model.margin_model.abstract_margin_model import AbstractMarginModel
 from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
+from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \
+    AbstractTemporalCoordinates
 from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \
     AbstractSpatioTemporalObservations
 
 
 class DailyObservations(AbstractSpatioTemporalObservations):
 
-    def transform_to_standard_shape(self, coordinates: AbstractCoordinates):
-        coordinates.df_all_coordinates = pd.concat([coordinates.df_all_coordinates for _ in range(self.nb_obs)])
-        df = pd.DataFrame(pd.concat([self.df_maxima_gev[c] for c in self.columns]), index=coordinates.index)
+    def transform_to_standard_shape(self, coordinates: AbstractTemporalCoordinates):
+        assert isinstance(coordinates, AbstractTemporalCoordinates)
+        df_coordinates = pd.concat([coordinates.df_all_coordinates for _ in range(self.nb_obs)])
+        df_coordinates.index = pd.Index(range(self.nb_obs * coordinates.nb_steps))
+        coordinates = AbstractTemporalCoordinates.from_df(df_coordinates, train_split_ratio=None,
+                                                          transformation_class=coordinates.transformation_class)
+        df = pd.DataFrame(pd.concat([self.df_maxima_gev[c] for c in self.columns]))
+        df.index = coordinates.index
         observation = AbstractSpatioTemporalObservations(df_maxima_gev=df)
         return observation, coordinates
 
diff --git a/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py b/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py
index 377164f8..1998818d 100644
--- a/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py
+++ b/test/test_projects/test_quantile_regression/test_annual_maxima_simulations.py
@@ -1,6 +1,7 @@
 import unittest
 
-from extreme_fit.model.daily_data_model import ConstantQuantileRegressionModelOnDailyData
+from extreme_fit.model.daily_data_model import ConstantQuantileRegressionModelOnDailyData, \
+    TemporalCoordinatesQuantileRegressionModelOnDailyData
 from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_exp_models import \
     NonStationaryRateTemporalModel
 from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel, \
@@ -42,10 +43,29 @@ class TestExpSimulations(unittest.TestCase):
                                                                TemporalCoordinatesQuantileRegressionModel])
         simulation.plot_error_for_last_year_quantile(self.DISPLAY)
 
-    # Fit is way too long.... Probability the regression quantile estimator does not scale well at all...
-    # def test_stationary_run_daily_data_model(self):
+
+class TestExpSimulationsDailyDataModels(unittest.TestCase):
+    DISPLAY = False
+
+    def test_stationary_run_daily_data_quantile_regression_model(self):
+        simulation = StationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[50, 60],
+                                             model_classes=[ConstantQuantileRegressionModelOnDailyData])
+        simulation.plot_error_for_last_year_quantile(self.DISPLAY)
+
+    def test_non_stationary_run_daily_data_quantile_regression_model(self):
+        simulation = NonStationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[50, 60],
+                                                model_classes=[TemporalCoordinatesQuantileRegressionModelOnDailyData])
+        first_estimator = simulation.model_class_to_time_series_length_to_estimators[
+            TemporalCoordinatesQuantileRegressionModelOnDailyData][50][0]
+        self.assertEqual(len(first_estimator.dataset.df_dataset), 50 * 365)
+        simulation.plot_error_for_last_year_quantile(self.DISPLAY)
+
+    # WARNING: This does not work yet; read the fevd manual to understand how it expects the parameters.
+    # The formula to provide should probably be w.r.t. the scale parameter,
+    # and there seems to be a need to provide a threshold parameter...
+    # def test_stationary_run_daily_data_exponential_model(self):
     #     simulation = StationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[1, 2],
-    #                                          model_classes=[ConstantQuantileRegressionModelOnDailyData])
+    #                                          model_classes=[NonStationaryRateTemporalModel])
     #     simulation.plot_error_for_last_year_quantile(self.DISPLAY)
 
 
-- 
GitLab