Commit 6c753d0d authored by Le Roux Erwan
Browse files

[quantile regression project] fix bugs in daily_observations.py for the...

[quantile regression project] Fix bugs in daily_observations.py in the creation of the coordinates for the daily fit. Also some fixes to prepare for a potential non-stationary exponential model one day.
parent 13f506a3
No related merge requests found
Showing with 60 additions and 20 deletions
+60 -20
......@@ -35,7 +35,8 @@ class LinearMarginEstimator(AbstractMarginEstimator):
@property
def coordinate_temp(self):
return self.dataset.coordinates.df_temporal_coordinates_for_fit(split=self.train_split,
starting_point=self.margin_model.starting_point)
starting_point=self.margin_model.starting_point,
drop_duplicates=self.margin_model.drop_duplicates)
@property
def maxima_gev_train(self):
......
......@@ -64,7 +64,7 @@ class LinearMarginFunction(ParametricMarginFunction):
@property
def form_dict(self) -> Dict[str, str]:
form_dict = {}
for gev_param_name in GevParams.PARAM_NAMES:
for gev_param_name in self.params_class.PARAM_NAMES:
linear_dims = self.gev_param_name_to_dims.get(gev_param_name, [])
# Load spatial form_dict (only if we have some spatial coordinates)
if self.coordinates.has_spatial_coordinates:
......
......@@ -44,7 +44,8 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame,
df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit:
data = data[0]
assert len(data) == len(df_coordinates_temp.values)
assert len(data) == len(df_coordinates_temp.values), 'len(data)={} != len(temp)={}'.format(len(data),
len(df_coordinates_temp.values))
x = ro.FloatVector(data)
if self.params_class is GevParams:
if self.fit_method == TemporalMarginFitMethod.is_mev_gev_fit:
......
......@@ -9,6 +9,7 @@ class NonStationaryRateTemporalModel(AbstractTemporalLinearMarginModel, Abstract
def __init__(self, *arg, **kwargs):
kwargs['params_class'] = ExpParams
super().__init__(*arg, **kwargs)
self.drop_duplicates = False
def load_margin_functions(self, gev_param_name_to_dims=None):
super().load_margin_functions({ExpParams.RATE: [self.coordinates.idx_temporal_coordinates]})
......@@ -23,6 +23,7 @@ class ParametricMarginModel(AbstractMarginModel, ABC):
self.starting_point = starting_point
self.margin_function_sample = None # type: ParametricMarginFunction
self.margin_function_start_fit = None # type: ParametricMarginFunction
self.drop_duplicates = True
super().__init__(coordinates, use_start_value, params_start_fit, params_sample, params_class)
def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame,
......
from extreme_fit.model.daily_data_model import TemporalCoordinatesQuantileRegressionModelOnDailyData
from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import \
NonStationaryLocationTemporalModel, NonStationaryLocationGumbelModel
from extreme_fit.model.quantile_model.quantile_regression_model import TemporalCoordinatesQuantileRegressionModel
......@@ -8,16 +9,19 @@ from projects.quantile_regression_vs_evt.annual_maxima_simulation.gev_simulation
from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \
CenteredScaledNormalization, IdentityTransformation
nb_time_series = 20
nb_time_series = 10
quantile = 0.98
time_series_lengths = [50, 100, 200]
transformation_class = [IdentityTransformation, CenteredScaledNormalization][1]
model_classes = [NonStationaryLocationTemporalModel,
TemporalCoordinatesQuantileRegressionModel,
NonStationaryLocationGumbelModel]
model_classes = [
NonStationaryLocationTemporalModel,
TemporalCoordinatesQuantileRegressionModel,
NonStationaryLocationGumbelModel,
TemporalCoordinatesQuantileRegressionModelOnDailyData
]
simulation_class = [NonStationaryLocationGumbelSimulation,
NonStationaryLocationGevSimulation,
NonStationaryExpSimulation][-2]
NonStationaryExpSimulation][-1]
simulation = simulation_class(nb_time_series=nb_time_series,
quantile=quantile,
......
......@@ -235,18 +235,23 @@ class AbstractCoordinates(object):
def has_temporal_coordinates(self) -> bool:
return self.nb_temporal_coordinates > 0
def df_temporal_coordinates(self, split: Split = Split.all, transformed=True) -> pd.DataFrame:
def df_temporal_coordinates(self, split: Split = Split.all, transformed=True,
drop_duplicates=True) -> pd.DataFrame:
if self.nb_temporal_coordinates == 0:
return pd.DataFrame()
else:
return self.df_coordinates(split, transformed=transformed).loc[:, self.temporal_coordinates_names] \
.drop_duplicates()
df = self.df_coordinates(split, transformed=transformed).loc[:, self.temporal_coordinates_names]
if drop_duplicates:
return df.drop_duplicates()
else:
return df
def df_temporal_coordinates_for_fit(self, split=Split.all, starting_point=None,
temporal_covariate_for_fit: Union[None, type] = None) -> pd.DataFrame:
temporal_covariate_for_fit: Union[None, type] = None,
drop_duplicates=True) -> pd.DataFrame:
# Load time covariate
if starting_point is None:
df = self.df_temporal_coordinates(split=split, transformed=True)
df = self.df_temporal_coordinates(split=split, transformed=True, drop_duplicates=drop_duplicates)
else:
# Load the untransformed coordinates
df_temporal_coordinates = self.df_temporal_coordinates(split=split, transformed=False)
......
......@@ -2,15 +2,22 @@ import pandas as pd
from extreme_fit.model.margin_model.abstract_margin_model import AbstractMarginModel
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \
AbstractTemporalCoordinates
from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \
AbstractSpatioTemporalObservations
class DailyObservations(AbstractSpatioTemporalObservations):
def transform_to_standard_shape(self, coordinates: AbstractCoordinates):
coordinates.df_all_coordinates = pd.concat([coordinates.df_all_coordinates for _ in range(self.nb_obs)])
df = pd.DataFrame(pd.concat([self.df_maxima_gev[c] for c in self.columns]), index=coordinates.index)
def transform_to_standard_shape(self, coordinates: AbstractTemporalCoordinates):
assert isinstance(coordinates, AbstractTemporalCoordinates)
df_coordinates = pd.concat([coordinates.df_all_coordinates for _ in range(self.nb_obs)])
df_coordinates.index = pd.Index(range(self.nb_obs * coordinates.nb_steps))
coordinates = AbstractTemporalCoordinates.from_df(df_coordinates, train_split_ratio=None,
transformation_class=coordinates.transformation_class)
df = pd.DataFrame(pd.concat([self.df_maxima_gev[c] for c in self.columns]))
df.index = coordinates.index
observation = AbstractSpatioTemporalObservations(df_maxima_gev=df)
return observation, coordinates
......
import unittest
from extreme_fit.model.daily_data_model import ConstantQuantileRegressionModelOnDailyData
from extreme_fit.model.daily_data_model import ConstantQuantileRegressionModelOnDailyData, \
TemporalCoordinatesQuantileRegressionModelOnDailyData
from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_exp_models import \
NonStationaryRateTemporalModel
from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel, \
......@@ -42,10 +43,29 @@ class TestExpSimulations(unittest.TestCase):
TemporalCoordinatesQuantileRegressionModel])
simulation.plot_error_for_last_year_quantile(self.DISPLAY)
# Fit is way too long... Probably the quantile regression estimator does not scale well at all...
# def test_stationary_run_daily_data_model(self):
class TestExpSimulationsDailyDataModels(unittest.TestCase):
DISPLAY = False
def test_stationary_run_daily_data_quantile_regression_model(self):
simulation = StationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[50, 60],
model_classes=[ConstantQuantileRegressionModelOnDailyData])
simulation.plot_error_for_last_year_quantile(self.DISPLAY)
def test_non_stationary_run_daily_data_quantile_regression_model(self):
simulation = NonStationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[50, 60],
model_classes=[TemporalCoordinatesQuantileRegressionModelOnDailyData])
first_estimator = simulation.model_class_to_time_series_length_to_estimators[
TemporalCoordinatesQuantileRegressionModelOnDailyData][50][0]
self.assertEqual(len(first_estimator.dataset.df_dataset), 50 * 365)
simulation.plot_error_for_last_year_quantile(self.DISPLAY)
# WARNING: It does not work yet; read the fevd manual to understand how it expects the parameters.
# Probably the formula to provide should be w.r.t. the scale parameter,
# and there also seems to be a need to provide a threshold parameter...
# def test_stationary_run_daily_data_exponential_model(self):
# simulation = StationaryExpSimulation(nb_time_series=1, quantile=0.5, time_series_lengths=[1, 2],
# model_classes=[ConstantQuantileRegressionModelOnDailyData])
# model_classes=[NonStationaryRateTemporalModel])
# simulation.plot_error_for_last_year_quantile(self.DISPLAY)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment