Commit d3e8e5cc authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[contrasting] add remove top maxima from dataset

parent 122c593b
No related merge requests found
Showing with 69 additions and 11 deletions
+69 -11
...@@ -30,7 +30,7 @@ def main(): ...@@ -30,7 +30,7 @@ def main():
# study_classes = [SafranPrecipitation1Day][:1] # study_classes = [SafranPrecipitation1Day][:1]
seasons = [Season.annual, Season.winter, Season.spring, Season.automn][:1] seasons = [Season.annual, Season.winter, Season.spring, Season.automn][:1]
fast = None fast = False
if fast is None: if fast is None:
massif_names = None massif_names = None
altitudes_list = altitudes_for_groups[:2] altitudes_list = altitudes_for_groups[:2]
......
...@@ -5,6 +5,8 @@ from cached_property import cached_property ...@@ -5,6 +5,8 @@ from cached_property import cached_property
from scipy.stats import chi2 from scipy.stats import chi2
from extreme_fit.distribution.gumbel.gumbel_gof import goodness_of_fit_anderson from extreme_fit.distribution.gumbel.gumbel_gof import goodness_of_fit_anderson
from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import AbstractMarginEstimator, \
LinearMarginEstimator
from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator_short from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator_short
from extreme_fit.function.param_function.polynomial_coef import PolynomialAllCoef, PolynomialCoef from extreme_fit.function.param_function.polynomial_coef import PolynomialAllCoef, PolynomialCoef
from extreme_fit.model.margin_model.polynomial_margin_model.gev_altitudinal_models import StationaryAltitudinal from extreme_fit.model.margin_model.polynomial_margin_model.gev_altitudinal_models import StationaryAltitudinal
...@@ -125,7 +127,8 @@ class OneFoldFit(object): ...@@ -125,7 +127,8 @@ class OneFoldFit(object):
@cached_property @cached_property
def sorted_estimators_with_stationary(self): def sorted_estimators_with_stationary(self):
if self.only_models_that_pass_anderson_test: if self.only_models_that_pass_anderson_test:
return [e for e in self.sorted_estimators if self.goodness_of_fit_test(e)] return [e for e in self.sorted_estimators
if self.goodness_of_fit_test(e) and self.sensitivity_of_fit_test(e)]
else: else:
return self._sorted_estimators_without_stationary return self._sorted_estimators_without_stationary
...@@ -231,7 +234,8 @@ class OneFoldFit(object): ...@@ -231,7 +234,8 @@ class OneFoldFit(object):
@property @property
def is_significant(self) -> bool: def is_significant(self) -> bool:
stationary_model_classes = [StationaryAltitudinal, StationaryGumbelAltitudinal, AltitudinalShapeLinearTimeStationary] stationary_model_classes = [StationaryAltitudinal, StationaryGumbelAltitudinal,
AltitudinalShapeLinearTimeStationary]
if any([isinstance(self.best_estimator.margin_model, c) if any([isinstance(self.best_estimator.margin_model, c)
for c in stationary_model_classes]): for c in stationary_model_classes]):
return False return False
...@@ -252,6 +256,30 @@ class OneFoldFit(object): ...@@ -252,6 +256,30 @@ class OneFoldFit(object):
# self._folder_to_goodness[self.folder_for_plots] = goodness_of_fit_anderson_test # self._folder_to_goodness[self.folder_for_plots] = goodness_of_fit_anderson_test
# return goodness_of_fit_anderson_test # return goodness_of_fit_anderson_test
def sensitivity_of_fit_test(self, estimator: LinearMarginEstimator):
# Build the dataset without the maxima for each altitude
new_dataset = AbstractDataset.remove_top_maxima(self.dataset.observations,
self.dataset.coordinates)
# Fit the new estimator
new_estimator = fitted_linear_margin_estimator_short(model_class=type(estimator.margin_model),
dataset=new_dataset,
fit_method=self.fit_method,
temporal_covariate_for_fit=self.temporal_covariate_for_fit)
# Compare sign of change
print(self.massif_name, self.sign_of_change(estimator), self.sign_of_change(new_estimator))
has_not_opposite_sign = self.sign_of_change(estimator) * self.sign_of_change(new_estimator) >= 0
return has_not_opposite_sign
def sign_of_change(self, estimator):
return_levels = []
for year in [2019 - 50, 2019]:
coordinate = np.array([self.altitude_plot, year])
return_level = estimator.function_from_fit.get_params(
coordinate=coordinate,
is_transformed=False).return_level(return_period=self.return_period)
return_levels.append(return_level)
return 100 * (return_levels[1] - return_levels[0]) / return_levels[0]
def goodness_of_fit_test(self, estimator): def goodness_of_fit_test(self, estimator):
quantiles = self.compute_empirical_quantiles(estimator=estimator) quantiles = self.compute_empirical_quantiles(estimator=estimator)
goodness_of_fit_anderson_test = goodness_of_fit_anderson(quantiles, self.SIGNIFICANCE_LEVEL) goodness_of_fit_anderson_test = goodness_of_fit_anderson(quantiles, self.SIGNIFICANCE_LEVEL)
...@@ -274,4 +302,4 @@ class OneFoldFit(object): ...@@ -274,4 +302,4 @@ class OneFoldFit(object):
# def best_confidence_interval(self): # def best_confidence_interval(self):
# EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(self.best_estimator, # EurocodeConfidenceIntervalFromExtremes.from_estimator_extremes(self.best_estimator,
# ci_method=ConfidenceIntervalMethodFromExtremes.ci_mle, # ci_method=ConfidenceIntervalMethodFromExtremes.ci_mle,
# temporal_covariate=np.array([2019, self.altitude_plot]),) # temporal_covariate=np.array([2019, self.altitude_plot]),)
\ No newline at end of file
...@@ -26,15 +26,32 @@ class AbstractDataset(object): ...@@ -26,15 +26,32 @@ class AbstractDataset(object):
def remove_zeros(cls, observations: AbstractSpatioTemporalObservations, def remove_zeros(cls, observations: AbstractSpatioTemporalObservations,
coordinates: AbstractCoordinates): coordinates: AbstractCoordinates):
ind_without_zero = ~(observations.df_maxima_gev == 0).any(axis=1) ind_without_zero = ~(observations.df_maxima_gev == 0).any(axis=1)
return cls.create_new_dataset(coordinates, ind_without_zero, observations)
@classmethod
def create_new_dataset(cls, coordinates, ind, observations):
# Create new observations # Create new observations
new_df_maxima_gev = observations.df_maxima_gev.loc[ind_without_zero].copy() new_df_maxima_gev = observations.df_maxima_gev.loc[ind].copy()
new_observations = AbstractSpatioTemporalObservations(df_maxima_gev=new_df_maxima_gev) new_observations = AbstractSpatioTemporalObservations(df_maxima_gev=new_df_maxima_gev)
# Create new coordinates # Create new coordinates
coordinate_class = type(coordinates) coordinate_class = type(coordinates)
new_df = coordinates.df_all_coordinates.loc[ind_without_zero].copy() new_df = coordinates.df_all_coordinates.loc[ind].copy()
new_coordinates = coordinate_class(df=new_df, slicer_class=type(coordinates.slicer)) new_coordinates = coordinate_class(df=new_df, slicer_class=type(coordinates.slicer))
return cls(new_observations, new_coordinates) return cls(new_observations, new_coordinates)
@classmethod
def remove_top_maxima(cls, observations: AbstractSpatioTemporalObservations,
coordinates: AbstractSpatioTemporalCoordinates):
""" We remove the top maxima w.r.t. each spatial coordinates"""
assert isinstance(coordinates, AbstractSpatioTemporalCoordinates)
idxs_top = []
for spatial_coordinate in coordinates.spatial_coordinates.coordinates_values():
ind = coordinates.df_all_coordinates[coordinates.COORDINATE_X] == spatial_coordinate[0]
idx = observations.df_maxima_gev.loc[ind].idxmax()[0]
idxs_top.append(idx)
ind = ~coordinates.index.isin(idxs_top)
return cls.create_new_dataset(coordinates, ind, observations)
@property @property
def df_dataset(self) -> pd.DataFrame: def df_dataset(self) -> pd.DataFrame:
......
...@@ -27,6 +27,21 @@ class TestDataset(unittest.TestCase): ...@@ -27,6 +27,21 @@ class TestDataset(unittest.TestCase):
nb_points = 2 nb_points = 2
def test_remove_zero_from_dataset(self): def test_remove_zero_from_dataset(self):
coordinates, dataset_initial, observations = self.build_initial_dataset()
dataset_without_zero = AbstractDataset.remove_zeros(observations,
coordinates)
self.assertEqual(len(dataset_initial.coordinates), 4)
self.assertEqual(len(dataset_without_zero.coordinates), 2)
def test_remove_top_maxima_from_dataset(self):
coordinates, dataset_initial, observations = self.build_initial_dataset()
dataset_without_top = AbstractDataset.remove_top_maxima(observations,
coordinates)
self.assertEqual(4, len(dataset_initial.coordinates), 4)
self.assertEqual(2, len(dataset_without_top.coordinates))
self.assertEqual(set(dataset_without_top.coordinates.index.values), {0, 3})
def build_initial_dataset(self):
temporal_coordinates = ConsecutiveTemporalCoordinates.from_nb_temporal_steps(nb_temporal_steps=2) temporal_coordinates = ConsecutiveTemporalCoordinates.from_nb_temporal_steps(nb_temporal_steps=2)
spatial_coordinates = AbstractSpatialCoordinates.from_list_x_coordinates([300, 600]) spatial_coordinates = AbstractSpatialCoordinates.from_list_x_coordinates([300, 600])
coordinates = AbstractSpatioTemporalCoordinates(spatial_coordinates=spatial_coordinates, coordinates = AbstractSpatioTemporalCoordinates(spatial_coordinates=spatial_coordinates,
...@@ -38,11 +53,9 @@ class TestDataset(unittest.TestCase): ...@@ -38,11 +53,9 @@ class TestDataset(unittest.TestCase):
(600, 1): [0], (600, 1): [0],
} }
observations = AnnualMaxima.from_coordinates(coordinates, coordinate_values_to_maxima) observations = AnnualMaxima.from_coordinates(coordinates, coordinate_values_to_maxima)
dataset_with_zero = AbstractDataset(observations, coordinates) dataset_initial = AbstractDataset(observations, coordinates)
dataset_without_zero = AbstractDataset.remove_zeros(observations, return coordinates, dataset_initial, observations
coordinates)
self.assertEqual(len(dataset_with_zero.coordinates), 4)
self.assertEqual(len(dataset_without_zero.coordinates), 2)
def test_max_stable_dataset_R1_and_R2(self): def test_max_stable_dataset_R1_and_R2(self):
max_stable_models = load_test_max_stable_models()[:] max_stable_models = load_test_max_stable_models()[:]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment