From a5c353c6bb086bce6cd84dee47acf6e9dbfce1f7 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Fri, 18 Sep 2020 11:57:04 +0200 Subject: [PATCH] [contrasting] improve pointwise_gev_study_visualizer.py with a linear regression against the altitude. modify the _maxima_gev depending on the fit method --- .../scm_models_data/abstract_study.py | 8 +++-- .../visualization/plot_utils.py | 3 ++ extreme_fit/distribution/gev/gev_params.py | 9 +++++ .../abstract_margin_estimator.py | 6 +++- .../abstract_temporal_linear_margin_model.py | 10 ++---- .../gev_altitudinal_models.py | 2 +- .../altitudes_fit/main_altitudes_studies.py | 4 +-- .../one_fold_analysis/altitude_group.py | 31 +++++++++++++---- ...es_visualizer_for_non_stationary_models.py | 4 +++ .../pointwise_gev_study_visualizer.py | 33 +++++++++++++++---- .../dataset/abstract_dataset.py | 19 ++++++++++- .../test_dataset.py | 27 +++++++++++++++ 12 files changed, 129 insertions(+), 27 deletions(-) diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py index 549a997a..cdc61c4c 100644 --- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py +++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py @@ -33,6 +33,7 @@ from extreme_fit.function.margin_function.abstract_margin_function import \ AbstractMarginFunction from extreme_data.meteo_france_data.scm_models_data.visualization.create_shifted_cmap import create_colorbase_axis, \ get_shifted_map, get_colors +from extreme_fit.model.margin_model.utils import MarginFitMethod from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ AbstractSpatialCoordinates @@ -96,6 +97,8 @@ class AbstractStudy(object): assert slope in SLOPES self.orientation = orientation self.slope = slope + # Add some options + self.fit_method = MarginFitMethod.is_mev_gev_fit """ Time """ @@ -800,10 +803,11 @@ class AbstractStudy(object): return ~np.array(mask_french_alps, dtype=bool) @cached_property - def massif_name_to_stationary_gev_params(self): + def massif_name_to_stationary_gev_params_for_non_zero_annual_maxima(self): massif_name_to_stationary_gev_params = {} for massif_name, annual_maxima in self.massif_name_to_annual_maxima.items(): - gev_params = fitted_stationary_gev(annual_maxima) + non_zero_annual_maxima = annual_maxima[annual_maxima > 0] + gev_params = fitted_stationary_gev(non_zero_annual_maxima, fit_method=self.fit_method) massif_name_to_stationary_gev_params[massif_name] = gev_params return massif_name_to_stationary_gev_params diff --git a/extreme_data/meteo_france_data/scm_models_data/visualization/plot_utils.py b/extreme_data/meteo_france_data/scm_models_data/visualization/plot_utils.py index 8c60223d..3bb18029 100644 --- a/extreme_data/meteo_france_data/scm_models_data/visualization/plot_utils.py +++ b/extreme_data/meteo_france_data/scm_models_data/visualization/plot_utils.py @@ -49,8 +49,11 @@ def load_plot(cmap, graduation, label, massif_name_to_value, altitude, fit_metho min_ratio = 0 max_ratio = max_abs_change + for v in massif_name_to_value.values(): + assert isinstance(v, float) massif_name_to_color = {m: get_colors([v], cmap, min_ratio, max_ratio)[0] for m, v in massif_name_to_value.items()} + ticks_values_and_labels = ticks, labels ax = plt.gca() diff --git a/extreme_fit/distribution/gev/gev_params.py b/extreme_fit/distribution/gev/gev_params.py index 384509c3..f1e0864b 100644 --- a/extreme_fit/distribution/gev/gev_params.py +++ b/extreme_fit/distribution/gev/gev_params.py @@ -135,6 +135,15 @@ class GevParams(AbstractExtremeParams): cls.SHAPE: 'zeta', }[param_name] + @classmethod + def full_name_from_param_name(cls, param_name): + assert param_name in cls.PARAM_NAMES + return { + cls.LOC: 'location', + cls.SCALE: 'scale', + cls.SHAPE: 'shape', + }[param_name] + def gumbel_standardization(self, x): x -= self.location x /= self.scale diff --git a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py index 62661c25..12e01691 100644 --- a/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py +++ b/extreme_fit/estimator/margin_estimator/abstract_margin_estimator.py @@ -9,6 +9,7 @@ from extreme_fit.estimator.abstract_estimator import AbstractEstimator from extreme_fit.estimator.utils import load_margin_function from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model import LinearMarginModel from extreme_fit.function.margin_function.linear_margin_function import LinearMarginFunction +from extreme_fit.model.margin_model.utils import MarginFitMethod from extreme_fit.model.result_from_model_fit.abstract_result_from_model_fit import AbstractResultFromModelFit from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset from spatio_temporal_dataset.slicer.split import Split @@ -41,7 +42,10 @@ class LinearMarginEstimator(AbstractMarginEstimator): return self._maxima_gev(split) def _maxima_gev(self, split): - return self.dataset.maxima_gev_for_spatial_extremes_package(split) + if self.margin_model.fit_method == MarginFitMethod.spatial_extremes_mle: + return self.dataset.maxima_gev_for_spatial_extremes_package(split) + else: + return self.dataset.maxima_gev(split) def df_coordinates_spat(self, split): return self.dataset.coordinates.df_spatial_coordinates(split=split, diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py index c02d1bd9..090cfb25 100644 --- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py +++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py @@ -36,13 +36,9 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame, df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit: data = data.flatten() - # if len(data) == 1: - # data = data[0] - # assert len(data) == len(df_coordinates_temp.values), 'len(data)={} != len(temp)={}'.format(len(data), - # len( - # df_coordinates_temp.values)) - # else: - # data = data.flatten() + assert len(df_coordinates_temp) == len(data) + if not (df_coordinates_spat is None or df_coordinates_spat.empty): + assert len(df_coordinates_spat) == len(data) x = ro.FloatVector(data) if self.params_class is GevParams: diff --git a/extreme_fit/model/margin_model/polynomial_margin_model/gev_altitudinal_models.py b/extreme_fit/model/margin_model/polynomial_margin_model/gev_altitudinal_models.py index fc0ce689..f5612e2b 100644 --- a/extreme_fit/model/margin_model/polynomial_margin_model/gev_altitudinal_models.py +++ b/extreme_fit/model/margin_model/polynomial_margin_model/gev_altitudinal_models.py @@ -36,7 +36,7 @@ class AbstractAltitudinalModel(AbstractSpatioTemporalPolynomialModel): def name_str(self): s = '' if self.dim_to_str_number(GevParams.SHAPE, self.coordinates.idx_x_coordinates) == '1': - s += '\\zeta_s' + s += '\\zeta_z' if self.dim_to_str_number(GevParams.LOC, self.coordinates.idx_temporal_coordinates) in ['1', '2']: s += '\\mu_t' if self.dim_to_str_number(GevParams.LOC, self.coordinates.idx_temporal_coordinates) == '2': diff --git a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py index 3cb375ca..2f8106d0 100644 --- a/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py +++ b/projects/altitude_spatial_model/altitudes_fit/main_altitudes_studies.py @@ -39,7 +39,7 @@ def main(): , SafranPrecipitation3Days][:1] altitudes = [1800, 2100, 2400] study_classes = [SafranSnowfall1Day, SafranSnowfall3Days, SafranSnowfall5Days, SafranSnowfall7Days][:1] - study_classes = [SafranPrecipitation1Day][:1] + # study_classes = [SafranPrecipitation1Day][:1] # Common parameters # altitudes = [600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600] @@ -47,7 +47,7 @@ def main(): # massif_names = ['Mercantour', 'Vercors', 'Ubaye'] seasons = [Season.annual, Season.winter, Season.spring, Season.automn][:1] - main_loop(altitudes_for_groups, massif_names, seasons, study_classes) + main_loop(altitudes_for_groups[:1], massif_names, seasons, study_classes) def main_loop(altitudes_list, massif_names, seasons, study_classes): diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitude_group.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitude_group.py index 72d694b2..9092fac1 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitude_group.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitude_group.py @@ -2,11 +2,16 @@ from enum import Enum # The order is important altitudes_for_groups = [ - [900, 1200, 1500], - [1800, 2100, 2400], - [2700, 3000, 3300] + [300, 600, 900], + [1200, 1500, 1800], + [2100, 2400, 2700], + [3000, 3300, 3600] ] - +# altitudes_for_groups = [ +# [900, 1200, 1500], +# [1800, 2100, 2400], +# [2700, 3000, 3300] +# ] # altitudes_for_groups = [ # [600, 900, 1200, 1500, 1800], @@ -34,7 +39,7 @@ class LowAltitudeGroup(AbstractAltitudeGroup): @property def reference_altitude(self): - return 1000 + return 600 class MidAltitudeGroup(AbstractAltitudeGroup): @@ -45,7 +50,7 @@ class MidAltitudeGroup(AbstractAltitudeGroup): @property def reference_altitude(self): - return 2000 + return 1500 class HighAltitudeGroup(AbstractAltitudeGroup): @@ -56,7 +61,17 @@ class HighAltitudeGroup(AbstractAltitudeGroup): @property def reference_altitude(self): - return 3000 + return 2400 + +class VeyHighAltitudeGroup(AbstractAltitudeGroup): + + @property + def name(self): + return 'very high' + + @property + def reference_altitude(self): + return 3300 class DefaultAltitudeGroup(AbstractAltitudeGroup): @@ -78,5 +93,7 @@ def get_altitude_group_from_altitudes(altitudes): return MidAltitudeGroup() elif s == set(altitudes_for_groups[2]): return HighAltitudeGroup() + elif s == set(altitudes_for_groups[3]): + return VeyHighAltitudeGroup() else: return DefaultAltitudeGroup() diff --git a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py index 103eb6b7..2674d546 100644 --- a/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py +++ b/projects/altitude_spatial_model/altitudes_fit/one_fold_analysis/altitudes_studies_visualizer_for_non_stationary_models.py @@ -25,6 +25,7 @@ from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitude_gr from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fit import \ OneFoldFit from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): @@ -50,7 +51,10 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): self._massif_name_to_one_fold_fit = {} for massif_name in self.massif_names: if any([massif_name in study.study_massif_names for study in self.studies.altitude_to_study.values()]): + # Load dataset dataset = studies.spatio_temporal_dataset(massif_name=massif_name) + # dataset_without_zero = AbstractDataset.remove_zeros(dataset.observations, + # dataset.coordinates) old_fold_fit = OneFoldFit(massif_name, dataset, model_classes, self.fit_method, self.temporal_covariate_for_fit, type(self.altitude_group), diff --git a/projects/contrasting_trends_in_snow_loads/post thesis committee/pointwise_gev_study_visualizer.py b/projects/contrasting_trends_in_snow_loads/post thesis committee/pointwise_gev_study_visualizer.py index 6d848cc1..e771f947 100644 --- a/projects/contrasting_trends_in_snow_loads/post thesis committee/pointwise_gev_study_visualizer.py +++ b/projects/contrasting_trends_in_snow_loads/post thesis committee/pointwise_gev_study_visualizer.py @@ -20,19 +20,37 @@ class PointwiseGevStudyVisualizer(AltitudesStudies): super().__init__(study_class, altitudes, spatial_transformation_class, temporal_transformation_class, **kwargs_study) # self.altitudes_for_temporal_hypothesis = [min(self.altitudes), 2100, max(self.altitudes)] - self.altitudes_for_temporal_hypothesis = [900, 2100, 3000] + self.altitudes_for_temporal_hypothesis = [600, 1500, 2400, 3300] def plot_gev_params_against_altitude(self): for param_name in GevParams.PARAM_NAMES[:]: ax = plt.gca() + massif_name_to_linear_coef = {} + massif_name_to_r2_score = {} for massif_name in self.study.all_massif_names()[:]: - self._plot_gev_params_against_altitude_one_massif(ax, massif_name, param_name) + linear_coef, _, r2 = self._plot_gev_params_against_altitude_one_massif(ax, massif_name, param_name) + massif_name_to_linear_coef[massif_name] = linear_coef[0] + massif_name_to_r2_score[massif_name] = str(round(r2, 2)) + print(massif_name_to_linear_coef, massif_name_to_r2_score) + # Plot change against altitude ax.legend(prop={'size': 7}, ncol=3) ax.set_xlabel('Altitude') - ax.set_ylabel(param_name) + ax.set_ylabel(GevParams.full_name_from_param_name(param_name) + ' parameter for a stationary GEV distribution') plot_name = '{} change /with altitude'.format(param_name) self.show_or_save_to_file(plot_name, no_title=True, tight_layout=True, show=False) ax.clear() + # Plot map of slope for each massif + visualizer = StudyVisualizer(study=self.study, show=False, save_to_file=True) + ylabel = 'Linear slope for the {} parameter against the altitude'.format(param_name) + visualizer.plot_map(cmap=plt.cm.coolwarm, fit_method=self.study.fit_method, + graduation=1, + label=ylabel, massif_name_to_value=massif_name_to_linear_coef, + plot_name=ylabel, add_x_label=False, + # negative_and_positive_values=param_name == GevParams.SHAPE, + negative_and_positive_values=True, + add_colorbar=True, + massif_name_to_text=massif_name_to_r2_score, + ) plt.close() def _plot_gev_params_against_altitude_one_massif(self, ax, massif_name, param_name): @@ -42,11 +60,13 @@ class PointwiseGevStudyVisualizer(AltitudesStudies): for altitude, study in self.altitude_to_study.items(): if massif_name in study.study_massif_names: altitudes.append(altitude) - gev_params = study.massif_name_to_stationary_gev_params[massif_name] + gev_params = study.massif_name_to_stationary_gev_params_for_non_zero_annual_maxima[massif_name] params.append(gev_params.to_dict()[param_name]) confidence_intervals.append(gev_params.param_name_to_confidence_interval[param_name]) massif_id = self.study.all_massif_names().index(massif_name) plot_against_altitude(altitudes, ax, massif_id, massif_name, params, fill=False) + + return fit_linear_regression(altitudes, params) # plot_against_altitude(altitudes, ax, massif_id, massif_name, confidence_intervals, fill=True) # Plot against the time @@ -170,10 +190,11 @@ class PointwiseGevStudyVisualizer(AltitudesStudies): if __name__ == '__main__': altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300] + altitudes = [300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600] # altitudes = paper_altitudes # altitudes = [1800, 2100] visualizer = PointwiseGevStudyVisualizer(SafranSnowfall1Day, altitudes=altitudes) visualizer.plot_gev_params_against_altitude() - visualizer.plot_gev_params_against_time_for_all_altitudes() - visualizer.plot_gev_params_against_time_for_all_massifs() + # visualizer.plot_gev_params_against_time_for_all_altitudes() + # visualizer.plot_gev_params_against_time_for_all_massifs() # visualizer.plot_time_derivative_against_time() diff --git a/spatio_temporal_dataset/dataset/abstract_dataset.py b/spatio_temporal_dataset/dataset/abstract_dataset.py index 3ff96f03..08029b50 100644 --- a/spatio_temporal_dataset/dataset/abstract_dataset.py +++ b/spatio_temporal_dataset/dataset/abstract_dataset.py @@ -7,6 +7,8 @@ import numpy as np import pandas as pd from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \ + AbstractSpatioTemporalCoordinates from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer from spatio_temporal_dataset.slicer.split import Split from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ @@ -15,11 +17,25 @@ from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_tempor class AbstractDataset(object): - def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates): + def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates,): assert pd.Index.equals(observations.index, coordinates.index), '\n{}\n{}'.format(observations.index, coordinates.index) self.observations = observations # type: AbstractSpatioTemporalObservations self.coordinates = coordinates # type: AbstractCoordinates + @classmethod + def remove_zeros(cls, observations: AbstractSpatioTemporalObservations, + coordinates: AbstractCoordinates): + ind_without_zero = ~(observations.df_maxima_gev == 0).any(axis=1) + # Create new observations + new_df_maxima_gev = observations.df_maxima_gev.loc[ind_without_zero].copy() + new_observations = AbstractSpatioTemporalObservations(df_maxima_gev=new_df_maxima_gev) + # Create new coordinates + coordinate_class = type(coordinates) + new_df = coordinates.df_all_coordinates.loc[ind_without_zero].copy() + new_coordinates = coordinate_class(df=new_df, slicer_class=type(coordinates.slicer)) + return cls(new_observations, new_coordinates) + + @property def df_dataset(self) -> pd.DataFrame: # Merge dataframes with the maxima and with the coordinates @@ -78,3 +94,4 @@ class AbstractDataset(object): def __str__(self) -> str: return 'coordinates:\n{}\nobservations:\n{}'.format(self.coordinates.__str__(), self.observations.__str__()) + diff --git a/test/test_spatio_temporal_dataset/test_dataset.py b/test/test_spatio_temporal_dataset/test_dataset.py index 7dca3e3c..4b3131f5 100644 --- a/test/test_spatio_temporal_dataset/test_dataset.py +++ b/test/test_spatio_temporal_dataset/test_dataset.py @@ -6,9 +6,18 @@ import numpy as np from extreme_fit.model.margin_model.linear_margin_model.linear_margin_model import LinearNonStationaryLocationMarginModel from extreme_fit.model.utils import set_seed_for_test, SafeRunException from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ + AbstractSpatialCoordinates +from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \ + AbstractSpatioTemporalCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \ + ConsecutiveTemporalCoordinates from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.uniform_normalization import \ BetweenZeroAndOneNormalization +from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset from spatio_temporal_dataset.dataset.simulation_dataset import MaxStableDataset, MarginDataset +from spatio_temporal_dataset.slicer.split import Split +from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima from test.test_utils import load_test_max_stable_models, load_test_3D_spatial_coordinates, \ load_test_1D_and_2D_spatial_coordinates, load_test_spatiotemporal_coordinates @@ -17,6 +26,24 @@ class TestDataset(unittest.TestCase): nb_obs = 2 nb_points = 2 + def test_remove_zero_from_dataset(self): + temporal_coordinates = ConsecutiveTemporalCoordinates.from_nb_temporal_steps(nb_temporal_steps=2) + spatial_coordinates = AbstractSpatialCoordinates.from_list_x_coordinates([300, 600]) + coordinates = AbstractSpatioTemporalCoordinates(spatial_coordinates=spatial_coordinates, + temporal_coordinates=temporal_coordinates) + coordinate_values_to_maxima = { + (300, 0): [0], + (300, 1): [1], + (600, 0): [2], + (600, 1): [0], + } + observations = AnnualMaxima.from_coordinates(coordinates, coordinate_values_to_maxima) + dataset_with_zero = AbstractDataset(observations, coordinates) + dataset_without_zero = AbstractDataset.remove_zeros(observations, + coordinates) + self.assertEqual(len(dataset_with_zero.coordinates), 4) + self.assertEqual(len(dataset_without_zero.coordinates), 2) + def test_max_stable_dataset_R1_and_R2(self): max_stable_models = load_test_max_stable_models()[:] coordinates = load_test_1D_and_2D_spatial_coordinates(self.nb_points) -- GitLab