From 49c966586082f39bcb57740d43a5edabcfaebce0 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Wed, 29 May 2019 10:16:16 +0200 Subject: [PATCH] [METEO FRANCE DATA][COMPARISON ANALYSIS] add some checks in comparison analysis --- .../stations_data/comparison_analysis.py | 29 ++++++++++++++----- .../transformation/uniform_normalization.py | 7 +++++ .../abstract_spatio_temporal_observations.py | 6 +++- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/experiment/meteo_france_data/stations_data/comparison_analysis.py b/experiment/meteo_france_data/stations_data/comparison_analysis.py index c852a828..76c7d3da 100644 --- a/experiment/meteo_france_data/stations_data/comparison_analysis.py +++ b/experiment/meteo_france_data/stations_data/comparison_analysis.py @@ -1,14 +1,15 @@ from collections import OrderedDict +import numpy as np +from typing import List from cached_property import cached_property from experiment.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall from experiment.meteo_france_data.visualization.study_visualization.main_study_visualizer import \ - ALL_ALTITUDES, ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST + ALL_ALTITUDES from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \ FullEstimatorInASingleStepWithSmoothMargin -from extreme_estimator.extreme_models.margin_model.linear_margin_model import LinearAllParametersAllDimsMarginModel, \ - LinearLocationAllDimsMarginModel, LinearShapeAllDimsMarginModel +from extreme_estimator.extreme_models.margin_model.linear_margin_model import LinearAllParametersAllDimsMarginModel from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model import CovarianceFunction from extreme_estimator.extreme_models.max_stable_model.max_stable_models import ExtremalT, BrownResnick from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates @@ -66,6 +67,10 @@ class ComparisonAnalysis(object): df = df.loc[ind_massif] # Keep only one station per massif, to have the same number of points (the first by default) df = df.drop_duplicates(subset='MASSIF_PRA') + # Sort all the DataFrame so that the massif order correspond + df['MASSIF_IDX'] = [self.intersection_massif_names.index(m) for m in df['MASSIF_PRA']] + df = df.sort_values(['MASSIF_IDX']) + df.drop(labels='MASSIF_IDX', axis=1, inplace=True) return df @property @@ -148,14 +153,14 @@ class ComparisonAnalysis(object): return observations @property - def study_dataset_latitude_longitude(self): + def study_dataset_latitude_longitude(self) -> AbstractDataset: dataset = AbstractDataset(observations=self.study_observations, coordinates=self.study_coordinates( use_study_coordinate_with_latitude_and_longitude=True)) return dataset @property - def study_dataset_lambert(self): + def study_dataset_lambert(self) -> AbstractDataset: dataset = AbstractDataset(observations=self.study_observations, coordinates=self.study_coordinates( use_study_coordinate_with_latitude_and_longitude=False)) @@ -213,7 +218,12 @@ class ComparisonAnalysis(object): print('\n\n', get_display_name_from_object_type(type(max_stable_model))) if hasattr(max_stable_model, 'covariance_function'): print(max_stable_model.covariance_function) - for dataset in [self.station_dataset, self.study_dataset_lambert]: + estimators = [] + datasets = [self.station_dataset, self.study_dataset_lambert] # type: List[AbstractDataset] + # Checks that the dataset have the same index + assert pd.Index.equals(datasets[0].observations.columns, datasets[1].observations.columns) + # assert datasets[0].observations.columns + for dataset in datasets: margin_model = margin_model_class(coordinates=dataset.coordinates) estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset=dataset, margin_model=margin_model, @@ -221,8 +231,11 @@ class ComparisonAnalysis(object): estimator.fit() print(estimator.result_from_fit.margin_coef_dict) print(estimator.result_from_fit.other_coef_dict) - # print(estimato) - + estimators.append(estimator) + # Compare the sign of them margin coefficient for the estimators + coefs = [{k: v for k, v in e.result_from_fit.margin_coef_dict.items() if 'Coeff1' not in k} for e in estimators] + different_sign = [k for k, v in coefs[0].items() if np.sign(coefs[1][k]) != np.sign(v) ] + print('All linear coefficient have the same sign: {}, different_signs for: {}'.format(len(different_sign) == 0, different_sign)) def choice_of_altitude_and_nb_border_data_to_remove_to_get_data_without_nan(): for margin in [50, 100, 150, 200, 250, 300][2:3]: diff --git a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/uniform_normalization.py b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/uniform_normalization.py index 8922ff77..017b40f7 100644 --- a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/uniform_normalization.py +++ b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/uniform_normalization.py @@ -39,6 +39,13 @@ class BetweenZeroAndTenNormalization(BetweenZeroAndOneNormalization): epsilon = 0.001 +""" +Deux manières possiblede tester la stabilité par rapport aux coordonées +on peut utiliser BetweenZeroAndOneNormalizationMinEpsiloncela permet d utiliser des coordonnées [epsilon, 1] et [0, 1-epsilon] +et voir l effet sur le fit +ou bien utiliser [0, 2] [0, 10] par exemple, -> mais ca peut créer des effets vraiment différents +""" + class BetweenZeroAndOneNormalizationMinEpsilon(BetweenZeroAndOneNormalization): diff --git a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py index a772e793..fe599b2c 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py @@ -61,9 +61,13 @@ class AbstractSpatioTemporalObservations(object): def index(self) -> pd.Index: return self._df_maxima.index + @property + def columns(self) -> pd.Index: + return self._df_maxima.columns + @property def nb_obs(self) -> int: - return len(self._df_maxima.columns) + return len(self.columns) @classmethod def from_df(cls, df): -- GitLab