From 49c966586082f39bcb57740d43a5edabcfaebce0 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Wed, 29 May 2019 10:16:16 +0200
Subject: [PATCH] [METEO FRANCE DATA][COMPARISON ANALYSIS] add some checks in
 comparison analysis

---
 .../stations_data/comparison_analysis.py      | 29 ++++++++++++++-----
 .../transformation/uniform_normalization.py   |  7 +++++
 .../abstract_spatio_temporal_observations.py  |  6 +++-
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/experiment/meteo_france_data/stations_data/comparison_analysis.py b/experiment/meteo_france_data/stations_data/comparison_analysis.py
index c852a828..76c7d3da 100644
--- a/experiment/meteo_france_data/stations_data/comparison_analysis.py
+++ b/experiment/meteo_france_data/stations_data/comparison_analysis.py
@@ -1,14 +1,15 @@
 from collections import OrderedDict
+import numpy as np
+from typing import List
 
 from cached_property import cached_property
 
 from experiment.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall
 from experiment.meteo_france_data.visualization.study_visualization.main_study_visualizer import \
-    ALL_ALTITUDES, ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST
+    ALL_ALTITUDES
 from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \
     FullEstimatorInASingleStepWithSmoothMargin
-from extreme_estimator.extreme_models.margin_model.linear_margin_model import LinearAllParametersAllDimsMarginModel, \
-    LinearLocationAllDimsMarginModel, LinearShapeAllDimsMarginModel
+from extreme_estimator.extreme_models.margin_model.linear_margin_model import LinearAllParametersAllDimsMarginModel
 from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model import CovarianceFunction
 from extreme_estimator.extreme_models.max_stable_model.max_stable_models import ExtremalT, BrownResnick
 from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
@@ -66,6 +67,10 @@ class ComparisonAnalysis(object):
         df = df.loc[ind_massif]
         # Keep only one station per massif, to have the same number of points (the first by default)
         df = df.drop_duplicates(subset='MASSIF_PRA')
+        # Sort the DataFrame rows so that they follow the massif order of intersection_massif_names
+        df['MASSIF_IDX'] = [self.intersection_massif_names.index(m) for m in df['MASSIF_PRA']]
+        df = df.sort_values(['MASSIF_IDX'])
+        df.drop(labels='MASSIF_IDX', axis=1, inplace=True)
         return df
 
     @property
@@ -148,14 +153,14 @@ class ComparisonAnalysis(object):
         return observations
 
     @property
-    def study_dataset_latitude_longitude(self):
+    def study_dataset_latitude_longitude(self) -> AbstractDataset:
         dataset = AbstractDataset(observations=self.study_observations,
                                   coordinates=self.study_coordinates(
                                       use_study_coordinate_with_latitude_and_longitude=True))
         return dataset
 
     @property
-    def study_dataset_lambert(self):
+    def study_dataset_lambert(self) -> AbstractDataset:
         dataset = AbstractDataset(observations=self.study_observations,
                                   coordinates=self.study_coordinates(
                                       use_study_coordinate_with_latitude_and_longitude=False))
@@ -213,7 +218,12 @@ class ComparisonAnalysis(object):
             print('\n\n', get_display_name_from_object_type(type(max_stable_model)))
             if hasattr(max_stable_model, 'covariance_function'):
                 print(max_stable_model.covariance_function)
-            for dataset in [self.station_dataset, self.study_dataset_lambert]:
+            estimators = []
+            datasets = [self.station_dataset, self.study_dataset_lambert]  # type: List[AbstractDataset]
+            # Check that both datasets have the same observation columns
+            assert pd.Index.equals(datasets[0].observations.columns, datasets[1].observations.columns)
+            # assert datasets[0].observations.columns
+            for dataset in datasets:
                 margin_model = margin_model_class(coordinates=dataset.coordinates)
                 estimator = FullEstimatorInASingleStepWithSmoothMargin(dataset=dataset,
                                                                        margin_model=margin_model,
@@ -221,8 +231,11 @@ class ComparisonAnalysis(object):
                 estimator.fit()
                 print(estimator.result_from_fit.margin_coef_dict)
                 print(estimator.result_from_fit.other_coef_dict)
-                # print(estimato)
-
+                estimators.append(estimator)
+            # Compare the signs of the margin coefficients between the two estimators
+            coefs = [{k: v for k, v in e.result_from_fit.margin_coef_dict.items() if 'Coeff1' not in k} for e in estimators]
+            different_sign = [k for k, v in coefs[0].items() if np.sign(coefs[1][k]) != np.sign(v) ]
+            print('All linear coefficient have the same sign: {}, different_signs for: {}'.format(len(different_sign) == 0, different_sign))
 
 def choice_of_altitude_and_nb_border_data_to_remove_to_get_data_without_nan():
     for margin in [50, 100, 150, 200, 250, 300][2:3]:
diff --git a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/uniform_normalization.py b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/uniform_normalization.py
index 8922ff77..017b40f7 100644
--- a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/uniform_normalization.py
+++ b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/uniform_normalization.py
@@ -39,6 +39,13 @@ class BetweenZeroAndTenNormalization(BetweenZeroAndOneNormalization):
 
 epsilon = 0.001
 
+"""
+Two possible ways to test the stability of the fit with respect to the coordinates:
+one can use BetweenZeroAndOneNormalizationMinEpsilon, which allows using coordinates in [epsilon, 1] and [0, 1-epsilon],
+and look at the effect on the fit,
+or use e.g. [0, 2] or [0, 10] -> but this can create really different effects
+"""
+
 
 class BetweenZeroAndOneNormalizationMinEpsilon(BetweenZeroAndOneNormalization):
 
diff --git a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py
index a772e793..fe599b2c 100644
--- a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py
+++ b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py
@@ -61,9 +61,13 @@ class AbstractSpatioTemporalObservations(object):
     def index(self) -> pd.Index:
         return self._df_maxima.index
 
+    @property
+    def columns(self) -> pd.Index:
+        return self._df_maxima.columns
+
     @property
     def nb_obs(self) -> int:
-        return len(self._df_maxima.columns)
+        return len(self.columns)
 
     @classmethod
     def from_df(cls, df):
-- 
GitLab