[STATIONS] add trend_test result display in comparisons_visualization graphs

c0c3ddce · Le Roux Erwan · f9ad9960 · c0c3ddce · c0c3ddce · c0c3ddce
Commit c0c3ddce authored 6 years ago by Le Roux Erwan
Hide whitespace changes
Inline Side-by-side

Showing

with 87 additions and 42 deletions
+87 -42
--- a/experiment/meteo_france_data/scm_models_data/visualization/studies_visualization/studies_visualizer.py
+++ b/experiment/meteo_france_data/scm_models_data/visualization/studies_visualization/studies_visualizer.py
@@ -10,10 +10,10 @@ import matplotlib.pyplot as plt
 from matplotlib.lines import Line2D
 from experiment.meteo_france_data.scm_models_data.abstract_extended_study import AbstractExtendedStudy
+from experiment.meteo_france_data.scm_models_data.visualization.studies_visualization.studies import Studies
+from experiment.meteo_france_data.scm_models_data.visualization.study_visualization.study_visualizer import \
+    StudyVisualizer
 from experiment.trend_analysis.univariate_test.abstract_univariate_test import AbstractUnivariateTest
-from experiment.meteo_france_data.scm_models_data.visualization import \
-    Studies
-from experiment.meteo_france_data.scm_models_data.visualization import StudyVisualizer
 from experiment.meteo_france_data.scm_models_data.visualization.utils import plot_df
 from utils import cached_property, get_display_name_from_object_type, VERSION_TIME

--- a/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py
+++ b/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py
@@ -16,6 +16,7 @@ from experiment.trend_analysis.univariate_test.abstract_univariate_test import A
 from experiment.trend_analysis.non_stationary_trends import \
    ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest, IndependenceLocationTrendTest
 from experiment.meteo_france_data.scm_models_data.visualization.utils import create_adjusted_axes
+from experiment.trend_analysis.univariate_test.utils import compute_gev_change_point_test_results
 from experiment.utils import average_smoothing_with_sliding_window
 from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \
    FullEstimatorInASingleStepWithSmoothMargin
@@ -405,23 +406,8 @@ class StudyVisualizer(VisualizationParameters):
            massif_names = massif_names[:nb_massif_for_fast_mode]
        for massif_id, massif_name in enumerate(massif_names):
            years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
-            if self.multiprocessing:
+            trend_test_res, best_idxs = compute_gev_change_point_test_results(self.multiprocessing, smooth_maxima, starting_years,
-                list_args = [(smooth_maxima, starting_year, trend_test_class, years) for starting_year in
+                                                                   trend_test_class, years)
-                             starting_years]
-                with Pool(NB_CORES) as p:
-                    trend_test_res = p.starmap(self.compute_gev_change_point_test_result, list_args)
-            else:
-                trend_test_res = [
-                    self.compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years)
-                    for starting_year in starting_years]
-            # Keep only the most likely starting year
-            # (i.e. the starting year that minimizes its negative log likelihood)
-            # (set all the other data to np.nan so that they will not be taken into account in mean function)
-            best_idx = list(np.argmin(trend_test_res, axis=0))[2]
-            # print(best_idx, trend_test_res)
-            best_idxs = [best_idx]
-            # todo: by doing a sorting on the deviance, I could get the nb_top_likelihood_values values
-            # best_idxs = list(np.argmax(trend_test_res, axis=0))[-nb_top_likelihood_values:]
            trend_test_res = [(a, b) if i in best_idxs else (np.nan, np.nan)
                              for i, (a, b, *_) in enumerate(trend_test_res)]
            massif_name_to_trend_res[massif_name] = list(zip(*trend_test_res))
@@ -432,12 +418,6 @@ class StudyVisualizer(VisualizationParameters):
        return [pd.DataFrame(massif_name_to_res, index=starting_years).transpose()
                for massif_name_to_res in all_massif_name_to_res]
-    @staticmethod
-    def compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years):
-        trend_test = trend_test_class(years, smooth_maxima, starting_year)  # type: AbstractGevChangePointTest
-        assert isinstance(trend_test, AbstractGevChangePointTest)
-        return trend_test.test_trend_type, trend_test.test_trend_strength, trend_test.non_stationary_nllh, trend_test.non_stationary_deviance, trend_test.stationary_deviance
    @staticmethod
    def compute_trend_test_result(smooth_maxima, starting_year, trend_test_class, years):
        trend_test = trend_test_class(years, smooth_maxima, starting_year)  # type: AbstractUnivariateTest

--- a/experiment/meteo_france_data/stations_data/main_station_comparison.py
+++ b/experiment/meteo_france_data/stations_data/main_station_comparison.py
@@ -13,22 +13,23 @@ def visualize_all_stations():
 def visualize_non_nan_station():
    vizu = ComparisonsVisualization(altitudes=ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST,
                                    keep_only_station_without_nan_values=True,
-                                    normalize_observations=True)
+                                    normalize_observations=False)
-    # vizu.visualize_maximum()
+    vizu.visualize_maximum()
-    vizu.visualize_gev()
+    # vizu.visualize_gev()
 def example():
    # this is a really good example for the maxima at least
-    vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False)
+    # vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False)
-    vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True)
+    # vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True)
-    # vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False, keep_only_station_without_nan_values=True)
+    vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False, keep_only_station_without_nan_values=True)
    # vizu._visualize_ax_main(vizu.plot_gev, vizu.comparisons[0], 'Beaufortain', show=True)
+    vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True)
 if __name__ == '__main__':
    # visualize_fast_comparison()
-    # visualize_all_stations()
+    visualize_all_stations()
    # visualize_non_nan_station()
-    example()
+    # example()
--- a/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py
+++ b/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py
@@ -10,6 +10,9 @@ from experiment.meteo_france_data.scm_models_data.visualization.study_visualizat
    VisualizationParameters
 from experiment.meteo_france_data.stations_data.comparison_analysis import ComparisonAnalysis, MASSIF_COLUMN_NAME, \
    REANALYSE_STR, ALTITUDE_COLUMN_NAME
+from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import GevLocationChangePointTest
+from experiment.trend_analysis.univariate_test.abstract_univariate_test import AbstractUnivariateTest
+from experiment.trend_analysis.univariate_test.utils import compute_gev_change_point_test_results
 from extreme_estimator.extreme_models.result_from_fit import ResultFromIsmev
 from extreme_estimator.extreme_models.utils import r, safe_run_r_estimator, ro
 from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
@@ -67,6 +70,7 @@ class ComparisonsVisualization(VisualizationParameters):
    def _visualize_ax_main(self, plot_function, comparison: ComparisonAnalysis, massif, ax=None, show=False):
        if ax is None:
            _, ax = plt.subplots(1, 1, figsize=self.figsize)
+        ax2 = ax.twinx()
        df = comparison.df_merged_intersection_clean.copy()
        ind = df[MASSIF_COLUMN_NAME] == massif
@@ -78,19 +82,21 @@ class ComparisonsVisualization(VisualizationParameters):
        ind_location = df.index.str.contains(REANALYSE_STR)
        df[DISTANCE_COLUMN_NAME] = 0
        for coordinate_name in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
-            print(df[DISTANCE_COLUMN_NAME])
            center = df.loc[ind_location, coordinate_name].values[0]
-            print(center)
            df[DISTANCE_COLUMN_NAME] += (center - df[coordinate_name]).pow(2)
        df[DISTANCE_COLUMN_NAME] = df[DISTANCE_COLUMN_NAME].pow(0.5)
        df[DISTANCE_COLUMN_NAME] = (df[DISTANCE_COLUMN_NAME] / 1000).round(1)
        for color, (i, s) in zip(colors_station, df.iterrows()):
            label = i
            label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME])
            label += ' ({}km)'.format(s[DISTANCE_COLUMN_NAME])
            s_values = s.iloc[3:-1].to_dict()
+            years, maxima = list(s_values.keys()), list(s_values.values())
            plot_color = color if REANALYSE_STR not in label else 'g'
-            plot_function(ax, s_values, label, plot_color)
+            plot_function(ax, ax2, years, maxima, label, plot_color)
            ax.set_title('{} at {}m'.format(massif, comparison.altitude))
            ax.legend(prop={'size': 5})
@@ -100,13 +106,30 @@ class ComparisonsVisualization(VisualizationParameters):
    def visualize_maximum(self):
        return self._visualize_main(self.plot_maxima, 'Recent trend of Annual maxima of snowfall')
-    def plot_maxima(self, ax, s_values, label, plot_color):
+    def plot_maxima(self, ax, ax2, years, maxima, label, plot_color):
-        ax.plot(list(s_values.keys()), list(s_values.values()), label=label, color=plot_color)
+        if self.keep_only_station_without_nan_values:
+            # Run trend test to improve the label
+            starting_years = years[:-1]
+            trend_test_res, best_idxs = compute_gev_change_point_test_results(multiprocessing=True,
+                                                                              maxima=maxima, starting_years=starting_years,
+                                                                              trend_test_class=GevLocationChangePointTest,
+                                                                              years=years)
+            best_idx = best_idxs[0]
+            most_likely_year = years[best_idx]
+            most_likely_trend_type = trend_test_res[best_idx][0]
+            display_trend_type = AbstractUnivariateTest.get_display_trend_type(real_trend_type=most_likely_trend_type)
+            label += "\n {} starting in {}".format(display_trend_type, most_likely_year)
+            # Display the nllh against the starting year
+            step = 10
+            ax2.plot(starting_years[::step], [t[3] for t in trend_test_res][::step], color=plot_color, marker='o')
+            ax2.plot(starting_years[::step], [t[4] for t in trend_test_res][::step], color=plot_color, marker='x')
+        # Plot maxima
+        ax.plot(years, maxima, label=label, color=plot_color)
    def visualize_gev(self):
        return self._visualize_main(self.plot_gev)
-    def plot_gev(self, ax, s_values, label, plot_color):
+    def plot_gev(self, ax, ax2, s_values, label, plot_color):
        # todo should I normalize here ?
        # fit gev
        data = list(s_values.values())
@@ -120,4 +143,3 @@ class ComparisonsVisualization(VisualizationParameters):
        y = gev_params.density(x)
        # display the gev distribution that was obtained
        ax.plot(x, y, label=label, color=plot_color)
--- a/experiment/trend_analysis/univariate_test/abstract_univariate_test.py
+++ b/experiment/trend_analysis/univariate_test/abstract_univariate_test.py
@@ -62,6 +62,13 @@ class AbstractUnivariateTest(object):
        # d[cls.NO_TREND] = 'k--'
        return d
+    @classmethod
+    def get_display_trend_type(cls, real_trend_type):
+        if cls.SIGNIFICATIVE in real_trend_type:
+            return real_trend_type
+        else:
+            return cls.NON_SIGNIFICATIVE_TREND
    @classmethod
    def get_real_trend_types(cls, display_trend_type):
        if display_trend_type is cls.ALL_TREND:

--- a/experiment/trend_analysis/univariate_test/utils.py
+++ b/experiment/trend_analysis/univariate_test/utils.py
+from multiprocessing.pool import Pool
+import numpy as np
+from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import AbstractGevChangePointTest
+from utils import NB_CORES
+def compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years):
+    trend_test = trend_test_class(years, smooth_maxima, starting_year)  # type: AbstractGevChangePointTest
+    assert isinstance(trend_test, AbstractGevChangePointTest)
+    return trend_test.test_trend_type, trend_test.test_trend_strength, trend_test.non_stationary_nllh, trend_test.non_stationary_deviance, trend_test.stationary_deviance
+def compute_gev_change_point_test_results(multiprocessing, maxima, starting_years, trend_test_class,
+                                          years):
+    if multiprocessing:
+        list_args = [(maxima, starting_year, trend_test_class, years) for starting_year in
+                     starting_years]
+        with Pool(NB_CORES) as p:
+            trend_test_res = p.starmap(compute_gev_change_point_test_result, list_args)
+    else:
+        trend_test_res = [
+            compute_gev_change_point_test_result(maxima, starting_year, trend_test_class, years)
+            for starting_year in starting_years]
+    # Keep only the most likely starting year
+    # (i.e. the starting year that minimizes its negative log likelihood)
+    # (set all the other data to np.nan so that they will not be taken into account in mean function)
+    best_idx = list(np.argmin(trend_test_res, axis=0))[2]
+    # print(best_idx, trend_test_res)
+    best_idxs = [best_idx]
+    # todo: by doing a sorting on the deviance, I could get the nb_top_likelihood_values values
+    # best_idxs = list(np.argmax(trend_test_res, axis=0))[-nb_top_likelihood_values:]
+    return trend_test_res, best_idxs
--- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py
+++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
@@ -251,7 +251,7 @@ class AbstractCoordinates(object):
            # Compute the indices to modify
            ind_to_modify = df_temporal_coordinates.iloc[:, 0] <= starting_point  # type: pd.Series
            # Assert that some coordinates are selected but not all
-            assert 0 < sum(ind_to_modify) < len(ind_to_modify)
+            assert 0 < sum(ind_to_modify) < len(ind_to_modify), sum(ind_to_modify)
            # Modify the temporal coordinates to enforce the stationarity
            df_temporal_coordinates.loc[ind_to_modify] = starting_point
            # Load the temporal transformation object