[SCM][STUDIES VISUALIZER] add significative trend evolution. add abstract_trend_test object.

1d98720b · Le Roux Erwan · fb667260 · 1d98720b · 1d98720b · 1d98720b
Commit 1d98720b authored 5 years ago by Le Roux Erwan
Hide whitespace changes
Inline Side-by-side

Showing

with 191 additions and 21 deletions
+191 -21
--- a/experiment/meteo_france_SCM_study/abstract_score.py
+++ b/experiment/meteo_france_SCM_study/abstract_score.py
@@ -9,9 +9,8 @@ class AbstractTrendScore(object):
    We don't care what happen before the change point.
    All we want to focus on, is the potential trend that could exist in the data after a potential change point"""

-    def __init__(self, starting_years, number_of_top_values) -> None:
+    def __init__(self, number_of_top_values) -> None:
        self.number_of_top_values = number_of_top_values
-        self.starting_years = starting_years

    def get_detailed_score(self, years_after_change_point, maxima_after_change_point):
        raise NotImplementedError

--- a/experiment/meteo_france_SCM_study/abstract_trend_test.py
+++ b/experiment/meteo_france_SCM_study/abstract_trend_test.py
+import random
+
+
+class AbstractTrendTest(object):
+    SIGNIFICATIVE = 'significative'
+    # 5 possible types of trends
+    NO_TREND = 'no trend'
+    POSITIVE_TREND = 'positive trend'
+    NEGATIVE_TREND = 'negative trend'
+    SIGNIFICATIVE_POSITIVE_TREND = SIGNIFICATIVE + ' ' + POSITIVE_TREND
+    SIGNIFICATIVE_NEGATIVE_TREND = SIGNIFICATIVE + ' ' + NEGATIVE_TREND
+
+    # todo: maybe create ordered dict
+    TREND_TYPE_TO_STYLE = {
+        NO_TREND: 'k--',
+        POSITIVE_TREND: 'g--',
+        SIGNIFICATIVE_POSITIVE_TREND: 'g-',
+        SIGNIFICATIVE_NEGATIVE_TREND: 'r-',
+        NEGATIVE_TREND: 'r--',
+    }
+
+    TREND_TYPES = list(TREND_TYPE_TO_STYLE.keys())
+
+    def __init__(self, years_after_change_point, maxima_after_change_point):
+        self.years_after_change_point = years_after_change_point
+        self.maxima_after_change_point = maxima_after_change_point
+
+    @property
+    def test_trend_type(self) -> str:
+        test_sign = self.test_sign
+        assert test_sign in [-1, 0, 1]
+        if test_sign == 0:
+            trend_type = self.NO_TREND
+        else:
+            trend_type = self.POSITIVE_TREND if test_sign > 0 else self.NEGATIVE_TREND
+            if self.is_significant:
+                trend_type = self.SIGNIFICATIVE + ' ' + trend_type
+        assert trend_type in self.TREND_TYPE_TO_STYLE
+        return trend_type
+
+    @property
+    def test_sign(self) -> int:
+        raise NotImplementedError
+
+    @property
+    def is_significant(self) -> bool:
+        raise NotImplementedError
+
+
+class ExampleTrendTest(AbstractTrendTest):
+
+    @property
+    def test_sign(self) -> int:
+        return random.randint(0, 2) - 1
+
+    @property
+    def is_significant(self) -> bool:
+        return random.randint(1, 10) == 10
+
+
+class MannKendallTrendTest(AbstractTrendTest):
+    pass
--- a/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py
+++ b/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py
 from experiment.meteo_france_SCM_study.abstract_score import MannKendall, WeigthedScore, MeanScore, MedianScore
 from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy
+from experiment.meteo_france_SCM_study.abstract_trend_test import MannKendallTrendTest, ExampleTrendTest
 from experiment.meteo_france_SCM_study.crocus.crocus import CrocusDepth, CrocusSwe, ExtendedCrocusDepth, \
    ExtendedCrocusSwe
 from experiment.meteo_france_SCM_study.safran.safran import SafranSnowfall, ExtendedSafranSnowfall, \
@@ -31,12 +32,31 @@ def altitude_trends():
        for score_class in [MedianScore, MeanScore, MannKendall, WeigthedScore]:
            visualizers = [StudyVisualizer(study, temporal_non_stationarity=True, verbose=True,
                                           score_class=score_class)
-                           for study in study_iterator_global(study_classes=[study_class], only_first_one=only_first_one,
-                                                              altitudes=altitudes)]
+                           for study in
+                           study_iterator_global(study_classes=[study_class], only_first_one=only_first_one,
+                                                 altitudes=altitudes)]
            altitude_to_visualizer = OrderedDict(zip(altitudes, visualizers))
            visualizer = AltitudeVisualizer(altitude_to_visualizer, multiprocessing=False, save_to_file=save_to_file)
            visualizer.negative_trend_percentages_evolution(reverse=True)


+def altitude_trends_significant():
+    save_to_file = False
+    only_first_one = False
+    # altitudes that have 20 massifs at least
+    altitudes = ALL_ALTITUDES[3:-6]
+    altitudes = ALL_ALTITUDES[3:5]
+    # altitudes = ALL_ALTITUDES[:2]
+    for study_class in SCM_STUDIES[:1]:
+        trend_test_classes = [ExampleTrendTest, ExampleTrendTest, MannKendallTrendTest][:2]
+        visualizers = [StudyVisualizer(study, temporal_non_stationarity=True, verbose=False)
+                       for study in study_iterator_global(study_classes=[study_class], only_first_one=only_first_one,
+                                                          altitudes=altitudes)]
+        altitude_to_visualizer = OrderedDict(zip(altitudes, visualizers))
+        visualizer = AltitudeVisualizer(altitude_to_visualizer, multiprocessing=False, save_to_file=save_to_file)
+        visualizer.trend_tests_percentage_evolution(trend_test_classes, starting_year_to_weights=None)
+
+
 if __name__ == '__main__':
-    altitude_trends()
+    # altitude_trends()
+    altitude_trends_significant()
--- a/experiment/meteo_france_SCM_study/visualization/studies_visualization/studies_visualizer.py
+++ b/experiment/meteo_france_SCM_study/visualization/studies_visualization/studies_visualizer.py
@@ -2,7 +2,7 @@ from collections import OrderedDict, Counter
 import os
 import os.path as op
 from multiprocessing.dummy import Pool
-from typing import Dict
+from typing import Dict, List

 import numpy as np
 import pandas as pd
@@ -10,6 +10,7 @@ import matplotlib.pyplot as plt
 from matplotlib.lines import Line2D

 from experiment.meteo_france_SCM_study.abstract_extended_study import AbstractExtendedStudy
+from experiment.meteo_france_SCM_study.abstract_trend_test import AbstractTrendTest
 from experiment.meteo_france_SCM_study.visualization.studies_visualization.studies import \
    Studies
 from experiment.meteo_france_SCM_study.visualization.study_visualization.study_visualizer import StudyVisualizer
@@ -54,7 +55,7 @@ class AltitudeVisualizer(object):
        self.save_to_file = save_to_file
        self.multiprocessing = multiprocessing
        assert isinstance(altitude_to_study_visualizer, OrderedDict)
-        self.altitude_to_study_visualizer = altitude_to_study_visualizer
+        self.altitude_to_study_visualizer = altitude_to_study_visualizer  # type: Dict[int, StudyVisualizer]

    @property
    def altitudes(self):
@@ -90,10 +91,11 @@ class AltitudeVisualizer(object):
        top_n = 5
        top_top = 3
        # keep the top_n for each altitude
-        all_years = [[year for year, _ in sorted(enumerate(p), key=lambda s:s[1], reverse=reverse)[-top_n:]] for p in self.all_percentages]
+        all_years = [[year for year, _ in sorted(enumerate(p), key=lambda s: s[1], reverse=reverse)[-top_n:]] for p in
+                     self.all_percentages]
        from itertools import chain
        all_years = list(chain(*all_years))
-        years = [y for y, _ in sorted(Counter(all_years).items(), key=lambda s:s[1])[-top_top:]]
+        years = [y for y, _ in sorted(Counter(all_years).items(), key=lambda s: s[1])[-top_top:]]
        years = [y + self.starting_year for y in years]
        return years

@@ -147,7 +149,7 @@ class AltitudeVisualizer(object):
            ax.plot(self.altitudes, values, color + marker, label=curve_name)
        ax.legend()
        ax.set_xticks(self.altitudes)
-        ax.set_yticks(list(range(0,101, 10)))
+        ax.set_yticks(list(range(0, 101, 10)))
        ax.grid()

        ax.axhline(y=50, color='k')
@@ -158,4 +160,70 @@ class AltitudeVisualizer(object):
        score_name = get_display_name_from_object_type(self.any_study_visualizer.score_class)
        title = 'Evolution of {} trends wrt to the altitude with {}'.format(variable_name, score_name)
        ax.set_title(title)
-        self.show_or_save_to_file(specific_title=title)
\ No newline at end of file
+        self.show_or_save_to_file(specific_title=title)
+
+    # Trend tests evolution
+
+    def trend_tests_percentage_evolution(self, trend_test_classes, starting_year_to_weights: None):
+        # Load uniform weights by default
+        if starting_year_to_weights is None:
+            startings_years = self.any_study_visualizer.starting_years
+            uniform_weight = 1 / len(startings_years)
+            starting_year_to_weights = {year: uniform_weight for year in startings_years}
+        else:
+            uniform_weight = 0.0
+
+        fig, ax = plt.subplots(1, 1, figsize=self.any_study_visualizer.figsize)
+
+        # Create one display for each trend test class
+        markers = ['o', '+']
+        assert len(markers) >= len(trend_test_classes)
+        # Add a second legend for the color and to explain the line
+
+        for marker, trend_test_class in zip(markers, trend_test_classes):
+            self.trend_test_percentages_evolution(ax, marker, trend_test_class, starting_year_to_weights)
+
+        # Add the color legend
+        handles, labels = ax.get_legend_handles_labels()
+        handles_ax, labels_ax = handles[:5], labels[:5]
+        ax.legend(handles_ax, labels_ax, markerscale=0.0, loc=1)
+        ax.set_xticks(self.altitudes)
+        ax.set_yticks(list(range(0, 101, 10)))
+        ax.grid()
+
+        # Add the marker legend
+        names = [get_display_name_from_object_type(c) for c in trend_test_classes]
+        handles_ax2, labels_ax2 = handles[::5], names
+        ax2 = ax.twinx()
+        ax2.legend(handles_ax2, labels_ax2, loc=2)
+        ax2.set_yticks([])
+
+        # Global information
+        added_str = ''if uniform_weight > 0.0 else 'weighted '
+        ylabel = '% averaged on massifs & {}averaged on starting years'.format(added_str)
+        ylabel += ' (with uniform weights)'
+        ax.set_ylabel(ylabel)
+        ax.set_xlabel('altitude')
+        variable_name = self.study.variable_class.NAME
+        title = 'Evolution of {} trends (significative or not) wrt to the altitude with {}'.format(variable_name, ', '.join(names))
+        ax.set_title(title)
+        self.show_or_save_to_file(specific_title=title)
+
+    def trend_test_percentages_evolution(self, ax, marker, trend_test_class, starting_year_to_weights):
+        """
+        Positive trend in green
+        Negative trend in red
+        Non significative trend with dotted line
+        Significative trend with real line
+
+        :return:
+        """
+        # Build OrderedDict mapping altitude to a mean serie
+        altitude_to_serie_with_mean_percentages = OrderedDict()
+        for altitude, study_visualizer in self.altitude_to_study_visualizer.items():
+            s = study_visualizer.serie_mean_trend_test_count(trend_test_class, starting_year_to_weights)
+            altitude_to_serie_with_mean_percentages[altitude] = s
+        # Plot lines
+        for trend_type, style in AbstractTrendTest.TREND_TYPE_TO_STYLE.items():
+            percentages = [v.loc[trend_type] for v in altitude_to_serie_with_mean_percentages.values()]
+            ax.plot(self.altitudes, percentages, style + marker, label=trend_type)
--- a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py
+++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py
@@ -153,8 +153,8 @@ def complete_analysis(only_first_one=False):


 def trend_analysis():
-    save_to_file = False
-    only_first_one = True
+    save_to_file = True
+    only_first_one = False
    short_altitudes = [300, 1200, 2100, 3000][:1]
    full_altitude_with_at_least_2_stations = [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600,
                                              3900, 4200][:]
@@ -168,10 +168,9 @@ def trend_analysis():
                                           temporal_non_stationarity=True,
                                           verbose=True,
                                           multiprocessing=True,
-                                           complete_non_stationary_trend_analysis=False)
-        # study_visualizer.only_one_graph = True
+                                           complete_non_stationary_trend_analysis=True)
        study_visualizer.visualize_all_independent_temporal_trend()
-        # study_visualizer.visualize_temporal_trend_relevance()
+        study_visualizer.visualize_temporal_trend_relevance()


 def maxima_analysis():
@@ -195,9 +194,9 @@ def maxima_analysis():

 def main_run():
    # normal_visualization(temporal_non_stationarity=True)
-    # trend_analysis()
+    trend_analysis()
    # all_scores_vizu()
-    maxima_analysis()
+    # maxima_analysis()
    # all_normal_vizu()

    # annual_mean_vizu_compare_durand_study(safran=True, take_mean_value=True, altitude=2100)

--- a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py
+++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py
@@ -12,6 +12,7 @@ import seaborn as sns

 from experiment.meteo_france_SCM_study.abstract_score import MeanScore, WeigthedScore, AbstractTrendScore
 from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy
+from experiment.meteo_france_SCM_study.abstract_trend_test import AbstractTrendTest
 from experiment.meteo_france_SCM_study.visualization.study_visualization.non_stationary_trends import \
    ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest, IndependenceLocationTrendTest
 from experiment.meteo_france_SCM_study.visualization.utils import create_adjusted_axes
@@ -90,7 +91,7 @@ class StudyVisualizer(object):
        self.window_size_for_smoothing = 1  # other value could be
        self.number_of_top_values = 10  # 1 if we just want the maxima
        self.score_class = score_class
-        self.score = self.score_class(self.starting_years, self.number_of_top_values) # type: AbstractTrendScore
+        self.score = self.score_class(self.number_of_top_values)  # type: AbstractTrendScore

        # PLOT ARGUMENTS
        self.show = False if self.save_to_file else show
@@ -333,8 +334,6 @@ class StudyVisualizer(object):
        all_massif_data = np.sort(all_massif_data)
        return all_massif_data

-
-
    @property
    def starting_years(self):
        start_year, stop_year = self.study.start_year_and_stop_year
@@ -363,11 +362,34 @@ class StudyVisualizer(object):
            for j, starting_year in enumerate(self.starting_years):
                detailed_scores.append(self.score.get_detailed_score(years, smooth_maxima))
                assert years[0] == starting_year, "{} {}".format(years[0], starting_year)
+                # Remove the first element from the list
                years = years[1:]
                smooth_maxima = smooth_maxima[1:]
            massif_name_to_scores[massif_name] = np.array(detailed_scores)
        return massif_name_to_scores

+    def massif_name_to_trend_test_count(self, trend_test_class, starting_year_to_weight):
+        massif_name_to_serie_percentages = {}
+        for massif_id, massif_name in enumerate(self.study.study_massif_names):
+            trend_type_and_weight = []
+            years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
+            for starting_year, weight in starting_year_to_weight.items():
+                idx = years.index(starting_year)
+                years, smooth_maxima = years[idx:], smooth_maxima[idx:]
+                assert years[0] == starting_year, "{} {}".format(years[0], starting_year)
+                trend_test = trend_test_class(years, smooth_maxima) # type: AbstractTrendTest
+                trend_type_and_weight.append((trend_test.test_trend_type, weight))
+            df = pd.DataFrame(trend_type_and_weight, columns=['trend type', 'weight'])
+            serie = df.groupby(['trend type']).sum()
+            massif_name_to_serie_percentages[massif_name] = serie * 100
+        return massif_name_to_serie_percentages
+
+    def serie_mean_trend_test_count(self, trend_test_class, starting_year_to_weight):
+        massif_name_to_trend_test_count = self.massif_name_to_trend_test_count(trend_test_class, starting_year_to_weight)
+        df = pd.concat(list(massif_name_to_trend_test_count.values()), axis=1, sort=False)
+        df.fillna(0.0, inplace=True)
+        return df.mean(axis=1)
+
    @cached_property
    def massif_name_to_scores(self):
        return {k: v[:, 0] for k, v in self.massif_name_to_detailed_scores.items()}