From 3bf990437cd0329a8e21230bbb927ab48ec365a4 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Thu, 16 May 2019 14:25:37 +0200
Subject: [PATCH] [SCM][SCORE TREND] add mann kendall score

---
 .../meteo_france_SCM_study/abstract_score.py  | 60 +++++++++++++------
 .../main_study_visualizer.py                  | 11 ++--
 .../study_visualization/study_visualizer.py   | 17 +++---
 3 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/experiment/meteo_france_SCM_study/abstract_score.py b/experiment/meteo_france_SCM_study/abstract_score.py
index 4e0f1d4a..98b50619 100644
--- a/experiment/meteo_france_SCM_study/abstract_score.py
+++ b/experiment/meteo_france_SCM_study/abstract_score.py
@@ -1,39 +1,65 @@
 import numpy as np
 
 
-class AbstractScore(object):
+class AbstractTrendScore(object):
+    """A score that should be equal to zero is there is no trend
+    positive if we suppose a positive trend
+    negative if we suppose a negative trend
 
-    @classmethod
-    def get_detailed_score(cls, sorted_years, sorted_maxima, top_n):
-        sorted_maxima = np.array(sorted_maxima)
-        year_top_score_max = cls.year_from_top_score(sorted_years[-top_n:], sorted_maxima[-top_n:], top_max=True)
-        year_top_score_min = cls.year_from_top_score(sorted_years[:top_n], sorted_maxima[:top_n], top_max=False)
+    We don't care what happen before the change point.
+    All we want to focus on, is the potential trend that could exist in the data after a potential change point"""
+
+    def __init__(self, starting_years, number_of_top_values) -> None:
+        self.number_of_top_values = number_of_top_values
+        self.starting_years = starting_years
+
+    def get_detailed_score(self, years_after_change_point, maxima_after_change_point):
+        raise NotImplementedError
+
+
+class MannKendall(AbstractTrendScore):
+
+    def get_detailed_score(self, years_after_change_point, maxima_after_change_point):
+        score = 0.0
+        for i, xi in enumerate(maxima_after_change_point[:-1]):
+            for xj in maxima_after_change_point[i+1:]:
+                score += np.sign(xj - xi)
+        return [score, score, score]
+
+
+class SortedScore(AbstractTrendScore):
+
+    def get_detailed_score(self, years_after_change_point, maxima_after_change_point):
+        # Get sorted years and sorted maxima
+        sorted_years, sorted_maxima = zip(
+            *sorted(zip(years_after_change_point, maxima_after_change_point), key=lambda s: s[1]))
+        sorted_years, sorted_maxima = list(sorted_years), np.array(sorted_maxima)
+        year_top_score_max = self.year_from_top_score(sorted_years[-self.number_of_top_values:],
+                                                      sorted_maxima[-self.number_of_top_values:], top_max=True)
+        year_top_score_min = self.year_from_top_score(sorted_years[:self.number_of_top_values],
+                                                      sorted_maxima[:self.number_of_top_values], top_max=False)
         score_difference = year_top_score_max - year_top_score_min
         return [score_difference, year_top_score_max, year_top_score_min]
 
-    @classmethod
-    def year_from_top_score(cls, top_sorted_years, top_sorted_maxima, top_max=None):
+    def year_from_top_score(self, top_sorted_years, top_sorted_maxima, top_max=None):
         raise NotImplementedError
 
 
-class MeanScore(AbstractScore):
+class MeanScore(SortedScore):
 
-    @classmethod
-    def year_from_top_score(cls, top_sorted_years, top_sorted_maxima, top_max=None):
+    def year_from_top_score(self, top_sorted_years, top_sorted_maxima, top_max=None):
         return np.mean(top_sorted_years)
 
 
-class MedianScore(AbstractScore):
+class MedianScore(SortedScore):
 
-    @classmethod
-    def year_from_top_score(cls, top_sorted_years, top_sorted_maxima, top_max=None):
+    def year_from_top_score(self, top_sorted_years, top_sorted_maxima, top_max=None):
         return np.median(top_sorted_years)
 
 
-class WeigthedScore(AbstractScore):
+class WeigthedScore(SortedScore):
 
-    @classmethod
-    def year_from_top_score(cls, top_sorted_years, top_sorted_maxima, top_max=None):
+    def year_from_top_score(self, top_sorted_years, top_sorted_maxima, top_max=None):
         assert isinstance(top_max, bool)
         if not top_max:
             top_sorted_maxima = np.sum(top_sorted_maxima) - top_sorted_maxima
diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py
index 5b530d18..7588aeab 100644
--- a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py
+++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py
@@ -3,7 +3,7 @@ from typing import Generator, List
 
 import numpy as np
 
-from experiment.meteo_france_SCM_study.abstract_score import WeigthedScore
+from experiment.meteo_france_SCM_study.abstract_score import WeigthedScore, MannKendall
 from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy
 from experiment.meteo_france_SCM_study.crocus.crocus import CrocusDepth, CrocusSwe, ExtendedCrocusDepth, \
     ExtendedCrocusSwe, CrocusDaysWithSnowOnGround
@@ -187,8 +187,8 @@ def maxima_analysis():
                                            temporal_non_stationarity=True,
                                            verbose=True,
                                            multiprocessing=True,
-                                           complete_non_stationary_trend_analysis=True)
-        study_visualizer.score = WeigthedScore
+                                           complete_non_stationary_trend_analysis=True,
+                                           score_class=MannKendall)
         study_visualizer.visualize_all_score_wrt_starting_year()
         # study_visualizer.visualize_all_independent_temporal_trend()
         # study_visualizer.visualize_all_mean_and_max_graphs()
@@ -196,9 +196,8 @@ def maxima_analysis():
 def main_run():
     # normal_visualization(temporal_non_stationarity=True)
     # trend_analysis()
-    all_scores_vizu()
-
-    # maxima_analysis()
+    # all_scores_vizu()
+    maxima_analysis()
     # all_normal_vizu()
 
     # annual_mean_vizu_compare_durand_study(safran=True, take_mean_value=True, altitude=2100)
diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py
index 83c3c9c5..23ad3a76 100644
--- a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py
+++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py
@@ -10,7 +10,7 @@ import numpy as np
 import pandas as pd
 import seaborn as sns
 
-from experiment.meteo_france_SCM_study.abstract_score import MeanScore, WeigthedScore
+from experiment.meteo_france_SCM_study.abstract_score import MeanScore, WeigthedScore, AbstractTrendScore
 from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy
 from experiment.meteo_france_SCM_study.visualization.study_visualization.non_stationary_trends import \
     ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest, IndependenceLocationTrendTest
@@ -58,7 +58,8 @@ class StudyVisualizer(object):
                  verbose=False,
                  multiprocessing=False,
                  complete_non_stationary_trend_analysis=False,
-                 normalization_under_one_observations=True):
+                 normalization_under_one_observations=True,
+                 score_class=MeanScore):
 
         self.massif_id_to_smooth_maxima = {}
         self.temporal_non_stationarity = temporal_non_stationarity
@@ -88,7 +89,8 @@ class StudyVisualizer(object):
 
         self.window_size_for_smoothing = 1  # other value could be
         self.number_of_top_values = 10  # 1 if we just want the maxima
-        self.score = WeigthedScore
+        self.score_class = score_class
+        self.score = self.score_class(self.starting_years, self.number_of_top_values) # type: AbstractTrendScore
 
         # PLOT ARGUMENTS
         self.show = False if self.save_to_file else show
@@ -357,13 +359,12 @@ class StudyVisualizer(object):
         massif_name_to_scores = {}
         for massif_id, massif_name in enumerate(self.study.study_massif_names):
             years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
-            sorted_years, sorted_maxima = zip(*[(i + self.study.start_year_and_stop_year[0], e)
-                                                for i, e in sorted(enumerate(smooth_maxima), key=lambda s: s[1])])
-            sorted_years, sorted_maxima = list(sorted_years), list(sorted_maxima)
             detailed_scores = []
             for j, starting_year in enumerate(self.starting_years):
-                detailed_scores.append(self.score.get_detailed_score(sorted_years, sorted_maxima, self.number_of_top_values))
-                sorted_years.remove(starting_year)
+                detailed_scores.append(self.score.get_detailed_score(years, smooth_maxima))
+                assert years[0] == starting_year, "{} {}".format(years[0], starting_year)
+                years = years[1:]
+                smooth_maxima = smooth_maxima[1:]
             massif_name_to_scores[massif_name] = np.array(detailed_scores)
         return massif_name_to_scores
 
-- 
GitLab