From 413c549e71a5ac432e84046b27c5f11abb36c636 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Thu, 12 Nov 2020 12:30:52 +0100
Subject: [PATCH] [snow projection] add rank analysis & relative bias analysis

---
 .../comparison_historical_visualizer.py       | 31 +++++++--
 .../comparison_with_scm/comparison_plot.py    | 64 ++++++++++++++-----
 .../main_comparison_on_rcp_85_extended.py     | 17 ++---
 3 files changed, 76 insertions(+), 36 deletions(-)

diff --git a/projects/projected_snowfall/comparison_with_scm/comparison_historical_visualizer.py b/projects/projected_snowfall/comparison_with_scm/comparison_historical_visualizer.py
index 652a5581..a4fe1766 100644
--- a/projects/projected_snowfall/comparison_with_scm/comparison_historical_visualizer.py
+++ b/projects/projected_snowfall/comparison_with_scm/comparison_historical_visualizer.py
@@ -49,6 +49,26 @@ class ComparisonHistoricalVisualizer(StudyVisualizer):
                 pass
         return np.array(values), gcm_rcm_couples
 
+    def massif_name_to_relative_bias(self, plot_maxima=True):
+        study_method = self.get_study_method(plot_maxima)
+        massif_name_to_relative_bias = {}
+        for massif_name in self.massif_names:
+            values, gcm_rcm_couples = self.get_values(study_method, massif_name)
+            if len(gcm_rcm_couples) == len(self.adamont_studies.gcm_rcm_couples):
+                mean_values = np.mean(values, axis=1)
+                bias_in_the_mean = (mean_values - mean_values[0])[1:]
+                relative_bias = 100 * np.mean(bias_in_the_mean) / mean_values[0]
+                massif_name_to_relative_bias[massif_name] = relative_bias
+        return massif_name_to_relative_bias
+
+    def compute_real_bias_list_in_the_mean(self, massif_name, relative, study_method):
+        values, gcm_rcm_couples = self.get_values(study_method, massif_name)
+        mean_values = np.mean(values, axis=1)
+        bias_in_the_mean = (mean_values - mean_values[0])[1:]
+        if relative:
+            bias_in_the_mean *= 100 / mean_values[0]
+        return bias_in_the_mean, gcm_rcm_couples
+
     def compute_bias_list_in_the_mean(self, massif_name, relative, study_method):
         values, gcm_rcm_couples = self.get_values(study_method, massif_name)
         mean_values = np.mean(values, axis=1)
@@ -71,17 +91,15 @@ class ComparisonHistoricalVisualizer(StudyVisualizer):
     def massif_name_to_mean_bias_in_the_mean(self, plot_maxima=True, relative=False):
         return {m: np.mean(l) for m, l in self.massif_name_to_bias_list_in_the_mean(plot_maxima, relative).items()}
 
-    @property
-    def massif_name_to_rank(self):
+    def massif_name_to_rank(self, plot_maxima=True):
         massif_name_to_rank = {}
-        for massif_name, bias_list in self.massif_name_to_bias_list_in_the_mean(plot_maxima=True).items():
+        for massif_name, bias_list in self.massif_name_to_bias_list_in_the_mean(plot_maxima).items():
             # Count the number of bias negative
             nb_of_negative = sum([b < 0 for b in bias_list])
             # Rank starts to 0
             massif_name_to_rank[massif_name] = float(nb_of_negative)
         return massif_name_to_rank
 
-
     # Map gcm_rcm_couple to bias list (ordered by the massif_names)
 
     @cached_property
@@ -170,12 +188,13 @@ class ComparisonHistoricalVisualizer(StudyVisualizer):
         plt.close()
 
     def plot_map_with_the_rank(self):
-        massif_name_to_value = self.massif_name_to_rank
+        massif_name_to_value = self.massif_name_to_rank(plot_maxima=True)
         max_abs_change = self.adamont_studies.nb_ensemble_members
         ylabel = 'Rank of the mean maxima\n' \
                  'for the period {}-{} and the scenario {}\n' \
                  'which is between 0 (lowest) and {} (largest)'.format(self.study.year_min, self.study.year_max,
-                                                               scenario_to_str(self.study.scenario), max_abs_change)
+                                                                       scenario_to_str(self.study.scenario),
+                                                                       max_abs_change)
         plot_name = op.join('rank', ylabel)
         self.plot_map(cmap=plt.cm.coolwarm, graduation=1,
                       label=ylabel,
diff --git a/projects/projected_snowfall/comparison_with_scm/comparison_plot.py b/projects/projected_snowfall/comparison_with_scm/comparison_plot.py
index 7c50dced..93735b48 100644
--- a/projects/projected_snowfall/comparison_with_scm/comparison_plot.py
+++ b/projects/projected_snowfall/comparison_with_scm/comparison_plot.py
@@ -1,3 +1,5 @@
+from collections import Counter
+
 import matplotlib.pyplot as plt
 from typing import Dict
 
@@ -13,38 +15,66 @@ from projects.projected_snowfall.comparison_with_scm.comparison_historical_visua
 def individual_plot(visualizer: ComparisonHistoricalVisualizer):
     # visualizer.adamont_studies.plot_maxima_time_series_adamont(visualizer.massif_names, visualizer.scm_study)
     # visualizer.shoe_plot_bias_maxima_comparison()
-    for relative in [True, False]:
-        visualizer.plot_map_with_the_mean_bias_in_the_mean(relative)
-
-    visualizer.plot_map_with_the_rank()
+    # for relative in [True, False]:
+    #     visualizer.plot_map_with_the_mean_bias_in_the_mean(relative)
+    # visualizer.plot_map_with_the_rank()
     # for relative
     # for plot_maxima in [True, False][:1]:
     #     visualizer.plot_comparison(plot_maxima)
+    pass
 
 
 def collective_plot(altitude_to_visualizer: Dict[int, ComparisonHistoricalVisualizer]):
     visualizer = list(altitude_to_visualizer.values())[0]
+    altitudes = list(altitude_to_visualizer.keys())
+    print('Summary for rank for altitudes:', altitudes)
+    for plot_maxima in [True, False]:
+        study_setting = '{} {} {} {}-{}'.format('maxima' if plot_maxima else 'daily', ' values for ', scenario_to_str(visualizer.study.scenario),
+              visualizer.study.year_min, visualizer.study.year_max)
+        print(study_setting)
+        summary_rank(altitude_to_visualizer, visualizer, plot_maxima)
+        visualizer.plot_name = 'Histogram of the rank analysis for {}'.format(study_setting)
+        visualizer.show_or_save_to_file()
+        plt.close()
+
+        summary_bias(altitude_to_visualizer, visualizer, plot_maxima)
+
+
+def summary_bias(altitude_to_visualizer, visualizer, plot_maxima):
+    relative_bias_list = []
+    for v in altitude_to_visualizer.values():
+        relative_bias = np.array(list(v.massif_name_to_relative_bias(plot_maxima).values()))
+        relative_bias_list.extend(relative_bias)
+    mean_relative_bias = np.round(np.mean(relative_bias_list), 1)
+    print('Average relative bias w.r.t to the mean:', mean_relative_bias, '\%\n')
+
+
+def summary_rank(altitude_to_visualizer, visualizer, plot_maxima):
     count_number_of_total_massifs = 0
     count_number_of_time_the_reanalysis_is_the_smallest = 0
     count_number_of_time_the_reanalysis_is_the_biggest = 0
-    altitudes = list(altitude_to_visualizer.keys())
     all_ranks = []
     for v in altitude_to_visualizer.values():
-        ranks = np.array(list(v.massif_name_to_rank.values()))
+        ranks = np.array(list(v.massif_name_to_rank(plot_maxima).values()))
         count_number_of_total_massifs += len(ranks)
         count_number_of_time_the_reanalysis_is_the_smallest += sum(ranks == 0.0)
         all_ranks.extend(ranks)
-    print(scenario_to_str(visualizer.study.scenario), visualizer.study.year_min, visualizer.study.year_max)
-    print('Summary for rank for altitudes:', altitudes)
-    print('Mean ranks:', np.mean(all_ranks))
-    print('percentages of time reanalysis is the biggest:', 
-          100 * count_number_of_time_the_reanalysis_is_the_biggest / count_number_of_total_massifs)
-    print('number of time reanalysis is the biggest:', count_number_of_time_the_reanalysis_is_the_biggest,
-          ' out of ', count_number_of_total_massifs, ' time series')
-    print('percentages of time reanalysis is the smallest:', 
-          100 * count_number_of_time_the_reanalysis_is_the_smallest / count_number_of_total_massifs)
-    print('number of time reanalysis is the smallest:', count_number_of_time_the_reanalysis_is_the_smallest,
-          ' out of ', count_number_of_total_massifs, ' time series')
+    print('Average ranks w.r.t to the mean:', np.round(np.mean(all_ranks), 1))
+    print('percentages of time reanalysis is the biggest:',
+          np.round(100 * count_number_of_time_the_reanalysis_is_the_biggest / count_number_of_total_massifs, 1), '\%')
+    print('percentages of time reanalysis is the smallest:',
+          np.round(100 * count_number_of_time_the_reanalysis_is_the_smallest / count_number_of_total_massifs, 1), '\%')
+
+    # Plot histogram.
+    ax = plt.gca()
+    c = Counter(all_ranks)
+    x = list(range(15))
+    y = 100 * np.array([c[i] for i in x]) / len(all_ranks)
+    ax.bar(x, y, width=0.5, edgecolor='black')
+    ax.set_ylabel('Percentage of time series (\%)')
+    ax.set_xlabel('Rank w.r.t to the mean')
+
+
 
 
 def bias_of_the_mean_with_the_altitude(altitude_to_visualizer: Dict[int, ComparisonHistoricalVisualizer]):
diff --git a/projects/projected_snowfall/comparison_with_scm/main_comparison_on_rcp_85_extended.py b/projects/projected_snowfall/comparison_with_scm/main_comparison_on_rcp_85_extended.py
index cda70659..c8998afc 100644
--- a/projects/projected_snowfall/comparison_with_scm/main_comparison_on_rcp_85_extended.py
+++ b/projects/projected_snowfall/comparison_with_scm/main_comparison_on_rcp_85_extended.py
@@ -18,8 +18,7 @@ from projects.projected_snowfall.comparison_with_scm.comparison_historical_visua
     ComparisonHistoricalVisualizer
 
 
-def main():
-    fast = 7
+def main(fast):
     year_max = 2019
     # Set the year_min and year_max for the comparison
     if fast is 1:
@@ -40,16 +39,6 @@ def main():
         year_min = 2006
         altitudes = [600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600][:]
     elif fast is 5:
-        year_max = 2019
-        massif_names = None
-        year_min = 2013
-        altitudes = [600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600][:]
-    elif fast is 6:
-        year_max = 2012
-        massif_names = None
-        year_min = 2006
-        altitudes = [600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600][:]
-    elif fast is 7:
         year_max = 2019
         massif_names = None
         year_min = 1982
@@ -97,4 +86,6 @@ def load_visualizer(altitude, massif_names, year_min, year_max) -> ComparisonHis
 
 
 if __name__ == '__main__':
-    main()
+    fast_list = [2, 4, 6][1:]
+    for fast in fast_list:
+        main(fast)
-- 
GitLab