From e59cdcb16f0b9733f66675047140e8b7d7f1edfd Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Mon, 15 Mar 2021 15:39:19 +0100
Subject: [PATCH] [projection snowfall] add weight solver

---
 .../cmip5/climate_explorer_cimp5.py           |  2 +-
 .../stations_data/comparison_analysis.py      |  2 +-
 .../gelman_convergence_test.py                |  2 +-
 .../model_as_truth_visualizer/__init__.py     |  0
 .../old_weight_computer/__init__.py           |  0
 .../abstract_weight_computer.py               | 16 +-----
 .../knutti_weight_computer.py                 |  4 +-
 .../main_weight_computation.py                | 25 ++-------
 .../non_stationary_weight_computer.py         |  3 +-
 .../{ => old_weight_computer}/utils.py        |  0
 .../projected_swe/weight_solver/__init__.py   |  0
 .../weight_solver/abtract_weight_solver.py    | 51 +++++++++++++++++++
 .../projected_swe/weight_solver/indicator.py  | 19 +++++++
 .../weight_solver/knutti_weight_solver.py     | 49 ++++++++++++++++++
 .../weight_solver/zhu_weight_solver.py        |  0
 15 files changed, 130 insertions(+), 43 deletions(-)
 create mode 100644 projects/projected_swe/model_as_truth_visualizer/__init__.py
 create mode 100644 projects/projected_swe/old_weight_computer/__init__.py
 rename projects/projected_swe/{ => old_weight_computer}/abstract_weight_computer.py (83%)
 rename projects/projected_swe/{ => old_weight_computer}/knutti_weight_computer.py (80%)
 rename projects/projected_swe/{ => old_weight_computer}/main_weight_computation.py (75%)
 rename projects/projected_swe/{ => old_weight_computer}/non_stationary_weight_computer.py (89%)
 rename projects/projected_swe/{ => old_weight_computer}/utils.py (100%)
 create mode 100644 projects/projected_swe/weight_solver/__init__.py
 create mode 100644 projects/projected_swe/weight_solver/abtract_weight_solver.py
 create mode 100644 projects/projected_swe/weight_solver/indicator.py
 create mode 100644 projects/projected_swe/weight_solver/knutti_weight_solver.py
 create mode 100644 projects/projected_swe/weight_solver/zhu_weight_solver.py

diff --git a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
index 4b5d8a8d..bf9a6f4a 100644
--- a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
+++ b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
@@ -90,7 +90,7 @@ def dat_to_csv(csv_filepath, txt_filepath):
     assert len(df_temp_until_july.columns) == 7
     df_temp_after_august = df.iloc[:-1, 7:]
     assert len(df_temp_after_august.columns) == 5
-    l = df_temp_until_july.sum(axis=1).values + df_temp_after_august.sum(axis=1).values
+    l = df_temp_until_july.sum_of_differences(axis=1).values + df_temp_after_august.sum_of_differences(axis=1).values
     l /= 12
     l = [np.nan] + list(l)
     assert len(l) == len(df.index)
diff --git a/extreme_data/meteo_france_data/stations_data/comparison_analysis.py b/extreme_data/meteo_france_data/stations_data/comparison_analysis.py
index d4bd1bd1..27899157 100644
--- a/extreme_data/meteo_france_data/stations_data/comparison_analysis.py
+++ b/extreme_data/meteo_france_data/stations_data/comparison_analysis.py
@@ -213,7 +213,7 @@ class ComparisonAnalysis(object):
         # Mean number of non-Nan values
         d['% of Nan'] = df_values_from_1958.isna().mean().mean()
         # Number of lines with only Nan
-        d['Lines w Nan'] = df_values_from_1958.isna().all(axis=1).sum()
+        d['Lines w Nan'] = df_values_from_1958.isna().all(axis=1).sum_of_differences()
         return pd.Series(d)
 
     def altitude_short_analysis(self):
diff --git a/projects/archive/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py b/projects/archive/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py
index e5f6b7a5..aed8b4db 100644
--- a/projects/archive/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py
+++ b/projects/archive/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py
@@ -13,7 +13,7 @@ def compute_gelman_score(means, variances, N, M):
     assert isinstance(means, pd.Series)
     assert isinstance(variances, pd.Series)
     mean = means.mean()
-    B = N * (means - mean).pow(2).sum() / (M - 1)
+    B = N * (means - mean).pow(2).sum_of_differences() / (M - 1)
     W = variances.mean()
     V_hat = (N - 1) * W / N
     V_hat += (M + 1) * B / (M * N)
diff --git a/projects/projected_swe/model_as_truth_visualizer/__init__.py b/projects/projected_swe/model_as_truth_visualizer/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/projects/projected_swe/old_weight_computer/__init__.py b/projects/projected_swe/old_weight_computer/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/projects/projected_swe/abstract_weight_computer.py b/projects/projected_swe/old_weight_computer/abstract_weight_computer.py
similarity index 83%
rename from projects/projected_swe/abstract_weight_computer.py
rename to projects/projected_swe/old_weight_computer/abstract_weight_computer.py
index 669419f1..455d992e 100644
--- a/projects/projected_swe/abstract_weight_computer.py
+++ b/projects/projected_swe/old_weight_computer/abstract_weight_computer.py
@@ -1,28 +1,14 @@
-from collections import OrderedDict
 import pandas as pd
 
-import numpy as np
-from scipy.special import softmax
-
-from extreme_data.meteo_france_data.adamont_data.adamont_scenario import gcm_rcm_couple_to_str
-from extreme_data.meteo_france_data.scm_models_data.altitudes_studies import AltitudesStudies
-from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
-from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fit import IndependentEnsembleFit
-from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
-from projects.projected_swe.utils import WEIGHT_COLUMN_NAME, get_csv_filepath, save_to_filepath
-from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
-    TimeTemporalCovariate
+from projects.projected_swe.weight_computer.utils import WEIGHT_COLUMN_NAME, save_to_filepath
 from collections import OrderedDict
 
 import numpy as np
 from scipy.special import softmax
 
 from extreme_data.meteo_france_data.scm_models_data.altitudes_studies import AltitudesStudies
-from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
 from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fit import IndependentEnsembleFit
 from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
-from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
-    TimeTemporalCovariate
 
 
 class AbstractWeightComputer(object):
diff --git a/projects/projected_swe/knutti_weight_computer.py b/projects/projected_swe/old_weight_computer/knutti_weight_computer.py
similarity index 80%
rename from projects/projected_swe/knutti_weight_computer.py
rename to projects/projected_swe/old_weight_computer/knutti_weight_computer.py
index 41b43b13..17284b7a 100644
--- a/projects/projected_swe/knutti_weight_computer.py
+++ b/projects/projected_swe/old_weight_computer/knutti_weight_computer.py
@@ -1,7 +1,7 @@
 import numpy as np
 
-from projects.projected_swe.abstract_weight_computer import AbstractWeightComputer
-from projects.projected_swe.non_stationary_weight_computer import NllhWeightComputer
+from projects.projected_swe.weight_computer.abstract_weight_computer import AbstractWeightComputer
+from projects.projected_swe.weight_computer.non_stationary_weight_computer import NllhWeightComputer
 
 
 class KnuttiWeightComputer(AbstractWeightComputer):
diff --git a/projects/projected_swe/main_weight_computation.py b/projects/projected_swe/old_weight_computer/main_weight_computation.py
similarity index 75%
rename from projects/projected_swe/main_weight_computation.py
rename to projects/projected_swe/old_weight_computer/main_weight_computation.py
index 8a6b8751..bfd4a62b 100644
--- a/projects/projected_swe/main_weight_computation.py
+++ b/projects/projected_swe/old_weight_computer/main_weight_computation.py
@@ -1,38 +1,21 @@
-import os
-
-from collections import OrderedDict
-
-import pandas as pd
-import os.path as op
 import datetime
 import time
-import numpy as np
-from scipy.special import softmax
 
 from extreme_data.meteo_france_data.adamont_data.adamont.adamont_safran import AdamontSnowfall
-from extreme_data.meteo_france_data.adamont_data.adamont_scenario import AdamontScenario, get_gcm_rcm_couples, \
-    gcm_rcm_couple_to_str, SEPARATOR_STR, scenario_to_str
-from extreme_data.meteo_france_data.scm_models_data.altitudes_studies import AltitudesStudies
+from extreme_data.meteo_france_data.adamont_data.adamont_scenario import AdamontScenario, get_gcm_rcm_couples
 from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day
 from extreme_data.meteo_france_data.scm_models_data.utils import Season
-from extreme_data.utils import DATA_PATH
-from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
 from extreme_fit.model.margin_model.polynomial_margin_model.gev_altitudinal_models import StationaryAltitudinal
 from extreme_fit.model.margin_model.polynomial_margin_model.models_based_on_pariwise_analysis.gev_with_linear_shape_wrt_altitude import \
     AltitudinalShapeLinearTimeStationary
-from extreme_fit.model.margin_model.polynomial_margin_model.utils import \
-    ALTITUDINAL_GEV_MODELS_BASED_ON_POINTWISE_ANALYSIS
 from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fit import IndependentEnsembleFit
 from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
 from extreme_trend.one_fold_fit.altitude_group import altitudes_for_groups
-from projects.projected_swe.abstract_weight_computer import AbstractWeightComputer
-from projects.projected_swe.knutti_weight_computer import KnuttiWeightComputer
-from projects.projected_swe.non_stationary_weight_computer import NllhWeightComputer
-from projects.projected_swe.utils import load_gcm_rcm_couple_to_weight
+from projects.projected_swe.weight_computer.abstract_weight_computer import AbstractWeightComputer
+from projects.projected_swe.weight_computer.knutti_weight_computer import KnuttiWeightComputer
+from projects.projected_swe.weight_computer.non_stationary_weight_computer import NllhWeightComputer
 from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
     TimeTemporalCovariate
-from spatio_temporal_dataset.coordinates.temporal_coordinates.temperature_covariate import \
-    AnomalyTemperatureWithSplineTemporalCovariate
 
 
 def main_weight_computation():
diff --git a/projects/projected_swe/non_stationary_weight_computer.py b/projects/projected_swe/old_weight_computer/non_stationary_weight_computer.py
similarity index 89%
rename from projects/projected_swe/non_stationary_weight_computer.py
rename to projects/projected_swe/old_weight_computer/non_stationary_weight_computer.py
index dd305395..75215187 100644
--- a/projects/projected_swe/non_stationary_weight_computer.py
+++ b/projects/projected_swe/old_weight_computer/non_stationary_weight_computer.py
@@ -1,8 +1,7 @@
 import numpy as np
 
 from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
-from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
-from projects.projected_swe.abstract_weight_computer import AbstractWeightComputer
+from projects.projected_swe.weight_computer.abstract_weight_computer import AbstractWeightComputer
 from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
     TimeTemporalCovariate
 
diff --git a/projects/projected_swe/utils.py b/projects/projected_swe/old_weight_computer/utils.py
similarity index 100%
rename from projects/projected_swe/utils.py
rename to projects/projected_swe/old_weight_computer/utils.py
diff --git a/projects/projected_swe/weight_solver/__init__.py b/projects/projected_swe/weight_solver/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/projects/projected_swe/weight_solver/abtract_weight_solver.py b/projects/projected_swe/weight_solver/abtract_weight_solver.py
new file mode 100644
index 00000000..70bc47e3
--- /dev/null
+++ b/projects/projected_swe/weight_solver/abtract_weight_solver.py
@@ -0,0 +1,51 @@
+from scipy.special import softmax
+import numpy as np
+
+from projects.projected_swe.weight_solver.indicator import AbstractIndicator
+
+
+class AbstractWeightSolver(object):
+
+    def __init__(self, observation_study, couple_to_study, indicator_class: type, add_interdependence_weight=False):
+        self.observation_study = observation_study
+        self.couple_to_study = couple_to_study
+        self.indicator_class = indicator_class
+        self.add_interdependence_weight = add_interdependence_weight
+
+    @property
+    def couple_to_weight(self):
+        nllh_list, couple_list = zip(*list(self.couple_to_nllh.items()))
+        weights = softmax(-np.array(nllh_list))
+        return dict(zip(couple_list, weights))
+
+    @property
+    def couple_to_nllh(self):
+        couple_to_nllh = self.couple_to_nllh_skill
+        if self.add_interdependence_weight:
+            for c, v in self.couple_to_nllh_interdependence.items():
+                couple_to_nllh[c] += v
+        return couple_to_nllh
+
+    @property
+    def couple_to_nllh_skill(self):
+        couple_to_nllh_skill = {}
+        for couple, couple_study in self.couple_to_study.items():
+            skill = self.compute_skill(couple_study=couple_study)
+            nllh_skill = -np.log(skill)
+            couple_to_nllh_skill[couple] = nllh_skill
+        return couple_to_nllh_skill
+
+    def compute_skill(self, couple_study):
+        raise NotImplementedError
+
+    @property
+    def couple_to_nllh_interdependence(self):
+        couple_to_nllh_interdependence = {}
+        for couple, couple_study in self.couple_to_study.items():
+            interdependence = self.compute_interdependence(couple_study=couple_study)
+            nllh_interdependence = -np.log(interdependence)
+            couple_to_nllh_interdependence[couple] = nllh_interdependence
+        return couple_to_nllh_interdependence
+
+    def compute_interdependence(self, couple_study):
+        raise NotImplementedError
diff --git a/projects/projected_swe/weight_solver/indicator.py b/projects/projected_swe/weight_solver/indicator.py
new file mode 100644
index 00000000..1507f8a2
--- /dev/null
+++ b/projects/projected_swe/weight_solver/indicator.py
@@ -0,0 +1,19 @@
+class AbstractIndicator(object):
+
+    @classmethod
+    def get_indicator(cls, study, bootstrap=False):
+        raise NotImplementedError
+
+
+class AnnualMaximaMeanIndicator(AbstractIndicator):
+
+    @classmethod
+    def get_indicator(cls, study, bootstrap=False):
+        pass
+
+
+class ReturnLevelIndicator(AbstractIndicator):
+
+    @classmethod
+    def get_indicator(cls, study, bootstrap=False):
+        pass
diff --git a/projects/projected_swe/weight_solver/knutti_weight_solver.py b/projects/projected_swe/weight_solver/knutti_weight_solver.py
new file mode 100644
index 00000000..b1b2a651
--- /dev/null
+++ b/projects/projected_swe/weight_solver/knutti_weight_solver.py
@@ -0,0 +1,49 @@
+import numpy as np
+from projects.projected_swe.weight_solver.abtract_weight_solver import AbstractWeightSolver
+from projects.projected_swe.weight_solver.indicator import AbstractIndicator
+
+
+class KnuttiWeightSolver(AbstractWeightSolver):
+
+    def __init__(self, sigma_skill, sigma_interdependence, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.sigma_skill = sigma_skill
+        self.sigma_interdependence = sigma_interdependence
+
+    def compute_skill(self, couple_study):
+        raise self.compute_distance_between_two_study(self.observation_study, self.couple_to_study, self.sigma_skill)
+
+    def compute_interdependence(self, couple_study):
+        sum = 0
+        for other_couple_study in self.couple_to_study.values():
+            if other_couple_study is not couple_study:
+                sum += self.compute_distance_between_two_study(couple_study, other_couple_study, self.sigma_interdependence)
+        return 1 / (1 + sum)
+
+    def compute_distance_between_two_study(self, study_1, study_2, sigma):
+        difference = self.sum_of_differences(study_1, study_2)
+        return np.exp(-np.power(difference, 2 * sigma))
+
+    def sum_of_differences(self, study_1, study_2):
+        assert issubclass(self.indicator_class, AbstractIndicator)
+        return self.indicator_class.get_indicator(study_1) - self.indicator_class.get_indicator(study_2)
+
+
+class KnuttiWeightSolverWithBootstrapVersion1(KnuttiWeightSolver):
+
+    def sum_of_differences(self, study_1, study_2):
+        assert issubclass(self.indicator_class, AbstractIndicator)
+        bootstrap_study_1 = self.indicator_class.get_indicator(study_1, bootstrap=True)
+        bootstrap_study_2 = self.indicator_class.get_indicator(study_2, bootstrap=True)
+        differences = bootstrap_study_1 - bootstrap_study_2
+        return differences.sum()
+
+
+class KnuttiWeightSolverWithBootstrapVersion2(KnuttiWeightSolver):
+
+    def sum_of_differences(self, study_1, study_2):
+        assert issubclass(self.indicator_class, AbstractIndicator)
+        bootstrap_study_1 = self.indicator_class.get_indicator(study_1, bootstrap=True)
+        bootstrap_study_2 = self.indicator_class.get_indicator(study_2, bootstrap=True)
+        differences = np.subtract.outer(bootstrap_study_1, bootstrap_study_2)
+        return differences.sum()
diff --git a/projects/projected_swe/weight_solver/zhu_weight_solver.py b/projects/projected_swe/weight_solver/zhu_weight_solver.py
new file mode 100644
index 00000000..e69de29b
-- 
GitLab