Commit 9de91e03 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[projection swe] add non-stationary weight computer. refactor code for the weights.

parent c90f8771
No related merge requests found
Showing with 242 additions and 89 deletions
+242 -89
...@@ -139,3 +139,7 @@ def scenario_to_real_scenarios(adamont_scenario): ...@@ -139,3 +139,7 @@ def scenario_to_real_scenarios(adamont_scenario):
def gcm_rcm_couple_to_str(gcm_rcm_couple): def gcm_rcm_couple_to_str(gcm_rcm_couple):
return SEPARATOR_STR.join(gcm_rcm_couple) return SEPARATOR_STR.join(gcm_rcm_couple)
def str_to_gcm_rcm_couple(str):
return tuple(str.split(SEPARATOR_STR))
...@@ -26,10 +26,17 @@ class VisualizerForProjectionEnsemble(object): ...@@ -26,10 +26,17 @@ class VisualizerForProjectionEnsemble(object):
gcm_to_year_min_and_year_max=None, gcm_to_year_min_and_year_max=None,
): ):
self.altitudes_list = altitudes_list self.altitudes_list = altitudes_list
self.scenario = scenario
self.gcm_rcm_couples = gcm_rcm_couples self.gcm_rcm_couples = gcm_rcm_couples
self.massif_names = massif_names self.massif_names = massif_names
self.ensemble_fit_classes = ensemble_fit_classes self.ensemble_fit_classes = ensemble_fit_classes
# Some checks
if gcm_to_year_min_and_year_max is not None:
for gcm, years in gcm_to_year_min_and_year_max.items():
assert isinstance(gcm, str), gcm
assert len(years) == 2, years
# Load all studies # Load all studies
altitude_class_to_gcm_couple_to_studies = OrderedDict() altitude_class_to_gcm_couple_to_studies = OrderedDict()
for altitudes in altitudes_list: for altitudes in altitudes_list:
......
from collections import OrderedDict
import pandas as pd
import numpy as np
from scipy.special import softmax
from extreme_data.meteo_france_data.adamont_data.adamont_scenario import gcm_rcm_couple_to_str
from extreme_data.meteo_france_data.scm_models_data.altitudes_studies import AltitudesStudies
from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fit import IndependentEnsembleFit
from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
from projects.projected_swe.utils import WEIGHT_COLUMN_NAME, get_csv_filepath, save_to_filepath
from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
TimeTemporalCovariate
from collections import OrderedDict
import numpy as np
from scipy.special import softmax
from extreme_data.meteo_france_data.scm_models_data.altitudes_studies import AltitudesStudies
from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fit import IndependentEnsembleFit
from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
TimeTemporalCovariate
class AbstractWeightComputer(object):
def __init__(self, visualizer: VisualizerForProjectionEnsemble, scm_study_class, year_min, year_max):
self.scm_study_class = scm_study_class
self.year_max = year_max
self.year_min = year_min
self.visualizer = visualizer
def compute_weights_and_save_them(self):
column_names = [WEIGHT_COLUMN_NAME] + self.visualizer.gcm_rcm_couples
df = pd.DataFrame(index=self.visualizer.gcm_rcm_couples, columns=column_names)
for i, column_name in enumerate(column_names):
if i == 0:
gcm_rcm_couples_subset = self.visualizer.gcm_rcm_couples
gcm_rcm_couple_missing = None
else:
index_missing = i - 1
gcm_rcm_couple_missing = self.visualizer.gcm_rcm_couples[index_missing]
gcm_rcm_couples_subset = self.visualizer.gcm_rcm_couples[:]
gcm_rcm_couples_subset.remove(gcm_rcm_couple_missing)
# Compute weights
gcm_rcm_couple_to_weight = self.compute_weight(gcm_rcm_couples_subset, gcm_rcm_couple_missing)
column_values = [gcm_rcm_couple_to_weight[c] for c in df.index]
df[column_name] = column_values
# Save csv
save_to_filepath(df, self.visualizer.gcm_rcm_couples, self.visualizer.altitudes_list,
self.year_min, self.year_max,
self.visualizer.scenario)
def compute_weight(self, gcm_rcm_couples_subset, gcm_rcm_couple_missing):
# Initial the dictionary
gcm_rcm_couple_to_nllh = OrderedDict()
for gcm_rcm_couple in gcm_rcm_couples_subset:
gcm_rcm_couple_to_nllh[gcm_rcm_couple] = 0
for ensemble_fit in self.visualizer.ensemble_fits(ensemble_class=IndependentEnsembleFit):
# Load the AltitudeStudies
if gcm_rcm_couple_missing is None:
reference_studies = AltitudesStudies(self.scm_study_class, ensemble_fit.altitudes,
year_min=self.year_min,
year_max=self.year_max)
else:
reference_studies = ensemble_fit.gcm_rcm_couple_to_visualizer[gcm_rcm_couple_missing].studies
# Computer the nllh for each "station", i.e. each altitude and massif_name
for altitude, reference_study in reference_studies.altitude_to_study.items():
for massif_name, maxima in reference_study.massif_name_to_annual_maxima.items():
assert all([self.year_min <= year <= self.year_max for year in reference_study.ordered_years])
gcm_rcm_couple_to_local_nllh = self.compute_gcm_rcm_couple_to_local_nllh(ensemble_fit,
gcm_rcm_couples_subset,
massif_name,
maxima,
altitude,
reference_study)
if gcm_rcm_couple_to_local_nllh is not None:
for c, nllh in gcm_rcm_couple_to_local_nllh.items():
gcm_rcm_couple_to_nllh[c] += nllh
# Compute weights
weights = softmax(-np.array(list(gcm_rcm_couple_to_nllh.values())))
weights = [w[0] if isinstance(w, np.ndarray) else w for w in weights]
gcm_rcm_couple_to_weight = dict(zip(gcm_rcm_couples_subset, weights))
# Add missing weight
gcm_rcm_couple_to_weight[gcm_rcm_couple_missing] = np.nan
return gcm_rcm_couple_to_weight
def compute_gcm_rcm_couple_to_local_nllh(self, ensemble_fit, gcm_rcm_couples_subset, massif_name, maxima,
altitude, reference_study):
raise NotImplementedError
...@@ -11,97 +11,28 @@ from scipy.special import softmax ...@@ -11,97 +11,28 @@ from scipy.special import softmax
from extreme_data.meteo_france_data.adamont_data.adamont.adamont_safran import AdamontSnowfall from extreme_data.meteo_france_data.adamont_data.adamont.adamont_safran import AdamontSnowfall
from extreme_data.meteo_france_data.adamont_data.adamont_scenario import AdamontScenario, get_gcm_rcm_couples, \ from extreme_data.meteo_france_data.adamont_data.adamont_scenario import AdamontScenario, get_gcm_rcm_couples, \
gcm_rcm_couple_to_str, SEPARATOR_STR gcm_rcm_couple_to_str, SEPARATOR_STR, scenario_to_str
from extreme_data.meteo_france_data.scm_models_data.altitudes_studies import AltitudesStudies from extreme_data.meteo_france_data.scm_models_data.altitudes_studies import AltitudesStudies
from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day
from extreme_data.meteo_france_data.scm_models_data.utils import Season from extreme_data.meteo_france_data.scm_models_data.utils import Season
from extreme_data.utils import DATA_PATH from extreme_data.utils import DATA_PATH
from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
from extreme_fit.model.margin_model.polynomial_margin_model.gev_altitudinal_models import StationaryAltitudinal
from extreme_fit.model.margin_model.polynomial_margin_model.models_based_on_pariwise_analysis.gev_with_linear_shape_wrt_altitude import \
AltitudinalShapeLinearTimeStationary
from extreme_fit.model.margin_model.polynomial_margin_model.utils import \ from extreme_fit.model.margin_model.polynomial_margin_model.utils import \
ALTITUDINAL_GEV_MODELS_BASED_ON_POINTWISE_ANALYSIS ALTITUDINAL_GEV_MODELS_BASED_ON_POINTWISE_ANALYSIS
from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fit import IndependentEnsembleFit from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fit import IndependentEnsembleFit
from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
from extreme_trend.one_fold_fit.altitude_group import altitudes_for_groups from extreme_trend.one_fold_fit.altitude_group import altitudes_for_groups
from projects.projected_swe.abstract_weight_computer import AbstractWeightComputer
from projects.projected_swe.non_stationary_weight_computer import NllhWeightComputer
from projects.projected_swe.utils import load_gcm_rcm_couple_to_weight
from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \ from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
TimeTemporalCovariate TimeTemporalCovariate
from spatio_temporal_dataset.coordinates.temporal_coordinates.temperature_covariate import \ from spatio_temporal_dataset.coordinates.temporal_coordinates.temperature_covariate import \
AnomalyTemperatureWithSplineTemporalCovariate AnomalyTemperatureWithSplineTemporalCovariate
WEIGHT_COLUMN_NAME = "weight"
WEIGHT_FOLDER = "ensemble weight"
def get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max):
nb_gcm_rcm_couples = len(gcm_rcm_couples)
nb_altitudes_list = len(altitudes_list)
ensemble_folder_path = op.join(DATA_PATH, WEIGHT_FOLDER)
if not op.exists(ensemble_folder_path):
os.makedirs(ensemble_folder_path, exist_ok=True)
csv_filename = "weights_{}_{}_{}_{}.csv".format(nb_gcm_rcm_couples, nb_altitudes_list, year_min, year_max)
weight_csv_filepath = op.join(ensemble_folder_path, csv_filename)
return weight_csv_filepath
def load_gcm_rcm_couple_to_weight(gcm_rcm_couples, altitudes_list, year_min, year_max):
filepath = get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max)
df = pd.read_csv(filepath, index_col=0)
d = df[WEIGHT_COLUMN_NAME].to_dict()
d = {tuple(k.split(SEPARATOR_STR)): v for k, v in d.items()}
return d
def save_gcm_rcm_couple_to_weight(visualizer: VisualizerForProjectionEnsemble, scm_study_class,
year_min, year_max):
gcm_rcm_couple_to_nllh_sum = OrderedDict()
for c in visualizer.gcm_rcm_couples:
gcm_rcm_couple_to_nllh_sum[c] = 0
for ensemble_fit in visualizer.ensemble_fits(ensemble_class=IndependentEnsembleFit):
# Load the AltitudeStudies
scm_studies = AltitudesStudies(scm_study_class, ensemble_fit.altitudes, year_min=year_min,year_max=year_max)
for altitude, study in scm_studies.altitude_to_study.items():
for massif_name, maxima in study.massif_name_to_annual_maxima.items():
# Check that all the gcm_rcm_couple have a model for this massif_name
if condition_to_compute_nllh(ensemble_fit, massif_name, visualizer):
print(ensemble_fit.altitudes, massif_name)
coordinates = [np.array([altitude, year]) for year in study.ordered_years]
nllh_list = []
for gcm_rcm_couple in visualizer.gcm_rcm_couples:
best_function_from_fit = get_function_from_fit(ensemble_fit, massif_name, gcm_rcm_couple)
# It is normal that it could crash, because some models where fitted with data smaller than
# the data used to compute the nllh
nllh = compute_nllh(coordinates, maxima, best_function_from_fit,
maximum_from_obs=False, assertion_for_inf=False)
nllh_list.append(nllh)
if all([not np.isinf(nllh) for nllh in nllh_list]):
for nllh, gcm_rcm_couple in zip(nllh_list, visualizer.gcm_rcm_couples):
gcm_rcm_couple_to_nllh_sum[gcm_rcm_couple] += nllh
# Compute the final weight
print(gcm_rcm_couple_to_nllh_sum)
llh_list = -np.array(list(gcm_rcm_couple_to_nllh_sum.values()))
weights = softmax(llh_list)
couple_names = [gcm_rcm_couple_to_str(c) for c in visualizer.gcm_rcm_couples]
gcm_rcm_couple_to_normalized_weights = dict(zip(couple_names, weights))
print(gcm_rcm_couple_to_normalized_weights)
# Save to csv
filepath = get_csv_filepath(visualizer.gcm_rcm_couples, visualizer.altitudes_list, year_min, year_max)
df = pd.DataFrame({WEIGHT_COLUMN_NAME: weights}, index=couple_names)
print(df)
df.to_csv(filepath)
def condition_to_compute_nllh(ensemble_fit, massif_name, visualizer):
return all(
[massif_name in ensemble_fit.gcm_rcm_couple_to_visualizer[c].massif_name_to_one_fold_fit for c in
visualizer.gcm_rcm_couples])
def get_function_from_fit(ensemble_fit, massif_name, gcm_rcm_couple):
visualizer = ensemble_fit.gcm_rcm_couple_to_visualizer[gcm_rcm_couple]
one_fold_fit = visualizer.massif_name_to_one_fold_fit[massif_name]
return one_fold_fit.best_function_from_fit
def main_weight_computation(): def main_weight_computation():
start = time.time() start = time.time()
...@@ -111,23 +42,24 @@ def main_weight_computation(): ...@@ -111,23 +42,24 @@ def main_weight_computation():
}[study_class] }[study_class]
ensemble_fit_classes = [IndependentEnsembleFit] ensemble_fit_classes = [IndependentEnsembleFit]
temporal_covariate_for_fit = TimeTemporalCovariate temporal_covariate_for_fit = TimeTemporalCovariate
model_classes = ALTITUDINAL_GEV_MODELS_BASED_ON_POINTWISE_ANALYSIS # model_classes = ALTITUDINAL_GEV_MODELS_BASED_ON_POINTWISE_ANALYSIS
model_classes = [StationaryAltitudinal, AltitudinalShapeLinearTimeStationary]
remove_physically_implausible_models = True remove_physically_implausible_models = True
scenario = AdamontScenario.histo scenario = AdamontScenario.rcp85_extended
gcm_rcm_couples = get_gcm_rcm_couples(scenario) gcm_rcm_couples = get_gcm_rcm_couples(scenario)
year_min = 1982 year_min = 1982
year_max = 2005 year_max = 2019
# todo: maybe i should also limit the years for the fit of the model for each ensemble ? weight_computer_class = NllhWeightComputer
fast = None fast = True
if fast is None: if fast is None:
massif_names = None massif_names = None
altitudes_list = altitudes_for_groups[2:3] altitudes_list = altitudes_for_groups[2:3]
gcm_rcm_couples = gcm_rcm_couples[:10] gcm_rcm_couples = gcm_rcm_couples[:]
elif fast: elif fast:
massif_names = ['Pelvoux'][:1] massif_names = ['Vanoise', 'Pelvoux', 'Chartreuse'][:1]
altitudes_list = altitudes_for_groups[2:3] altitudes_list = altitudes_for_groups[1:2]
gcm_rcm_couples = gcm_rcm_couples[:2] gcm_rcm_couples = gcm_rcm_couples[:3]
else: else:
massif_names = None massif_names = None
altitudes_list = altitudes_for_groups[:] altitudes_list = altitudes_for_groups[:]
...@@ -139,9 +71,11 @@ def main_weight_computation(): ...@@ -139,9 +71,11 @@ def main_weight_computation():
massif_names=massif_names, massif_names=massif_names,
temporal_covariate_for_fit=temporal_covariate_for_fit, temporal_covariate_for_fit=temporal_covariate_for_fit,
remove_physically_implausible_models=remove_physically_implausible_models, remove_physically_implausible_models=remove_physically_implausible_models,
gcm_to_year_min_and_year_max=None, gcm_to_year_min_and_year_max={c[0]: (year_min, year_max) for c in gcm_rcm_couples},
) )
save_gcm_rcm_couple_to_weight(visualizer, scm_study_class, year_min, year_max)
weight_computer = weight_computer_class(visualizer, scm_study_class, year_min, year_max) # type:AbstractWeightComputer
weight_computer.compute_weights_and_save_them()
end = time.time() end = time.time()
duration = str(datetime.timedelta(seconds=end - start)) duration = str(datetime.timedelta(seconds=end - start))
...@@ -150,5 +84,5 @@ def main_weight_computation(): ...@@ -150,5 +84,5 @@ def main_weight_computation():
if __name__ == '__main__': if __name__ == '__main__':
main_weight_computation() main_weight_computation()
# d = load_gcm_rcm_couple_to_weight(['sd', 'sdf'], [23]) d = load_gcm_rcm_couple_to_weight(['sd', 'sdf'], [23], 1982, 2019, AdamontScenario.rcp85_extended)
# print(d) print(d)
import numpy as np
from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
from projects.projected_swe.abstract_weight_computer import AbstractWeightComputer
from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
TimeTemporalCovariate
class NllhWeightComputer(AbstractWeightComputer):
def compute_gcm_rcm_couple_to_local_nllh(self, ensemble_fit, gcm_rcm_couples_subset, massif_name, maxima,
altitude, reference_study):
assert ensemble_fit.temporal_covariate_for_fit is TimeTemporalCovariate, \
'for temperature covariate you should transform the coordinates'
gcm_rcm_couple_to_local_nllh = {}
# Check that all the gcm_rcm_couple have a model for this massif_name
if self.condition_to_compute_nllh(ensemble_fit, massif_name, self.visualizer):
print(ensemble_fit.altitudes, massif_name)
coordinates = [np.array([altitude, year]) for year in reference_study.ordered_years]
nllh_list = []
for gcm_rcm_couple in gcm_rcm_couples_subset:
best_function_from_fit = self.get_function_from_fit(ensemble_fit, massif_name, gcm_rcm_couple)
# It is normal that it could crash, because some models where fitted with data smaller than
# the data used to compute the nllh
nllh = compute_nllh(coordinates, maxima, best_function_from_fit,
maximum_from_obs=False, assertion_for_inf=False)
nllh_list.append(nllh)
if all([not np.isinf(nllh) for nllh in nllh_list]):
return dict(zip(gcm_rcm_couples_subset, nllh_list))
def condition_to_compute_nllh(self, ensemble_fit, massif_name, visualizer):
return all(
[massif_name in ensemble_fit.gcm_rcm_couple_to_visualizer[c].massif_name_to_one_fold_fit for c in
visualizer.gcm_rcm_couples])
def get_function_from_fit(self, ensemble_fit, massif_name, gcm_rcm_couple):
visualizer = ensemble_fit.gcm_rcm_couple_to_visualizer[gcm_rcm_couple]
one_fold_fit = visualizer.massif_name_to_one_fold_fit[massif_name]
return one_fold_fit.best_function_from_fit
import os
from collections import OrderedDict
import pandas as pd
import os.path as op
import datetime
import time
import numpy as np
from scipy.special import softmax
from extreme_data.meteo_france_data.adamont_data.adamont.adamont_safran import AdamontSnowfall
from extreme_data.meteo_france_data.adamont_data.adamont_scenario import AdamontScenario, get_gcm_rcm_couples, \
gcm_rcm_couple_to_str, SEPARATOR_STR, scenario_to_str, str_to_gcm_rcm_couple
from extreme_data.meteo_france_data.scm_models_data.altitudes_studies import AltitudesStudies
from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day
from extreme_data.meteo_france_data.scm_models_data.utils import Season
from extreme_data.utils import DATA_PATH
from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
from extreme_fit.model.margin_model.polynomial_margin_model.gev_altitudinal_models import StationaryAltitudinal
from extreme_fit.model.margin_model.polynomial_margin_model.utils import \
ALTITUDINAL_GEV_MODELS_BASED_ON_POINTWISE_ANALYSIS
from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fit import IndependentEnsembleFit
from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
from extreme_trend.one_fold_fit.altitude_group import altitudes_for_groups
from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
TimeTemporalCovariate
from spatio_temporal_dataset.coordinates.temporal_coordinates.temperature_covariate import \
AnomalyTemperatureWithSplineTemporalCovariate
WEIGHT_COLUMN_NAME = "all weights"
WEIGHT_FOLDER = "ensemble weight"
def get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario):
nb_gcm_rcm_couples = len(gcm_rcm_couples)
nb_altitudes_list = len(altitudes_list)
ensemble_folder_path = op.join(DATA_PATH, WEIGHT_FOLDER)
if not op.exists(ensemble_folder_path):
os.makedirs(ensemble_folder_path, exist_ok=True)
scenario_str = scenario_to_str(scenario)
csv_filename = "weights_{}_{}_{}_{}_{}.csv" \
.format(nb_gcm_rcm_couples, nb_altitudes_list, year_min, year_max, scenario_str)
weight_csv_filepath = op.join(ensemble_folder_path, csv_filename)
return weight_csv_filepath
def save_to_filepath(df, gcm_rcm_couples, altitudes_list,
year_min, year_max,
scenario):
filepath = get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario)
df = df.round(decimals=3)
df.index = [gcm_rcm_couple_to_str(i) for i in df.index]
df.columns = [gcm_rcm_couple_to_str(i) if j > 0 else i for j, i in enumerate(df.columns)]
# df.columns = [gcm_rcm_couple_to_str(i) for i in df.index]
print(df.head())
df.to_csv(filepath)
def load_gcm_rcm_couple_to_weight(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario,
gcm_rcm_couple_missing=None):
filepath = get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario)
df = pd.read_csv(filepath, index_col=0)
df.index = [str_to_gcm_rcm_couple(i) for i in df.index]
df.columns = [str_to_gcm_rcm_couple(i) if j > 0 else i for j, i in enumerate(df.columns)]
if gcm_rcm_couple_missing is None:
column_name = WEIGHT_COLUMN_NAME
else:
column_name = gcm_rcm_couple_missing
d = df[column_name].to_dict()
return d
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment