Commit 6acd3721 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[projection swe] implement knutti_weight_computer.py and refactor the rest of the code.

parent 9de91e03
No related merge requests found
Showing with 75 additions and 41 deletions
+75 -41
......@@ -26,6 +26,7 @@ class VisualizerForProjectionEnsemble(object):
gcm_to_year_min_and_year_max=None,
):
self.altitudes_list = altitudes_list
self.temporal_covariate_for_fit = temporal_covariate_for_fit
self.scenario = scenario
self.gcm_rcm_couples = gcm_rcm_couples
self.massif_names = massif_names
......
......@@ -53,7 +53,7 @@ class AbstractWeightComputer(object):
# Save csv
save_to_filepath(df, self.visualizer.gcm_rcm_couples, self.visualizer.altitudes_list,
self.year_min, self.year_max,
self.visualizer.scenario)
self.visualizer.scenario, type(self))
def compute_weight(self, gcm_rcm_couples_subset, gcm_rcm_couple_missing):
# Initial the dictionary
......@@ -70,13 +70,11 @@ class AbstractWeightComputer(object):
reference_studies = ensemble_fit.gcm_rcm_couple_to_visualizer[gcm_rcm_couple_missing].studies
# Computer the nllh for each "station", i.e. each altitude and massif_name
for altitude, reference_study in reference_studies.altitude_to_study.items():
for massif_name, maxima in reference_study.massif_name_to_annual_maxima.items():
for massif_name in reference_study.study_massif_names:
assert all([self.year_min <= year <= self.year_max for year in reference_study.ordered_years])
gcm_rcm_couple_to_local_nllh = self.compute_gcm_rcm_couple_to_local_nllh(ensemble_fit,
gcm_rcm_couples_subset,
massif_name,
maxima,
altitude,
reference_study)
if gcm_rcm_couple_to_local_nllh is not None:
for c, nllh in gcm_rcm_couple_to_local_nllh.items():
......@@ -89,6 +87,22 @@ class AbstractWeightComputer(object):
gcm_rcm_couple_to_weight[gcm_rcm_couple_missing] = np.nan
return gcm_rcm_couple_to_weight
def compute_gcm_rcm_couple_to_local_nllh(self, ensemble_fit, gcm_rcm_couples_subset, massif_name, maxima,
altitude, reference_study):
def compute_gcm_rcm_couple_to_local_nllh(self, ensemble_fit, gcm_rcm_couples_subset, massif_name, reference_study):
# Check that all the gcm_rcm_couple have a model for this massif_name
if self.condition_to_compute_nllh(ensemble_fit, massif_name, self.visualizer):
print(ensemble_fit.altitudes, massif_name)
nllh_list = []
for gcm_rcm_couple in gcm_rcm_couples_subset:
nllh = self.compute_nllh_local(ensemble_fit, gcm_rcm_couple, massif_name, reference_study)
nllh_list.append(nllh)
if all([not np.isinf(nllh) for nllh in nllh_list]):
return dict(zip(gcm_rcm_couples_subset, nllh_list))
def condition_to_compute_nllh(self, ensemble_fit, massif_name, visualizer):
return all(
[massif_name in ensemble_fit.gcm_rcm_couple_to_visualizer[c].massif_name_to_one_fold_fit for c in
visualizer.gcm_rcm_couples])
def compute_nllh_local(self, ensemble_fit, gcm_rcm_couple, massif_name, reference_study):
raise NotImplementedError
import numpy as np
from projects.projected_swe.abstract_weight_computer import AbstractWeightComputer
from projects.projected_swe.non_stationary_weight_computer import NllhWeightComputer
class KnuttiWeightComputer(AbstractWeightComputer):
def __init__(self, *args, sigma_D, **kwargs):
super().__init__(*args, **kwargs)
self.sigma_D = sigma_D
def compute_nllh_local(self, ensemble_fit, gcm_rcm_couple, massif_name, reference_study):
mean_maxima_reference = reference_study.massif_name_to_annual_maxima[massif_name].mean()
studies = ensemble_fit.gcm_rcm_couple_to_visualizer[gcm_rcm_couple].studies
study = studies.altitude_to_study[reference_study.altitude]
mean_maxima_couple = study.massif_name_to_annual_maxima[massif_name].mean()
ratio = (mean_maxima_couple - mean_maxima_reference) / self.sigma_D
proba = np.exp(-np.power(ratio, 2))
return -np.log(proba)
class MixedWeightComputer(NllhWeightComputer, KnuttiWeightComputer):
pass
\ No newline at end of file
......@@ -26,6 +26,7 @@ from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fi
from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
from extreme_trend.one_fold_fit.altitude_group import altitudes_for_groups
from projects.projected_swe.abstract_weight_computer import AbstractWeightComputer
from projects.projected_swe.knutti_weight_computer import KnuttiWeightComputer
from projects.projected_swe.non_stationary_weight_computer import NllhWeightComputer
from projects.projected_swe.utils import load_gcm_rcm_couple_to_weight
from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
......@@ -49,7 +50,7 @@ def main_weight_computation():
gcm_rcm_couples = get_gcm_rcm_couples(scenario)
year_min = 1982
year_max = 2019
weight_computer_class = NllhWeightComputer
weight_computer_class = [NllhWeightComputer, KnuttiWeightComputer][1]
fast = True
if fast is None:
......@@ -74,7 +75,9 @@ def main_weight_computation():
gcm_to_year_min_and_year_max={c[0]: (year_min, year_max) for c in gcm_rcm_couples},
)
weight_computer = weight_computer_class(visualizer, scm_study_class, year_min, year_max) # type:AbstractWeightComputer
weight_computer = weight_computer_class(visualizer, scm_study_class, year_min, year_max,
sigma_D=10) # type:AbstractWeightComputer
weight_computer.compute_weights_and_save_them()
end = time.time()
......@@ -84,5 +87,6 @@ def main_weight_computation():
if __name__ == '__main__':
main_weight_computation()
d = load_gcm_rcm_couple_to_weight(['sd', 'sdf'], [23], 1982, 2019, AdamontScenario.rcp85_extended)
print(d)
# d = load_gcm_rcm_couple_to_weight(['sd', 'sdf'], [23], 1982, 2019, AdamontScenario.rcp85_extended,
# weight_class=NllhWeightComputer, gcm_rcm_couple_missing=None)
# print(d)
import numpy as np
from extreme_fit.estimator.margin_estimator.abstract_margin_estimator import compute_nllh
from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
from projects.projected_swe.abstract_weight_computer import AbstractWeightComputer
from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
TimeTemporalCovariate
......@@ -8,32 +9,19 @@ from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_
class NllhWeightComputer(AbstractWeightComputer):
def compute_gcm_rcm_couple_to_local_nllh(self, ensemble_fit, gcm_rcm_couples_subset, massif_name, maxima,
altitude, reference_study):
assert ensemble_fit.temporal_covariate_for_fit is TimeTemporalCovariate, \
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
assert self.visualizer.temporal_covariate_for_fit is TimeTemporalCovariate, \
'for temperature covariate you should transform the coordinates'
gcm_rcm_couple_to_local_nllh = {}
# Check that all the gcm_rcm_couple have a model for this massif_name
if self.condition_to_compute_nllh(ensemble_fit, massif_name, self.visualizer):
print(ensemble_fit.altitudes, massif_name)
coordinates = [np.array([altitude, year]) for year in reference_study.ordered_years]
nllh_list = []
for gcm_rcm_couple in gcm_rcm_couples_subset:
best_function_from_fit = self.get_function_from_fit(ensemble_fit, massif_name, gcm_rcm_couple)
# It is normal that it could crash, because some models where fitted with data smaller than
# the data used to compute the nllh
nllh = compute_nllh(coordinates, maxima, best_function_from_fit,
maximum_from_obs=False, assertion_for_inf=False)
nllh_list.append(nllh)
if all([not np.isinf(nllh) for nllh in nllh_list]):
return dict(zip(gcm_rcm_couples_subset, nllh_list))
def condition_to_compute_nllh(self, ensemble_fit, massif_name, visualizer):
return all(
[massif_name in ensemble_fit.gcm_rcm_couple_to_visualizer[c].massif_name_to_one_fold_fit for c in
visualizer.gcm_rcm_couples])
def compute_nllh_local(self, ensemble_fit, gcm_rcm_couple, massif_name, reference_study):
coordinates = [np.array([reference_study.altitude, year]) for year in reference_study.ordered_years]
maxima = reference_study.massif_name_to_annual_maxima[massif_name]
best_function_from_fit = self.get_function_from_fit(ensemble_fit, massif_name, gcm_rcm_couple)
# It is normal that it could crash, because some models where fitted with data smaller than
# the data used to compute the nllh, therefore we use assertion_for_inf=False below
return compute_nllh(coordinates, maxima, best_function_from_fit,
maximum_from_obs=False, assertion_for_inf=False)
def get_function_from_fit(self, ensemble_fit, massif_name, gcm_rcm_couple):
visualizer = ensemble_fit.gcm_rcm_couple_to_visualizer[gcm_rcm_couple]
......
......@@ -23,6 +23,7 @@ from extreme_fit.model.margin_model.polynomial_margin_model.utils import \
from extreme_trend.ensemble_fit.independent_ensemble_fit.independent_ensemble_fit import IndependentEnsembleFit
from extreme_trend.ensemble_fit.visualizer_for_projection_ensemble import VisualizerForProjectionEnsemble
from extreme_trend.one_fold_fit.altitude_group import altitudes_for_groups
from root_utils import get_display_name_from_object_type
from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \
TimeTemporalCovariate
from spatio_temporal_dataset.coordinates.temporal_coordinates.temperature_covariate import \
......@@ -33,23 +34,24 @@ WEIGHT_COLUMN_NAME = "all weights"
WEIGHT_FOLDER = "ensemble weight"
def get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario):
def get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario, weight_computer_class):
nb_gcm_rcm_couples = len(gcm_rcm_couples)
nb_altitudes_list = len(altitudes_list)
ensemble_folder_path = op.join(DATA_PATH, WEIGHT_FOLDER)
if not op.exists(ensemble_folder_path):
os.makedirs(ensemble_folder_path, exist_ok=True)
scenario_str = scenario_to_str(scenario)
csv_filename = "weights_{}_{}_{}_{}_{}.csv" \
.format(nb_gcm_rcm_couples, nb_altitudes_list, year_min, year_max, scenario_str)
class_name = get_display_name_from_object_type(weight_computer_class)
csv_filename = "weights_{}_{}_{}_{}_{}_{}.csv" \
.format(nb_gcm_rcm_couples, nb_altitudes_list, year_min, year_max, scenario_str, class_name)
weight_csv_filepath = op.join(ensemble_folder_path, csv_filename)
return weight_csv_filepath
def save_to_filepath(df, gcm_rcm_couples, altitudes_list,
year_min, year_max,
scenario):
filepath = get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario)
scenario, weight_computer_class):
filepath = get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario, weight_computer_class)
df = df.round(decimals=3)
df.index = [gcm_rcm_couple_to_str(i) for i in df.index]
df.columns = [gcm_rcm_couple_to_str(i) if j > 0 else i for j, i in enumerate(df.columns)]
......@@ -60,8 +62,8 @@ def save_to_filepath(df, gcm_rcm_couples, altitudes_list,
def load_gcm_rcm_couple_to_weight(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario,
gcm_rcm_couple_missing=None):
filepath = get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario)
weight_class, gcm_rcm_couple_missing):
filepath = get_csv_filepath(gcm_rcm_couples, altitudes_list, year_min, year_max, scenario, weight_class)
df = pd.read_csv(filepath, index_col=0)
df.index = [str_to_gcm_rcm_couple(i) for i in df.index]
df.columns = [str_to_gcm_rcm_couple(i) if j > 0 else i for j, i in enumerate(df.columns)]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment