diff --git a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
index bf9a6f4a3189b40c06ff0a08a80e78e732b4f0e3..4b5d8a8d42e3037a14df5a2290f431a3dcae7897 100644
--- a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
+++ b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
@@ -90,7 +90,7 @@ def dat_to_csv(csv_filepath, txt_filepath):
     assert len(df_temp_until_july.columns) == 7
     df_temp_after_august = df.iloc[:-1, 7:]
     assert len(df_temp_after_august.columns) == 5
-    l = df_temp_until_july.sum_of_differences(axis=1).values + df_temp_after_august.sum_of_differences(axis=1).values
+    l = df_temp_until_july.sum(axis=1).values + df_temp_after_august.sum(axis=1).values
     l /= 12
     l = [np.nan] + list(l)
     assert len(l) == len(df.index)
diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
index 09aaa8f595ab8a41591a85fb25d44b8855b3b4ff..c746c328172ae3d322a9b1472476c99af2a454b4 100644
--- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
+++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py
@@ -103,6 +103,7 @@ class AbstractStudy(object):
         self.slope = slope
         # Add some cache for computation
         self._cache_for_pointwise_fit = {}
+        self._massif_names_for_cache = None
 
     """ Time """
 
@@ -873,46 +874,54 @@ class AbstractStudy(object):
             mask_french_alps += mask_massif
         return ~np.array(mask_french_alps, dtype=bool)
 
+    def massif_name_to_return_level_list_from_bootstrap(self, return_period):
+        quantile_level = 1 - 1 / return_period
+        _, _, d = self.massif_name_to_stationary_gev_params_and_confidence(quantile_level=quantile_level,
+                                                                           confidence_interval_based_on_delta_method=False)
+        return d
+
     def massif_name_to_return_level(self, return_period):
         return {massif_name: gev_params.return_level(return_period=return_period)
                 for massif_name, gev_params in self.massif_name_to_stationary_gev_params.items()}
 
-    @cached_property
+    @property
     def massif_name_to_stationary_gev_params(self):
-        d, _ = self._massif_name_to_stationary_gev_params_and_confidence(quantile_level=None)
+        d, _, _ = self.massif_name_to_stationary_gev_params_and_confidence(quantile_level=None)
         return d
 
     def massif_name_to_stationary_gev_params_and_confidence(self, quantile_level,
-                                                            confidence_interval_based_on_delta_method):
+                                                            confidence_interval_based_on_delta_method=True):
         t = (quantile_level, confidence_interval_based_on_delta_method)
         if t in self._cache_for_pointwise_fit:
             return self._cache_for_pointwise_fit[t]
         else:
-            res = self._massif_name_to_stationary_gev_params_and_confidence(*t)
+            res = self._massif_name_to_stationary_gev_params_and_confidence(*t, self._massif_names_for_cache)
             self._cache_for_pointwise_fit[t] = res
             return res
 
     def _massif_name_to_stationary_gev_params_and_confidence(self, quantile_level=None,
-                                                             confidence_interval_based_on_delta_method=True):
+                                                             confidence_interval_based_on_delta_method=True,
+                                                             massif_names=None):
        """ at least 90% of values must be above zero"""
         massif_name_to_stationary_gev_params = {}
         massif_name_to_confidence = {}
+        massif_name_to_return_level_list = {}
         for massif_name, annual_maxima in self.massif_name_to_annual_maxima.items():
-            percentage_of_non_zeros = 100 * np.count_nonzero(annual_maxima) / len(annual_maxima)
-            if percentage_of_non_zeros > 90:
-                start = time.time()
-                gev_params, mean_estimate, confidence = fitted_stationary_gev_with_uncertainty_interval(annual_maxima,
-                                                                                                        fit_method=MarginFitMethod.extremes_fevd_mle,
-                                                                                                        quantile_level=quantile_level,
-                                                                                                        confidence_interval_based_on_delta_method=confidence_interval_based_on_delta_method)
-                end = time.time()
-                duration = end - start
-
-                if -0.5 <= gev_params.shape <= 0.5:
-                    massif_name_to_stationary_gev_params[massif_name] = gev_params
-                    massif_name_to_confidence[massif_name] = EurocodeConfidenceIntervalFromExtremes(mean_estimate,
-                                                                                                    confidence)
-        return massif_name_to_stationary_gev_params, massif_name_to_confidence
+            if (massif_names is None) or (massif_name in massif_names):
+                percentage_of_non_zeros = 100 * np.count_nonzero(annual_maxima) / len(annual_maxima)
+                if percentage_of_non_zeros > 90:
+                    gev_params, mean_estimate, confidence, return_level_list = fitted_stationary_gev_with_uncertainty_interval(
+                        annual_maxima,
+                        fit_method=MarginFitMethod.extremes_fevd_mle,
+                        quantile_level=quantile_level,
+                        confidence_interval_based_on_delta_method=confidence_interval_based_on_delta_method)
+
+                    if -0.5 <= gev_params.shape <= 0.5:
+                        massif_name_to_stationary_gev_params[massif_name] = gev_params
+                        massif_name_to_confidence[massif_name] = EurocodeConfidenceIntervalFromExtremes(mean_estimate,
+                                                                                                        confidence)
+                        massif_name_to_return_level_list[massif_name] = return_level_list
+        return massif_name_to_stationary_gev_params, massif_name_to_confidence, massif_name_to_return_level_list
 
     def massif_name_to_gev_param_list(self, year_min_and_max_list):
         year_to_index = {y: i for i, y in enumerate(self.ordered_years)}
diff --git a/extreme_data/meteo_france_data/scm_models_data/utils_function.py b/extreme_data/meteo_france_data/scm_models_data/utils_function.py
index 707188f3ec3f873beaabf526e297d7026b86e11b..d6a933991e4312e28067e592dd9b2dd6b3f4a8ef 100644
--- a/extreme_data/meteo_france_data/scm_models_data/utils_function.py
+++ b/extreme_data/meteo_france_data/scm_models_data/utils_function.py
@@ -37,10 +37,12 @@ def fitted_stationary_gev_with_uncertainty_interval(x_gev, fit_method=MarginFitM
                                                          coordinate)
         mean_estimate = confidence_interval.mean_estimate
         confidence_interval = confidence_interval.confidence_interval
+        return_level_list = None
     else:
         # Bootstrap method
         return_level_list = ReturnLevelBootstrap(fit_method, model_class, starting_year, x_gev,
                                                  quantile_level).compute_all_return_level()
+        return_level_list = np.array(return_level_list)
         # Remove infinite return levels and return level
         # percentage_of_inf = 100 * sum([np.isinf(r) for r in return_level_list]) / len(return_level_list)
         # print('Percentage of fit with inf = {} \%'.format(percentage_of_inf))
@@ -50,9 +52,13 @@ def fitted_stationary_gev_with_uncertainty_interval(x_gev, fit_method=MarginFitM
     else:
         confidence_interval = None
         mean_estimate = None
-    return gev_param, mean_estimate, confidence_interval
+        return_level_list = None
+    return gev_param, mean_estimate, confidence_interval, return_level_list
+
 
 class ReturnLevelBootstrap(object):
+    only_physically_plausible_fits = False
+    multiprocess = None
 
     def __init__(self, fit_method, model_class, starting_year, x_gev, quantile_level):
         self.nb_bootstrap = AbstractExtractEurocodeReturnLevel.NB_BOOTSTRAP
@@ -63,17 +69,16 @@ class ReturnLevelBootstrap(object):
         self.fit_method = fit_method
 
     def compute_all_return_level(self):
-        multiprocess = None
         idxs = list(range(self.nb_bootstrap))
 
-        if multiprocess is None:
+        if self.multiprocess is None:
             with Pool(NB_CORES) as p:
                 batchsize = math.ceil(AbstractExtractEurocodeReturnLevel.NB_BOOTSTRAP / NB_CORES)
                 list_return_level = p.map(self.compute_return_level_batch, batch(idxs, batchsize=batchsize))
             return_level_list = list(chain.from_iterable(list_return_level))
 
-        elif multiprocess:
+        elif self.multiprocess:
             with Pool(NB_CORES) as p:
                 return_level_list = p.map(self.compute_return_level, idxs)
         else:
@@ -82,7 +87,10 @@ class ReturnLevelBootstrap(object):
         return return_level_list
 
     def compute_return_level_batch(self, idxs):
-        return [self.compute_return_level(idx) for idx in idxs]
+        if self.only_physically_plausible_fits:
+            return [self.compute_return_level_physically_plausible(idx) for idx in idxs]
+        else:
+            return [self.compute_return_level(idx) for idx in idxs]
 
     def compute_return_level(self, idx):
         x = resample(self.x_gev)
@@ -90,5 +98,10 @@ class ReturnLevelBootstrap(object):
             gev_params = _fitted_stationary_gev(self.fit_method, self.model_class, self.starting_year, x)[1]
         return gev_params.quantile(self.quantile_level)
 
-
-
+    def compute_return_level_physically_plausible(self, idx):
+        gev_params = GevParams(0, 1, 0.6)
+        while not (-0.5 <= gev_params.shape < 0.5):
+            x = resample(self.x_gev)
+            with warnings.catch_warnings():
+                gev_params = _fitted_stationary_gev(self.fit_method, self.model_class, self.starting_year, x)[1]
+        return gev_params.quantile(self.quantile_level)
diff --git a/extreme_data/meteo_france_data/stations_data/comparison_analysis.py b/extreme_data/meteo_france_data/stations_data/comparison_analysis.py
index 278991570f52c5059ee35973d351a081bf666e8f..d4bd1bd125f7b7fad8db1ab7629049a35516ccc1 100644
--- a/extreme_data/meteo_france_data/stations_data/comparison_analysis.py
+++ b/extreme_data/meteo_france_data/stations_data/comparison_analysis.py
@@ -213,7 +213,7 @@ class ComparisonAnalysis(object):
         # Mean number of non-Nan values
         d['% of Nan'] = df_values_from_1958.isna().mean().mean()
         # Number of lines with only Nan
-        d['Lines w Nan'] = df_values_from_1958.isna().all(axis=1).sum_of_differences()
+        d['Lines w Nan'] = df_values_from_1958.isna().all(axis=1).sum()
         return pd.Series(d)
 
     def altitude_short_analysis(self):
diff --git a/projects/archive/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py b/projects/archive/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py
index aed8b4dbfe3c71a5757f0cf7060c0264aa85de05..e5f6b7a5575abd544f01cb9cbf21d5693d261266 100644
--- a/projects/archive/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py
+++ b/projects/archive/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py
@@ -13,7 +13,7 @@ def compute_gelman_score(means, variances, N, M):
     assert isinstance(means, pd.Series)
     assert isinstance(variances, pd.Series)
     mean = means.mean()
-    B = N * (means - mean).pow(2).sum_of_differences() / (M - 1)
+    B = N * (means - mean).pow(2).sum() / (M - 1)
     W = variances.mean()
     V_hat = (N - 1) * W / N
     V_hat += (M + 1) * B / (M * N)
diff --git a/projects/projected_swe/weight_solver/indicator.py b/projects/projected_swe/weight_solver/indicator.py
index 70311f18c9c13d55c15abdf65be9fc8c9b590552..1e086777b2fa1bac1bf31e8ea74fc2ece19cef3b 100644
--- a/projects/projected_swe/weight_solver/indicator.py
+++ b/projects/projected_swe/weight_solver/indicator.py
@@ -35,8 +35,7 @@ class ReturnLevel30YearsIndicator(AbstractIndicator):
     @classmethod
     def get_indicator(cls, study: AbstractStudy, massif_name, bootstrap=False):
         if bootstrap:
-            print(study.massif_name_to_return_level_list(return_period=30)[massif_name])
-            raise NotImplementedError
+            return study.massif_name_to_return_level_list_from_bootstrap(return_period=30)[massif_name]
         else:
             try:
                 return study.massif_name_to_return_level(return_period=30)[massif_name]
diff --git a/projects/projected_swe/weight_solver/knutti_weight_solver.py b/projects/projected_swe/weight_solver/knutti_weight_solver.py
index 6c1674c229e5ee15810c08c2c7960fdb1911f824..4f1a230a107bbcd5eec7742ca6cdd08b0b335a5f 100644
--- a/projects/projected_swe/weight_solver/knutti_weight_solver.py
+++ b/projects/projected_swe/weight_solver/knutti_weight_solver.py
@@ -1,6 +1,7 @@
 import numpy as np
 from scipy.stats import norm
 
+from extreme_data.meteo_france_data.scm_models_data.utils_function import ReturnLevelBootstrap
 from projects.projected_swe.weight_solver.abtract_weight_solver import AbstractWeightSolver
 from projects.projected_swe.weight_solver.indicator import AbstractIndicator, NllhComputationException, \
     WeightComputationException
@@ -12,11 +13,18 @@ class KnuttiWeightSolver(AbstractWeightSolver):
         super().__init__(*args, **kwargs)
         self.sigma_skill = sigma_skill
         self.sigma_interdependence = sigma_interdependence
+        # Set some parameters for the bootstrap
+        ReturnLevelBootstrap.only_physically_plausible_fits = True
+        # Set some parameters to speed up results (by caching some results)
+        self.observation_study._massif_names_for_cache = self.massif_names
+        for study in self.couple_to_study.values():
+            study._massif_names_for_cache = self.massif_names
         # Compute the subset of massif_names used for the computation
         self.massif_names_for_computation = []
         for massif_name in self.massif_names:
             try:
                 [self.compute_skill_one_massif(couple_study, massif_name) for couple_study in self.study_list]
+                [self.compute_interdependence_nllh_one_massif(couple_study, massif_name) for couple_study in self.study_list]
             except WeightComputationException:
                 continue
             self.massif_names_for_computation.append(massif_name)
@@ -50,15 +58,17 @@ class KnuttiWeightSolver(AbstractWeightSolver):
         return nllh
 
     def compute_nllh_from_two_study(self, study_1, study_2, sigma, massif_name):
-        differences = self.sum_of_differences(study_1, study_2, massif_name)
+        differences = self.differences(study_1, study_2, massif_name)
         scale = np.sqrt(np.power(sigma, 2) * self.nb_massifs_for_computation / 2)
         proba = norm.pdf(differences, 0, scale)
-        if not(0 < proba <= 1):
-            raise NllhComputationException
+        proba_positive = (proba > 0).all()
+        proba_lower_than_one = (proba <= 1).all()
+        if not (proba_positive and proba_lower_than_one):
+            raise NllhComputationException('{} {} {}'.format(proba, scale, differences))
         nllh = -np.log(proba)
         return nllh.sum()
 
-    def sum_of_differences(self, study_1, study_2, massif_name):
+    def differences(self, study_1, study_2, massif_name):
         assert issubclass(self.indicator_class, AbstractIndicator)
         return np.array([self.indicator_class.get_indicator(study_1, massif_name) -
                          self.indicator_class.get_indicator(study_2, massif_name)])
@@ -66,21 +76,19 @@ class KnuttiWeightSolver(AbstractWeightSolver):
 
 class KnuttiWeightSolverWithBootstrapVersion1(KnuttiWeightSolver):
 
-    def sum_of_differences(self, study_1, study_2, massif_name):
+    def differences(self, study_1, study_2, massif_name):
         assert issubclass(self.indicator_class, AbstractIndicator)
         bootstrap_study_1 = self.indicator_class.get_indicator(study_1, massif_name, bootstrap=True)
         bootstrap_study_2 = self.indicator_class.get_indicator(study_2, massif_name, bootstrap=True)
         differences = bootstrap_study_1 - bootstrap_study_2
-        squared_difference = np.power(differences, 2)
-        return squared_difference.sum()
+        return differences
 
 
 class KnuttiWeightSolverWithBootstrapVersion2(KnuttiWeightSolver):
 
-    def sum_of_differences(self, study_1, study_2, massif_name):
+    def differences(self, study_1, study_2, massif_name):
         assert issubclass(self.indicator_class, AbstractIndicator)
         bootstrap_study_1 = self.indicator_class.get_indicator(study_1, massif_name, bootstrap=True)
         bootstrap_study_2 = self.indicator_class.get_indicator(study_2, massif_name, bootstrap=True)
         differences = np.subtract.outer(bootstrap_study_1, bootstrap_study_2)
-        squared_difference = np.power(differences, 2)
-        return squared_difference.sum()
+        return differences
diff --git a/test/test_projects/test_projected_swe/test_model_as_truth.py b/test/test_projects/test_projected_swe/test_model_as_truth.py
index 02aec49159a9db4faa30a5ecf3305c2aa0953b78..6b10b921cdb052b7841de0233ec1a8581bb08bf1 100644
--- a/test/test_projects/test_projected_swe/test_model_as_truth.py
+++ b/test/test_projects/test_projected_swe/test_model_as_truth.py
@@ -23,21 +23,25 @@ class TestModelAsTruth(unittest.TestCase):
         couple_to_study = {c: AdamontSnowfall(altitude=altitude, scenario=scenario,
                                               year_min=year_min, year_max=year_max, gcm_rcm_couple=c)
                            for c in get_gcm_rcm_couples(adamont_scenario=scenario)}
-        massif_names = None
+        massif_names = ['Vercors']
         AbstractExtractEurocodeReturnLevel.NB_BOOTSTRAP = 10
         for knutti_weight_solver_class in [KnuttiWeightSolver, KnuttiWeightSolverWithBootstrapVersion1,
-                                           KnuttiWeightSolverWithBootstrapVersion2][:1]:
-            for indicator_class in [AnnualMaximaMeanIndicator, ReturnLevel30YearsIndicator][:1]:
+                                           KnuttiWeightSolverWithBootstrapVersion2][:]:
+            if knutti_weight_solver_class in [KnuttiWeightSolverWithBootstrapVersion1, KnuttiWeightSolverWithBootstrapVersion2]:
+                idx = 1
+            else:
+                idx = 0
+            for indicator_class in [AnnualMaximaMeanIndicator, ReturnLevel30YearsIndicator][idx:]:
                 for add_interdependence_weight in [False, True]:
-                    knutti_weight = knutti_weight_solver_class(sigma_skill=10.0, sigma_interdependence=10.0,
+                    knutti_weight = knutti_weight_solver_class(sigma_skill=100.0, sigma_interdependence=100.0,
                                                                massif_names=massif_names,
                                                                observation_study=observation_study,
                                                                couple_to_study=couple_to_study,
                                                                indicator_class=indicator_class,
                                                                add_interdependence_weight=add_interdependence_weight
                                                                )
-                    # print(knutti_weight.couple_to_weight)
+                    print(knutti_weight.couple_to_weight)
                     weight = knutti_weight.couple_to_weight[('CNRM-CM5', 'CCLM4-8-17')]
                     self.assertFalse(np.isnan(weight))
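
Note on the reworked likelihood in knutti_weight_solver.py: compute_nllh_from_two_study now receives a vector (or, for KnuttiWeightSolverWithBootstrapVersion2, a matrix) of bootstrap differences from differences() and sums the per-difference negative log-densities itself, rather than operating on a pre-summed squared difference. The lines below are a minimal standalone sketch of that calculation, assuming only numpy and scipy; the sample values, sigma and nb_massifs are illustrative placeholders, not values taken from the code base.

    import numpy as np
    from scipy.stats import norm

    # Illustrative bootstrap samples of a 30-year return level for one massif,
    # one array per study (the numbers are made up for the example).
    bootstrap_study_1 = np.array([110.0, 118.0, 105.0, 121.0])
    bootstrap_study_2 = np.array([108.0, 112.0, 125.0])

    # Version 2 of the solver compares every pair of bootstrap members.
    differences = np.subtract.outer(bootstrap_study_1, bootstrap_study_2)  # shape (4, 3)

    # Same scaling as compute_nllh_from_two_study: sigma plays the role of
    # sigma_skill (or sigma_interdependence), nb_massifs the number of massifs
    # kept for the computation.
    sigma, nb_massifs = 100.0, 1
    scale = np.sqrt(sigma ** 2 * nb_massifs / 2)

    proba = norm.pdf(differences, 0, scale)
    if not ((proba > 0).all() and (proba <= 1).all()):
        raise ValueError('density outside (0, 1]')  # the solver raises NllhComputationException here
    nllh = -np.log(proba).sum()
    print(nllh)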