diff --git a/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py b/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py index 576804ef45023b3985a0d1655187846cfe9670bb..238f1a97c095e4faa7fb1a2944776726d4d3238c 100644 --- a/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py +++ b/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py @@ -10,13 +10,16 @@ from extreme_fit.model.utils import r def compute_gelman_score(means, variances, N, M): + assert isinstance(means, pd.Series) + assert isinstance(variances, pd.Series) mean = means.mean() - B = N * (means - mean).sum() / (M - 1) + B = N * (means - mean).pow(2).sum() / (M - 1) W = variances.mean() - V_hat = (N -1) * W / N + V_hat = (N - 1) * W / N V_hat += (M + 1) * B / (M * N) return V_hat / W + def compute_refined_gelman_score(means, variances, N, M): R = compute_gelman_score(means, variances, N, M) # todo: check how to d @@ -31,16 +34,15 @@ def compute_gelman_convergence_value(non_null_years_and_maxima, mcmc_iterations, df_params_start_fit = sample_starting_value(nb_chains) for i, row in df_params_start_fit.iterrows(): s_mean, s_variance = compute_mean_and_variance(mcmc_iterations, model_class, non_null_years_and_maxima, - params_start_fit=row.to_dict()) + params_start_fit=row.to_dict()) s_means.append(s_mean) s_variances.append(s_variance) df_mean = pd.concat(s_means, axis=1).transpose() df_variance = pd.concat(s_variances, axis=1).transpose() - Rs = [] - for param_name in df_params_start_fit.columns: - R = compute_gelman_score(df_mean[param_name], df_variance[param_name], N=mcmc_iterations//2, M=nb_chains) - Rs.append(R) - return max(Rs) + param_name_to_R = {param_name: compute_gelman_score(df_mean[param_name], df_variance[param_name], N=mcmc_iterations // 2, M=nb_chains) + for param_name in df_params_start_fit.columns} + 
print(param_name_to_R) + return max(param_name_to_R.values()) def sample_starting_value(nb_chains): @@ -59,7 +61,19 @@ def compute_mean_and_variance(mcmc_iterations, model_class, non_null_years_and_m nb_iterations_for_bayesian_fit=mcmc_iterations, params_start_fit_bayesian=params_start_fit) res = fitted_estimator.result_from_model_fit # type: ResultFromBayesianExtremes - df = res.df_posterior_samples.iloc[:, :-2] - return df.mean(axis=0), df.std(axis=0) + return res.mean_posterior_parameters, res.variance_posterior_parameters + # +def test_gelman(): + M = 3 + epsi = 1e-2 + means = pd.Series([1 + i *epsi for i in range(M)]) + variances = pd.Series([1 for _ in range(M)]) + N = 5000 + res = compute_gelman_score(means, variances, N, M) + print(res) + + +if __name__ == '__main__': + test_gelman() diff --git a/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/main_bayesian_mcmc.py b/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/main_bayesian_mcmc.py index 1a2a7d47a54da1e7329aede3b46e013f5b31a90b..fffe27da6c8f89fc89ccb5f66bfc405c9e7cb38f 100644 --- a/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/main_bayesian_mcmc.py +++ b/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/main_bayesian_mcmc.py @@ -1,8 +1,11 @@ +import pandas as pd import seaborn as sns import matplotlib.pyplot as plt from experiment.meteo_france_data.scm_models_data.crocus.crocus import CrocusSwe3Days, CrocusSnowLoadTotal from experiment.meteo_france_data.scm_models_data.visualization.utils import create_adjusted_axes +from experiment.paper_past_snow_loads.check_mcmc_convergence_for_return_levels.gelman_convergence_test import \ + compute_gelman_score from experiment.trend_analysis.univariate_test.utils import load_temporal_coordinates_and_dataset, \ fitted_linear_margin_estimator from extreme_fit.distribution.gev.gev_params import GevParams @@ -15,8 +18,23 @@ from 
extreme_fit.model.result_from_model_fit.result_from_extremes.confidence_int ConfidenceIntervalMethodFromExtremes -def main_drawing_bayesian(): - return_level_bayesian = get_return_level_bayesian_example() +def main_drawing_bayesian(N=10000): + nb_chains = 3 + means, variances = [], [] + for i in range(nb_chains): + result_from_fit = plot_chain(N, show=False).result_from_fit + means.append(result_from_fit.mean_posterior_parameters) + variances.append(result_from_fit.variance_posterior_parameters) + means, variances = pd.DataFrame(means).transpose(), pd.DataFrame(variances).transpose() + scores = [] + for (_, row1), (_, row2) in zip(means.iterrows(), variances.iterrows()): + score = compute_gelman_score(row1, row2, N, nb_chains) + scores.append(score) + print(scores) + + +def plot_chain(N=10000, show=True): + return_level_bayesian = get_return_level_bayesian_example(N * 2) print(return_level_bayesian.result_from_fit.df_all_samples) print(return_level_bayesian.result_from_fit.df_posterior_samples) print(return_level_bayesian.posterior_eurocode_return_level_samples_for_temporal_covariate) @@ -32,7 +50,7 @@ def main_drawing_bayesian(): gev_param_name_to_color = dict(zip(GevParams.PARAM_NAMES, colors)) gev_param_name_to_ax = dict( zip(GevParams.PARAM_NAMES, [ax_trajectories, ax_trajectories, ax_trajectories])) - # zip(GevParams.PARAM_NAMES, [ax_trajectories, ax_trajectories, ax_trajectories_inverted])) + # zip(GevParams.PARAM_NAMES, [ax_trajectories, ax_trajectories, ax_trajectories_inverted])) gev_param_name_to_label = {n: GevParams.greek_letter_from_gev_param_name(n) for n in GevParams.PARAM_NAMES} for j, gev_param_name in enumerate(GevParams.PARAM_NAMES[:]): label = gev_param_name_to_label[gev_param_name] @@ -50,12 +68,12 @@ def main_drawing_bayesian(): ax_trajectories_inverted.legend(loc=1) ax_trajectories.legend(loc=2) ax_trajectories.set_xlabel("MCMC iterations") - # Plot the parameter posterior on axes 1 ax_parameter_posterior = axes[1] ax_parameter_posterior_flip 
= ax_parameter_posterior.twiny() gev_param_name_to_ax = dict( - zip(GevParams.PARAM_NAMES, [ax_parameter_posterior, ax_parameter_posterior.twiny(), ax_parameter_posterior_flip])) + zip(GevParams.PARAM_NAMES, + [ax_parameter_posterior, ax_parameter_posterior.twiny(), ax_parameter_posterior_flip])) df_posterior_samples = return_level_bayesian.result_from_fit.df_posterior_samples lns = [] for j, gev_param_name in enumerate(GevParams.PARAM_NAMES[:]): @@ -66,18 +84,19 @@ def main_drawing_bayesian(): lns.append(ln) labs = [l.get_label() for l in lns] ax_parameter_posterior.legend(lns, labs, loc=0) - # Plot the return level posterior on the last axes ax_return_level_posterior = axes[2] sns.kdeplot(return_level_bayesian.posterior_eurocode_return_level_samples_for_temporal_covariate, ax=ax_return_level_posterior, color=colors[-1]) ax_return_level_posterior.set_xlabel("$z_p(\\theta)$") ax_return_level_posterior.set_ylabel("$p(z_p(\\theta)|y)$") - - plt.show() + if show: + plt.show() + return return_level_bayesian -def get_return_level_bayesian_example(): +def get_return_level_bayesian_example(nb_iterations_for_bayesian_fit): + # It converges well because we also take the zero values into account maxima, years = CrocusSnowLoadTotal(altitude=1800).annual_maxima_and_years('Vercors') # plt.plot(years, maxima) # plt.show() @@ -85,7 +104,7 @@ def get_return_level_bayesian_example(): coordinates, dataset = load_temporal_coordinates_and_dataset(maxima, years) fitted_estimator = fitted_linear_margin_estimator(model_class, coordinates, dataset, starting_year=1958, fit_method=TemporalMarginFitMethod.extremes_fevd_bayesian, - nb_iterations_for_bayesian_fit=100000) + nb_iterations_for_bayesian_fit=nb_iterations_for_bayesian_fit) return_level_bayesian = ExtractEurocodeReturnLevelFromMyBayesianExtremes(estimator=fitted_estimator, ci_method=ConfidenceIntervalMethodFromExtremes.my_bayes, temporal_covariate=2017) @@ -94,3 +113,5 @@ def get_return_level_bayesian_example(): if __name__ == 
'__main__': main_drawing_bayesian() + plt.plot() + # plot_chain() diff --git a/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/main_gelman_convergence_test.py b/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/main_gelman_convergence_test.py index d435acf8072f5eb0b7c2bf471c300bc30c917839..66e8c43a2877dd1dbc8ef18849028bd606292a78 100644 --- a/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/main_gelman_convergence_test.py +++ b/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/main_gelman_convergence_test.py @@ -2,6 +2,7 @@ import pandas as pd from experiment.paper_past_snow_loads.paper_utils import paper_altitudes, paper_study_classes, \ load_altitude_to_visualizer from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel +from root_utils import get_display_name_from_object_type def gelman_convergence_test(mcmc_iterations, model_class, altitudes, study_class, nb_chains=3, massif_names=None): @@ -30,10 +31,24 @@ and the for the 3 variables considered: GSL, GSL from eurocode, GLS in 3 days """ if __name__ == '__main__': - mcmc_iterations = 1000 - df = gelman_convergence_test(mcmc_iterations=mcmc_iterations, altitudes=paper_altitudes[:1], - study_class=paper_study_classes[0], model_class=StationaryTemporalModel, - massif_names=['Chartreuse']) - print(mcmc_iterations) - print(df.head()) - print('Overall maxima:', df.max().max()) + for half_mcmc_iterations in [10000, 50000, 100000, 1000000][-1:]: + for study_class in paper_study_classes[:1]: + study_name = get_display_name_from_object_type(study_class) + print(study_name, half_mcmc_iterations) + # df = gelman_convergence_test(mcmc_iterations=mcmc_iterations, altitudes=paper_altitudes[:], + # study_class=study_class, model_class=StationaryTemporalModel, + # massif_names=None, + # nb_chains=3) + mcmc_iterations = 2 * half_mcmc_iterations + df = 
gelman_convergence_test(mcmc_iterations=mcmc_iterations, altitudes=paper_altitudes[:1], + study_class=study_class, model_class=StationaryTemporalModel, + massif_names=['Vercors'], + nb_chains=3) + csv_filename = '{}_{}_{}_{}.csv'.format(study_name, mcmc_iterations, df.max().max(), df.mean().mean()) + df.to_csv(csv_filename) + + + # + # print(df.head()) + # df.to_csv(csv_filename) + # print('Overall maxima for {} iterations:'.format(mcmc_iterations), df.max().max()) diff --git a/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_bayesian_extremes.py b/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_bayesian_extremes.py index acb087e6348edbff3a5887b564cc0fd1cd4395e0..21d30fd4de1d1dd0f94f2afd79ed4c13623df211 100644 --- a/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_bayesian_extremes.py +++ b/extreme_fit/model/result_from_model_fit/result_from_extremes/result_from_bayesian_extremes.py @@ -36,6 +36,18 @@ class ResultFromBayesianExtremes(AbstractResultFromExtremes): def df_posterior_samples(self) -> pd.DataFrame: return self.df_all_samples.iloc[self.burn_in_nb:, :] + @property + def df_posterior_parameters(self) -> pd.DataFrame: + return self.df_posterior_samples.iloc[:, :-2] + + @property + def mean_posterior_parameters(self): + return self.df_posterior_parameters.mean(axis=0) + + @property + def variance_posterior_parameters(self): + return self.df_posterior_parameters.var(axis=0) + def get_coef_dict_from_posterior_sample(self, s: pd.Series): assert len(s) >= 3 values = {i: v for i, v in enumerate(s)}