Commit 554e023e authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[PAPER 1] fix gelman convergence score formula, take the variance instead of...

[PAPER 1] fix gelman convergence score formula, take the variance instead of the std. improve bayesian display
parent 26d26f08
No related merge requests found
Showing with 89 additions and 27 deletions
+89 -27
...@@ -10,13 +10,16 @@ from extreme_fit.model.utils import r ...@@ -10,13 +10,16 @@ from extreme_fit.model.utils import r
def compute_gelman_score(means, variances, N, M): def compute_gelman_score(means, variances, N, M):
assert isinstance(means, pd.Series)
assert isinstance(variances, pd.Series)
mean = means.mean() mean = means.mean()
B = N * (means - mean).sum() / (M - 1) B = N * (means - mean).pow(2).sum() / (M - 1)
W = variances.mean() W = variances.mean()
V_hat = (N -1) * W / N V_hat = (N - 1) * W / N
V_hat += (M + 1) * B / (M * N) V_hat += (M + 1) * B / (M * N)
return V_hat / W return V_hat / W
def compute_refined_gelman_score(means, variances, N, M): def compute_refined_gelman_score(means, variances, N, M):
R = compute_gelman_score(means, variances, N, M) R = compute_gelman_score(means, variances, N, M)
# todo: check how to d # todo: check how to d
...@@ -31,16 +34,15 @@ def compute_gelman_convergence_value(non_null_years_and_maxima, mcmc_iterations, ...@@ -31,16 +34,15 @@ def compute_gelman_convergence_value(non_null_years_and_maxima, mcmc_iterations,
df_params_start_fit = sample_starting_value(nb_chains) df_params_start_fit = sample_starting_value(nb_chains)
for i, row in df_params_start_fit.iterrows(): for i, row in df_params_start_fit.iterrows():
s_mean, s_variance = compute_mean_and_variance(mcmc_iterations, model_class, non_null_years_and_maxima, s_mean, s_variance = compute_mean_and_variance(mcmc_iterations, model_class, non_null_years_and_maxima,
params_start_fit=row.to_dict()) params_start_fit=row.to_dict())
s_means.append(s_mean) s_means.append(s_mean)
s_variances.append(s_variance) s_variances.append(s_variance)
df_mean = pd.concat(s_means, axis=1).transpose() df_mean = pd.concat(s_means, axis=1).transpose()
df_variance = pd.concat(s_variances, axis=1).transpose() df_variance = pd.concat(s_variances, axis=1).transpose()
Rs = [] param_name_to_R = {param_name: compute_gelman_score(df_mean[param_name], df_variance[param_name], N=mcmc_iterations // 2, M=nb_chains)
for param_name in df_params_start_fit.columns: for param_name in df_params_start_fit.columns}
R = compute_gelman_score(df_mean[param_name], df_variance[param_name], N=mcmc_iterations//2, M=nb_chains) print(param_name_to_R)
Rs.append(R) return max(param_name_to_R.values())
return max(Rs)
def sample_starting_value(nb_chains): def sample_starting_value(nb_chains):
...@@ -59,7 +61,19 @@ def compute_mean_and_variance(mcmc_iterations, model_class, non_null_years_and_m ...@@ -59,7 +61,19 @@ def compute_mean_and_variance(mcmc_iterations, model_class, non_null_years_and_m
nb_iterations_for_bayesian_fit=mcmc_iterations, nb_iterations_for_bayesian_fit=mcmc_iterations,
params_start_fit_bayesian=params_start_fit) params_start_fit_bayesian=params_start_fit)
res = fitted_estimator.result_from_model_fit # type: ResultFromBayesianExtremes res = fitted_estimator.result_from_model_fit # type: ResultFromBayesianExtremes
df = res.df_posterior_samples.iloc[:, :-2] return res.mean_posterior_parameters, res.variance_posterior_parameters
return df.mean(axis=0), df.std(axis=0)
# #
def test_gelman():
M = 3
epsi = 1e-2
means = pd.Series([1 + i *epsi for i in range(M)])
variances = pd.Series([1 for _ in range(M)])
N = 5000
res = compute_gelman_score(means, variances, M, N)
print(res)
if __name__ == '__main__':
test_gelman()
import pandas as pd
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from experiment.meteo_france_data.scm_models_data.crocus.crocus import CrocusSwe3Days, CrocusSnowLoadTotal from experiment.meteo_france_data.scm_models_data.crocus.crocus import CrocusSwe3Days, CrocusSnowLoadTotal
from experiment.meteo_france_data.scm_models_data.visualization.utils import create_adjusted_axes from experiment.meteo_france_data.scm_models_data.visualization.utils import create_adjusted_axes
from experiment.paper_past_snow_loads.check_mcmc_convergence_for_return_levels.gelman_convergence_test import \
compute_gelman_score
from experiment.trend_analysis.univariate_test.utils import load_temporal_coordinates_and_dataset, \ from experiment.trend_analysis.univariate_test.utils import load_temporal_coordinates_and_dataset, \
fitted_linear_margin_estimator fitted_linear_margin_estimator
from extreme_fit.distribution.gev.gev_params import GevParams from extreme_fit.distribution.gev.gev_params import GevParams
...@@ -15,8 +18,23 @@ from extreme_fit.model.result_from_model_fit.result_from_extremes.confidence_int ...@@ -15,8 +18,23 @@ from extreme_fit.model.result_from_model_fit.result_from_extremes.confidence_int
ConfidenceIntervalMethodFromExtremes ConfidenceIntervalMethodFromExtremes
def main_drawing_bayesian(): def main_drawing_bayesian(N=10000):
return_level_bayesian = get_return_level_bayesian_example() nb_chains = 3
means, variances = [], []
for i in range(nb_chains):
result_from_fit = plot_chain(N, show=False).result_from_fit
means.append(result_from_fit.mean_posterior_parameters)
variances.append(result_from_fit.variance_posterior_parameters)
means, variances = pd.DataFrame(means).transpose(), pd.DataFrame(variances).transpose()
scores = []
for (_, row1), (_, row2) in zip(means.iterrows(), variances.iterrows()):
score = compute_gelman_score(row1, row2, N, nb_chains)
scores.append(score)
print(scores)
def plot_chain(N=10000, show=True):
return_level_bayesian = get_return_level_bayesian_example(N * 2)
print(return_level_bayesian.result_from_fit.df_all_samples) print(return_level_bayesian.result_from_fit.df_all_samples)
print(return_level_bayesian.result_from_fit.df_posterior_samples) print(return_level_bayesian.result_from_fit.df_posterior_samples)
print(return_level_bayesian.posterior_eurocode_return_level_samples_for_temporal_covariate) print(return_level_bayesian.posterior_eurocode_return_level_samples_for_temporal_covariate)
...@@ -32,7 +50,7 @@ def main_drawing_bayesian(): ...@@ -32,7 +50,7 @@ def main_drawing_bayesian():
gev_param_name_to_color = dict(zip(GevParams.PARAM_NAMES, colors)) gev_param_name_to_color = dict(zip(GevParams.PARAM_NAMES, colors))
gev_param_name_to_ax = dict( gev_param_name_to_ax = dict(
zip(GevParams.PARAM_NAMES, [ax_trajectories, ax_trajectories, ax_trajectories])) zip(GevParams.PARAM_NAMES, [ax_trajectories, ax_trajectories, ax_trajectories]))
# zip(GevParams.PARAM_NAMES, [ax_trajectories, ax_trajectories, ax_trajectories_inverted])) # zip(GevParams.PARAM_NAMES, [ax_trajectories, ax_trajectories, ax_trajectories_inverted]))
gev_param_name_to_label = {n: GevParams.greek_letter_from_gev_param_name(n) for n in GevParams.PARAM_NAMES} gev_param_name_to_label = {n: GevParams.greek_letter_from_gev_param_name(n) for n in GevParams.PARAM_NAMES}
for j, gev_param_name in enumerate(GevParams.PARAM_NAMES[:]): for j, gev_param_name in enumerate(GevParams.PARAM_NAMES[:]):
label = gev_param_name_to_label[gev_param_name] label = gev_param_name_to_label[gev_param_name]
...@@ -50,12 +68,12 @@ def main_drawing_bayesian(): ...@@ -50,12 +68,12 @@ def main_drawing_bayesian():
ax_trajectories_inverted.legend(loc=1) ax_trajectories_inverted.legend(loc=1)
ax_trajectories.legend(loc=2) ax_trajectories.legend(loc=2)
ax_trajectories.set_xlabel("MCMC iterations") ax_trajectories.set_xlabel("MCMC iterations")
# Plot the parameter posterior on axes 1 # Plot the parameter posterior on axes 1
ax_parameter_posterior = axes[1] ax_parameter_posterior = axes[1]
ax_parameter_posterior_flip = ax_parameter_posterior.twiny() ax_parameter_posterior_flip = ax_parameter_posterior.twiny()
gev_param_name_to_ax = dict( gev_param_name_to_ax = dict(
zip(GevParams.PARAM_NAMES, [ax_parameter_posterior, ax_parameter_posterior.twiny(), ax_parameter_posterior_flip])) zip(GevParams.PARAM_NAMES,
[ax_parameter_posterior, ax_parameter_posterior.twiny(), ax_parameter_posterior_flip]))
df_posterior_samples = return_level_bayesian.result_from_fit.df_posterior_samples df_posterior_samples = return_level_bayesian.result_from_fit.df_posterior_samples
lns = [] lns = []
for j, gev_param_name in enumerate(GevParams.PARAM_NAMES[:]): for j, gev_param_name in enumerate(GevParams.PARAM_NAMES[:]):
...@@ -66,18 +84,19 @@ def main_drawing_bayesian(): ...@@ -66,18 +84,19 @@ def main_drawing_bayesian():
lns.append(ln) lns.append(ln)
labs = [l.get_label() for l in lns] labs = [l.get_label() for l in lns]
ax_parameter_posterior.legend(lns, labs, loc=0) ax_parameter_posterior.legend(lns, labs, loc=0)
# Plot the return level posterior on the last axes # Plot the return level posterior on the last axes
ax_return_level_posterior = axes[2] ax_return_level_posterior = axes[2]
sns.kdeplot(return_level_bayesian.posterior_eurocode_return_level_samples_for_temporal_covariate, sns.kdeplot(return_level_bayesian.posterior_eurocode_return_level_samples_for_temporal_covariate,
ax=ax_return_level_posterior, color=colors[-1]) ax=ax_return_level_posterior, color=colors[-1])
ax_return_level_posterior.set_xlabel("$z_p(\\theta)$") ax_return_level_posterior.set_xlabel("$z_p(\\theta)$")
ax_return_level_posterior.set_ylabel("$p(z_p(\\theta)|y)$") ax_return_level_posterior.set_ylabel("$p(z_p(\\theta)|y)$")
if show:
plt.show() plt.show()
return return_level_bayesian
def get_return_level_bayesian_example(): def get_return_level_bayesian_example(nb_iterations_for_bayesian_fit):
# It converges well because we also take the zero values into account
maxima, years = CrocusSnowLoadTotal(altitude=1800).annual_maxima_and_years('Vercors') maxima, years = CrocusSnowLoadTotal(altitude=1800).annual_maxima_and_years('Vercors')
# plt.plot(years, maxima) # plt.plot(years, maxima)
# plt.show() # plt.show()
...@@ -85,7 +104,7 @@ def get_return_level_bayesian_example(): ...@@ -85,7 +104,7 @@ def get_return_level_bayesian_example():
coordinates, dataset = load_temporal_coordinates_and_dataset(maxima, years) coordinates, dataset = load_temporal_coordinates_and_dataset(maxima, years)
fitted_estimator = fitted_linear_margin_estimator(model_class, coordinates, dataset, starting_year=1958, fitted_estimator = fitted_linear_margin_estimator(model_class, coordinates, dataset, starting_year=1958,
fit_method=TemporalMarginFitMethod.extremes_fevd_bayesian, fit_method=TemporalMarginFitMethod.extremes_fevd_bayesian,
nb_iterations_for_bayesian_fit=100000) nb_iterations_for_bayesian_fit=nb_iterations_for_bayesian_fit)
return_level_bayesian = ExtractEurocodeReturnLevelFromMyBayesianExtremes(estimator=fitted_estimator, return_level_bayesian = ExtractEurocodeReturnLevelFromMyBayesianExtremes(estimator=fitted_estimator,
ci_method=ConfidenceIntervalMethodFromExtremes.my_bayes, ci_method=ConfidenceIntervalMethodFromExtremes.my_bayes,
temporal_covariate=2017) temporal_covariate=2017)
...@@ -94,3 +113,5 @@ def get_return_level_bayesian_example(): ...@@ -94,3 +113,5 @@ def get_return_level_bayesian_example():
if __name__ == '__main__': if __name__ == '__main__':
main_drawing_bayesian() main_drawing_bayesian()
plt.plot()
# plot_chain()
...@@ -2,6 +2,7 @@ import pandas as pd ...@@ -2,6 +2,7 @@ import pandas as pd
from experiment.paper_past_snow_loads.paper_utils import paper_altitudes, paper_study_classes, \ from experiment.paper_past_snow_loads.paper_utils import paper_altitudes, paper_study_classes, \
load_altitude_to_visualizer load_altitude_to_visualizer
from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import StationaryTemporalModel
from root_utils import get_display_name_from_object_type
def gelman_convergence_test(mcmc_iterations, model_class, altitudes, study_class, nb_chains=3, massif_names=None): def gelman_convergence_test(mcmc_iterations, model_class, altitudes, study_class, nb_chains=3, massif_names=None):
...@@ -30,10 +31,24 @@ and the for the 3 variables considered: GSL, GSL from eurocode, GLS in 3 days ...@@ -30,10 +31,24 @@ and the for the 3 variables considered: GSL, GSL from eurocode, GLS in 3 days
""" """
if __name__ == '__main__': if __name__ == '__main__':
mcmc_iterations = 1000 for half_mcmc_iterations in [10000, 50000, 100000, 1000000][-1:]:
df = gelman_convergence_test(mcmc_iterations=mcmc_iterations, altitudes=paper_altitudes[:1], for study_class in paper_study_classes[:1]:
study_class=paper_study_classes[0], model_class=StationaryTemporalModel, study_name = get_display_name_from_object_type(study_class)
massif_names=['Chartreuse']) print(study_name, half_mcmc_iterations)
print(mcmc_iterations) # df = gelman_convergence_test(mcmc_iterations=mcmc_iterations, altitudes=paper_altitudes[:],
print(df.head()) # study_class=study_class, model_class=StationaryTemporalModel,
print('Overall maxima:', df.max().max()) # massif_names=None,
# nb_chains=3)
mcmc_iterations = 2 * half_mcmc_iterations
df = gelman_convergence_test(mcmc_iterations=mcmc_iterations, altitudes=paper_altitudes[:1],
study_class=study_class, model_class=StationaryTemporalModel,
massif_names=['Vercors'],
nb_chains=3)
csv_filename = '{}_{}_{}_{}.csv'.format(study_name, mcmc_iterations, df.max().max(), df.mean().mean())
df.to_csv(csv_filename)
#
# print(df.head())
# df.to_csv(csv_filename)
# print('Overall maxima for {} iterations:'.format(mcmc_iterations), df.max().max())
...@@ -36,6 +36,18 @@ class ResultFromBayesianExtremes(AbstractResultFromExtremes): ...@@ -36,6 +36,18 @@ class ResultFromBayesianExtremes(AbstractResultFromExtremes):
def df_posterior_samples(self) -> pd.DataFrame: def df_posterior_samples(self) -> pd.DataFrame:
return self.df_all_samples.iloc[self.burn_in_nb:, :] return self.df_all_samples.iloc[self.burn_in_nb:, :]
@property
def df_posterior_parameters(self) -> pd.DataFrame:
return self.df_posterior_samples.iloc[:, :-2]
@property
def mean_posterior_parameters(self):
return self.df_posterior_parameters.mean(axis=0)
@property
def variance_posterior_parameters(self):
return self.df_posterior_parameters.mean(axis=0)
def get_coef_dict_from_posterior_sample(self, s: pd.Series): def get_coef_dict_from_posterior_sample(self, s: pd.Series):
assert len(s) >= 3 assert len(s) >= 3
values = {i: v for i, v in enumerate(s)} values = {i: v for i, v in enumerate(s)}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment