diff --git a/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py b/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py index 7df116048d418278a9654f4fbf3a525b00ab8e1d..576804ef45023b3985a0d1655187846cfe9670bb 100644 --- a/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py +++ b/experiment/paper_past_snow_loads/check_mcmc_convergence_for_return_levels/gelman_convergence_test.py @@ -1,5 +1,65 @@ +import numpy as np +import pandas as pd +from experiment.trend_analysis.univariate_test.utils import load_temporal_coordinates_and_dataset, \ + fitted_linear_margin_estimator +from extreme_fit.model.margin_model.linear_margin_model.abstract_temporal_linear_margin_model import \ + TemporalMarginFitMethod +from extreme_fit.model.result_from_model_fit.result_from_extremes.result_from_bayesian_extremes import \ + ResultFromBayesianExtremes +from extreme_fit.model.utils import r + + +def compute_gelman_score(means, variances, N, M): + mean = means.mean() + B = N * (means - mean).sum() / (M - 1) + W = variances.mean() + V_hat = (N -1) * W / N + V_hat += (M + 1) * B / (M * N) + return V_hat / W + +def compute_refined_gelman_score(means, variances, N, M): + R = compute_gelman_score(means, variances, N, M) + # todo: check how to d + # d = 2 * V_hat / W + # R = (d + 3) * V_hat + # R /= (d + 1) * W + # return np.sqrt(R) + def compute_gelman_convergence_value(non_null_years_and_maxima, mcmc_iterations, model_class, nb_chains): - return 1.0 + s_means, s_variances = [], [] + df_params_start_fit = sample_starting_value(nb_chains) + for i, row in df_params_start_fit.iterrows(): + s_mean, s_variance = compute_mean_and_variance(mcmc_iterations, model_class, non_null_years_and_maxima, + params_start_fit=row.to_dict()) + s_means.append(s_mean) + s_variances.append(s_variance) + df_mean = pd.concat(s_means, axis=1).transpose() + df_variance = pd.concat(s_variances, axis=1).transpose() + Rs = [] + for param_name in df_params_start_fit.columns: + R = compute_gelman_score(df_mean[param_name], df_variance[param_name], N=mcmc_iterations//2, M=nb_chains) + Rs.append(R) + return max(Rs) + + +def sample_starting_value(nb_chains): + return pd.DataFrame({ + 'shape': np.array(r.rbeta(nb_chains, 6, 9)) - 0.5, + 'location': np.array(r.rnorm(nb_chains, 0, 1)), + 'scale': np.array(r.rexp(nb_chains, 1)), + }) + + +def compute_mean_and_variance(mcmc_iterations, model_class, non_null_years_and_maxima, params_start_fit): + maxima, years = non_null_years_and_maxima + coordinates, dataset = load_temporal_coordinates_and_dataset(maxima, years) + fitted_estimator = fitted_linear_margin_estimator(model_class, coordinates, dataset, starting_year=None, + fit_method=TemporalMarginFitMethod.extremes_fevd_bayesian, + nb_iterations_for_bayesian_fit=mcmc_iterations, + params_start_fit_bayesian=params_start_fit) + res = fitted_estimator.result_from_model_fit # type: ResultFromBayesianExtremes + df = res.df_posterior_samples.iloc[:, :-2] + return df.mean(axis=0), df.std(axis=0) -# rbeta(10000, 6, 9) - 0.5 \ No newline at end of file +# diff --git a/extreme_fit/distribution/gev/main_fevd_bayesian.R b/extreme_fit/distribution/gev/main_fevd_bayesian.R index 8cea57bbac012a40ce328cf99c909ce5b05ee6f5..416cb7837fb70fa2c7331edbb4ff57f052d630c3 100644 --- a/extreme_fit/distribution/gev/main_fevd_bayesian.R +++ b/extreme_fit/distribution/gev/main_fevd_bayesian.R @@ -48,7 +48,9 @@ coord[,1]=seq(0,N-1,1) print(coord) colnames(coord) = c("T") coord = data.frame(coord, stringsAsFactors = TRUE) -res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE) +res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='Bayesian', priorFun="fevdPriorCustom", +priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE, +initial=list()) # res = fevd_fixed(x_gev, data=coord, method='Bayesian', priorFun="fevdPriorCustom", priorParams=list(q=c(6), p=c(9)), iter=5000, verbose=TRUE, use.phi=FALSE) print(res) diff --git a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py index 161033e938c54af4620adf4c472a292808057842..3dd02216f9069fff78f768ce3150b7823c01bfcb 100644 --- a/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py +++ b/extreme_fit/model/margin_model/linear_margin_model/abstract_temporal_linear_margin_model.py @@ -25,8 +25,9 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): def __init__(self, coordinates: AbstractCoordinates, use_start_value=False, params_start_fit=None, params_sample=None, starting_point=None, fit_method=TemporalMarginFitMethod.is_mev_gev_fit, - nb_iterations_for_bayesian_fit=5000): + nb_iterations_for_bayesian_fit=5000, params_start_fit_bayesian=None): super().__init__(coordinates, use_start_value, params_start_fit, params_sample, starting_point) + self.params_start_fit_bayesian = params_start_fit_bayesian self.nb_iterations_for_bayesian_fit = nb_iterations_for_bayesian_fit assert isinstance(fit_method, TemporalMarginFitMethod) self.fit_method = fit_method @@ -65,6 +66,8 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): def extremes_fevd_bayesian_fit(self, x, df_coordinates_temp) -> AbstractResultFromExtremes: r_type_argument_kwargs, y = self.extreme_arguments(df_coordinates_temp) + params_start_fit = self.params_start_fit_bayesian if self.params_start_fit_bayesian is not None else {} + r_type_argument_kwargs['initial'] = r.list(**params_start_fit) # Assert for any non-stationary model that the shape parameter is constant # (because the prior function considers that the last parameter should be the shape) assert GevParams.SHAPE not in self.margin_function_start_fit.gev_param_name_to_dims \