Commit 815d8196 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[contrasting] add default pval equal to zero in gnFit (to avoid the program to...

[contrasting] add default pval equal to zero in gnFit (to avoid the program to crash when called by python). refactor anderson test. add anderson test by default for the one fold analysis.
parent 2704b134
No related merge requests found
Showing with 85 additions and 58 deletions
+85 -58
......@@ -77,6 +77,7 @@ gnfit_fixed <- function (dat, dist, df = NULL, pr = NULL, threshold = NULL)
pval <- exp(1.111 - 34.242 * w + 12.832 * w^2)
}
else {
pval <- 0 # I added that to avoid the code to crash
warning("p-value is smaller than 7.37e-10")
}
z$Wpval <- pval
......@@ -96,6 +97,7 @@ gnfit_fixed <- function (dat, dist, df = NULL, pr = NULL, threshold = NULL)
pval <- exp(1.2937 - 5.709 * A + 0.0186 * A^2)
}
else {
pval <- 0 # I added that to avoid the code to crash
warning("p-value is smaller than 7.37e-10")
}
z$Apval <- pval
......
......@@ -11,5 +11,11 @@ def cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution(
return res['Wpval'], res['Apval']
def goodness_of_fit_anderson(quantiles, significance_level=0.05):
test = cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution(quantiles)
_, ander_darling_test_pvalue = test
return ander_darling_test_pvalue > significance_level
if __name__ == '__main__':
cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution(np.arange(50))
......@@ -11,7 +11,7 @@ from extreme_data.eurocode_data.utils import EUROCODE_QUANTILE, YEAR_OF_INTEREST
from extreme_data.meteo_france_data.scm_models_data.crocus.crocus_variables import AbstractSnowLoadVariable
from extreme_fit.distribution.gev.gev_params import GevParams
from extreme_fit.distribution.gumbel.gumbel_gof import \
cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution
cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution, goodness_of_fit_anderson
from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator
from extreme_fit.model.margin_model.linear_margin_model.temporal_linear_margin_models import \
StationaryTemporalModel, GumbelTemporalModel
......@@ -70,18 +70,8 @@ class AbstractGevTrendTest(object):
@property
def goodness_of_fit_anderson_test(self):
# significance_level_to_index = dict(zip([0.25, 0.1, 0.05, 0.025, 0.01], list(range(5))))
# print(significance_level_to_index)
# assert self.SIGNIFICANCE_LEVEL in significance_level_to_index
# # significance_level=array([25. , 10. , 5. , 2.5, 1. ]))
# index_for_significance_level_5_percent = 2
quantiles = self.compute_empirical_quantiles(estimator=self.unconstrained_estimator)
# test_res = anderson(quantiles, dist='gumbel_r') # type: AndersonResult
# return test_res.statistic < test_res.critical_values[index_for_significance_level_5_percent]
# print(quantiles)
test = cramer_von_mises_and_anderson_darling_tests_pvalues_for_gumbel_distribution(quantiles)
_, ander_darling_test_pvalue = test
return ander_darling_test_pvalue > self.SIGNIFICANCE_LEVEL
return goodness_of_fit_anderson(quantiles, self.SIGNIFICANCE_LEVEL)
@property
def name(self):
......
......@@ -2,7 +2,8 @@ import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.plot_total_aic import plot_total_aic
from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.plot_total_aic import plot_total_aic, \
plot_individual_aic
mpl.rcParams['text.usetex'] = True
mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']
......@@ -22,21 +23,6 @@ from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.altitudes_s
AltitudesStudiesVisualizerForNonStationaryModels
def plot_altitudinal_fit(studies, massif_names=None):
model_classes = ALTITUDINAL_GEV_MODELS + ALTITUDINAL_GUMBEL_MODELS
visualizer = AltitudesStudiesVisualizerForNonStationaryModels(studies=studies,
model_classes=model_classes,
massif_names=massif_names,
show=False)
# Plot the results for the model that minimizes the individual aic
OneFoldFit.best_estimator_minimizes_total_aic = False
visualizer.plot_moments()
# visualizer.plot_best_coef_maps()
visualizer.plot_shape_map()
plot_total_aic(model_classes, visualizer)
def plot_time_series(studies, massif_names=None):
studies.plot_maxima_time_series(massif_names=massif_names)
......@@ -47,19 +33,33 @@ def plot_moments(studies, massif_names=None):
studies.plot_mean_maxima_against_altitude(massif_names=massif_names, std=std, change=change)
def plot_altitudinal_fit(studies, massif_names=None):
model_classes = ALTITUDINAL_GEV_MODELS + ALTITUDINAL_GUMBEL_MODELS
visualizer = AltitudesStudiesVisualizerForNonStationaryModels(studies=studies,
model_classes=model_classes,
massif_names=massif_names,
show=False)
# Plot the results for the model that minimizes the individual aic
plot_individual_aic(visualizer)
# Plot the results for the model that minimizes the total aic
plot_total_aic(model_classes, visualizer)
def main():
# altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][4:7]
altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][4:7]
# altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][:]
# todo: l ecart pour les saisons de l automne ou de sprint
# vient probablement de certains zéros pas pris en compte pour le fit,
# mais pris en compte pour le calcul de mon aic
altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900][:]
# altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900][:]
study_classes = [SafranSnowfall1Day, SafranSnowfall3Days, SafranSnowfall5Days, SafranSnowfall7Days][:2]
study_classes = [SafranPrecipitation1Day, SafranPrecipitation3Days, SafranPrecipitation5Days,
SafranPrecipitation7Days][:]
study_classes = [SafranSnowfall1Day, SafranPrecipitation1Day,
SafranSnowfall3Days, SafranPrecipitation3Days][:]
seasons = [Season.automn, Season.winter, Season.spring][::-1]
SafranSnowfall3Days, SafranPrecipitation3Days][:1]
# seasons = [Season.automn, Season.winter, Season.spring][::-1]
seasons = [Season.winter]
massif_names = None
# massif_names = ['Aravis']
# massif_names = ['Chartreuse', 'Belledonne']
......@@ -68,8 +68,8 @@ def main():
for study_class in study_classes:
studies = AltitudesStudies(study_class, altitudes, season=season)
print('inner loop', season, study_class)
plot_time_series(studies, massif_names)
plot_moments(studies, massif_names)
# plot_time_series(studies, massif_names)
# plot_moments(studies, massif_names)
plot_altitudinal_fit(studies, massif_names)
......
from typing import List
from typing import List, Dict
import matplotlib.pyplot as plt
import numpy as np
......@@ -25,17 +25,26 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer):
model_classes: List[AbstractSpatioTemporalPolynomialModel],
show=False,
massif_names=None,
fit_method=MarginFitMethod.extremes_fevd_mle):
fit_method=MarginFitMethod.extremes_fevd_mle,
display_only_model_that_pass_anderson_test=True):
super().__init__(studies.study, show=show, save_to_file=not show)
self.massif_names = massif_names if massif_names is not None else self.study.study_massif_names
self.studies = studies
self.non_stationary_models = model_classes
self.fit_method = fit_method
self.massif_name_to_one_fold_fit = {}
self.display_only_model_that_pass_anderson_test = display_only_model_that_pass_anderson_test
self.massif_names = massif_names if massif_names is not None else self.study.study_massif_names
self.massif_name_to_massif_id = {m: i for i, m in enumerate(self.massif_names)}
# Load one fold fit
self._massif_name_to_one_fold_fit = {}
for massif_name in self.massif_names:
dataset = studies.spatio_temporal_dataset(massif_name=massif_name)
old_fold_fit = OneFoldFit(massif_name, dataset, model_classes, self.fit_method)
self.massif_name_to_one_fold_fit[massif_name] = old_fold_fit
self._massif_name_to_one_fold_fit[massif_name] = old_fold_fit
@property
def massif_name_to_one_fold_fit(self) -> Dict[str, OneFoldFit]:
return {massif_name: old_fold_fit for massif_name, old_fold_fit in self._massif_name_to_one_fold_fit.items()
if not self.display_only_model_that_pass_anderson_test or old_fold_fit.goodness_of_fit_anderson_test}
def plot_moments(self):
for method_name in ['moment', 'changes_in_the_moment', 'relative_changes_in_the_moment']:
......@@ -46,12 +55,12 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer):
ax = plt.gca()
min_altitude, *_, max_altitude = self.studies.altitudes
altitudes_plot = np.linspace(min_altitude, max_altitude, num=50)
for massif_id, massif_name in enumerate(self.massif_names):
for massif_name, one_fold_fit in self.massif_name_to_one_fold_fit.items():
massif_altitudes = self.studies.massif_name_to_altitudes[massif_name]
ind = (min(massif_altitudes) <= altitudes_plot) & (altitudes_plot <= max(massif_altitudes))
massif_altitudes_plot = altitudes_plot[ind]
one_fold_fit = self.massif_name_to_one_fold_fit[massif_name]
values = one_fold_fit.__getattribute__(method_name)(massif_altitudes_plot, order=order)
massif_id = self.massif_name_to_massif_id[massif_name]
plot_against_altitude(massif_altitudes_plot, ax, massif_id, massif_name, values)
# Plot settings
ax.legend(prop={'size': 7}, ncol=3)
......@@ -128,3 +137,6 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer):
label='Shape plot for {} {}'.format(SCM_STUDY_CLASS_TO_ABBREVIATION[type(self.study)],
self.study.variable_unit),
add_x_label=False, graduation=0.1, massif_name_to_text=self.massif_name_to_name)
def plot_altitude_change_of_sign(self):
pass
......@@ -4,6 +4,7 @@ import rpy2
from cached_property import cached_property
from scipy.stats import chi2
from extreme_fit.distribution.gumbel.gumbel_gof import goodness_of_fit_anderson
from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator_short
from extreme_fit.function.param_function.polynomial_coef import PolynomialAllCoef, PolynomialCoef
from extreme_fit.model.margin_model.polynomial_margin_model.gev_altitudinal_models import StationaryAltitudinal
......@@ -11,13 +12,14 @@ from extreme_fit.model.margin_model.polynomial_margin_model.gumbel_altitudinal_m
StationaryGumbelAltitudinal, AbstractGumbelAltitudinalModel
from extreme_fit.model.margin_model.utils import MarginFitMethod
from root_utils import classproperty
from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
class OneFoldFit(object):
SIGNIFICANCE_LEVEL = 0.05
best_estimator_minimizes_total_aic = False
def __init__(self, massif_name, dataset, models_classes, fit_method=MarginFitMethod.extremes_fevd_mle):
def __init__(self, massif_name: str, dataset: AbstractDataset, models_classes, fit_method=MarginFitMethod.extremes_fevd_mle):
self.massif_name = massif_name
self.dataset = dataset
self.models_classes = models_classes
......@@ -88,20 +90,7 @@ class OneFoldFit(object):
@cached_property
def sorted_estimators(self):
estimators = list(self.model_class_to_estimator.values())
# estimators_with_finite_aic = []
# for estimator in estimators:
# # try:
# aic = estimator.aic()
# npt.assert_almost_equal(estimator.result_from_model_fit.aic, aic, decimal=5)
# print(self.massif_name, estimator.margin_model.name_str, aic)
# estimators_with_finite_aic.append(estimator)
# # except (AssertionError, rpy2.rinterface.RRuntimeError) as e:
# # raise e
# # print(self.massif_name, estimator.margin_model.name_str, 'infinite aic')
# # print(e)
# print('Summary {}: {}/{} fitted'.format(self.massif_name, len(estimators_with_finite_aic), len(estimators)))
sorted_estimators = sorted([estimator for estimator in estimators],
key=lambda e: e.aic())
sorted_estimators = sorted([estimator for estimator in estimators], key=lambda e: e.aic())
return sorted_estimators
@property
......@@ -168,3 +157,25 @@ class OneFoldFit(object):
return False
else:
return self.likelihood_ratio > chi2.ppf(q=1 - self.SIGNIFICANCE_LEVEL, df=self.degree_freedom_chi2)
@property
def goodness_of_fit_anderson_test(self):
quantiles = self.compute_empirical_quantiles(estimator=self.best_estimator)
goodness_of_fit_anderson_test = goodness_of_fit_anderson(quantiles, self.SIGNIFICANCE_LEVEL)
if not goodness_of_fit_anderson_test:
print('{} with {} does not pass the anderson test'.format(self.massif_name, self.folder_for_plots))
return goodness_of_fit_anderson_test
def compute_empirical_quantiles(self, estimator):
empirical_quantiles = []
df_maxima_gev = self.dataset.observations.df_maxima_gev
df_coordinates = self.dataset.coordinates.df_coordinates()
for coordinate, maximum in zip(df_coordinates.values.copy(), df_maxima_gev.values.copy()):
gev_param = estimator.function_from_fit.get_params(
coordinate=coordinate,
is_transformed=False)
assert len(maximum) == 1
maximum_standardized = gev_param.gumbel_standardization(maximum[0])
empirical_quantiles.append(maximum_standardized)
empirical_quantiles = sorted(empirical_quantiles)
return empirical_quantiles
......@@ -10,6 +10,13 @@ from projects.altitude_spatial_model.altitudes_fit.one_fold_analysis.one_fold_fi
from projects.exceeding_snow_loads.utils import dpi_paper1_figure
def plot_individual_aic(visualizer):
OneFoldFit.best_estimator_minimizes_total_aic = False
visualizer.plot_moments()
# visualizer.plot_best_coef_maps()
visualizer.plot_shape_map()
def plot_total_aic(model_classes, visualizer):
# Compute the mean AIC for each model_class
OneFoldFit.best_estimator_minimizes_total_aic = True
......@@ -35,7 +42,7 @@ def plot_total_aic(model_classes, visualizer):
best_model_class_for_total_aic = sorted_model_class[0]
for one_fold_fit in visualizer.massif_name_to_one_fold_fit.values():
one_fold_fit.best_estimator_class_for_total_aic = best_model_class_for_total_aic
# Plot the ranking of the
# Plot the ranking of the model based on their total aic
plot_total_aic_repartition(visualizer, sorted_labels, sorted_scores)
# Plot the results for the model that minimizes the mean aic
visualizer.plot_moments()
......@@ -43,7 +50,6 @@ def plot_total_aic(model_classes, visualizer):
visualizer.plot_best_coef_maps()
def plot_total_aic_repartition(visualizer, labels, scores):
"""
Plot a single trend curves
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment