diff --git a/experiment/meteo_france_data/scm_models_data/visualization/hypercube_visualization/main_files_after_conf/main_starting_years.py b/experiment/meteo_france_data/scm_models_data/visualization/hypercube_visualization/main_files_after_conf/main_choice_starting_years.py similarity index 88% rename from experiment/meteo_france_data/scm_models_data/visualization/hypercube_visualization/main_files_after_conf/main_starting_years.py rename to experiment/meteo_france_data/scm_models_data/visualization/hypercube_visualization/main_files_after_conf/main_choice_starting_years.py index 4ac6423cce980ed8e5b376d8888c2eac37d58ab3..51e242af71620301b6f91812cfaaf30dfdb82a96 100644 --- a/experiment/meteo_france_data/scm_models_data/visualization/hypercube_visualization/main_files_after_conf/main_starting_years.py +++ b/experiment/meteo_france_data/scm_models_data/visualization/hypercube_visualization/main_files_after_conf/main_choice_starting_years.py @@ -18,7 +18,8 @@ from experiment.meteo_france_data.scm_models_data.visualization.hypercube_visual load_altitude_visualizer, load_quantity_visualizer from experiment.meteo_france_data.scm_models_data.visualization.study_visualization.main_study_visualizer import \ ALL_ALTITUDES, SCM_STUDIES -from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import GevLocationChangePointTest +from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import GevLocationChangePointTest, \ + GevScaleChangePointTest def get_fast_altitude_visualizer(altitude_hypercube_class): @@ -41,6 +42,7 @@ def get_full_altitude_visualizer(altitude_hypercube_class, exact_starting_year=N if exact_starting_year is not None: first_starting_year, last_starting_year = None, None study_classes = [CrocusRecentSwe] + trend_test_class = GevScaleChangePointTest visualizer = load_altitude_visualizer(altitude_hypercube_class, altitudes, last_starting_year, nb_data_reduced_for_speed, only_first_one, save_to_file, study_classes, trend_test_class, first_starting_year=first_starting_year, @@ -48,21 +50,23 @@ def get_full_altitude_visualizer(altitude_hypercube_class, exact_starting_year=N return visualizer -FULL_ALTITUDES = [900, 1500, 2100, 2700] +FULL_ALTITUDES = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000] +HALF_ALTITUDES = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][::2] def main_fast_spatial_repartition(): for altitude in FULL_ALTITUDES[-1:]: vizualiser = get_full_altitude_visualizer(Altitude_Hypercube_Year_Visualizer, altitude=altitude, exact_starting_year=1958) + vizualiser.save_to_file = False vizualiser.visualize_massif_trend_test_one_altitude() def main_full_spatial_repartition(): for altitude in FULL_ALTITUDES[:]: # Compute for the most likely starting year - vizualiser = get_full_altitude_visualizer(Altitude_Hypercube_Year_Visualizer, altitude=altitude) - vizualiser.visualize_massif_trend_test_one_altitude() + # vizualiser = get_full_altitude_visualizer(Altitude_Hypercube_Year_Visualizer, altitude=altitude) + # vizualiser.visualize_massif_trend_test_one_altitude() # Compute the trend for a linear trend vizualiser = get_full_altitude_visualizer(Altitude_Hypercube_Year_Visualizer, altitude=altitude, exact_starting_year=1958) @@ -70,8 +74,8 @@ def main_full_spatial_repartition(): def main_run(): - # main_full_spatial_repartition() - main_fast_spatial_repartition() + main_full_spatial_repartition() + # main_fast_spatial_repartition() if __name__ == '__main__': diff --git a/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/main_study_visualizer.py b/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/main_study_visualizer.py index 7d22db85b060aed072bb2e4f33a80ffcdc4d6ff9..8b6e82bd277378138917ee6b458ca23612660838 100644 --- a/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/main_study_visualizer.py +++ b/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/main_study_visualizer.py @@ -216,10 +216,10 @@ def trend_analysis(): def maxima_analysis(): save_to_file = False only_first_one = True - durand_altitude = [1800] + durand_altitude = [2700] altitudes = durand_altitude normalization_class = BetweenZeroAndOneNormalization - study_classes = [SafranSnowfall][:] + study_classes = [CrocusRecentSwe][:] for study in study_iterator_global(study_classes, only_first_one=only_first_one, altitudes=altitudes): study_visualizer = StudyVisualizer(study, save_to_file=save_to_file, transformation_class=normalization_class, @@ -228,9 +228,11 @@ def maxima_analysis(): multiprocessing=True, complete_non_stationary_trend_analysis=True, score_class=MannKendall) - study_visualizer.visualize_all_score_wrt_starting_year() + # study_visualizer.visualize_all_score_wrt_starting_year() # study_visualizer.visualize_all_independent_temporal_trend() - study_visualizer.visualize_all_mean_and_max_graphs() + # study_visualizer.visualize_independent_margin_fits() + # study_visualizer.visualize_all_mean_and_max_graphs() + study_visualizer.visualize_summary_of_annual_values_and_stationary_gev_fit() def max_graph_annual_maxima_poster(): @@ -258,7 +260,8 @@ def main_run(): # trend_analysis() # altitude_analysis() - max_graph_annual_maxima_poster() + # max_graph_annual_maxima_poster() + maxima_analysis() # case_study() # all_scores_vizu() # maxima_analysis() diff --git a/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py b/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py index 0c60d348b5b350c031ef14c7afa5156858022ba2..059c6afc25755b7dd257dd1f9e95db23689f33b2 100644 --- a/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py +++ b/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py @@ -3,6 +3,7 @@ import os.path as op from collections import OrderedDict from multiprocessing.pool import Pool from random import sample +from typing import Dict import math import matplotlib.pyplot as plt @@ -620,7 +621,8 @@ class StudyVisualizer(VisualizationParameters): x_gev = np.linspace(0.0, 1.5 * max(y), num=1000) y_gev = [gev_param.density(x) for x in x_gev] ax.plot(x_gev, y_gev, color=color, linewidth=5) - ax.set_xlabel('y = annual maxima of {} (in {})'.format(snow_abbreviation, self.study.variable_unit), color=color, fontsize=15) + ax.set_xlabel('y = annual maxima of {} (in {})'.format(snow_abbreviation, self.study.variable_unit), + color=color, fontsize=15) ax.set_ylabel('$f_{GEV}' + '(y|\mu={},\sigma={},\zeta={})$'.format(*gev_param.to_array()), fontsize=15) ax.tick_params(axis='both', which='major', labelsize=13) @@ -631,7 +633,6 @@ class StudyVisualizer(VisualizationParameters): self.show_or_save_to_file(add_classic_title=False, no_title=True) ax.clear() - def visualize_mean_and_max_graph(self, ax, massif_id): # Display the graph of the max on top color_maxima = 'r' @@ -815,6 +816,44 @@ class StudyVisualizer(VisualizationParameters): if show: plt.show() + def visualize_summary_of_annual_values_and_stationary_gev_fit(self): + fig, axes = plt.subplots(3, 4) + fig.subplots_adjust(hspace=self.subplot_space, wspace=self.subplot_space) + + # 1) First row, some experimental indicator + axes_first_row = axes[0] + df_maxima_gev = self.df_maxima_gev + name_to_serie = OrderedDict() + name_to_serie['mean'] = df_maxima_gev.mean(axis=1) + name_to_serie['std'] = df_maxima_gev.std(axis=1) + name_to_serie['quantile 0.9'] = df_maxima_gev.quantile(q=0.9, axis=1) + name_to_serie['quantile 0.99'] = df_maxima_gev.quantile(q=0.99, axis=1) + for (name, serie), ax in zip(name_to_serie.items(), axes_first_row): + self.study.visualize_study(ax=ax, + massif_name_to_value=serie.to_dict(), + show=False, + label='empirical ' + name) + + # 2) Second row, gev parameters fitted independently (and a qqplot) + axes_second_row = axes[1] + for ax, gev_param_name in zip(axes_second_row, GevParams.PARAM_NAMES): + self.study.visualize_study(ax=ax, + massif_name_to_value=self.df_gev_parameters.loc[gev_param_name, :].to_dict(), + show=False, + replace_blue_by_white=gev_param_name != GevParams.SHAPE, + label=gev_param_name) + # self.clean_axes_write_title_on_the_left(axes, title='Independent fits') + + # 3) Third row, gev indicator + axes_third_row = axes[2] + for ax, indicator_name in zip(axes_third_row, GevParams.indicator_names()): + self.study.visualize_study(ax=ax, + massif_name_to_value=self.df_gev_indicators.loc[indicator_name, :].to_dict(), + show=False, + label='gev ' + indicator_name) + + plt.show() + def visualize_annual_mean_values(self, ax=None, take_mean_value=True): if ax is None: _, ax = plt.subplots(1, 1, figsize=self.figsize) @@ -837,11 +876,21 @@ class StudyVisualizer(VisualizationParameters): def df_maxima_gev(self) -> pd.DataFrame: return self.study.observations_annual_maxima.df_maxima_gev - @property - def df_gev_mle(self) -> pd.DataFrame: - # Fit a margin_fits on each massif - massif_to_gev_mle = {massif_name: IsmevGevFit(self.df_maxima_gev.loc[massif_name]).gev_params.summary_serie - for massif_name in self.study.study_massif_names} + @cached_property + def massif_name_to_gev_mle_fitted(self) -> Dict[str, GevParams]: + return {massif_name: IsmevGevFit(self.df_maxima_gev.loc[massif_name]).gev_params + for massif_name in self.study.study_massif_names} + + @cached_property + def df_gev_parameters(self) -> pd.DataFrame: + massif_to_gev_mle = {massif_name: gev_params.to_dict() + for massif_name, gev_params in self.massif_name_to_gev_mle_fitted.items()} + return pd.DataFrame(massif_to_gev_mle, columns=self.study.study_massif_names) + + @cached_property + def df_gev_indicators(self) -> pd.DataFrame: + massif_to_gev_mle = {massif_name: gev_params.indicator_name_to_value + for massif_name, gev_params in self.massif_name_to_gev_mle_fitted.items()} return pd.DataFrame(massif_to_gev_mle, columns=self.study.study_massif_names) @property diff --git a/extreme_estimator/margin_fits/abstract_params.py b/extreme_estimator/margin_fits/abstract_params.py index 338b4b8d05a430246f1ee96b837866b82ff62a29..5858afb324ece5a371f060dd1e57f745fe58370f 100644 --- a/extreme_estimator/margin_fits/abstract_params.py +++ b/extreme_estimator/margin_fits/abstract_params.py @@ -60,3 +60,5 @@ class AbstractParams(object): def summary_serie(self) -> pd.Series: return pd.Series(self.summary_dict, index=self.SUMMARY_NAMES) + # Estimators + diff --git a/extreme_estimator/margin_fits/gev/gev_fit.py b/extreme_estimator/margin_fits/gev/gev_fit.py index 16865750bccb480bfa748eddbb6b50340e295cba..5b3600da6b36fa178832521bcb6f240c44d6cc19 100644 --- a/extreme_estimator/margin_fits/gev/gev_fit.py +++ b/extreme_estimator/margin_fits/gev/gev_fit.py @@ -12,5 +12,5 @@ class GevFit(object): self.x_gev = x_gev @property - def gev_params(self): + def gev_params(self) -> GevParams: raise NotImplementedError \ No newline at end of file diff --git a/extreme_estimator/margin_fits/gev/gev_params.py b/extreme_estimator/margin_fits/gev/gev_params.py index 355b3e0abf6ed5414d1c0c15f69e5c135a8fccc1..2094bbeb31a41996be8729f774e0106e6e433237 100644 --- a/extreme_estimator/margin_fits/gev/gev_params.py +++ b/extreme_estimator/margin_fits/gev/gev_params.py @@ -1,10 +1,14 @@ +from collections import OrderedDict + +from cached_property import cached_property + from extreme_estimator.extreme_models.utils import r from extreme_estimator.margin_fits.extreme_params import ExtremeParams import numpy as np +from scipy.special import gamma class GevParams(ExtremeParams): - # Parameters PARAM_NAMES = [ExtremeParams.LOC, ExtremeParams.SCALE, ExtremeParams.SHAPE] # Summary @@ -39,4 +43,41 @@ class GevParams(ExtremeParams): return [self.location, self.scale, self.shape] def __str__(self): - return self.to_dict().__str__() \ No newline at end of file + return self.to_dict().__str__() + + def g(self, k): + # Compute the g_k parameters as defined in wikipedia + # https://fr.wikipedia.org/wiki/Loi_d%27extremum_g%C3%A9n%C3%A9ralis%C3%A9e + return gamma(1 - k * self.shape) + + @property + def mean(self): + if self.shape >= 1: + return np.inf + else: + return self.location + self.scale * (self.g(k=1) - 1) / self.shape + + @property + def variance(self): + if self.shape >= 0.5: + return np.inf + else: + return ((self.scale / self.shape) ** 2) * (self.g(k=2) - self.g(k=1) ** 2) + + @property + def std(self): + return np.sqrt(self.variance) + + @classmethod + def indicator_names(cls): + return ['mean', 'std'] + cls.QUANTILE_NAMES[:2] + + @cached_property + def indicator_name_to_value(self) -> OrderedDict: + indicator_name_to_value = OrderedDict() + indicator_name_to_value['mean'] = self.mean + indicator_name_to_value['std'] = self.std + for quantile_name, quantile_value in zip(self.QUANTILE_NAMES[:2], self.QUANTILE_P_VALUES): + indicator_name_to_value[quantile_name] = self.quantile(quantile_value) + assert all([a == b for a, b in zip(self.indicator_names(), indicator_name_to_value.keys())]) + return indicator_name_to_value diff --git a/extreme_estimator/margin_fits/gev/gevmle_fit.py b/extreme_estimator/margin_fits/gev/gevmle_fit.py index fff6dad1f4d0c2d2088069d9dc13f88b0163bdf6..81513e2cdb040792723420cfd5a4ceaeb4daec61 100644 --- a/extreme_estimator/margin_fits/gev/gevmle_fit.py +++ b/extreme_estimator/margin_fits/gev/gevmle_fit.py @@ -13,7 +13,7 @@ class GevMleFit(GevFit): self.gev_params_object = GevParams.from_dict({**self._gev_params, 'block_size': block_size}) @property - def gev_params(self): - return self._gev_params + def gev_params(self) -> GevParams: + return self.gev_params_object diff --git a/extreme_estimator/margin_fits/gev/ismev_gev_fit.py b/extreme_estimator/margin_fits/gev/ismev_gev_fit.py index 778fc1bbee768128604d95431171bb6f7d812d57..b214cc23e6dfb4acf57284491e6ff6656e660435 100644 --- a/extreme_estimator/margin_fits/gev/ismev_gev_fit.py +++ b/extreme_estimator/margin_fits/gev/ismev_gev_fit.py @@ -16,9 +16,10 @@ class IsmevGevFit(GevFit): self.res = ismev_gev_fit(x_gev, y, mul) @property - def gev_params(self): + def gev_params(self) -> GevParams: assert self.y is None - return dict(zip(GevParams.PARAM_NAMES, self.res['mle'])) + gev_params_dict = dict(zip(GevParams.PARAM_NAMES, self.res['mle'])) + return GevParams.from_dict(gev_params_dict) diff --git a/test/test_extreme_estimator/test_margin_fits/test_gev/test_gevmle_fit.py b/test/test_extreme_estimator/test_margin_fits/test_gev/test_gevmle_fit.py index 27b972710ed94e4413e9558003cdde49ad7234d9..82616977e72575c9c43f5e859cdbc0de8207ea57 100644 --- a/test/test_extreme_estimator/test_margin_fits/test_gev/test_gevmle_fit.py +++ b/test/test_extreme_estimator/test_margin_fits/test_gev/test_gevmle_fit.py @@ -3,6 +3,7 @@ import unittest import numpy as np from extreme_estimator.extreme_models.utils import r, set_seed_r +from extreme_estimator.margin_fits.gev.gev_params import GevParams from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit from extreme_estimator.margin_fits.gev.ismev_gev_fit import IsmevGevFit @@ -31,6 +32,8 @@ class TestGevMleFit(unittest.TestCase): def fit_estimator(self, estimator, ref): # Compare the MLE estimated parameters to the reference mle_params_estimated = estimator.gev_params + self.assertIsInstance(mle_params_estimated, GevParams) + mle_params_estimated = mle_params_estimated.to_dict() for key in ref.keys(): self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3)