Commit 74f0849c authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[GEV] add computation of the mean & variance. modify gev_estimator. add...

[GEV] add computation of the mean & variance. modify gev_estimator. add function visualize_summary_of_annual_values_and_stationary_gev_fit: to quickly visualize the repartition of the main empirical indicators and corresponding stationary gev indicators.
parent 0fcf42ec
No related merge requests found
Showing with 128 additions and 25 deletions
+128 -25
......@@ -18,7 +18,8 @@ from experiment.meteo_france_data.scm_models_data.visualization.hypercube_visual
load_altitude_visualizer, load_quantity_visualizer
from experiment.meteo_france_data.scm_models_data.visualization.study_visualization.main_study_visualizer import \
ALL_ALTITUDES, SCM_STUDIES
from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import GevLocationChangePointTest
from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import GevLocationChangePointTest, \
GevScaleChangePointTest
def get_fast_altitude_visualizer(altitude_hypercube_class):
......@@ -41,6 +42,7 @@ def get_full_altitude_visualizer(altitude_hypercube_class, exact_starting_year=N
if exact_starting_year is not None:
first_starting_year, last_starting_year = None, None
study_classes = [CrocusRecentSwe]
trend_test_class = GevScaleChangePointTest
visualizer = load_altitude_visualizer(altitude_hypercube_class, altitudes, last_starting_year,
nb_data_reduced_for_speed, only_first_one, save_to_file, study_classes,
trend_test_class, first_starting_year=first_starting_year,
......@@ -48,21 +50,23 @@ def get_full_altitude_visualizer(altitude_hypercube_class, exact_starting_year=N
return visualizer
FULL_ALTITUDES = [900, 1500, 2100, 2700]
FULL_ALTITUDES = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000]
HALF_ALTITUDES = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][::2]
def main_fast_spatial_repartition():
for altitude in FULL_ALTITUDES[-1:]:
vizualiser = get_full_altitude_visualizer(Altitude_Hypercube_Year_Visualizer, altitude=altitude,
exact_starting_year=1958)
vizualiser.save_to_file = False
vizualiser.visualize_massif_trend_test_one_altitude()
def main_full_spatial_repartition():
for altitude in FULL_ALTITUDES[:]:
# Compute for the most likely starting year
vizualiser = get_full_altitude_visualizer(Altitude_Hypercube_Year_Visualizer, altitude=altitude)
vizualiser.visualize_massif_trend_test_one_altitude()
# vizualiser = get_full_altitude_visualizer(Altitude_Hypercube_Year_Visualizer, altitude=altitude)
# vizualiser.visualize_massif_trend_test_one_altitude()
# Compute the trend for a linear trend
vizualiser = get_full_altitude_visualizer(Altitude_Hypercube_Year_Visualizer, altitude=altitude,
exact_starting_year=1958)
......@@ -70,8 +74,8 @@ def main_full_spatial_repartition():
def main_run():
# main_full_spatial_repartition()
main_fast_spatial_repartition()
main_full_spatial_repartition()
# main_fast_spatial_repartition()
if __name__ == '__main__':
......
......@@ -216,10 +216,10 @@ def trend_analysis():
def maxima_analysis():
save_to_file = False
only_first_one = True
durand_altitude = [1800]
durand_altitude = [2700]
altitudes = durand_altitude
normalization_class = BetweenZeroAndOneNormalization
study_classes = [SafranSnowfall][:]
study_classes = [CrocusRecentSwe][:]
for study in study_iterator_global(study_classes, only_first_one=only_first_one, altitudes=altitudes):
study_visualizer = StudyVisualizer(study, save_to_file=save_to_file,
transformation_class=normalization_class,
......@@ -228,9 +228,11 @@ def maxima_analysis():
multiprocessing=True,
complete_non_stationary_trend_analysis=True,
score_class=MannKendall)
study_visualizer.visualize_all_score_wrt_starting_year()
# study_visualizer.visualize_all_score_wrt_starting_year()
# study_visualizer.visualize_all_independent_temporal_trend()
study_visualizer.visualize_all_mean_and_max_graphs()
# study_visualizer.visualize_independent_margin_fits()
# study_visualizer.visualize_all_mean_and_max_graphs()
study_visualizer.visualize_summary_of_annual_values_and_stationary_gev_fit()
def max_graph_annual_maxima_poster():
......@@ -258,7 +260,8 @@ def main_run():
# trend_analysis()
# altitude_analysis()
max_graph_annual_maxima_poster()
# max_graph_annual_maxima_poster()
maxima_analysis()
# case_study()
# all_scores_vizu()
# maxima_analysis()
......
......@@ -3,6 +3,7 @@ import os.path as op
from collections import OrderedDict
from multiprocessing.pool import Pool
from random import sample
from typing import Dict
import math
import matplotlib.pyplot as plt
......@@ -620,7 +621,8 @@ class StudyVisualizer(VisualizationParameters):
x_gev = np.linspace(0.0, 1.5 * max(y), num=1000)
y_gev = [gev_param.density(x) for x in x_gev]
ax.plot(x_gev, y_gev, color=color, linewidth=5)
ax.set_xlabel('y = annual maxima of {} (in {})'.format(snow_abbreviation, self.study.variable_unit), color=color, fontsize=15)
ax.set_xlabel('y = annual maxima of {} (in {})'.format(snow_abbreviation, self.study.variable_unit),
color=color, fontsize=15)
ax.set_ylabel('$f_{GEV}' + '(y|\mu={},\sigma={},\zeta={})$'.format(*gev_param.to_array()), fontsize=15)
ax.tick_params(axis='both', which='major', labelsize=13)
......@@ -631,7 +633,6 @@ class StudyVisualizer(VisualizationParameters):
self.show_or_save_to_file(add_classic_title=False, no_title=True)
ax.clear()
def visualize_mean_and_max_graph(self, ax, massif_id):
# Display the graph of the max on top
color_maxima = 'r'
......@@ -815,6 +816,44 @@ class StudyVisualizer(VisualizationParameters):
if show:
plt.show()
def visualize_summary_of_annual_values_and_stationary_gev_fit(self):
fig, axes = plt.subplots(3, 4)
fig.subplots_adjust(hspace=self.subplot_space, wspace=self.subplot_space)
# 1) First row, some experimental indicator
axes_first_row = axes[0]
df_maxima_gev = self.df_maxima_gev
name_to_serie = OrderedDict()
name_to_serie['mean'] = df_maxima_gev.mean(axis=1)
name_to_serie['std'] = df_maxima_gev.std(axis=1)
name_to_serie['quantile 0.9'] = df_maxima_gev.quantile(q=0.9, axis=1)
name_to_serie['quantile 0.99'] = df_maxima_gev.quantile(q=0.99, axis=1)
for (name, serie), ax in zip(name_to_serie.items(), axes_first_row):
self.study.visualize_study(ax=ax,
massif_name_to_value=serie.to_dict(),
show=False,
label='empirical ' + name)
# 2) Second row, gev parameters fitted independently (and a qqplot)
axes_second_row = axes[1]
for ax, gev_param_name in zip(axes_second_row, GevParams.PARAM_NAMES):
self.study.visualize_study(ax=ax,
massif_name_to_value=self.df_gev_parameters.loc[gev_param_name, :].to_dict(),
show=False,
replace_blue_by_white=gev_param_name != GevParams.SHAPE,
label=gev_param_name)
# self.clean_axes_write_title_on_the_left(axes, title='Independent fits')
# 3) Third row, gev indicator
axes_third_row = axes[2]
for ax, indicator_name in zip(axes_third_row, GevParams.indicator_names()):
self.study.visualize_study(ax=ax,
massif_name_to_value=self.df_gev_indicators.loc[indicator_name, :].to_dict(),
show=False,
label='gev ' + indicator_name)
plt.show()
def visualize_annual_mean_values(self, ax=None, take_mean_value=True):
if ax is None:
_, ax = plt.subplots(1, 1, figsize=self.figsize)
......@@ -837,11 +876,21 @@ class StudyVisualizer(VisualizationParameters):
def df_maxima_gev(self) -> pd.DataFrame:
return self.study.observations_annual_maxima.df_maxima_gev
@property
def df_gev_mle(self) -> pd.DataFrame:
# Fit a margin_fits on each massif
massif_to_gev_mle = {massif_name: IsmevGevFit(self.df_maxima_gev.loc[massif_name]).gev_params.summary_serie
for massif_name in self.study.study_massif_names}
@cached_property
def massif_name_to_gev_mle_fitted(self) -> Dict[str, GevParams]:
return {massif_name: IsmevGevFit(self.df_maxima_gev.loc[massif_name]).gev_params
for massif_name in self.study.study_massif_names}
@cached_property
def df_gev_parameters(self) -> pd.DataFrame:
massif_to_gev_mle = {massif_name: gev_params.to_dict()
for massif_name, gev_params in self.massif_name_to_gev_mle_fitted.items()}
return pd.DataFrame(massif_to_gev_mle, columns=self.study.study_massif_names)
@cached_property
def df_gev_indicators(self) -> pd.DataFrame:
massif_to_gev_mle = {massif_name: gev_params.indicator_name_to_value
for massif_name, gev_params in self.massif_name_to_gev_mle_fitted.items()}
return pd.DataFrame(massif_to_gev_mle, columns=self.study.study_massif_names)
@property
......
......@@ -60,3 +60,5 @@ class AbstractParams(object):
def summary_serie(self) -> pd.Series:
return pd.Series(self.summary_dict, index=self.SUMMARY_NAMES)
# Estimators
......@@ -12,5 +12,5 @@ class GevFit(object):
self.x_gev = x_gev
@property
def gev_params(self):
def gev_params(self) -> GevParams:
raise NotImplementedError
\ No newline at end of file
from collections import OrderedDict
from cached_property import cached_property
from extreme_estimator.extreme_models.utils import r
from extreme_estimator.margin_fits.extreme_params import ExtremeParams
import numpy as np
from scipy.special import gamma
class GevParams(ExtremeParams):
# Parameters
PARAM_NAMES = [ExtremeParams.LOC, ExtremeParams.SCALE, ExtremeParams.SHAPE]
# Summary
......@@ -39,4 +43,41 @@ class GevParams(ExtremeParams):
return [self.location, self.scale, self.shape]
def __str__(self):
return self.to_dict().__str__()
\ No newline at end of file
return self.to_dict().__str__()
def g(self, k):
# Compute the g_k parameters as defined in wikipedia
# https://fr.wikipedia.org/wiki/Loi_d%27extremum_g%C3%A9n%C3%A9ralis%C3%A9e
return gamma(1 - k * self.shape)
@property
def mean(self):
if self.shape >= 1:
return np.inf
else:
return self.location + self.scale * (self.g(k=1) - 1) / self.shape
@property
def variance(self):
if self.shape >= 0.5:
return np.inf
else:
return ((self.scale / self.shape) ** 2) * (self.g(k=2) - self.g(k=1) ** 2)
@property
def std(self):
return np.sqrt(self.variance)
@classmethod
def indicator_names(cls):
return ['mean', 'std'] + cls.QUANTILE_NAMES[:2]
@cached_property
def indicator_name_to_value(self) -> OrderedDict:
indicator_name_to_value = OrderedDict()
indicator_name_to_value['mean'] = self.mean
indicator_name_to_value['std'] = self.std
for quantile_name, quantile_value in zip(self.QUANTILE_NAMES[:2], self.QUANTILE_P_VALUES):
indicator_name_to_value[quantile_name] = self.quantile(quantile_value)
assert all([a == b for a, b in zip(self.indicator_names(), indicator_name_to_value.keys())])
return indicator_name_to_value
......@@ -13,7 +13,7 @@ class GevMleFit(GevFit):
self.gev_params_object = GevParams.from_dict({**self._gev_params, 'block_size': block_size})
@property
def gev_params(self):
return self._gev_params
def gev_params(self) -> GevParams:
return self.gev_params_object
......@@ -16,9 +16,10 @@ class IsmevGevFit(GevFit):
self.res = ismev_gev_fit(x_gev, y, mul)
@property
def gev_params(self):
def gev_params(self) -> GevParams:
assert self.y is None
return dict(zip(GevParams.PARAM_NAMES, self.res['mle']))
gev_params_dict = dict(zip(GevParams.PARAM_NAMES, self.res['mle']))
return GevParams.from_dict(gev_params_dict)
......@@ -3,6 +3,7 @@ import unittest
import numpy as np
from extreme_estimator.extreme_models.utils import r, set_seed_r
from extreme_estimator.margin_fits.gev.gev_params import GevParams
from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit
from extreme_estimator.margin_fits.gev.ismev_gev_fit import IsmevGevFit
......@@ -31,6 +32,8 @@ class TestGevMleFit(unittest.TestCase):
def fit_estimator(self, estimator, ref):
# Compare the MLE estimated parameters to the reference
mle_params_estimated = estimator.gev_params
self.assertIsInstance(mle_params_estimated, GevParams)
mle_params_estimated = mle_params_estimated.to_dict()
for key in ref.keys():
self.assertAlmostEqual(ref[key], mle_params_estimated[key], places=3)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment