From d5e81395482ca6026879fcf4d1161af2abb32fbd Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Tue, 12 Feb 2019 18:37:15 +0100 Subject: [PATCH] [EXTREME ESTIMATOR] add safran visualization for gpd (with a threshold fixed by hand) --- .../margin_fits/abstract_params.py | 3 +- .../margin_fits/extreme_params.py | 10 +-- .../margin_fits/gev/gev_params.py | 9 +++ .../margin_fits/gpd/gpd_params.py | 16 ++++- .../margin_fits/gpd/gpdmle_fit.py | 2 +- safran_study/safran.py | 62 ++++++++++++++----- safran_study/snowfall_annual_maxima.py | 9 ++- safran_study/visualize_safran.py | 7 ++- 8 files changed, 84 insertions(+), 34 deletions(-) diff --git a/extreme_estimator/margin_fits/abstract_params.py b/extreme_estimator/margin_fits/abstract_params.py index 9f180d18..fc5b80e3 100644 --- a/extreme_estimator/margin_fits/abstract_params.py +++ b/extreme_estimator/margin_fits/abstract_params.py @@ -3,8 +3,6 @@ from typing import List import numpy as np import pandas as pd -from extreme_estimator.extreme_models.utils import r - class AbstractParams(object): # Parameters @@ -29,6 +27,7 @@ class AbstractParams(object): raise NotImplementedError def to_dict(self) -> dict: + assert len(self.PARAM_NAMES) == len(self.param_values) return dict(zip(self.PARAM_NAMES, self.param_values)) def to_serie(self) -> pd.Series: diff --git a/extreme_estimator/margin_fits/extreme_params.py b/extreme_estimator/margin_fits/extreme_params.py index 60bbeca3..01a39d3f 100644 --- a/extreme_estimator/margin_fits/extreme_params.py +++ b/extreme_estimator/margin_fits/extreme_params.py @@ -1,20 +1,16 @@ -from extreme_estimator.margin_fits.abstract_params import AbstractParams +from abc import ABC +from extreme_estimator.margin_fits.abstract_params import AbstractParams -class ExtremeParams(AbstractParams): +class ExtremeParams(AbstractParams, ABC): # Extreme parameters SCALE = 'scale' LOC = 'loc' SHAPE = 'shape' - PARAM_NAMES = [LOC, SCALE, SHAPE] def __init__(self, loc: float, scale: float, shape: float): self.location = loc self.scale = scale self.shape = shape assert self.scale > 0 - - @property - def param_values(self): - return [self.location, self.scale, self.shape] \ No newline at end of file diff --git a/extreme_estimator/margin_fits/gev/gev_params.py b/extreme_estimator/margin_fits/gev/gev_params.py index ba743f8a..b4d9cbc2 100644 --- a/extreme_estimator/margin_fits/gev/gev_params.py +++ b/extreme_estimator/margin_fits/gev/gev_params.py @@ -4,9 +4,18 @@ from extreme_estimator.margin_fits.extreme_params import ExtremeParams class GevParams(ExtremeParams): + # Parameters + PARAM_NAMES = [ExtremeParams.LOC, ExtremeParams.SCALE, ExtremeParams.SHAPE] + # Summary + SUMMARY_NAMES = PARAM_NAMES + ExtremeParams.QUANTILE_NAMES + def __init__(self, loc: float, scale: float, shape: float, block_size: int = None): super().__init__(loc, scale, shape) self.block_size = block_size def quantile(self, p) -> float: return r.qgev(p, self.location, self.scale, self.shape)[0] + + @property + def param_values(self): + return [self.location, self.scale, self.shape] \ No newline at end of file diff --git a/extreme_estimator/margin_fits/gpd/gpd_params.py b/extreme_estimator/margin_fits/gpd/gpd_params.py index 3eb42331..0c8cc815 100644 --- a/extreme_estimator/margin_fits/gpd/gpd_params.py +++ b/extreme_estimator/margin_fits/gpd/gpd_params.py @@ -3,10 +3,20 @@ from extreme_estimator.margin_fits.extreme_params import ExtremeParams class GpdParams(ExtremeParams): + # TODO: understand better why the gpdfit return 2 parameters, alors que d'un autre cote d autres definitions de la distribution parlent d un parametre location - def __init__(self, loc: float, scale: float, shape: float, threshold: float = None): - super().__init__(loc, scale, shape) + # Parameters + PARAM_NAMES = [ExtremeParams.SCALE, ExtremeParams.SHAPE] + # Summary + SUMMARY_NAMES = PARAM_NAMES + ExtremeParams.QUANTILE_NAMES + + def __init__(self, scale: float, shape: float, threshold: float = None): + super().__init__(loc=0.0, scale=scale, shape=shape) self.threshold = threshold def quantile(self, p) -> float: - return r.qgpd(p, self.location, self.scale, self.shape)[0] + return r.qgpd(p, self.location, self.scale, self.shape)[0] + self.threshold + + @property + def param_values(self): + return [self.scale, self.shape] diff --git a/extreme_estimator/margin_fits/gpd/gpdmle_fit.py b/extreme_estimator/margin_fits/gpd/gpdmle_fit.py index da57af36..c26d924e 100644 --- a/extreme_estimator/margin_fits/gpd/gpdmle_fit.py +++ b/extreme_estimator/margin_fits/gpd/gpdmle_fit.py @@ -12,4 +12,4 @@ class GpdMleFit(object): self.x_gev = x_gev self.threshold = threshold self.mle_params = spatial_extreme_gpdmle_fit(x_gev, threshold) - self.gev_params = GpdParams.from_dict({**self.mle_params, 'threshold': threshold}) \ No newline at end of file + self.gpd_params = GpdParams.from_dict({**self.mle_params, 'threshold': threshold}) \ No newline at end of file diff --git a/safran_study/safran.py b/safran_study/safran.py index 2009ac91..43348aab 100644 --- a/safran_study/safran.py +++ b/safran_study/safran.py @@ -10,8 +10,11 @@ from matplotlib import cm from mpl_toolkits.axes_grid1 import make_axes_locatable from netCDF4 import Dataset +from extreme_estimator.margin_fits.extreme_params import ExtremeParams from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit from extreme_estimator.margin_fits.gev.gev_params import GevParams +from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams +from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit from safran_study.massif import safran_massif_names_from_datasets from safran_study.shifted_color_map import shiftedColorMap from safran_study.snowfall_annual_maxima import SafranSnowfall @@ -56,27 +59,34 @@ class Safran(object): if show: plt.show() - def visualize_gev_fit_with_cmap(self, show=True, axes=None): - params_names = GevParams.SUMMARY_NAMES + def visualize_margin_fits_with_cmap(self, threshold=None, show=True, axes=None): + if threshold is None: + params_names = GevParams.SUMMARY_NAMES + df = self.df_gev_mle_each_massif + # todo: understand how Maurienne could be negative + # print(df.head()) + else: + params_names = GpdParams.SUMMARY_NAMES + df = self.df_gpd_mle_each_massif(threshold) + if axes is None: fig, axes = plt.subplots(1, len(params_names)) fig.subplots_adjust(hspace=1.0, wspace=1.0) for i, gev_param_name in enumerate(params_names): ax = axes[i] - - massif_name_to_value = self.df_gev_mle_each_massif.loc[gev_param_name, :].to_dict() + massif_name_to_value = df.loc[gev_param_name, :].to_dict() # Compute the middle point of the values for the color map values = list(massif_name_to_value.values()) vmin, vmax = min(values), max(values) - midpoint = 1 - vmax / (vmax + abs(vmin)) - maxmax = max(vmax, -vmin) - scaling_factor = 2 * maxmax - # print(gev_param_name, midpoint, vmin, vmax, scaling_factor) + try: + midpoint = 1 - vmax / (vmax + abs(vmin)) + except ZeroDivisionError: + pass # Load the shifted cmap to center on a middle point cmap = [plt.cm.coolwarm, plt.cm.bwr, plt.cm.seismic][1] - if gev_param_name == GevParams.SHAPE: + if gev_param_name == ExtremeParams.SHAPE: shifted_cmap = shiftedColorMap(cmap, midpoint=midpoint, name='shifted') norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax) else: @@ -112,12 +122,24 @@ class Safran(object): @property def df_gev_mle_each_massif(self): - # Fit a margin_fits n each massif + # Fit a margin_fits on each massif massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.summary_serie for massif_name in self.safran_massif_names} return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names) - """ Annual maxima of snowfall """ + def df_gpd_mle_each_massif(self, threshold): + # Fit a margin fit on each massif + massif_to_gev_mle = {massif_name: GpdMleFit(self.df_all_snowfall_concatenated[massif_name], threshold=threshold).gpd_params.summary_serie + for massif_name in self.safran_massif_names} + return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names) + + """ Data """ + + @property + def df_all_snowfall_concatenated(self): + df_list = [pd.DataFrame(snowfall, columns=self.safran_massif_names) for snowfall in self.year_to_snowfall.values()] + df_concatenated = pd.concat(df_list) + return df_concatenated @property def df_annual_maxima(self): @@ -126,14 +148,22 @@ class Safran(object): """ Load some attributes only once """ @cached_property - def year_to_annual_maxima(self): + def year_to_annual_maxima(self) -> OrderedDict: + # Map each year to an array of size nb_massif + year_to_annual_maxima = OrderedDict() + for year, snowfall in self.year_to_snowfall.items(): + year_to_annual_maxima[year] = snowfall.max(axis=0) + return year_to_annual_maxima + + @cached_property + def year_to_snowfall(self) -> OrderedDict: + # Map each year to a matrix of size 365-nb_days_consecutive+1 x nb_massifs year_to_safran_snowfall = {year: SafranSnowfall(dataset) for year, dataset in self.year_to_dataset_ordered_dict.items()} - year_to_annual_maxima = OrderedDict() + year_to_snowfall = OrderedDict() for year in self.year_to_dataset_ordered_dict.keys(): - year_to_annual_maxima[year] = year_to_safran_snowfall[year].annual_maxima_of_snowfall( - self.nb_days_of_snowfall) - return year_to_annual_maxima + year_to_snowfall[year] = year_to_safran_snowfall[year].annual_snowfall(self.nb_days_of_snowfall) + return year_to_snowfall @property def safran_massif_names(self): diff --git a/safran_study/snowfall_annual_maxima.py b/safran_study/snowfall_annual_maxima.py index a92c992f..f2bce13b 100644 --- a/safran_study/snowfall_annual_maxima.py +++ b/safran_study/snowfall_annual_maxima.py @@ -31,12 +31,17 @@ class SafranSnowfall(object): nb_days = len(hourly_snowfall) // 24 self.daily_snowfall = [sum(hourly_snowfall[24 * i:24 * (i+1)]) for i in range(nb_days)] - def annual_maxima_of_snowfall(self, nb_days_of_snowfall=1): + def annual_snowfall(self, nb_days_of_snowfall=1): # Aggregate the daily snowfall by the number of consecutive days shifted_list = [self.daily_snowfall[i:] for i in range(nb_days_of_snowfall)] + # First element of shifted_list is of length n, Second element of length n-1, Third element n-2.... + # The zip is done with respect to the shortest list snowfall_in_consecutive_days = [sum(e) for e in zip(*shifted_list)] # Return the maximum of the aggregated list - return np.array(snowfall_in_consecutive_days).max(axis=0) + # The returned array is of size n-nb_days+1 x nb_massif + return np.array(snowfall_in_consecutive_days) + + diff --git a/safran_study/visualize_safran.py b/safran_study/visualize_safran.py index 8fbcdaa6..2b921725 100644 --- a/safran_study/visualize_safran.py +++ b/safran_study/visualize_safran.py @@ -12,11 +12,11 @@ def load_all_safran(only_first_one=False): return all_safran -def fit_mle_gev_independent(): +def fit_mle_gev_independent(threshold=None): # Dump the result in a csv dfs = [] for safran in load_all_safran(only_first_one=True): - safran.visualize_gev_fit_with_cmap() + safran.visualize_margin_fits_with_cmap(threshold=threshold) # path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv' # df.to_csv(path.format(VERSION)) @@ -26,4 +26,5 @@ def fit_max_stab(): if __name__ == '__main__': - fit_mle_gev_independent() + threshold = [None, 20, 40, 60][1] + fit_mle_gev_independent(threshold=threshold) -- GitLab