Commit d5e81395 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[EXTREME ESTIMATOR] add safran visualization for gpd (with a threshold fixed by hand)

parent f58c2bd9
No related merge requests found
Showing with 84 additions and 34 deletions
+84 -34
......@@ -3,8 +3,6 @@ from typing import List
import numpy as np
import pandas as pd
from extreme_estimator.extreme_models.utils import r
class AbstractParams(object):
# Parameters
......@@ -29,6 +27,7 @@ class AbstractParams(object):
raise NotImplementedError
def to_dict(self) -> dict:
assert len(self.PARAM_NAMES) == len(self.param_values)
return dict(zip(self.PARAM_NAMES, self.param_values))
def to_serie(self) -> pd.Series:
......
from extreme_estimator.margin_fits.abstract_params import AbstractParams
from abc import ABC
from extreme_estimator.margin_fits.abstract_params import AbstractParams
class ExtremeParams(AbstractParams):
class ExtremeParams(AbstractParams, ABC):
# Extreme parameters
SCALE = 'scale'
LOC = 'loc'
SHAPE = 'shape'
PARAM_NAMES = [LOC, SCALE, SHAPE]
def __init__(self, loc: float, scale: float, shape: float):
self.location = loc
self.scale = scale
self.shape = shape
assert self.scale > 0
@property
def param_values(self):
return [self.location, self.scale, self.shape]
\ No newline at end of file
......@@ -4,9 +4,18 @@ from extreme_estimator.margin_fits.extreme_params import ExtremeParams
class GevParams(ExtremeParams):
# Parameters
PARAM_NAMES = [ExtremeParams.LOC, ExtremeParams.SCALE, ExtremeParams.SHAPE]
# Summary
SUMMARY_NAMES = PARAM_NAMES + ExtremeParams.QUANTILE_NAMES
def __init__(self, loc: float, scale: float, shape: float, block_size: int = None):
super().__init__(loc, scale, shape)
self.block_size = block_size
def quantile(self, p) -> float:
return r.qgev(p, self.location, self.scale, self.shape)[0]
@property
def param_values(self):
return [self.location, self.scale, self.shape]
\ No newline at end of file
......@@ -3,10 +3,20 @@ from extreme_estimator.margin_fits.extreme_params import ExtremeParams
class GpdParams(ExtremeParams):
# TODO: understand better why the gpdfit return 2 parameters, alors que d'un autre cote d autres definitions de la distribution parlent d un parametre location
def __init__(self, loc: float, scale: float, shape: float, threshold: float = None):
super().__init__(loc, scale, shape)
# Parameters
PARAM_NAMES = [ExtremeParams.SCALE, ExtremeParams.SHAPE]
# Summary
SUMMARY_NAMES = PARAM_NAMES + ExtremeParams.QUANTILE_NAMES
def __init__(self, scale: float, shape: float, threshold: float = None):
super().__init__(loc=0.0, scale=scale, shape=shape)
self.threshold = threshold
def quantile(self, p) -> float:
return r.qgpd(p, self.location, self.scale, self.shape)[0]
return r.qgpd(p, self.location, self.scale, self.shape)[0] + self.threshold
@property
def param_values(self):
return [self.scale, self.shape]
......@@ -12,4 +12,4 @@ class GpdMleFit(object):
self.x_gev = x_gev
self.threshold = threshold
self.mle_params = spatial_extreme_gpdmle_fit(x_gev, threshold)
self.gev_params = GpdParams.from_dict({**self.mle_params, 'threshold': threshold})
\ No newline at end of file
self.gpd_params = GpdParams.from_dict({**self.mle_params, 'threshold': threshold})
\ No newline at end of file
......@@ -10,8 +10,11 @@ from matplotlib import cm
from mpl_toolkits.axes_grid1 import make_axes_locatable
from netCDF4 import Dataset
from extreme_estimator.margin_fits.extreme_params import ExtremeParams
from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit
from extreme_estimator.margin_fits.gev.gev_params import GevParams
from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams
from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit
from safran_study.massif import safran_massif_names_from_datasets
from safran_study.shifted_color_map import shiftedColorMap
from safran_study.snowfall_annual_maxima import SafranSnowfall
......@@ -56,27 +59,34 @@ class Safran(object):
if show:
plt.show()
def visualize_gev_fit_with_cmap(self, show=True, axes=None):
params_names = GevParams.SUMMARY_NAMES
def visualize_margin_fits_with_cmap(self, threshold=None, show=True, axes=None):
if threshold is None:
params_names = GevParams.SUMMARY_NAMES
df = self.df_gev_mle_each_massif
# todo: understand how Maurienne could be negative
# print(df.head())
else:
params_names = GpdParams.SUMMARY_NAMES
df = self.df_gpd_mle_each_massif(threshold)
if axes is None:
fig, axes = plt.subplots(1, len(params_names))
fig.subplots_adjust(hspace=1.0, wspace=1.0)
for i, gev_param_name in enumerate(params_names):
ax = axes[i]
massif_name_to_value = self.df_gev_mle_each_massif.loc[gev_param_name, :].to_dict()
massif_name_to_value = df.loc[gev_param_name, :].to_dict()
# Compute the middle point of the values for the color map
values = list(massif_name_to_value.values())
vmin, vmax = min(values), max(values)
midpoint = 1 - vmax / (vmax + abs(vmin))
maxmax = max(vmax, -vmin)
scaling_factor = 2 * maxmax
# print(gev_param_name, midpoint, vmin, vmax, scaling_factor)
try:
midpoint = 1 - vmax / (vmax + abs(vmin))
except ZeroDivisionError:
pass
# Load the shifted cmap to center on a middle point
cmap = [plt.cm.coolwarm, plt.cm.bwr, plt.cm.seismic][1]
if gev_param_name == GevParams.SHAPE:
if gev_param_name == ExtremeParams.SHAPE:
shifted_cmap = shiftedColorMap(cmap, midpoint=midpoint, name='shifted')
norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
else:
......@@ -112,12 +122,24 @@ class Safran(object):
@property
def df_gev_mle_each_massif(self):
# Fit a margin_fits n each massif
# Fit a margin_fits on each massif
massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.summary_serie
for massif_name in self.safran_massif_names}
return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names)
""" Annual maxima of snowfall """
def df_gpd_mle_each_massif(self, threshold):
# Fit a margin fit on each massif
massif_to_gev_mle = {massif_name: GpdMleFit(self.df_all_snowfall_concatenated[massif_name], threshold=threshold).gpd_params.summary_serie
for massif_name in self.safran_massif_names}
return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names)
""" Data """
@property
def df_all_snowfall_concatenated(self):
df_list = [pd.DataFrame(snowfall, columns=self.safran_massif_names) for snowfall in self.year_to_snowfall.values()]
df_concatenated = pd.concat(df_list)
return df_concatenated
@property
def df_annual_maxima(self):
......@@ -126,14 +148,22 @@ class Safran(object):
""" Load some attributes only once """
@cached_property
def year_to_annual_maxima(self):
def year_to_annual_maxima(self) -> OrderedDict:
# Map each year to an array of size nb_massif
year_to_annual_maxima = OrderedDict()
for year, snowfall in self.year_to_snowfall.items():
year_to_annual_maxima[year] = snowfall.max(axis=0)
return year_to_annual_maxima
@cached_property
def year_to_snowfall(self) -> OrderedDict:
# Map each year to a matrix of size 365-nb_days_consecutive+1 x nb_massifs
year_to_safran_snowfall = {year: SafranSnowfall(dataset) for year, dataset in
self.year_to_dataset_ordered_dict.items()}
year_to_annual_maxima = OrderedDict()
year_to_snowfall = OrderedDict()
for year in self.year_to_dataset_ordered_dict.keys():
year_to_annual_maxima[year] = year_to_safran_snowfall[year].annual_maxima_of_snowfall(
self.nb_days_of_snowfall)
return year_to_annual_maxima
year_to_snowfall[year] = year_to_safran_snowfall[year].annual_snowfall(self.nb_days_of_snowfall)
return year_to_snowfall
@property
def safran_massif_names(self):
......
......@@ -31,12 +31,17 @@ class SafranSnowfall(object):
nb_days = len(hourly_snowfall) // 24
self.daily_snowfall = [sum(hourly_snowfall[24 * i:24 * (i+1)]) for i in range(nb_days)]
def annual_maxima_of_snowfall(self, nb_days_of_snowfall=1):
def annual_snowfall(self, nb_days_of_snowfall=1):
# Aggregate the daily snowfall by the number of consecutive days
shifted_list = [self.daily_snowfall[i:] for i in range(nb_days_of_snowfall)]
# First element of shifted_list is of length n, Second element of length n-1, Third element n-2....
# The zip is done with respect to the shortest list
snowfall_in_consecutive_days = [sum(e) for e in zip(*shifted_list)]
# Return the maximum of the aggregated list
return np.array(snowfall_in_consecutive_days).max(axis=0)
# The returned array is of size n-nb_days+1 x nb_massif
return np.array(snowfall_in_consecutive_days)
......
......@@ -12,11 +12,11 @@ def load_all_safran(only_first_one=False):
return all_safran
def fit_mle_gev_independent():
def fit_mle_gev_independent(threshold=None):
# Dump the result in a csv
dfs = []
for safran in load_all_safran(only_first_one=True):
safran.visualize_gev_fit_with_cmap()
safran.visualize_margin_fits_with_cmap(threshold=threshold)
# path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv'
# df.to_csv(path.format(VERSION))
......@@ -26,4 +26,5 @@ def fit_max_stab():
if __name__ == '__main__':
fit_mle_gev_independent()
threshold = [None, 20, 40, 60][1]
fit_mle_gev_independent(threshold=threshold)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment