Commit f5c1e693 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[contrasting] add likelihood & improve plot of the model moments

parent 1232a342
No related merge requests found
Showing with 95 additions and 35 deletions
+95 -35
...@@ -38,7 +38,6 @@ class LinearMarginEstimator(AbstractMarginEstimator): ...@@ -38,7 +38,6 @@ class LinearMarginEstimator(AbstractMarginEstimator):
return self.dataset.coordinates.df_spatial_coordinates(split=self.train_split, return self.dataset.coordinates.df_spatial_coordinates(split=self.train_split,
drop_duplicates=self.margin_model.drop_duplicates) drop_duplicates=self.margin_model.drop_duplicates)
@property @property
def df_coordinates_temp(self): def df_coordinates_temp(self):
return self.dataset.coordinates.df_temporal_coordinates_for_fit(split=self.train_split, return self.dataset.coordinates.df_temporal_coordinates_for_fit(split=self.train_split,
...@@ -67,6 +66,9 @@ class LinearMarginEstimator(AbstractMarginEstimator): ...@@ -67,6 +66,9 @@ class LinearMarginEstimator(AbstractMarginEstimator):
assert not np.isinf(nllh) assert not np.isinf(nllh)
return nllh return nllh
def deviance(self, split=Split.all):
return 2 * self.nllh(split=split)
def aic(self, split=Split.all): def aic(self, split=Split.all):
aic = 2 * self.margin_model.nb_params + 2 * self.nllh(split=split) aic = 2 * self.margin_model.nb_params + 2 * self.nllh(split=split)
npt.assert_almost_equal(self.result_from_model_fit.aic, aic, decimal=5) npt.assert_almost_equal(self.result_from_model_fit.aic, aic, decimal=5)
...@@ -75,4 +77,3 @@ class LinearMarginEstimator(AbstractMarginEstimator): ...@@ -75,4 +77,3 @@ class LinearMarginEstimator(AbstractMarginEstimator):
def bic(self, split=Split.all): def bic(self, split=Split.all):
n = len(self.dataset.maxima_gev(split=split)) n = len(self.dataset.maxima_gev(split=split))
return np.log(n) * self.margin_model.nb_params + 2 * self.nllh(split=split) return np.log(n) * self.margin_model.nb_params + 2 * self.nllh(split=split)
...@@ -45,17 +45,25 @@ class AltitudesStudies(object): ...@@ -45,17 +45,25 @@ class AltitudesStudies(object):
def spatio_temporal_dataset(self, massif_name, s_split_spatial: pd.Series = None, def spatio_temporal_dataset(self, massif_name, s_split_spatial: pd.Series = None,
s_split_temporal: pd.Series = None): s_split_temporal: pd.Series = None):
coordinate_values_to_maxima = {} coordinate_values_to_maxima = {}
massif_altitudes = [] massif_altitudes = self.massif_name_to_altitudes[massif_name]
for altitude in self.altitudes: for altitude in massif_altitudes:
study = self.altitude_to_study[altitude] study = self.altitude_to_study[altitude]
if massif_name in study.study_massif_names: for year, maxima in zip(study.ordered_years, study.massif_name_to_annual_maxima[massif_name]):
massif_altitudes.append(altitude) coordinate_values_to_maxima[(altitude, year)] = [maxima]
for year, maxima in zip(study.ordered_years, study.massif_name_to_annual_maxima[massif_name]):
coordinate_values_to_maxima[(altitude, year)] = [maxima]
coordinates = self.spatio_temporal_coordinates(s_split_spatial, s_split_temporal, massif_altitudes) coordinates = self.spatio_temporal_coordinates(s_split_spatial, s_split_temporal, massif_altitudes)
observations = AnnualMaxima.from_coordinates(coordinates, coordinate_values_to_maxima) observations = AnnualMaxima.from_coordinates(coordinates, coordinate_values_to_maxima)
return AbstractDataset(observations=observations, coordinates=coordinates) return AbstractDataset(observations=observations, coordinates=coordinates)
@cached_property
def massif_name_to_altitudes(self):
massif_names = self.study.all_massif_names()
massif_name_to_altitudes = {massif_name: [] for massif_name in massif_names}
for altitude in self.altitudes:
study = self.altitude_to_study[altitude]
for massif_name in study.study_massif_names:
massif_name_to_altitudes[massif_name].append(altitude)
return massif_name_to_altitudes
# Coordinates Loader # Coordinates Loader
def spatio_temporal_coordinates(self, s_split_spatial: pd.Series = None, s_split_temporal: pd.Series = None, def spatio_temporal_coordinates(self, s_split_spatial: pd.Series = None, s_split_temporal: pd.Series = None,
......
import matplotlib as mpl
mpl.rcParams['text.usetex'] = True
mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']
from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day, SafranSnowfall3Days, \ from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day, SafranSnowfall3Days, \
SafranSnowfall5Days, SafranSnowfall7Days, SafranPrecipitation1Day, SafranPrecipitation3Days, \ SafranSnowfall5Days, SafranSnowfall7Days, SafranPrecipitation1Day, SafranPrecipitation3Days, \
SafranPrecipitation5Days, SafranPrecipitation7Days SafranPrecipitation5Days, SafranPrecipitation7Days
...@@ -13,8 +17,7 @@ def plot_altitudinal_fit(studies, massif_names=None): ...@@ -13,8 +17,7 @@ def plot_altitudinal_fit(studies, massif_names=None):
model_classes=ALTITUDINAL_MODELS, model_classes=ALTITUDINAL_MODELS,
massif_names=massif_names, massif_names=massif_names,
show=False) show=False)
visualizer.plot_mean() visualizer.plot_moments()
visualizer.plot_relative_change()
visualizer.plot_shape_map() visualizer.plot_shape_map()
...@@ -29,8 +32,9 @@ def plot_moments(studies, massif_names=None): ...@@ -29,8 +32,9 @@ def plot_moments(studies, massif_names=None):
def main(): def main():
# altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000] altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][4:7]
altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900] altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000][:]
# altitudes = [900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900]
study_classes = [SafranSnowfall1Day, SafranSnowfall3Days, SafranSnowfall5Days, SafranSnowfall7Days][:2] study_classes = [SafranSnowfall1Day, SafranSnowfall3Days, SafranSnowfall5Days, SafranSnowfall7Days][:2]
study_classes = [SafranPrecipitation1Day, SafranPrecipitation3Days, SafranPrecipitation5Days, study_classes = [SafranPrecipitation1Day, SafranPrecipitation3Days, SafranPrecipitation5Days,
SafranPrecipitation7Days][:] SafranPrecipitation7Days][:]
...@@ -42,7 +46,7 @@ def main(): ...@@ -42,7 +46,7 @@ def main():
for study_class in study_classes: for study_class in study_classes:
studies = AltitudesStudies(study_class, altitudes, season=Season.winter_extended) studies = AltitudesStudies(study_class, altitudes, season=Season.winter_extended)
# plot_time_series(studies, massif_names) # plot_time_series(studies, massif_names)
plot_moments(studies, massif_names) # plot_moments(studies, massif_names)
plot_altitudinal_fit(studies, massif_names) plot_altitudinal_fit(studies, massif_names)
......
...@@ -34,23 +34,26 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer): ...@@ -34,23 +34,26 @@ class AltitudesStudiesVisualizerForNonStationaryModels(StudyVisualizer):
old_fold_fit = OneFoldFit(massif_name, dataset, model_classes, self.fit_method) old_fold_fit = OneFoldFit(massif_name, dataset, model_classes, self.fit_method)
self.massif_name_to_one_fold_fit[massif_name] = old_fold_fit self.massif_name_to_one_fold_fit[massif_name] = old_fold_fit
def plot_mean(self): def plot_moments(self):
self.plot_general('mean') for method_name in ['moment', 'changes_in_the_moment', 'relative_changes_in_the_moment']:
for order in [1, 2]:
self.plot_general(method_name, order)
def plot_relative_change(self): def plot_general(self, method_name, order):
self.plot_general('relative_changes_in_the_mean')
def plot_general(self, method_name):
ax = plt.gca() ax = plt.gca()
min_altitude, *_, max_altitude = self.studies.altitudes min_altitude, *_, max_altitude = self.studies.altitudes
altitudes = np.linspace(min_altitude, max_altitude, num=50) altitudes_plot = np.linspace(min_altitude, max_altitude, num=50)
for massif_id, massif_name in enumerate(self.massif_names): for massif_id, massif_name in enumerate(self.massif_names):
massif_altitudes = self.studies.massif_name_to_altitudes[massif_name]
ind = (min(massif_altitudes) <= altitudes_plot) & (altitudes_plot <= max(massif_altitudes))
massif_altitudes_plot = altitudes_plot[ind]
old_fold_fit = self.massif_name_to_one_fold_fit[massif_name] old_fold_fit = self.massif_name_to_one_fold_fit[massif_name]
values = old_fold_fit.__getattribute__(method_name)(altitudes) values = old_fold_fit.__getattribute__(method_name)(massif_altitudes_plot, order=order)
plot_against_altitude(altitudes, ax, massif_id, massif_name, values) plot_against_altitude(massif_altitudes_plot, ax, massif_id, massif_name, values)
# Plot settings # Plot settings
ax.legend(prop={'size': 7}, ncol=3) ax.legend(prop={'size': 7}, ncol=3)
moment = ' '.join(method_name.split('_')) moment = ' '.join(method_name.split('_'))
moment = moment.replace('moment', 'mean' if order == 1 else 'std')
plot_name = '{} annual maxima of {}'.format(moment, SCM_STUDY_CLASS_TO_ABBREVIATION[self.studies.study_class]) plot_name = '{} annual maxima of {}'.format(moment, SCM_STUDY_CLASS_TO_ABBREVIATION[self.studies.study_class])
ax.set_ylabel('{} ({})'.format(plot_name, self.study.variable_unit), fontsize=15) ax.set_ylabel('{} ({})'.format(plot_name, self.study.variable_unit), fontsize=15)
ax.set_xlabel('altitudes', fontsize=15) ax.set_xlabel('altitudes', fontsize=15)
......
...@@ -2,12 +2,15 @@ import numpy.testing as npt ...@@ -2,12 +2,15 @@ import numpy.testing as npt
import numpy as np import numpy as np
import rpy2 import rpy2
from cached_property import cached_property from cached_property import cached_property
from scipy.stats import chi2
from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator_short from extreme_fit.estimator.margin_estimator.utils import fitted_linear_margin_estimator_short
from extreme_fit.model.margin_model.polynomial_margin_model.altitudinal_models import StationaryAltitudinal
from extreme_fit.model.margin_model.utils import MarginFitMethod from extreme_fit.model.margin_model.utils import MarginFitMethod
class OneFoldFit(object): class OneFoldFit(object):
SIGNIFICANCE_LEVEL = 0.05
def __init__(self, massif_name, dataset, models_classes, fit_method=MarginFitMethod.extremes_fevd_mle): def __init__(self, massif_name, dataset, models_classes, fit_method=MarginFitMethod.extremes_fevd_mle):
self.massif_name = massif_name self.massif_name = massif_name
...@@ -22,22 +25,37 @@ class OneFoldFit(object): ...@@ -22,22 +25,37 @@ class OneFoldFit(object):
dataset=self.dataset, dataset=self.dataset,
fit_method=self.fit_method) fit_method=self.fit_method)
def mean(self, altitudes, year=2019): def get_moment(self, altitude, year, order=1):
return [self.get_mean(altitude, year) for altitude in altitudes] gev_params = self.get_gev_params(altitude, year)
if order == 1:
def get_mean(self, altitude, year): return gev_params.mean
return self.get_gev_params(altitude, year).mean elif order == 2:
return gev_params.std
else:
raise NotImplementedError
def get_gev_params(self, altitude, year): def get_gev_params(self, altitude, year):
coordinate = np.array([altitude, year]) coordinate = np.array([altitude, year])
gev_params = self.best_function_from_fit.get_params(coordinate, is_transformed=False) gev_params = self.best_function_from_fit.get_params(coordinate, is_transformed=False)
return gev_params return gev_params
def relative_changes_in_the_mean(self, altitudes, year=2019, nb_years=50): def moment(self, altitudes, year=2019, order=1):
return [self.get_moment(altitude, year, order) for altitude in altitudes]
def changes_in_the_moment(self, altitudes, year=2019, nb_years=50, order=1):
changes = []
for altitude in altitudes:
mean_after = self.get_moment(altitude, year, order)
mean_before = self.get_moment(altitude, year - nb_years, order)
change = mean_after - mean_before
changes.append(change)
return changes
def relative_changes_in_the_moment(self, altitudes, year=2019, nb_years=50, order=1):
relative_changes = [] relative_changes = []
for altitude in altitudes: for altitude in altitudes:
mean_after = self.get_mean(altitude, year) mean_after = self.get_moment(altitude, year, order)
mean_before = self.get_mean(altitude, year - nb_years) mean_before = self.get_moment(altitude, year - nb_years, order)
relative_change = 100 * (mean_after - mean_before) / mean_before relative_change = 100 * (mean_after - mean_before) / mean_before
relative_changes.append(relative_change) relative_changes.append(relative_change)
return relative_changes return relative_changes
...@@ -75,4 +93,29 @@ class OneFoldFit(object): ...@@ -75,4 +93,29 @@ class OneFoldFit(object):
@property @property
def best_name(self): def best_name(self):
return self.best_estimator.margin_model.name_str name = self.best_estimator.margin_model.name_str
latex_command = 'textbf' if self.is_significant else 'textrm'
return '$\\' + latex_command + '{' + name + '}$'
# Significant
@property
def stationary_estimator(self):
for estimator in self.sorted_estimators_with_finite_aic:
if isinstance(estimator.margin_model, StationaryAltitudinal):
return estimator
@property
def likelihood_ratio(self):
return self.stationary_estimator.deviance() - self.best_estimator.deviance()
@property
def degree_freedom_chi2(self):
return self.best_estimator.margin_model.nb_params - self.stationary_estimator.margin_model.nb_params
@property
def is_significant(self) -> bool:
if isinstance(self.best_estimator.margin_model, StationaryAltitudinal):
return False
else:
return self.likelihood_ratio > chi2.ppf(q=1 - self.SIGNIFICANCE_LEVEL, df=self.degree_freedom_chi2)
...@@ -84,10 +84,11 @@ class TestGevTemporalQuadraticExtremesMle(unittest.TestCase): ...@@ -84,10 +84,11 @@ class TestGevTemporalQuadraticExtremesMle(unittest.TestCase):
# self.assertAlmostEqual(estimator.result_from_model_fit.aic, estimator.aic(split=estimator.train_split)) # self.assertAlmostEqual(estimator.result_from_model_fit.aic, estimator.aic(split=estimator.train_split))
# self.assertAlmostEqual(estimator.result_from_model_fit.bic, estimator.bic(split=estimator.train_split)) # self.assertAlmostEqual(estimator.result_from_model_fit.bic, estimator.bic(split=estimator.train_split))
# #
# # def test_altitudinal_models(self): # #
# # for model_class in ALTITUDINAL_MODELS: # # # def test_altitudinal_models(self):
# # self.common_test(model_class) # # # for model_class in ALTITUDINAL_MODELS:
# # # # self.common_test(model_class)
# #
# def test_wrong(self): # def test_wrong(self):
# self.common_test(NonStationaryAltitudinalLocationQuadraticCrossTermForLocation) # self.common_test(NonStationaryAltitudinalLocationQuadraticCrossTermForLocation)
# # self.common_test(NonStationaryAltitudinalLocationQuadraticScaleLinearCrossTermForLocation) # # self.common_test(NonStationaryAltitudinalLocationQuadraticScaleLinearCrossTermForLocation)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment