Commit 2a3ea099 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[SCM] add temporal_non_stationary mode to study_visualizer. estimate a smooth margin.

parent 4ecac4e7
No related merge requests found
Showing with 92 additions and 30 deletions
+92 -30
...@@ -2,10 +2,14 @@ import numpy as np ...@@ -2,10 +2,14 @@ import numpy as np
from collections import OrderedDict from collections import OrderedDict
from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy
from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
AbstractSpatialCoordinates
class AbstractExtendedStudy(AbstractStudy): class AbstractExtendedStudy(AbstractStudy):
@property @property
def region_names(self): def region_names(self):
return ['Alps', 'Northern Alps', 'Central Alps', 'Southern Alps', 'Extreme South Alps'] return ['Alps', 'Northern Alps', 'Central Alps', 'Southern Alps', 'Extreme South Alps']
...@@ -40,6 +44,9 @@ class AbstractExtendedStudy(AbstractStudy): ...@@ -40,6 +44,9 @@ class AbstractExtendedStudy(AbstractStudy):
""" Properties """ """ Properties """
def massifs_coordinates(self) -> AbstractSpatialCoordinates:
raise ValueError('Coordinates are meaningless for an extended study')
@property @property
def _year_to_daily_time_serie_array(self) -> OrderedDict: def _year_to_daily_time_serie_array(self) -> OrderedDict:
return self._year_to_extended_time_serie(aggregation_function=np.mean) return self._year_to_extended_time_serie(aggregation_function=np.mean)
......
...@@ -25,6 +25,8 @@ from extreme_estimator.margin_fits.gev.gev_params import GevParams ...@@ -25,6 +25,8 @@ from extreme_estimator.margin_fits.gev.gev_params import GevParams
from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit
from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams
from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit
from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \
AbstractSpatioTemporalCoordinates
from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
from test.test_utils import load_test_max_stable_models from test.test_utils import load_test_max_stable_models
from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_with_only_some_significant_digits from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_with_only_some_significant_digits
...@@ -32,16 +34,23 @@ from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_ ...@@ -32,16 +34,23 @@ from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_
BLOCK_MAXIMA_DISPLAY_NAME = 'block maxima ' BLOCK_MAXIMA_DISPLAY_NAME = 'block maxima '
class StudyVisualizer(object): class StudyVisualizer(object):
def __init__(self, study: AbstractStudy, show=True, save_to_file=False, only_one_graph=False, only_first_row=False, def __init__(self, study: AbstractStudy, show=True, save_to_file=False, only_one_graph=False, only_first_row=False,
vertical_kde_plot=False, year_for_kde_plot=None, plot_block_maxima_quantiles=False): vertical_kde_plot=False, year_for_kde_plot=None, plot_block_maxima_quantiles=False,
temporal_non_stationarity=False):
self.temporal_non_stationarity = temporal_non_stationarity
self.only_first_row = only_first_row self.only_first_row = only_first_row
self.only_one_graph = only_one_graph self.only_one_graph = only_one_graph
self.save_to_file = save_to_file self.save_to_file = save_to_file
self.study = study self.study = study
self.plot_name = None self.plot_name = None
# Load some attributes
self._dataset = None
self._coordinates = None
self._observations = None
# KDE PLOT ARGUMENTS # KDE PLOT ARGUMENTS
self.vertical_kde_plot = vertical_kde_plot self.vertical_kde_plot = vertical_kde_plot
self.year_for_kde_plot = year_for_kde_plot self.year_for_kde_plot = year_for_kde_plot
...@@ -64,16 +73,33 @@ class StudyVisualizer(object): ...@@ -64,16 +73,33 @@ class StudyVisualizer(object):
AbstractParamFunction.OUT_OF_BOUNDS_ASSERT = False AbstractParamFunction.OUT_OF_BOUNDS_ASSERT = False
@property @property
def observations(self): def dataset(self):
return self.study.observations_annual_maxima if self._dataset is None:
self._dataset = AbstractDataset(self.observations, self.coordinates)
return self._dataset
@property @property
def coordinates(self): def coordinates(self):
return self.study.massifs_coordinates if self._coordinates is None:
coordinates = self.study.massifs_coordinates
if self.temporal_non_stationarity:
# Build spatio temporal dataset from a temporal dataset
df_spatial = coordinates.df_spatial_coordinates()
start, end = self.study.start_year_and_end_year
nb_steps = end - start + 1
coordinates = AbstractSpatioTemporalCoordinates.from_df_spatial_and_nb_steps(df_spatial=df_spatial,
nb_steps=nb_steps,
start=start)
self._coordinates = coordinates
return self._coordinates
@property @property
def dataset(self): def observations(self):
return AbstractDataset(self.observations, self.coordinates) if self._observations is None:
self._observations = self.study.observations_annual_maxima
if self.temporal_non_stationarity:
self._observations.convert_to_spatio_temporal_index(self.coordinates)
return self._observations
# Graph for each massif / or groups of massifs # Graph for each massif / or groups of massifs
...@@ -234,29 +260,31 @@ class StudyVisualizer(object): ...@@ -234,29 +260,31 @@ class StudyVisualizer(object):
max_stable_models = [] max_stable_models = []
# Load axes (either a 2D or 3D array depending on self.coordinates) # Load axes (either a 2D or 3D array depending on self.coordinates)
nb_max_stable_models = len(max_stable_models) + 2 nb_models = len(max_stable_models) + 1
nb_summary_names = GevParams.NB_SUMMARY_NAMES nb_summary_names = GevParams.NB_SUMMARY_NAMES
if self.coordinates.has_temporal_coordinates: if self.temporal_non_stationarity:
nb_times_steps = AbstractMarginFunction.VISUALIZATION_TEMPORAL_STEPS nb_visualization_times_steps = AbstractMarginFunction.VISUALIZATION_TEMPORAL_STEPS
# Create one plot for each max stable models # Create one plot for each max stable models
axes = [] axes = []
for _ in range(nb_max_stable_models): for _ in range(nb_models):
axes.append(create_adjusted_axes(nb_rows=nb_summary_names, nb_columns=nb_times_steps, axes.append(create_adjusted_axes(nb_rows=nb_summary_names, nb_columns=nb_visualization_times_steps,
figsize=self.figsize, subplot_space=self.subplot_space)) figsize=self.figsize, subplot_space=self.subplot_space))
# todo: add a fake vizu step at the end, where I could add the independent margin !!
# rajouter une colonne poru chaque plot, et donner cette colonne à independent margin
else: else:
axes = create_adjusted_axes(nb_rows=nb_max_stable_models, nb_columns=nb_summary_names, axes = create_adjusted_axes(nb_rows=nb_models + 1, nb_columns=nb_summary_names,
figsize=self.figsize, subplot_space=self.subplot_space) figsize=self.figsize, subplot_space=self.subplot_space)
margin_class = LinearAllParametersAllDimsMarginModel # Plot the margin fit independently on the additional row
self.visualize_independent_margin_fits(threshold=None, axes=axes[-1], show=False)
# Plot the margin fit independently margin_class = LinearAllParametersAllDimsMarginModel
self.visualize_independent_margin_fits(threshold=None, axes=axes[0], show=False)
# Plot the smooth margin only # Plot the smooth margin only
margin_model = margin_class(coordinates=self.coordinates) margin_model = margin_class(coordinates=self.coordinates)
estimator = SmoothMarginEstimator(dataset=self.dataset, margin_model=margin_model) estimator = SmoothMarginEstimator(dataset=self.dataset, margin_model=margin_model)
self.fit_and_visualize_estimator(estimator, axes[1], title='without max stable') self.fit_and_visualize_estimator(estimator, axes[0], title='without max stable')
# Plot the smooth margin fitted with a max stable # Plot the smooth margin fitted with a max stable
for i, max_stable_model in enumerate(max_stable_models, 2): for i, max_stable_model in enumerate(max_stable_models, 1):
margin_model = margin_class(coordinates=self.coordinates) margin_model = margin_class(coordinates=self.coordinates)
estimator = FullEstimatorInASingleStepWithSmoothMargin(self.dataset, margin_model, max_stable_model) estimator = FullEstimatorInASingleStepWithSmoothMargin(self.dataset, margin_model, max_stable_model)
title = get_display_name_from_object_type(type(max_stable_model)) title = get_display_name_from_object_type(type(max_stable_model))
...@@ -273,10 +301,16 @@ class StudyVisualizer(object): ...@@ -273,10 +301,16 @@ class StudyVisualizer(object):
margin_fct._visualization_y_limits = self.study.visualization_y_limits margin_fct._visualization_y_limits = self.study.visualization_y_limits
margin_fct.mask_2D = self.study.mask_french_alps margin_fct.mask_2D = self.study.mask_french_alps
axes = margin_fct.visualize_function(show=False, axes=axes, title='') axes = margin_fct.visualize_function(show=False, axes=axes, title='') # type: np.ndarray
self.visualize_contour_and_move_axes_limits(axes) if axes.ndim == 1:
self.clean_axes_write_title_on_the_left(axes, title) self.visualize_contour_and_move_axes_limits(axes)
self.clean_axes_write_title_on_the_left(axes, title)
else:
axes = np.transpose(axes)
for axes_line in axes:
self.visualize_contour_and_move_axes_limits(axes_line)
self.clean_axes_write_title_on_the_left(axes_line, title, left_border=False)
def visualize_contour_and_move_axes_limits(self, axes): def visualize_contour_and_move_axes_limits(self, axes):
def get_lim_array(ax_with_lim_to_measure): def get_lim_array(ax_with_lim_to_measure):
...@@ -292,8 +326,12 @@ class StudyVisualizer(object): ...@@ -292,8 +326,12 @@ class StudyVisualizer(object):
# method(updated_lim[i, 0], updated_lim[i, 1]) # method(updated_lim[i, 0], updated_lim[i, 1])
@staticmethod @staticmethod
def clean_axes_write_title_on_the_left(axes, title): def clean_axes_write_title_on_the_left(axes, title, left_border=True):
for ax in axes[1:]: if left_border:
ax0, *clean_axes = axes
else:
*clean_axes, ax0 = axes
for ax in clean_axes:
ax.tick_params(axis=u'both', which=u'both', length=0) ax.tick_params(axis=u'both', which=u'both', length=0)
ax.spines['top'].set_visible(False) ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False) ax.spines['right'].set_visible(False)
...@@ -302,12 +340,11 @@ class StudyVisualizer(object): ...@@ -302,12 +340,11 @@ class StudyVisualizer(object):
ax.get_xaxis().set_visible(False) ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False) ax.get_yaxis().set_visible(False)
ax.set_aspect('equal') ax.set_aspect('equal')
ax0 = axes[0]
ax0.get_yaxis().set_visible(True) ax0.get_yaxis().set_visible(True)
sub_title = ax0.yaxis.get_label() sub_title = ax0.yaxis.get_label()
full_title = title + '\n\n' + sub_title._text full_title = title + '\n\n' + sub_title._text
ax0.set_ylabel(full_title) label_function = ax0.set_ylabel if left_border else ax0.set_xlabel
ax0.set_ylabel(full_title) label_function(full_title)
ax0.tick_params(axis=u'both', which=u'both', length=0) ax0.tick_params(axis=u'both', which=u'both', length=0)
def show_or_save_to_file(self): def show_or_save_to_file(self):
......
...@@ -37,7 +37,7 @@ class SmoothMarginEstimator(AbstractMarginEstimator): ...@@ -37,7 +37,7 @@ class SmoothMarginEstimator(AbstractMarginEstimator):
self.margin_model = margin_model self.margin_model = margin_model
def _fit(self): def _fit(self):
maxima_gev = self.dataset.maxima_gev(split=self.train_split) maxima_gev = self.dataset.maxima_gev(self.train_split)
df_coordinates_spat = self.dataset.coordinates.df_spatial_coordinates(self.train_split) df_coordinates_spat = self.dataset.coordinates.df_spatial_coordinates(self.train_split)
df_coordinates_temp = self.dataset.coordinates.df_temporal_coordinates(self.train_split) df_coordinates_temp = self.dataset.coordinates.df_temporal_coordinates(self.train_split)
self._result_from_fit = self.margin_model.fitmargin_from_maxima_gev(maxima_gev=maxima_gev, self._result_from_fit = self.margin_model.fitmargin_from_maxima_gev(maxima_gev=maxima_gev,
......
...@@ -24,11 +24,13 @@ class ParametricMarginModel(AbstractMarginModel, ABC): ...@@ -24,11 +24,13 @@ class ParametricMarginModel(AbstractMarginModel, ABC):
df_coordinates_temp: pd.DataFrame) -> ResultFromFit: df_coordinates_temp: pd.DataFrame) -> ResultFromFit:
# The reshaping on the line below is only valid if we have a single observation per spatio-temporal point # The reshaping on the line below is only valid if we have a single observation per spatio-temporal point
if maxima_gev.shape[1] == 1: if maxima_gev.shape[1] == 1:
maxima_gev = maxima_gev.reshape([len(df_coordinates_temp), len(df_coordinates_spat)]) maxima_gev = maxima_gev.reshape([len(df_coordinates_spat), len(df_coordinates_temp)])
data = np.transpose(maxima_gev) data = np.transpose(maxima_gev)
assert data.shape[1] == len(df_coordinates_spat)
fit_params = get_margin_formula(self.margin_function_start_fit.form_dict) fit_params = get_margin_formula(self.margin_function_start_fit.form_dict)
# Covariables # Covariables
covariables = get_coord(df_coordinates=df_coordinates_spat) covariables = get_coord(df_coordinates=df_coordinates_spat)
fit_params['temp.cov'] = get_coord(df_coordinates=df_coordinates_temp) fit_params['temp.cov'] = get_coord(df_coordinates=df_coordinates_temp)
......
...@@ -15,6 +15,9 @@ from spatio_temporal_dataset.slicer.temporal_slicer import TemporalSlicer ...@@ -15,6 +15,9 @@ from spatio_temporal_dataset.slicer.temporal_slicer import TemporalSlicer
class AbstractCoordinates(object): class AbstractCoordinates(object):
""" """
Main attribute of the class is the DataFrame df_all_coordinates
Index are coordinates index
Columns are the value of each coordinates
So far, the train_split_ratio is the same between the spatial part of the data, and the temporal part So far, the train_split_ratio is the same between the spatial part of the data, and the temporal part
""" """
......
...@@ -2,6 +2,7 @@ import os.path as op ...@@ -2,6 +2,7 @@ import os.path as op
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.slicer.abstract_slicer import df_sliced, AbstractSlicer from spatio_temporal_dataset.slicer.abstract_slicer import df_sliced, AbstractSlicer
from spatio_temporal_dataset.slicer.split import Split from spatio_temporal_dataset.slicer.split import Split
...@@ -14,8 +15,8 @@ class AbstractSpatioTemporalObservations(object): ...@@ -14,8 +15,8 @@ class AbstractSpatioTemporalObservations(object):
def __init__(self, df_maxima_frech: pd.DataFrame = None, df_maxima_gev: pd.DataFrame = None): def __init__(self, df_maxima_frech: pd.DataFrame = None, df_maxima_gev: pd.DataFrame = None):
""" """
Main attribute of the class is the DataFrame df_maxima Main attribute of the class is the DataFrame df_maxima
Index are stations index Index are coordinates index
Columns are the temporal moment of the maxima Columns are independent observations from the same coordinates index
""" """
assert df_maxima_gev is not None or df_maxima_frech is not None assert df_maxima_gev is not None or df_maxima_frech is not None
if df_maxima_gev is not None and df_maxima_frech is not None: if df_maxima_gev is not None and df_maxima_frech is not None:
...@@ -72,6 +73,18 @@ class AbstractSpatioTemporalObservations(object): ...@@ -72,6 +73,18 @@ class AbstractSpatioTemporalObservations(object):
assert pd.Index.equals(df_maxima_gev.columns, df_maxima_frech.columns) assert pd.Index.equals(df_maxima_gev.columns, df_maxima_frech.columns)
return cls(df_maxima_gev=df_maxima_gev, df_maxima_frech=df_maxima_frech) return cls(df_maxima_gev=df_maxima_gev, df_maxima_frech=df_maxima_frech)
def convert_to_spatio_temporal_index(self, coordinates: AbstractCoordinates):
assert coordinates.has_spatio_temporal_coordinates
assert len(coordinates.index) == len(self.index) * self.nb_obs
assert pd.Index.equals(self.index, coordinates.spatial_index())
self.df_maxima_frech = self.flatten_df(self.df_maxima_frech, coordinates.index)
self.df_maxima_gev = self.flatten_df(self.df_maxima_gev, coordinates.index)
@staticmethod
def flatten_df(df, index):
if df is not None:
return pd.DataFrame(np.concatenate([df[c].values for c in df.columns]), index=index)
def maxima_gev(self, split: Split = Split.all, slicer: AbstractSlicer = None) -> np.ndarray: def maxima_gev(self, split: Split = Split.all, slicer: AbstractSlicer = None) -> np.ndarray:
return df_sliced(self.df_maxima_gev, split, slicer).values return df_sliced(self.df_maxima_gev, split, slicer).values
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment