From 2a3ea0991a0acdb6303df58cd445aec88e8c2ca1 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Fri, 22 Mar 2019 09:40:32 +0100 Subject: [PATCH] [SCM] add temporal_non_stationary mode to study_visualizer. estimate a smooth margin. --- .../abstract_extended_study.py | 7 ++ .../study_visualization/study_visualizer.py | 89 +++++++++++++------ .../abstract_margin_estimator.py | 2 +- .../margin_model/parametric_margin_model.py | 4 +- .../coordinates/abstract_coordinates.py | 3 + .../abstract_spatio_temporal_observations.py | 17 +++- 6 files changed, 92 insertions(+), 30 deletions(-) diff --git a/experiment/meteo_france_SCM_study/abstract_extended_study.py b/experiment/meteo_france_SCM_study/abstract_extended_study.py index d3dfe457..8d62a5b7 100644 --- a/experiment/meteo_france_SCM_study/abstract_extended_study.py +++ b/experiment/meteo_france_SCM_study/abstract_extended_study.py @@ -2,10 +2,14 @@ import numpy as np from collections import OrderedDict from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy +from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ + AbstractSpatialCoordinates class AbstractExtendedStudy(AbstractStudy): + + @property def region_names(self): return ['Alps', 'Northern Alps', 'Central Alps', 'Southern Alps', 'Extreme South Alps'] @@ -40,6 +44,9 @@ class AbstractExtendedStudy(AbstractStudy): """ Properties """ + def massifs_coordinates(self) -> AbstractSpatialCoordinates: + raise ValueError('Coordinates are meaningless for an extended study') + @property def _year_to_daily_time_serie_array(self) -> OrderedDict: return self._year_to_extended_time_serie(aggregation_function=np.mean) diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py index d138d7b1..d767bc56 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py @@ -25,6 +25,8 @@ from extreme_estimator.margin_fits.gev.gev_params import GevParams from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit +from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \ + AbstractSpatioTemporalCoordinates from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset from test.test_utils import load_test_max_stable_models from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_with_only_some_significant_digits @@ -32,16 +34,23 @@ from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_ BLOCK_MAXIMA_DISPLAY_NAME = 'block maxima ' - class StudyVisualizer(object): def __init__(self, study: AbstractStudy, show=True, save_to_file=False, only_one_graph=False, only_first_row=False, - vertical_kde_plot=False, year_for_kde_plot=None, plot_block_maxima_quantiles=False): + vertical_kde_plot=False, year_for_kde_plot=None, plot_block_maxima_quantiles=False, + temporal_non_stationarity=False): + self.temporal_non_stationarity = temporal_non_stationarity self.only_first_row = only_first_row self.only_one_graph = only_one_graph self.save_to_file = save_to_file self.study = study self.plot_name = None + + # Load some attributes + self._dataset = None + self._coordinates = None + self._observations = None + # KDE PLOT ARGUMENTS self.vertical_kde_plot = vertical_kde_plot self.year_for_kde_plot = year_for_kde_plot @@ -64,16 +73,33 @@ class StudyVisualizer(object): AbstractParamFunction.OUT_OF_BOUNDS_ASSERT = False @property - def observations(self): - return self.study.observations_annual_maxima + def dataset(self): + if self._dataset is None: + self._dataset = AbstractDataset(self.observations, self.coordinates) + return self._dataset @property def coordinates(self): - return self.study.massifs_coordinates + if self._coordinates is None: + coordinates = self.study.massifs_coordinates + if self.temporal_non_stationarity: + # Build spatio temporal dataset from a temporal dataset + df_spatial = coordinates.df_spatial_coordinates() + start, end = self.study.start_year_and_end_year + nb_steps = end - start + 1 + coordinates = AbstractSpatioTemporalCoordinates.from_df_spatial_and_nb_steps(df_spatial=df_spatial, + nb_steps=nb_steps, + start=start) + self._coordinates = coordinates + return self._coordinates @property - def dataset(self): - return AbstractDataset(self.observations, self.coordinates) + def observations(self): + if self._observations is None: + self._observations = self.study.observations_annual_maxima + if self.temporal_non_stationarity: + self._observations.convert_to_spatio_temporal_index(self.coordinates) + return self._observations # Graph for each massif / or groups of massifs @@ -234,29 +260,31 @@ class StudyVisualizer(object): max_stable_models = [] # Load axes (either a 2D or 3D array depending on self.coordinates) - nb_max_stable_models = len(max_stable_models) + 2 + nb_models = len(max_stable_models) + 1 nb_summary_names = GevParams.NB_SUMMARY_NAMES - if self.coordinates.has_temporal_coordinates: - nb_times_steps = AbstractMarginFunction.VISUALIZATION_TEMPORAL_STEPS + if self.temporal_non_stationarity: + nb_visualization_times_steps = AbstractMarginFunction.VISUALIZATION_TEMPORAL_STEPS # Create one plot for each max stable models axes = [] - for _ in range(nb_max_stable_models): - axes.append(create_adjusted_axes(nb_rows=nb_summary_names, nb_columns=nb_times_steps, + for _ in range(nb_models): + axes.append(create_adjusted_axes(nb_rows=nb_summary_names, nb_columns=nb_visualization_times_steps, figsize=self.figsize, subplot_space=self.subplot_space)) + # todo: add a fake vizu step at the end, where I could add the independent margin !! + # rajouter une colonne poru chaque plot, et donner cette colonne à independent margin else: - axes = create_adjusted_axes(nb_rows=nb_max_stable_models, nb_columns=nb_summary_names, + axes = create_adjusted_axes(nb_rows=nb_models + 1, nb_columns=nb_summary_names, figsize=self.figsize, subplot_space=self.subplot_space) - margin_class = LinearAllParametersAllDimsMarginModel + # Plot the margin fit independently on the additional row + self.visualize_independent_margin_fits(threshold=None, axes=axes[-1], show=False) - # Plot the margin fit independently - self.visualize_independent_margin_fits(threshold=None, axes=axes[0], show=False) + margin_class = LinearAllParametersAllDimsMarginModel # Plot the smooth margin only margin_model = margin_class(coordinates=self.coordinates) estimator = SmoothMarginEstimator(dataset=self.dataset, margin_model=margin_model) - self.fit_and_visualize_estimator(estimator, axes[1], title='without max stable') + self.fit_and_visualize_estimator(estimator, axes[0], title='without max stable') # Plot the smooth margin fitted with a max stable - for i, max_stable_model in enumerate(max_stable_models, 2): + for i, max_stable_model in enumerate(max_stable_models, 1): margin_model = margin_class(coordinates=self.coordinates) estimator = FullEstimatorInASingleStepWithSmoothMargin(self.dataset, margin_model, max_stable_model) title = get_display_name_from_object_type(type(max_stable_model)) @@ -273,10 +301,16 @@ class StudyVisualizer(object): margin_fct._visualization_y_limits = self.study.visualization_y_limits margin_fct.mask_2D = self.study.mask_french_alps - axes = margin_fct.visualize_function(show=False, axes=axes, title='') + axes = margin_fct.visualize_function(show=False, axes=axes, title='') # type: np.ndarray - self.visualize_contour_and_move_axes_limits(axes) - self.clean_axes_write_title_on_the_left(axes, title) + if axes.ndim == 1: + self.visualize_contour_and_move_axes_limits(axes) + self.clean_axes_write_title_on_the_left(axes, title) + else: + axes = np.transpose(axes) + for axes_line in axes: + self.visualize_contour_and_move_axes_limits(axes_line) + self.clean_axes_write_title_on_the_left(axes_line, title, left_border=False) def visualize_contour_and_move_axes_limits(self, axes): def get_lim_array(ax_with_lim_to_measure): @@ -292,8 +326,12 @@ class StudyVisualizer(object): # method(updated_lim[i, 0], updated_lim[i, 1]) @staticmethod - def clean_axes_write_title_on_the_left(axes, title): - for ax in axes[1:]: + def clean_axes_write_title_on_the_left(axes, title, left_border=True): + if left_border: + ax0, *clean_axes = axes + else: + *clean_axes, ax0 = axes + for ax in clean_axes: ax.tick_params(axis=u'both', which=u'both', length=0) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) @@ -302,12 +340,11 @@ class StudyVisualizer(object): ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) ax.set_aspect('equal') - ax0 = axes[0] ax0.get_yaxis().set_visible(True) sub_title = ax0.yaxis.get_label() full_title = title + '\n\n' + sub_title._text - ax0.set_ylabel(full_title) - ax0.set_ylabel(full_title) + label_function = ax0.set_ylabel if left_border else ax0.set_xlabel + label_function(full_title) ax0.tick_params(axis=u'both', which=u'both', length=0) def show_or_save_to_file(self): diff --git a/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py b/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py index a1b8264b..8ffb8bf2 100644 --- a/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py +++ b/extreme_estimator/estimator/margin_estimator/abstract_margin_estimator.py @@ -37,7 +37,7 @@ class SmoothMarginEstimator(AbstractMarginEstimator): self.margin_model = margin_model def _fit(self): - maxima_gev = self.dataset.maxima_gev(split=self.train_split) + maxima_gev = self.dataset.maxima_gev(self.train_split) df_coordinates_spat = self.dataset.coordinates.df_spatial_coordinates(self.train_split) df_coordinates_temp = self.dataset.coordinates.df_temporal_coordinates(self.train_split) self._result_from_fit = self.margin_model.fitmargin_from_maxima_gev(maxima_gev=maxima_gev, diff --git a/extreme_estimator/extreme_models/margin_model/parametric_margin_model.py b/extreme_estimator/extreme_models/margin_model/parametric_margin_model.py index b26d7efb..79b203a1 100644 --- a/extreme_estimator/extreme_models/margin_model/parametric_margin_model.py +++ b/extreme_estimator/extreme_models/margin_model/parametric_margin_model.py @@ -24,11 +24,13 @@ class ParametricMarginModel(AbstractMarginModel, ABC): df_coordinates_temp: pd.DataFrame) -> ResultFromFit: # The reshaping on the line below is only valid if we have a single observation per spatio-temporal point if maxima_gev.shape[1] == 1: - maxima_gev = maxima_gev.reshape([len(df_coordinates_temp), len(df_coordinates_spat)]) + maxima_gev = maxima_gev.reshape([len(df_coordinates_spat), len(df_coordinates_temp)]) data = np.transpose(maxima_gev) + assert data.shape[1] == len(df_coordinates_spat) fit_params = get_margin_formula(self.margin_function_start_fit.form_dict) + # Covariables covariables = get_coord(df_coordinates=df_coordinates_spat) fit_params['temp.cov'] = get_coord(df_coordinates=df_coordinates_temp) diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index b6d2da35..3d259a95 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -15,6 +15,9 @@ from spatio_temporal_dataset.slicer.temporal_slicer import TemporalSlicer class AbstractCoordinates(object): """ + Main attribute of the class is the DataFrame df_all_coordinates + Index are coordinates index + Columns are the value of each coordinates So far, the train_split_ratio is the same between the spatial part of the data, and the temporal part """ diff --git a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py index fd42288b..8b7076d0 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py @@ -2,6 +2,7 @@ import os.path as op import pandas as pd import numpy as np +from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.slicer.abstract_slicer import df_sliced, AbstractSlicer from spatio_temporal_dataset.slicer.split import Split @@ -14,8 +15,8 @@ class AbstractSpatioTemporalObservations(object): def __init__(self, df_maxima_frech: pd.DataFrame = None, df_maxima_gev: pd.DataFrame = None): """ Main attribute of the class is the DataFrame df_maxima - Index are stations index - Columns are the temporal moment of the maxima + Index are coordinates index + Columns are independent observations from the same coordinates index """ assert df_maxima_gev is not None or df_maxima_frech is not None if df_maxima_gev is not None and df_maxima_frech is not None: @@ -72,6 +73,18 @@ class AbstractSpatioTemporalObservations(object): assert pd.Index.equals(df_maxima_gev.columns, df_maxima_frech.columns) return cls(df_maxima_gev=df_maxima_gev, df_maxima_frech=df_maxima_frech) + def convert_to_spatio_temporal_index(self, coordinates: AbstractCoordinates): + assert coordinates.has_spatio_temporal_coordinates + assert len(coordinates.index) == len(self.index) * self.nb_obs + assert pd.Index.equals(self.index, coordinates.spatial_index()) + self.df_maxima_frech = self.flatten_df(self.df_maxima_frech, coordinates.index) + self.df_maxima_gev = self.flatten_df(self.df_maxima_gev, coordinates.index) + + @staticmethod + def flatten_df(df, index): + if df is not None: + return pd.DataFrame(np.concatenate([df[c].values for c in df.columns]), index=index) + def maxima_gev(self, split: Split = Split.all, slicer: AbstractSlicer = None) -> np.ndarray: return df_sliced(self.df_maxima_gev, split, slicer).values -- GitLab