diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py index a54c835822ad60a815b405845f5d9771b26f3a42..e5259df593c9e7db722f7039a2da945578851df5 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py @@ -119,8 +119,8 @@ def complete_analysis(only_first_one=False): def trend_analysis(): - save_to_file = True - only_first_one = False + save_to_file = False + only_first_one = True short_altitudes = [300, 1200, 2100, 3000][:1] full_altitude_with_at_least_2_stations = [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900, 4200][:] durand_altitude = [1800] @@ -129,14 +129,23 @@ def trend_analysis(): study_classes = [CrocusSwe, CrocusDepth, SafranSnowfall, SafranRainfall, SafranTemperature][2:3] for study in study_iterator_global(study_classes, only_first_one=only_first_one, altitudes=altitudes): study_visualizer = StudyVisualizer(study, save_to_file=save_to_file, - transformation_class=normalization_class) - study_visualizer.visualize_temporal_trend_relevance(complete_analysis=False, multiprocessing=True) + transformation_class=normalization_class, + temporal_non_stationarity=True, + verbose=True, + multiprocessing=True, + complete_non_stationary_trend_analysis=False) + # study_visualizer.only_one_graph = True + study_visualizer.visualize_all_independent_temporal_trend() + # study_visualizer.visualize_temporal_trend_relevance() def main_run(): + # normal_visualization(temporal_non_stationarity=True) + trend_analysis() + + + # annual_mean_vizu_compare_durand_study(safran=True, take_mean_value=True, altitude=2100) - normal_visualization(temporal_non_stationarity=True) - # trend_analysis() # max_stable_process_vizu_compare_gaume_study(altitude=1800, nb_days=1) diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/non_stationary_trends.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/non_stationary_trends.py index b805e09a72412775158e50e3467ec660d553bb4d..ae8de281e0ceea2a5068569886e6183e1a52373c 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/non_stationary_trends.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/non_stationary_trends.py @@ -15,6 +15,8 @@ from extreme_estimator.extreme_models.margin_model.linear_margin_model import \ LinearAllParametersTwoFirstCoordinatesMarginModel, LinearAllTwoStatialCoordinatesLocationLinearMarginModel, \ LinearStationaryMarginModel, LinearNonStationaryLocationMarginModel from extreme_estimator.extreme_models.margin_model.margin_function.linear_margin_function import LinearMarginFunction +from extreme_estimator.extreme_models.margin_model.temporal_linear_margin_model import StationaryStationModel, \ + NonStationaryStationModel from extreme_estimator.extreme_models.utils import OptimizationConstants from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset from utils import get_display_name_from_object_type @@ -45,7 +47,6 @@ class AbstractNonStationaryTrendTest(object): self._starting_point_to_estimator[starting_point] = estimator return self._starting_point_to_estimator[starting_point] - def load_estimator(self, starting_point) -> Union[ AbstractFullEstimator, AbstractMarginEstimator]: margin_model_class = self.stationary_margin_model_class if starting_point is None else self.non_stationary_margin_model_class @@ -79,17 +80,25 @@ class AbstractNonStationaryTrendTest(object): estimator = self.get_estimator(starting_point) margin_function = estimator.margin_function_fitted # type: LinearMarginFunction assert isinstance(margin_function, LinearMarginFunction) - mu_coefs = [margin_function.mu_intercept, margin_function.mu_longitude_trend, margin_function.mu_latitude_trend, - margin_function.mu1_temporal_trend] + mu_coefs = [margin_function.mu_intercept, margin_function.mu1_temporal_trend] + if self.has_spatial_coordinates: + mu_coefs += [margin_function.mu_longitude_trend, margin_function.mu_latitude_trend] return dict(zip(self.mu_coef_names, mu_coefs)) @property def mu_coef_names(self): - return ['mu_intercept', 'mu_longitude', 'mu_latitude', 'mu_temporal'] + mu_coef_names = ['mu_intercept', 'mu_temporal'] + if self.has_spatial_coordinates: + mu_coef_names += ['mu_longitude', 'mu_latitude'] + return mu_coef_names + + @property + def has_spatial_coordinates(self): + return self.dataset.coordinates.has_spatial_coordinates @property def mu_coef_colors(self): - return ['b', 'g', 'y', 'c'] + return ['b', 'c', 'g', 'y', ] def visualize(self, ax, complete_analysis=True): years = self.years(complete_analysis) @@ -141,17 +150,17 @@ class AbstractNonStationaryTrendTest(object): df_mus = pd.DataFrame(mus) min_mus, max_mus = df_mus.min().min(), df_mus.max().max() min_global, max_global = min(min_deviance_data, min_mus), max(max_deviance_data, max_mus) - ax2.set_ylim(min_global, max_global) + # ax2.set_ylim(min_global, max_global) + # if min_mus < 0.0 < max_mus: + # align_yaxis_on_zero(ax2, ax) + ax.set_title(self.display_name) ax.set_xlabel('starting year for the linear trend of {}'.format(self.mu_coef_names[-1])) - if min_mus < 0.0 < max_mus: - align_yaxis_on_zero(ax2, ax) - - title = self.display_name - ax.set_title(title) ax.grid() - ax.legend(loc=6) - ax2.legend(loc=7) + + prop = {'size': 5} if not self.has_spatial_coordinates else None + ax.legend(loc=6, prop=prop) + ax2.legend(loc=7, prop=prop) def years(self, complete_analysis=True): # Define the year_min and year_max for the starting point @@ -170,8 +179,16 @@ class AbstractNonStationaryTrendTest(object): class IndependenceLocationTrendTest(AbstractNonStationaryTrendTest): - def __init__(self, dataset, coordinate_idx): - pass + def __init__(self, station_name, *args, **kwargs): + super().__init__(*args, **kwargs, + estimator_class=LinearMarginEstimator, + stationary_margin_model_class=StationaryStationModel, + non_stationary_margin_model_class=NonStationaryStationModel) + self.station_name = station_name + + @property + def display_name(self): + return self.station_name class ConditionalIndedendenceLocationTrendTest(AbstractNonStationaryTrendTest): diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py index af522ee361b462d6aee3dd56e92f60816618985b..4f1eb446909eb70868f394047fec7d67eca55616 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py @@ -1,6 +1,7 @@ import os import os.path as op from collections import OrderedDict +from copy import deepcopy from typing import Union import math @@ -11,7 +12,7 @@ import seaborn as sns from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy from experiment.meteo_france_SCM_study.visualization.study_visualization.non_stationary_trends import \ - ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest + ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest, IndependenceLocationTrendTest from experiment.meteo_france_SCM_study.visualization.utils import create_adjusted_axes from experiment.utils import average_smoothing_with_sliding_window from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \ @@ -36,6 +37,9 @@ from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal ConsecutiveTemporalCoordinates from spatio_temporal_dataset.coordinates.transformed_coordinates.transformed_coordinates import TransformedCoordinates from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset +from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ + AbstractSpatioTemporalObservations +from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima from test.test_utils import load_test_max_stable_models from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_with_only_some_significant_digits @@ -48,7 +52,11 @@ class StudyVisualizer(object): vertical_kde_plot=False, year_for_kde_plot=None, plot_block_maxima_quantiles=False, temporal_non_stationarity=False, transformation_class=None, + verbose=False, + multiprocessing=False, + complete_non_stationary_trend_analysis=False, normalization_under_one_observations=True): + self.massif_id_to_smooth_maxima = {} self.temporal_non_stationarity = temporal_non_stationarity self.only_first_row = only_first_row @@ -58,6 +66,10 @@ class StudyVisualizer(object): self.plot_name = None self.normalization_under_one_observations = normalization_under_one_observations + self.multiprocessing = multiprocessing + self.verbose = verbose + self.complete_non_stationary_trend_analysis = complete_non_stationary_trend_analysis + # Load some attributes self._dataset = None self._coordinates = None @@ -135,10 +147,21 @@ class StudyVisualizer(object): self._observations.convert_to_spatio_temporal_index(self.coordinates) if self.normalization_under_one_observations: self._observations.normalize() - self._observations.print_summary() + if self.verbose: + self._observations.print_summary() return self._observations - # Graph for each massif / or groups of massifs + def observation_massif_id(self, massif_id): + annual_maxima = [data[massif_id] for data in self.study.year_to_annual_maxima.values()] + df_annual_maxima = pd.DataFrame(annual_maxima, index=self.temporal_coordinates.index) + observation_massif_id = AnnualMaxima(df_maxima_gev=df_annual_maxima) + if self.normalization_under_one_observations: + observation_massif_id.normalize() + if self.verbose: + observation_massif_id.print_summary() + return observation_massif_id + + # PLOT FOR SEVERAL MASSIFS def visualize_massif_graphs(self, visualize_function, specified_massif_names=None): if self.only_one_graph: @@ -157,29 +180,39 @@ class StudyVisualizer(object): if specified_massif_names is None: massif_ids = list(range(len(self.study.study_massif_names))) else: - massif_ids = [self.study.study_massif_names.index(massif_name) for massif_name in specified_massif_names] + massif_ids = [self.study.study_massif_names.index(massif_name) for massif_name in + specified_massif_names] for j, massif_id in enumerate(massif_ids): row_id, column_id = j // nb_columns, j % nb_columns ax = axes[row_id, column_id] visualize_function(ax, massif_id) - def visualize_all_experimental_law( self): - self.visualize_massif_graphs(self.visualize_experimental_law) - self.plot_name = ' Empirical distribution \n' - self.plot_name += 'with data from the 23 mountain chains of the French Alps ' if self.year_for_kde_plot is None else \ - 'for the year {}'.format(self.year_for_kde_plot) + # TEMPORAL TREND + + def visualize_all_independent_temporal_trend(self): + self.visualize_massif_graphs(self.visualize_independent_temporal_trend) + self.plot_name = ' Independent temporal trend \n' self.show_or_save_to_file() - def visualize_temporal_trend_relevance(self, complete_analysis, verbose=True, multiprocessing=False): - self.temporal_non_stationarity = True - trend_tests = [ConditionalIndedendenceLocationTrendTest(dataset=self.dataset, verbose=verbose, - multiprocessing=multiprocessing)] + def visualize_independent_temporal_trend(self, ax, massif_id): + assert self.temporal_non_stationarity + # Create a dataset with temporal coordinates from the massif id + dataset_massif_id = AbstractDataset(self.observation_massif_id(massif_id), self.temporal_coordinates) + trend_test = IndependenceLocationTrendTest(station_name=self.study.study_massif_names[massif_id], + dataset=dataset_massif_id, verbose=self.verbose, + multiprocessing=self.multiprocessing) + trend_test.visualize(ax, self.complete_non_stationary_trend_analysis) + + def visualize_temporal_trend_relevance(self): + assert self.temporal_non_stationarity + trend_tests = [ConditionalIndedendenceLocationTrendTest(dataset=self.dataset, verbose=self.verbose, + multiprocessing=self.multiprocessing)] max_stable_models = load_test_max_stable_models(default_covariance_function=self.default_covariance_function) for max_stable_model in [max_stable_models[1], max_stable_models[-2]]: trend_tests.append(MaxStableLocationTrendTest(dataset=self.dataset, - max_stable_model=max_stable_model, verbose=verbose, - multiprocessing=multiprocessing)) + max_stable_model=max_stable_model, verbose=self.verbose, + multiprocessing=self.multiprocessing)) nb_trend_tests = len(trend_tests) fig, axes = plt.subplots(1, nb_trend_tests, figsize=self.figsize) @@ -187,15 +220,25 @@ class StudyVisualizer(object): axes = [axes] fig.subplots_adjust(hspace=self.subplot_space, wspace=self.subplot_space) for ax, trend_test in zip(axes, trend_tests): - trend_test.visualize(ax, complete_analysis=complete_analysis) + trend_test.visualize(ax, complete_analysis=self.complete_non_stationary_trend_analysis) plot_name = 'trend tests' - plot_name += ' with {} applied spatially & temporally'.format(get_display_name_from_object_type(self.transformation_class)) + plot_name += ' with {} applied spatially & temporally'.format( + get_display_name_from_object_type(self.transformation_class)) if self.normalization_under_one_observations: plot_name += '(and maxima <= 1)' self.plot_name = plot_name self.show_or_save_to_file() + # EXPERIMENTAL LAW + + def visualize_all_experimental_law(self): + self.visualize_massif_graphs(self.visualize_experimental_law) + self.plot_name = ' Empirical distribution \n' + self.plot_name += 'with data from the 23 mountain chains of the French Alps ' if self.year_for_kde_plot is None else \ + 'for the year {}'.format(self.year_for_kde_plot) + self.show_or_save_to_file() + def visualize_experimental_law(self, ax, massif_id): # Display the experimental law for a given massif all_massif_data = self.get_all_massif_data(massif_id) @@ -281,6 +324,8 @@ class StudyVisualizer(object): all_massif_data = np.sort(all_massif_data) return all_massif_data + # + def visualize_all_mean_and_max_graphs(self): # Compute the order of massif names massif_name_to_score = {} @@ -289,7 +334,8 @@ class StudyVisualizer(object): massif_name_to_score[massif_name] = score ordered_massif_names = sorted(self.study.study_massif_names[:], key=lambda s: massif_name_to_score[s]) self.visualize_massif_graphs(self.visualize_mean_and_max_graph, specified_massif_names=ordered_massif_names) - self.plot_name = ' smoothing values temporally with sliding window of size {}'.format(self.window_size_for_smoothing) + self.plot_name = ' smoothing values temporally with sliding window of size {}'.format( + self.window_size_for_smoothing) self.show_or_save_to_file() def visualize_mean_and_max_graph(self, ax, massif_id): @@ -313,7 +359,6 @@ class StudyVisualizer(object): ax.set_title(self.study.study_massif_names[massif_id]) ax.xaxis.set_ticks(x[2::10]) - def smooth_maxima_x_y(self, massif_id): if massif_id not in self.massif_id_to_smooth_maxima: tuples_x_y = [(year, annual_maxima[massif_id]) for year, annual_maxima in diff --git a/extreme_estimator/extreme_models/result_from_fit.py b/extreme_estimator/extreme_models/result_from_fit.py index e8f50a720b5034076227f764426a1fdb65d96180..d20187c11455e88b92d875614d5c7fb6c94e0c66 100644 --- a/extreme_estimator/extreme_models/result_from_fit.py +++ b/extreme_estimator/extreme_models/result_from_fit.py @@ -8,8 +8,10 @@ from extreme_estimator.margin_fits.gev.gev_params import GevParams from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates -class ResultFromFit(object): +def convertFloatVector_to_float(f): + return np.array(f)[0] +class ResultFromFit(object): def __init__(self, result_from_fit: robjects.ListVector) -> None: if hasattr(result_from_fit, 'names'): @@ -73,7 +75,16 @@ class ResultFromIsmev(ResultFromFit): @property def nllh(self): - return self.name_to_value['nllh'] + return convertFloatVector_to_float(self.name_to_value['nllh']) + + @property + def deviance(self): + return - 2 * self.nllh + + @property + def convergence(self) -> str: + return convertFloatVector_to_float(self.name_to_value['conv']) == 0 + class ResultFromSpatialExtreme(ResultFromFit): diff --git a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py index c55d3dfa8ffd1e112ef139c2315f335230239afc..5825458b3598e0b302d3f1087b1611e6c2c17112 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py @@ -3,6 +3,8 @@ import pandas as pd import numpy as np from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \ + AbstractTemporalCoordinates from spatio_temporal_dataset.slicer.abstract_slicer import df_sliced, AbstractSlicer from spatio_temporal_dataset.slicer.split import Split @@ -91,6 +93,24 @@ class AbstractSpatioTemporalObservations(object): if df is not None: return pd.DataFrame(np.concatenate([df[c].values for c in df.columns]), index=index) + def convert_to_temporal_index(self, temporal_coordinates: AbstractTemporalCoordinates, spatial_idx: int): + assert len(self.index) > len(temporal_coordinates) and len(self.index) % len(temporal_coordinates) == 0 + spatial_len = len(self.index) // len(temporal_coordinates) + assert 0 <= spatial_idx < spatial_len + # Build ind to select the observations of interest + ind = np.zeros(spatial_len, dtype=bool) + ind[spatial_idx] = True + ind = np.concatenate([ind for _ in range(len(temporal_coordinates))]) + self.df_maxima_frech = self.loc_df(self.df_maxima_frech, ind, temporal_coordinates.index) + self.df_maxima_gev = self.loc_df(self.df_maxima_gev, ind, temporal_coordinates.index) + + @staticmethod + def loc_df(df, ind, new_index): + if df is not None: + df = df.loc[ind] + df.index = new_index + return df + def maxima_gev(self, split: Split = Split.all, slicer: AbstractSlicer = None) -> np.ndarray: return df_sliced(self.df_maxima_gev, split, slicer).values @@ -118,5 +138,3 @@ class AbstractSpatioTemporalObservations(object): @_df_maxima.setter def _df_maxima(self, value): self.__df_maxima = value - -