diff --git a/experiment/meteo_france_data/scm_models_data/visualization/studies_visualization/studies_visualizer.py b/experiment/meteo_france_data/scm_models_data/visualization/studies_visualization/studies_visualizer.py index c9c979c4dcf83d3b7c9f8bc72205737bebb0d017..92da9208e0af6d725a96d763f03e3d49158ba86a 100644 --- a/experiment/meteo_france_data/scm_models_data/visualization/studies_visualization/studies_visualizer.py +++ b/experiment/meteo_france_data/scm_models_data/visualization/studies_visualization/studies_visualizer.py @@ -10,10 +10,10 @@ import matplotlib.pyplot as plt from matplotlib.lines import Line2D from experiment.meteo_france_data.scm_models_data.abstract_extended_study import AbstractExtendedStudy +from experiment.meteo_france_data.scm_models_data.visualization.studies_visualization.studies import Studies +from experiment.meteo_france_data.scm_models_data.visualization.study_visualization.study_visualizer import \ + StudyVisualizer from experiment.trend_analysis.univariate_test.abstract_univariate_test import AbstractUnivariateTest -from experiment.meteo_france_data.scm_models_data.visualization import \ - Studies -from experiment.meteo_france_data.scm_models_data.visualization import StudyVisualizer from experiment.meteo_france_data.scm_models_data.visualization.utils import plot_df from utils import cached_property, get_display_name_from_object_type, VERSION_TIME diff --git a/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py b/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py index 7b5d3c9f33379c8f804e3aa9faed5bb657c8562e..b8d0740be45965580dea6f4a7cf97be957ec5984 100644 --- a/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py +++ b/experiment/meteo_france_data/scm_models_data/visualization/study_visualization/study_visualizer.py @@ -16,6 +16,7 @@ from experiment.trend_analysis.univariate_test.abstract_univariate_test import A from experiment.trend_analysis.non_stationary_trends import \ ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest, IndependenceLocationTrendTest from experiment.meteo_france_data.scm_models_data.visualization.utils import create_adjusted_axes +from experiment.trend_analysis.univariate_test.utils import compute_gev_change_point_test_results from experiment.utils import average_smoothing_with_sliding_window from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \ FullEstimatorInASingleStepWithSmoothMargin @@ -405,23 +406,8 @@ class StudyVisualizer(VisualizationParameters): massif_names = massif_names[:nb_massif_for_fast_mode] for massif_id, massif_name in enumerate(massif_names): years, smooth_maxima = self.smooth_maxima_x_y(massif_id) - if self.multiprocessing: - list_args = [(smooth_maxima, starting_year, trend_test_class, years) for starting_year in - starting_years] - with Pool(NB_CORES) as p: - trend_test_res = p.starmap(self.compute_gev_change_point_test_result, list_args) - else: - trend_test_res = [ - self.compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years) - for starting_year in starting_years] - # Keep only the most likely starting year - # (i.e. the starting year that minimizes its negative log likelihood) - # (set all the other data to np.nan so that they will not be taken into account in mean function) - best_idx = list(np.argmin(trend_test_res, axis=0))[2] - # print(best_idx, trend_test_res) - best_idxs = [best_idx] - # todo: by doing a sorting on the deviance, I could get the nb_top_likelihood_values values - # best_idxs = list(np.argmax(trend_test_res, axis=0))[-nb_top_likelihood_values:] + trend_test_res, best_idxs = compute_gev_change_point_test_results(self.multiprocessing, smooth_maxima, starting_years, + trend_test_class, years) trend_test_res = [(a, b) if i in best_idxs else (np.nan, np.nan) for i, (a, b, *_) in enumerate(trend_test_res)] massif_name_to_trend_res[massif_name] = list(zip(*trend_test_res)) @@ -432,12 +418,6 @@ class StudyVisualizer(VisualizationParameters): return [pd.DataFrame(massif_name_to_res, index=starting_years).transpose() for massif_name_to_res in all_massif_name_to_res] - @staticmethod - def compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years): - trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractGevChangePointTest - assert isinstance(trend_test, AbstractGevChangePointTest) - return trend_test.test_trend_type, trend_test.test_trend_strength, trend_test.non_stationary_nllh, trend_test.non_stationary_deviance, trend_test.stationary_deviance - @staticmethod def compute_trend_test_result(smooth_maxima, starting_year, trend_test_class, years): trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractUnivariateTest diff --git a/experiment/meteo_france_data/stations_data/main_station_comparison.py b/experiment/meteo_france_data/stations_data/main_station_comparison.py index 0a060b0cbf091fd83dce3c493457a958b20c5a93..d64a21bd29e5ff1db7b14e7c2b303b81b11ce8bf 100644 --- a/experiment/meteo_france_data/stations_data/main_station_comparison.py +++ b/experiment/meteo_france_data/stations_data/main_station_comparison.py @@ -13,22 +13,23 @@ def visualize_all_stations(): def visualize_non_nan_station(): vizu = ComparisonsVisualization(altitudes=ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST, keep_only_station_without_nan_values=True, - normalize_observations=True) - # vizu.visualize_maximum() - vizu.visualize_gev() + normalize_observations=False) + vizu.visualize_maximum() + # vizu.visualize_gev() def example(): # this is a really good example for the maxima at least - vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False) - vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True) + # vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False) + # vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True) - # vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False, keep_only_station_without_nan_values=True) + vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False, keep_only_station_without_nan_values=True) # vizu._visualize_ax_main(vizu.plot_gev, vizu.comparisons[0], 'Beaufortain', show=True) + vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True) if __name__ == '__main__': # visualize_fast_comparison() - # visualize_all_stations() + visualize_all_stations() # visualize_non_nan_station() - example() + # example() diff --git a/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py b/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py index 454382ec1b0f3fd7f56ff8403312ddf2a211444b..dc0239be14d8e2eea0412302b01286bd7bad4c92 100644 --- a/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py +++ b/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py @@ -10,6 +10,9 @@ from experiment.meteo_france_data.scm_models_data.visualization.study_visualizat VisualizationParameters from experiment.meteo_france_data.stations_data.comparison_analysis import ComparisonAnalysis, MASSIF_COLUMN_NAME, \ REANALYSE_STR, ALTITUDE_COLUMN_NAME +from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import GevLocationChangePointTest +from experiment.trend_analysis.univariate_test.abstract_univariate_test import AbstractUnivariateTest +from experiment.trend_analysis.univariate_test.utils import compute_gev_change_point_test_results from extreme_estimator.extreme_models.result_from_fit import ResultFromIsmev from extreme_estimator.extreme_models.utils import r, safe_run_r_estimator, ro from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates @@ -67,6 +70,7 @@ class ComparisonsVisualization(VisualizationParameters): def _visualize_ax_main(self, plot_function, comparison: ComparisonAnalysis, massif, ax=None, show=False): if ax is None: _, ax = plt.subplots(1, 1, figsize=self.figsize) + ax2 = ax.twinx() df = comparison.df_merged_intersection_clean.copy() ind = df[MASSIF_COLUMN_NAME] == massif @@ -78,19 +82,21 @@ class ComparisonsVisualization(VisualizationParameters): ind_location = df.index.str.contains(REANALYSE_STR) df[DISTANCE_COLUMN_NAME] = 0 for coordinate_name in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]: - print(df[DISTANCE_COLUMN_NAME]) center = df.loc[ind_location, coordinate_name].values[0] - print(center) df[DISTANCE_COLUMN_NAME] += (center - df[coordinate_name]).pow(2) df[DISTANCE_COLUMN_NAME] = df[DISTANCE_COLUMN_NAME].pow(0.5) df[DISTANCE_COLUMN_NAME] = (df[DISTANCE_COLUMN_NAME] / 1000).round(1) + for color, (i, s) in zip(colors_station, df.iterrows()): label = i label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME]) label += ' ({}km)'.format(s[DISTANCE_COLUMN_NAME]) s_values = s.iloc[3:-1].to_dict() + years, maxima = list(s_values.keys()), list(s_values.values()) + plot_color = color if REANALYSE_STR not in label else 'g' - plot_function(ax, s_values, label, plot_color) + + plot_function(ax, ax2, years, maxima, label, plot_color) ax.set_title('{} at {}m'.format(massif, comparison.altitude)) ax.legend(prop={'size': 5}) @@ -100,13 +106,30 @@ class ComparisonsVisualization(VisualizationParameters): def visualize_maximum(self): return self._visualize_main(self.plot_maxima, 'Recent trend of Annual maxima of snowfall') - def plot_maxima(self, ax, s_values, label, plot_color): - ax.plot(list(s_values.keys()), list(s_values.values()), label=label, color=plot_color) + def plot_maxima(self, ax, ax2, years, maxima, label, plot_color): + if self.keep_only_station_without_nan_values: + # Run trend test to improve the label + starting_years = years[:-1] + trend_test_res, best_idxs = compute_gev_change_point_test_results(multiprocessing=True, + maxima=maxima, starting_years=starting_years, + trend_test_class=GevLocationChangePointTest, + years=years) + best_idx = best_idxs[0] + most_likely_year = years[best_idx] + most_likely_trend_type = trend_test_res[best_idx][0] + display_trend_type = AbstractUnivariateTest.get_display_trend_type(real_trend_type=most_likely_trend_type) + label += "\n {} starting in {}".format(display_trend_type, most_likely_year) + # Display the nllh against the starting year + step = 10 + ax2.plot(starting_years[::step], [t[3] for t in trend_test_res][::step], color=plot_color, marker='o') + ax2.plot(starting_years[::step], [t[4] for t in trend_test_res][::step], color=plot_color, marker='x') + # Plot maxima + ax.plot(years, maxima, label=label, color=plot_color) def visualize_gev(self): return self._visualize_main(self.plot_gev) - def plot_gev(self, ax, s_values, label, plot_color): + def plot_gev(self, ax, ax2, s_values, label, plot_color): # todo should I normalize here ? # fit gev data = list(s_values.values()) @@ -120,4 +143,3 @@ class ComparisonsVisualization(VisualizationParameters): y = gev_params.density(x) # display the gev distribution that was obtained ax.plot(x, y, label=label, color=plot_color) - diff --git a/experiment/trend_analysis/univariate_test/abstract_univariate_test.py b/experiment/trend_analysis/univariate_test/abstract_univariate_test.py index b646301a215e8a64b5c5991a7b196aec712867a4..bfbe4733cfcc2b00fbc611f7f7c566c8e76e4ef0 100644 --- a/experiment/trend_analysis/univariate_test/abstract_univariate_test.py +++ b/experiment/trend_analysis/univariate_test/abstract_univariate_test.py @@ -62,6 +62,13 @@ class AbstractUnivariateTest(object): # d[cls.NO_TREND] = 'k--' return d + @classmethod + def get_display_trend_type(cls, real_trend_type): + if cls.SIGNIFICATIVE in real_trend_type: + return real_trend_type + else: + return cls.NON_SIGNIFICATIVE_TREND + @classmethod def get_real_trend_types(cls, display_trend_type): if display_trend_type is cls.ALL_TREND: diff --git a/experiment/trend_analysis/univariate_test/utils.py b/experiment/trend_analysis/univariate_test/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..327039890e912bc67c8f1b87efc64c976460a95a --- /dev/null +++ b/experiment/trend_analysis/univariate_test/utils.py @@ -0,0 +1,35 @@ +from multiprocessing.pool import Pool + +import numpy as np + +from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import AbstractGevChangePointTest +from utils import NB_CORES + + +def compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years): + trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractGevChangePointTest + assert isinstance(trend_test, AbstractGevChangePointTest) + return trend_test.test_trend_type, trend_test.test_trend_strength, trend_test.non_stationary_nllh, trend_test.non_stationary_deviance, trend_test.stationary_deviance + + +def compute_gev_change_point_test_results(multiprocessing, maxima, starting_years, trend_test_class, + years): + if multiprocessing: + list_args = [(maxima, starting_year, trend_test_class, years) for starting_year in + starting_years] + with Pool(NB_CORES) as p: + trend_test_res = p.starmap(compute_gev_change_point_test_result, list_args) + else: + trend_test_res = [ + compute_gev_change_point_test_result(maxima, starting_year, trend_test_class, years) + for starting_year in starting_years] + # Keep only the most likely starting year + # (i.e. the starting year that minimizes its negative log likelihood) + # (set all the other data to np.nan so that they will not be taken into account in mean function) + best_idx = list(np.argmin(trend_test_res, axis=0))[2] + # print(best_idx, trend_test_res) + best_idxs = [best_idx] + # todo: by doing a sorting on the deviance, I could get the nb_top_likelihood_values values + # best_idxs = list(np.argmax(trend_test_res, axis=0))[-nb_top_likelihood_values:] + + return trend_test_res, best_idxs diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index a4135fadfec26fc6a7631bda3baf52be6bfb2703..290bbe31941aedfbd1e79da6fffe6ed81db89ab3 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -251,7 +251,7 @@ class AbstractCoordinates(object): # Compute the indices to modify ind_to_modify = df_temporal_coordinates.iloc[:, 0] <= starting_point # type: pd.Series # Assert that some coordinates are selected but not all - assert 0 < sum(ind_to_modify) < len(ind_to_modify) + assert 0 < sum(ind_to_modify) < len(ind_to_modify), sum(ind_to_modify) # Modify the temporal coordinates to enforce the stationarity df_temporal_coordinates.loc[ind_to_modify] = starting_point # Load the temporal transformation object