Commit c0c3ddce authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[STATIONS] add trend_test result display in comparisons_visualization graphs

parent f9ad9960
No related merge requests found
Showing with 87 additions and 42 deletions
+87 -42
...@@ -10,10 +10,10 @@ import matplotlib.pyplot as plt ...@@ -10,10 +10,10 @@ import matplotlib.pyplot as plt
from matplotlib.lines import Line2D from matplotlib.lines import Line2D
from experiment.meteo_france_data.scm_models_data.abstract_extended_study import AbstractExtendedStudy from experiment.meteo_france_data.scm_models_data.abstract_extended_study import AbstractExtendedStudy
from experiment.meteo_france_data.scm_models_data.visualization.studies_visualization.studies import Studies
from experiment.meteo_france_data.scm_models_data.visualization.study_visualization.study_visualizer import \
StudyVisualizer
from experiment.trend_analysis.univariate_test.abstract_univariate_test import AbstractUnivariateTest from experiment.trend_analysis.univariate_test.abstract_univariate_test import AbstractUnivariateTest
from experiment.meteo_france_data.scm_models_data.visualization import \
Studies
from experiment.meteo_france_data.scm_models_data.visualization import StudyVisualizer
from experiment.meteo_france_data.scm_models_data.visualization.utils import plot_df from experiment.meteo_france_data.scm_models_data.visualization.utils import plot_df
from utils import cached_property, get_display_name_from_object_type, VERSION_TIME from utils import cached_property, get_display_name_from_object_type, VERSION_TIME
......
...@@ -16,6 +16,7 @@ from experiment.trend_analysis.univariate_test.abstract_univariate_test import A ...@@ -16,6 +16,7 @@ from experiment.trend_analysis.univariate_test.abstract_univariate_test import A
from experiment.trend_analysis.non_stationary_trends import \ from experiment.trend_analysis.non_stationary_trends import \
ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest, IndependenceLocationTrendTest ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest, IndependenceLocationTrendTest
from experiment.meteo_france_data.scm_models_data.visualization.utils import create_adjusted_axes from experiment.meteo_france_data.scm_models_data.visualization.utils import create_adjusted_axes
from experiment.trend_analysis.univariate_test.utils import compute_gev_change_point_test_results
from experiment.utils import average_smoothing_with_sliding_window from experiment.utils import average_smoothing_with_sliding_window
from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \ from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \
FullEstimatorInASingleStepWithSmoothMargin FullEstimatorInASingleStepWithSmoothMargin
...@@ -405,23 +406,8 @@ class StudyVisualizer(VisualizationParameters): ...@@ -405,23 +406,8 @@ class StudyVisualizer(VisualizationParameters):
massif_names = massif_names[:nb_massif_for_fast_mode] massif_names = massif_names[:nb_massif_for_fast_mode]
for massif_id, massif_name in enumerate(massif_names): for massif_id, massif_name in enumerate(massif_names):
years, smooth_maxima = self.smooth_maxima_x_y(massif_id) years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
if self.multiprocessing: trend_test_res, best_idxs = compute_gev_change_point_test_results(self.multiprocessing, smooth_maxima, starting_years,
list_args = [(smooth_maxima, starting_year, trend_test_class, years) for starting_year in trend_test_class, years)
starting_years]
with Pool(NB_CORES) as p:
trend_test_res = p.starmap(self.compute_gev_change_point_test_result, list_args)
else:
trend_test_res = [
self.compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years)
for starting_year in starting_years]
# Keep only the most likely starting year
# (i.e. the starting year that minimizes its negative log likelihood)
# (set all the other data to np.nan so that they will not be taken into account in mean function)
best_idx = list(np.argmin(trend_test_res, axis=0))[2]
# print(best_idx, trend_test_res)
best_idxs = [best_idx]
# todo: by doing a sorting on the deviance, I could get the nb_top_likelihood_values values
# best_idxs = list(np.argmax(trend_test_res, axis=0))[-nb_top_likelihood_values:]
trend_test_res = [(a, b) if i in best_idxs else (np.nan, np.nan) trend_test_res = [(a, b) if i in best_idxs else (np.nan, np.nan)
for i, (a, b, *_) in enumerate(trend_test_res)] for i, (a, b, *_) in enumerate(trend_test_res)]
massif_name_to_trend_res[massif_name] = list(zip(*trend_test_res)) massif_name_to_trend_res[massif_name] = list(zip(*trend_test_res))
...@@ -432,12 +418,6 @@ class StudyVisualizer(VisualizationParameters): ...@@ -432,12 +418,6 @@ class StudyVisualizer(VisualizationParameters):
return [pd.DataFrame(massif_name_to_res, index=starting_years).transpose() return [pd.DataFrame(massif_name_to_res, index=starting_years).transpose()
for massif_name_to_res in all_massif_name_to_res] for massif_name_to_res in all_massif_name_to_res]
@staticmethod
def compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years):
trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractGevChangePointTest
assert isinstance(trend_test, AbstractGevChangePointTest)
return trend_test.test_trend_type, trend_test.test_trend_strength, trend_test.non_stationary_nllh, trend_test.non_stationary_deviance, trend_test.stationary_deviance
@staticmethod @staticmethod
def compute_trend_test_result(smooth_maxima, starting_year, trend_test_class, years): def compute_trend_test_result(smooth_maxima, starting_year, trend_test_class, years):
trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractUnivariateTest trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractUnivariateTest
......
...@@ -13,22 +13,23 @@ def visualize_all_stations(): ...@@ -13,22 +13,23 @@ def visualize_all_stations():
def visualize_non_nan_station(): def visualize_non_nan_station():
vizu = ComparisonsVisualization(altitudes=ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST, vizu = ComparisonsVisualization(altitudes=ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST,
keep_only_station_without_nan_values=True, keep_only_station_without_nan_values=True,
normalize_observations=True) normalize_observations=False)
# vizu.visualize_maximum() vizu.visualize_maximum()
vizu.visualize_gev() # vizu.visualize_gev()
def example(): def example():
# this is a really good example for the maxima at least # this is a really good example for the maxima at least
vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False) # vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False)
vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True) # vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True)
# vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False, keep_only_station_without_nan_values=True) vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False, keep_only_station_without_nan_values=True)
# vizu._visualize_ax_main(vizu.plot_gev, vizu.comparisons[0], 'Beaufortain', show=True) # vizu._visualize_ax_main(vizu.plot_gev, vizu.comparisons[0], 'Beaufortain', show=True)
vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True)
if __name__ == '__main__': if __name__ == '__main__':
# visualize_fast_comparison() # visualize_fast_comparison()
# visualize_all_stations() visualize_all_stations()
# visualize_non_nan_station() # visualize_non_nan_station()
example() # example()
...@@ -10,6 +10,9 @@ from experiment.meteo_france_data.scm_models_data.visualization.study_visualizat ...@@ -10,6 +10,9 @@ from experiment.meteo_france_data.scm_models_data.visualization.study_visualizat
VisualizationParameters VisualizationParameters
from experiment.meteo_france_data.stations_data.comparison_analysis import ComparisonAnalysis, MASSIF_COLUMN_NAME, \ from experiment.meteo_france_data.stations_data.comparison_analysis import ComparisonAnalysis, MASSIF_COLUMN_NAME, \
REANALYSE_STR, ALTITUDE_COLUMN_NAME REANALYSE_STR, ALTITUDE_COLUMN_NAME
from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import GevLocationChangePointTest
from experiment.trend_analysis.univariate_test.abstract_univariate_test import AbstractUnivariateTest
from experiment.trend_analysis.univariate_test.utils import compute_gev_change_point_test_results
from extreme_estimator.extreme_models.result_from_fit import ResultFromIsmev from extreme_estimator.extreme_models.result_from_fit import ResultFromIsmev
from extreme_estimator.extreme_models.utils import r, safe_run_r_estimator, ro from extreme_estimator.extreme_models.utils import r, safe_run_r_estimator, ro
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
...@@ -67,6 +70,7 @@ class ComparisonsVisualization(VisualizationParameters): ...@@ -67,6 +70,7 @@ class ComparisonsVisualization(VisualizationParameters):
def _visualize_ax_main(self, plot_function, comparison: ComparisonAnalysis, massif, ax=None, show=False): def _visualize_ax_main(self, plot_function, comparison: ComparisonAnalysis, massif, ax=None, show=False):
if ax is None: if ax is None:
_, ax = plt.subplots(1, 1, figsize=self.figsize) _, ax = plt.subplots(1, 1, figsize=self.figsize)
ax2 = ax.twinx()
df = comparison.df_merged_intersection_clean.copy() df = comparison.df_merged_intersection_clean.copy()
ind = df[MASSIF_COLUMN_NAME] == massif ind = df[MASSIF_COLUMN_NAME] == massif
...@@ -78,19 +82,21 @@ class ComparisonsVisualization(VisualizationParameters): ...@@ -78,19 +82,21 @@ class ComparisonsVisualization(VisualizationParameters):
ind_location = df.index.str.contains(REANALYSE_STR) ind_location = df.index.str.contains(REANALYSE_STR)
df[DISTANCE_COLUMN_NAME] = 0 df[DISTANCE_COLUMN_NAME] = 0
for coordinate_name in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]: for coordinate_name in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
print(df[DISTANCE_COLUMN_NAME])
center = df.loc[ind_location, coordinate_name].values[0] center = df.loc[ind_location, coordinate_name].values[0]
print(center)
df[DISTANCE_COLUMN_NAME] += (center - df[coordinate_name]).pow(2) df[DISTANCE_COLUMN_NAME] += (center - df[coordinate_name]).pow(2)
df[DISTANCE_COLUMN_NAME] = df[DISTANCE_COLUMN_NAME].pow(0.5) df[DISTANCE_COLUMN_NAME] = df[DISTANCE_COLUMN_NAME].pow(0.5)
df[DISTANCE_COLUMN_NAME] = (df[DISTANCE_COLUMN_NAME] / 1000).round(1) df[DISTANCE_COLUMN_NAME] = (df[DISTANCE_COLUMN_NAME] / 1000).round(1)
for color, (i, s) in zip(colors_station, df.iterrows()): for color, (i, s) in zip(colors_station, df.iterrows()):
label = i label = i
label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME]) label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME])
label += ' ({}km)'.format(s[DISTANCE_COLUMN_NAME]) label += ' ({}km)'.format(s[DISTANCE_COLUMN_NAME])
s_values = s.iloc[3:-1].to_dict() s_values = s.iloc[3:-1].to_dict()
years, maxima = list(s_values.keys()), list(s_values.values())
plot_color = color if REANALYSE_STR not in label else 'g' plot_color = color if REANALYSE_STR not in label else 'g'
plot_function(ax, s_values, label, plot_color)
plot_function(ax, ax2, years, maxima, label, plot_color)
ax.set_title('{} at {}m'.format(massif, comparison.altitude)) ax.set_title('{} at {}m'.format(massif, comparison.altitude))
ax.legend(prop={'size': 5}) ax.legend(prop={'size': 5})
...@@ -100,13 +106,30 @@ class ComparisonsVisualization(VisualizationParameters): ...@@ -100,13 +106,30 @@ class ComparisonsVisualization(VisualizationParameters):
def visualize_maximum(self): def visualize_maximum(self):
return self._visualize_main(self.plot_maxima, 'Recent trend of Annual maxima of snowfall') return self._visualize_main(self.plot_maxima, 'Recent trend of Annual maxima of snowfall')
def plot_maxima(self, ax, s_values, label, plot_color): def plot_maxima(self, ax, ax2, years, maxima, label, plot_color):
ax.plot(list(s_values.keys()), list(s_values.values()), label=label, color=plot_color) if self.keep_only_station_without_nan_values:
# Run trend test to improve the label
starting_years = years[:-1]
trend_test_res, best_idxs = compute_gev_change_point_test_results(multiprocessing=True,
maxima=maxima, starting_years=starting_years,
trend_test_class=GevLocationChangePointTest,
years=years)
best_idx = best_idxs[0]
most_likely_year = years[best_idx]
most_likely_trend_type = trend_test_res[best_idx][0]
display_trend_type = AbstractUnivariateTest.get_display_trend_type(real_trend_type=most_likely_trend_type)
label += "\n {} starting in {}".format(display_trend_type, most_likely_year)
# Display the nllh against the starting year
step = 10
ax2.plot(starting_years[::step], [t[3] for t in trend_test_res][::step], color=plot_color, marker='o')
ax2.plot(starting_years[::step], [t[4] for t in trend_test_res][::step], color=plot_color, marker='x')
# Plot maxima
ax.plot(years, maxima, label=label, color=plot_color)
def visualize_gev(self): def visualize_gev(self):
return self._visualize_main(self.plot_gev) return self._visualize_main(self.plot_gev)
def plot_gev(self, ax, s_values, label, plot_color): def plot_gev(self, ax, ax2, s_values, label, plot_color):
# todo should I normalize here ? # todo should I normalize here ?
# fit gev # fit gev
data = list(s_values.values()) data = list(s_values.values())
...@@ -120,4 +143,3 @@ class ComparisonsVisualization(VisualizationParameters): ...@@ -120,4 +143,3 @@ class ComparisonsVisualization(VisualizationParameters):
y = gev_params.density(x) y = gev_params.density(x)
# display the gev distribution that was obtained # display the gev distribution that was obtained
ax.plot(x, y, label=label, color=plot_color) ax.plot(x, y, label=label, color=plot_color)
...@@ -62,6 +62,13 @@ class AbstractUnivariateTest(object): ...@@ -62,6 +62,13 @@ class AbstractUnivariateTest(object):
# d[cls.NO_TREND] = 'k--' # d[cls.NO_TREND] = 'k--'
return d return d
@classmethod
def get_display_trend_type(cls, real_trend_type):
if cls.SIGNIFICATIVE in real_trend_type:
return real_trend_type
else:
return cls.NON_SIGNIFICATIVE_TREND
@classmethod @classmethod
def get_real_trend_types(cls, display_trend_type): def get_real_trend_types(cls, display_trend_type):
if display_trend_type is cls.ALL_TREND: if display_trend_type is cls.ALL_TREND:
......
from multiprocessing.pool import Pool
import numpy as np
from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import AbstractGevChangePointTest
from utils import NB_CORES
def compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years):
trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractGevChangePointTest
assert isinstance(trend_test, AbstractGevChangePointTest)
return trend_test.test_trend_type, trend_test.test_trend_strength, trend_test.non_stationary_nllh, trend_test.non_stationary_deviance, trend_test.stationary_deviance
def compute_gev_change_point_test_results(multiprocessing, maxima, starting_years, trend_test_class,
years):
if multiprocessing:
list_args = [(maxima, starting_year, trend_test_class, years) for starting_year in
starting_years]
with Pool(NB_CORES) as p:
trend_test_res = p.starmap(compute_gev_change_point_test_result, list_args)
else:
trend_test_res = [
compute_gev_change_point_test_result(maxima, starting_year, trend_test_class, years)
for starting_year in starting_years]
# Keep only the most likely starting year
# (i.e. the starting year that minimizes its negative log likelihood)
# (set all the other data to np.nan so that they will not be taken into account in mean function)
best_idx = list(np.argmin(trend_test_res, axis=0))[2]
# print(best_idx, trend_test_res)
best_idxs = [best_idx]
# todo: by doing a sorting on the deviance, I could get the nb_top_likelihood_values values
# best_idxs = list(np.argmax(trend_test_res, axis=0))[-nb_top_likelihood_values:]
return trend_test_res, best_idxs
...@@ -251,7 +251,7 @@ class AbstractCoordinates(object): ...@@ -251,7 +251,7 @@ class AbstractCoordinates(object):
# Compute the indices to modify # Compute the indices to modify
ind_to_modify = df_temporal_coordinates.iloc[:, 0] <= starting_point # type: pd.Series ind_to_modify = df_temporal_coordinates.iloc[:, 0] <= starting_point # type: pd.Series
# Assert that some coordinates are selected but not all # Assert that some coordinates are selected but not all
assert 0 < sum(ind_to_modify) < len(ind_to_modify) assert 0 < sum(ind_to_modify) < len(ind_to_modify), sum(ind_to_modify)
# Modify the temporal coordinates to enforce the stationarity # Modify the temporal coordinates to enforce the stationarity
df_temporal_coordinates.loc[ind_to_modify] = starting_point df_temporal_coordinates.loc[ind_to_modify] = starting_point
# Load the temporal transformation object # Load the temporal transformation object
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment