Commit c0c3ddce authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[STATIONS] add trend_test result display in comparisons_visualization graphs

parent f9ad9960
No related merge requests found
Showing with 87 additions and 42 deletions
+87 -42
......@@ -10,10 +10,10 @@ import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from experiment.meteo_france_data.scm_models_data.abstract_extended_study import AbstractExtendedStudy
from experiment.meteo_france_data.scm_models_data.visualization.studies_visualization.studies import Studies
from experiment.meteo_france_data.scm_models_data.visualization.study_visualization.study_visualizer import \
StudyVisualizer
from experiment.trend_analysis.univariate_test.abstract_univariate_test import AbstractUnivariateTest
from experiment.meteo_france_data.scm_models_data.visualization import \
Studies
from experiment.meteo_france_data.scm_models_data.visualization import StudyVisualizer
from experiment.meteo_france_data.scm_models_data.visualization.utils import plot_df
from utils import cached_property, get_display_name_from_object_type, VERSION_TIME
......
......@@ -16,6 +16,7 @@ from experiment.trend_analysis.univariate_test.abstract_univariate_test import A
from experiment.trend_analysis.non_stationary_trends import \
ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest, IndependenceLocationTrendTest
from experiment.meteo_france_data.scm_models_data.visualization.utils import create_adjusted_axes
from experiment.trend_analysis.univariate_test.utils import compute_gev_change_point_test_results
from experiment.utils import average_smoothing_with_sliding_window
from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \
FullEstimatorInASingleStepWithSmoothMargin
......@@ -405,23 +406,8 @@ class StudyVisualizer(VisualizationParameters):
massif_names = massif_names[:nb_massif_for_fast_mode]
for massif_id, massif_name in enumerate(massif_names):
years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
if self.multiprocessing:
list_args = [(smooth_maxima, starting_year, trend_test_class, years) for starting_year in
starting_years]
with Pool(NB_CORES) as p:
trend_test_res = p.starmap(self.compute_gev_change_point_test_result, list_args)
else:
trend_test_res = [
self.compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years)
for starting_year in starting_years]
# Keep only the most likely starting year
# (i.e. the starting year that minimizes its negative log likelihood)
# (set all the other data to np.nan so that they will not be taken into account in mean function)
best_idx = list(np.argmin(trend_test_res, axis=0))[2]
# print(best_idx, trend_test_res)
best_idxs = [best_idx]
# todo: by doing a sorting on the deviance, I could get the nb_top_likelihood_values values
# best_idxs = list(np.argmax(trend_test_res, axis=0))[-nb_top_likelihood_values:]
trend_test_res, best_idxs = compute_gev_change_point_test_results(self.multiprocessing, smooth_maxima, starting_years,
trend_test_class, years)
trend_test_res = [(a, b) if i in best_idxs else (np.nan, np.nan)
for i, (a, b, *_) in enumerate(trend_test_res)]
massif_name_to_trend_res[massif_name] = list(zip(*trend_test_res))
......@@ -432,12 +418,6 @@ class StudyVisualizer(VisualizationParameters):
return [pd.DataFrame(massif_name_to_res, index=starting_years).transpose()
for massif_name_to_res in all_massif_name_to_res]
@staticmethod
def compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years):
trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractGevChangePointTest
assert isinstance(trend_test, AbstractGevChangePointTest)
return trend_test.test_trend_type, trend_test.test_trend_strength, trend_test.non_stationary_nllh, trend_test.non_stationary_deviance, trend_test.stationary_deviance
@staticmethod
def compute_trend_test_result(smooth_maxima, starting_year, trend_test_class, years):
trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractUnivariateTest
......
......@@ -13,22 +13,23 @@ def visualize_all_stations():
def visualize_non_nan_station():
vizu = ComparisonsVisualization(altitudes=ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST,
keep_only_station_without_nan_values=True,
normalize_observations=True)
# vizu.visualize_maximum()
vizu.visualize_gev()
normalize_observations=False)
vizu.visualize_maximum()
# vizu.visualize_gev()
def example():
# this is a really good example for the maxima at least
vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False)
vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True)
# vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False)
# vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True)
# vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False, keep_only_station_without_nan_values=True)
vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False, keep_only_station_without_nan_values=True)
# vizu._visualize_ax_main(vizu.plot_gev, vizu.comparisons[0], 'Beaufortain', show=True)
vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True)
if __name__ == '__main__':
# visualize_fast_comparison()
# visualize_all_stations()
visualize_all_stations()
# visualize_non_nan_station()
example()
# example()
......@@ -10,6 +10,9 @@ from experiment.meteo_france_data.scm_models_data.visualization.study_visualizat
VisualizationParameters
from experiment.meteo_france_data.stations_data.comparison_analysis import ComparisonAnalysis, MASSIF_COLUMN_NAME, \
REANALYSE_STR, ALTITUDE_COLUMN_NAME
from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import GevLocationChangePointTest
from experiment.trend_analysis.univariate_test.abstract_univariate_test import AbstractUnivariateTest
from experiment.trend_analysis.univariate_test.utils import compute_gev_change_point_test_results
from extreme_estimator.extreme_models.result_from_fit import ResultFromIsmev
from extreme_estimator.extreme_models.utils import r, safe_run_r_estimator, ro
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
......@@ -67,6 +70,7 @@ class ComparisonsVisualization(VisualizationParameters):
def _visualize_ax_main(self, plot_function, comparison: ComparisonAnalysis, massif, ax=None, show=False):
if ax is None:
_, ax = plt.subplots(1, 1, figsize=self.figsize)
ax2 = ax.twinx()
df = comparison.df_merged_intersection_clean.copy()
ind = df[MASSIF_COLUMN_NAME] == massif
......@@ -78,19 +82,21 @@ class ComparisonsVisualization(VisualizationParameters):
ind_location = df.index.str.contains(REANALYSE_STR)
df[DISTANCE_COLUMN_NAME] = 0
for coordinate_name in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
print(df[DISTANCE_COLUMN_NAME])
center = df.loc[ind_location, coordinate_name].values[0]
print(center)
df[DISTANCE_COLUMN_NAME] += (center - df[coordinate_name]).pow(2)
df[DISTANCE_COLUMN_NAME] = df[DISTANCE_COLUMN_NAME].pow(0.5)
df[DISTANCE_COLUMN_NAME] = (df[DISTANCE_COLUMN_NAME] / 1000).round(1)
for color, (i, s) in zip(colors_station, df.iterrows()):
label = i
label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME])
label += ' ({}km)'.format(s[DISTANCE_COLUMN_NAME])
s_values = s.iloc[3:-1].to_dict()
years, maxima = list(s_values.keys()), list(s_values.values())
plot_color = color if REANALYSE_STR not in label else 'g'
plot_function(ax, s_values, label, plot_color)
plot_function(ax, ax2, years, maxima, label, plot_color)
ax.set_title('{} at {}m'.format(massif, comparison.altitude))
ax.legend(prop={'size': 5})
......@@ -100,13 +106,30 @@ class ComparisonsVisualization(VisualizationParameters):
def visualize_maximum(self):
return self._visualize_main(self.plot_maxima, 'Recent trend of Annual maxima of snowfall')
def plot_maxima(self, ax, s_values, label, plot_color):
ax.plot(list(s_values.keys()), list(s_values.values()), label=label, color=plot_color)
def plot_maxima(self, ax, ax2, years, maxima, label, plot_color):
if self.keep_only_station_without_nan_values:
# Run trend test to improve the label
starting_years = years[:-1]
trend_test_res, best_idxs = compute_gev_change_point_test_results(multiprocessing=True,
maxima=maxima, starting_years=starting_years,
trend_test_class=GevLocationChangePointTest,
years=years)
best_idx = best_idxs[0]
most_likely_year = years[best_idx]
most_likely_trend_type = trend_test_res[best_idx][0]
display_trend_type = AbstractUnivariateTest.get_display_trend_type(real_trend_type=most_likely_trend_type)
label += "\n {} starting in {}".format(display_trend_type, most_likely_year)
# Display the nllh against the starting year
step = 10
ax2.plot(starting_years[::step], [t[3] for t in trend_test_res][::step], color=plot_color, marker='o')
ax2.plot(starting_years[::step], [t[4] for t in trend_test_res][::step], color=plot_color, marker='x')
# Plot maxima
ax.plot(years, maxima, label=label, color=plot_color)
def visualize_gev(self):
return self._visualize_main(self.plot_gev)
def plot_gev(self, ax, s_values, label, plot_color):
def plot_gev(self, ax, ax2, s_values, label, plot_color):
# todo should I normalize here ?
# fit gev
data = list(s_values.values())
......@@ -120,4 +143,3 @@ class ComparisonsVisualization(VisualizationParameters):
y = gev_params.density(x)
# display the gev distribution that was obtained
ax.plot(x, y, label=label, color=plot_color)
......@@ -62,6 +62,13 @@ class AbstractUnivariateTest(object):
# d[cls.NO_TREND] = 'k--'
return d
@classmethod
def get_display_trend_type(cls, real_trend_type):
if cls.SIGNIFICATIVE in real_trend_type:
return real_trend_type
else:
return cls.NON_SIGNIFICATIVE_TREND
@classmethod
def get_real_trend_types(cls, display_trend_type):
if display_trend_type is cls.ALL_TREND:
......
from multiprocessing.pool import Pool
import numpy as np
from experiment.trend_analysis.univariate_test.abstract_gev_change_point_test import AbstractGevChangePointTest
from utils import NB_CORES
def compute_gev_change_point_test_result(smooth_maxima, starting_year, trend_test_class, years):
trend_test = trend_test_class(years, smooth_maxima, starting_year) # type: AbstractGevChangePointTest
assert isinstance(trend_test, AbstractGevChangePointTest)
return trend_test.test_trend_type, trend_test.test_trend_strength, trend_test.non_stationary_nllh, trend_test.non_stationary_deviance, trend_test.stationary_deviance
def compute_gev_change_point_test_results(multiprocessing, maxima, starting_years, trend_test_class,
years):
if multiprocessing:
list_args = [(maxima, starting_year, trend_test_class, years) for starting_year in
starting_years]
with Pool(NB_CORES) as p:
trend_test_res = p.starmap(compute_gev_change_point_test_result, list_args)
else:
trend_test_res = [
compute_gev_change_point_test_result(maxima, starting_year, trend_test_class, years)
for starting_year in starting_years]
# Keep only the most likely starting year
# (i.e. the starting year that minimizes its negative log likelihood)
# (set all the other data to np.nan so that they will not be taken into account in mean function)
best_idx = list(np.argmin(trend_test_res, axis=0))[2]
# print(best_idx, trend_test_res)
best_idxs = [best_idx]
# todo: by doing a sorting on the deviance, I could get the nb_top_likelihood_values values
# best_idxs = list(np.argmax(trend_test_res, axis=0))[-nb_top_likelihood_values:]
return trend_test_res, best_idxs
......@@ -251,7 +251,7 @@ class AbstractCoordinates(object):
# Compute the indices to modify
ind_to_modify = df_temporal_coordinates.iloc[:, 0] <= starting_point # type: pd.Series
# Assert that some coordinates are selected but not all
assert 0 < sum(ind_to_modify) < len(ind_to_modify)
assert 0 < sum(ind_to_modify) < len(ind_to_modify), sum(ind_to_modify)
# Modify the temporal coordinates to enforce the stationarity
df_temporal_coordinates.loc[ind_to_modify] = starting_point
# Load the temporal transformation object
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment