Commit 1d98720b authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[SCM][STUDIES VISUALIZER] add significative trend evolution. add abstract_trend_test object.

parent fb667260
No related merge requests found
Showing with 191 additions and 21 deletions
+191 -21
......@@ -9,9 +9,8 @@ class AbstractTrendScore(object):
We don't care what happen before the change point.
All we want to focus on, is the potential trend that could exist in the data after a potential change point"""
def __init__(self, starting_years, number_of_top_values) -> None:
def __init__(self, number_of_top_values) -> None:
self.number_of_top_values = number_of_top_values
self.starting_years = starting_years
def get_detailed_score(self, years_after_change_point, maxima_after_change_point):
raise NotImplementedError
......
import random
class AbstractTrendTest(object):
SIGNIFICATIVE = 'significative'
# 5 possible types of trends
NO_TREND = 'no trend'
POSITIVE_TREND = 'positive trend'
NEGATIVE_TREND = 'negative trend'
SIGNIFICATIVE_POSITIVE_TREND = SIGNIFICATIVE + ' ' + POSITIVE_TREND
SIGNIFICATIVE_NEGATIVE_TREND = SIGNIFICATIVE + ' ' + NEGATIVE_TREND
# todo: maybe create ordered dict
TREND_TYPE_TO_STYLE = {
NO_TREND: 'k--',
POSITIVE_TREND: 'g--',
SIGNIFICATIVE_POSITIVE_TREND: 'g-',
SIGNIFICATIVE_NEGATIVE_TREND: 'r-',
NEGATIVE_TREND: 'r--',
}
TREND_TYPES = list(TREND_TYPE_TO_STYLE.keys())
def __init__(self, years_after_change_point, maxima_after_change_point):
self.years_after_change_point = years_after_change_point
self.maxima_after_change_point = maxima_after_change_point
@property
def test_trend_type(self) -> str:
test_sign = self.test_sign
assert test_sign in [-1, 0, 1]
if test_sign == 0:
trend_type = self.NO_TREND
else:
trend_type = self.POSITIVE_TREND if test_sign > 0 else self.NEGATIVE_TREND
if self.is_significant:
trend_type = self.SIGNIFICATIVE + ' ' + trend_type
assert trend_type in self.TREND_TYPE_TO_STYLE
return trend_type
@property
def test_sign(self) -> int:
raise NotImplementedError
@property
def is_significant(self) -> bool:
raise NotImplementedError
class ExampleTrendTest(AbstractTrendTest):
@property
def test_sign(self) -> int:
return random.randint(0, 2) - 1
@property
def is_significant(self) -> bool:
return random.randint(1, 10) == 10
class MannKendallTrendTest(AbstractTrendTest):
pass
from experiment.meteo_france_SCM_study.abstract_score import MannKendall, WeigthedScore, MeanScore, MedianScore
from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy
from experiment.meteo_france_SCM_study.abstract_trend_test import MannKendallTrendTest, ExampleTrendTest
from experiment.meteo_france_SCM_study.crocus.crocus import CrocusDepth, CrocusSwe, ExtendedCrocusDepth, \
ExtendedCrocusSwe
from experiment.meteo_france_SCM_study.safran.safran import SafranSnowfall, ExtendedSafranSnowfall, \
......@@ -31,12 +32,31 @@ def altitude_trends():
for score_class in [MedianScore, MeanScore, MannKendall, WeigthedScore]:
visualizers = [StudyVisualizer(study, temporal_non_stationarity=True, verbose=True,
score_class=score_class)
for study in study_iterator_global(study_classes=[study_class], only_first_one=only_first_one,
altitudes=altitudes)]
for study in
study_iterator_global(study_classes=[study_class], only_first_one=only_first_one,
altitudes=altitudes)]
altitude_to_visualizer = OrderedDict(zip(altitudes, visualizers))
visualizer = AltitudeVisualizer(altitude_to_visualizer, multiprocessing=False, save_to_file=save_to_file)
visualizer.negative_trend_percentages_evolution(reverse=True)
def altitude_trends_significant():
save_to_file = False
only_first_one = False
# altitudes that have 20 massifs at least
altitudes = ALL_ALTITUDES[3:-6]
altitudes = ALL_ALTITUDES[3:5]
# altitudes = ALL_ALTITUDES[:2]
for study_class in SCM_STUDIES[:1]:
trend_test_classes = [ExampleTrendTest, ExampleTrendTest, MannKendallTrendTest][:2]
visualizers = [StudyVisualizer(study, temporal_non_stationarity=True, verbose=False)
for study in study_iterator_global(study_classes=[study_class], only_first_one=only_first_one,
altitudes=altitudes)]
altitude_to_visualizer = OrderedDict(zip(altitudes, visualizers))
visualizer = AltitudeVisualizer(altitude_to_visualizer, multiprocessing=False, save_to_file=save_to_file)
visualizer.trend_tests_percentage_evolution(trend_test_classes, starting_year_to_weights=None)
if __name__ == '__main__':
altitude_trends()
# altitude_trends()
altitude_trends_significant()
......@@ -2,7 +2,7 @@ from collections import OrderedDict, Counter
import os
import os.path as op
from multiprocessing.dummy import Pool
from typing import Dict
from typing import Dict, List
import numpy as np
import pandas as pd
......@@ -10,6 +10,7 @@ import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from experiment.meteo_france_SCM_study.abstract_extended_study import AbstractExtendedStudy
from experiment.meteo_france_SCM_study.abstract_trend_test import AbstractTrendTest
from experiment.meteo_france_SCM_study.visualization.studies_visualization.studies import \
Studies
from experiment.meteo_france_SCM_study.visualization.study_visualization.study_visualizer import StudyVisualizer
......@@ -54,7 +55,7 @@ class AltitudeVisualizer(object):
self.save_to_file = save_to_file
self.multiprocessing = multiprocessing
assert isinstance(altitude_to_study_visualizer, OrderedDict)
self.altitude_to_study_visualizer = altitude_to_study_visualizer
self.altitude_to_study_visualizer = altitude_to_study_visualizer # type: Dict[int, StudyVisualizer]
@property
def altitudes(self):
......@@ -90,10 +91,11 @@ class AltitudeVisualizer(object):
top_n = 5
top_top = 3
# keep the top_n for each altitude
all_years = [[year for year, _ in sorted(enumerate(p), key=lambda s:s[1], reverse=reverse)[-top_n:]] for p in self.all_percentages]
all_years = [[year for year, _ in sorted(enumerate(p), key=lambda s: s[1], reverse=reverse)[-top_n:]] for p in
self.all_percentages]
from itertools import chain
all_years = list(chain(*all_years))
years = [y for y, _ in sorted(Counter(all_years).items(), key=lambda s:s[1])[-top_top:]]
years = [y for y, _ in sorted(Counter(all_years).items(), key=lambda s: s[1])[-top_top:]]
years = [y + self.starting_year for y in years]
return years
......@@ -147,7 +149,7 @@ class AltitudeVisualizer(object):
ax.plot(self.altitudes, values, color + marker, label=curve_name)
ax.legend()
ax.set_xticks(self.altitudes)
ax.set_yticks(list(range(0,101, 10)))
ax.set_yticks(list(range(0, 101, 10)))
ax.grid()
ax.axhline(y=50, color='k')
......@@ -158,4 +160,70 @@ class AltitudeVisualizer(object):
score_name = get_display_name_from_object_type(self.any_study_visualizer.score_class)
title = 'Evolution of {} trends wrt to the altitude with {}'.format(variable_name, score_name)
ax.set_title(title)
self.show_or_save_to_file(specific_title=title)
\ No newline at end of file
self.show_or_save_to_file(specific_title=title)
# Trend tests evolution
def trend_tests_percentage_evolution(self, trend_test_classes, starting_year_to_weights: None):
# Load uniform weights by default
if starting_year_to_weights is None:
startings_years = self.any_study_visualizer.starting_years
uniform_weight = 1 / len(startings_years)
starting_year_to_weights = {year: uniform_weight for year in startings_years}
else:
uniform_weight = 0.0
fig, ax = plt.subplots(1, 1, figsize=self.any_study_visualizer.figsize)
# Create one display for each trend test class
markers = ['o', '+']
assert len(markers) >= len(trend_test_classes)
# Add a second legend for the color and to explain the line
for marker, trend_test_class in zip(markers, trend_test_classes):
self.trend_test_percentages_evolution(ax, marker, trend_test_class, starting_year_to_weights)
# Add the color legend
handles, labels = ax.get_legend_handles_labels()
handles_ax, labels_ax = handles[:5], labels[:5]
ax.legend(handles_ax, labels_ax, markerscale=0.0, loc=1)
ax.set_xticks(self.altitudes)
ax.set_yticks(list(range(0, 101, 10)))
ax.grid()
# Add the marker legend
names = [get_display_name_from_object_type(c) for c in trend_test_classes]
handles_ax2, labels_ax2 = handles[::5], names
ax2 = ax.twinx()
ax2.legend(handles_ax2, labels_ax2, loc=2)
ax2.set_yticks([])
# Global information
added_str = ''if uniform_weight > 0.0 else 'weighted '
ylabel = '% averaged on massifs & {}averaged on starting years'.format(added_str)
ylabel += ' (with uniform weights)'
ax.set_ylabel(ylabel)
ax.set_xlabel('altitude')
variable_name = self.study.variable_class.NAME
title = 'Evolution of {} trends (significative or not) wrt to the altitude with {}'.format(variable_name, ', '.join(names))
ax.set_title(title)
self.show_or_save_to_file(specific_title=title)
def trend_test_percentages_evolution(self, ax, marker, trend_test_class, starting_year_to_weights):
"""
Positive trend in green
Negative trend in red
Non significative trend with dotted line
Significative trend with real line
:return:
"""
# Build OrderedDict mapping altitude to a mean serie
altitude_to_serie_with_mean_percentages = OrderedDict()
for altitude, study_visualizer in self.altitude_to_study_visualizer.items():
s = study_visualizer.serie_mean_trend_test_count(trend_test_class, starting_year_to_weights)
altitude_to_serie_with_mean_percentages[altitude] = s
# Plot lines
for trend_type, style in AbstractTrendTest.TREND_TYPE_TO_STYLE.items():
percentages = [v.loc[trend_type] for v in altitude_to_serie_with_mean_percentages.values()]
ax.plot(self.altitudes, percentages, style + marker, label=trend_type)
......@@ -153,8 +153,8 @@ def complete_analysis(only_first_one=False):
def trend_analysis():
save_to_file = False
only_first_one = True
save_to_file = True
only_first_one = False
short_altitudes = [300, 1200, 2100, 3000][:1]
full_altitude_with_at_least_2_stations = [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600,
3900, 4200][:]
......@@ -168,10 +168,9 @@ def trend_analysis():
temporal_non_stationarity=True,
verbose=True,
multiprocessing=True,
complete_non_stationary_trend_analysis=False)
# study_visualizer.only_one_graph = True
complete_non_stationary_trend_analysis=True)
study_visualizer.visualize_all_independent_temporal_trend()
# study_visualizer.visualize_temporal_trend_relevance()
study_visualizer.visualize_temporal_trend_relevance()
def maxima_analysis():
......@@ -195,9 +194,9 @@ def maxima_analysis():
def main_run():
# normal_visualization(temporal_non_stationarity=True)
# trend_analysis()
trend_analysis()
# all_scores_vizu()
maxima_analysis()
# maxima_analysis()
# all_normal_vizu()
# annual_mean_vizu_compare_durand_study(safran=True, take_mean_value=True, altitude=2100)
......
......@@ -12,6 +12,7 @@ import seaborn as sns
from experiment.meteo_france_SCM_study.abstract_score import MeanScore, WeigthedScore, AbstractTrendScore
from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy
from experiment.meteo_france_SCM_study.abstract_trend_test import AbstractTrendTest
from experiment.meteo_france_SCM_study.visualization.study_visualization.non_stationary_trends import \
ConditionalIndedendenceLocationTrendTest, MaxStableLocationTrendTest, IndependenceLocationTrendTest
from experiment.meteo_france_SCM_study.visualization.utils import create_adjusted_axes
......@@ -90,7 +91,7 @@ class StudyVisualizer(object):
self.window_size_for_smoothing = 1 # other value could be
self.number_of_top_values = 10 # 1 if we just want the maxima
self.score_class = score_class
self.score = self.score_class(self.starting_years, self.number_of_top_values) # type: AbstractTrendScore
self.score = self.score_class(self.number_of_top_values) # type: AbstractTrendScore
# PLOT ARGUMENTS
self.show = False if self.save_to_file else show
......@@ -333,8 +334,6 @@ class StudyVisualizer(object):
all_massif_data = np.sort(all_massif_data)
return all_massif_data
@property
def starting_years(self):
start_year, stop_year = self.study.start_year_and_stop_year
......@@ -363,11 +362,34 @@ class StudyVisualizer(object):
for j, starting_year in enumerate(self.starting_years):
detailed_scores.append(self.score.get_detailed_score(years, smooth_maxima))
assert years[0] == starting_year, "{} {}".format(years[0], starting_year)
# Remove the first element from the list
years = years[1:]
smooth_maxima = smooth_maxima[1:]
massif_name_to_scores[massif_name] = np.array(detailed_scores)
return massif_name_to_scores
def massif_name_to_trend_test_count(self, trend_test_class, starting_year_to_weight):
massif_name_to_serie_percentages = {}
for massif_id, massif_name in enumerate(self.study.study_massif_names):
trend_type_and_weight = []
years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
for starting_year, weight in starting_year_to_weight.items():
idx = years.index(starting_year)
years, smooth_maxima = years[idx:], smooth_maxima[idx:]
assert years[0] == starting_year, "{} {}".format(years[0], starting_year)
trend_test = trend_test_class(years, smooth_maxima) # type: AbstractTrendTest
trend_type_and_weight.append((trend_test.test_trend_type, weight))
df = pd.DataFrame(trend_type_and_weight, columns=['trend type', 'weight'])
serie = df.groupby(['trend type']).sum()
massif_name_to_serie_percentages[massif_name] = serie * 100
return massif_name_to_serie_percentages
def serie_mean_trend_test_count(self, trend_test_class, starting_year_to_weight):
massif_name_to_trend_test_count = self.massif_name_to_trend_test_count(trend_test_class, starting_year_to_weight)
df = pd.concat(list(massif_name_to_trend_test_count.values()), axis=1, sort=False)
df.fillna(0.0, inplace=True)
return df.mean(axis=1)
@cached_property
def massif_name_to_scores(self):
return {k: v[:, 0] for k, v in self.massif_name_to_detailed_scores.items()}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment