From 31bc28dd352b1e14687836601a7ad26fe5a3b46c Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Tue, 21 May 2019 13:38:58 +0200 Subject: [PATCH] [SCM][HYPERCUBE] Create Hypercube object with MultiIndex. Modify @cached_property in utils (from a library and seems to be a bit faster). Added some parallelization for trend test type in study_visualizer. --- .../hypercube_visualizer.py | 89 +++++++++++++++++++ .../main_studies_visualizer.py | 34 ++++++- .../studies_visualizer.py | 7 +- .../study_visualization/study_visualizer.py | 38 +++++++- utils.py | 26 +----- 5 files changed, 160 insertions(+), 34 deletions(-) create mode 100644 experiment/meteo_france_SCM_study/visualization/studies_visualization/hypercube_visualizer.py diff --git a/experiment/meteo_france_SCM_study/visualization/studies_visualization/hypercube_visualizer.py b/experiment/meteo_france_SCM_study/visualization/studies_visualization/hypercube_visualizer.py new file mode 100644 index 00000000..96e88035 --- /dev/null +++ b/experiment/meteo_france_SCM_study/visualization/studies_visualization/hypercube_visualizer.py @@ -0,0 +1,89 @@ +import os +import os +import os.path as op +from multiprocessing.dummy import Pool +from typing import Dict, Tuple + +import matplotlib.pyplot as plt +import pandas as pd + +from experiment.meteo_france_SCM_study.visualization.study_visualization.study_visualizer import StudyVisualizer +from utils import cached_property, VERSION_TIME + + +def get_df_trend_spatio_temporal(study_visualizer, trend_class, starting_years): + return study_visualizer.df_trend_spatio_temporal(trend_class, starting_years) + + +class HypercubeVisualizer(object): + """ + A study visualizer contain some massifs and years. This forms the base DataFrame of the hypercube + Additional index will come from the tuple. 
+ Tuple could contain altitudes, type of snow quantity + """ + + def __init__(self, tuple_to_study_visualizer: Dict[Tuple, StudyVisualizer], + trend_class, + save_to_file=False): + self.save_to_file = save_to_file + self.trend_class = trend_class + self.tuple_to_study_visualizer = tuple_to_study_visualizer # type: Dict[Tuple, StudyVisualizer] + + # Main attributes defining the hypercube + + def tuple_to_massif_names(self, tuple): + return self.tuple_to_study_visualizer[tuple].study.study_massif_names + + @cached_property + def starting_years(self): + return self.study_visualizer.starting_years[:7] + + @property + def starting_year_to_weights(self): + # Load uniform weights by default + uniform_weight = 1 / len(self.starting_years) + return {year: uniform_weight for year in self.starting_years} + + @cached_property + def tuple_to_df_trend_type(self): + df_spatio_temporal_trend_types = [get_df_trend_spatio_temporal(study_visualizer, self.trend_class, self.starting_years) + for study_visualizer in self.tuple_to_study_visualizer.values()] + return dict(zip(self.tuple_to_study_visualizer.keys(), df_spatio_temporal_trend_types)) + + @cached_property + def hypercube(self): + keys = list(self.tuple_to_df_trend_type.keys()) + values = list(self.tuple_to_df_trend_type.values()) + df = pd.concat(values, keys=keys, axis=0) + return df + + # Some properties + + def show_or_save_to_file(self, specific_title=''): + if self.save_to_file: + main_title, _ = '_'.join(self.study.title.split()).split('/') + filename = "{}/{}/".format(VERSION_TIME, main_title) + filename += specific_title + filepath = op.join(self.study.result_full_path, filename + '.png') + dirname = op.dirname(filepath) + if not op.exists(dirname): + os.makedirs(dirname, exist_ok=True) + plt.savefig(filepath) + else: + plt.show() + + @property + def study_visualizer(self) -> StudyVisualizer: + return list(self.tuple_to_study_visualizer.values())[0] + + @property + def study(self): + return self.study_visualizer.study 
+ + +class AltitudeHypercubeVisualizer(HypercubeVisualizer): + pass + + +class QuantitityAltitudeHypercubeVisualizer(HypercubeVisualizer): + pass diff --git a/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py b/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py index 3cad2ef6..634bb4b8 100644 --- a/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py @@ -1,3 +1,7 @@ +import time + +from experiment.meteo_france_SCM_study.visualization.studies_visualization.hypercube_visualizer import \ + HypercubeVisualizer from experiment.trend_analysis.abstract_score import MannKendall, WeigthedScore, MeanScore, MedianScore from experiment.trend_analysis.univariate_trend_test.abstract_gev_trend_test import GevLocationTrendTest, \ GevScaleTrendTest, GevShapeTrendTest @@ -55,6 +59,32 @@ def altitude_trends_significant(): visualizer.trend_tests_percentage_evolution_with_altitude(trend_test_classes, starting_year_to_weights=None) -if __name__ == '__main__': +def hypercube_test(): + save_to_file = False + only_first_one = True + altitudes = ALL_ALTITUDES[3:-6] + altitudes = ALL_ALTITUDES[2:4] + for study_class in SCM_STUDIES[:1]: + trend_test_class = [MannKendallTrendTest, GevLocationTrendTest, GevScaleTrendTest, GevShapeTrendTest][0] + visualizers = [StudyVisualizer(study, temporal_non_stationarity=True, verbose=False, multiprocessing=True) + for study in study_iterator_global(study_classes=[study_class], only_first_one=only_first_one, + altitudes=altitudes)] + altitudes = [(a) for a in altitudes] + print(altitudes) + altitude_to_visualizer = OrderedDict(zip(altitudes, visualizers)) + visualizer = HypercubeVisualizer(altitude_to_visualizer, save_to_file=save_to_file, + trend_class=trend_test_class) + print(visualizer.hypercube) + + +def main_run(): 
# altitude_trends() - altitude_trends_significant() + # altitude_trends_significant() + hypercube_test() + + +if __name__ == '__main__': + start = time.time() + main_run() + duration = time.time() - start + print('Full run took {}s'.format(round(duration, 1))) diff --git a/experiment/meteo_france_SCM_study/visualization/studies_visualization/studies_visualizer.py b/experiment/meteo_france_SCM_study/visualization/studies_visualization/studies_visualizer.py index 74255add..aa4b6c68 100644 --- a/experiment/meteo_france_SCM_study/visualization/studies_visualization/studies_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/studies_visualization/studies_visualizer.py @@ -219,13 +219,14 @@ class AltitudeVisualizer(object): ax2.set_yticks([]) # Global information - added_str = ''if uniform_weight > 0.0 else 'weighted ' + added_str = '' if uniform_weight > 0.0 else 'weighted ' ylabel = '% averaged on massifs & {}averaged on starting years'.format(added_str) ylabel += ' (with uniform weights)' ax.set_ylabel(ylabel) ax.set_xlabel('altitude') variable_name = self.study.variable_class.NAME - title = 'Evolution of {} trends (significative or not) wrt to the altitude with {}'.format(variable_name, ', '.join(names)) + title = 'Evolution of {} trends (significative or not) wrt to the altitude with {}'.format(variable_name, + ', '.join(names)) ax.set_title(title) self.show_or_save_to_file(specific_title=title) @@ -239,7 +240,7 @@ class AltitudeVisualizer(object): for trend_type, style in trend_test_class.trend_type_to_style().items(): weighted_percentages = [v.loc[trend_type] if trend_type in v.index else 0.0 - for v in altitude_to_serie_with_mean_percentages.values()] + for v in altitude_to_serie_with_mean_percentages.values()] if set(weighted_percentages) == {0.0}: ax.plot([], [], style + marker, label=trend_type) else: diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py 
b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py index 647623d7..ca9ec542 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py @@ -1,6 +1,7 @@ import os import os.path as op from collections import OrderedDict +from multiprocessing.pool import Pool import math import matplotlib.pyplot as plt @@ -56,6 +57,7 @@ class StudyVisualizer(object): normalization_under_one_observations=True, score_class=MeanScore): + self.nb_cores = 7 self.massif_id_to_smooth_maxima = {} self.temporal_non_stationarity = temporal_non_stationarity self.only_first_row = only_first_row @@ -374,14 +376,42 @@ class StudyVisualizer(object): trend_type_and_weight = [] years, smooth_maxima = self.smooth_maxima_x_y(massif_id) for starting_year, weight in starting_year_to_weight.items(): - idx = years.index(starting_year) - # assert years[0] == starting_year, "{} {}".format(years[0], starting_year) - trend_test = trend_test_class(years[:][idx:], smooth_maxima[:][idx:]) # type: AbstractTrendTest - trend_type_and_weight.append((trend_test.test_trend_type, weight)) + test_trend_type = self.compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years) + trend_type_and_weight.append((test_trend_type, weight)) df = pd.DataFrame(trend_type_and_weight, columns=['trend type', 'weight']) massif_name_to_df_trend_type[massif_name] = df return massif_name_to_df_trend_type + def df_trend_spatio_temporal(self, trend_test_class, starting_years): + """ + Index are the massif + Columns are the starting year + + :param trend_test_class: + :param starting_year_to_weight: + :return: + """ + massif_name_to_trend_types = {} + for massif_id, massif_name in enumerate(self.study.study_massif_names): + years, smooth_maxima = self.smooth_maxima_x_y(massif_id) + if self.multiprocessing: + list_args = [(smooth_maxima, starting_year, 
trend_test_class, years) for starting_year in starting_years] + with Pool(self.nb_cores) as p: + trend_types = p.starmap(self.compute_trend_test_type, list_args) + else: + trend_types = [self.compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years) + for starting_year in starting_years] + massif_name_to_trend_types[massif_name] = trend_types + df = pd.DataFrame(massif_name_to_trend_types, index=starting_years) + return df.transpose() + + @staticmethod + def compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years): + idx = years.index(starting_year) + # assert years[0] == starting_year, "{} {}".format(years[0], starting_year) + trend_test = trend_test_class(years[:][idx:], smooth_maxima[:][idx:]) # type: AbstractTrendTest + return trend_test.test_trend_type + def df_trend_test_count(self, trend_test_class, starting_year_to_weight): """ Index are the trend type diff --git a/utils.py b/utils.py index a27829a2..f9540d88 100644 --- a/utils.py +++ b/utils.py @@ -1,5 +1,6 @@ import datetime import os.path as op +from cached_property import cached_property VERSION = datetime.datetime.now() VERSION_TIME = str(VERSION).split('.')[0] @@ -34,31 +35,6 @@ def float_to_str_with_only_some_significant_digits(f, nb_digits) -> str: return '%s' % float('%.{}g'.format(nb_digits) % f) -# todo: these cached property have a weird behavior with inheritence, -# when we call the super cached_property in the child method -class cached_property(object): - """ - Descriptor (non-data) for building an attribute on-demand on first use. - From: https://stackoverflow.com/questions/4037481/caching-attributes-of-classes-in-python - """ - - def __init__(self, factory): - """ - <factory> is called such: factory(instance) to build the attribute. - """ - self._attr_name = factory.__name__ - self._factory = factory - - def __get__(self, instance, owner): - # Build the attribute. - attr = self._factory(instance) - - # Cache the value; hide ourselves. 
- setattr(instance, self._attr_name, attr) - - return attr - - class Example(object): @cached_property -- GitLab