Commit 31bc28dd authored by Le Roux Erwan

[SCM][HYPERCUBE] Create Hypercube object with MultiIndex. Modify...

[SCM][HYPERCUBE] Create Hypercube object with MultiIndex. Modify @cached_property in utils (now taken from a library, and it seems to be a bit faster). Added some parallelization for the trend test type computation in study_visualizer.
parent 11b44c4e
Showing with 160 additions and 34 deletions
import os
import os.path as op
from multiprocessing.dummy import Pool
from typing import Dict, Tuple
import matplotlib.pyplot as plt
import pandas as pd
from experiment.meteo_france_SCM_study.visualization.study_visualization.study_visualizer import StudyVisualizer
from utils import cached_property, VERSION_TIME
def get_df_trend_spatio_temporal(study_visualizer, trend_class, starting_years):
return study_visualizer.df_trend_spatio_temporal(trend_class, starting_years)
class HypercubeVisualizer(object):
"""
A study visualizer contains some massifs and years; these form the base DataFrame of the hypercube.
Additional index levels come from the tuple key.
The tuple can contain altitudes and the type of snow quantity.
"""
def __init__(self, tuple_to_study_visualizer: Dict[Tuple, StudyVisualizer],
trend_class,
save_to_file=False):
self.save_to_file = save_to_file
self.trend_class = trend_class
self.tuple_to_study_visualizer = tuple_to_study_visualizer # type: Dict[Tuple, StudyVisualizer]
# Main attributes defining the hypercube
def tuple_to_massif_names(self, tuple):
return self.tuple_to_study_visualizer[tuple].study.study_massif_names
@cached_property
def starting_years(self):
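# only the first starting years are kept here (presumably to limit computation time)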
return self.study_visualizer.starting_years[:7]
@property
def starting_year_to_weights(self):
# Load uniform weights by default
uniform_weight = 1 / len(self.starting_years)
return {year: uniform_weight for year in self.starting_years}
@cached_property
def tuple_to_df_trend_type(self):
df_spatio_temporal_trend_types = [get_df_trend_spatio_temporal(study_visualizer, self.trend_class, self.starting_years)
for study_visualizer in self.tuple_to_study_visualizer.values()]
return dict(zip(self.tuple_to_study_visualizer.keys(), df_spatio_temporal_trend_types))
@cached_property
def hypercube(self):
keys = list(self.tuple_to_df_trend_type.keys())
values = list(self.tuple_to_df_trend_type.values())
df = pd.concat(values, keys=keys, axis=0)
return df
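# A minimal sketch (not the commit's code) of what pd.concat(..., keys=...) builds here,
# assuming two altitude keys and a single massif with toy trend-type values:
#
#     import pandas as pd
#     df_900 = pd.DataFrame({1958: ['+'], 1959: ['-']}, index=['Chartreuse'])
#     df_1200 = pd.DataFrame({1958: ['-'], 1959: ['-']}, index=['Chartreuse'])
#     hypercube = pd.concat([df_900, df_1200], keys=[900, 1200], axis=0)
#     hypercube.index.tolist()  # [(900, 'Chartreuse'), (1200, 'Chartreuse')]
#
# i.e. the row index becomes a MultiIndex whose outer level is the tuple key and whose
# inner level comes from each per-visualizer DataFrame.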
# Some properties
def show_or_save_to_file(self, specific_title=''):
if self.save_to_file:
main_title, _ = '_'.join(self.study.title.split()).split('/')
filename = "{}/{}/".format(VERSION_TIME, main_title)
filename += specific_title
filepath = op.join(self.study.result_full_path, filename + '.png')
dirname = op.dirname(filepath)
if not op.exists(dirname):
os.makedirs(dirname, exist_ok=True)
plt.savefig(filepath)
else:
plt.show()
@property
def study_visualizer(self) -> StudyVisualizer:
return list(self.tuple_to_study_visualizer.values())[0]
@property
def study(self):
return self.study_visualizer.study
class AltitudeHypercubeVisualizer(HypercubeVisualizer):
pass
class QuantitityAltitudeHypercubeVisualizer(HypercubeVisualizer):
pass
import time
from experiment.meteo_france_SCM_study.visualization.studies_visualization.hypercube_visualizer import \
HypercubeVisualizer
from experiment.trend_analysis.abstract_score import MannKendall, WeigthedScore, MeanScore, MedianScore
from experiment.trend_analysis.univariate_trend_test.abstract_gev_trend_test import GevLocationTrendTest, \
GevScaleTrendTest, GevShapeTrendTest
@@ -55,6 +59,32 @@ def altitude_trends_significant():
visualizer.trend_tests_percentage_evolution_with_altitude(trend_test_classes, starting_year_to_weights=None)
def hypercube_test():
save_to_file = False
only_first_one = True
altitudes = ALL_ALTITUDES[3:-6]
altitudes = ALL_ALTITUDES[2:4]
for study_class in SCM_STUDIES[:1]:
trend_test_class = [MannKendallTrendTest, GevLocationTrendTest, GevScaleTrendTest, GevShapeTrendTest][0]
visualizers = [StudyVisualizer(study, temporal_non_stationarity=True, verbose=False, multiprocessing=True)
for study in study_iterator_global(study_classes=[study_class], only_first_one=only_first_one,
altitudes=altitudes)]
altitudes = [(a) for a in altitudes]
print(altitudes)
altitude_to_visualizer = OrderedDict(zip(altitudes, visualizers))
visualizer = HypercubeVisualizer(altitude_to_visualizer, save_to_file=save_to_file,
trend_class=trend_test_class)
print(visualizer.hypercube)
def main_run():
# altitude_trends()
# altitude_trends_significant()
hypercube_test()
if __name__ == '__main__':
start = time.time()
main_run()
duration = time.time() - start
print('Full run took {}s'.format(round(duration, 1)))
@@ -219,13 +219,14 @@ class AltitudeVisualizer(object):
ax2.set_yticks([])
# Global information
added_str = '' if uniform_weight > 0.0 else 'weighted '
ylabel = '% averaged on massifs & {}averaged on starting years'.format(added_str)
ylabel += ' (with uniform weights)'
ax.set_ylabel(ylabel)
ax.set_xlabel('altitude')
variable_name = self.study.variable_class.NAME
title = 'Evolution of {} trends (significant or not) wrt the altitude with {}'.format(variable_name,
', '.join(names))
ax.set_title(title)
self.show_or_save_to_file(specific_title=title)
@@ -239,7 +240,7 @@ class AltitudeVisualizer(object):
for trend_type, style in trend_test_class.trend_type_to_style().items():
weighted_percentages = [v.loc[trend_type] if trend_type in v.index else 0.0
for v in altitude_to_serie_with_mean_percentages.values()]
if set(weighted_percentages) == {0.0}:
ax.plot([], [], style + marker, label=trend_type)
else:
...
import os
import os.path as op
from collections import OrderedDict
from multiprocessing.pool import Pool
import math
import matplotlib.pyplot as plt
@@ -56,6 +57,7 @@ class StudyVisualizer(object):
normalization_under_one_observations=True,
score_class=MeanScore):
self.nb_cores = 7
self.massif_id_to_smooth_maxima = {}
self.temporal_non_stationarity = temporal_non_stationarity
self.only_first_row = only_first_row
@@ -374,14 +376,42 @@ class StudyVisualizer(object):
trend_type_and_weight = []
years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
for starting_year, weight in starting_year_to_weight.items():
test_trend_type = self.compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years)
trend_type_and_weight.append((test_trend_type, weight))
df = pd.DataFrame(trend_type_and_weight, columns=['trend type', 'weight'])
massif_name_to_df_trend_type[massif_name] = df
return massif_name_to_df_trend_type
def df_trend_spatio_temporal(self, trend_test_class, starting_years):
"""
Index are the massif
Columns are the starting year
:param trend_test_class:
:param starting_year_to_weight:
:return:
"""
massif_name_to_trend_types = {}
for massif_id, massif_name in enumerate(self.study.study_massif_names):
years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
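# Run one trend test per starting year, either serially or spread across
# self.nb_cores worker processes.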
if self.multiprocessing:
list_args = [(smooth_maxima, starting_year, trend_test_class, years) for starting_year in starting_years]
with Pool(self.nb_cores) as p:
trend_types = p.starmap(self.compute_trend_test_type, list_args)
else:
trend_types = [self.compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years)
for starting_year in starting_years]
massif_name_to_trend_types[massif_name] = trend_types
df = pd.DataFrame(massif_name_to_trend_types, index=starting_years)
return df.transpose()
@staticmethod
def compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years):
idx = years.index(starting_year)
# assert years[0] == starting_year, "{} {}".format(years[0], starting_year)
trend_test = trend_test_class(years[:][idx:], smooth_maxima[:][idx:]) # type: AbstractTrendTest
return trend_test.test_trend_type
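# A minimal, self-contained sketch of the Pool.starmap pattern used above, with a toy
# top-level function standing in for compute_trend_test_type (names and values are
# illustrative, not from the commit):
#
#     from multiprocessing.pool import Pool
#
#     def toy_trend_type(smooth_maxima, starting_year):
#         return 'positive' if smooth_maxima[-1] > smooth_maxima[0] else 'negative'
#
#     if __name__ == '__main__':
#         list_args = [([1.2, 1.5, 1.9], year) for year in (1958, 1968, 1978)]
#         with Pool(2) as p:
#             print(p.starmap(toy_trend_type, list_args))  # ['positive', 'positive', 'positive']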
def df_trend_test_count(self, trend_test_class, starting_year_to_weight):
"""
Index are the trend type
...
import datetime
import os.path as op
from cached_property import cached_property
VERSION = datetime.datetime.now()
VERSION_TIME = str(VERSION).split('.')[0]
@@ -34,31 +35,6 @@ def float_to_str_with_only_some_significant_digits(f, nb_digits) -> str:
return '%s' % float('%.{}g'.format(nb_digits) % f)
# todo: this cached_property has a weird behavior with inheritance,
# when we call the super cached_property in the child method
class cached_property(object):
"""
Descriptor (non-data) for building an attribute on-demand on first use.
From: https://stackoverflow.com/questions/4037481/caching-attributes-of-classes-in-python
"""
def __init__(self, factory):
"""
<factory> is called such: factory(instance) to build the attribute.
"""
self._attr_name = factory.__name__
self._factory = factory
def __get__(self, instance, owner):
# Build the attribute.
attr = self._factory(instance)
# Cache the value; hide ourselves.
setattr(instance, self._attr_name, attr)
return attr
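# A minimal illustration of the caching behaviour (the cached_property library imported
# above works the same way: the first access computes the value and stores it on the
# instance, shadowing the non-data descriptor):
#
#     class Expensive(object):
#         @cached_property
#         def value(self):
#             print('computing')
#             return 42
#
#     e = Expensive()
#     e.value  # prints 'computing' and caches 42 as an instance attribute
#     e.value  # returns the cached 42 without recomputing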
class Example(object):
@cached_property
...