Commit 31bc28dd authored by Le Roux Erwan

[SCM][HYPERCUBE] Create Hypercube object with MultiIndex. Modify...

[SCM][HYPERCUBE] Create Hypercube object with MultiIndex. Modify @cached_property in utils (now taken from a library, and it seems to be a bit faster). Added some parallelization for the trend test type computation in study_visualizer.
parent 11b44c4e
Showing with 160 additions and 34 deletions
import os
import os.path as op
from multiprocessing.dummy import Pool
from typing import Dict, Tuple
import matplotlib.pyplot as plt
import pandas as pd
from experiment.meteo_france_SCM_study.visualization.study_visualization.study_visualizer import StudyVisualizer
from utils import cached_property, VERSION_TIME
def get_df_trend_spatio_temporal(study_visualizer, trend_class, starting_years):
return study_visualizer.df_trend_spatio_temporal(trend_class, starting_years)
class HypercubeVisualizer(object):
"""
A study visualizer contains some massifs and years; these form the base DataFrame of the hypercube.
Additional index levels come from the tuple key.
The tuple can contain altitudes and the type of snow quantity.
"""
def __init__(self, tuple_to_study_visualizer: Dict[Tuple, StudyVisualizer],
trend_class,
save_to_file=False):
self.save_to_file = save_to_file
self.trend_class = trend_class
self.tuple_to_study_visualizer = tuple_to_study_visualizer # type: Dict[Tuple, StudyVisualizer]
# Main attributes defining the hypercube
def tuple_to_massif_names(self, tuple):
return self.tuple_to_study_visualizer[tuple].study.study_massif_names
@cached_property
def starting_years(self):
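# only the first starting years are kept here (presumably to limit computation time)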
return self.study_visualizer.starting_years[:7]
@property
def starting_year_to_weights(self):
# Load uniform weights by default
uniform_weight = 1 / len(self.starting_years)
return {year: uniform_weight for year in self.starting_years}
@cached_property
def tuple_to_df_trend_type(self):
df_spatio_temporal_trend_types = [get_df_trend_spatio_temporal(study_visualizer, self.trend_class, self.starting_years)
for study_visualizer in self.tuple_to_study_visualizer.values()]
return dict(zip(self.tuple_to_study_visualizer.keys(), df_spatio_temporal_trend_types))
@cached_property
def hypercube(self):
keys = list(self.tuple_to_df_trend_type.keys())
values = list(self.tuple_to_df_trend_type.values())
df = pd.concat(values, keys=keys, axis=0)
return df
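# A minimal sketch (not the commit's code) of what pd.concat(..., keys=...) builds here,
# assuming two altitude keys and a single massif with toy trend-type values:
#
#     import pandas as pd
#     df_900 = pd.DataFrame({1958: ['+'], 1959: ['-']}, index=['Chartreuse'])
#     df_1200 = pd.DataFrame({1958: ['-'], 1959: ['-']}, index=['Chartreuse'])
#     hypercube = pd.concat([df_900, df_1200], keys=[900, 1200], axis=0)
#     hypercube.index.tolist()  # [(900, 'Chartreuse'), (1200, 'Chartreuse')]
#
# i.e. the row index becomes a MultiIndex whose outer level is the tuple key and whose
# inner level comes from each per-visualizer DataFrame.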
# Some properties
def show_or_save_to_file(self, specific_title=''):
if self.save_to_file:
main_title, _ = '_'.join(self.study.title.split()).split('/')
filename = "{}/{}/".format(VERSION_TIME, main_title)
filename += specific_title
filepath = op.join(self.study.result_full_path, filename + '.png')
dirname = op.dirname(filepath)
if not op.exists(dirname):
os.makedirs(dirname, exist_ok=True)
plt.savefig(filepath)
else:
plt.show()
@property
def study_visualizer(self) -> StudyVisualizer:
return list(self.tuple_to_study_visualizer.values())[0]
@property
def study(self):
return self.study_visualizer.study
class AltitudeHypercubeVisualizer(HypercubeVisualizer):
pass
class QuantitityAltitudeHypercubeVisualizer(HypercubeVisualizer):
pass
import time
from experiment.meteo_france_SCM_study.visualization.studies_visualization.hypercube_visualizer import \
HypercubeVisualizer
from experiment.trend_analysis.abstract_score import MannKendall, WeigthedScore, MeanScore, MedianScore
from experiment.trend_analysis.univariate_trend_test.abstract_gev_trend_test import GevLocationTrendTest, \
GevScaleTrendTest, GevShapeTrendTest
@@ -55,6 +59,32 @@ def altitude_trends_significant():
visualizer.trend_tests_percentage_evolution_with_altitude(trend_test_classes, starting_year_to_weights=None)
def hypercube_test():
save_to_file = False
only_first_one = True
altitudes = ALL_ALTITUDES[3:-6]
altitudes = ALL_ALTITUDES[2:4]
for study_class in SCM_STUDIES[:1]:
trend_test_class = [MannKendallTrendTest, GevLocationTrendTest, GevScaleTrendTest, GevShapeTrendTest][0]
visualizers = [StudyVisualizer(study, temporal_non_stationarity=True, verbose=False, multiprocessing=True)
for study in study_iterator_global(study_classes=[study_class], only_first_one=only_first_one,
altitudes=altitudes)]
altitudes = [(a) for a in altitudes]
print(altitudes)
altitude_to_visualizer = OrderedDict(zip(altitudes, visualizers))
visualizer = HypercubeVisualizer(altitude_to_visualizer, save_to_file=save_to_file,
trend_class=trend_test_class)
print(visualizer.hypercube)
def main_run():
# altitude_trends()
# altitude_trends_significant()
hypercube_test()
if __name__ == '__main__':
start = time.time()
main_run()
duration = time.time() - start
print('Full run took {}s'.format(round(duration, 1)))
@@ -219,13 +219,14 @@ class AltitudeVisualizer(object):
ax2.set_yticks([])
# Global information
added_str = '' if uniform_weight > 0.0 else 'weighted '
ylabel = '% averaged on massifs & {}averaged on starting years'.format(added_str)
ylabel += ' (with uniform weights)'
ax.set_ylabel(ylabel)
ax.set_xlabel('altitude')
variable_name = self.study.variable_class.NAME
title = 'Evolution of {} trends (significant or not) wrt the altitude with {}'.format(variable_name,
', '.join(names))
ax.set_title(title)
self.show_or_save_to_file(specific_title=title)
@@ -239,7 +240,7 @@ class AltitudeVisualizer(object):
for trend_type, style in trend_test_class.trend_type_to_style().items():
weighted_percentages = [v.loc[trend_type] if trend_type in v.index else 0.0
for v in altitude_to_serie_with_mean_percentages.values()]
if set(weighted_percentages) == {0.0}:
ax.plot([], [], style + marker, label=trend_type)
else:
...
import os
import os.path as op
from collections import OrderedDict
from multiprocessing.pool import Pool
import math
import matplotlib.pyplot as plt
@@ -56,6 +57,7 @@ class StudyVisualizer(object):
normalization_under_one_observations=True,
score_class=MeanScore):
self.nb_cores = 7
self.massif_id_to_smooth_maxima = {}
self.temporal_non_stationarity = temporal_non_stationarity
self.only_first_row = only_first_row
@@ -374,14 +376,42 @@ class StudyVisualizer(object):
trend_type_and_weight = []
years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
for starting_year, weight in starting_year_to_weight.items():
test_trend_type = self.compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years)
trend_type_and_weight.append((test_trend_type, weight))
df = pd.DataFrame(trend_type_and_weight, columns=['trend type', 'weight'])
massif_name_to_df_trend_type[massif_name] = df
return massif_name_to_df_trend_type
def df_trend_spatio_temporal(self, trend_test_class, starting_years):
"""
Index are the massif
Columns are the starting year
:param trend_test_class:
:param starting_year_to_weight:
:return:
"""
massif_name_to_trend_types = {}
for massif_id, massif_name in enumerate(self.study.study_massif_names):
years, smooth_maxima = self.smooth_maxima_x_y(massif_id)
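# Run one trend test per starting year, either serially or spread across
# self.nb_cores worker processes.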
if self.multiprocessing:
list_args = [(smooth_maxima, starting_year, trend_test_class, years) for starting_year in starting_years]
with Pool(self.nb_cores) as p:
trend_types = p.starmap(self.compute_trend_test_type, list_args)
else:
trend_types = [self.compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years)
for starting_year in starting_years]
massif_name_to_trend_types[massif_name] = trend_types
df = pd.DataFrame(massif_name_to_trend_types, index=starting_years)
return df.transpose()
@staticmethod
def compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years):
idx = years.index(starting_year)
# assert years[0] == starting_year, "{} {}".format(years[0], starting_year)
trend_test = trend_test_class(years[:][idx:], smooth_maxima[:][idx:]) # type: AbstractTrendTest
return trend_test.test_trend_type
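# A minimal, self-contained sketch of the Pool.starmap pattern used above, with a toy
# top-level function standing in for compute_trend_test_type (names and values are
# illustrative, not from the commit):
#
#     from multiprocessing.pool import Pool
#
#     def toy_trend_type(smooth_maxima, starting_year):
#         return 'positive' if smooth_maxima[-1] > smooth_maxima[0] else 'negative'
#
#     if __name__ == '__main__':
#         list_args = [([1.2, 1.5, 1.9], year) for year in (1958, 1968, 1978)]
#         with Pool(2) as p:
#             print(p.starmap(toy_trend_type, list_args))  # ['positive', 'positive', 'positive']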
def df_trend_test_count(self, trend_test_class, starting_year_to_weight):
"""
Index are the trend type
...
import datetime
import os.path as op
from cached_property import cached_property
VERSION = datetime.datetime.now()
VERSION_TIME = str(VERSION).split('.')[0]
@@ -34,31 +35,6 @@ def float_to_str_with_only_some_significant_digits(f, nb_digits) -> str:
return '%s' % float('%.{}g'.format(nb_digits) % f)
# todo: this cached_property has a weird behavior with inheritance,
# when we call the super cached_property in the child method
class cached_property(object):
"""
Descriptor (non-data) for building an attribute on-demand on first use.
From: https://stackoverflow.com/questions/4037481/caching-attributes-of-classes-in-python
"""
def __init__(self, factory):
"""
<factory> is called such: factory(instance) to build the attribute.
"""
self._attr_name = factory.__name__
self._factory = factory
def __get__(self, instance, owner):
# Build the attribute.
attr = self._factory(instance)
# Cache the value; hide ourselves.
setattr(instance, self._attr_name, attr)
return attr
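# A minimal illustration of the caching behaviour (the cached_property library imported
# above works the same way: the first access computes the value and stores it on the
# instance, shadowing the non-data descriptor):
#
#     class Expensive(object):
#         @cached_property
#         def value(self):
#             print('computing')
#             return 42
#
#     e = Expensive()
#     e.value  # prints 'computing' and caches 42 as an instance attribute
#     e.value  # returns the cached 42 without recomputing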
class Example(object):
@cached_property
...