diff --git a/experiment/meteo_france_SCM_study/abstract_study.py b/experiment/meteo_france_SCM_study/abstract_study.py index 56b61fc83959a4d62b48226db48fc857ee832c6f..9620e50ed365aabef007a614637827d9fc2bcf32 100644 --- a/experiment/meteo_france_SCM_study/abstract_study.py +++ b/experiment/meteo_france_SCM_study/abstract_study.py @@ -3,6 +3,7 @@ import os import os.path as op from collections import OrderedDict from contextlib import redirect_stdout +from multiprocessing.pool import Pool from typing import List, Dict, Tuple import matplotlib.pyplot as plt @@ -22,7 +23,7 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ AbstractSpatialCoordinates from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima -from utils import get_full_path, cached_property +from utils import get_full_path, cached_property, NB_CORES f = io.StringIO() with redirect_stdout(f): @@ -35,13 +36,15 @@ class AbstractStudy(object): """ REANALYSIS_FOLDER = 'alp_flat/reanalysis' - def __init__(self, variable_class: type, altitude: int = 1800, year_min=1000, year_max=3000): + def __init__(self, variable_class: type, altitude: int = 1800, year_min=1000, year_max=3000, + multiprocessing=False): assert altitude in ALTITUDES, altitude self.altitude = altitude self.model_name = None self.variable_class = variable_class self.year_min = year_min self.year_max = year_max + self.multiprocessing = multiprocessing def write_to_file(self, df: pd.DataFrame): if not op.exists(self.result_full_path): @@ -72,17 +75,46 @@ class AbstractStudy(object): @cached_property def year_to_dataset_ordered_dict(self) -> OrderedDict: + print('This code is quite long... ' + 'You should consider year_to_variable which is way faster when multiprocessing=True') # Map each year to the correspond netCDF4 Dataset - year_to_dataset = OrderedDict() + path_files, ordered_years = self.ordered_years_and_path_files() + datasets = [Dataset(path_file) for path_file in path_files] + return OrderedDict(zip(ordered_years, datasets)) + + def ordered_years_and_path_files(self): nc_files = [(int(f.split('_')[-2][:4]), f) for f in os.listdir(self.study_full_path) if f.endswith('.nc')] - for year, nc_file in sorted(nc_files, key=lambda t: t[0]): - if self.year_min <= year < self.year_max: - year_to_dataset[year] = Dataset(op.join(self.study_full_path, nc_file)) - return year_to_dataset + ordered_years, path_files = zip(*[(year, op.join(self.study_full_path, nc_file)) + for year, nc_file in sorted(nc_files, key=lambda t: t[0]) + if self.year_min <= year < self.year_max]) + return path_files, ordered_years + + @property + def ordered_years(self): + return self.ordered_years_and_path_files()[1] + + @cached_property + def year_to_variable_array(self) -> OrderedDict: + # Map each year to the variable array + path_files, ordered_years = self.ordered_years_and_path_files() + if self.multiprocessing: + with Pool(NB_CORES) as p: + variables = p.map(self.load_variable, path_files) + else: + variables = [self.load_variable(path_file) for path_file in path_files] + return OrderedDict(zip(ordered_years, variables)) + + def load_variable(self, path_file): + dataset = Dataset(path_file) + keyword = self.variable_class.keyword() + if isinstance(keyword, str): + return np.array(dataset.variables[keyword]) + else: + return [np.array(dataset.variables[k]) for k in keyword] @property def start_year_and_stop_year(self) -> Tuple[int, int]: - ordered_years = list(self.year_to_dataset_ordered_dict.keys()) + ordered_years = self.ordered_years return ordered_years[0], ordered_years[-1] @cached_property @@ -108,18 +140,19 @@ class AbstractStudy(object): def apply_annual_aggregation(self, time_serie): return self.annual_aggregation_function(time_serie, axis=0) - def instantiate_variable_object(self, dataset) -> AbstractVariable: - return self.variable_class(dataset, self.altitude) + def instantiate_variable_object(self, variable_array) -> AbstractVariable: + return self.variable_class(variable_array) """ Private methods to be overwritten """ @property def _year_to_daily_time_serie_array(self) -> OrderedDict: # Map each year to a matrix of size 365-nb_days_consecutive+1 x nb_massifs - year_to_variable = {year: self.instantiate_variable_object(dataset) for year, dataset in - self.year_to_dataset_ordered_dict.items()} + variables = [self.instantiate_variable_object(dataset) for dataset in + self.year_to_variable_array.values()] + year_to_variable = dict(zip(self.ordered_years, variables)) year_to_daily_time_serie_array = OrderedDict() - for year in self.year_to_dataset_ordered_dict.keys(): + for year in self.ordered_years: # Check daily data daily_time_serie = year_to_variable[year].daily_time_serie_array assert daily_time_serie.shape[0] in [365, 366] @@ -234,7 +267,8 @@ class AbstractStudy(object): # ax.scatter(x, y) # ax.text(x, y, massif_name) # Display the center of the massif - ax.scatter(self.massifs_coordinates_for_display.x_coordinates, self.massifs_coordinates_for_display.y_coordinates, s=1) + ax.scatter(self.massifs_coordinates_for_display.x_coordinates, + self.massifs_coordinates_for_display.y_coordinates, s=1) # Improve some explanation on the X axis and on the Y axis ax.set_xlabel('Longitude (km)') ax.xaxis.set_major_formatter(get_km_formatter()) diff --git a/experiment/meteo_france_SCM_study/abstract_variable.py b/experiment/meteo_france_SCM_study/abstract_variable.py index bcd5b1b7d77c0fef9c73a72d620a758e3bcd1afe..2ed37e127d7f23d93ba9525fe1dd8aa24b76e9ae 100644 --- a/experiment/meteo_france_SCM_study/abstract_variable.py +++ b/experiment/meteo_france_SCM_study/abstract_variable.py @@ -9,11 +9,14 @@ class AbstractVariable(object): NAME = '' UNIT = '' - def __init__(self, dataset, altitude): - self.dataset = dataset - self.altitude = altitude + def __init__(self, variable_array): + self.variable_array = variable_array + + @classmethod + def keyword(cls): + raise NotImplementedError @property def daily_time_serie_array(self) -> np.ndarray: # Return an array of size length of time series x nb_massif - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/experiment/meteo_france_SCM_study/crocus/crocus_variables.py b/experiment/meteo_france_SCM_study/crocus/crocus_variables.py index 529cd72a103ad577fc602098d8970bb53a7f70c4..6f1bf6fc9d4029ff8fc6fbb3fae5fc6127a87fd4 100644 --- a/experiment/meteo_france_SCM_study/crocus/crocus_variables.py +++ b/experiment/meteo_france_SCM_study/crocus/crocus_variables.py @@ -5,27 +5,27 @@ from experiment.meteo_france_SCM_study.abstract_variable import AbstractVariable class CrocusVariable(AbstractVariable): - def __init__(self, dataset, altitude, variable_name): - super().__init__(dataset, altitude) - self.variable_name = variable_name + def __init__(self, variable_array): + super().__init__(variable_array) @property def daily_time_serie_array(self) -> np.ndarray: - return np.array(self.dataset.variables[self.variable_name]) + return self.variable_array class CrocusSweVariable(CrocusVariable): NAME = 'Snow Water Equivalent' UNIT = 'kg per m2 or mm' - def __init__(self, dataset, altitude): - super().__init__(dataset, altitude, 'SWE_1DY_ISBA') + @classmethod + def keyword(cls): + return 'SWE_1DY_ISBA' class CrocusDepthVariable(CrocusVariable): NAME = 'Snow Depth' UNIT = 'm' - def __init__(self, dataset, altitude): - super().__init__(dataset, altitude, "SD_1DY_ISBA") - + @classmethod + def keyword(cls): + return "SD_1DY_ISBA" diff --git a/experiment/meteo_france_SCM_study/safran/safran_variable.py b/experiment/meteo_france_SCM_study/safran/safran_variable.py index 864a6f58b92bab4a5226830339f3da1f62ac891d..1903abcd231e9e37ee410f19730d70b5ef6c2809 100644 --- a/experiment/meteo_france_SCM_study/safran/safran_variable.py +++ b/experiment/meteo_france_SCM_study/safran/safran_variable.py @@ -24,11 +24,15 @@ class SafranSnowfallVariable(AbstractVariable): NAME = 'Snowfall' UNIT = 'kg per m2 or mm' - def __init__(self, dataset, altitude, nb_consecutive_days_of_snowfall=1, keyword='Snowf'): - super().__init__(dataset, altitude) + @classmethod + def keyword(cls): + return 'Snowf' + + def __init__(self, variable_array, nb_consecutive_days_of_snowfall=1): + super().__init__(variable_array) self.nb_consecutive_days_of_snowfall = nb_consecutive_days_of_snowfall # Compute the daily snowfall in kg/m2 - snowfall_rates = np.array(dataset.variables[keyword]) + snowfall_rates = variable_array # Compute the mean snowrate, then multiply it by 60 * 60 * 24 # day_duration_in_seconds = 24 * 60 * 60 @@ -60,16 +64,22 @@ class SafranRainfallVariable(SafranSnowfallVariable): NAME = 'Rainfall' UNIT = 'kg per m2 or mm' - def __init__(self, dataset, altitude, nb_consecutive_days_of_snowfall=1, keyword='Rainf'): - super().__init__(dataset, altitude, nb_consecutive_days_of_snowfall, keyword) + @classmethod + def keyword(cls): + return 'Rainf' class SafranTotalPrecipVariable(AbstractVariable): - def __init__(self, dataset, altitude): - super().__init__(dataset, altitude) - self.snow_precipitation = SafranSnowfallVariable(dataset=dataset, altitude=altitude) - self.rain_precipitation = SafranRainfallVariable(dataset=dataset, altitude=altitude) + def __init__(self, variable_array): + super().__init__(variable_array) + snow_variable_array, rain_variable_array = self.variable_array + self.snow_precipitation = SafranSnowfallVariable(snow_variable_array) + self.rain_precipitation = SafranRainfallVariable(rain_variable_array) + + @classmethod + def keyword(cls): + return [SafranSnowfallVariable.keyword(), SafranRainfallVariable.keyword()] @property def daily_time_serie_array(self) -> np.ndarray: @@ -81,10 +91,14 @@ class SafranTemperatureVariable(AbstractVariable): NAME = 'Temperature' UNIT = 'Celsius Degrees' - def __init__(self, dataset, altitude, keyword='Tair'): - super().__init__(dataset, altitude) + @classmethod + def keyword(cls): + return 'Tair' + + def __init__(self, variable_array): + super().__init__(variable_array) # Temperature are in K, I transform them as celsius - self.hourly_temperature = np.array(dataset.variables[keyword]) - 273.15 + self.hourly_temperature = self.variable_array - 273.15 nb_days = len(self.hourly_temperature) // 24 self.daily_temperature = [np.mean(self.hourly_temperature[24 * i:24 * (i + 1)], axis=0) for i in range(nb_days)] diff --git a/experiment/meteo_france_SCM_study/visualization/studies_visualization/hypercube_visualizer.py b/experiment/meteo_france_SCM_study/visualization/studies_visualization/hypercube_visualizer.py index 96e88035f4edc4cbb894dacbe1db4f659861361d..971bf62f511a6bbe62522d7555b91b80ebbe7e8f 100644 --- a/experiment/meteo_france_SCM_study/visualization/studies_visualization/hypercube_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/studies_visualization/hypercube_visualizer.py @@ -1,7 +1,5 @@ import os -import os import os.path as op -from multiprocessing.dummy import Pool from typing import Dict, Tuple import matplotlib.pyplot as plt @@ -11,10 +9,6 @@ from experiment.meteo_france_SCM_study.visualization.study_visualization.study_v from utils import cached_property, VERSION_TIME -def get_df_trend_spatio_temporal(study_visualizer, trend_class, starting_years): - return study_visualizer.df_trend_spatio_temporal(trend_class, starting_years) - - class HypercubeVisualizer(object): """ A study visualizer contain some massifs and years. This forms the base DataFrame of the hypercube @@ -24,7 +18,9 @@ class HypercubeVisualizer(object): def __init__(self, tuple_to_study_visualizer: Dict[Tuple, StudyVisualizer], trend_class, + fast=False, save_to_file=False): + self.nb_data_for_fast_mode = 2 if fast else None self.save_to_file = save_to_file self.trend_class = trend_class self.tuple_to_study_visualizer = tuple_to_study_visualizer # type: Dict[Tuple, StudyVisualizer] @@ -36,17 +32,15 @@ class HypercubeVisualizer(object): @cached_property def starting_years(self): - return self.study_visualizer.starting_years[:7] - - @property - def starting_year_to_weights(self): - # Load uniform weights by default - uniform_weight = 1 / len(self.starting_years) - return {year: uniform_weight for year in self.starting_years} + starting_years = self.study_visualizer.starting_years + if self.nb_data_for_fast_mode is not None: + starting_years = starting_years[:self.nb_data_for_fast_mode] + return starting_years @cached_property def tuple_to_df_trend_type(self): - df_spatio_temporal_trend_types = [get_df_trend_spatio_temporal(study_visualizer, self.trend_class, self.starting_years) + df_spatio_temporal_trend_types = [study_visualizer.df_trend_spatio_temporal(self.trend_class, self.starting_years, + self.nb_data_for_fast_mode) for study_visualizer in self.tuple_to_study_visualizer.values()] return dict(zip(self.tuple_to_study_visualizer.keys(), df_spatio_temporal_trend_types)) @@ -80,6 +74,12 @@ class HypercubeVisualizer(object): def study(self): return self.study_visualizer.study + @property + def starting_year_to_weights(self): + # Load uniform weights by default + uniform_weight = 1 / len(self.starting_years) + return {year: uniform_weight for year in self.starting_years} + class AltitudeHypercubeVisualizer(HypercubeVisualizer): pass diff --git a/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py b/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py index 634bb4b83f4b3812712f22bdb092ea0d98f923d8..337449f621069410d4b45dc575eb24320244eee1 100644 --- a/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/studies_visualization/main_studies_visualizer.py @@ -11,7 +11,7 @@ from experiment.meteo_france_SCM_study.visualization.studies_visualization.studi from experiment.meteo_france_SCM_study.visualization.studies_visualization.studies_visualizer import StudiesVisualizer, \ AltitudeVisualizer from experiment.meteo_france_SCM_study.visualization.study_visualization.main_study_visualizer import ALL_ALTITUDES, \ - study_iterator_global, SCM_STUDIES + study_iterator_global, SCM_STUDIES, study_iterator from experiment.meteo_france_SCM_study.visualization.study_visualization.study_visualizer import StudyVisualizer from collections import OrderedDict @@ -61,19 +61,17 @@ def altitude_trends_significant(): def hypercube_test(): save_to_file = False - only_first_one = True + only_first_one = False altitudes = ALL_ALTITUDES[3:-6] altitudes = ALL_ALTITUDES[2:4] for study_class in SCM_STUDIES[:1]: trend_test_class = [MannKendallTrendTest, GevLocationTrendTest, GevScaleTrendTest, GevShapeTrendTest][0] visualizers = [StudyVisualizer(study, temporal_non_stationarity=True, verbose=False, multiprocessing=True) - for study in study_iterator_global(study_classes=[study_class], only_first_one=only_first_one, - altitudes=altitudes)] - altitudes = [(a) for a in altitudes] - print(altitudes) + for study in study_iterator(study_class=study_class, only_first_one=only_first_one, + altitudes=altitudes, multiprocessing=True)] altitude_to_visualizer = OrderedDict(zip(altitudes, visualizers)) visualizer = HypercubeVisualizer(altitude_to_visualizer, save_to_file=save_to_file, - trend_class=trend_test_class) + trend_class=trend_test_class, fast=True) print(visualizer.hypercube) diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py index 6f587381170a90f8c5b4845507eee8b6545d63fb..ded791ecbb663514de9749be3779716efeca127f 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py @@ -35,7 +35,8 @@ List[AbstractStudy]: break -def study_iterator(study_class, only_first_one=False, both_altitude=False, verbose=True, altitudes=None) -> List[ +def study_iterator(study_class, only_first_one=False, both_altitude=False, verbose=True, altitudes=None, + multiprocessing=True) -> List[ AbstractStudy]: all_studies = [] is_safran_study = study_class in [SafranSnowfall, ExtendedSafranSnowfall] @@ -49,8 +50,7 @@ def study_iterator(study_class, only_first_one=False, both_altitude=False, verbo if verbose: print('alti: {}, nb_day: {} '.format(alti, nb_day), end='') - study = study_class(altitude=alti, nb_consecutive_days=nb_day) if is_safran_study \ - else study_class(altitude=alti) + study = study_class(altitude=alti, multiprocessing=multiprocessing) massifs = study.altitude_to_massif_names[alti] if verbose: print('{} massifs: {} \n'.format(len(massifs), massifs)) diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py index ca9ec54290817448d904983f3b55fe28c40c9a87..32b5898ccc5d8a140dc4356454b4773782727954 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/study_visualizer.py @@ -40,7 +40,7 @@ from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima from test.test_utils import load_test_max_stable_models from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_with_only_some_significant_digits, \ - cached_property + cached_property, NB_CORES BLOCK_MAXIMA_DISPLAY_NAME = 'block maxima ' @@ -382,7 +382,7 @@ class StudyVisualizer(object): massif_name_to_df_trend_type[massif_name] = df return massif_name_to_df_trend_type - def df_trend_spatio_temporal(self, trend_test_class, starting_years): + def df_trend_spatio_temporal(self, trend_test_class, starting_years, nb_massif_for_fast_mode=None): """ Index are the massif Columns are the starting year @@ -392,11 +392,14 @@ class StudyVisualizer(object): :return: """ massif_name_to_trend_types = {} - for massif_id, massif_name in enumerate(self.study.study_massif_names): + massif_names = self.study.study_massif_names + if nb_massif_for_fast_mode is not None: + massif_names = massif_names[:nb_massif_for_fast_mode] + for massif_id, massif_name in enumerate(massif_names): years, smooth_maxima = self.smooth_maxima_x_y(massif_id) if self.multiprocessing: list_args = [(smooth_maxima, starting_year, trend_test_class, years) for starting_year in starting_years] - with Pool(self.nb_cores) as p: + with Pool(NB_CORES) as p: trend_types = p.starmap(self.compute_trend_test_type, list_args) else: trend_types = [self.compute_trend_test_type(smooth_maxima, starting_year, trend_test_class, years) diff --git a/utils.py b/utils.py index f9540d88893e3d7ec3ff7f590d150e0c8c326691..37d72bb945e22206b596a7b12962a0d8fc726a40 100644 --- a/utils.py +++ b/utils.py @@ -7,6 +7,7 @@ VERSION_TIME = str(VERSION).split('.')[0] for c in [' ', ':', '-']: VERSION_TIME = VERSION_TIME.replace(c, '_') +NB_CORES = 7 def get_root_path() -> str: return op.dirname(op.abspath(__file__))