Commit f28d5eb2 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[SCM] fix the map issue that was caused by caching the year_to_dataset_ordered_dict attribute

parent 944f1cbb
No related merge requests found
Showing with 26 additions and 322 deletions
+26 -322
...@@ -75,7 +75,7 @@ class AbstractStudy(object): ...@@ -75,7 +75,7 @@ class AbstractStudy(object):
""" Load some attributes only once """ """ Load some attributes only once """
@cached_property @property
def year_to_dataset_ordered_dict(self) -> OrderedDict: def year_to_dataset_ordered_dict(self) -> OrderedDict:
print('This code is quite long... ' print('This code is quite long... '
'You should consider year_to_variable which is way faster when multiprocessing=True') 'You should consider year_to_variable which is way faster when multiprocessing=True')
...@@ -101,12 +101,12 @@ class AbstractStudy(object): ...@@ -101,12 +101,12 @@ class AbstractStudy(object):
path_files, ordered_years = self.ordered_years_and_path_files() path_files, ordered_years = self.ordered_years_and_path_files()
if self.multiprocessing: if self.multiprocessing:
with Pool(NB_CORES) as p: with Pool(NB_CORES) as p:
variables = p.map(self.load_variable, path_files) variables = p.map(self.load_variables, path_files)
else: else:
variables = [self.load_variable(path_file) for path_file in path_files] variables = [self.load_variables(path_file) for path_file in path_files]
return OrderedDict(zip(ordered_years, variables)) return OrderedDict(zip(ordered_years, variables))
def load_variable(self, path_file): def load_variables(self, path_file):
dataset = Dataset(path_file) dataset = Dataset(path_file)
keyword = self.variable_class.keyword() keyword = self.variable_class.keyword()
if isinstance(keyword, str): if isinstance(keyword, str):
......
...@@ -3,6 +3,7 @@ import numpy as np ...@@ -3,6 +3,7 @@ import numpy as np
from experiment.meteo_france_SCM_models.study.abstract_extended_study import AbstractExtendedStudy from experiment.meteo_france_SCM_models.study.abstract_extended_study import AbstractExtendedStudy
from experiment.meteo_france_SCM_models.study.abstract_study import AbstractStudy from experiment.meteo_france_SCM_models.study.abstract_study import AbstractStudy
from experiment.meteo_france_SCM_models.study.crocus.crocus_variables import CrocusSweVariable, CrocusDepthVariable from experiment.meteo_france_SCM_models.study.crocus.crocus_variables import CrocusSweVariable, CrocusDepthVariable
from experiment.meteo_france_SCM_models.study.cumulated_study import CumulatedStudy
class Crocus(AbstractStudy): class Crocus(AbstractStudy):
...@@ -25,10 +26,11 @@ class Crocus(AbstractStudy): ...@@ -25,10 +26,11 @@ class Crocus(AbstractStudy):
return super().apply_annual_aggregation(time_serie[91:-92, ...]) return super().apply_annual_aggregation(time_serie[91:-92, ...])
class CrocusSwe(Crocus): class CrocusSwe(Crocus, CumulatedStudy):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(CrocusSweVariable, *args, **kwargs) CumulatedStudy.__init__(self, CrocusSweVariable, *args, **kwargs)
Crocus.__init__(self, CrocusSweVariable, *args, **kwargs)
def apply_annual_aggregation(self, time_serie): def apply_annual_aggregation(self, time_serie):
return self.winter_annual_aggregation(time_serie) return self.winter_annual_aggregation(time_serie)
...@@ -62,8 +64,7 @@ class CrocusDaysWithSnowOnGround(Crocus): ...@@ -62,8 +64,7 @@ class CrocusDaysWithSnowOnGround(Crocus):
if __name__ == '__main__': if __name__ == '__main__':
for variable_class in [CrocusSweVariable, CrocusDepthVariable][:1]: for study in [CrocusSwe(altitude=900)]:
study = Crocus(variable_class=variable_class, altitude=2400)
d = study.year_to_dataset_ordered_dict[1960] d = study.year_to_dataset_ordered_dict[1960]
print(study.df_massifs_longitude_and_latitude) print(study.df_massifs_longitude_and_latitude)
time_arr = np.array(d.variables['time']) time_arr = np.array(d.variables['time'])
......
...@@ -131,8 +131,8 @@ def fast_quantity_altitude_hypercube(): ...@@ -131,8 +131,8 @@ def fast_quantity_altitude_hypercube():
def main_run(): def main_run():
# fast_altitude_hypercube() fast_altitude_hypercube()
fast_altitude_year_hypercube() # fast_altitude_year_hypercube()
# full_altitude_year_hypercube() # full_altitude_year_hypercube()
# fast_quantity_altitude_hypercube() # fast_quantity_altitude_hypercube()
# full_quantity_altitude_hypercube() # full_quantity_altitude_hypercube()
......
...@@ -27,41 +27,40 @@ full_altitude_with_at_least_2_stations = [0, 300, 600, 900, 1200, 1500, 1800, 21 ...@@ -27,41 +27,40 @@ full_altitude_with_at_least_2_stations = [0, 300, 600, 900, 1200, 1500, 1800, 21
ALL_STUDIES = SCM_STUDIES + [SafranTemperature, SafranRainfall] ALL_STUDIES = SCM_STUDIES + [SafranTemperature, SafranRainfall]
def study_iterator_global(study_classes, only_first_one=False, both_altitude=False, verbose=True, altitudes=None) -> \ def study_iterator_global(study_classes, only_first_one=False, verbose=True, altitudes=None, nb_days=None) -> \
List[AbstractStudy]: List[AbstractStudy]:
for study_class in study_classes: for study_class in study_classes:
for study in study_iterator(study_class, only_first_one, both_altitude, verbose, altitudes): for study in study_iterator(study_class, only_first_one, verbose, altitudes, nb_days):
yield study yield study
if only_first_one: if only_first_one:
break break
def study_iterator(study_class, only_first_one=False, both_altitude=False, verbose=True, altitudes=None) -> List[ def study_iterator(study_class, only_first_one=False, verbose=True, altitudes=None, nb_days=None) -> List[AbstractStudy]:
AbstractStudy]: # Default argument
all_studies = [] nb_days = [1] if nb_days is None else nb_days
is_safran_study = study_class in [SafranSnowfall, ExtendedSafranSnowfall] altis = [1800] if altitudes is None else altitudes
nb_days = [1] if is_safran_study else [1]
if verbose: if verbose:
print('\n\n\n\n\nLoading studies....', end='') print('\n\n\n\n\nLoading studies....')
for nb_day in nb_days: for nb_day in nb_days:
altis = [1800] if altitudes is None else altitudes
for alti in altis: for alti in altis:
if verbose: if verbose:
print('alti: {}, nb_day: {} '.format(alti, nb_day), end='') print('alti: {}, nb_day: {} '.format(alti, nb_day), end='')
study = study_class(altitude=alti) study = study_class(altitude=alti)
massifs = study.altitude_to_massif_names[alti]
if verbose: if verbose:
massifs = study.altitude_to_massif_names[alti]
print('{} massifs: {} \n'.format(len(massifs), massifs)) print('{} massifs: {} \n'.format(len(massifs), massifs))
yield study yield study
if only_first_one and not both_altitude:
# Stop iterations on purpose
if only_first_one:
break break
if only_first_one: if only_first_one:
break break
return all_studies
def extended_visualization(): def extended_visualization():
save_to_file = False save_to_file = False
......
import numpy as np
from experiment.meteo_france_SCM_study.abstract_extended_study import AbstractExtendedStudy
from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy
from experiment.meteo_france_SCM_study.crocus.crocus_variables import CrocusSweVariable, CrocusDepthVariable
class Crocus(AbstractStudy):
    """
    Study based on the Crocus data.

    The Crocus data has no 'massifsList' variable, so the massifs are assumed to
    be ordered just like in the Safran data.
    """

    def __init__(self, variable_class, *args, **kwargs):
        """Check that variable_class is a Crocus variable, then forward everything to the parent."""
        supported_variable_classes = [CrocusSweVariable, CrocusDepthVariable]
        assert variable_class in supported_variable_classes
        super().__init__(variable_class, *args, **kwargs)
        self.model_name = 'Crocus'

    def annual_aggregation_function(self, *args, **kwargs):
        """Aggregate with np.mean; all arguments are forwarded untouched."""
        return np.mean(*args, **kwargs)

    def winter_annual_aggregation(self, time_serie):
        """Apply the parent annual aggregation to the winter part of time_serie only."""
        # Following the Durand paper, only the data from November to April is kept:
        # - the first 91 days are dropped (documented as August + September + October)
        # - the last 92 = 31 + 30 + 31 days are dropped (May + June + July)
        # NOTE(review): August + September + October is 31 + 30 + 31 = 92 days, not 91;
        # confirm whether the yearly series really starts on August 1st.
        nb_leading_days, nb_trailing_days = 91, 92
        winter_time_serie = time_serie[nb_leading_days:-nb_trailing_days, ...]
        return super().apply_annual_aggregation(winter_time_serie)
class CrocusSwe(Crocus):
    """Crocus study bound to CrocusSweVariable, aggregated over the winter window."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to Crocus with CrocusSweVariable as the variable class."""
        super().__init__(CrocusSweVariable, *args, **kwargs)

    def apply_annual_aggregation(self, time_serie):
        """Delegate the annual aggregation to winter_annual_aggregation."""
        winter_aggregate = self.winter_annual_aggregation(time_serie)
        return winter_aggregate
class ExtendedCrocusSwe(AbstractExtendedStudy, CrocusSwe):
    """CrocusSwe combined with AbstractExtendedStudy; adds no member of its own."""
class CrocusDepth(Crocus):
    """Crocus study bound to CrocusDepthVariable, aggregated over the winter window."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to Crocus with CrocusDepthVariable as the variable class."""
        super().__init__(CrocusDepthVariable, *args, **kwargs)

    def apply_annual_aggregation(self, time_serie):
        """Delegate the annual aggregation to winter_annual_aggregation."""
        winter_aggregate = self.winter_annual_aggregation(time_serie)
        return winter_aggregate
class ExtendedCrocusDepth(AbstractExtendedStudy, CrocusDepth):
    """CrocusDepth combined with AbstractExtendedStudy; adds no member of its own."""
class CrocusDaysWithSnowOnGround(Crocus):
    """
    Crocus study counting the days with snow on the ground.

    Having snow on the ground is equivalent to snow depth > 0, hence the use of
    CrocusDepthVariable together with a non-zero count.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to Crocus with CrocusDepthVariable as the variable class."""
        super().__init__(CrocusDepthVariable, *args, **kwargs)

    def annual_aggregation_function(self, *args, **kwargs):
        """Aggregate with np.count_nonzero; all arguments are forwarded untouched."""
        return np.count_nonzero(*args, **kwargs)
if __name__ == '__main__':
    # Manual smoke run: load the year 1960 for the first Crocus variable only
    first_variable_classes = [CrocusSweVariable, CrocusDepthVariable][:1]
    for variable_class in first_variable_classes:
        study = Crocus(variable_class=variable_class, altitude=2400)
        dataset_1960 = study.year_to_dataset_ordered_dict[1960]
        print(study.df_massifs_longitude_and_latitude)
        # Read the whole 'time' variable of the dataset (the result is not used afterwards)
        time_arr = np.array(dataset_1960.variables['time'])
        daily_array_1960 = study.year_to_daily_time_serie_array[1960]
        print(daily_array_1960.shape)
import numpy as np
from experiment.meteo_france_SCM_study.abstract_variable import AbstractVariable
class CrocusVariable(AbstractVariable):
    """Base class of the Crocus variables: the stored array is exposed as the daily time series."""

    def __init__(self, variable_array):
        """Hand variable_array over to AbstractVariable."""
        super().__init__(variable_array)

    @property
    def daily_time_serie_array(self) -> np.ndarray:
        """Return self.variable_array unchanged."""
        daily_values = self.variable_array
        return daily_values
class CrocusSweVariable(CrocusVariable):
    """Crocus variable for the snow water equivalent."""

    NAME = 'Snow Water Equivalent'
    UNIT = 'kg per m2 or mm'

    @classmethod
    def keyword(cls):
        """Return the keyword string associated with this variable."""
        return 'SWE_1DY_ISBA'
class CrocusDepthVariable(CrocusVariable):
    """Crocus variable for the snow depth."""

    NAME = 'Snow Depth'
    UNIT = 'm'

    @classmethod
    def keyword(cls):
        """Return the keyword string associated with this variable."""
        return "SD_1DY_ISBA"
import numpy as np
from experiment.meteo_france_SCM_study.abstract_extended_study import AbstractExtendedStudy
from experiment.meteo_france_SCM_study.abstract_study import AbstractStudy
from experiment.meteo_france_SCM_study.abstract_variable import AbstractVariable
from experiment.meteo_france_SCM_study.safran.safran_variable import SafranSnowfallVariable, \
SafranRainfallVariable, SafranTemperatureVariable, SafranTotalPrecipVariable
class Safran(AbstractStudy):
    """Study based on the Safran data; only the Safran variable classes are accepted."""

    def __init__(self, variable_class: type, *args, **kwargs):
        """Check that variable_class is a Safran variable, then forward everything to the parent."""
        supported_variable_classes = [
            SafranSnowfallVariable,
            SafranRainfallVariable,
            SafranTemperatureVariable,
            SafranTotalPrecipVariable,
        ]
        assert variable_class in supported_variable_classes
        super().__init__(variable_class, *args, **kwargs)
        self.model_name = 'Safran'
class SafranFrequency(Safran):
    """Safran study whose variable is built with a number of consecutive days (between 1 and 7)."""

    def __init__(self, variable_class: type, nb_consecutive_days: int = 1, *args, **kwargs):
        """Validate nb_consecutive_days, initialize the parent study, then store the value."""
        assert 1 <= nb_consecutive_days <= 7
        super().__init__(variable_class, *args, **kwargs)
        self.nb_consecutive_days = nb_consecutive_days

    def instantiate_variable_object(self, variable_array) -> AbstractVariable:
        """Build the variable object from variable_array and the stored number of consecutive days."""
        variable_object = self.variable_class(variable_array, self.nb_consecutive_days)
        return variable_object

    @property
    def variable_name(self):
        """Parent variable name followed by the number of days of cumulation."""
        base_name = super().variable_name
        suffix = ' cumulated over {} day(s)'.format(self.nb_consecutive_days)
        return base_name + suffix

    def annual_aggregation_function(self, *args, **kwargs):
        """Aggregate with np.sum; all arguments are forwarded untouched."""
        return np.sum(*args, **kwargs)
class SafranSnowfall(SafranFrequency):
    """SafranFrequency study bound to SafranSnowfallVariable."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to SafranFrequency with SafranSnowfallVariable as the variable class."""
        super().__init__(SafranSnowfallVariable, *args, **kwargs)
class ExtendedSafranSnowfall(AbstractExtendedStudy, SafranSnowfall):
    """SafranSnowfall combined with AbstractExtendedStudy; adds no member of its own."""
class SafranRainfall(SafranFrequency):
    """SafranFrequency study bound to SafranRainfallVariable."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to SafranFrequency with SafranRainfallVariable as the variable class."""
        super().__init__(SafranRainfallVariable, *args, **kwargs)
class SafranTotalPrecip(SafranFrequency):
    """SafranFrequency study bound to SafranTotalPrecipVariable (snowfall and rainfall together)."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to SafranFrequency with SafranTotalPrecipVariable as the variable class."""
        super().__init__(SafranTotalPrecipVariable, *args, **kwargs)

    def instantiate_variable_object(self, variable_array) -> AbstractVariable:
        """Build the variable object from a (snowfall array, rainfall array) pair."""
        # variable_array must unpack into exactly two elements: snowfall first, rainfall second
        snowfall_array, rainfall_array = variable_array
        return self.variable_class(snowfall_array, rainfall_array, self.nb_consecutive_days)
class ExtendedSafranTotalPrecip(AbstractExtendedStudy, SafranTotalPrecip):
    """SafranTotalPrecip combined with AbstractExtendedStudy; adds no member of its own."""
class SafranTemperature(Safran):
    """Safran study bound to SafranTemperatureVariable, aggregated annually with a mean."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to Safran with SafranTemperatureVariable as the variable class."""
        super().__init__(SafranTemperatureVariable, *args, **kwargs)

    def annual_aggregation_function(self, *args, **kwargs):
        """Aggregate with np.mean; all arguments are forwarded untouched."""
        return np.mean(*args, **kwargs)
if __name__ == '__main__':
    # Manual smoke run on the snowfall study at 1800 m
    study = SafranSnowfall(altitude=1800)
    dataset_1958 = study.year_to_dataset_ordered_dict[1958]
    print(study.missing_massif_name)
    # Other quantities that can be printed when debugging:
    # print(dataset_1958.variables['time'])
    # print(len(dataset_1958.variables['time']))
    # print(study.all_massif_names)
    # print(study.massif_name_to_altitudes)
    # print(study.year_to_daily_time_serie_array[1958].shape)
    # print(study.year_to_annual_total)
    # print(study.df_annual_total.columns)
import numpy as np
from experiment.meteo_france_SCM_study.abstract_variable import AbstractVariable
class SafranSnowfallVariable(AbstractVariable):
    """
    Snowfall variable built from the Safran data, which is hourly.

    Hypotheses:
    - How much snowfall is counted in one hour?
      The snowfall rates (per second) at the start and at the end of the hour are averaged,
      and the result is multiplied by 60 x 60, the number of seconds in one hour.
    - How is the limit of a day defined?
      Counting starts at the first record (in August, around 4am or so); every 24 hour
      duration added from there leads to the next day.
    - How are several days aggregated?
      All the N consecutive days are aggregated into a value x_i, then the max is taken
      (the problem might be that the x_i are not independent, they are highly dependent
      one from another).
    """

    NAME = 'Snowfall'
    UNIT = 'kg per m2 or mm'

    @classmethod
    def keyword(cls):
        """Return the keyword string associated with this variable."""
        return 'Snowf'

    def __init__(self, variable_array, nb_consecutive_days_of_snowfall=1):
        """Turn the array of snowfall rates into a list of daily snowfall amounts (kg/m2)."""
        super().__init__(variable_array)
        self.nb_consecutive_days_of_snowfall = nb_consecutive_days_of_snowfall
        rates = variable_array
        # Hourly amount first: mean of two consecutive rates, times the seconds in one hour
        mean_rates = 0.5 * (rates[:-1] + rates[1:])
        hourly_amounts = 60 * 60 * mean_rates
        # Then one value per complete block of 24 hours; an incomplete last day is ignored
        nb_complete_days = len(hourly_amounts) // 24
        daily_amounts = []
        for day_start in range(0, 24 * nb_complete_days, 24):
            daily_amounts.append(sum(hourly_amounts[day_start:day_start + 24]))
        self.daily_snowfall = daily_amounts

    @property
    def daily_time_serie_array(self) -> np.ndarray:
        """Return the snowfall summed over each window of consecutive days, as an array."""
        window_size = self.nb_consecutive_days_of_snowfall
        # The k-th shifted copy starts k days later: lengths are n, n-1, n-2, ...
        shifted_series = []
        for shift in range(window_size):
            shifted_series.append(self.daily_snowfall[shift:])
        # zip stops at the shortest copy, so each tuple holds the days of one complete window
        window_totals = [sum(days_in_window) for days_in_window in zip(*shifted_series)]
        # The resulting array has n - window_size + 1 entries along its first axis
        return np.array(window_totals)
class SafranRainfallVariable(SafranSnowfallVariable):
    """Rainfall variable; reuses all the SafranSnowfallVariable processing under another keyword."""

    NAME = 'Rainfall'
    UNIT = 'kg per m2 or mm'

    @classmethod
    def keyword(cls):
        """Return the keyword string associated with this variable."""
        return 'Rainf'
class SafranTotalPrecipVariable(AbstractVariable):
    """Total precipitation variable: the sum of a snowfall variable and a rainfall variable."""

    def __init__(self, snow_variable_array, rain_variable_array, nb_consecutive_days_of_snowfall=1):
        """Wrap each input array in its own variable object; the parent receives None as array."""
        super().__init__(None)
        nb_days = nb_consecutive_days_of_snowfall
        self.snow_precipitation = SafranSnowfallVariable(snow_variable_array, nb_days)
        self.rain_precipitation = SafranRainfallVariable(rain_variable_array, nb_days)

    @classmethod
    def keyword(cls):
        """Return the list of both keywords: snowfall first, rainfall second."""
        snowfall_keyword = SafranSnowfallVariable.keyword()
        rainfall_keyword = SafranRainfallVariable.keyword()
        return [snowfall_keyword, rainfall_keyword]

    @property
    def daily_time_serie_array(self) -> np.ndarray:
        """Return the sum of the snowfall and rainfall daily time series."""
        snow_array = self.snow_precipitation.daily_time_serie_array
        rain_array = self.rain_precipitation.daily_time_serie_array
        return snow_array + rain_array
class SafranTemperatureVariable(AbstractVariable):
    """Temperature variable: the stored values are shifted by 273.15, then averaged by blocks of 24."""

    NAME = 'Temperature'
    UNIT = 'Celsius Degrees'

    @classmethod
    def keyword(cls):
        """Return the keyword string associated with this variable."""
        return 'Tair'

    def __init__(self, variable_array):
        """Convert the temperatures to Celsius and compute one mean per complete day."""
        super().__init__(variable_array)
        # Temperatures are in Kelvin, they are transformed into Celsius
        self.hourly_temperature = self.variable_array - 273.15
        # One mean per complete block of 24 records; an incomplete last day is ignored
        nb_complete_days = len(self.hourly_temperature) // 24
        daily_means = []
        for day_start in range(0, 24 * nb_complete_days, 24):
            one_day = self.hourly_temperature[day_start:day_start + 24]
            daily_means.append(np.mean(one_day, axis=0))
        self.daily_temperature = daily_means

    @property
    def daily_time_serie_array(self):
        """Return the daily mean temperatures as an array."""
        daily_means = self.daily_temperature
        return np.array(daily_means)
...@@ -15,7 +15,7 @@ class TestSCMAllStudy(unittest.TestCase): ...@@ -15,7 +15,7 @@ class TestSCMAllStudy(unittest.TestCase):
def test_extended_run(self): def test_extended_run(self):
for study_class in [ExtendedSafranSnowfall]: for study_class in [ExtendedSafranSnowfall]:
for study in study_iterator(study_class, only_first_one=True, both_altitude=False, verbose=False): for study in study_iterator(study_class, only_first_one=True, verbose=False):
study_visualizer = StudyVisualizer(study, show=False, save_to_file=False) study_visualizer = StudyVisualizer(study, show=False, save_to_file=False)
study_visualizer.visualize_all_mean_and_max_graphs() study_visualizer.visualize_all_mean_and_max_graphs()
self.assertTrue(True) self.assertTrue(True)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment