From e78eb79fc93faecc3ad70ebc9db1961768483ada Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Thu, 24 Jan 2019 17:26:25 +0100 Subject: [PATCH] [DATA] add safran extended (which include as massifs: the original massif + the 4 alps regions + the whole alps) --- safran_study/abstract_safran.py | 115 --------------------------- safran_study/fit_safran.py | 25 ++++++ safran_study/safran.py | 133 ++++++++++++++++++++++++++------ safran_study/safran_extended.py | 36 +++++++++ test/test_utils.py | 6 +- utils.py | 2 + 6 files changed, 174 insertions(+), 143 deletions(-) delete mode 100644 safran_study/abstract_safran.py create mode 100644 safran_study/fit_safran.py create mode 100644 safran_study/safran_extended.py diff --git a/safran_study/abstract_safran.py b/safran_study/abstract_safran.py deleted file mode 100644 index 5dce958e..00000000 --- a/safran_study/abstract_safran.py +++ /dev/null @@ -1,115 +0,0 @@ -import os -import os.path as op -from collections import OrderedDict - -import matplotlib.pyplot as plt -import pandas as pd -from netCDF4 import Dataset - -from extreme_estimator.gev.gevmle_fit import GevMleFit -from safran_study.massif import safran_massif_names_from_datasets -from safran_study.snowfall_annual_maxima import SafranSnowfall -from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates -from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ - AbstractSpatialCoordinates -from utils import get_full_path, cached_property - - -class Safran(object): - - def __init__(self, nb_days_of_snowfall=1): - self.safran_altitude = None - self.nb_days_of_snowfall = nb_days_of_snowfall - - def write_to_file(self, df): - if not op.exists(self.result_full_path): - os.makedirs(self.result_full_path, exist_ok=True) - df.to_csv(op.join(self.result_full_path, 'merged_array_{}_altitude.csv'.format(self.safran_altitude))) - - """ Visualization methods """ - - def visualize(self): - df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv')) - coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X], - row_massif[AbstractCoordinates.COORDINATE_Y]) - for _, row_massif in df_massif.iterrows()] - for massif_idx in set([tuple[0] for tuple in coord_tuples]): - l = [coords for idx, *coords in coord_tuples if idx == massif_idx] - l = list(zip(*l)) - plt.plot(*l, color='black') - plt.fill(*l) - self.massifs_coordinates.visualization_2D() - - """ Statistics methods """ - - @property - def df_gev_mle_each_massif(self): - # Fit a gev n each massif - massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.to_serie() - for massif_name in self.safran_massif_names} - return pd.DataFrame(massif_to_gev_mle) - - """ Annual maxima of snowfall """ - - @property - def df_annual_maxima(self): - return pd.DataFrame(self.year_to_annual_maxima, index=self.safran_massif_names).T - - """ Load some attributes only once """ - - @cached_property - def year_to_annual_maxima(self): - year_to_safran_snowfall = {year: SafranSnowfall(dataset) for year, dataset in - self.year_to_dataset_ordered_dict.items()} - year_to_annual_maxima = OrderedDict() - for year in self.year_to_dataset_ordered_dict.keys(): - year_to_annual_maxima[year] = year_to_safran_snowfall[year].annual_maxima_of_snowfall( - self.nb_days_of_snowfall) - return year_to_annual_maxima - - @cached_property - def safran_massif_names(self): - # Load the names of the massif as defined by SAFRAN - return safran_massif_names_from_datasets(self.year_to_dataset_ordered_dict.values()) - - @cached_property - def year_to_dataset_ordered_dict(self) -> OrderedDict: - # Map each year to the correspond netCDF4 Dataset - year_to_dataset = OrderedDict() - nc_files = [(int(f.split('_')[1][:4]), f) for f in os.listdir(self.safran_full_path) if f.endswith('.nc')] - for year, nc_file in sorted(nc_files, key=lambda t: t[0]): - year_to_dataset[year] = Dataset(op.join(self.safran_full_path, nc_file)) - return year_to_dataset - - @cached_property - def massifs_coordinates(self) -> AbstractSpatialCoordinates: - # Coordinate object that represents the massif coordinates in Lambert extended - df_centroid = pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv')) - for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]: - df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float) - # Assert that the massif names are the same between SAFRAN and the coordinate file - assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM'])) - # Build coordinate object from df_centroid - return AbstractSpatialCoordinates.from_df(df_centroid) - - """ Some properties """ - - @property - def relative_path(self) -> str: - return r'local/spatio_temporal_datasets' - - @property - def full_path(self) -> str: - return get_full_path(relative_path=self.relative_path) - - @property - def safran_full_path(self) -> str: - return op.join(self.full_path, 'safran-crocus_{}'.format(self.safran_altitude), 'Safran') - - @property - def map_full_path(self) -> str: - return op.join(self.full_path, 'map') - - @property - def result_full_path(self) -> str: - return op.join(self.full_path, 'results') \ No newline at end of file diff --git a/safran_study/fit_safran.py b/safran_study/fit_safran.py new file mode 100644 index 00000000..29c6f9ae --- /dev/null +++ b/safran_study/fit_safran.py @@ -0,0 +1,25 @@ +import pandas as pd + +from safran_study.safran import Safran +from safran_study.safran_extended import ExtendedSafran +from utils import VERSION + + +def fit_mle_gev_for_all_safran_and_different_days(): + # Dump the result in a csv + dfs = [] + for safran_alti in [1800, 2400][:1]: + for nb_day in [1, 3, 7][:]: + print('alti: {}, nb_day: {}'.format(safran_alti, nb_day)) + # safran = Safran(safran_alti, nb_day) + safran = ExtendedSafran(safran_alti, nb_day) + df = safran.df_gev_mle_each_massif + df.index += ' Safran{} with {} days'.format(safran.safran_altitude, safran.nb_days_of_snowfall) + dfs.append(df) + df = pd.concat(dfs) + path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv' + df.to_csv(path.format(VERSION)) + + +if __name__ == '__main__': + fit_mle_gev_for_all_safran_and_different_days() diff --git a/safran_study/safran.py b/safran_study/safran.py index cc32c2bf..7b735f0c 100644 --- a/safran_study/safran.py +++ b/safran_study/safran.py @@ -1,36 +1,119 @@ +import os +import os.path as op +from collections import OrderedDict + +import matplotlib.pyplot as plt import pandas as pd +from netCDF4 import Dataset + +from extreme_estimator.gev.gevmle_fit import GevMleFit +from safran_study.massif import safran_massif_names_from_datasets +from safran_study.snowfall_annual_maxima import SafranSnowfall +from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ + AbstractSpatialCoordinates +from utils import get_full_path, cached_property + + +class Safran(object): + + def __init__(self, safran_altitude=1800, nb_days_of_snowfall=1): + assert safran_altitude in [1800, 2400] + self.safran_altitude = safran_altitude + self.nb_days_of_snowfall = nb_days_of_snowfall + + def write_to_file(self, df): + if not op.exists(self.result_full_path): + os.makedirs(self.result_full_path, exist_ok=True) + df.to_csv(op.join(self.result_full_path, 'merged_array_{}_altitude.csv'.format(self.safran_altitude))) + + """ Visualization methods """ + + def visualize(self): + df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv')) + coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X], + row_massif[AbstractCoordinates.COORDINATE_Y]) + for _, row_massif in df_massif.iterrows()] + for massif_idx in set([tuple[0] for tuple in coord_tuples]): + l = [coords for idx, *coords in coord_tuples if idx == massif_idx] + l = list(zip(*l)) + plt.plot(*l, color='black') + plt.fill(*l) + self.massifs_coordinates.visualization_2D() + + """ Statistics methods """ + + @property + def df_gev_mle_each_massif(self): + # Fit a gev n each massif + massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.to_serie() + for massif_name in self.safran_massif_names} + return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names) + + """ Annual maxima of snowfall """ + + @property + def df_annual_maxima(self): + return pd.DataFrame(self.year_to_annual_maxima, index=self.safran_massif_names).T + + """ Load some attributes only once """ + + @cached_property + def year_to_annual_maxima(self): + year_to_safran_snowfall = {year: SafranSnowfall(dataset) for year, dataset in + self.year_to_dataset_ordered_dict.items()} + year_to_annual_maxima = OrderedDict() + for year in self.year_to_dataset_ordered_dict.keys(): + year_to_annual_maxima[year] = year_to_safran_snowfall[year].annual_maxima_of_snowfall( + self.nb_days_of_snowfall) + return year_to_annual_maxima -from safran_study.abstract_safran import Safran -from utils import VERSION + @property + def safran_massif_names(self): + # Load the names of the massif as defined by SAFRAN + return safran_massif_names_from_datasets(self.year_to_dataset_ordered_dict.values()) + @cached_property + def year_to_dataset_ordered_dict(self) -> OrderedDict: + # Map each year to the correspond netCDF4 Dataset + year_to_dataset = OrderedDict() + nc_files = [(int(f.split('_')[1][:4]), f) for f in os.listdir(self.safran_full_path) if f.endswith('.nc')] + for year, nc_file in sorted(nc_files, key=lambda t: t[0]): + year_to_dataset[year] = Dataset(op.join(self.safran_full_path, nc_file)) + return year_to_dataset -class Safran1800(Safran): + @cached_property + def massifs_coordinates(self) -> AbstractSpatialCoordinates: + # Coordinate object that represents the massif coordinates in Lambert extended + df_centroid = self.load_df_centroid() + for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]: + df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float) + # Assert that the massif names are the same between SAFRAN and the coordinate file + assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM'])) + # Build coordinate object from df_centroid + return AbstractSpatialCoordinates.from_df(df_centroid) - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.safran_altitude = 1800 + def load_df_centroid(self): + return pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv')) + """ Some properties """ -class Safran2400(Safran): + @property + def relative_path(self) -> str: + return r'local/spatio_temporal_datasets' - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.safran_altitude = 2400 + @property + def full_path(self) -> str: + return get_full_path(relative_path=self.relative_path) -def fit_mle_gev_for_all_safran_and_different_days(): - # Dump the result in a csv - dfs = [] - for safran_class in [Safran1800, Safran2400]: - for nb_day in [1, 3, 7]: - print('here') - safran = safran_class(nb_day) - df = safran.df_gev_mle_each_massif - df.index += ' Safran{} with {} days'.format(safran.safran_altitude, safran.nb_days_of_snowfall) - dfs.append(df) - df = pd.concat(dfs) - path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv' - df.to_csv(path.format(VERSION)) + @property + def safran_full_path(self) -> str: + return op.join(self.full_path, 'safran-crocus_{}'.format(self.safran_altitude), 'Safran') + @property + def map_full_path(self) -> str: + return op.join(self.full_path, 'map') -if __name__ == '__main__': - fit_mle_gev_for_all_safran_and_different_days() \ No newline at end of file + @property + def result_full_path(self) -> str: + return op.join(self.full_path, 'results') \ No newline at end of file diff --git a/safran_study/safran_extended.py b/safran_study/safran_extended.py new file mode 100644 index 00000000..e3e76208 --- /dev/null +++ b/safran_study/safran_extended.py @@ -0,0 +1,36 @@ +from collections import OrderedDict + +import pandas as pd + +from safran_study.safran import Safran +from utils import cached_property + + +class ExtendedSafran(Safran): + + @property + def safran_massif_names(self): + return self.region_names + super().safran_massif_names + + """ Properties """ + + @cached_property + def df_annual_maxima(self): + df_annual_maxima = pd.DataFrame(self.year_to_annual_maxima, index=super().safran_massif_names).T + # Add the column corresponding to the aggregated massif + for region_name_loop in self.region_names: + # We use "is in" so that the "Alps" will automatically regroup all the massif data + massif_belong_to_the_group = [massif_name + for massif_name, region_name in self.massif_name_to_region_name.items() + if region_name_loop in region_name] + df_annual_maxima[region_name_loop] = df_annual_maxima.loc[:, massif_belong_to_the_group].max(axis=1) + return df_annual_maxima + + @property + def massif_name_to_region_name(self): + df_centroid = self.load_df_centroid() + return OrderedDict(zip(df_centroid['NOM'], df_centroid['REGION'])) + + @property + def region_names(self): + return ['Alps', 'Northern Alps', 'Central Alps', 'Southern Alps', 'Extreme South Alps'] diff --git a/test/test_utils.py b/test/test_utils.py index 1c4fa313..146b7713 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -7,7 +7,7 @@ from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model AbstractMaxStableModelWithCovarianceFunction, CovarianceFunction from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith, BrownResnick, Schlather, \ Geometric, ExtremalT, ISchlather -from safran_study.safran import Safran2400, Safran1800 +from safran_study.safran import Safran from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \ AlpsStation3DCoordinatesWithAnisotropy from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \ @@ -85,5 +85,5 @@ def load_test_spatiotemporal_coordinates(nb_points, nb_steps, train_split_ratio= def load_safran_objects(): nb_days_list = [1, 3, 5][:1] - safran_classes = [Safran1800, Safran2400][:1] - return [safran_class(nb_days) for safran_class in safran_classes for nb_days in nb_days_list] \ No newline at end of file + safran_altitude_list = [1800, 2400][:1] + return [Safran(safran_altitude, nb_days) for safran_altitude in safran_altitude_list for nb_days in nb_days_list] \ No newline at end of file diff --git a/utils.py b/utils.py index c682da11..44b396f6 100644 --- a/utils.py +++ b/utils.py @@ -24,6 +24,8 @@ def first(s): return next(iter(s)) +# todo: these cached property have a weird behavior with inheritence, +# when we call the super cached_property in the child method class cached_property(object): """ Descriptor (non-data) for building an attribute on-demand on first use. -- GitLab