Commit e78eb79f authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[DATA] add safran extended (which include as massifs: the original massif +...

[DATA] add safran extended (which include as massifs: the original massif + the 4 alps regions + the whole alps)
parent 9818f99a
No related merge requests found
Showing with 174 additions and 143 deletions
+174 -143
import os
import os.path as op
from collections import OrderedDict
import matplotlib.pyplot as plt
import pandas as pd
from netCDF4 import Dataset
from extreme_estimator.gev.gevmle_fit import GevMleFit
from safran_study.massif import safran_massif_names_from_datasets
from safran_study.snowfall_annual_maxima import SafranSnowfall
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
AbstractSpatialCoordinates
from utils import get_full_path, cached_property
class Safran(object):
def __init__(self, nb_days_of_snowfall=1):
self.safran_altitude = None
self.nb_days_of_snowfall = nb_days_of_snowfall
def write_to_file(self, df):
if not op.exists(self.result_full_path):
os.makedirs(self.result_full_path, exist_ok=True)
df.to_csv(op.join(self.result_full_path, 'merged_array_{}_altitude.csv'.format(self.safran_altitude)))
""" Visualization methods """
def visualize(self):
df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv'))
coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X],
row_massif[AbstractCoordinates.COORDINATE_Y])
for _, row_massif in df_massif.iterrows()]
for massif_idx in set([tuple[0] for tuple in coord_tuples]):
l = [coords for idx, *coords in coord_tuples if idx == massif_idx]
l = list(zip(*l))
plt.plot(*l, color='black')
plt.fill(*l)
self.massifs_coordinates.visualization_2D()
""" Statistics methods """
@property
def df_gev_mle_each_massif(self):
# Fit a gev n each massif
massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.to_serie()
for massif_name in self.safran_massif_names}
return pd.DataFrame(massif_to_gev_mle)
""" Annual maxima of snowfall """
@property
def df_annual_maxima(self):
return pd.DataFrame(self.year_to_annual_maxima, index=self.safran_massif_names).T
""" Load some attributes only once """
@cached_property
def year_to_annual_maxima(self):
year_to_safran_snowfall = {year: SafranSnowfall(dataset) for year, dataset in
self.year_to_dataset_ordered_dict.items()}
year_to_annual_maxima = OrderedDict()
for year in self.year_to_dataset_ordered_dict.keys():
year_to_annual_maxima[year] = year_to_safran_snowfall[year].annual_maxima_of_snowfall(
self.nb_days_of_snowfall)
return year_to_annual_maxima
@cached_property
def safran_massif_names(self):
# Load the names of the massif as defined by SAFRAN
return safran_massif_names_from_datasets(self.year_to_dataset_ordered_dict.values())
@cached_property
def year_to_dataset_ordered_dict(self) -> OrderedDict:
# Map each year to the correspond netCDF4 Dataset
year_to_dataset = OrderedDict()
nc_files = [(int(f.split('_')[1][:4]), f) for f in os.listdir(self.safran_full_path) if f.endswith('.nc')]
for year, nc_file in sorted(nc_files, key=lambda t: t[0]):
year_to_dataset[year] = Dataset(op.join(self.safran_full_path, nc_file))
return year_to_dataset
@cached_property
def massifs_coordinates(self) -> AbstractSpatialCoordinates:
# Coordinate object that represents the massif coordinates in Lambert extended
df_centroid = pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv'))
for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float)
# Assert that the massif names are the same between SAFRAN and the coordinate file
assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM']))
# Build coordinate object from df_centroid
return AbstractSpatialCoordinates.from_df(df_centroid)
""" Some properties """
@property
def relative_path(self) -> str:
return r'local/spatio_temporal_datasets'
@property
def full_path(self) -> str:
return get_full_path(relative_path=self.relative_path)
@property
def safran_full_path(self) -> str:
return op.join(self.full_path, 'safran-crocus_{}'.format(self.safran_altitude), 'Safran')
@property
def map_full_path(self) -> str:
return op.join(self.full_path, 'map')
@property
def result_full_path(self) -> str:
return op.join(self.full_path, 'results')
\ No newline at end of file
import pandas as pd
from safran_study.safran import Safran
from safran_study.safran_extended import ExtendedSafran
from utils import VERSION
def fit_mle_gev_for_all_safran_and_different_days():
# Dump the result in a csv
dfs = []
for safran_alti in [1800, 2400][:1]:
for nb_day in [1, 3, 7][:]:
print('alti: {}, nb_day: {}'.format(safran_alti, nb_day))
# safran = Safran(safran_alti, nb_day)
safran = ExtendedSafran(safran_alti, nb_day)
df = safran.df_gev_mle_each_massif
df.index += ' Safran{} with {} days'.format(safran.safran_altitude, safran.nb_days_of_snowfall)
dfs.append(df)
df = pd.concat(dfs)
path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv'
df.to_csv(path.format(VERSION))
if __name__ == '__main__':
fit_mle_gev_for_all_safran_and_different_days()
import os
import os.path as op
from collections import OrderedDict
import matplotlib.pyplot as plt
import pandas as pd import pandas as pd
from netCDF4 import Dataset
from extreme_estimator.gev.gevmle_fit import GevMleFit
from safran_study.massif import safran_massif_names_from_datasets
from safran_study.snowfall_annual_maxima import SafranSnowfall
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
AbstractSpatialCoordinates
from utils import get_full_path, cached_property
class Safran(object):
def __init__(self, safran_altitude=1800, nb_days_of_snowfall=1):
assert safran_altitude in [1800, 2400]
self.safran_altitude = safran_altitude
self.nb_days_of_snowfall = nb_days_of_snowfall
def write_to_file(self, df):
if not op.exists(self.result_full_path):
os.makedirs(self.result_full_path, exist_ok=True)
df.to_csv(op.join(self.result_full_path, 'merged_array_{}_altitude.csv'.format(self.safran_altitude)))
""" Visualization methods """
def visualize(self):
df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv'))
coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X],
row_massif[AbstractCoordinates.COORDINATE_Y])
for _, row_massif in df_massif.iterrows()]
for massif_idx in set([tuple[0] for tuple in coord_tuples]):
l = [coords for idx, *coords in coord_tuples if idx == massif_idx]
l = list(zip(*l))
plt.plot(*l, color='black')
plt.fill(*l)
self.massifs_coordinates.visualization_2D()
""" Statistics methods """
@property
def df_gev_mle_each_massif(self):
# Fit a gev n each massif
massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.to_serie()
for massif_name in self.safran_massif_names}
return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names)
""" Annual maxima of snowfall """
@property
def df_annual_maxima(self):
return pd.DataFrame(self.year_to_annual_maxima, index=self.safran_massif_names).T
""" Load some attributes only once """
@cached_property
def year_to_annual_maxima(self):
year_to_safran_snowfall = {year: SafranSnowfall(dataset) for year, dataset in
self.year_to_dataset_ordered_dict.items()}
year_to_annual_maxima = OrderedDict()
for year in self.year_to_dataset_ordered_dict.keys():
year_to_annual_maxima[year] = year_to_safran_snowfall[year].annual_maxima_of_snowfall(
self.nb_days_of_snowfall)
return year_to_annual_maxima
from safran_study.abstract_safran import Safran @property
from utils import VERSION def safran_massif_names(self):
# Load the names of the massif as defined by SAFRAN
return safran_massif_names_from_datasets(self.year_to_dataset_ordered_dict.values())
@cached_property
def year_to_dataset_ordered_dict(self) -> OrderedDict:
# Map each year to the correspond netCDF4 Dataset
year_to_dataset = OrderedDict()
nc_files = [(int(f.split('_')[1][:4]), f) for f in os.listdir(self.safran_full_path) if f.endswith('.nc')]
for year, nc_file in sorted(nc_files, key=lambda t: t[0]):
year_to_dataset[year] = Dataset(op.join(self.safran_full_path, nc_file))
return year_to_dataset
class Safran1800(Safran): @cached_property
def massifs_coordinates(self) -> AbstractSpatialCoordinates:
# Coordinate object that represents the massif coordinates in Lambert extended
df_centroid = self.load_df_centroid()
for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float)
# Assert that the massif names are the same between SAFRAN and the coordinate file
assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM']))
# Build coordinate object from df_centroid
return AbstractSpatialCoordinates.from_df(df_centroid)
def __init__(self, *args, **kwargs): def load_df_centroid(self):
super().__init__(*args, **kwargs) return pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv'))
self.safran_altitude = 1800
""" Some properties """
class Safran2400(Safran): @property
def relative_path(self) -> str:
return r'local/spatio_temporal_datasets'
def __init__(self, *args, **kwargs): @property
super().__init__(*args, **kwargs) def full_path(self) -> str:
self.safran_altitude = 2400 return get_full_path(relative_path=self.relative_path)
def fit_mle_gev_for_all_safran_and_different_days(): @property
# Dump the result in a csv def safran_full_path(self) -> str:
dfs = [] return op.join(self.full_path, 'safran-crocus_{}'.format(self.safran_altitude), 'Safran')
for safran_class in [Safran1800, Safran2400]:
for nb_day in [1, 3, 7]:
print('here')
safran = safran_class(nb_day)
df = safran.df_gev_mle_each_massif
df.index += ' Safran{} with {} days'.format(safran.safran_altitude, safran.nb_days_of_snowfall)
dfs.append(df)
df = pd.concat(dfs)
path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv'
df.to_csv(path.format(VERSION))
@property
def map_full_path(self) -> str:
return op.join(self.full_path, 'map')
if __name__ == '__main__': @property
fit_mle_gev_for_all_safran_and_different_days() def result_full_path(self) -> str:
\ No newline at end of file return op.join(self.full_path, 'results')
\ No newline at end of file
from collections import OrderedDict
import pandas as pd
from safran_study.safran import Safran
from utils import cached_property
class ExtendedSafran(Safran):
@property
def safran_massif_names(self):
return self.region_names + super().safran_massif_names
""" Properties """
@cached_property
def df_annual_maxima(self):
df_annual_maxima = pd.DataFrame(self.year_to_annual_maxima, index=super().safran_massif_names).T
# Add the column corresponding to the aggregated massif
for region_name_loop in self.region_names:
# We use "is in" so that the "Alps" will automatically regroup all the massif data
massif_belong_to_the_group = [massif_name
for massif_name, region_name in self.massif_name_to_region_name.items()
if region_name_loop in region_name]
df_annual_maxima[region_name_loop] = df_annual_maxima.loc[:, massif_belong_to_the_group].max(axis=1)
return df_annual_maxima
@property
def massif_name_to_region_name(self):
df_centroid = self.load_df_centroid()
return OrderedDict(zip(df_centroid['NOM'], df_centroid['REGION']))
@property
def region_names(self):
return ['Alps', 'Northern Alps', 'Central Alps', 'Southern Alps', 'Extreme South Alps']
...@@ -7,7 +7,7 @@ from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model ...@@ -7,7 +7,7 @@ from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model
AbstractMaxStableModelWithCovarianceFunction, CovarianceFunction AbstractMaxStableModelWithCovarianceFunction, CovarianceFunction
from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith, BrownResnick, Schlather, \ from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith, BrownResnick, Schlather, \
Geometric, ExtremalT, ISchlather Geometric, ExtremalT, ISchlather
from safran_study.safran import Safran2400, Safran1800 from safran_study.safran import Safran
from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \
AlpsStation3DCoordinatesWithAnisotropy AlpsStation3DCoordinatesWithAnisotropy
from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \ from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \
...@@ -85,5 +85,5 @@ def load_test_spatiotemporal_coordinates(nb_points, nb_steps, train_split_ratio= ...@@ -85,5 +85,5 @@ def load_test_spatiotemporal_coordinates(nb_points, nb_steps, train_split_ratio=
def load_safran_objects(): def load_safran_objects():
nb_days_list = [1, 3, 5][:1] nb_days_list = [1, 3, 5][:1]
safran_classes = [Safran1800, Safran2400][:1] safran_altitude_list = [1800, 2400][:1]
return [safran_class(nb_days) for safran_class in safran_classes for nb_days in nb_days_list] return [Safran(safran_altitude, nb_days) for safran_altitude in safran_altitude_list for nb_days in nb_days_list]
\ No newline at end of file \ No newline at end of file
...@@ -24,6 +24,8 @@ def first(s): ...@@ -24,6 +24,8 @@ def first(s):
return next(iter(s)) return next(iter(s))
# todo: these cached property have a weird behavior with inheritence,
# when we call the super cached_property in the child method
class cached_property(object): class cached_property(object):
""" """
Descriptor (non-data) for building an attribute on-demand on first use. Descriptor (non-data) for building an attribute on-demand on first use.
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment