From 4664516989fcebb1acf6b11a96ee6376f5218bd8 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Mon, 6 Apr 2020 10:59:05 +0200 Subject: [PATCH] [contrasting project] add pyrenees data. add refuge_sarradet.py analysis --- .../scm_models_data/abstract_study.py | 75 ++++++++++++++----- .../case_studies/refuge_sarradet.py | 44 +++++++++++ .../crocus/crocus_variables.py | 2 +- .../scm_models_data/safran/cumulated_study.py | 2 +- .../scm_models_data/utils.py | 30 ++++++++ ...dy_visualizer_for_non_stationary_trends.py | 2 +- 6 files changed, 135 insertions(+), 20 deletions(-) create mode 100644 extreme_data/meteo_france_data/scm_models_data/case_studies/refuge_sarradet.py diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py index 5f6e0ba8..53840c30 100644 --- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py +++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py @@ -24,7 +24,7 @@ from extreme_data.meteo_france_data.scm_models_data.abstract_variable import Abs from extreme_data.meteo_france_data.scm_models_data.utils import ALTITUDES, ZS_INT_23, ZS_INT_MASK, LONGITUDES, \ LATITUDES, ORIENTATIONS, SLOPES, ORDERED_ALLSLOPES_ALTITUDES, ORDERED_ALLSLOPES_ORIENTATIONS, \ ORDERED_ALLSLOPES_SLOPES, ORDERED_ALLSLOPES_MASSIFNUM, date_to_str, WP_PATTERN_MAX_YEAR, SeasonForTheMaxima, \ - first_day_and_last_day + first_day_and_last_day, FrenchRegion, ZS_INT_MASK_PYRENNES, alps_massif_order, ZS_INT_MASK_PYRENNES_LIST from extreme_data.meteo_france_data.scm_models_data.visualization.utils import get_km_formatter from extreme_fit.function.margin_function.abstract_margin_function import \ AbstractMarginFunction @@ -56,16 +56,19 @@ class AbstractStudy(object): The year 2017 represents the nc file that correspond to the winter between the year 2017 and 2018. """ # REANALYSIS_FLAT_FOLDER = 'SAFRAN_montagne-CROCUS_2019/alp_flat/reanalysis' - REANALYSIS_FLAT_FOLDER = 'S2M_AERIS_MARS_2020/' - REANALYSIS_ALLSLOPES_FOLDER = 'SAFRAN_montagne-CROCUS_2019/alp_allslopes/reanalysis' + REANALYSIS_ALPS_FLAT_FOLDER = 'S2M_AERIS_MARS_2020/' + REANALYSIS_PYRENEES_FLAT_FOLDER = 'S2M_AERIS_AVRIL_2020/' + REANALYSIS_ALPS_ALLSLOPES_FOLDER = 'SAFRAN_montagne-CROCUS_2019/alp_allslopes/reanalysis' # REANALYSIS_FOLDER = 'SAFRAN_montagne-CROCUS_2019/postes/reanalysis' def __init__(self, variable_class: type, altitude: int = 1800, year_min=1959, year_max=2019, multiprocessing=True, orientation=None, slope=20.0, - season=SeasonForTheMaxima.annual): + season=SeasonForTheMaxima.annual, + french_region=FrenchRegion.alps): assert isinstance(altitude, int), type(altitude) assert altitude in ALTITUDES, altitude + self.french_region = french_region self.altitude = altitude self.model_name = None self.variable_class = variable_class @@ -377,7 +380,7 @@ class AbstractStudy(object): @property def missing_massif_name(self): - return set(self.all_massif_names) - set(self.altitude_to_massif_names[self.altitude]) + return set(self.all_massif_names(self.reanalysis_path, self.dbf_filename)) - set(self.altitude_to_massif_names[self.altitude]) @property def column_mask(self): @@ -395,7 +398,12 @@ class AbstractStudy(object): @cached_property def flat_mask(self): - altitude_mask = ZS_INT_MASK == self.altitude + if self.french_region is FrenchRegion.alps: + altitude_mask = ZS_INT_MASK == self.altitude + elif self.french_region is FrenchRegion.pyrenees: + altitude_mask = ZS_INT_MASK_PYRENNES == self.altitude + else: + raise ValueError('{}'.format(self.french_region)) assert np.sum(altitude_mask) == len(self.altitude_to_massif_names[self.altitude]) return altitude_mask @@ -580,9 +588,28 @@ class AbstractStudy(object): @property def reanalysis_path(self): - reanalysis_folder = self.REANALYSIS_ALLSLOPES_FOLDER if self.has_orientation else self.REANALYSIS_FLAT_FOLDER + if self.french_region is FrenchRegion.alps: + if self.has_orientation: + reanalysis_folder = self.REANALYSIS_ALPS_ALLSLOPES_FOLDER + else: + reanalysis_folder = self.REANALYSIS_ALPS_FLAT_FOLDER + elif self.french_region is FrenchRegion.pyrenees and not self.has_orientation: + reanalysis_folder = self.REANALYSIS_PYRENEES_FLAT_FOLDER + else: + raise ValueError( + 'french_region = {}, has_orientation = {}'.format(self.french_region, self.has_orientation)) + return op.join(self.full_path, reanalysis_folder) + @property + def dbf_filename(self) -> str: + if self.french_region is FrenchRegion.alps: + return 'massifs_alpes' + elif self.french_region is FrenchRegion.pyrenees: + return 'massifs_pyrenees' + else: + raise ValueError('{}'.format(self.french_region)) + @property def has_orientation(self): return self.orientation is not None @@ -593,19 +620,31 @@ class AbstractStudy(object): def original_safran_massif_id_to_massif_name(cls) -> Dict[int, str]: return {massif_id: massif_name for massif_id, massif_name in enumerate(cls.all_massif_names)} - @classproperty - def all_massif_names(cls) -> List[str]: + @classmethod + def all_massif_names(cls, reanalysis_path=None, dbf_filename='massifs_alpes') -> List[str]: """ Pour l'identification des massifs, le numéro de la variable massif_num correspond à celui de l'attribut num_opp """ - metadata_path = op.join(cls.full_path, cls.REANALYSIS_FLAT_FOLDER, 'metadata') - dbf = Dbf5(op.join(metadata_path, 'massifs_alpes.dbf')) + if reanalysis_path is None: + # Default case if the french alps + reanalysis_path = op.join(cls.full_path, cls.REANALYSIS_ALPS_FLAT_FOLDER) + if cls.REANALYSIS_ALPS_FLAT_FOLDER in reanalysis_path or cls.REANALYSIS_ALPS_ALLSLOPES_FOLDER in reanalysis_path: + french_region = FrenchRegion.alps + key = 'num_opp' + else: + french_region = FrenchRegion.pyrenees + key = 'massif_num' + + metadata_path = op.join(reanalysis_path, 'metadata') + dbf = Dbf5(op.join(metadata_path, '{}.dbf'.format(dbf_filename))) df = dbf.to_dataframe().copy() # type: pd.DataFrame dbf.f.close() - df.sort_values(by='num_opp', inplace=True) + # Important part (for the alps & pyrenees all data is order from the smaller massif number to the bigger) + df.sort_values(by=key, inplace=True) all_massif_names = list(df['nom']) # Correct a massif name - all_massif_names[all_massif_names.index('Beaufortin')] = 'Beaufortain' + if french_region is FrenchRegion.alps: + all_massif_names[all_massif_names.index('Beaufortin')] = 'Beaufortain' return all_massif_names @classmethod @@ -614,17 +653,18 @@ class AbstractStudy(object): df_centroid = pd.read_csv(op.join(cls.map_full_path, 'coordonnees_massifs_alpes.csv')) df_centroid.set_index('NOM', inplace=True) # Check that the names of massifs are the same - symmetric_difference = set(df_centroid.index).symmetric_difference(cls.all_massif_names) + symmetric_difference = set(df_centroid.index).symmetric_difference(cls.all_massif_names()) assert len(symmetric_difference) == 0, symmetric_difference # Sort the column in the order of the SAFRAN dataset - df_centroid = df_centroid.reindex(cls.all_massif_names, axis=0) + df_centroid = df_centroid.reindex(cls.all_massif_names(), axis=0) for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]: df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float) return df_centroid @cached_property def massif_name_to_altitudes(self) -> Dict[str, List[int]]: - s = ZS_INT_23 + [0] + zs = ZS_INT_23 if self.french_region is FrenchRegion.alps else ZS_INT_MASK_PYRENNES_LIST + s = zs + [0] zs_list = [] zs_all_list = [] for a, b in zip(s[:-1], s[1:]): @@ -632,7 +672,8 @@ class AbstractStudy(object): if a > b: zs_all_list.append(zs_list) zs_list = [] - return OrderedDict(zip(self.all_massif_names, zs_all_list)) + all_massif_names = self.all_massif_names(self.reanalysis_path, self.dbf_filename) + return OrderedDict(zip(all_massif_names, zs_all_list)) @cached_property def altitude_to_massif_names(self) -> Dict[int, List[str]]: diff --git a/extreme_data/meteo_france_data/scm_models_data/case_studies/refuge_sarradet.py b/extreme_data/meteo_france_data/scm_models_data/case_studies/refuge_sarradet.py new file mode 100644 index 00000000..5834fb88 --- /dev/null +++ b/extreme_data/meteo_france_data/scm_models_data/case_studies/refuge_sarradet.py @@ -0,0 +1,44 @@ +import pandas as pd +import numpy as np +import xlsxwriter + + +from extreme_data.meteo_france_data.scm_models_data.abstract_study import AbstractStudy +from extreme_data.meteo_france_data.scm_models_data.crocus.crocus import CrocusDepth, CrocusSwe3Days +from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranSnowfall1Day, SafranSnowfall3Days +from extreme_data.meteo_france_data.scm_models_data.utils import FrenchRegion +from extreme_fit.estimator.margin_estimator.utils import fitted_stationary_gev + + +def generate_excel_with_annual_maxima(fit=True): + massif_name, french_region, altitudes = 'Beaufortain', FrenchRegion.alps, [900, 1200] + # massif_name, french_region, altitudes = 'haute-bigorre', FrenchRegion.pyrenees, [300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300] + study_classes = [SafranSnowfall1Day, SafranSnowfall3Days, CrocusSwe3Days, CrocusDepth] + writer = pd.ExcelWriter('{}.xlsx'.format(massif_name), engine='xlsxwriter') + # writer = pd.ExcelWriter('pandas_multiple.xlsx') + for j, study_class in enumerate(study_classes, 1): + write_df_with_annual_maxima(massif_name, french_region, writer, study_class, altitudes, fit=fit) + writer.save() + + +def write_df_with_annual_maxima(massif_name, french_region, writer, study_class, altitudes, fit=True) -> pd.DataFrame: + columns = [] + for altitude in altitudes: + study = study_class(altitude=altitude, french_region=french_region) # type: AbstractStudy + df_maxima = study.observations_annual_maxima.df_maxima_gev + s = df_maxima.loc[massif_name] + # Fit the data and add the parameters as the first columns + gev_param = fitted_stationary_gev(s.values) + s = pd.concat([gev_param.to_serie(), s]) + columns.append(s) + df = pd.concat(columns, axis=1) + altitude_str = [str(a) + ' m' for a in altitudes] + df.columns = altitude_str + name = study.variable_name + short_name = name[:31] + df.to_excel(writer, sheet_name=short_name) + return df + + +if __name__ == '__main__': + generate_excel_with_annual_maxima(fit=True) diff --git a/extreme_data/meteo_france_data/scm_models_data/crocus/crocus_variables.py b/extreme_data/meteo_france_data/scm_models_data/crocus/crocus_variables.py index ad044cad..8b4c7c9d 100644 --- a/extreme_data/meteo_france_data/scm_models_data/crocus/crocus_variables.py +++ b/extreme_data/meteo_france_data/scm_models_data/crocus/crocus_variables.py @@ -29,7 +29,7 @@ class CrocusRecentSweVariableOneDay(CrocusTotalSweVariable): class CrocusRecentSweVariableThreeDays(CrocusTotalSweVariable): - NAME = 'Snow Water Equivalent last 3 days' + NAME = 'SWE in 3 days' @classmethod def keyword(cls): diff --git a/extreme_data/meteo_france_data/scm_models_data/safran/cumulated_study.py b/extreme_data/meteo_france_data/scm_models_data/safran/cumulated_study.py index 7f08607b..2e55205a 100644 --- a/extreme_data/meteo_france_data/scm_models_data/safran/cumulated_study.py +++ b/extreme_data/meteo_france_data/scm_models_data/safran/cumulated_study.py @@ -15,4 +15,4 @@ class CumulatedStudy(AbstractStudy): @property def variable_name(self): - return super().variable_name + ' cumulated over {} day(s)'.format(self.nb_consecutive_days) + return super().variable_name + ' {} day(s)'.format(self.nb_consecutive_days) diff --git a/extreme_data/meteo_france_data/scm_models_data/utils.py b/extreme_data/meteo_france_data/scm_models_data/utils.py index 1dbdfe4b..3f1c57e9 100644 --- a/extreme_data/meteo_france_data/scm_models_data/utils.py +++ b/extreme_data/meteo_france_data/scm_models_data/utils.py @@ -8,6 +8,8 @@ import numpy as np WP_PATTERN_MAX_YEAR = 2008 +alps_massif_order = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 30] +pyrenees_massif_order = [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91] def date_to_str(date: datetime) -> str: return str(date).split()[0] @@ -19,6 +21,11 @@ class SeasonForTheMaxima(Enum): # i could add the classical seasons if needed +class FrenchRegion(Enum): + alps = 1 + pyrenees = 2 + + def first_day_and_last_day(season): season_to_start_day_and_last_day = { SeasonForTheMaxima.annual: ('08-01', '07-31'), @@ -60,6 +67,29 @@ ZS_INT_23 = ZS_INT[:-10].copy() ZS_INT_MASK = np.array(ZS_INT) ZS_INT_MASK[-10:] = np.nan +ZS_PYRENNES = """[ 0. 300. 600. 900. 1200. 1500. 1800. 2100. 0. 300. 600. 900. + 1200. 1500. 1800. 2100. 2400. 2700. 3000. 300. 600. 900. 1200. 1500. + 1800. 2100. 2400. 2700. 3000. 3300. 300. 600. 900. 1200. 1500. 1800. + 2100. 2400. 2700. 3000. 3300. 300. 600. 900. 1200. 1500. 1800. 2100. + 2400. 2700. 3000. 3300. 300. 600. 900. 1200. 1500. 1800. 2100. 2400. + 2700. 3000. 300. 600. 900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. + 3300. 600. 900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 300. 600. + 900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 300. 600. 900. 1200. + 1500. 1800. 2100. 2400. 2700. 3000. 0. 300. 600. 900. 1200. 1500. + 1800. 2100. 2400. 2700. 3000. 0. 300. 600. 900. 1200. 1500. 1800. + 2100. 2400. 2700. 600. 900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. + 600. 900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 3300. 300. 600. + 900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 3300. 3600. 300. 600. + 900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 3300. 3600. 300. 600. + 900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 3300. 600. 900. 1200. + 1500. 1800. 2100. 2400. 2700. 3000. 300. 600. 900. 1200. 1500. 1800. + 2100. 2400. 2700. 3000. 600. 900. 1200. 1500. 1800. 2100. 2400. 2700. + 3000. 600. 900. 1200. 1500. 1800. 2100. 2400. 2700. 3000. 300. 600. + 900. 1200. 1500. 1800. 2100. 2400. 2700. 300. 600. 900. 1200. 1500. + 1800. 2100. 2400. 2700.]""" +ZS_INT_MASK_PYRENNES_LIST = [int(float(e)) for e in ZS_PYRENNES[1:-1].split()] +ZS_INT_MASK_PYRENNES = np.array(ZS_INT_MASK_PYRENNES_LIST) + # Longitudes and Latitudes in degrees LONGITUDES = [6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.64493, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.39738, 6.82392, diff --git a/extreme_trend/visualizers/study_visualizer_for_non_stationary_trends.py b/extreme_trend/visualizers/study_visualizer_for_non_stationary_trends.py index 05bcf688..f9a2c83d 100644 --- a/extreme_trend/visualizers/study_visualizer_for_non_stationary_trends.py +++ b/extreme_trend/visualizers/study_visualizer_for_non_stationary_trends.py @@ -356,7 +356,7 @@ class StudyVisualizerForNonStationaryTrends(StudyVisualizer): res = [compute_eurocode_confidence_interval(*argument) for argument in arguments] massif_name_to_eurocode_return_level_uncertainty = dict(zip(massifs_names, res)) # For the rest of the massif names. Create a Eurocode Return Level Uncertainty as nan - for massif_name in set(self.study.all_massif_names) - set(massifs_names): + for massif_name in set(self.study.all_massif_names()) - set(massifs_names): massif_name_to_eurocode_return_level_uncertainty[massif_name] = self.default_eurocode_uncertainty return massif_name_to_eurocode_return_level_uncertainty -- GitLab