diff --git a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py index 17bbf6f7a471253e35e83675c4ed006047118fa8..91be36f36cfcfc76af97424480e4ddcaecfd58ce 100644 --- a/extreme_data/meteo_france_data/scm_models_data/abstract_study.py +++ b/extreme_data/meteo_france_data/scm_models_data/abstract_study.py @@ -77,10 +77,6 @@ class AbstractStudy(object): """ Time """ - @property - def nb_years(self): - return self.year_max - self.year_min + 1 - @cached_property def year_to_days(self) -> OrderedDict: # Map each year to the 'days since year-08-01 06:00:00' @@ -115,19 +111,24 @@ class AbstractStudy(object): wps = pd.Series(np.concatenate([self.massif_name_to_df_ordered_by_maxima[massif_name]['WP'].values[:nb_top] for massif_name in massif_names])) s_normalized = wps.value_counts(normalize=True) * 100 - s_normalized = s_normalized.round() - s_not_normalized = wps.value_counts() - # todo: do that, complete the last columns with the mean maxima - # Add a column that indicate the mean maxima associated to each weather pattern - # f = {} - # for wp in s_normalized.index: - # print(wp) - # for massif_id - - - # Concatenate all the results in one dataframe - df = pd.concat([s_normalized, s_not_normalized], axis=1) - df.columns = ['Percentage', 'Nb massifs concerned'] + # Add several columns that indicate the strength of the maxima to each weather pattern + f = {wp: [] for wp in s_normalized.index} + for massif_name in massif_names: + df_ordered_by_maxima = self.massif_name_to_df_ordered_by_maxima[massif_name] + df_ordered_by_maxima = df_ordered_by_maxima.iloc[:nb_top, :] # type: pd.DataFrame + assert len(df_ordered_by_maxima) == nb_top + for _, row in df_ordered_by_maxima.iterrows(): + wp, maxima = row['WP'], row['Maxima'] + f[wp].append(maxima) + df = pd.DataFrame({wp: pd.Series(l).describe() for wp, l in f.items()}).transpose() + df = pd.concat([s_normalized, df], axis=1) + df.columns = ['%' if i == 0 else c for i, c in enumerate(df.columns)] + drop_columns = ['25%', '75%'] + if df['std'].isnull().any(): + drop_columns.append('std') + df.drop(columns=drop_columns, inplace=True) + df.rename(columns={'50%': 'median'}, inplace=True) + df = df.astype(int) df.index.name = 'Number Top={}'.format(nb_top) return df @@ -146,6 +147,7 @@ class AbstractStudy(object): } df = pd.DataFrame(d) df.set_index('Year', inplace=True) + assert len(df) == self.nb_years massif_name_to_df_ordered_by_maxima[massif_name] = df assert set(self.study_massif_names) == set(massif_name_to_df_ordered_by_maxima.keys()) return massif_name_to_df_ordered_by_maxima @@ -315,6 +317,10 @@ class AbstractStudy(object): """ Temporal properties """ + @property + def nb_years(self): + return len(self.ordered_years) + @property def ordered_years(self): return self.ordered_years_and_path_files[1] diff --git a/projects/contrasting_trends_in_snow_loads/weather_types_analysis/main_distribution_wps.py b/projects/contrasting_trends_in_snow_loads/weather_types_analysis/main_distribution_wps.py index 5018c1b49315301bf9b010276007822cb979c79b..9372f13b45771fbe09760ee51d802cb4931a7d5c 100644 --- a/projects/contrasting_trends_in_snow_loads/weather_types_analysis/main_distribution_wps.py +++ b/projects/contrasting_trends_in_snow_loads/weather_types_analysis/main_distribution_wps.py @@ -26,20 +26,23 @@ If we look at the 5 biggest maxima for each massif, and look to which Weather pattern they correspond then we do some percentage for each climatic region - Percentage Nb massifs concerned + Northern Alps -Steady Oceanic 77.0 27 -Atlantic Wave 11.0 4 + % count mean std min median max +Steady Oceanic 77 27 105 16 80 104 150 +Atlantic Wave 11 4 111 15 95 111 129 Central Alps -Steady Oceanic 57.0 20 -South Circulation 17.0 6 -East Return 14.0 5 + % count mean min median max +Steady Oceanic 57 20 90 70 86 119 +South Circulation 17 6 85 64 88 104 +East Return 14 5 88 74 93 100 Southern Alps -South Circulation 43.0 13 -Central Depression 37.0 11 -East Return 17.0 5 + % count mean min median max +South Circulation 43 13 99 72 106 122 +Central Depression 36 11 98 68 97 136 +East Return 16 5 90 70 83 121 Extreme South Alps Central Depression 53.0 8 @@ -55,7 +58,7 @@ def main_temporal_distribution_wps(study_class, year_min=1954, year_max=2008): for region_name in AbstractExtendedStudy.region_names: massif_names = AbstractExtendedStudy.region_name_to_massif_names[region_name] print('\n \n', region_name, '\n') - for nb_top in [study_before.nb_years, 10, 5, 1][-1:]: + for nb_top in [study_before.nb_years, 10][1:]: print(study_before.df_for_top_annual_maxima(nb_top=nb_top, massif_names=massif_names), '\n') print(study_after.df_for_top_annual_maxima(nb_top=nb_top, massif_names=massif_names), '\n')