diff --git a/experiment/meteo_france_data/stations_data/comparison_analysis.py b/experiment/meteo_france_data/stations_data/comparison_analysis.py index c02d2b8dec674c7006eb1e0c8365b1abdabc032b..2dc6b0e5c98fd3e69acf3a938c93581dbadb6e52 100644 --- a/experiment/meteo_france_data/stations_data/comparison_analysis.py +++ b/experiment/meteo_france_data/stations_data/comparison_analysis.py @@ -246,7 +246,8 @@ class ComparisonAnalysis(object): df_study = pd.concat([df, df_coord, df_obs], axis=1) return df_study - def load_main_df_merged_intersection_clean(self): + @cached_property + def df_merged_intersection_clean(self): df_stations = self.load_main_df_station_intersection_clean() df_study = self.load_main_df_study_intersection_clean() diff = set(df_study.columns).symmetric_difference(set(df_stations.columns)) diff --git a/experiment/meteo_france_data/stations_data/main_station_comparison.py b/experiment/meteo_france_data/stations_data/main_station_comparison.py index 18589d823ddec369b11fc4986ba8bc4eb9121737..27a7a13a2fd9807f8d6e38b40045ab550da70a51 100644 --- a/experiment/meteo_france_data/stations_data/main_station_comparison.py +++ b/experiment/meteo_france_data/stations_data/main_station_comparison.py @@ -5,22 +5,30 @@ from experiment.meteo_france_data.stations_data.visualization.comparisons_visual ComparisonsVisualization -def visualize_full_comparison(): +def visualize_all_stations(): vizu = ComparisonsVisualization(altitudes=ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST, margin=150) vizu.visualize_maximum() -def visualize_fast_comparison(): - vizu = ComparisonsVisualization(altitudes=[900]) - vizu.visualize_maximum() +def visualize_non_nan_station(): + vizu = ComparisonsVisualization(altitudes=ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST, + keep_only_station_without_nan_values=True, + normalize_observations=True) + # vizu.visualize_maximum() + vizu.visualize_gev() def example(): - vizu = ComparisonsVisualization(altitudes=[900]) - vizu._visualize_maximum(vizu.comparisons[0], 'Beaufortain', show=True) + # this is a really good example for the maxima at least + vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False) + vizu._visualize_ax_main(vizu.plot_maxima, vizu.comparisons[0], 'Beaufortain', show=True) + + # vizu = ComparisonsVisualization(altitudes=[900], normalize_observations=False, keep_only_station_without_nan_values=True) + # vizu._visualize_ax_main(vizu.plot_gev, vizu.comparisons[0], 'Beaufortain', show=True) if __name__ == '__main__': # visualize_fast_comparison() - visualize_full_comparison() + # visualize_all_stations() + visualize_non_nan_station() # example() diff --git a/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py b/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py index f399d6a2daf46bfa6c14cbab94fabb8fd6b2e258..d07e610acefbefa72a9097546fc9241c98a2d02f 100644 --- a/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py +++ b/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py @@ -1,19 +1,23 @@ +from itertools import chain from typing import Dict, List import math import matplotlib.pyplot as plt +import numpy as np import pandas as pd from experiment.meteo_france_data.scm_models_data.visualization.study_visualization.study_visualizer import \ VisualizationParameters from experiment.meteo_france_data.stations_data.comparison_analysis import ComparisonAnalysis, MASSIF_COLUMN_NAME, \ REANALYSE_STR, ALTITUDE_COLUMN_NAME -from itertools import chain +from extreme_estimator.extreme_models.result_from_fit import ResultFromIsmev +from extreme_estimator.extreme_models.utils import r, safe_run_r_estimator, ro class ComparisonsVisualization(VisualizationParameters): - def __init__(self, altitudes=None, keep_only_station_without_nan_values=False, margin=150): + def __init__(self, altitudes=None, keep_only_station_without_nan_values=False, margin=150, + normalize_observations=False): self.keep_only_station_without_nan_values = keep_only_station_without_nan_values if self.keep_only_station_without_nan_values: self.nb_columns = 5 @@ -24,7 +28,7 @@ class ComparisonsVisualization(VisualizationParameters): self.altitude_to_comparison = {} # type: Dict[int, ComparisonAnalysis] for altitude in altitudes: comparison = ComparisonAnalysis(altitude=altitude, - normalize_observations=False, + normalize_observations=normalize_observations, one_station_per_massif=False, transformation_class=None, margin=margin, @@ -43,7 +47,7 @@ class ComparisonsVisualization(VisualizationParameters): def massifs(self): return sorted(set(chain(*[c.intersection_massif_names for c in self.comparisons]))) - def _visualize_main(self, visualization_ax_function, title=''): + def _visualize_main(self, plot_function, title=''): nb_rows = math.ceil(self.nb_plot / self.nb_columns) fig, axes = plt.subplots(nb_rows, self.nb_columns, figsize=self.figsize) fig.subplots_adjust(hspace=self.subplot_space, wspace=self.subplot_space) @@ -52,39 +56,54 @@ class ComparisonsVisualization(VisualizationParameters): ax_idx = 0 for massif in self.massifs: for c in [c for c in self.comparisons if massif in c.intersection_massif_names]: - visualization_ax_function(c, massif, axes[ax_idx]) + self._visualize_ax_main(plot_function, c, massif, axes[ax_idx]) ax_idx += 1 plt.suptitle(title) plt.show() - def visualize_maximum(self): - return self._visualize_main(self._visualize_maximum, 'Recent trend of Annual maxima of snowfall') - - def _visualize_maximum(self, comparison: ComparisonAnalysis, massif, ax=None, show=False): + def _visualize_ax_main(self, plot_function, comparison: ComparisonAnalysis, massif, ax=None, show=False): if ax is None: _, ax = plt.subplots(1, 1, figsize=self.figsize) - df = comparison.load_main_df_merged_intersection_clean() + df = comparison.df_merged_intersection_clean.copy() ind = df[MASSIF_COLUMN_NAME] == massif df.drop([MASSIF_COLUMN_NAME], axis=1, inplace=True) assert sum(ind) > 0 - df = df.loc[ind] # type: pd.DataFrame + df = df.loc[ind] # type: pd.DataFrame colors_station = ['r', 'tab:orange', 'tab:purple', 'm', 'k'] for color, (i, s) in zip(colors_station, df.iterrows()): label = i label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME]) s_values = s.iloc[3:].to_dict() plot_color = color if REANALYSE_STR not in label else 'g' - ax.plot(list(s_values.keys()), list(s_values.values()), label=label, color=plot_color) - ax.legend(prop={'size': 5}) + plot_function(ax, s_values, label, plot_color) ax.set_title('{} at {}'.format(massif, comparison.altitude)) + ax.legend(prop={'size': 5}) if show: - plt.show() + def visualize_maximum(self): + return self._visualize_main(self.plot_maxima, 'Recent trend of Annual maxima of snowfall') + + def plot_maxima(self, ax, s_values, label, plot_color): + ax.plot(list(s_values.keys()), list(s_values.values()), label=label, color=plot_color) + def visualize_gev(self): - return self._visualize_main(self._visualize_gev) + return self._visualize_main(self.plot_gev) + + def plot_gev(self, ax, s_values, label, plot_color): + # todo should I normalize here ? + # fit gev + data = list(s_values.values()) + res = safe_run_r_estimator(function=r('gev.fit'), xdat=ro.FloatVector(data), + use_start=True) + res = ResultFromIsmev(res, {}) + gev_params = res.stationary_gev_params + + lim = 1.5 * max(data) + x = np.linspace(0, lim, 1000) + y = gev_params.density(x) + # display the gev distribution that was obtained + ax.plot(x, y, label=label, color=plot_color) - def _visualize_gev(self): - pass diff --git a/extreme_estimator/extreme_models/result_from_fit.py b/extreme_estimator/extreme_models/result_from_fit.py index 257017ce542e511ad309e045797d748faa09835b..006d5575b67b6af2a90bb9f72ca94a668af11614 100644 --- a/extreme_estimator/extreme_models/result_from_fit.py +++ b/extreme_estimator/extreme_models/result_from_fit.py @@ -11,6 +11,7 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo def convertFloatVector_to_float(f): return np.array(f)[0] + class ResultFromFit(object): def __init__(self, result_from_fit: robjects.ListVector) -> None: @@ -74,6 +75,12 @@ class ResultFromIsmev(ResultFromFit): i += 1 return coef_dict + @property + def stationary_gev_params(self) -> GevParams: + params = {k.split('Coeff1')[0]: v for k, v in self.margin_coef_dict.items() + if 'Coeff1' in k and 'temp' not in k} + return GevParams.from_dict(params) + @property def all_parameters(self): return self.margin_coef_dict @@ -91,7 +98,6 @@ class ResultFromIsmev(ResultFromFit): return convertFloatVector_to_float(self.name_to_value['conv']) == 0 - class ResultFromSpatialExtreme(ResultFromFit): """ Handler from any result with the result of a fit functions from the package Spatial Extreme diff --git a/extreme_estimator/margin_fits/gev/gev_params.py b/extreme_estimator/margin_fits/gev/gev_params.py index 56a044471381c2aa0dc1e982c93004f894cf935d..355b3e0abf6ed5414d1c0c15f69e5c135a8fccc1 100644 --- a/extreme_estimator/margin_fits/gev/gev_params.py +++ b/extreme_estimator/margin_fits/gev/gev_params.py @@ -21,9 +21,22 @@ class GevParams(ExtremeParams): else: return r.qgev(p, self.location, self.scale, self.shape)[0] + def density(self, x): + if self.has_undefined_parameters: + return np.nan + else: + res = r.dgev(x, self.location, self.scale, self.shape) + if isinstance(x, float): + return res[0] + else: + return np.array(res) + @property def param_values(self): if self.has_undefined_parameters: return [np.nan for _ in range(3)] else: - return [self.location, self.scale, self.shape] \ No newline at end of file + return [self.location, self.scale, self.shape] + + def __str__(self): + return self.to_dict().__str__() \ No newline at end of file