diff --git a/experiment/meteo_france_data/scm_models_data/abstract_study.py b/experiment/meteo_france_data/scm_models_data/abstract_study.py index 0a18ad1fe19d8aa86e6f76c62313076facffb8d8..6fe1e078b5d19065ec5def8cca49ae2196954ad5 100644 --- a/experiment/meteo_france_data/scm_models_data/abstract_study.py +++ b/experiment/meteo_france_data/scm_models_data/abstract_study.py @@ -4,7 +4,7 @@ import os.path as op from collections import OrderedDict from contextlib import redirect_stdout from multiprocessing.pool import Pool -from typing import List, Dict, Tuple +from typing import List, Dict, Tuple, Union import matplotlib.pyplot as plt import numpy as np @@ -26,7 +26,7 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ AbstractSpatialCoordinates from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima -from utils import get_full_path, cached_property, NB_CORES +from utils import get_full_path, cached_property, NB_CORES, classproperty f = io.StringIO() with redirect_stdout(f): @@ -112,8 +112,6 @@ class AbstractStudy(object): year_to_daily_time_serie_array[year] = daily_time_serie return year_to_daily_time_serie_array - - """ Load Variables and Datasets """ @cached_property @@ -215,20 +213,29 @@ class AbstractStudy(object): """ Visualization methods """ - @cached_property - def massifs_coordinates_for_display(self) -> AbstractSpatialCoordinates: + @classmethod + def massifs_coordinates_for_display(cls, massif_names) -> AbstractSpatialCoordinates: # Coordinate object that represents the massif coordinates in Lambert extended # extracted for a csv file, and used only for display purposes - df = self.load_df_centroid() + df = cls.load_df_centroid() # Filter, keep massifs present at the altitude of interest - df = df.loc[self.study_massif_names] + df = df.loc[massif_names, :] # Build coordinate object from df_centroid return AbstractSpatialCoordinates.from_df(df) - def visualize_study(self, ax=None, massif_name_to_value=None, show=True, fill=True, replace_blue_by_white=True, - label=None, add_text=False, cmap=None, vmax=100, vmin=0): + @classmethod + def visualize_study(cls, ax=None, massif_name_to_value: Union[None, Dict[str, float]] = None, show=True, fill=True, + replace_blue_by_white=True, + label=None, add_text=False, cmap=None, vmax=100, vmin=0, + default_color_for_missing_massif='w', + default_color_for_nan_values='w', + ): + if ax is None: + ax = plt.gca() + if massif_name_to_value is None: massif_name_to_fill_kwargs = None + massif_names, values = None, None else: massif_names, values = list(zip(*massif_name_to_value.items())) if cmap is None: @@ -237,14 +244,11 @@ class AbstractStudy(object): norm = Normalize(vmin, vmax) create_colorbase_axis(ax, label, cmap, norm) m = cm.ScalarMappable(norm=norm, cmap=cmap) - colors = [m.to_rgba(value) if not np.isnan(value) else 'w' for value in values] + colors = [m.to_rgba(value) if not np.isnan(value) else default_color_for_nan_values for value in values] massif_name_to_fill_kwargs = {massif_name: {'color': color} for massif_name, color in zip(massif_names, colors)} - if ax is None: - ax = plt.gca() - - for coordinate_id, coords_list in self.idx_to_coords_list.items(): + for coordinate_id, coords_list in cls.idx_to_coords_list.items(): # Retrieve the list of coords (x,y) that define the contour of the massif of id coordinate_id # if j == 0: @@ -253,11 +257,14 @@ class AbstractStudy(object): coords_list = list(zip(*coords_list)) ax.plot(*coords_list, color='black') # Potentially, fill the inside of the polygon with some color - if fill and coordinate_id in self.coordinate_id_to_massif_name: - massif_name = self.coordinate_id_to_massif_name[coordinate_id] + if fill and coordinate_id in cls.coordinate_id_to_massif_name: + massif_name = cls.coordinate_id_to_massif_name[coordinate_id] if massif_name_to_fill_kwargs is not None and massif_name in massif_name_to_fill_kwargs: fill_kwargs = massif_name_to_fill_kwargs[massif_name] ax.fill(*coords_list, **fill_kwargs) + else: + ax.fill(*coords_list, **{'color': default_color_for_missing_massif}) + # else: # fill_kwargs = {} @@ -268,8 +275,11 @@ class AbstractStudy(object): # ax.scatter(x, y) # ax.text(x, y, massif_name) # Display the center of the massif - ax.scatter(self.massifs_coordinates_for_display.x_coordinates, - self.massifs_coordinates_for_display.y_coordinates, s=1) + print(massif_names) + masssif_coordinate_for_display = cls.massifs_coordinates_for_display(massif_names) + + ax.scatter(masssif_coordinate_for_display.x_coordinates, + masssif_coordinate_for_display.y_coordinates, s=1) # Improve some explanation on the X axis and on the Y axis ax.set_xlabel('Longitude (km)') ax.xaxis.set_major_formatter(get_km_formatter()) @@ -277,7 +287,7 @@ class AbstractStudy(object): ax.yaxis.set_major_formatter(get_km_formatter()) # Display the name or value of the massif if add_text: - for _, row in self.massifs_coordinates_for_display.df_all_coordinates.iterrows(): + for _, row in masssif_coordinate_for_display.df_all_coordinates.iterrows(): x, y = list(row) massif_name = row.name value = massif_name_to_value[massif_name] @@ -293,15 +303,15 @@ class AbstractStudy(object): """ Path properties """ - @property + @classproperty def relative_path(self) -> str: return r'local/spatio_temporal_datasets' - @property + @classproperty def full_path(self) -> str: return get_full_path(relative_path=self.relative_path) - @property + @classproperty def map_full_path(self) -> str: return op.join(self.full_path, 'map') @@ -321,12 +331,13 @@ class AbstractStudy(object): def original_safran_massif_id_to_massif_name(self) -> Dict[int, str]: return {massif_id: massif_name for massif_id, massif_name in enumerate(self.all_massif_names)} - @cached_property - def all_massif_names(self) -> List[str]: + # @cached_property + @classproperty + def all_massif_names(cls) -> List[str]: """ Pour l'identification des massifs, le numéro de la variable massif_num correspond à celui de l'attribut num_opp """ - metadata_path = op.join(self.full_path, self.REANALYSIS_FOLDER, 'metadata') + metadata_path = op.join(cls.full_path, cls.REANALYSIS_FOLDER, 'metadata') dbf = Dbf5(op.join(metadata_path, 'massifs_alpes.dbf')) df = dbf.to_dataframe().copy() # type: pd.DataFrame dbf.f.close() @@ -336,15 +347,16 @@ class AbstractStudy(object): all_massif_names[all_massif_names.index('Beaufortin')] = 'Beaufortain' return all_massif_names - def load_df_centroid(self) -> pd.DataFrame: + @classmethod + def load_df_centroid(cls) -> pd.DataFrame: # Load df_centroid containing all the massif names - df_centroid = pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv')) + df_centroid = pd.read_csv(op.join(cls.map_full_path, 'coordonnees_massifs_alpes.csv')) df_centroid.set_index('NOM', inplace=True) # Check that the names of massifs are the same - symmetric_difference = set(df_centroid.index).symmetric_difference(self.all_massif_names) + symmetric_difference = set(df_centroid.index).symmetric_difference(cls.all_massif_names) assert len(symmetric_difference) == 0, symmetric_difference # Sort the column in the order of the SAFRAN dataset - df_centroid = df_centroid.reindex(self.all_massif_names, axis=0) + df_centroid = df_centroid.reindex(cls.all_massif_names, axis=0) for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]: df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float) return df_centroid @@ -371,12 +383,12 @@ class AbstractStudy(object): """ Visualization methods """ - @property - def coordinate_id_to_massif_name(self) -> Dict[int, str]: - df_centroid = self.load_df_centroid() + @classproperty + def coordinate_id_to_massif_name(cls) -> Dict[int, str]: + df_centroid = cls.load_df_centroid() return dict(zip(df_centroid['id'], df_centroid.index)) - @property + @classproperty def idx_to_coords_list(self): df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv')) coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X], diff --git a/experiment/meteo_france_data/stations_data/main_station_comparison.py b/experiment/meteo_france_data/stations_data/main_station_comparison.py index 4a62e8d0c59a378f3b29d1d401d508487d29f22b..e8dd8bb4544f93101dcfc73e243fc0e58de727dd 100644 --- a/experiment/meteo_france_data/stations_data/main_station_comparison.py +++ b/experiment/meteo_france_data/stations_data/main_station_comparison.py @@ -7,7 +7,7 @@ from experiment.meteo_france_data.stations_data.visualization.comparisons_visual def visualize_all_stations(): vizu = ComparisonsVisualization(altitudes=ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST, margin=150) - vizu.visualize_maximum() + vizu.visualize_maximum(visualize_metric_only=False) def visualize_non_nan_station(): @@ -53,9 +53,9 @@ def quick_metric_analysis(): if __name__ == '__main__': # wrong_example3() # visualize_fast_comparison() - # visualize_all_stations() + visualize_all_stations() + # quick_metric_analysis() # wrong_example2() # visualize_non_nan_station() - quick_metric_analysis() # example() diff --git a/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py b/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py index 60ed00648c1131596d7e4febd726f987a30527bd..0e5a4914b26e4139e3a3f13fdc2a4fa0e149c0ce 100644 --- a/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py +++ b/experiment/meteo_france_data/stations_data/visualization/comparisons_visualization/comparisons_visualization.py @@ -7,6 +7,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +from experiment.meteo_france_data.scm_models_data.abstract_study import AbstractStudy from experiment.meteo_france_data.scm_models_data.visualization.study_visualization.study_visualizer import \ VisualizationParameters from experiment.meteo_france_data.stations_data.comparison_analysis import ComparisonAnalysis, MASSIF_COLUMN_NAME, \ @@ -17,6 +18,9 @@ from experiment.trend_analysis.univariate_test.utils import compute_gev_change_p from extreme_estimator.extreme_models.result_from_fit import ResultFromIsmev from extreme_estimator.extreme_models.utils import r, safe_run_r_estimator, ro from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from utils import classproperty + +MAE_COLUMN_NAME = 'mean absolute difference' DISTANCE_COLUMN_NAME = 'distance' path_df_location_to_value_csv_example = r'/home/erwan/Documents/projects/spatiotemporalextremes/experiment/meteo_france_data/stations_data/csv/example.csv' @@ -47,6 +51,14 @@ class ComparisonsVisualization(VisualizationParameters): def comparisons(self) -> List[ComparisonAnalysis]: return list(self.altitude_to_comparison.values()) + @property + def comparison(self): + return self.comparisons[0] + + @property + def study(self): + return self.comparison.study + @property def nb_plot(self): return sum([c.nb_massifs for c in self.comparisons]) @@ -78,6 +90,8 @@ class ComparisonsVisualization(VisualizationParameters): plt.suptitle(title) if show: plt.show() + else: + plt.clf() # Build dataframe from the dictionary df = pd.DataFrame(tuple_location_to_values, index=index).transpose() @@ -85,18 +99,24 @@ class ComparisonsVisualization(VisualizationParameters): return df @classmethod - def visualize_metric(cls, df_location_to_value=None): + def visualize_metric(cls, df=None): # Load or update df value from example file - if df_location_to_value is None: - df_location_to_value = pd.read_csv(path_df_location_to_value_csv_example, index_col=[0, 1, 2]) + if df is None: + df = pd.read_csv(path_df_location_to_value_csv_example, index_col=[0, 1, 2]) else: - df_location_to_value.to_csv(path_df_location_to_value_csv_example) + df.to_csv(path_df_location_to_value_csv_example) - print(df_location_to_value) - print(df_location_to_value.index) - print(df_location_to_value.columns) + # Compute some column like a classication boolean # Display some score spatially + df_score = df.groupby([MASSIF_COLUMN_NAME]).mean() + s_mae = df_score[MAE_COLUMN_NAME] + massif_name_to_value = s_mae.to_dict() + AbstractStudy.visualize_study(massif_name_to_value=massif_name_to_value, + default_color_for_missing_massif='b', + cmap=plt.cm.Reds, + vmin=s_mae.min(), + vmax=s_mae.max()) def _visualize_ax_main(self, plot_function, comparison: ComparisonAnalysis, massif, ax=None, show=False): if ax is None: @@ -126,21 +146,19 @@ class ComparisonsVisualization(VisualizationParameters): ordered_value_dict = OrderedDict() label = i - label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME]) - label += ' ({}km)'.format(s[DISTANCE_COLUMN_NAME]) + maxima, years = self.get_maxima_and_year(s) # Compute the distance between maxima and maxima_center # In percent the number of times the observations is stricty higher than the reanalysis mask = ~np.isnan(maxima_center) & ~np.isnan(maxima) mean_absolute_difference = np.round(np.mean(np.abs(maxima[mask] - maxima_center[mask])), 0) - label += 'Mean absolute diff {}mm'.format(mean_absolute_difference) - ordered_value_dict['mean absolute difference'] = mean_absolute_difference - # metric = np.mean(np.sign(maxima[mask] - maxima_center[mask]) == 1) - # metric = np.round(metric * 100, 0) - # label += '{}% strictly above'.format(metric) + ordered_value_dict[MAE_COLUMN_NAME] = mean_absolute_difference + label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME]) + label += ' ({}km)'.format(s[DISTANCE_COLUMN_NAME]) + label += '({}mm)'.format(mean_absolute_difference) plot_color = color if REANALYSE_STR not in label else 'g' plot_ordered_value_dict = plot_function(ax, ax2, years, maxima, label, plot_color) diff --git a/utils.py b/utils.py index 37d72bb945e22206b596a7b12962a0d8fc726a40..f43fe09f504956b9e7a0a31c52e02aeac3a95646 100644 --- a/utils.py +++ b/utils.py @@ -44,6 +44,38 @@ class Example(object): return 2 +# https://stackoverflow.com/questions/5189699/how-to-make-a-class-property +# does not enable setter, but follow the link to activate setter option + +class ClassPropertyDescriptor(object): + + def __init__(self, fget, fset=None): + self.fget = fget + self.fset = fset + + def __get__(self, obj, klass=None): + if klass is None: + klass = type(obj) + return self.fget.__get__(obj, klass)() + + def __set__(self, obj, value): + if not self.fset: + raise AttributeError("can't set attribute") + type_ = type(obj) + return self.fset.__get__(obj, type_)(value) + + def setter(self, func): + if not isinstance(func, (classmethod, staticmethod)): + func = classmethod(func) + self.fset = func + return self + +def classproperty(func): + if not isinstance(func, (classmethod, staticmethod)): + func = classmethod(func) + + return ClassPropertyDescriptor(func) + if __name__ == '__main__': e = Example() print(e.big_attribute)