Commit 6e34a0e8 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[COMPARISON] add class_property to abstract study, to enable spatial display...

[COMPARISON] add class_property to abstract study, to enable spatial display of errors between SAFRAN reanalysis & data from observations
parent 9b40ad17
No related merge requests found
Showing with 113 additions and 51 deletions
+113 -51
......@@ -4,7 +4,7 @@ import os.path as op
from collections import OrderedDict
from contextlib import redirect_stdout
from multiprocessing.pool import Pool
from typing import List, Dict, Tuple
from typing import List, Dict, Tuple, Union
import matplotlib.pyplot as plt
import numpy as np
......@@ -26,7 +26,7 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo
from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
AbstractSpatialCoordinates
from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima
from utils import get_full_path, cached_property, NB_CORES
from utils import get_full_path, cached_property, NB_CORES, classproperty
f = io.StringIO()
with redirect_stdout(f):
......@@ -112,8 +112,6 @@ class AbstractStudy(object):
year_to_daily_time_serie_array[year] = daily_time_serie
return year_to_daily_time_serie_array
""" Load Variables and Datasets """
@cached_property
......@@ -215,20 +213,29 @@ class AbstractStudy(object):
""" Visualization methods """
@cached_property
def massifs_coordinates_for_display(self) -> AbstractSpatialCoordinates:
@classmethod
def massifs_coordinates_for_display(cls, massif_names) -> AbstractSpatialCoordinates:
# Coordinate object that represents the massif coordinates in Lambert extended
# extracted for a csv file, and used only for display purposes
df = self.load_df_centroid()
df = cls.load_df_centroid()
# Filter, keep massifs present at the altitude of interest
df = df.loc[self.study_massif_names]
df = df.loc[massif_names, :]
# Build coordinate object from df_centroid
return AbstractSpatialCoordinates.from_df(df)
def visualize_study(self, ax=None, massif_name_to_value=None, show=True, fill=True, replace_blue_by_white=True,
label=None, add_text=False, cmap=None, vmax=100, vmin=0):
@classmethod
def visualize_study(cls, ax=None, massif_name_to_value: Union[None, Dict[str, float]] = None, show=True, fill=True,
replace_blue_by_white=True,
label=None, add_text=False, cmap=None, vmax=100, vmin=0,
default_color_for_missing_massif='w',
default_color_for_nan_values='w',
):
if ax is None:
ax = plt.gca()
if massif_name_to_value is None:
massif_name_to_fill_kwargs = None
massif_names, values = None, None
else:
massif_names, values = list(zip(*massif_name_to_value.items()))
if cmap is None:
......@@ -237,14 +244,11 @@ class AbstractStudy(object):
norm = Normalize(vmin, vmax)
create_colorbase_axis(ax, label, cmap, norm)
m = cm.ScalarMappable(norm=norm, cmap=cmap)
colors = [m.to_rgba(value) if not np.isnan(value) else 'w' for value in values]
colors = [m.to_rgba(value) if not np.isnan(value) else default_color_for_nan_values for value in values]
massif_name_to_fill_kwargs = {massif_name: {'color': color} for massif_name, color in
zip(massif_names, colors)}
if ax is None:
ax = plt.gca()
for coordinate_id, coords_list in self.idx_to_coords_list.items():
for coordinate_id, coords_list in cls.idx_to_coords_list.items():
# Retrieve the list of coords (x,y) that define the contour of the massif of id coordinate_id
# if j == 0:
......@@ -253,11 +257,14 @@ class AbstractStudy(object):
coords_list = list(zip(*coords_list))
ax.plot(*coords_list, color='black')
# Potentially, fill the inside of the polygon with some color
if fill and coordinate_id in self.coordinate_id_to_massif_name:
massif_name = self.coordinate_id_to_massif_name[coordinate_id]
if fill and coordinate_id in cls.coordinate_id_to_massif_name:
massif_name = cls.coordinate_id_to_massif_name[coordinate_id]
if massif_name_to_fill_kwargs is not None and massif_name in massif_name_to_fill_kwargs:
fill_kwargs = massif_name_to_fill_kwargs[massif_name]
ax.fill(*coords_list, **fill_kwargs)
else:
ax.fill(*coords_list, **{'color': default_color_for_missing_massif})
# else:
# fill_kwargs = {}
......@@ -268,8 +275,11 @@ class AbstractStudy(object):
# ax.scatter(x, y)
# ax.text(x, y, massif_name)
# Display the center of the massif
ax.scatter(self.massifs_coordinates_for_display.x_coordinates,
self.massifs_coordinates_for_display.y_coordinates, s=1)
print(massif_names)
masssif_coordinate_for_display = cls.massifs_coordinates_for_display(massif_names)
ax.scatter(masssif_coordinate_for_display.x_coordinates,
masssif_coordinate_for_display.y_coordinates, s=1)
# Improve some explanation on the X axis and on the Y axis
ax.set_xlabel('Longitude (km)')
ax.xaxis.set_major_formatter(get_km_formatter())
......@@ -277,7 +287,7 @@ class AbstractStudy(object):
ax.yaxis.set_major_formatter(get_km_formatter())
# Display the name or value of the massif
if add_text:
for _, row in self.massifs_coordinates_for_display.df_all_coordinates.iterrows():
for _, row in masssif_coordinate_for_display.df_all_coordinates.iterrows():
x, y = list(row)
massif_name = row.name
value = massif_name_to_value[massif_name]
......@@ -293,15 +303,15 @@ class AbstractStudy(object):
""" Path properties """
@property
@classproperty
def relative_path(self) -> str:
return r'local/spatio_temporal_datasets'
@property
@classproperty
def full_path(self) -> str:
return get_full_path(relative_path=self.relative_path)
@property
@classproperty
def map_full_path(self) -> str:
return op.join(self.full_path, 'map')
......@@ -321,12 +331,13 @@ class AbstractStudy(object):
def original_safran_massif_id_to_massif_name(self) -> Dict[int, str]:
return {massif_id: massif_name for massif_id, massif_name in enumerate(self.all_massif_names)}
@cached_property
def all_massif_names(self) -> List[str]:
# @cached_property
@classproperty
def all_massif_names(cls) -> List[str]:
"""
Pour l'identification des massifs, le numéro de la variable massif_num correspond à celui de l'attribut num_opp
"""
metadata_path = op.join(self.full_path, self.REANALYSIS_FOLDER, 'metadata')
metadata_path = op.join(cls.full_path, cls.REANALYSIS_FOLDER, 'metadata')
dbf = Dbf5(op.join(metadata_path, 'massifs_alpes.dbf'))
df = dbf.to_dataframe().copy() # type: pd.DataFrame
dbf.f.close()
......@@ -336,15 +347,16 @@ class AbstractStudy(object):
all_massif_names[all_massif_names.index('Beaufortin')] = 'Beaufortain'
return all_massif_names
def load_df_centroid(self) -> pd.DataFrame:
@classmethod
def load_df_centroid(cls) -> pd.DataFrame:
# Load df_centroid containing all the massif names
df_centroid = pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv'))
df_centroid = pd.read_csv(op.join(cls.map_full_path, 'coordonnees_massifs_alpes.csv'))
df_centroid.set_index('NOM', inplace=True)
# Check that the names of massifs are the same
symmetric_difference = set(df_centroid.index).symmetric_difference(self.all_massif_names)
symmetric_difference = set(df_centroid.index).symmetric_difference(cls.all_massif_names)
assert len(symmetric_difference) == 0, symmetric_difference
# Sort the column in the order of the SAFRAN dataset
df_centroid = df_centroid.reindex(self.all_massif_names, axis=0)
df_centroid = df_centroid.reindex(cls.all_massif_names, axis=0)
for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float)
return df_centroid
......@@ -371,12 +383,12 @@ class AbstractStudy(object):
""" Visualization methods """
@property
def coordinate_id_to_massif_name(self) -> Dict[int, str]:
df_centroid = self.load_df_centroid()
@classproperty
def coordinate_id_to_massif_name(cls) -> Dict[int, str]:
df_centroid = cls.load_df_centroid()
return dict(zip(df_centroid['id'], df_centroid.index))
@property
@classproperty
def idx_to_coords_list(self):
df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv'))
coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X],
......
......@@ -7,7 +7,7 @@ from experiment.meteo_france_data.stations_data.visualization.comparisons_visual
def visualize_all_stations():
vizu = ComparisonsVisualization(altitudes=ALL_ALTITUDES_WITH_20_STATIONS_AT_LEAST, margin=150)
vizu.visualize_maximum()
vizu.visualize_maximum(visualize_metric_only=False)
def visualize_non_nan_station():
......@@ -53,9 +53,9 @@ def quick_metric_analysis():
if __name__ == '__main__':
# wrong_example3()
# visualize_fast_comparison()
# visualize_all_stations()
visualize_all_stations()
# quick_metric_analysis()
# wrong_example2()
# visualize_non_nan_station()
quick_metric_analysis()
# example()
......@@ -7,6 +7,7 @@ import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from experiment.meteo_france_data.scm_models_data.abstract_study import AbstractStudy
from experiment.meteo_france_data.scm_models_data.visualization.study_visualization.study_visualizer import \
VisualizationParameters
from experiment.meteo_france_data.stations_data.comparison_analysis import ComparisonAnalysis, MASSIF_COLUMN_NAME, \
......@@ -17,6 +18,9 @@ from experiment.trend_analysis.univariate_test.utils import compute_gev_change_p
from extreme_estimator.extreme_models.result_from_fit import ResultFromIsmev
from extreme_estimator.extreme_models.utils import r, safe_run_r_estimator, ro
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from utils import classproperty
MAE_COLUMN_NAME = 'mean absolute difference'
DISTANCE_COLUMN_NAME = 'distance'
path_df_location_to_value_csv_example = r'/home/erwan/Documents/projects/spatiotemporalextremes/experiment/meteo_france_data/stations_data/csv/example.csv'
......@@ -47,6 +51,14 @@ class ComparisonsVisualization(VisualizationParameters):
def comparisons(self) -> List[ComparisonAnalysis]:
return list(self.altitude_to_comparison.values())
@property
def comparison(self):
return self.comparisons[0]
@property
def study(self):
return self.comparison.study
@property
def nb_plot(self):
return sum([c.nb_massifs for c in self.comparisons])
......@@ -78,6 +90,8 @@ class ComparisonsVisualization(VisualizationParameters):
plt.suptitle(title)
if show:
plt.show()
else:
plt.clf()
# Build dataframe from the dictionary
df = pd.DataFrame(tuple_location_to_values, index=index).transpose()
......@@ -85,18 +99,24 @@ class ComparisonsVisualization(VisualizationParameters):
return df
@classmethod
def visualize_metric(cls, df_location_to_value=None):
def visualize_metric(cls, df=None):
# Load or update df value from example file
if df_location_to_value is None:
df_location_to_value = pd.read_csv(path_df_location_to_value_csv_example, index_col=[0, 1, 2])
if df is None:
df = pd.read_csv(path_df_location_to_value_csv_example, index_col=[0, 1, 2])
else:
df_location_to_value.to_csv(path_df_location_to_value_csv_example)
df.to_csv(path_df_location_to_value_csv_example)
print(df_location_to_value)
print(df_location_to_value.index)
print(df_location_to_value.columns)
# Compute some column like a classication boolean
# Display some score spatially
df_score = df.groupby([MASSIF_COLUMN_NAME]).mean()
s_mae = df_score[MAE_COLUMN_NAME]
massif_name_to_value = s_mae.to_dict()
AbstractStudy.visualize_study(massif_name_to_value=massif_name_to_value,
default_color_for_missing_massif='b',
cmap=plt.cm.Reds,
vmin=s_mae.min(),
vmax=s_mae.max())
def _visualize_ax_main(self, plot_function, comparison: ComparisonAnalysis, massif, ax=None, show=False):
if ax is None:
......@@ -126,21 +146,19 @@ class ComparisonsVisualization(VisualizationParameters):
ordered_value_dict = OrderedDict()
label = i
label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME])
label += ' ({}km)'.format(s[DISTANCE_COLUMN_NAME])
maxima, years = self.get_maxima_and_year(s)
# Compute the distance between maxima and maxima_center
# In percent the number of times the observations is stricty higher than the reanalysis
mask = ~np.isnan(maxima_center) & ~np.isnan(maxima)
mean_absolute_difference = np.round(np.mean(np.abs(maxima[mask] - maxima_center[mask])), 0)
label += 'Mean absolute diff {}mm'.format(mean_absolute_difference)
ordered_value_dict['mean absolute difference'] = mean_absolute_difference
# metric = np.mean(np.sign(maxima[mask] - maxima_center[mask]) == 1)
# metric = np.round(metric * 100, 0)
# label += '{}% strictly above'.format(metric)
ordered_value_dict[MAE_COLUMN_NAME] = mean_absolute_difference
label += ' ({}m)'.format(s[ALTITUDE_COLUMN_NAME])
label += ' ({}km)'.format(s[DISTANCE_COLUMN_NAME])
label += '({}mm)'.format(mean_absolute_difference)
plot_color = color if REANALYSE_STR not in label else 'g'
plot_ordered_value_dict = plot_function(ax, ax2, years, maxima, label, plot_color)
......
......@@ -44,6 +44,38 @@ class Example(object):
return 2
# https://stackoverflow.com/questions/5189699/how-to-make-a-class-property
# does not enable setter, but follow the link to activate setter option
class ClassPropertyDescriptor(object):
def __init__(self, fget, fset=None):
self.fget = fget
self.fset = fset
def __get__(self, obj, klass=None):
if klass is None:
klass = type(obj)
return self.fget.__get__(obj, klass)()
def __set__(self, obj, value):
if not self.fset:
raise AttributeError("can't set attribute")
type_ = type(obj)
return self.fset.__get__(obj, type_)(value)
def setter(self, func):
if not isinstance(func, (classmethod, staticmethod)):
func = classmethod(func)
self.fset = func
return self
def classproperty(func):
if not isinstance(func, (classmethod, staticmethod)):
func = classmethod(func)
return ClassPropertyDescriptor(func)
if __name__ == '__main__':
e = Example()
print(e.big_attribute)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment