Commit b097f9da authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[SCM] add mean daily data regional map. improve the 2D map for the independent margin too.

parent 152f00fb
No related merge requests found
Showing with 122 additions and 112 deletions
+122 -112
...@@ -9,7 +9,7 @@ from netCDF4 import Dataset ...@@ -9,7 +9,7 @@ from netCDF4 import Dataset
from experiment.meteo_france_SCM_study.abstract_variable import AbstractVariable from experiment.meteo_france_SCM_study.abstract_variable import AbstractVariable
from experiment.meteo_france_SCM_study.massif import safran_massif_names_from_datasets from experiment.meteo_france_SCM_study.massif import safran_massif_names_from_datasets
from extreme_estimator.margin_fits.plot.mask_poly import mask_outside_polygon from extreme_estimator.margin_fits.plot.create_shifted_cmap import get_color_rbga_shifted
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
AbstractSpatialCoordinates AbstractSpatialCoordinates
...@@ -34,7 +34,7 @@ class AbstractStudy(object): ...@@ -34,7 +34,7 @@ class AbstractStudy(object):
""" Data """ """ Data """
@property @property
def df_all_snowfall_concatenated(self) -> pd.DataFrame: def df_all_daily_time_series_concatenated(self) -> pd.DataFrame:
df_list = [pd.DataFrame(time_serie, columns=self.safran_massif_names) for time_serie in df_list = [pd.DataFrame(time_serie, columns=self.safran_massif_names) for time_serie in
self.year_to_daily_time_serie.values()] self.year_to_daily_time_serie.values()]
df_concatenated = pd.concat(df_list) df_concatenated = pd.concat(df_list)
...@@ -123,7 +123,13 @@ class AbstractStudy(object): ...@@ -123,7 +123,13 @@ class AbstractStudy(object):
""" Visualization methods """ """ Visualization methods """
def visualize(self, ax=None, massif_name_to_fill_kwargs=None, show=True, fill=True): def visualize_study(self, ax=None, massif_name_to_value=None, show=True, fill=True, replace_blue_by_white=True,
label=None):
massif_names, values = list(zip(*massif_name_to_value.items()))
colors = get_color_rbga_shifted(ax, replace_blue_by_white, values, label=label)
massif_name_to_fill_kwargs = {massif_name: {'color': color} for massif_name, color in
zip(massif_names, colors)}
if ax is None: if ax is None:
ax = plt.gca() ax = plt.gca()
df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv')) df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv'))
...@@ -131,19 +137,19 @@ class AbstractStudy(object): ...@@ -131,19 +137,19 @@ class AbstractStudy(object):
row_massif[AbstractCoordinates.COORDINATE_Y]) row_massif[AbstractCoordinates.COORDINATE_Y])
for _, row_massif in df_massif.iterrows()] for _, row_massif in df_massif.iterrows()]
for j, coordinate_id in enumerate(set([tuple[0] for tuple in coord_tuples])): for j, coordinate_id in enumerate(set([t[0] for t in coord_tuples])):
# Retrieve the list of coords (x,y) that define the contour of the massif of id coordinate_id # Retrieve the list of coords (x,y) that define the contour of the massif of id coordinate_id
l = [coords for idx, *coords in coord_tuples if idx == coordinate_id] coords_list = [coords for idx, *coords in coord_tuples if idx == coordinate_id]
# if j == 0: # if j == 0:
# mask_outside_polygon(poly_verts=l, ax=ax) # mask_outside_polygon(poly_verts=l, ax=ax)
# Plot the contour of the massif # Plot the contour of the massif
l = list(zip(*l)) coords_list = list(zip(*coords_list))
ax.plot(*l, color='black') ax.plot(*coords_list, color='black')
# Potentially, fill the inside of the polygon with some color # Potentially, fill the inside of the polygon with some color
if fill: if fill:
massif_name = self.coordinate_id_to_massif_name[coordinate_id] massif_name = self.coordinate_id_to_massif_name[coordinate_id]
fill_kwargs = massif_name_to_fill_kwargs[massif_name] if massif_name_to_fill_kwargs is not None else {} fill_kwargs = massif_name_to_fill_kwargs[massif_name] if massif_name_to_fill_kwargs is not None else {}
ax.fill(*l, **fill_kwargs) ax.fill(*coords_list, **fill_kwargs)
# Display the center of the massif # Display the center of the massif
ax.scatter(self.massifs_coordinates.x_coordinates, self.massifs_coordinates.y_coordinates, s=1) ax.scatter(self.massifs_coordinates.x_coordinates, self.massifs_coordinates.y_coordinates, s=1)
......
...@@ -50,11 +50,12 @@ def extended_visualization(): ...@@ -50,11 +50,12 @@ def extended_visualization():
def normal_visualization(): def normal_visualization():
save_to_file = False save_to_file = False
only_first_one = True only_first_one = True
for study_class in SCM_STUDIES[-1:]: for study_class in SCM_STUDIES[:1]:
for study in study_iterator(study_class, only_first_one=only_first_one): for study in study_iterator(study_class, only_first_one=only_first_one):
study_visualizer = StudyVisualizer(study, save_to_file=save_to_file) study_visualizer = StudyVisualizer(study, save_to_file=save_to_file)
# study_visualizer.visualize_independent_margin_fits(threshold=[None, 20, 40, 60][0]) # study_visualizer.visualize_independent_margin_fits(threshold=[None, 20, 40, 60][0])
study_visualizer.visualize_linear_margin_fit(only_first_max_stable=True) study_visualizer.visualize_mean_daily_values()
# study_visualizer.visualize_linear_margin_fit(only_first_max_stable=True)
def complete_analysis(only_first_one=False): def complete_analysis(only_first_one=False):
...@@ -73,6 +74,6 @@ def complete_analysis(only_first_one=False): ...@@ -73,6 +74,6 @@ def complete_analysis(only_first_one=False):
if __name__ == '__main__': if __name__ == '__main__':
# normal_visualization() normal_visualization()
extended_visualization() # extended_visualization()
# complete_analysis() # complete_analysis()
...@@ -14,14 +14,12 @@ from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \ ...@@ -14,14 +14,12 @@ from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \
FullEstimatorInASingleStepWithSmoothMargin FullEstimatorInASingleStepWithSmoothMargin
from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import SmoothMarginEstimator from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import SmoothMarginEstimator
from extreme_estimator.extreme_models.margin_model.smooth_margin_model import LinearAllParametersAllDimsMarginModel from extreme_estimator.extreme_models.margin_model.smooth_margin_model import LinearAllParametersAllDimsMarginModel
from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model import CovarianceFunction, \ from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model import CovarianceFunction
AbstractMaxStableModelWithCovarianceFunction
from extreme_estimator.margin_fits.abstract_params import AbstractParams from extreme_estimator.margin_fits.abstract_params import AbstractParams
from extreme_estimator.margin_fits.gev.gev_params import GevParams from extreme_estimator.margin_fits.gev.gev_params import GevParams
from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit
from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams
from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit
from extreme_estimator.margin_fits.plot.create_shifted_cmap import get_color_rbga_shifted
from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
from test.test_utils import load_test_max_stable_models from test.test_utils import load_test_max_stable_models
from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_with_only_some_significant_digits from utils import get_display_name_from_object_type, VERSION_TIME, float_to_str_with_only_some_significant_digits
...@@ -53,7 +51,7 @@ class StudyVisualizer(object): ...@@ -53,7 +51,7 @@ class StudyVisualizer(object):
self.figsize = (8.0, 6.0) self.figsize = (8.0, 6.0)
else: else:
self.figsize = (16.0, 10.0) self.figsize = (16.0, 10.0)
self.subplot_space = 0.05 self.subplot_space = 0.5
self.coef_zoom_map = 0 self.coef_zoom_map = 0
@property @property
...@@ -98,12 +96,7 @@ class StudyVisualizer(object): ...@@ -98,12 +96,7 @@ class StudyVisualizer(object):
def visualize_experimental_law(self, ax, massif_id): def visualize_experimental_law(self, ax, massif_id):
# Display the experimental law for a given massif # Display the experimental law for a given massif
if self.year_for_kde_plot is not None: all_massif_data = self.get_all_massif_data(massif_id)
all_massif_data = self.study.year_to_daily_time_serie[self.year_for_kde_plot][:, massif_id]
else:
all_massif_data = np.concatenate(
[data[:, massif_id] for data in self.study.year_to_daily_time_serie.values()])
all_massif_data = np.sort(all_massif_data)
# Display an histogram on the background (with 100 bins, for visibility, and to check 0.9 quantiles) # Display an histogram on the background (with 100 bins, for visibility, and to check 0.9 quantiles)
ax2 = ax.twiny() if self.vertical_kde_plot else ax.twinx() ax2 = ax.twiny() if self.vertical_kde_plot else ax.twinx()
...@@ -177,6 +170,15 @@ class StudyVisualizer(object): ...@@ -177,6 +170,15 @@ class StudyVisualizer(object):
ax.set_title(self.study.safran_massif_names[massif_id]) ax.set_title(self.study.safran_massif_names[massif_id])
ax.legend() ax.legend()
def get_all_massif_data(self, massif_id):
if self.year_for_kde_plot is not None:
all_massif_data = self.study.year_to_daily_time_serie[self.year_for_kde_plot][:, massif_id]
else:
all_massif_data = np.concatenate(
[data[:, massif_id] for data in self.study.year_to_daily_time_serie.values()])
all_massif_data = np.sort(all_massif_data)
return all_massif_data
def visualize_all_mean_and_max_graphs(self): def visualize_all_mean_and_max_graphs(self):
self.visualize_massif_graphs(self.visualize_mean_and_max_graph) self.visualize_massif_graphs(self.visualize_mean_and_max_graph)
self.plot_name = ' mean with sliding window of size {}'.format(self.window_size_for_smoothing) self.plot_name = ' mean with sliding window of size {}'.format(self.window_size_for_smoothing)
...@@ -206,7 +208,8 @@ class StudyVisualizer(object): ...@@ -206,7 +208,8 @@ class StudyVisualizer(object):
def visualize_linear_margin_fit(self, only_first_max_stable=False): def visualize_linear_margin_fit(self, only_first_max_stable=False):
default_covariance_function = CovarianceFunction.cauchy default_covariance_function = CovarianceFunction.cauchy
plot_name = 'Full Likelihood with Linear marginals and max stable dependency structure' plot_name = 'Full Likelihood with Linear marginals and max stable dependency structure'
plot_name += '\n(with {} covariance structure when a covariance is needed)'.format(str(default_covariance_function).split('.')[-1]) plot_name += '\n(with {} covariance structure when a covariance is needed)'.format(
str(default_covariance_function).split('.')[-1])
self.plot_name = plot_name self.plot_name = plot_name
max_stable_models = load_test_max_stable_models(default_covariance_function=default_covariance_function) max_stable_models = load_test_max_stable_models(default_covariance_function=default_covariance_function)
if only_first_max_stable: if only_first_max_stable:
...@@ -235,12 +238,12 @@ class StudyVisualizer(object): ...@@ -235,12 +238,12 @@ class StudyVisualizer(object):
margin_fct = estimator.margin_function_fitted margin_fct = estimator.margin_function_fitted
axes = margin_fct.visualize_function(show=False, axes=axes, title='') axes = margin_fct.visualize_function(show=False, axes=axes, title='')
def get_lim_array(ax): def get_lim_array(ax_with_lim_to_measure):
return np.array([np.array(ax.get_xlim()), np.array(ax.get_ylim())]) return np.array([np.array(ax_with_lim_to_measure.get_xlim()), np.array(ax_with_lim_to_measure.get_ylim())])
for ax in axes: for ax in axes:
old_lim = get_lim_array(ax) old_lim = get_lim_array(ax)
self.study.visualize(ax, fill=False, show=False) self.study.visualize_study(ax, fill=False, show=False)
new_lim = get_lim_array(ax) new_lim = get_lim_array(ax)
assert 0 <= self.coef_zoom_map <= 1 assert 0 <= self.coef_zoom_map <= 1
updated_lim = new_lim * self.coef_zoom_map + (1 - self.coef_zoom_map) * old_lim updated_lim = new_lim * self.coef_zoom_map + (1 - self.coef_zoom_map) * old_lim
...@@ -277,20 +280,19 @@ class StudyVisualizer(object): ...@@ -277,20 +280,19 @@ class StudyVisualizer(object):
if not self.only_one_graph: if not self.only_one_graph:
filename += "/{}".format('_'.join(self.plot_name.split())) filename += "/{}".format('_'.join(self.plot_name.split()))
filepath = op.join(self.study.result_full_path, filename + '.png') filepath = op.join(self.study.result_full_path, filename + '.png')
dir = op.dirname(filepath) dirname = op.dirname(filepath)
if not op.exists(dir): if not op.exists(dirname):
os.makedirs(dir, exist_ok=True) os.makedirs(dirname, exist_ok=True)
plt.savefig(filepath) plt.savefig(filepath)
def visualize_independent_margin_fits(self, threshold=None, axes=None): def visualize_independent_margin_fits(self, threshold=None, axes=None):
# Fit either a GEV or a GPD
if threshold is None: if threshold is None:
params_names = GevParams.SUMMARY_NAMES params_names = GevParams.SUMMARY_NAMES
df = self.df_gev_mle_each_massif df = self.df_gev_mle
# todo: understand how Maurienne could be negative
# print(df.head())
else: else:
params_names = GpdParams.SUMMARY_NAMES params_names = GpdParams.SUMMARY_NAMES
df = self.df_gpd_mle_each_massif(threshold) df = self.df_gpd_mle(threshold)
if axes is None: if axes is None:
fig, axes = plt.subplots(1, len(params_names)) fig, axes = plt.subplots(1, len(params_names))
...@@ -298,39 +300,44 @@ class StudyVisualizer(object): ...@@ -298,39 +300,44 @@ class StudyVisualizer(object):
for i, gev_param_name in enumerate(params_names): for i, gev_param_name in enumerate(params_names):
ax = axes[i] ax = axes[i]
massif_name_to_value = df.loc[gev_param_name, :].to_dict() self.study.visualize_study(ax=ax, massif_name_to_value=df.loc[gev_param_name, :].to_dict(), show=False,
# Compute the middle point of the values for the color map replace_blue_by_white=gev_param_name != GevParams.SHAPE,
values = list(massif_name_to_value.values()) label=gev_param_name)
colors = get_color_rbga_shifted(ax, gev_param_name, values)
massif_name_to_fill_kwargs = {massif_name: {'color': color} for massif_name, color in
zip(self.study.safran_massif_names, colors)}
self.study.visualize(ax=ax, massif_name_to_fill_kwargs=massif_name_to_fill_kwargs, show=False)
if self.show: if self.show:
# plot_name = 'Full Likelihood with Linear marginals and max stable dependency structure'
plt.show() plt.show()
def visualize_cmap(self, massif_name_to_value): def visualize_mean_daily_values(self, ax=None):
orig_cmap = plt.cm.coolwarm if ax is None:
# shifted_cmap = shiftedColorMap(orig_cmap, midpoint=0.75, name='shifted') _, ax = plt.subplots(1, 1, figsize=self.figsize)
massif_name_to_fill_kwargs = {massif_name: {'color': orig_cmap(value)} for massif_name, value in massif_name_to_value = {
massif_name_to_value.items()} massif_name: np.mean(self.get_all_massif_data(massif_id))
for massif_id, massif_name in enumerate(self.study.safran_massif_names)
}
self.study.visualize(massif_name_to_fill_kwargs=massif_name_to_fill_kwargs) self.study.visualize_study(ax=ax, massif_name_to_value=massif_name_to_value, show=False)
if self.show:
plt.show()
""" Statistics methods """ """ Statistics methods """
@property @property
def df_gev_mle_each_massif(self): def df_maxima_gev(self) -> pd.DataFrame:
return self.study.observations_annual_maxima.df_maxima_gev
@property
def df_gev_mle(self) -> pd.DataFrame:
# Fit a margin_fits on each massif # Fit a margin_fits on each massif
massif_to_gev_mle = { massif_to_gev_mle = {massif_name: GevMleFit(self.df_maxima_gev.loc[massif_name]).gev_params.summary_serie
massif_name: GevMleFit(self.study.observations_annual_maxima.loc[massif_name]).gev_params.summary_serie for massif_name in self.study.safran_massif_names}
for massif_name in self.study.safran_massif_names}
return pd.DataFrame(massif_to_gev_mle, columns=self.study.safran_massif_names) return pd.DataFrame(massif_to_gev_mle, columns=self.study.safran_massif_names)
def df_gpd_mle_each_massif(self, threshold): def df_gpd_mle(self, threshold) -> pd.DataFrame:
# Fit a margin fit on each massif # Fit a margin fit on each massif
massif_to_gev_mle = {massif_name: GpdMleFit(self.study.df_all_snowfall_concatenated[massif_name], massif_to_gev_mle = {massif_name: GpdMleFit(self.study.df_all_daily_time_series_concatenated[massif_name],
threshold=threshold).gpd_params.summary_serie threshold=threshold).gpd_params.summary_serie
for massif_name in self.study.safran_massif_names} for massif_name in self.study.safran_massif_names}
return pd.DataFrame(massif_to_gev_mle, columns=self.study.safran_massif_names) return pd.DataFrame(massif_to_gev_mle, columns=self.study.safran_massif_names)
#
#
#
if __name__ == '__main__': # if __name__ == '__main__':
# Parameters # # Parameters
scenarios = [] # scenarios = []
nb_obs_list = [] # nb_obs_list = []
nb_fit = 1000 # nb_fit = 1000
#
# Load the object that will handle the simulation # # Load the object that will handle the simulation
simu = Simulations(nb_fit, scenarios, nb_obs_list) # simu = Simulations(nb_fit, scenarios, nb_obs_list)
#
# Fit many estimators to this simulation # # Fit many estimators to this simulation
estimator_types = [] # estimator_types = []
for estimator_type in estimator_types: # for estimator_type in estimator_types:
simu.fit(estimator_type) # simu.fit(estimator_type)
#
# Comparison of the diverse estimator # # Comparison of the diverse estimator
#
# Compare all the estimator on a global graph (one graph per scenario) # # Compare all the estimator on a global graph (one graph per scenario)
# On each graph the X axis should be the number of obs # # On each graph the X axis should be the number of obs
# the Y graph should the error # # the Y graph should the error
simu.visualize_mean_test_error_graph(estimator_types, scenarios, nb_obs_list) # simu.visualize_mean_test_error_graph(estimator_types, scenarios, nb_obs_list)
# the other possible view, is to have one graph per number of observations # # the other possible view, is to have one graph per number of observations
# on the X axis should the name of the different estimator # # on the X axis should the name of the different estimator
# on the y axis their error # # on the y axis their error
#
#
# Plot the same graph for the train/test error # # Plot the same graph for the train/test error
# For a single scenario, and a single obs (we give a plot detailing all the estimation steps that enabled to get # # For a single scenario, and a single obs (we give a plot detailing all the estimation steps that enabled to get
# the result) # # the result)
simu.visualize_comparison_graph(estimator_types, scenario, nb_obs) # simu.visualize_comparison_graph(estimator_types, scenario, nb_obs)
#
# Analyse the result of a single estimator # # Analyse the result of a single estimator
#
# Or all the result could be recorded in a matrix, with scenario as line, and nb_observaitons as columns # # Or all the result could be recorded in a matrix, with scenario as line, and nb_observaitons as columns
# with the mean value (and the std in parenthesis) # # with the mean value (and the std in parenthesis)
# (on the border on this matrix we should have the mean value) # # (on the border on this matrix we should have the mean value)
# for example, the first columns should be the mean of the other column for the same line # # for example, the first columns should be the mean of the other column for the same line
simu.visualize_mean_test_error_matrix(estimator_type, scenarios, nb_obs_list) # simu.visualize_mean_test_error_matrix(estimator_type, scenarios, nb_obs_list)
#
#
# # #
simu.visualize # simu.visualize
#
#
#
from typing import Dict
import matplotlib as mpl import matplotlib as mpl
import matplotlib.cm as cm import matplotlib.cm as cm
import matplotlib.colorbar as cbar import matplotlib.colorbar as cbar
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
import pandas as pd
from mpl_toolkits.axes_grid1 import make_axes_locatable from mpl_toolkits.axes_grid1 import make_axes_locatable
from extreme_estimator.margin_fits.plot.shifted_color_map import shiftedColorMap
from extreme_estimator.margin_fits.extreme_params import ExtremeParams from extreme_estimator.margin_fits.extreme_params import ExtremeParams
from extreme_estimator.margin_fits.gev.gev_params import GevParams from extreme_estimator.margin_fits.plot.shifted_color_map import shiftedColorMap
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.slicer.split import Split
def plot_extreme_param(ax, gev_param_name, values): def plot_extreme_param(ax, label: str, values: np.ndarray):
# Load the shifted cmap to center on a middle point # Load the shifted cmap to center on a middle point
vmin, vmax = np.min(values), np.max(values) vmin, vmax = np.min(values), np.max(values)
if vmin < 0 < vmax: if vmin < 0 < vmax:
...@@ -32,19 +26,21 @@ def plot_extreme_param(ax, gev_param_name, values): ...@@ -32,19 +26,21 @@ def plot_extreme_param(ax, gev_param_name, values):
divider = make_axes_locatable(ax) divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='5%', pad=0.03) cax = divider.append_axes('right', size='5%', pad=0.03)
cb = cbar.ColorbarBase(cax, cmap=shifted_cmap, norm=norm) cb = cbar.ColorbarBase(cax, cmap=shifted_cmap, norm=norm)
cb.set_label(gev_param_name) cb.set_label(label)
return norm, shifted_cmap return norm, shifted_cmap
def get_color_rbga_shifted(ax, gev_param_name, values): def get_color_rbga_shifted(ax, replace_blue_by_white: bool, values: np.ndarray, label=None):
""" """
For some display it was necessary to transform dark blue values into white values For some display it was necessary to transform dark blue values into white values
""" """
norm, shifted_cmap = plot_extreme_param(ax, gev_param_name, values) norm, shifted_cmap = plot_extreme_param(ax, label, values)
m = cm.ScalarMappable(norm=norm, cmap=shifted_cmap) m = cm.ScalarMappable(norm=norm, cmap=shifted_cmap)
colors = [m.to_rgba(value) for value in values] colors = [m.to_rgba(value) for value in values]
if gev_param_name != ExtremeParams.SHAPE: # We do not want any blue values for parameters other than the Shape
colors = [color if color[2] == 1 else (1, 1, 1, 1) for color in colors] # So when the value corresponding to the blue color is 1, then we set the color to white, i.e. (1,1,1,1)
if replace_blue_by_white:
colors = [color if color[2] != 1 else (1, 1, 1, 1) for color in colors]
return colors return colors
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment