diff --git a/experiment/safran_study/__init__.py b/experiment/safran_study/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/safran_study/fit_safran.py b/experiment/safran_study/fit_safran.py similarity index 90% rename from safran_study/fit_safran.py rename to experiment/safran_study/fit_safran.py index 29c6f9aedbeda84d8ed654edf2a59274249acf55..843150240e3c069adaa62c86100a5933be9724ac 100644 --- a/safran_study/fit_safran.py +++ b/experiment/safran_study/fit_safran.py @@ -1,7 +1,6 @@ import pandas as pd -from safran_study.safran import Safran -from safran_study.safran_extended import ExtendedSafran +from experiment.safran_study.safran_extended import ExtendedSafran from utils import VERSION diff --git a/experiment/safran_study/main_visualize_safran.py b/experiment/safran_study/main_visualize_safran.py new file mode 100644 index 0000000000000000000000000000000000000000..dd0e9ea5b549fd5f44bf1aeac831e0d7b6d37ea1 --- /dev/null +++ b/experiment/safran_study/main_visualize_safran.py @@ -0,0 +1,22 @@ +from experiment.safran_study.safran import Safran +from itertools import product + +from experiment.safran_study.safran_visualizer import SafranVisualizer + + +def load_all_safran(only_first_one=False): + all_safran = [] + for safran_alti, nb_day in product([1800, 2400], [1, 3, 7]): + print('alti: {}, nb_day: {}'.format(safran_alti, nb_day)) + all_safran.append(Safran(safran_alti, nb_day)) + if only_first_one: + break + return all_safran + + +if __name__ == '__main__': + for safran in load_all_safran(only_first_one=True): + safran_visualizer = SafranVisualizer(safran) + # safran_visualizer.visualize_independent_margin_fits(threshold=[None, 20, 40, 60][1]) + # safran_visualizer.visualize_smooth_margin_fit() + safran_visualizer.visualize_full_fit() diff --git a/safran_study/massif.py b/experiment/safran_study/massif.py similarity index 100% rename from safran_study/massif.py rename to experiment/safran_study/massif.py diff --git a/safran_study/safran.py b/experiment/safran_study/safran.py similarity index 57% rename from safran_study/safran.py rename to experiment/safran_study/safran.py index 43348aabe5f2626cd2eec753130848b3a6d48cde..63185388e2ddd231eed05e60ee1b5fe6c5cb714b 100644 --- a/safran_study/safran.py +++ b/experiment/safran_study/safran.py @@ -1,26 +1,19 @@ import os -import matplotlib as mpl -import matplotlib.colorbar as cbar +from typing import List + import os.path as op from collections import OrderedDict import matplotlib.pyplot as plt import pandas as pd -from matplotlib import cm -from mpl_toolkits.axes_grid1 import make_axes_locatable from netCDF4 import Dataset -from extreme_estimator.margin_fits.extreme_params import ExtremeParams -from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit -from extreme_estimator.margin_fits.gev.gev_params import GevParams -from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams -from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit -from safran_study.massif import safran_massif_names_from_datasets -from safran_study.shifted_color_map import shiftedColorMap -from safran_study.snowfall_annual_maxima import SafranSnowfall +from experiment.safran_study.massif import safran_massif_names_from_datasets +from experiment.safran_study.snowfall_annual_maxima import SafranSnowfall from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ AbstractSpatialCoordinates +from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima from utils import get_full_path, cached_property @@ -36,103 +29,6 @@ class Safran(object): os.makedirs(self.result_full_path, exist_ok=True) df.to_csv(op.join(self.result_full_path, 'merged_array_{}_altitude.csv'.format(self.safran_altitude))) - """ Visualization methods """ - - def visualize(self, ax=None, massif_name_to_fill_kwargs=None, show=True): - if ax is None: - ax = plt.gca() - df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv')) - coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X], - row_massif[AbstractCoordinates.COORDINATE_Y]) - for _, row_massif in df_massif.iterrows()] - - for coordinate_id in set([tuple[0] for tuple in coord_tuples]): - l = [coords for idx, *coords in coord_tuples if idx == coordinate_id] - l = list(zip(*l)) - ax.plot(*l, color='black') - massif_name = self.coordinate_id_to_massif_name[coordinate_id] - fill_kwargs = massif_name_to_fill_kwargs[massif_name] if massif_name_to_fill_kwargs is not None else {} - ax.fill(*l, **fill_kwargs) - ax.scatter(self.massifs_coordinates.x_coordinates, self.massifs_coordinates.y_coordinates) - ax.axis('off') - - if show: - plt.show() - - def visualize_margin_fits_with_cmap(self, threshold=None, show=True, axes=None): - if threshold is None: - params_names = GevParams.SUMMARY_NAMES - df = self.df_gev_mle_each_massif - # todo: understand how Maurienne could be negative - # print(df.head()) - else: - params_names = GpdParams.SUMMARY_NAMES - df = self.df_gpd_mle_each_massif(threshold) - - if axes is None: - fig, axes = plt.subplots(1, len(params_names)) - fig.subplots_adjust(hspace=1.0, wspace=1.0) - - for i, gev_param_name in enumerate(params_names): - ax = axes[i] - massif_name_to_value = df.loc[gev_param_name, :].to_dict() - # Compute the middle point of the values for the color map - values = list(massif_name_to_value.values()) - vmin, vmax = min(values), max(values) - try: - midpoint = 1 - vmax / (vmax + abs(vmin)) - except ZeroDivisionError: - pass - # Load the shifted cmap to center on a middle point - - cmap = [plt.cm.coolwarm, plt.cm.bwr, plt.cm.seismic][1] - if gev_param_name == ExtremeParams.SHAPE: - shifted_cmap = shiftedColorMap(cmap, midpoint=midpoint, name='shifted') - norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax) - else: - shifted_cmap = shiftedColorMap(cmap, midpoint=0.0, name='shifted') - norm = mpl.colors.Normalize(vmin=vmin-1, vmax=vmax) - - m = cm.ScalarMappable(norm=norm, cmap=shifted_cmap) - - massif_name_to_fill_kwargs = {massif_name: {'color': m.to_rgba(value)} for massif_name, value in - massif_name_to_value.items()} - - self.visualize(ax=ax, massif_name_to_fill_kwargs=massif_name_to_fill_kwargs, show=False) - - # Add colorbar - # plt.axis('off') - divider = make_axes_locatable(ax) - cax = divider.append_axes('right', size='5%', pad=0.05) - cb = cbar.ColorbarBase(cax, cmap=shifted_cmap, norm=norm) - cb.set_label(gev_param_name) - - if show: - plt.show() - - def visualize_cmap(self, massif_name_to_value): - orig_cmap = plt.cm.coolwarm - # shifted_cmap = shiftedColorMap(orig_cmap, midpoint=0.75, name='shifted') - - massif_name_to_fill_kwargs = {massif_name: {'color': orig_cmap(value)} for massif_name, value in massif_name_to_value.items()} - - self.visualize(massif_name_to_fill_kwargs=massif_name_to_fill_kwargs) - - """ Statistics methods """ - - @property - def df_gev_mle_each_massif(self): - # Fit a margin_fits on each massif - massif_to_gev_mle = {massif_name: GevMleFit(self.df_annual_maxima[massif_name]).gev_params.summary_serie - for massif_name in self.safran_massif_names} - return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names) - - def df_gpd_mle_each_massif(self, threshold): - # Fit a margin fit on each massif - massif_to_gev_mle = {massif_name: GpdMleFit(self.df_all_snowfall_concatenated[massif_name], threshold=threshold).gpd_params.summary_serie - for massif_name in self.safran_massif_names} - return pd.DataFrame(massif_to_gev_mle, columns=self.safran_massif_names) - """ Data """ @property @@ -145,6 +41,10 @@ class Safran(object): def df_annual_maxima(self): return pd.DataFrame(self.year_to_annual_maxima, index=self.safran_massif_names).T + @property + def observations_annual_maxima(self): + return AnnualMaxima(df_maxima_gev=self.df_annual_maxima.T) + """ Load some attributes only once """ @cached_property @@ -166,7 +66,7 @@ class Safran(object): return year_to_snowfall @property - def safran_massif_names(self): + def safran_massif_names(self) -> List[str]: # Load the names of the massif as defined by SAFRAN return safran_massif_names_from_datasets(self.year_to_dataset_ordered_dict.values()) @@ -196,12 +96,38 @@ class Safran(object): df_centroid = pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv')) # Assert that the massif names are the same between SAFRAN and the coordinate file assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM'])) + df_centroid.set_index('NOM', inplace=True) + df_centroid = df_centroid.loc[self.safran_massif_names] return df_centroid @property - def coordinate_id_to_massif_name(self): + def coordinate_id_to_massif_name(self) -> dict: df_centroid = self.load_df_centroid() - return dict(zip(df_centroid['id'], df_centroid['NOM'])) + return dict(zip(df_centroid['id'], df_centroid.index)) + + + """ Visualization methods """ + + def visualize(self, ax=None, massif_name_to_fill_kwargs=None, show=True): + if ax is None: + ax = plt.gca() + df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv')) + coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X], + row_massif[AbstractCoordinates.COORDINATE_Y]) + for _, row_massif in df_massif.iterrows()] + + for coordinate_id in set([tuple[0] for tuple in coord_tuples]): + l = [coords for idx, *coords in coord_tuples if idx == coordinate_id] + l = list(zip(*l)) + ax.plot(*l, color='black') + massif_name = self.coordinate_id_to_massif_name[coordinate_id] + fill_kwargs = massif_name_to_fill_kwargs[massif_name] if massif_name_to_fill_kwargs is not None else {} + ax.fill(*l, **fill_kwargs) + ax.scatter(self.massifs_coordinates.x_coordinates, self.massifs_coordinates.y_coordinates) + ax.axis('off') + + if show: + plt.show() """ Some properties """ diff --git a/safran_study/safran_extended.py b/experiment/safran_study/safran_extended.py similarity index 96% rename from safran_study/safran_extended.py rename to experiment/safran_study/safran_extended.py index e3e762089d94102902681bc0fe190c459f9fca89..775a47d9d909a5c12f20f2fc8f01886e4f5ef505 100644 --- a/safran_study/safran_extended.py +++ b/experiment/safran_study/safran_extended.py @@ -2,7 +2,7 @@ from collections import OrderedDict import pandas as pd -from safran_study.safran import Safran +from experiment.safran_study.safran import Safran from utils import cached_property diff --git a/experiment/safran_study/safran_visualizer.py b/experiment/safran_study/safran_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..447160a70cec27b2f9088a49275a4ac023816d55 --- /dev/null +++ b/experiment/safran_study/safran_visualizer.py @@ -0,0 +1,129 @@ +import matplotlib as mpl +import matplotlib.colorbar as cbar +import matplotlib.pyplot as plt +import pandas as pd +from matplotlib import cm +from mpl_toolkits.axes_grid1 import make_axes_locatable + +from extreme_estimator.estimator.full_estimator.abstract_full_estimator import \ + FullEstimatorInASingleStepWithSmoothMargin +from extreme_estimator.estimator.margin_estimator.abstract_margin_estimator import SmoothMarginEstimator +from extreme_estimator.estimator.max_stable_estimator.abstract_max_stable_estimator import MaxStableEstimator +from extreme_estimator.extreme_models.margin_model.smooth_margin_model import LinearAllParametersAllDimsMarginModel +from extreme_estimator.extreme_models.max_stable_model.max_stable_models import ExtremalT, Smith +from extreme_estimator.margin_fits.extreme_params import ExtremeParams +from extreme_estimator.margin_fits.gev.gev_params import GevParams +from extreme_estimator.margin_fits.gev.gevmle_fit import GevMleFit +from extreme_estimator.margin_fits.gpd.gpd_params import GpdParams +from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit +from experiment.safran_study.safran import Safran +from experiment.safran_study.shifted_color_map import shiftedColorMap +from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset + + +class SafranVisualizer(object): + + def __init__(self, safran: Safran, show=True): + self.safran = safran + self.show = show + + @property + def observations(self): + return self.safran.observations_annual_maxima + + @property + def coordinates(self): + return self.safran.massifs_coordinates + + @property + def dataset(self): + return AbstractDataset(self.observations, self.coordinates) + + def visualize_smooth_margin_fit(self): + margin_model = LinearAllParametersAllDimsMarginModel(coordinates=self.coordinates) + estimator = SmoothMarginEstimator(dataset=self.dataset, margin_model=margin_model) + estimator.fit() + estimator.margin_function_fitted.visualize(show=self.show) + + def visualize_full_fit(self): + max_stable_model = Smith() + margin_model = LinearAllParametersAllDimsMarginModel(coordinates=self.coordinates) + estimator = FullEstimatorInASingleStepWithSmoothMargin(self.dataset, margin_model, max_stable_model) + estimator.fit() + estimator.margin_function_fitted.visualize(show=self.show) + + def visualize_independent_margin_fits(self, threshold=None, axes=None): + if threshold is None: + params_names = GevParams.SUMMARY_NAMES + df = self.df_gev_mle_each_massif + # todo: understand how Maurienne could be negative + # print(df.head()) + else: + params_names = GpdParams.SUMMARY_NAMES + df = self.df_gpd_mle_each_massif(threshold) + + if axes is None: + fig, axes = plt.subplots(1, len(params_names)) + fig.subplots_adjust(hspace=1.0, wspace=1.0) + + for i, gev_param_name in enumerate(params_names): + ax = axes[i] + massif_name_to_value = df.loc[gev_param_name, :].to_dict() + # Compute the middle point of the values for the color map + values = list(massif_name_to_value.values()) + vmin, vmax = min(values), max(values) + try: + midpoint = 1 - vmax / (vmax + abs(vmin)) + except ZeroDivisionError: + pass + # Load the shifted cmap to center on a middle point + + cmap = [plt.cm.coolwarm, plt.cm.bwr, plt.cm.seismic][1] + if gev_param_name == ExtremeParams.SHAPE: + shifted_cmap = shiftedColorMap(cmap, midpoint=midpoint, name='shifted') + norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax) + else: + shifted_cmap = shiftedColorMap(cmap, midpoint=0.0, name='shifted') + norm = mpl.colors.Normalize(vmin=vmin - 1, vmax=vmax) + + m = cm.ScalarMappable(norm=norm, cmap=shifted_cmap) + + massif_name_to_fill_kwargs = {massif_name: {'color': m.to_rgba(value)} for massif_name, value in + massif_name_to_value.items()} + + self.safran.visualize(ax=ax, massif_name_to_fill_kwargs=massif_name_to_fill_kwargs, show=False) + + # Add colorbar + # plt.axis('off') + divider = make_axes_locatable(ax) + cax = divider.append_axes('right', size='5%', pad=0.05) + cb = cbar.ColorbarBase(cax, cmap=shifted_cmap, norm=norm) + cb.set_label(gev_param_name) + + if self.show: + plt.show() + + def visualize_cmap(self, massif_name_to_value): + orig_cmap = plt.cm.coolwarm + # shifted_cmap = shiftedColorMap(orig_cmap, midpoint=0.75, name='shifted') + + massif_name_to_fill_kwargs = {massif_name: {'color': orig_cmap(value)} for massif_name, value in + massif_name_to_value.items()} + + self.safran.visualize(massif_name_to_fill_kwargs=massif_name_to_fill_kwargs) + + """ Statistics methods """ + + @property + def df_gev_mle_each_massif(self): + # Fit a margin_fits on each massif + massif_to_gev_mle = {massif_name: GevMleFit(self.safran.df_annual_maxima[massif_name]).gev_params.summary_serie + for massif_name in self.safran.safran_massif_names} + return pd.DataFrame(massif_to_gev_mle, columns=self.safran.safran_massif_names) + + def df_gpd_mle_each_massif(self, threshold): + # Fit a margin fit on each massif + massif_to_gev_mle = {massif_name: GpdMleFit(self.safran.df_all_snowfall_concatenated[massif_name], + threshold=threshold).gpd_params.summary_serie + for massif_name in self.safran.safran_massif_names} + return pd.DataFrame(massif_to_gev_mle, columns=self.safran.safran_massif_names) diff --git a/safran_study/shifted_color_map.py b/experiment/safran_study/shifted_color_map.py similarity index 100% rename from safran_study/shifted_color_map.py rename to experiment/safran_study/shifted_color_map.py diff --git a/safran_study/snowfall_annual_maxima.py b/experiment/safran_study/snowfall_annual_maxima.py similarity index 100% rename from safran_study/snowfall_annual_maxima.py rename to experiment/safran_study/snowfall_annual_maxima.py diff --git a/safran_study/visualize_safran.py b/safran_study/visualize_safran.py deleted file mode 100644 index 2b921725b20d25de8c7ed88e3214d7d4c364a592..0000000000000000000000000000000000000000 --- a/safran_study/visualize_safran.py +++ /dev/null @@ -1,30 +0,0 @@ -from safran_study.safran import Safran -from itertools import product - - -def load_all_safran(only_first_one=False): - all_safran = [] - for safran_alti, nb_day in product([1800, 2400], [1, 3, 7]): - print('alti: {}, nb_day: {}'.format(safran_alti, nb_day)) - all_safran.append(Safran(safran_alti, nb_day)) - if only_first_one: - break - return all_safran - - -def fit_mle_gev_independent(threshold=None): - # Dump the result in a csv - dfs = [] - for safran in load_all_safran(only_first_one=True): - safran.visualize_margin_fits_with_cmap(threshold=threshold) - # path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv' - # df.to_csv(path.format(VERSION)) - - -def fit_max_stab(): - pass - - -if __name__ == '__main__': - threshold = [None, 20, 40, 60][1] - fit_mle_gev_independent(threshold=threshold) diff --git a/spatio_temporal_dataset/dataset/abstract_dataset.py b/spatio_temporal_dataset/dataset/abstract_dataset.py index dcf528d327771a3c6a716b319263b6e67151b86b..1ea1ba2c68cebad80c5d29aa6ff5749601483ef7 100644 --- a/spatio_temporal_dataset/dataset/abstract_dataset.py +++ b/spatio_temporal_dataset/dataset/abstract_dataset.py @@ -16,7 +16,7 @@ from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_tempor class AbstractDataset(object): def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates): - assert pd.Index.equals(observations.index, coordinates.index) + assert pd.Index.equals(observations.index, coordinates.index), '\n{}\n{}'.format(observations.index, coordinates.index) self.observations = observations # type: AbstractSpatioTemporalObservations self.coordinates = coordinates # type: AbstractCoordinates self.subset_id_to_column_idxs = None # type: Dict[int, List[int]] diff --git a/test/test_experiment/test_safran/test_safran_visualizer.py b/test/test_experiment/test_safran/test_safran_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..d63d2e97b6164a67b86ebaec193d0f9b01470e49 --- /dev/null +++ b/test/test_experiment/test_safran/test_safran_visualizer.py @@ -0,0 +1,29 @@ +# import unittest +# +# from experiment.safran_study.safran import Safran +# from experiment.safran_study.safran_visualizer import SafranVisualizer +# +# +# class TestSafranVisualizer(unittest.TestCase): +# DISPLAY = False +# +# def setUp(self): +# super().setUp() +# self.safran = Safran(1800, 1) +# self.safran_visualizer = SafranVisualizer(self.safran, show=self.DISPLAY) +# +# def tearDown(self) -> None: +# self.assertTrue(True) +# +# def test_safran_smooth_margin_estimator(self): +# self.safran_visualizer.visualize_smooth_margin_fit() +# +# def test_safran_independent_margin_fits(self): +# self.safran_visualizer.visualize_independent_margin_fits() +# +# def test_safran_full_estimator(self): +# self.safran_visualizer.visualize_full_fit() +# +# +# if __name__ == '__main__': +# unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 146b77134793834cd5ea64a872c7ddf45d3f49fb..3f2c854e0038123855c00a02b2ac3d2251a63d59 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -7,7 +7,7 @@ from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model AbstractMaxStableModelWithCovarianceFunction, CovarianceFunction from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith, BrownResnick, Schlather, \ Geometric, ExtremalT, ISchlather -from safran_study.safran import Safran +from experiment.safran_study.safran import Safran from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \ AlpsStation3DCoordinatesWithAnisotropy from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \