[SAFRAN] fix permutation issue of massif_id mapping to the good polynom.

b461528f · Le Roux Erwan · a40956e0 · b461528f · b461528f · b461528f
Commit b461528f authored 6 years ago by Le Roux Erwan
Hide whitespace changes
Inline Side-by-side

Showing

with 169 additions and 10 deletions
+169 -10
--- a/safran_study/safran.py
+++ b/safran_study/safran.py
@@ -2,12 +2,16 @@ import os
 import os.path as op
 from collections import OrderedDict

+import matplotlib
 import matplotlib.pyplot as plt
 import pandas as pd
+from mpl_toolkits.axes_grid1 import AxesGrid
 from netCDF4 import Dataset

 from extreme_estimator.gev.gevmle_fit import GevMleFit
+from extreme_estimator.gev_params import GevParams
 from safran_study.massif import safran_massif_names_from_datasets
+from safran_study.shifted_color_map import shiftedColorMap
 from safran_study.snowfall_annual_maxima import SafranSnowfall
 from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
 from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
@@ -29,17 +33,71 @@ class Safran(object):

    """ Visualization methods """

-    def visualize(self):
+    def visualize(self, ax=None, massif_name_to_fill_kwargs=None, show=True):
+        if ax is None:
+            ax = plt.gca()
        df_massif = pd.read_csv(op.join(self.map_full_path, 'massifsalpes.csv'))
        coord_tuples = [(row_massif['idx'], row_massif[AbstractCoordinates.COORDINATE_X],
                         row_massif[AbstractCoordinates.COORDINATE_Y])
                        for _, row_massif in df_massif.iterrows()]
-        for massif_idx in set([tuple[0] for tuple in coord_tuples]):
-            l = [coords for idx, *coords in coord_tuples if idx == massif_idx]
+
+        for coordinate_id in set([tuple[0] for tuple in coord_tuples]):
+            l = [coords for idx, *coords in coord_tuples if idx == coordinate_id]
            l = list(zip(*l))
-            plt.plot(*l, color='black')
-            plt.fill(*l)
-        self.massifs_coordinates.visualization_2D()
+            ax.plot(*l, color='black')
+            massif_name = self.coordinate_id_to_massif_name[coordinate_id]
+            fill_kwargs = massif_name_to_fill_kwargs[massif_name] if massif_name_to_fill_kwargs is not None else {}
+            ax.fill(*l, **fill_kwargs)
+        cax = ax.scatter(self.massifs_coordinates.x_coordinates, self.massifs_coordinates.y_coordinates)
+
+        if show:
+            plt.show()
+        return cax
+
+    def visualize_gev_fit_with_cmap(self, show=True, axes=None):
+        if axes is None:
+            fig, axes = plt.subplots(1, len(GevParams.GEV_PARAM_NAMES))
+            fig.subplots_adjust(hspace=1.0, wspace=1.0)
+
+            # fig = plt.figure(figsize=(6, 6))
+            # axes = AxesGrid(fig, 111, nrows_ncols=(1, 3), axes_pad=0.5,
+            #                 label_mode="1", share_all=True,
+            #                 cbar_location="right", cbar_mode="each",
+            #                 cbar_size="7%", cbar_pad="2%")
+
+        for i, gev_param_name in enumerate(GevParams.GEV_PARAM_NAMES[-1:]):
+            massif_name_to_value = self.df_gev_mle_each_massif.loc[gev_param_name, :].to_dict()
+            # Compute the middle point of the values for the color map
+            values = list(massif_name_to_value.values())
+            vmin, vmax = min(values), max(values)
+            midpoint = 1 - vmax / (vmax + abs(vmin))
+            scaling_factor = max(vmax, -vmin)
+            # print(gev_param_name, midpoint, vmin, vmax, scaling_factor)
+            # Load the shifted cmap to center on a middle point
+            cmap = [plt.cm.coolwarm, plt.cm.bwr, plt.cm.seismic][0]
+            shifted_cmap = shiftedColorMap(plt.cm.coolwarm, midpoint=0.0, name='shifted')
+            massif_name_to_fill_kwargs = {massif_name: {'color': shifted_cmap(value / scaling_factor)} for massif_name, value in
+                                          massif_name_to_value.items()}
+            ax = axes[i]
+            cax = self.visualize(ax=ax, massif_name_to_fill_kwargs=massif_name_to_fill_kwargs, show=False)
+
+            # cbar = fig.colorbar(cax, ticks=[-1, 0, 1], orientation='horizontal')
+            # cbar.ax.set_xticklabels(['Low', 'Medium', 'High'])  # horizontal colorbar
+            # cbar = fig.colorbar(cax, ticks=[-1, 0, 1])
+            # cbar.ax.set_yticklabels(['< -1', '0', '> 1'])  # vertically oriented colorbar
+            title_str = gev_param_name
+            ax.set_title(title_str)
+
+        if show:
+            plt.show()
+
+    def visualize_cmap(self, massif_name_to_value):
+        orig_cmap = plt.cm.coolwarm
+        # shifted_cmap = shiftedColorMap(orig_cmap, midpoint=0.75, name='shifted')
+
+        massif_name_to_fill_kwargs = {massif_name: {'color': orig_cmap(value)} for massif_name, value in massif_name_to_value.items()}
+
+        self.visualize(massif_name_to_fill_kwargs=massif_name_to_fill_kwargs)

    """ Statistics methods """

@@ -73,6 +131,10 @@ class Safran(object):
        # Load the names of the massif as defined by SAFRAN
        return safran_massif_names_from_datasets(self.year_to_dataset_ordered_dict.values())

+    @property
+    def safran_massif_id_to_massif_name(self):
+        return dict(enumerate(self.safran_massif_names))
+
    @cached_property
    def year_to_dataset_ordered_dict(self) -> OrderedDict:
        # Map each year to the correspond netCDF4 Dataset
@@ -88,13 +150,20 @@ class Safran(object):
        df_centroid = self.load_df_centroid()
        for coord_column in [AbstractCoordinates.COORDINATE_X, AbstractCoordinates.COORDINATE_Y]:
            df_centroid.loc[:, coord_column] = df_centroid[coord_column].str.replace(',', '.').astype(float)
-        # Assert that the massif names are the same between SAFRAN and the coordinate file
-        assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM']))
        # Build coordinate object from df_centroid
        return AbstractSpatialCoordinates.from_df(df_centroid)

-    def load_df_centroid(self):
-        return pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv'))
+    def load_df_centroid(self) -> pd.DataFrame:
+        df_centroid = pd.read_csv(op.join(self.map_full_path, 'coordonnees_massifs_alpes.csv'))
+        # Assert that the massif names are the same between SAFRAN and the coordinate file
+        assert not set(self.safran_massif_names).symmetric_difference(set(df_centroid['NOM']))
+        return df_centroid
+
+    @property
+    def coordinate_id_to_massif_name(self):
+        df_centroid = self.load_df_centroid()
+        print(df_centroid.columns)
+        return dict(zip(df_centroid['id'], df_centroid['NOM']))

    """ Some properties """


--- a/safran_study/shifted_color_map.py
+++ b/safran_study/shifted_color_map.py
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+from mpl_toolkits.axes_grid1 import AxesGrid
+
+# from: https://stackoverflow.com/questions/7404116/defining-the-midpoint-of-a-colormap-in-matplotlib/20528097
+def shiftedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):
+    '''
+    Function to offset the "center" of a colormap. Useful for
+    data with a negative min and positive max and you want the
+    middle of the colormap's dynamic range to be at zero.
+
+    Input
+    -----
+      cmap : The matplotlib colormap to be altered
+      start : Offset from lowest point in the colormap's range.
+          Defaults to 0.0 (no lower offset). Should be between
+          0.0 and `midpoint`.
+      midpoint : The new center of the colormap. Defaults to
+          0.5 (no shift). Should be between 0.0 and 1.0. In
+          general, this should be  1 - vmax / (vmax + abs(vmin))
+          For example if your data range from -15.0 to +5.0 and
+          you want the center of the colormap at 0.0, `midpoint`
+          should be set to  1 - 5/(5 + 15)) or 0.75
+      stop : Offset from highest point in the colormap's range.
+          Defaults to 1.0 (no upper offset). Should be between
+          `midpoint` and 1.0.
+    '''
+    cdict = {
+        'red': [],
+        'green': [],
+        'blue': [],
+        'alpha': []
+    }
+
+    # regular index to compute the colors
+    reg_index = np.linspace(start, stop, 257)
+
+    # shifted index to match the data
+    shift_index = np.hstack([
+        np.linspace(0.0, midpoint, 128, endpoint=False),
+        np.linspace(midpoint, 1.0, 129, endpoint=True)
+    ])
+
+    for ri, si in zip(reg_index, shift_index):
+        r, g, b, a = cmap(ri)
+
+        cdict['red'].append((si, r, r))
+        cdict['green'].append((si, g, g))
+        cdict['blue'].append((si, b, b))
+        cdict['alpha'].append((si, a, a))
+
+    newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)
+    plt.register_cmap(cmap=newcmap)
+
+    return newcmap
--- a/safran_study/visualize_safran.py
+++ b/safran_study/visualize_safran.py
+import pandas as pd
+
+from extreme_estimator.gev_params import GevParams
+from safran_study.safran import Safran
+from safran_study.safran_extended import ExtendedSafran
+from utils import VERSION
+from itertools import product
+
+
+def load_all_safran(only_first_one=False):
+    all_safran = []
+    for safran_alti, nb_day in product([1800, 2400], [1, 3, 7]):
+        print('alti: {}, nb_day: {}'.format(safran_alti, nb_day))
+        all_safran.append(Safran(safran_alti, nb_day))
+        if only_first_one:
+            break
+    return all_safran
+
+
+def fit_mle_gev_independent():
+    # Dump the result in a csv
+    dfs = []
+    for safran in load_all_safran(only_first_one=True):
+        safran.visualize_gev_fit_with_cmap()
+        # path = r'/home/erwan/Documents/projects/spatiotemporalextremes/local/spatio_temporal_datasets/results/fit_mle_massif/fit_mle_gev_{}.csv'
+        # df.to_csv(path.format(VERSION))
+
+
+def fit_max_stab():
+    pass
+
+
+if __name__ == '__main__':
+    fit_mle_gev_independent()