[SCM] add full analysis for any study & write the results to file. add test.

4fa7fcf6 · Le Roux Erwan · 7833bcb4 · 4fa7fcf6 · 4fa7fcf6 · 4fa7fcf6
Commit 4fa7fcf6 authored 6 years ago by Le Roux Erwan
Hide whitespace changes
Inline Side-by-side

Showing

with 121 additions and 75 deletions
+121 -75
--- a/experiment/meteo_france_SCM_study/abstract_study.py
+++ b/experiment/meteo_france_SCM_study/abstract_study.py
@@ -49,7 +49,7 @@ class AbstractStudy(object):
    def year_to_dataset_ordered_dict(self) -> OrderedDict:
        # Map each year to the correspond netCDF4 Dataset
        year_to_dataset = OrderedDict()
-        nc_files = [(int(f.split('_')[1][:4]), f) for f in os.listdir(self.safran_full_path) if f.endswith('.nc')]
+        nc_files = [(int(f.split('_')[-2][:4]), f) for f in os.listdir(self.safran_full_path) if f.endswith('.nc')]
        for year, nc_file in sorted(nc_files, key=lambda t: t[0]):
            year_to_dataset[year] = Dataset(op.join(self.safran_full_path, nc_file))
        return year_to_dataset
@@ -94,7 +94,7 @@ class AbstractStudy(object):
    @property
    def original_safran_massif_names(self):
        # Load the names of the massif as defined by SAFRAN
-        return safran_massif_names_from_datasets(list(self.year_to_dataset_ordered_dict.values()))
+        return safran_massif_names_from_datasets(list(self.year_to_dataset_ordered_dict.values()), self.altitude)

    @property
    def original_safran_massif_id_to_massif_name(self) -> Dict[int, str]:

--- a/experiment/meteo_france_SCM_study/main_visualize.py
+++ b/experiment/meteo_france_SCM_study/main_visualize.py
@@ -5,37 +5,63 @@ from experiment.meteo_france_SCM_study.safran.safran import Safran, ExtendedSafr
 from itertools import product

 from experiment.meteo_france_SCM_study.safran.safran_visualizer import StudyVisualizer
+from collections import OrderedDict

+SCM_STUDIES = [Safran, CrocusSwe, CrocusDepth]
+SCM_EXTENDED_STUDIES = [ExtendedSafran, ExtendedCrocusSwe, ExtendedCrocusDepth]
+SCM_STUDY_TO_EXTENDED_STUDY = OrderedDict(zip(SCM_STUDIES, SCM_EXTENDED_STUDIES))

-def load_all_studies(study_class, only_first_one=False):
+
+def study_iterator(study_class, only_first_one=False, both_altitude=False, verbose=True):
    all_studies = []
-    is_safran_study = study_class == Safran
+    is_safran_study = study_class in [Safran, ExtendedSafran]
    nb_days = [3, 1] if is_safran_study else [1]
-    for alti, nb_day in product(AbstractStudy.ALTITUDES, nb_days):
-        print('alti: {}, nb_day: {}'.format(alti, nb_day))
-        study = Safran(alti, nb_day) if is_safran_study else study_class(alti)
-        all_studies.append(study)
+    if verbose:
+        print('Loading studies....')
+    for nb_day in nb_days:
+        for alti in AbstractStudy.ALTITUDES[::1]:
+            if verbose:
+                print('alti: {}, nb_day: {}'.format(alti, nb_day))
+            study = study_class(alti, nb_day) if is_safran_study else study_class(alti)
+            yield study
+            if only_first_one and not both_altitude:
+                break
        if only_first_one:
            break
+
    return all_studies


 def extended_visualization():
-    for study_class in [ExtendedSafran, ExtendedCrocusSwe, ExtendedCrocusDepth][:1]:
-        for study in load_all_studies(study_class, only_first_one=True):
+    for study_class in SCM_EXTENDED_STUDIES[1:2]:
+        for study in study_iterator(study_class, only_first_one=True):
            study_visualizer = StudyVisualizer(study)
            study_visualizer.visualize_all_kde_graphs()


 def normal_visualization():
-    for study_class in [Safran, CrocusSwe, CrocusDepth][:1]:
-        for study in load_all_studies(study_class, only_first_one=True):
+    for study_class in SCM_STUDIES[:1]:
+        for study in study_iterator(study_class, only_first_one=True):
            study_visualizer = StudyVisualizer(study)
            # study_visualizer.visualize_independent_margin_fits(threshold=[None, 20, 40, 60][0])
-            # study_visualizer.visualize_smooth_margin_fit()
-            study_visualizer.visualize_full_fit()
+            study_visualizer.visualize_linear_margin_fit()
+
+
+def complete_analysis(only_first_one=False):
+    """An overview of everything that is possible with study OR extended study"""
+    for study_class, extended_study_class in list(SCM_STUDY_TO_EXTENDED_STUDY.items())[:]:
+        # First explore everything you can do with the extended study class
+        print('Extended study')
+        for extended_study in study_iterator(extended_study_class, only_first_one=only_first_one):
+            study_visualizer = StudyVisualizer(extended_study, save_to_file=True)
+            study_visualizer.visualize_all_kde_graphs()
+        print('Study normal')
+        for study in study_iterator(study_class, only_first_one=only_first_one):
+            study_visualizer = StudyVisualizer(study, save_to_file=True)
+            study_visualizer.visualize_linear_margin_fit()


 if __name__ == '__main__':
-    normal_visualization()
-    # extended_visualization()
\ No newline at end of file
+    # normal_visualization()
+    extended_visualization()
+    # complete_analysis()
--- a/experiment/meteo_france_SCM_study/massif.py
+++ b/experiment/meteo_france_SCM_study/massif.py
 from utils import first

-MASSIF_NAMES = ['Pelvoux', 'Queyras', 'Mont-Blanc', 'Aravis', 'Haute-Tarentaise', 'Vercors', 'Alpes-Azur', 'Oisans',
-                'Mercantour', 'Chartreuse', 'Haute-Maurienne', 'Belledonne', 'Thabor', 'Parpaillon', 'Bauges',
-                'Chablais', 'Ubaye', 'Grandes-Rousses', 'Devoluy', 'Champsaur', 'Vanoise', 'Beaufortain', 'Maurienne']
+MASSIF_NAMES_1800 = ['Pelvoux', 'Queyras', 'Mont-Blanc', 'Aravis', 'Haute-Tarentaise', 'Vercors', 'Alpes-Azur',
+                     'Oisans',
+                     'Mercantour', 'Chartreuse', 'Haute-Maurienne', 'Belledonne', 'Thabor', 'Parpaillon', 'Bauges',
+                     'Chablais', 'Ubaye', 'Grandes-Rousses', 'Devoluy', 'Champsaur', 'Vanoise', 'Beaufortain',
+                     'Maurienne']
+# Some massif like Chartreuse do not have massif whose altitude is higher or equal to 2400
+MASSIF_NAMES_2400 = ['Pelvoux', 'Queyras', 'Mont-Blanc', 'Aravis', 'Haute-Tarentaise', 'Vercors', 'Alpes-Azur', 'Oisans'
+    , 'Mercantour', 'Haute-Maurienne', 'Belledonne', 'Thabor', 'Parpaillon', 'Chablais', 'Ubaye', 'Grandes-Rousses',
+                     'Devoluy', 'Champsaur', 'Vanoise', 'Beaufortain', 'Maurienne']


 class Massif(object):
@@ -19,12 +25,13 @@ class Massif(object):
        return cls(name.strip(), int(id), float(lat), float(lon))


-def safran_massif_names_from_datasets(datasets):
+def safran_massif_names_from_datasets(datasets, altitude):
    # Massifs names are extracted from SAFRAN dataset
+    reference_massif_list = MASSIF_NAMES_1800 if altitude == 1800 else MASSIF_NAMES_2400
    if hasattr(datasets[0], 'massifsList'):
        # Assert the all the datasets have the same indexing for the massif
        assert len(set([dataset.massifsList for dataset in datasets])) == 1
        # List of the name of the massif used by all the SAFRAN datasets
        safran_names = [Massif.from_str(massif_str).name for massif_str in first(datasets).massifsList.split('/')]
-        assert MASSIF_NAMES == safran_names
-    return MASSIF_NAMES
+        assert reference_massif_list == safran_names, '{} \n{}'.format(reference_massif_list, safran_names)
+    return reference_massif_list
--- a/experiment/meteo_france_SCM_study/safran/safran_visualizer.py
+++ b/experiment/meteo_france_SCM_study/safran/safran_visualizer.py
 import math
+import os
+import os.path as op
 import numpy as np

 import matplotlib.pyplot as plt
@@ -17,15 +19,17 @@ from extreme_estimator.margin_fits.gpd.gpdmle_fit import GpdMleFit
 from extreme_estimator.margin_fits.plot.create_shifted_cmap import get_color_rbga_shifted
 from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
 from test.test_utils import load_test_max_stable_models
-from utils import get_display_name_from_object_type
+from utils import get_display_name_from_object_type, VERSION, VERSION_TIME


 class StudyVisualizer(object):

-    def __init__(self, study: AbstractStudy, show=True):
+    def __init__(self, study: AbstractStudy, show=True, save_to_file=False):
+        self.save_to_file = save_to_file
        self.study = study
-        self.show = show
+        self.show = False if self.save_to_file else show
        self.window_size_for_smoothing = 21
+        self.figsize=(16.0, 10.0)

    @property
    def observations(self):
@@ -39,21 +43,18 @@ class StudyVisualizer(object):
    def dataset(self):
        return AbstractDataset(self.observations, self.coordinates)

-    def visualize_all_kde_graphs(self, show=True):
+    def visualize_all_kde_graphs(self):
        massif_names = self.study.safran_massif_names
        nb_columns = 5
        nb_rows = math.ceil(len(massif_names) / nb_columns)
-        fig, axes = plt.subplots(nb_rows, nb_columns)
+        fig, axes = plt.subplots(nb_rows, nb_columns, figsize=self.figsize)
        fig.subplots_adjust(hspace=1.0, wspace=1.0)
        for i, massif_name in enumerate(massif_names):
            row_id, column_id = i // nb_columns, i % nb_columns
            ax = axes[row_id, column_id]
            self.visualize_kde_graph(ax, i, massif_name)
-        title = self.study.title
-        title += '  (mean computed with a sliding window of size {})'.format(self.window_size_for_smoothing)
-        fig.suptitle(title)
-        if show:
-            plt.show()
+        plot_name = ' mean with sliding window of size {}'.format(self.window_size_for_smoothing)
+        self.show_or_save_to_file(plot_name)

    def visualize_kde_graph(self, ax, i, massif_name):
        self.maxima_plot(ax, i)
@@ -91,34 +92,43 @@ class StudyVisualizer(object):
        assert len(x) == len(y)
        return x, y

-    def fit_and_visualize_estimator(self, estimator, axes=None, show=True, title=None):
+    def visualize_linear_margin_fit(self):
+        plot_name = 'Full Likelihood with Linear marginals and max stable dependency structure'
+        max_stable_models = load_test_max_stable_models(only_one_covariance_function=True)[:1]
+        fig, axes = plt.subplots(len(max_stable_models) + 1, len(GevParams.SUMMARY_NAMES), figsize=self.figsize)
+        fig.subplots_adjust(hspace=1.0, wspace=1.0)
+        margin_class = LinearAllParametersAllDimsMarginModel
+        # Plot the smooth margin only
+        margin_model = margin_class(coordinates=self.coordinates)
+        estimator = SmoothMarginEstimator(dataset=self.dataset, margin_model=margin_model)
+        self.fit_and_visualize_estimator(estimator, axes[0], title='without max stable')
+        # Plot the smooth margin fitted with a max stable
+        for i, max_stable_model in enumerate(max_stable_models, 1):
+            margin_model = margin_class(coordinates=self.coordinates)
+            estimator = FullEstimatorInASingleStepWithSmoothMargin(self.dataset, margin_model, max_stable_model)
+            title = get_display_name_from_object_type(type(max_stable_model))
+            self.fit_and_visualize_estimator(estimator, axes[i], title=title)
+        self.show_or_save_to_file(plot_name)
+
+    def fit_and_visualize_estimator(self, estimator, axes=None, title=None):
        estimator.fit()
        axes = estimator.margin_function_fitted.visualize_function(show=False, axes=axes, title=title)
        for ax in axes:
            self.study.visualize(ax, fill=False, show=False)
-        if show:
-            plt.suptitle(self.study.title)
-            plt.show()

-    def visualize_smooth_margin_fit(self):
-        margin_model = LinearAllParametersAllDimsMarginModel(coordinates=self.coordinates)
-        estimator = SmoothMarginEstimator(dataset=self.dataset, margin_model=margin_model)
-        self.fit_and_visualize_estimator(estimator)
-
-    def visualize_full_fit(self):
-        max_stable_models = load_test_max_stable_models()
-        fig, axes = plt.subplots(len(max_stable_models), len(GevParams.SUMMARY_NAMES))
-        fig.subplots_adjust(hspace=1.0, wspace=1.0)
-        for i, max_stable_model in enumerate(max_stable_models):
-            margin_model = LinearAllParametersAllDimsMarginModel(coordinates=self.coordinates)
-            estimator = FullEstimatorInASingleStepWithSmoothMargin(self.dataset, margin_model, max_stable_model)
-            title = get_display_name_from_object_type(type(max_stable_model))
-            print(title)
-            self.fit_and_visualize_estimator(estimator, axes[i], show=False, title=title)
+    def show_or_save_to_file(self, plot_name):
        title = self.study.title
-        title += '\nMethod: Full Likelihood with Linear marginals and max stable dependency structure'
+        title += '\n' + plot_name
        plt.suptitle(title)
-        plt.show()
+        if self.show:
+            plt.show()
+        if self.save_to_file:
+            filename = "{}/{}/{}".format(VERSION_TIME, '_'.join(self.study.title.split()), '_'.join(plot_name.split()))
+            filepath = op.join(self.study.result_full_path, filename + '.png')
+            dir = op.dirname(filepath)
+            if not op.exists(dir):
+                os.makedirs(dir, exist_ok=True)
+            plt.savefig(filepath)

    def visualize_independent_margin_fits(self, threshold=None, axes=None):
        if threshold is None:
@@ -162,8 +172,8 @@ class StudyVisualizer(object):
    def df_gev_mle_each_massif(self):
        # Fit a margin_fits on each massif
        massif_to_gev_mle = {
-        massif_name: GevMleFit(self.study.observations_annual_maxima.loc[massif_name]).gev_params.summary_serie
-        for massif_name in self.study.safran_massif_names}
+            massif_name: GevMleFit(self.study.observations_annual_maxima.loc[massif_name]).gev_params.summary_serie
+            for massif_name in self.study.safran_massif_names}
        return pd.DataFrame(massif_to_gev_mle, columns=self.study.safran_massif_names)

    def df_gpd_mle_each_massif(self, threshold):

--- a/test/test_meteo_france_SCM_study/__init__.py
+++ b/test/test_meteo_france_SCM_study/__init__.py
--- a/test/test_meteo_france_SCM_study/test_SCM_study.py
+++ b/test/test_meteo_france_SCM_study/test_SCM_study.py
-import os
 import os.path as op
-from collections import OrderedDict
-from typing import List, Dict
+import unittest

-import matplotlib.pyplot as plt
 import pandas as pd
-from netCDF4 import Dataset
-
-from experiment.meteo_france_SCM_study.abstract_variable import AbstractVariable
-from experiment.meteo_france_SCM_study.massif import safran_massif_names_from_datasets
-from experiment.meteo_france_SCM_study.safran.safran import Safran
-from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
-from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
-    AbstractSpatialCoordinates
-from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import AnnualMaxima
-from utils import get_full_path, cached_property
-import unittest

-from test.test_utils import load_safran_objects
+from experiment.meteo_france_SCM_study.crocus.crocus import ExtendedCrocusSwe
+from experiment.meteo_france_SCM_study.main_visualize import study_iterator
+from experiment.meteo_france_SCM_study.safran.safran import Safran, ExtendedSafran
+from experiment.meteo_france_SCM_study.safran.safran_visualizer import StudyVisualizer


 class TestSCMStudy(unittest.TestCase):
@@ -31,6 +20,13 @@ class TestSCMStudy(unittest.TestCase):
        # Assert that the massif names are the same between SAFRAN and the coordinate file
        assert not set(self.study.safran_massif_names).symmetric_difference(set(df_centroid['NOM']))

+    def test_extended_run(self):
+        for study_class in [ExtendedSafran, ExtendedCrocusSwe]:
+            for study in study_iterator(study_class, only_first_one=True, both_altitude=True, verbose=False):
+                study_visualizer = StudyVisualizer(study, show=False, save_to_file=False)
+                study_visualizer.visualize_all_kde_graphs()
+        self.assertTrue(True)
+

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -46,13 +46,17 @@ def load_smooth_margin_models(coordinates):
    return [margin_class(coordinates=coordinates) for margin_class in TEST_MARGIN_TYPES]


-def load_test_max_stable_models():
+def load_test_max_stable_models(only_one_covariance_function=False):
+    default_covariance_function = CovarianceFunction.cauchy
    # Load all max stable model
    max_stable_models = []
    for max_stable_class in TEST_MAX_STABLE_MODEL:
        if issubclass(max_stable_class, AbstractMaxStableModelWithCovarianceFunction):
-            max_stable_models.extend([max_stable_class(covariance_function=covariance_function)
-                                      for covariance_function in CovarianceFunction])
+            if only_one_covariance_function:
+                max_stable_models.append(max_stable_class(covariance_function=default_covariance_function))
+            else:
+                max_stable_models.extend([max_stable_class(covariance_function=covariance_function)
+                                          for covariance_function in CovarianceFunction])
        else:
            max_stable_models.append(max_stable_class())
    return max_stable_models

--- a/utils.py
+++ b/utils.py
@@ -2,7 +2,9 @@ import datetime
 import os.path as op

 VERSION = datetime.datetime.now()
-
+VERSION_TIME = str(VERSION).split('.')[0]
+for c in [' ', ':', '-']:
+    VERSION_TIME = VERSION_TIME.replace(c, '_')

 def get_root_path() -> str:
    return op.dirname(op.abspath(__file__))
@@ -62,3 +64,4 @@ if __name__ == '__main__':
    e = Example()
    print(e.big_attribute)
    print(e.big_attribute)
+    print(VERSION_TIME)