From 613524c6a262869d21971eb6b4ca38376c71181b Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Sun, 19 Apr 2020 20:05:44 +0200
Subject: [PATCH] [contrasting] add numpy random state to pd.Series for sample. improve test_two_fold_fit.py

---
 extreme_fit/model/utils.py                    |  1 +
 .../altitudes_fit/two_fold_detail_fit.py      | 16 ++++----
 .../coordinates/abstract_coordinates.py       |  3 ++
 spatio_temporal_dataset/slicer/split.py       |  4 +-
 .../test_contrasting/test_two_fold_fit.py     | 41 +++++++++++--------
 5 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/extreme_fit/model/utils.py b/extreme_fit/model/utils.py
index 03e653be..c04eb802 100644
--- a/extreme_fit/model/utils.py
+++ b/extreme_fit/model/utils.py
@@ -45,6 +45,7 @@ warnings.filters = default_filters
 
 def set_seed_for_test(seed=42):
     set_seed_r(seed=seed)
+    np.random.seed(seed=seed)
     random.seed(seed)
 
 
diff --git a/projects/contrasting_trends_in_snow_loads/altitudes_fit/two_fold_detail_fit.py b/projects/contrasting_trends_in_snow_loads/altitudes_fit/two_fold_detail_fit.py
index d58f1d47..57332d46 100644
--- a/projects/contrasting_trends_in_snow_loads/altitudes_fit/two_fold_detail_fit.py
+++ b/projects/contrasting_trends_in_snow_loads/altitudes_fit/two_fold_detail_fit.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Tuple
 
 import numpy as np
 
@@ -59,19 +59,19 @@ class TwoFoldSampleFit(object):
         key = lambda model_class: self.model_class_to_model_fit[model_class].score(score)
         return sorted(self.model_classes, key=key)
 
+    def scores(self, score):
+        return [self.model_class_to_model_fit[model_class].score(score) for model_class in self.model_classes]
+
 
 class TwoFoldModelFit(object):
 
     def __init__(self, model_class, two_fold_datasets, fit_method):
         self.model_class = model_class
         self.fit_method = fit_method
-        self.estimator_fold_1, self.estimator_fold_2 = [
-            fitted_linear_margin_estimator_short(model_class=self.model_class, dataset=dataset,
-                                                 fit_method=self.fit_method) for dataset in two_fold_datasets]
-
-    @property
-    def estimators(self) -> List[LinearMarginEstimator]:
-        return [self.estimator_fold_1, self.estimator_fold_2]
+        self.estimators = [fitted_linear_margin_estimator_short(model_class=self.model_class, dataset=dataset,
+                                                                fit_method=self.fit_method) for dataset in two_fold_datasets]  # type: List[LinearMarginEstimator]
+        self.estimator_fold_1 = self.estimators[0]
+        self.estimator_fold_2 = self.estimators[1]
 
     def score(self, score):
         if score == Score.NLLH_TEST:
diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
index 9cb1f567..10b5e563 100644
--- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py
+++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
@@ -158,6 +158,9 @@ class AbstractCoordinates(object):
     def coordinates_values(self, split: Split = Split.all, transformed=True) -> np.ndarray:
         return self.df_coordinates(split, transformed=transformed).values
 
+    def coordinates_index(self, split: Split = Split.all) -> pd.Index:
+        return self.df_coordinates(split).index
+
     @property
     def ind_train_spatial(self) -> pd.Series:
         return ind_train_from_s_split(s_split=self.s_split_spatial)
diff --git a/spatio_temporal_dataset/slicer/split.py b/spatio_temporal_dataset/slicer/split.py
index ac18f76a..a5168fad 100644
--- a/spatio_temporal_dataset/slicer/split.py
+++ b/spatio_temporal_dataset/slicer/split.py
@@ -1,4 +1,5 @@
 from enum import Enum
+import numpy as np
 from typing import Union
 
 import pandas as pd
@@ -46,7 +47,8 @@ def small_s_split_from_ratio(index: pd.Index, train_split_ratio):
     s = pd.Series(TEST_SPLIT_STR, index=index)
     nb_points_train = int(length * train_split_ratio)
     assert 0 < nb_points_train < length
-    train_ind = pd.Series.sample(s, n=nb_points_train).index
+    random_state = np.random.mtrand._rand
+    train_ind = pd.Series.sample(s, n=nb_points_train, random_state=random_state).index
     assert 0 < len(train_ind) < length, "number of training points:{} length:{}".format(len(train_ind), length)
     s.loc[train_ind] = TRAIN_SPLIT_STR
     return s
diff --git a/test/test_projects/test_contrasting/test_two_fold_fit.py b/test/test_projects/test_contrasting/test_two_fold_fit.py
index 4f0311dd..7e07a95e 100644
--- a/test/test_projects/test_contrasting/test_two_fold_fit.py
+++ b/test/test_projects/test_contrasting/test_two_fold_fit.py
@@ -10,7 +10,9 @@ from extreme_fit.model.margin_model.utils import MarginFitMethod
 from extreme_fit.model.utils import set_seed_for_test
 from projects.contrasting_trends_in_snow_loads.altitudes_fit.altitudes_studies import AltitudesStudies
 from projects.contrasting_trends_in_snow_loads.altitudes_fit.two_fold_datasets_generator import TwoFoldDatasetsGenerator
+from projects.contrasting_trends_in_snow_loads.altitudes_fit.two_fold_detail_fit import TwoFoldModelFit
 from projects.contrasting_trends_in_snow_loads.altitudes_fit.two_fold_fit import TwoFoldFit
+from projects.contrasting_trends_in_snow_loads.altitudes_fit.utils import Score
 from spatio_temporal_dataset.slicer.split import Split
 
 
@@ -19,26 +21,33 @@ class TestTwoFoldFit(unittest.TestCase):
     def setUp(self) -> None:
         super().setUp()
         set_seed_for_test()
-        altitudes = [900, 1200]
-        study_class = SafranSnowfall1Day
-        studies = AltitudesStudies(study_class, altitudes, year_min=1959, year_max=1989)
-        self.two_fold_datasets_generator = TwoFoldDatasetsGenerator(studies, nb_samples=2, massif_names=['Vercors'])
-        self.model_family_name_to_model_class = {'Stationary': [ConstantMarginModel],
-                                                 'Linear': [ConstantMarginModel, LinearLocationAllDimsMarginModel]}
-
-    def load_two_fold_fit(self, fit_method):
+
+    def load_two_fold_fit(self, fit_method, year_max):
+        self.altitudes = [900, 1200]
+        self.study_class = SafranSnowfall1Day
+        studies = AltitudesStudies(self.study_class, self.altitudes, year_max=year_max)
+        self.two_fold_datasets_generator = TwoFoldDatasetsGenerator(studies, nb_samples=1, massif_names=['Vercors'])
+        self.model_family_name_to_model_class = {'Stationary': [ConstantMarginModel]}
         return TwoFoldFit(two_fold_datasets_generator=self.two_fold_datasets_generator,
                           model_family_name_to_model_classes=self.model_family_name_to_model_class,
                           fit_method=fit_method)
 
-    # def test_best_fit_spatial_extreme(self):
-    #     two_fold_fit = self.load_two_fold_fit(fit_method=MarginFitMethod.spatial_extremes_mle)
-    #     try:
-    #         best_model_class = two_fold_fit.massif_name_to_best_model()['Vercors']
-    #     except AssertionError as e:
-    #         self.assertTrue(False, msg=e.__str__())
-    #         best_model_class = None
-    #     self.assertEqual(best_model_class, LinearLocationAllDimsMarginModel)
+    def test_determinism_dataset_generation(self):
+        two_fold_fit = self.load_two_fold_fit(fit_method=MarginFitMethod.spatial_extremes_mle, year_max=1963)
+        massif_fit = two_fold_fit.massif_name_to_massif_fit['Vercors']
+        model_fit = massif_fit.sample_id_to_sample_fit[0].model_class_to_model_fit[
+            ConstantMarginModel]  # type: TwoFoldModelFit
+        dataset_fold1 = model_fit.estimator_fold_1.dataset
+        index_train = list(dataset_fold1.coordinates.coordinates_index(split=Split.train_temporal))
+        self.assertEqual([2, 3, 8, 9], index_train)
+        self.assertEqual(110.52073192596436, np.sum(dataset_fold1.maxima_gev(split=Split.train_temporal)))
+
+    def test_determinism_fit_spatial_extreme(self):
+        two_fold_fit = self.load_two_fold_fit(fit_method=MarginFitMethod.spatial_extremes_mle, year_max=2019)
+        massif_fit = two_fold_fit.massif_name_to_massif_fit['Vercors']
+        model_fit = massif_fit.sample_id_to_sample_fit[0].model_class_to_model_fit[
+            ConstantMarginModel]  # type: TwoFoldModelFit
+        self.assertEqual(461.6710428902022, model_fit.score(score=Score.NLLH_TEST))
 
 
 if __name__ == '__main__':
-- 
GitLab