From 70baaeed7428a8199f8b440e21fc4526d6e34182 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Fri, 10 May 2019 10:10:15 +0200 Subject: [PATCH] [COORDINATE][SPATIO TEMPORAL INDEX] add rule for spatio temporal index suffix. update test. --- .../main_study_visualizer.py | 1 + .../coordinates/abstract_coordinates.py | 8 +++++- .../abstract_spatio_temporal_coordinates.py | 8 +++--- spatio_temporal_dataset/coordinates/utils.py | 23 ++++++++++++++++ .../dataset/abstract_dataset.py | 4 ++- .../test_coordinates.py | 11 +++++--- .../test_dataset.py | 27 +++++++++++++++++-- 7 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 spatio_temporal_dataset/coordinates/utils.py diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py index 24c9c8c0..53144ef8 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py @@ -113,6 +113,7 @@ def complete_analysis(only_first_one=False): def trend_analysis(): save_to_file = True only_first_one = False + # [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900, 4200, 4500, 4800] to test for others altitudes = [300, 1200, 2100, 3000][:] study_classes = [CrocusSwe, CrocusDepth, SafranSnowfall, SafranRainfall, SafranTemperature] for study in study_iterator_global(study_classes, only_first_one=only_first_one, altitudes=altitudes): diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index 1a22f396..7b6ae41b 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd from mpl_toolkits.mplot3d import Axes3D +from spatio_temporal_dataset.coordinates.utils import get_index_without_spatio_temporal_index_suffix from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer, df_sliced from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer @@ -176,7 +177,12 @@ class AbstractCoordinates(object): return self.df_coordinates(split).loc[:, self.coordinates_spatial_names].drop_duplicates() def spatial_index(self, split: Split = Split.all) -> pd.Index: - return self.df_spatial_coordinates(split).index + df_spatial = self.df_spatial_coordinates(split) + if self.has_spatio_temporal_coordinates: + # Remove the spatio temporal index suffix + return get_index_without_spatio_temporal_index_suffix(df_spatial) + else: + return df_spatial.index # Temporal attributes diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py index 485368ec..53e4af8a 100644 --- a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py @@ -1,6 +1,7 @@ import pandas as pd from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.utils import get_index_with_spatio_temporal_index_suffix from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer @@ -19,13 +20,12 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates): @classmethod def from_df_spatial_and_nb_steps(cls, df_spatial, nb_steps, train_split_ratio: float = None, start=0): df_time_steps = [] - index_type = type(df_spatial.index[0]) for t in range(nb_steps): df_time_step = df_spatial.copy() df_time_step[cls.COORDINATE_T] = start + t - index_suffix = index_type(t * len(df_spatial)) - time_step_index = [i + index_suffix for i in df_spatial.index] - df_time_step.index = time_step_index + df_time_step.index = get_index_with_spatio_temporal_index_suffix(df_spatial, t) df_time_steps.append(df_time_step) df_time_steps = pd.concat(df_time_steps) return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio) + + diff --git a/spatio_temporal_dataset/coordinates/utils.py b/spatio_temporal_dataset/coordinates/utils.py new file mode 100644 index 00000000..5261eafb --- /dev/null +++ b/spatio_temporal_dataset/coordinates/utils.py @@ -0,0 +1,23 @@ +# Suffix to differentiate between spatio temporal index and spatial index +import pandas as pd +import numpy as np + + +def get_index_suffix(df_spatial: pd.DataFrame, t): + index_type = type(df_spatial.index[0]) + assert index_type in [int, float, str, np.int64, np.float64], index_type + return index_type(t * len(df_spatial)) + + +def get_index_with_spatio_temporal_index_suffix(df_spatial: pd.DataFrame, t): + index_suffix = get_index_suffix(df_spatial, t) + return pd.Index([i + index_suffix for i in df_spatial.index]) + + +def get_index_without_spatio_temporal_index_suffix(df_spatial: pd.DataFrame): + index_suffix = get_index_suffix(df_spatial, 0) + if isinstance(index_suffix, str): + return df_spatial.index.str.split(index_suffix).str.join('') + else: + return df_spatial.index - index_suffix + diff --git a/spatio_temporal_dataset/dataset/abstract_dataset.py b/spatio_temporal_dataset/dataset/abstract_dataset.py index 1b2996c8..2dbb6476 100644 --- a/spatio_temporal_dataset/dataset/abstract_dataset.py +++ b/spatio_temporal_dataset/dataset/abstract_dataset.py @@ -56,7 +56,9 @@ class AbstractDataset(object): def transform_maxima_for_spatial_extreme_package(self, maxima_function, split) -> np.ndarray: array = maxima_function(split) if self.coordinates.has_spatio_temporal_coordinates: - return array.reshape(self.coordinates.spatio_temporal_shape(split)[::-1]) + inverted_shape = list(self.coordinates.spatio_temporal_shape(split)[::-1]) + inverted_shape[0] *= self.observations.nb_obs + return array.reshape(inverted_shape) else: return np.transpose(array) diff --git a/test/test_spatio_temporal_dataset/test_coordinates.py b/test/test_spatio_temporal_dataset/test_coordinates.py index a2c0b7b4..5ee85578 100644 --- a/test/test_spatio_temporal_dataset/test_coordinates.py +++ b/test/test_spatio_temporal_dataset/test_coordinates.py @@ -13,6 +13,7 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coo AlpsStation3DCoordinatesWithAnisotropy from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \ CircleSpatialCoordinates +from spatio_temporal_dataset.coordinates.utils import get_index_with_spatio_temporal_index_suffix from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer @@ -65,14 +66,18 @@ class SpatioTemporalCoordinates(unittest.TestCase): # the uniqueness of each spatio temporal index is not garanteed by the current algo # it will work in classical cases, and raise an assert when uniqueness is needed (when using a slicer) index1 = pd.Series(spatial_coordinates.spatial_index()) - # Add the suffix to the index1 - suffix = '0' if isinstance(df_spatial.index[0], str) else 0 - index1 += suffix index2 = pd.Series(coordinates.spatial_index()) ind = index1 != index2 # type: pd.Series self.assertEqual(sum(ind), 0, msg="spatial_coordinates:\n{} \n!= spatio_temporal_coordinates \n{}". format(index1.loc[ind], index2.loc[ind])) + index1 = get_index_with_spatio_temporal_index_suffix(spatial_coordinates.df_spatial_coordinates(), t=0) + index1 = pd.Series(index1) + index2 = pd.Series(coordinates.df_spatial_coordinates().index) + ind = index1 != index2 # type: pd.Series + self.assertEqual(sum(ind), 0, msg="spatial_coordinates:\n{} \n!= spatio_temporal_coordinates \n{}". + format(index1.loc[ind], index2.loc[ind])) + def test_ordered_coordinates(self): # Order coordinates, to ensure that the first dimension/the second dimension and so on.. # Always are in the same order to a given type (e.g. spatio_temporal= of coordinates diff --git a/test/test_spatio_temporal_dataset/test_dataset.py b/test/test_spatio_temporal_dataset/test_dataset.py index 3c97f370..9aea31b2 100644 --- a/test/test_spatio_temporal_dataset/test_dataset.py +++ b/test/test_spatio_temporal_dataset/test_dataset.py @@ -53,11 +53,13 @@ class TestSpatioTemporalDataset(unittest.TestCase): coordinates=self.coordinates) def test_spatio_temporal_array(self): + # The test could have been on a given station. But we decided to do it for a given time step. self.load_dataset(nb_obs=1) # Load observation for time 0 - ind_time_0 = self.dataset.coordinates.ind_of_df_all_coordinates(coordinate_name=AbstractCoordinates.COORDINATE_T, - value=0) + ind_time_0 = self.dataset.coordinates.ind_of_df_all_coordinates( + coordinate_name=AbstractCoordinates.COORDINATE_T, + value=0) observation_at_time_0_v1 = self.dataset.observations.df_maxima_gev.loc[ind_time_0].values.flatten() # Load observation correspond to time 0 @@ -70,7 +72,28 @@ class TestSpatioTemporalDataset(unittest.TestCase): observation_at_time_0_v2)) def test_spatio_temporal_case_to_resolve(self): + # In this case, we must check that the observations are the same self.load_dataset(nb_obs=2) + + # Load observation for time 0 + ind_station_0 = self.dataset.coordinates.ind_of_df_all_coordinates( + coordinate_name=AbstractCoordinates.COORDINATE_X, + value=-1) + observation_at_station_0_v1 = self.dataset.observations.df_maxima_gev.loc[ind_station_0].values.flatten() + + # Load observation correspond to time 0 + maxima_gev = self.dataset.maxima_gev_for_spatial_extremes_package() + self.assertEqual(maxima_gev.shape[1], self.nb_points) + maxima_gev = np.transpose(maxima_gev) + self.assertEqual(maxima_gev.shape, (3, 2 * 2)) + observation_at_time_0_v2 = maxima_gev[1, :] + self.assertEqual(len(observation_at_time_0_v2), 4, msg='{}'.format(observation_at_time_0_v2)) + + # The order does not really matter here but we check it anyway + self.assertTrue(np.equal(observation_at_station_0_v1, observation_at_time_0_v2).all(), + msg='v1={} is different from v2={}'.format(observation_at_station_0_v1, + observation_at_time_0_v2)) + print(self.dataset.maxima_gev_for_spatial_extremes_package()) -- GitLab