diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py index 24c9c8c0d27a8f108e892f41f83b4b042c31b6e5..53144ef898dd8ce852fe445351470208383e91e8 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py @@ -113,6 +113,7 @@ def complete_analysis(only_first_one=False): def trend_analysis(): save_to_file = True only_first_one = False + # [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900, 4200, 4500, 4800] to test for others altitudes = [300, 1200, 2100, 3000][:] study_classes = [CrocusSwe, CrocusDepth, SafranSnowfall, SafranRainfall, SafranTemperature] for study in study_iterator_global(study_classes, only_first_one=only_first_one, altitudes=altitudes): diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index 1a22f3961ea2805e47c5d891c9462004ed9cc6a5..7b6ae41b0ebfd60023af37a0349d345286b09adc 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd from mpl_toolkits.mplot3d import Axes3D +from spatio_temporal_dataset.coordinates.utils import get_index_without_spatio_temporal_index_suffix from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer, df_sliced from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer @@ -176,7 +177,12 @@ class AbstractCoordinates(object): return self.df_coordinates(split).loc[:, self.coordinates_spatial_names].drop_duplicates() def spatial_index(self, split: Split = Split.all) -> pd.Index: - return self.df_spatial_coordinates(split).index + df_spatial = self.df_spatial_coordinates(split) + if self.has_spatio_temporal_coordinates: + # Remove the spatio temporal index suffix + return get_index_without_spatio_temporal_index_suffix(df_spatial) + else: + return df_spatial.index # Temporal attributes diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py index 485368ec5b01beea7ae07396bfb139f4ef8d4dd5..53e4af8af4832ce85bf131937c127c4bee618011 100644 --- a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py @@ -1,6 +1,7 @@ import pandas as pd from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.utils import get_index_with_spatio_temporal_index_suffix from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer @@ -19,13 +20,12 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates): @classmethod def from_df_spatial_and_nb_steps(cls, df_spatial, nb_steps, train_split_ratio: float = None, start=0): df_time_steps = [] - index_type = type(df_spatial.index[0]) for t in range(nb_steps): df_time_step = df_spatial.copy() df_time_step[cls.COORDINATE_T] = start + t - index_suffix = index_type(t * len(df_spatial)) - time_step_index = [i + index_suffix for i in df_spatial.index] - df_time_step.index = time_step_index + df_time_step.index = get_index_with_spatio_temporal_index_suffix(df_spatial, t) df_time_steps.append(df_time_step) df_time_steps = pd.concat(df_time_steps) return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio) + + diff --git a/spatio_temporal_dataset/coordinates/utils.py b/spatio_temporal_dataset/coordinates/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5261eafbb94618cb52af55b78d808130d6012e53 --- /dev/null +++ b/spatio_temporal_dataset/coordinates/utils.py @@ -0,0 +1,23 @@ +# Suffix to differentiate between spatio temporal index and spatial index +import pandas as pd +import numpy as np + + +def get_index_suffix(df_spatial: pd.DataFrame, t): + index_type = type(df_spatial.index[0]) + assert index_type in [int, float, str, np.int64, np.float64], index_type + return index_type(t * len(df_spatial)) + + +def get_index_with_spatio_temporal_index_suffix(df_spatial: pd.DataFrame, t): + index_suffix = get_index_suffix(df_spatial, t) + return pd.Index([i + index_suffix for i in df_spatial.index]) + + +def get_index_without_spatio_temporal_index_suffix(df_spatial: pd.DataFrame): + index_suffix = get_index_suffix(df_spatial, 0) + if isinstance(index_suffix, str): + return df_spatial.index.str.split(index_suffix).str.join('') + else: + return df_spatial.index - index_suffix + diff --git a/spatio_temporal_dataset/dataset/abstract_dataset.py b/spatio_temporal_dataset/dataset/abstract_dataset.py index 1b2996c8df6f9a71413150727c1d84c0e2368b71..2dbb64761a2e303a5c0dc1dcb6792b856399c68c 100644 --- a/spatio_temporal_dataset/dataset/abstract_dataset.py +++ b/spatio_temporal_dataset/dataset/abstract_dataset.py @@ -56,7 +56,9 @@ class AbstractDataset(object): def transform_maxima_for_spatial_extreme_package(self, maxima_function, split) -> np.ndarray: array = maxima_function(split) if self.coordinates.has_spatio_temporal_coordinates: - return array.reshape(self.coordinates.spatio_temporal_shape(split)[::-1]) + inverted_shape = list(self.coordinates.spatio_temporal_shape(split)[::-1]) + inverted_shape[0] *= self.observations.nb_obs + return array.reshape(inverted_shape) else: return np.transpose(array) diff --git a/test/test_spatio_temporal_dataset/test_coordinates.py b/test/test_spatio_temporal_dataset/test_coordinates.py index a2c0b7b4ce68de6297f1613ecc58ff4dbcbdc105..5ee85578f5aee3412d19e2d28b660772e33abe92 100644 --- a/test/test_spatio_temporal_dataset/test_coordinates.py +++ b/test/test_spatio_temporal_dataset/test_coordinates.py @@ -13,6 +13,7 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coo AlpsStation3DCoordinatesWithAnisotropy from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \ CircleSpatialCoordinates +from spatio_temporal_dataset.coordinates.utils import get_index_with_spatio_temporal_index_suffix from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer @@ -65,14 +66,18 @@ class SpatioTemporalCoordinates(unittest.TestCase): # the uniqueness of each spatio temporal index is not garanteed by the current algo # it will work in classical cases, and raise an assert when uniqueness is needed (when using a slicer) index1 = pd.Series(spatial_coordinates.spatial_index()) - # Add the suffix to the index1 - suffix = '0' if isinstance(df_spatial.index[0], str) else 0 - index1 += suffix index2 = pd.Series(coordinates.spatial_index()) ind = index1 != index2 # type: pd.Series self.assertEqual(sum(ind), 0, msg="spatial_coordinates:\n{} \n!= spatio_temporal_coordinates \n{}". format(index1.loc[ind], index2.loc[ind])) + index1 = get_index_with_spatio_temporal_index_suffix(spatial_coordinates.df_spatial_coordinates(), t=0) + index1 = pd.Series(index1) + index2 = pd.Series(coordinates.df_spatial_coordinates().index) + ind = index1 != index2 # type: pd.Series + self.assertEqual(sum(ind), 0, msg="spatial_coordinates:\n{} \n!= spatio_temporal_coordinates \n{}". + format(index1.loc[ind], index2.loc[ind])) + def test_ordered_coordinates(self): # Order coordinates, to ensure that the first dimension/the second dimension and so on.. # Always are in the same order to a given type (e.g. spatio_temporal= of coordinates diff --git a/test/test_spatio_temporal_dataset/test_dataset.py b/test/test_spatio_temporal_dataset/test_dataset.py index 3c97f370ca2df967a953b0dd679f2778d643cd14..9aea31b224b1a4080a8263d525791f804cfcf44e 100644 --- a/test/test_spatio_temporal_dataset/test_dataset.py +++ b/test/test_spatio_temporal_dataset/test_dataset.py @@ -53,11 +53,13 @@ class TestSpatioTemporalDataset(unittest.TestCase): coordinates=self.coordinates) def test_spatio_temporal_array(self): + # The test could have been on a given station. But we decided to do it for a given time step. self.load_dataset(nb_obs=1) # Load observation for time 0 - ind_time_0 = self.dataset.coordinates.ind_of_df_all_coordinates(coordinate_name=AbstractCoordinates.COORDINATE_T, - value=0) + ind_time_0 = self.dataset.coordinates.ind_of_df_all_coordinates( + coordinate_name=AbstractCoordinates.COORDINATE_T, + value=0) observation_at_time_0_v1 = self.dataset.observations.df_maxima_gev.loc[ind_time_0].values.flatten() # Load observation correspond to time 0 @@ -70,7 +72,28 @@ class TestSpatioTemporalDataset(unittest.TestCase): observation_at_time_0_v2)) def test_spatio_temporal_case_to_resolve(self): + # In this case, we must check that the observations are the same self.load_dataset(nb_obs=2) + + # Load observation for time 0 + ind_station_0 = self.dataset.coordinates.ind_of_df_all_coordinates( + coordinate_name=AbstractCoordinates.COORDINATE_X, + value=-1) + observation_at_station_0_v1 = self.dataset.observations.df_maxima_gev.loc[ind_station_0].values.flatten() + + # Load observation correspond to time 0 + maxima_gev = self.dataset.maxima_gev_for_spatial_extremes_package() + self.assertEqual(maxima_gev.shape[1], self.nb_points) + maxima_gev = np.transpose(maxima_gev) + self.assertEqual(maxima_gev.shape, (3, 2 * 2)) + observation_at_time_0_v2 = maxima_gev[1, :] + self.assertEqual(len(observation_at_time_0_v2), 4, msg='{}'.format(observation_at_time_0_v2)) + + # The order does not really matter here but we check it anyway + self.assertTrue(np.equal(observation_at_station_0_v1, observation_at_time_0_v2).all(), + msg='v1={} is different from v2={}'.format(observation_at_station_0_v1, + observation_at_time_0_v2)) + print(self.dataset.maxima_gev_for_spatial_extremes_package())