diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index 0c55f3dd2338d0d5b88cdc07ebc1e86c37234151..7035c2f1217b943ca69f6f417888e6f9b466c1b3 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -27,7 +27,8 @@ class AbstractCoordinates(object): # Coordinates columns COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T] - def __init__(self, df_coord: pd.DataFrame, slicer_class: type, s_split_spatial: pd.Series = None, s_split_temporal: pd.Series = None): + def __init__(self, df_coord: pd.DataFrame, slicer_class: type, s_split_spatial: pd.Series = None, + s_split_temporal: pd.Series = None): self.df_all_coordinates = df_coord # type: pd.DataFrame self.s_split_spatial = s_split_spatial # type: pd.Series self.s_split_temporal = s_split_temporal # type: pd.Series @@ -37,23 +38,26 @@ class AbstractCoordinates(object): # ClassMethod constructor + @classmethod + def from_df(cls, df: pd.DataFrame): + pass + @classmethod def from_df_and_slicer(cls, df: pd.DataFrame, slicer_class: type, train_split_ratio: float = None): - """ - train_split_ratio is shared between the spatial part of the data, and the temporal part - """ + # train_split_ratio is shared between the spatial part of the data, and the temporal part + # All the index should be unique assert len(set(df.index)) == len(df) # Create a spatial split if slicer_class in [SpatialSlicer, SpatioTemporalSlicer]: - s_split_spatial = s_split_from_df(df, cls.COORDINATE_X, cls.SPATIAL_SPLIT, train_split_ratio, concat=False) + s_split_spatial = s_split_from_df(df, cls.COORDINATE_X, cls.SPATIAL_SPLIT, train_split_ratio, True) else: s_split_spatial = None # Create a temporal split if slicer_class in [TemporalSlicer, SpatioTemporalSlicer]: - s_split_temporal = s_split_from_df(df, cls.COORDINATE_T, cls.TEMPORAL_SPLIT, train_split_ratio, concat=True) + s_split_temporal = s_split_from_df(df, cls.COORDINATE_T, cls.TEMPORAL_SPLIT, train_split_ratio, False) else: s_split_temporal = None @@ -71,18 +75,6 @@ class AbstractCoordinates(object): df.set_index(index_column_name, inplace=True) return cls.from_df(df) - @classmethod - def from_nb_points(cls, nb_points: int, train_split_ratio: float = None, **kwargs): - # Call the default class method from csv - coordinates = cls.from_csv() # type: AbstractCoordinates - # Check that nb_points asked is not superior to the number of coordinates - nb_coordinates = len(coordinates) - if nb_points > nb_coordinates: - raise Exception('Nb coordinates in csv: {} < Nb points desired: {}'.format(nb_coordinates, nb_points)) - # Sample randomly nb_points coordinates - df_sample = pd.DataFrame.sample(coordinates.df_merged, n=nb_points) - return cls.from_df(df=df_sample, train_split_ratio=train_split_ratio) - @property def index(self): return self.df_all_coordinates.index diff --git a/spatio_temporal_dataset/coordinates/spatial_coordinates/abstract_spatial_coordinates.py b/spatio_temporal_dataset/coordinates/spatial_coordinates/abstract_spatial_coordinates.py index 50a4367546f076707a00ab9766ad592bfe18db20..f78456454fb9e747f4c22a10e6368aef08bfcc4e 100644 --- a/spatio_temporal_dataset/coordinates/spatial_coordinates/abstract_spatial_coordinates.py +++ b/spatio_temporal_dataset/coordinates/spatial_coordinates/abstract_spatial_coordinates.py @@ -11,3 +11,15 @@ class AbstractSpatialCoordinates(AbstractCoordinates): assert cls.COORDINATE_X in df.columns assert cls.COORDINATE_T not in df.columns return super().from_df_and_slicer(df, SpatialSlicer, train_split_ratio) + + @classmethod + def from_nb_points(cls, nb_points: int, train_split_ratio: float = None, **kwargs): + # Call the default class method from csv + coordinates = cls.from_csv() # type: AbstractCoordinates + # Check that nb_points asked is not superior to the number of coordinates + nb_coordinates = len(coordinates) + if nb_points > nb_coordinates: + raise Exception('Nb coordinates in csv: {} < Nb points desired: {}'.format(nb_coordinates, nb_points)) + # Sample randomly nb_points coordinates + df_sample = pd.DataFrame.sample(coordinates.df_merged, n=nb_points) + return cls.from_df(df=df_sample, train_split_ratio=train_split_ratio) diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py index 7074c5440b42d2406423ebf72cb3d8d00d3c0dc7..651eb34970b6a3a17f6ef6a3d87e5962e7489a75 100644 --- a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py @@ -10,4 +10,8 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates): def from_df(cls, df: pd.DataFrame, train_split_ratio: float = None): assert cls.COORDINATE_T in df.columns assert cls.COORDINATE_X in df.columns + # Assert that the time steps are in the good order with respect to the coordinates + nb_points = len(set(df[cls.COORDINATE_X])) + first_time_step_for_all_points = df.iloc[:nb_points][cls.COORDINATE_T] + assert len(set(first_time_step_for_all_points)) == 1 return super().from_df_and_slicer(df, SpatioTemporalSlicer, train_split_ratio) \ No newline at end of file diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py index f2b4b648ccce456ed9025fac0297959ca15936fe..fd1aa1f374f4b6c1f00fb4cb2e5d75867aa98e11 100644 --- a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py @@ -4,22 +4,24 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \ AbstractSpatioTemporalCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \ + ConsecutiveTemporalCoordinates from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer class UniformSpatioTemporalCoordinates(AbstractSpatioTemporalCoordinates): @classmethod - def from_nb_points(cls, nb_points, train_split_ratio: float = None, nb_time_steps=1, max_radius=1.0): - assert isinstance(nb_time_steps, int) and nb_time_steps >= 1 - df_spatial = UniformSpatialCoordinates.df_spatial(nb_points) + def from_nb_points_and_nb_steps(cls, nb_points, nb_steps, train_split_ratio: float = None): + assert isinstance(nb_steps, int) and nb_steps >= 1 + df_spatial = UniformSpatialCoordinates.df_spatial(nb_points=nb_points) + # df_temporal = ConsecutiveTemporalCoordinates.df_temporal(nb_temporal_steps=nb_temporal_steps) df_time_steps = [] - for t in range(nb_time_steps): + for t in range(nb_steps): df_time_step = df_spatial.copy() df_time_step[cls.COORDINATE_T] = t df_time_steps.append(df_time_step) df_time_steps = pd.concat(df_time_steps, ignore_index=True) - print(df_time_steps) return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio) diff --git a/spatio_temporal_dataset/coordinates/temporal_coordinates/generated_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/temporal_coordinates/generated_temporal_coordinates.py index 749cfc27692e0fee218d15f3775cead9e44787a1..518459725907c7a457d49bde000f787b8d7e9efd 100644 --- a/spatio_temporal_dataset/coordinates/temporal_coordinates/generated_temporal_coordinates.py +++ b/spatio_temporal_dataset/coordinates/temporal_coordinates/generated_temporal_coordinates.py @@ -4,7 +4,7 @@ from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_ AbstractTemporalCoordinates -class TemporalCoordinates(AbstractTemporalCoordinates): +class ConsecutiveTemporalCoordinates(AbstractTemporalCoordinates): pass @classmethod diff --git a/spatio_temporal_dataset/slicer/split.py b/spatio_temporal_dataset/slicer/split.py index 6e33c57f2e77bc34e4146d9ab783e79de84e27f5..564657e0905957d7a699cb834cad75106859eef2 100644 --- a/spatio_temporal_dataset/slicer/split.py +++ b/spatio_temporal_dataset/slicer/split.py @@ -1,4 +1,5 @@ from enum import Enum +from typing import Union import pandas as pd @@ -44,7 +45,7 @@ def small_s_split_from_ratio(index: pd.Index, train_split_ratio): return s -def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, concat): +def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, spatial_split) -> Union[None, pd.Series]: df = df.copy() # type: pd.DataFrame # Extract the index if train_split_ratio is None: @@ -55,17 +56,17 @@ def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, c raise Exception('A split has already been defined') else: serie = df.drop_duplicates(subset=[column], keep='first')[column] - assert len(df) % len(serie) == 0 multiplication_factor = len(df) // len(serie) small_s_split = small_s_split_from_ratio(serie.index, train_split_ratio) - if concat: + if spatial_split: + # concatenation for spatial_split s_split = pd.concat([small_s_split for _ in range(multiplication_factor)], ignore_index=True).copy() else: - # dilatjon - s_split = pd.Series(None, index=df.infer_objects()) + # dilatjon for the temporal split + s_split = pd.Series(None, index=df.index) for i in range(len(s_split)): - s_split.iloc[i] = small_s_split.iloc[i % len(small_s_split)] + s_split.iloc[i] = small_s_split.iloc[i // multiplication_factor] s_split.index = df.index return s_split diff --git a/test/test_spatio_temporal_dataset/test_coordinates.py b/test/test_spatio_temporal_dataset/test_coordinates.py index a82080ef8f3aefc5867798dcc77c6c5f96db52d5..a95abdad6569564ff309c5b81932793673c84a53 100644 --- a/test/test_spatio_temporal_dataset/test_coordinates.py +++ b/test/test_spatio_temporal_dataset/test_coordinates.py @@ -9,7 +9,8 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_2D_coo AlpsStation2DCoordinatesBetweenZeroAndOne from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \ AlpsStation3DCoordinatesWithAnisotropy -from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import CircleSpatialCoordinates +from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \ + CircleSpatialCoordinates from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer @@ -40,7 +41,7 @@ class TestSpatialCoordinates(unittest.TestCase): class SpatioTemporalCoordinates(unittest.TestCase): nb_points = 4 - nb_times_steps = 2 + nb_steps = 2 def tearDown(self): c = Counter([len(self.coordinates.df_coordinates(split)) for split in SpatioTemporalSlicer.SPLITS]) @@ -48,9 +49,9 @@ class SpatioTemporalCoordinates(unittest.TestCase): self.assertTrue(good_count) def test_temporal_circle(self): - self.coordinates = UniformSpatioTemporalCoordinates.from_nb_points(nb_points=self.nb_points, - nb_time_steps=self.nb_times_steps, - train_split_ratio=0.5) + self.coordinates = UniformSpatioTemporalCoordinates.from_nb_points_and_nb_steps(nb_points=self.nb_points, + nb_steps=self.nb_steps, + train_split_ratio=0.5) # def test_temporal_alps(self): # pass diff --git a/test/test_spatio_temporal_dataset/test_slicer.py b/test/test_spatio_temporal_dataset/test_slicer.py index c41d4f252acb3b56ac79e0d822142228fbb0af56..6551bee311dbaf5280218a6066df5b3bc75642e3 100644 --- a/test/test_spatio_temporal_dataset/test_slicer.py +++ b/test/test_spatio_temporal_dataset/test_slicer.py @@ -17,8 +17,8 @@ class TestSlicerForDataset(unittest.TestCase): super().__init__(methodName) self.dataset = None - nb_spatial_points = 2 - nb_temporal_steps = 2 + nb_points = 2 + nb_steps = 2 nb_obs = 2 @property @@ -48,10 +48,10 @@ class TestSlicerForSpatialDataset(TestSlicerForDataset): @property def complete_shape(self): - return self.nb_spatial_points, self.nb_obs + return self.nb_points, self.nb_obs def load_datasets(self, train_split_ratio): - coordinates_list = load_test_1D_and_2D_spatial_coordinates(nb_points=self.nb_spatial_points, + coordinates_list = load_test_1D_and_2D_spatial_coordinates(nb_points=self.nb_points, train_split_ratio=train_split_ratio) dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, margin_model=ConstantMarginModel( @@ -63,7 +63,7 @@ class TestSlicerForSpatialDataset(TestSlicerForDataset): def test_spatial_slicer_for_spatial_dataset(self): train_split_ratio_to_observation_shape = { None: self.complete_shape, - 0.5: (self.nb_spatial_points // 2, self.nb_obs), + 0.5: (self.nb_points // 2, self.nb_obs), } self.check_shapes(train_split_ratio_to_observation_shape) @@ -72,10 +72,10 @@ class TestSlicerForTemporalDataset(TestSlicerForDataset): @property def complete_shape(self): - return self.nb_temporal_steps, self.nb_obs + return self.nb_steps, self.nb_obs def load_datasets(self, train_split_ratio): - coordinates_list = load_test_temporal_coordinates(nb_temporal_steps=self.nb_temporal_steps, + coordinates_list = load_test_temporal_coordinates(nb_steps=self.nb_steps, train_split_ratio=train_split_ratio) dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, margin_model=ConstantMarginModel( @@ -87,33 +87,34 @@ class TestSlicerForTemporalDataset(TestSlicerForDataset): def test_temporal_slicer_for_temporal_dataset(self): ind_tuple_to_observation_shape = { None: self.complete_shape, - 0.5: (self.nb_temporal_steps // 2, self.nb_obs), + 0.5: (self.nb_steps // 2, self.nb_obs), } self.check_shapes(ind_tuple_to_observation_shape) -# class TestSlicerForSpatioTemporalDataset(TestSlicerForDataset): -# -# def complete_shape(self): -# return self.nb_spatial_points * self.nb_temporal_points, self.nb_obs -# -# def load_datasets(self, train_split_ratio): -# coordinates_list = load_test_spatiotemporal_coordinates(nb_points=self.nb_spatial_points, -# train_split_ratio=train_split_ratio, -# nb_time_steps=self.nb_temporal_points) -# dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, -# margin_model=ConstantMarginModel( -# coordinates=coordinates), -# coordinates=coordinates, max_stable_model=Smith()) -# for coordinates in coordinates_list] -# return dataset_list -# -# def test_spatiotemporal_slicer_for_spatio_temporal_dataset(self): -# ind_tuple_to_observation_shape = { -# None: self.complete_shape, -# 0.5: (1, 1), -# } -# self.check_shapes(ind_tuple_to_observation_shape) +class TestSlicerForSpatioTemporalDataset(TestSlicerForDataset): + + @property + def complete_shape(self): + return self.nb_points * self.nb_steps, self.nb_obs + + def load_datasets(self, train_split_ratio): + coordinates_list = load_test_spatiotemporal_coordinates(nb_points=self.nb_points, + nb_steps=self.nb_steps, + train_split_ratio=train_split_ratio) + dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, + margin_model=ConstantMarginModel( + coordinates=coordinates), + coordinates=coordinates, max_stable_model=Smith()) + for coordinates in coordinates_list] + return dataset_list + + def test_spatiotemporal_slicer_for_spatio_temporal_dataset(self): + ind_tuple_to_observation_shape = { + None: self.complete_shape, + 0.5: (self.nb_steps * self.nb_points // 4, self.nb_obs), + } + self.check_shapes(ind_tuple_to_observation_shape) if __name__ == '__main__': diff --git a/test/test_utils.py b/test/test_utils.py index 00b74491b9390317842379b11710d081f66f661d..b9badf698f90457131e0760cea155b24c303ca39 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -14,7 +14,7 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_c from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.generated_spatio_temporal_coordinates import \ UniformSpatioTemporalCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates -from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import TemporalCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import ConsecutiveTemporalCoordinates """ Common objects to load for the test. @@ -25,7 +25,7 @@ In this case, unit test (at least on the constructor) must be ensured in the tes TEST_MAX_STABLE_MODEL = [Smith, BrownResnick, Schlather, Geometric, ExtremalT, ISchlather] TEST_1D_AND_2D_SPATIAL_COORDINATES = [UniformSpatialCoordinates, CircleSpatialCoordinates] TEST_3D_SPATIAL_COORDINATES = [AlpsStation3DCoordinatesWithAnisotropy] -TEST_TEMPORAL_COORDINATES = [TemporalCoordinates] +TEST_TEMPORAL_COORDINATES = [ConsecutiveTemporalCoordinates] TEST_SPATIO_TEMPORAL_COORDINATES = [UniformSpatioTemporalCoordinates] TEST_MARGIN_TYPES = [ConstantMarginModel, LinearAllParametersAllDimsMarginModel][:] TEST_MAX_STABLE_ESTIMATOR = [MaxStableEstimator] @@ -71,12 +71,12 @@ def load_test_3D_spatial_coordinates(nb_points): return load_test_spatial_coordinates(nb_points, TEST_3D_SPATIAL_COORDINATES) -def load_test_temporal_coordinates(nb_temporal_steps, train_split_ratio=None): - return [coordinate_class.from_nb_temporal_steps(nb_temporal_steps, train_split_ratio) for coordinate_class in +def load_test_temporal_coordinates(nb_steps, train_split_ratio=None): + return [coordinate_class.from_nb_temporal_steps(nb_steps, train_split_ratio) for coordinate_class in TEST_TEMPORAL_COORDINATES] -def load_test_spatiotemporal_coordinates(nb_points, train_split_ratio=None, nb_time_steps=None): - return [coordinate_class.from_nb_points(nb_points=nb_points, train_split_ratio=train_split_ratio, - nb_time_steps=nb_time_steps) +def load_test_spatiotemporal_coordinates(nb_points, nb_steps, train_split_ratio=None): + return [coordinate_class.from_nb_points_and_nb_steps(nb_points=nb_points, nb_steps=nb_steps, + train_split_ratio=train_split_ratio) for coordinate_class in TEST_SPATIO_TEMPORAL_COORDINATES]