Commit bf835ec0 authored by Le Roux Erwan
Browse files

[SPATIO TEMPORAL COORDINATES] add spatio temporal coordinates. add spatio temporal slicer test

parent 93a51e32
No related merge requests found
Showing with 85 additions and 72 deletions
+85 -72
...@@ -27,7 +27,8 @@ class AbstractCoordinates(object): ...@@ -27,7 +27,8 @@ class AbstractCoordinates(object):
# Coordinates columns # Coordinates columns
COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T] COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T]
def __init__(self, df_coord: pd.DataFrame, slicer_class: type, s_split_spatial: pd.Series = None, s_split_temporal: pd.Series = None): def __init__(self, df_coord: pd.DataFrame, slicer_class: type, s_split_spatial: pd.Series = None,
s_split_temporal: pd.Series = None):
self.df_all_coordinates = df_coord # type: pd.DataFrame self.df_all_coordinates = df_coord # type: pd.DataFrame
self.s_split_spatial = s_split_spatial # type: pd.Series self.s_split_spatial = s_split_spatial # type: pd.Series
self.s_split_temporal = s_split_temporal # type: pd.Series self.s_split_temporal = s_split_temporal # type: pd.Series
...@@ -37,23 +38,26 @@ class AbstractCoordinates(object): ...@@ -37,23 +38,26 @@ class AbstractCoordinates(object):
# ClassMethod constructor # ClassMethod constructor
@classmethod
def from_df(cls, df: pd.DataFrame):
pass
@classmethod @classmethod
def from_df_and_slicer(cls, df: pd.DataFrame, slicer_class: type, train_split_ratio: float = None): def from_df_and_slicer(cls, df: pd.DataFrame, slicer_class: type, train_split_ratio: float = None):
""" # train_split_ratio is shared between the spatial part of the data, and the temporal part
train_split_ratio is shared between the spatial part of the data, and the temporal part
"""
# All the index should be unique # All the index should be unique
assert len(set(df.index)) == len(df) assert len(set(df.index)) == len(df)
# Create a spatial split # Create a spatial split
if slicer_class in [SpatialSlicer, SpatioTemporalSlicer]: if slicer_class in [SpatialSlicer, SpatioTemporalSlicer]:
s_split_spatial = s_split_from_df(df, cls.COORDINATE_X, cls.SPATIAL_SPLIT, train_split_ratio, concat=False) s_split_spatial = s_split_from_df(df, cls.COORDINATE_X, cls.SPATIAL_SPLIT, train_split_ratio, True)
else: else:
s_split_spatial = None s_split_spatial = None
# Create a temporal split # Create a temporal split
if slicer_class in [TemporalSlicer, SpatioTemporalSlicer]: if slicer_class in [TemporalSlicer, SpatioTemporalSlicer]:
s_split_temporal = s_split_from_df(df, cls.COORDINATE_T, cls.TEMPORAL_SPLIT, train_split_ratio, concat=True) s_split_temporal = s_split_from_df(df, cls.COORDINATE_T, cls.TEMPORAL_SPLIT, train_split_ratio, False)
else: else:
s_split_temporal = None s_split_temporal = None
...@@ -71,18 +75,6 @@ class AbstractCoordinates(object): ...@@ -71,18 +75,6 @@ class AbstractCoordinates(object):
df.set_index(index_column_name, inplace=True) df.set_index(index_column_name, inplace=True)
return cls.from_df(df) return cls.from_df(df)
@classmethod
def from_nb_points(cls, nb_points: int, train_split_ratio: float = None, **kwargs):
# Call the default class method from csv
coordinates = cls.from_csv() # type: AbstractCoordinates
# Check that nb_points asked is not superior to the number of coordinates
nb_coordinates = len(coordinates)
if nb_points > nb_coordinates:
raise Exception('Nb coordinates in csv: {} < Nb points desired: {}'.format(nb_coordinates, nb_points))
# Sample randomly nb_points coordinates
df_sample = pd.DataFrame.sample(coordinates.df_merged, n=nb_points)
return cls.from_df(df=df_sample, train_split_ratio=train_split_ratio)
@property @property
def index(self): def index(self):
return self.df_all_coordinates.index return self.df_all_coordinates.index
......
...@@ -11,3 +11,15 @@ class AbstractSpatialCoordinates(AbstractCoordinates): ...@@ -11,3 +11,15 @@ class AbstractSpatialCoordinates(AbstractCoordinates):
assert cls.COORDINATE_X in df.columns assert cls.COORDINATE_X in df.columns
assert cls.COORDINATE_T not in df.columns assert cls.COORDINATE_T not in df.columns
return super().from_df_and_slicer(df, SpatialSlicer, train_split_ratio) return super().from_df_and_slicer(df, SpatialSlicer, train_split_ratio)
@classmethod
def from_nb_points(cls, nb_points: int, train_split_ratio: float = None, **kwargs):
# Call the default class method from csv
coordinates = cls.from_csv() # type: AbstractCoordinates
# Check that nb_points asked is not superior to the number of coordinates
nb_coordinates = len(coordinates)
if nb_points > nb_coordinates:
raise Exception('Nb coordinates in csv: {} < Nb points desired: {}'.format(nb_coordinates, nb_points))
# Sample randomly nb_points coordinates
df_sample = pd.DataFrame.sample(coordinates.df_merged, n=nb_points)
return cls.from_df(df=df_sample, train_split_ratio=train_split_ratio)
...@@ -10,4 +10,8 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates): ...@@ -10,4 +10,8 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates):
def from_df(cls, df: pd.DataFrame, train_split_ratio: float = None): def from_df(cls, df: pd.DataFrame, train_split_ratio: float = None):
assert cls.COORDINATE_T in df.columns assert cls.COORDINATE_T in df.columns
assert cls.COORDINATE_X in df.columns assert cls.COORDINATE_X in df.columns
# Assert that the time steps are in the good order with respect to the coordinates
nb_points = len(set(df[cls.COORDINATE_X]))
first_time_step_for_all_points = df.iloc[:nb_points][cls.COORDINATE_T]
assert len(set(first_time_step_for_all_points)) == 1
return super().from_df_and_slicer(df, SpatioTemporalSlicer, train_split_ratio) return super().from_df_and_slicer(df, SpatioTemporalSlicer, train_split_ratio)
\ No newline at end of file
...@@ -4,22 +4,24 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo ...@@ -4,22 +4,24 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo
from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates
from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \ from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \
AbstractSpatioTemporalCoordinates AbstractSpatioTemporalCoordinates
from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \
ConsecutiveTemporalCoordinates
from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
class UniformSpatioTemporalCoordinates(AbstractSpatioTemporalCoordinates): class UniformSpatioTemporalCoordinates(AbstractSpatioTemporalCoordinates):
@classmethod @classmethod
def from_nb_points(cls, nb_points, train_split_ratio: float = None, nb_time_steps=1, max_radius=1.0): def from_nb_points_and_nb_steps(cls, nb_points, nb_steps, train_split_ratio: float = None):
assert isinstance(nb_time_steps, int) and nb_time_steps >= 1 assert isinstance(nb_steps, int) and nb_steps >= 1
df_spatial = UniformSpatialCoordinates.df_spatial(nb_points) df_spatial = UniformSpatialCoordinates.df_spatial(nb_points=nb_points)
# df_temporal = ConsecutiveTemporalCoordinates.df_temporal(nb_temporal_steps=nb_temporal_steps)
df_time_steps = [] df_time_steps = []
for t in range(nb_time_steps): for t in range(nb_steps):
df_time_step = df_spatial.copy() df_time_step = df_spatial.copy()
df_time_step[cls.COORDINATE_T] = t df_time_step[cls.COORDINATE_T] = t
df_time_steps.append(df_time_step) df_time_steps.append(df_time_step)
df_time_steps = pd.concat(df_time_steps, ignore_index=True) df_time_steps = pd.concat(df_time_steps, ignore_index=True)
print(df_time_steps)
return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio) return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio)
...@@ -4,7 +4,7 @@ from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_ ...@@ -4,7 +4,7 @@ from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_
AbstractTemporalCoordinates AbstractTemporalCoordinates
class TemporalCoordinates(AbstractTemporalCoordinates): class ConsecutiveTemporalCoordinates(AbstractTemporalCoordinates):
pass pass
@classmethod @classmethod
......
from enum import Enum from enum import Enum
from typing import Union
import pandas as pd import pandas as pd
...@@ -44,7 +45,7 @@ def small_s_split_from_ratio(index: pd.Index, train_split_ratio): ...@@ -44,7 +45,7 @@ def small_s_split_from_ratio(index: pd.Index, train_split_ratio):
return s return s
def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, concat): def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, spatial_split) -> Union[None, pd.Series]:
df = df.copy() # type: pd.DataFrame df = df.copy() # type: pd.DataFrame
# Extract the index # Extract the index
if train_split_ratio is None: if train_split_ratio is None:
...@@ -55,17 +56,17 @@ def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, c ...@@ -55,17 +56,17 @@ def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, c
raise Exception('A split has already been defined') raise Exception('A split has already been defined')
else: else:
serie = df.drop_duplicates(subset=[column], keep='first')[column] serie = df.drop_duplicates(subset=[column], keep='first')[column]
assert len(df) % len(serie) == 0 assert len(df) % len(serie) == 0
multiplication_factor = len(df) // len(serie) multiplication_factor = len(df) // len(serie)
small_s_split = small_s_split_from_ratio(serie.index, train_split_ratio) small_s_split = small_s_split_from_ratio(serie.index, train_split_ratio)
if concat: if spatial_split:
# concatenation for spatial_split
s_split = pd.concat([small_s_split for _ in range(multiplication_factor)], ignore_index=True).copy() s_split = pd.concat([small_s_split for _ in range(multiplication_factor)], ignore_index=True).copy()
else: else:
# dilation # dilation for the temporal split
s_split = pd.Series(None, index=df.infer_objects()) s_split = pd.Series(None, index=df.index)
for i in range(len(s_split)): for i in range(len(s_split)):
s_split.iloc[i] = small_s_split.iloc[i % len(small_s_split)] s_split.iloc[i] = small_s_split.iloc[i // multiplication_factor]
s_split.index = df.index s_split.index = df.index
return s_split return s_split
...@@ -9,7 +9,8 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_2D_coo ...@@ -9,7 +9,8 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_2D_coo
AlpsStation2DCoordinatesBetweenZeroAndOne AlpsStation2DCoordinatesBetweenZeroAndOne
from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \
AlpsStation3DCoordinatesWithAnisotropy AlpsStation3DCoordinatesWithAnisotropy
from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import CircleSpatialCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \
CircleSpatialCoordinates
from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
...@@ -40,7 +41,7 @@ class TestSpatialCoordinates(unittest.TestCase): ...@@ -40,7 +41,7 @@ class TestSpatialCoordinates(unittest.TestCase):
class SpatioTemporalCoordinates(unittest.TestCase): class SpatioTemporalCoordinates(unittest.TestCase):
nb_points = 4 nb_points = 4
nb_times_steps = 2 nb_steps = 2
def tearDown(self): def tearDown(self):
c = Counter([len(self.coordinates.df_coordinates(split)) for split in SpatioTemporalSlicer.SPLITS]) c = Counter([len(self.coordinates.df_coordinates(split)) for split in SpatioTemporalSlicer.SPLITS])
...@@ -48,9 +49,9 @@ class SpatioTemporalCoordinates(unittest.TestCase): ...@@ -48,9 +49,9 @@ class SpatioTemporalCoordinates(unittest.TestCase):
self.assertTrue(good_count) self.assertTrue(good_count)
def test_temporal_circle(self): def test_temporal_circle(self):
self.coordinates = UniformSpatioTemporalCoordinates.from_nb_points(nb_points=self.nb_points, self.coordinates = UniformSpatioTemporalCoordinates.from_nb_points_and_nb_steps(nb_points=self.nb_points,
nb_time_steps=self.nb_times_steps, nb_steps=self.nb_steps,
train_split_ratio=0.5) train_split_ratio=0.5)
# def test_temporal_alps(self): # def test_temporal_alps(self):
# pass # pass
......
...@@ -17,8 +17,8 @@ class TestSlicerForDataset(unittest.TestCase): ...@@ -17,8 +17,8 @@ class TestSlicerForDataset(unittest.TestCase):
super().__init__(methodName) super().__init__(methodName)
self.dataset = None self.dataset = None
nb_spatial_points = 2 nb_points = 2
nb_temporal_steps = 2 nb_steps = 2
nb_obs = 2 nb_obs = 2
@property @property
...@@ -48,10 +48,10 @@ class TestSlicerForSpatialDataset(TestSlicerForDataset): ...@@ -48,10 +48,10 @@ class TestSlicerForSpatialDataset(TestSlicerForDataset):
@property @property
def complete_shape(self): def complete_shape(self):
return self.nb_spatial_points, self.nb_obs return self.nb_points, self.nb_obs
def load_datasets(self, train_split_ratio): def load_datasets(self, train_split_ratio):
coordinates_list = load_test_1D_and_2D_spatial_coordinates(nb_points=self.nb_spatial_points, coordinates_list = load_test_1D_and_2D_spatial_coordinates(nb_points=self.nb_points,
train_split_ratio=train_split_ratio) train_split_ratio=train_split_ratio)
dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs,
margin_model=ConstantMarginModel( margin_model=ConstantMarginModel(
...@@ -63,7 +63,7 @@ class TestSlicerForSpatialDataset(TestSlicerForDataset): ...@@ -63,7 +63,7 @@ class TestSlicerForSpatialDataset(TestSlicerForDataset):
def test_spatial_slicer_for_spatial_dataset(self): def test_spatial_slicer_for_spatial_dataset(self):
train_split_ratio_to_observation_shape = { train_split_ratio_to_observation_shape = {
None: self.complete_shape, None: self.complete_shape,
0.5: (self.nb_spatial_points // 2, self.nb_obs), 0.5: (self.nb_points // 2, self.nb_obs),
} }
self.check_shapes(train_split_ratio_to_observation_shape) self.check_shapes(train_split_ratio_to_observation_shape)
...@@ -72,10 +72,10 @@ class TestSlicerForTemporalDataset(TestSlicerForDataset): ...@@ -72,10 +72,10 @@ class TestSlicerForTemporalDataset(TestSlicerForDataset):
@property @property
def complete_shape(self): def complete_shape(self):
return self.nb_temporal_steps, self.nb_obs return self.nb_steps, self.nb_obs
def load_datasets(self, train_split_ratio): def load_datasets(self, train_split_ratio):
coordinates_list = load_test_temporal_coordinates(nb_temporal_steps=self.nb_temporal_steps, coordinates_list = load_test_temporal_coordinates(nb_steps=self.nb_steps,
train_split_ratio=train_split_ratio) train_split_ratio=train_split_ratio)
dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs,
margin_model=ConstantMarginModel( margin_model=ConstantMarginModel(
...@@ -87,33 +87,34 @@ class TestSlicerForTemporalDataset(TestSlicerForDataset): ...@@ -87,33 +87,34 @@ class TestSlicerForTemporalDataset(TestSlicerForDataset):
def test_temporal_slicer_for_temporal_dataset(self): def test_temporal_slicer_for_temporal_dataset(self):
ind_tuple_to_observation_shape = { ind_tuple_to_observation_shape = {
None: self.complete_shape, None: self.complete_shape,
0.5: (self.nb_temporal_steps // 2, self.nb_obs), 0.5: (self.nb_steps // 2, self.nb_obs),
} }
self.check_shapes(ind_tuple_to_observation_shape) self.check_shapes(ind_tuple_to_observation_shape)
# class TestSlicerForSpatioTemporalDataset(TestSlicerForDataset): class TestSlicerForSpatioTemporalDataset(TestSlicerForDataset):
#
# def complete_shape(self): @property
# return self.nb_spatial_points * self.nb_temporal_points, self.nb_obs def complete_shape(self):
# return self.nb_points * self.nb_steps, self.nb_obs
# def load_datasets(self, train_split_ratio):
# coordinates_list = load_test_spatiotemporal_coordinates(nb_points=self.nb_spatial_points, def load_datasets(self, train_split_ratio):
# train_split_ratio=train_split_ratio, coordinates_list = load_test_spatiotemporal_coordinates(nb_points=self.nb_points,
# nb_time_steps=self.nb_temporal_points) nb_steps=self.nb_steps,
# dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs, train_split_ratio=train_split_ratio)
# margin_model=ConstantMarginModel( dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs,
# coordinates=coordinates), margin_model=ConstantMarginModel(
# coordinates=coordinates, max_stable_model=Smith()) coordinates=coordinates),
# for coordinates in coordinates_list] coordinates=coordinates, max_stable_model=Smith())
# return dataset_list for coordinates in coordinates_list]
# return dataset_list
# def test_spatiotemporal_slicer_for_spatio_temporal_dataset(self):
# ind_tuple_to_observation_shape = { def test_spatiotemporal_slicer_for_spatio_temporal_dataset(self):
# None: self.complete_shape, ind_tuple_to_observation_shape = {
# 0.5: (1, 1), None: self.complete_shape,
# } 0.5: (self.nb_steps * self.nb_points // 4, self.nb_obs),
# self.check_shapes(ind_tuple_to_observation_shape) }
self.check_shapes(ind_tuple_to_observation_shape)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -14,7 +14,7 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_c ...@@ -14,7 +14,7 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_c
from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.generated_spatio_temporal_coordinates import \ from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.generated_spatio_temporal_coordinates import \
UniformSpatioTemporalCoordinates UniformSpatioTemporalCoordinates
from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates
from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import TemporalCoordinates from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import ConsecutiveTemporalCoordinates
""" """
Common objects to load for the test. Common objects to load for the test.
...@@ -25,7 +25,7 @@ In this case, unit test (at least on the constructor) must be ensured in the tes ...@@ -25,7 +25,7 @@ In this case, unit test (at least on the constructor) must be ensured in the tes
TEST_MAX_STABLE_MODEL = [Smith, BrownResnick, Schlather, Geometric, ExtremalT, ISchlather] TEST_MAX_STABLE_MODEL = [Smith, BrownResnick, Schlather, Geometric, ExtremalT, ISchlather]
TEST_1D_AND_2D_SPATIAL_COORDINATES = [UniformSpatialCoordinates, CircleSpatialCoordinates] TEST_1D_AND_2D_SPATIAL_COORDINATES = [UniformSpatialCoordinates, CircleSpatialCoordinates]
TEST_3D_SPATIAL_COORDINATES = [AlpsStation3DCoordinatesWithAnisotropy] TEST_3D_SPATIAL_COORDINATES = [AlpsStation3DCoordinatesWithAnisotropy]
TEST_TEMPORAL_COORDINATES = [TemporalCoordinates] TEST_TEMPORAL_COORDINATES = [ConsecutiveTemporalCoordinates]
TEST_SPATIO_TEMPORAL_COORDINATES = [UniformSpatioTemporalCoordinates] TEST_SPATIO_TEMPORAL_COORDINATES = [UniformSpatioTemporalCoordinates]
TEST_MARGIN_TYPES = [ConstantMarginModel, LinearAllParametersAllDimsMarginModel][:] TEST_MARGIN_TYPES = [ConstantMarginModel, LinearAllParametersAllDimsMarginModel][:]
TEST_MAX_STABLE_ESTIMATOR = [MaxStableEstimator] TEST_MAX_STABLE_ESTIMATOR = [MaxStableEstimator]
...@@ -71,12 +71,12 @@ def load_test_3D_spatial_coordinates(nb_points): ...@@ -71,12 +71,12 @@ def load_test_3D_spatial_coordinates(nb_points):
return load_test_spatial_coordinates(nb_points, TEST_3D_SPATIAL_COORDINATES) return load_test_spatial_coordinates(nb_points, TEST_3D_SPATIAL_COORDINATES)
def load_test_temporal_coordinates(nb_temporal_steps, train_split_ratio=None): def load_test_temporal_coordinates(nb_steps, train_split_ratio=None):
return [coordinate_class.from_nb_temporal_steps(nb_temporal_steps, train_split_ratio) for coordinate_class in return [coordinate_class.from_nb_temporal_steps(nb_steps, train_split_ratio) for coordinate_class in
TEST_TEMPORAL_COORDINATES] TEST_TEMPORAL_COORDINATES]
def load_test_spatiotemporal_coordinates(nb_points, train_split_ratio=None, nb_time_steps=None): def load_test_spatiotemporal_coordinates(nb_points, nb_steps, train_split_ratio=None):
return [coordinate_class.from_nb_points(nb_points=nb_points, train_split_ratio=train_split_ratio, return [coordinate_class.from_nb_points_and_nb_steps(nb_points=nb_points, nb_steps=nb_steps,
nb_time_steps=nb_time_steps) train_split_ratio=train_split_ratio)
for coordinate_class in TEST_SPATIO_TEMPORAL_COORDINATES] for coordinate_class in TEST_SPATIO_TEMPORAL_COORDINATES]
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment