Commit bf835ec0 authored by Le Roux Erwan
Browse files

[SPATIO TEMPORAL COORDINATES] add spatio temporal coordinates. add spatio temporal slicer test

parent 93a51e32
No related merge requests found
Showing with 85 additions and 72 deletions
+85 -72
......@@ -27,7 +27,8 @@ class AbstractCoordinates(object):
# Coordinates columns
COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T]
def __init__(self, df_coord: pd.DataFrame, slicer_class: type, s_split_spatial: pd.Series = None, s_split_temporal: pd.Series = None):
def __init__(self, df_coord: pd.DataFrame, slicer_class: type, s_split_spatial: pd.Series = None,
s_split_temporal: pd.Series = None):
self.df_all_coordinates = df_coord # type: pd.DataFrame
self.s_split_spatial = s_split_spatial # type: pd.Series
self.s_split_temporal = s_split_temporal # type: pd.Series
......@@ -37,23 +38,26 @@ class AbstractCoordinates(object):
# ClassMethod constructor
@classmethod
def from_df(cls, df: pd.DataFrame):
pass
@classmethod
def from_df_and_slicer(cls, df: pd.DataFrame, slicer_class: type, train_split_ratio: float = None):
"""
train_split_ratio is shared between the spatial part of the data, and the temporal part
"""
# train_split_ratio is shared between the spatial part of the data, and the temporal part
# All the index should be unique
assert len(set(df.index)) == len(df)
# Create a spatial split
if slicer_class in [SpatialSlicer, SpatioTemporalSlicer]:
s_split_spatial = s_split_from_df(df, cls.COORDINATE_X, cls.SPATIAL_SPLIT, train_split_ratio, concat=False)
s_split_spatial = s_split_from_df(df, cls.COORDINATE_X, cls.SPATIAL_SPLIT, train_split_ratio, True)
else:
s_split_spatial = None
# Create a temporal split
if slicer_class in [TemporalSlicer, SpatioTemporalSlicer]:
s_split_temporal = s_split_from_df(df, cls.COORDINATE_T, cls.TEMPORAL_SPLIT, train_split_ratio, concat=True)
s_split_temporal = s_split_from_df(df, cls.COORDINATE_T, cls.TEMPORAL_SPLIT, train_split_ratio, False)
else:
s_split_temporal = None
......@@ -71,18 +75,6 @@ class AbstractCoordinates(object):
df.set_index(index_column_name, inplace=True)
return cls.from_df(df)
@classmethod
def from_nb_points(cls, nb_points: int, train_split_ratio: float = None, **kwargs):
# Call the default class method from csv
coordinates = cls.from_csv() # type: AbstractCoordinates
# Check that the requested nb_points does not exceed the number of coordinates
nb_coordinates = len(coordinates)
if nb_points > nb_coordinates:
raise Exception('Nb coordinates in csv: {} < Nb points desired: {}'.format(nb_coordinates, nb_points))
# Sample randomly nb_points coordinates
df_sample = pd.DataFrame.sample(coordinates.df_merged, n=nb_points)
return cls.from_df(df=df_sample, train_split_ratio=train_split_ratio)
@property
def index(self):
return self.df_all_coordinates.index
......
......@@ -11,3 +11,15 @@ class AbstractSpatialCoordinates(AbstractCoordinates):
assert cls.COORDINATE_X in df.columns
assert cls.COORDINATE_T not in df.columns
return super().from_df_and_slicer(df, SpatialSlicer, train_split_ratio)
@classmethod
def from_nb_points(cls, nb_points: int, train_split_ratio: float = None, **kwargs):
# Call the default class method from csv
coordinates = cls.from_csv() # type: AbstractCoordinates
# Check that the requested nb_points does not exceed the number of coordinates
nb_coordinates = len(coordinates)
if nb_points > nb_coordinates:
raise Exception('Nb coordinates in csv: {} < Nb points desired: {}'.format(nb_coordinates, nb_points))
# Sample randomly nb_points coordinates
df_sample = pd.DataFrame.sample(coordinates.df_merged, n=nb_points)
return cls.from_df(df=df_sample, train_split_ratio=train_split_ratio)
......@@ -10,4 +10,8 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates):
def from_df(cls, df: pd.DataFrame, train_split_ratio: float = None):
assert cls.COORDINATE_T in df.columns
assert cls.COORDINATE_X in df.columns
# Assert that the time steps are in the correct order with respect to the coordinates
nb_points = len(set(df[cls.COORDINATE_X]))
first_time_step_for_all_points = df.iloc[:nb_points][cls.COORDINATE_T]
assert len(set(first_time_step_for_all_points)) == 1
return super().from_df_and_slicer(df, SpatioTemporalSlicer, train_split_ratio)
\ No newline at end of file
......@@ -4,22 +4,24 @@ from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoo
from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates
from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \
AbstractSpatioTemporalCoordinates
from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \
ConsecutiveTemporalCoordinates
from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
class UniformSpatioTemporalCoordinates(AbstractSpatioTemporalCoordinates):
@classmethod
def from_nb_points(cls, nb_points, train_split_ratio: float = None, nb_time_steps=1, max_radius=1.0):
assert isinstance(nb_time_steps, int) and nb_time_steps >= 1
df_spatial = UniformSpatialCoordinates.df_spatial(nb_points)
def from_nb_points_and_nb_steps(cls, nb_points, nb_steps, train_split_ratio: float = None):
assert isinstance(nb_steps, int) and nb_steps >= 1
df_spatial = UniformSpatialCoordinates.df_spatial(nb_points=nb_points)
# df_temporal = ConsecutiveTemporalCoordinates.df_temporal(nb_temporal_steps=nb_temporal_steps)
df_time_steps = []
for t in range(nb_time_steps):
for t in range(nb_steps):
df_time_step = df_spatial.copy()
df_time_step[cls.COORDINATE_T] = t
df_time_steps.append(df_time_step)
df_time_steps = pd.concat(df_time_steps, ignore_index=True)
print(df_time_steps)
return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio)
......@@ -4,7 +4,7 @@ from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_
AbstractTemporalCoordinates
class TemporalCoordinates(AbstractTemporalCoordinates):
class ConsecutiveTemporalCoordinates(AbstractTemporalCoordinates):
pass
@classmethod
......
from enum import Enum
from typing import Union
import pandas as pd
......@@ -44,7 +45,7 @@ def small_s_split_from_ratio(index: pd.Index, train_split_ratio):
return s
def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, concat):
def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, spatial_split) -> Union[None, pd.Series]:
df = df.copy() # type: pd.DataFrame
# Extract the index
if train_split_ratio is None:
......@@ -55,17 +56,17 @@ def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, c
raise Exception('A split has already been defined')
else:
serie = df.drop_duplicates(subset=[column], keep='first')[column]
assert len(df) % len(serie) == 0
multiplication_factor = len(df) // len(serie)
small_s_split = small_s_split_from_ratio(serie.index, train_split_ratio)
if concat:
if spatial_split:
# concatenation for spatial_split
s_split = pd.concat([small_s_split for _ in range(multiplication_factor)], ignore_index=True).copy()
else:
# dilation
s_split = pd.Series(None, index=df.infer_objects())
# dilation for the temporal split
s_split = pd.Series(None, index=df.index)
for i in range(len(s_split)):
s_split.iloc[i] = small_s_split.iloc[i % len(small_s_split)]
s_split.iloc[i] = small_s_split.iloc[i // multiplication_factor]
s_split.index = df.index
return s_split
......@@ -9,7 +9,8 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_2D_coo
AlpsStation2DCoordinatesBetweenZeroAndOne
from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \
AlpsStation3DCoordinatesWithAnisotropy
from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import CircleSpatialCoordinates
from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \
CircleSpatialCoordinates
from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
......@@ -40,7 +41,7 @@ class TestSpatialCoordinates(unittest.TestCase):
class SpatioTemporalCoordinates(unittest.TestCase):
nb_points = 4
nb_times_steps = 2
nb_steps = 2
def tearDown(self):
c = Counter([len(self.coordinates.df_coordinates(split)) for split in SpatioTemporalSlicer.SPLITS])
......@@ -48,9 +49,9 @@ class SpatioTemporalCoordinates(unittest.TestCase):
self.assertTrue(good_count)
def test_temporal_circle(self):
self.coordinates = UniformSpatioTemporalCoordinates.from_nb_points(nb_points=self.nb_points,
nb_time_steps=self.nb_times_steps,
train_split_ratio=0.5)
self.coordinates = UniformSpatioTemporalCoordinates.from_nb_points_and_nb_steps(nb_points=self.nb_points,
nb_steps=self.nb_steps,
train_split_ratio=0.5)
# def test_temporal_alps(self):
# pass
......
......@@ -17,8 +17,8 @@ class TestSlicerForDataset(unittest.TestCase):
super().__init__(methodName)
self.dataset = None
nb_spatial_points = 2
nb_temporal_steps = 2
nb_points = 2
nb_steps = 2
nb_obs = 2
@property
......@@ -48,10 +48,10 @@ class TestSlicerForSpatialDataset(TestSlicerForDataset):
@property
def complete_shape(self):
return self.nb_spatial_points, self.nb_obs
return self.nb_points, self.nb_obs
def load_datasets(self, train_split_ratio):
coordinates_list = load_test_1D_and_2D_spatial_coordinates(nb_points=self.nb_spatial_points,
coordinates_list = load_test_1D_and_2D_spatial_coordinates(nb_points=self.nb_points,
train_split_ratio=train_split_ratio)
dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs,
margin_model=ConstantMarginModel(
......@@ -63,7 +63,7 @@ class TestSlicerForSpatialDataset(TestSlicerForDataset):
def test_spatial_slicer_for_spatial_dataset(self):
train_split_ratio_to_observation_shape = {
None: self.complete_shape,
0.5: (self.nb_spatial_points // 2, self.nb_obs),
0.5: (self.nb_points // 2, self.nb_obs),
}
self.check_shapes(train_split_ratio_to_observation_shape)
......@@ -72,10 +72,10 @@ class TestSlicerForTemporalDataset(TestSlicerForDataset):
@property
def complete_shape(self):
return self.nb_temporal_steps, self.nb_obs
return self.nb_steps, self.nb_obs
def load_datasets(self, train_split_ratio):
coordinates_list = load_test_temporal_coordinates(nb_temporal_steps=self.nb_temporal_steps,
coordinates_list = load_test_temporal_coordinates(nb_steps=self.nb_steps,
train_split_ratio=train_split_ratio)
dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs,
margin_model=ConstantMarginModel(
......@@ -87,33 +87,34 @@ class TestSlicerForTemporalDataset(TestSlicerForDataset):
def test_temporal_slicer_for_temporal_dataset(self):
ind_tuple_to_observation_shape = {
None: self.complete_shape,
0.5: (self.nb_temporal_steps // 2, self.nb_obs),
0.5: (self.nb_steps // 2, self.nb_obs),
}
self.check_shapes(ind_tuple_to_observation_shape)
# class TestSlicerForSpatioTemporalDataset(TestSlicerForDataset):
#
# def complete_shape(self):
# return self.nb_spatial_points * self.nb_temporal_points, self.nb_obs
#
# def load_datasets(self, train_split_ratio):
# coordinates_list = load_test_spatiotemporal_coordinates(nb_points=self.nb_spatial_points,
# train_split_ratio=train_split_ratio,
# nb_time_steps=self.nb_temporal_points)
# dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs,
# margin_model=ConstantMarginModel(
# coordinates=coordinates),
# coordinates=coordinates, max_stable_model=Smith())
# for coordinates in coordinates_list]
# return dataset_list
#
# def test_spatiotemporal_slicer_for_spatio_temporal_dataset(self):
# ind_tuple_to_observation_shape = {
# None: self.complete_shape,
# 0.5: (1, 1),
# }
# self.check_shapes(ind_tuple_to_observation_shape)
class TestSlicerForSpatioTemporalDataset(TestSlicerForDataset):
@property
def complete_shape(self):
return self.nb_points * self.nb_steps, self.nb_obs
def load_datasets(self, train_split_ratio):
coordinates_list = load_test_spatiotemporal_coordinates(nb_points=self.nb_points,
nb_steps=self.nb_steps,
train_split_ratio=train_split_ratio)
dataset_list = [FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_obs,
margin_model=ConstantMarginModel(
coordinates=coordinates),
coordinates=coordinates, max_stable_model=Smith())
for coordinates in coordinates_list]
return dataset_list
def test_spatiotemporal_slicer_for_spatio_temporal_dataset(self):
ind_tuple_to_observation_shape = {
None: self.complete_shape,
0.5: (self.nb_steps * self.nb_points // 4, self.nb_obs),
}
self.check_shapes(ind_tuple_to_observation_shape)
if __name__ == '__main__':
......
......@@ -14,7 +14,7 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_c
from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.generated_spatio_temporal_coordinates import \
UniformSpatioTemporalCoordinates
from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates
from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import TemporalCoordinates
from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import ConsecutiveTemporalCoordinates
"""
Common objects to load for the test.
......@@ -25,7 +25,7 @@ In this case, unit test (at least on the constructor) must be ensured in the tes
TEST_MAX_STABLE_MODEL = [Smith, BrownResnick, Schlather, Geometric, ExtremalT, ISchlather]
TEST_1D_AND_2D_SPATIAL_COORDINATES = [UniformSpatialCoordinates, CircleSpatialCoordinates]
TEST_3D_SPATIAL_COORDINATES = [AlpsStation3DCoordinatesWithAnisotropy]
TEST_TEMPORAL_COORDINATES = [TemporalCoordinates]
TEST_TEMPORAL_COORDINATES = [ConsecutiveTemporalCoordinates]
TEST_SPATIO_TEMPORAL_COORDINATES = [UniformSpatioTemporalCoordinates]
TEST_MARGIN_TYPES = [ConstantMarginModel, LinearAllParametersAllDimsMarginModel][:]
TEST_MAX_STABLE_ESTIMATOR = [MaxStableEstimator]
......@@ -71,12 +71,12 @@ def load_test_3D_spatial_coordinates(nb_points):
return load_test_spatial_coordinates(nb_points, TEST_3D_SPATIAL_COORDINATES)
def load_test_temporal_coordinates(nb_temporal_steps, train_split_ratio=None):
return [coordinate_class.from_nb_temporal_steps(nb_temporal_steps, train_split_ratio) for coordinate_class in
def load_test_temporal_coordinates(nb_steps, train_split_ratio=None):
return [coordinate_class.from_nb_temporal_steps(nb_steps, train_split_ratio) for coordinate_class in
TEST_TEMPORAL_COORDINATES]
def load_test_spatiotemporal_coordinates(nb_points, train_split_ratio=None, nb_time_steps=None):
return [coordinate_class.from_nb_points(nb_points=nb_points, train_split_ratio=train_split_ratio,
nb_time_steps=nb_time_steps)
def load_test_spatiotemporal_coordinates(nb_points, nb_steps, train_split_ratio=None):
return [coordinate_class.from_nb_points_and_nb_steps(nb_points=nb_points, nb_steps=nb_steps,
train_split_ratio=train_split_ratio)
for coordinate_class in TEST_SPATIO_TEMPORAL_COORDINATES]
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment