diff --git a/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py b/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py index dd12340e387fae033de1b8ab02bfad4ab905c703..5fc08a3aa4039b47c507da4921dfe2637ae3c438 100644 --- a/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py +++ b/extreme_estimator/extreme_models/margin_model/margin_function/abstract_margin_function.py @@ -57,9 +57,9 @@ class AbstractMarginFunction(object): plt.show() def visualize_single_param(self, gev_param_name=GevParams.GEV_LOC, ax=None, show=True): - if self.coordinates.nb_columns == 1: + if self.coordinates.nb_coordinates_spatial == 1: self.visualize_1D(gev_param_name, ax, show) - elif self.coordinates.nb_columns == 2: + elif self.coordinates.nb_coordinates_spatial == 2: self.visualize_2D(gev_param_name, ax, show) else: raise NotImplementedError('3D Margin visualization not yet implemented') diff --git a/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py b/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py index c3b5b96b3f12a9a964c22651fd856fdd8636d945..42f875241db7174ce90641482b9f37d12a2581a3 100644 --- a/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py +++ b/extreme_estimator/extreme_models/margin_model/margin_function/linear_margin_function.py @@ -37,7 +37,7 @@ class LinearMarginFunction(IndependentMarginFunction): # Check the linear_dim are well-defined with respect to the coordinates for linear_dims in self.gev_param_name_to_linear_dims.values(): for dim in linear_dims: - assert 0 < dim <= coordinates.nb_columns, "dim={}, nb_columns={}".format(dim, coordinates.nb_columns) + assert 0 < dim <= coordinates.nb_coordinates, "dim={}, nb_columns={}".format(dim, coordinates.nb_coordinates) # Map each gev_param_name to its corresponding param_function for gev_param_name in GevParams.GEV_PARAM_NAMES: diff --git a/extreme_estimator/extreme_models/margin_model/param_function/linear_coef.py b/extreme_estimator/extreme_models/margin_model/param_function/linear_coef.py index e6c825705d45ef9880420b659a6b398934dea6c4..d025a3d1f875e28b972e13ae4711b84cbb2ebd4e 100644 --- a/extreme_estimator/extreme_models/margin_model/param_function/linear_coef.py +++ b/extreme_estimator/extreme_models/margin_model/param_function/linear_coef.py @@ -56,6 +56,6 @@ class LinearCoef(object): shape.form = shape ~ coord_x+coord_y :return: """ - dim_to_name = {i: name for i, name in enumerate(AbstractCoordinates.COORDINATE_NAMES, 1)} + dim_to_name = {i: name for i, name in enumerate(AbstractCoordinates.COORDINATES_NAMES, 1)} formula_str = '1' if not linear_dims else '+'.join([dim_to_name[dim] for dim in linear_dims]) return {self.gev_param_name + '.form': self.gev_param_name + ' ~ ' + formula_str} \ No newline at end of file diff --git a/extreme_estimator/extreme_models/margin_model/smooth_margin_model.py b/extreme_estimator/extreme_models/margin_model/smooth_margin_model.py index 4fb13aa55a2ee6f354463ac8735119499fce692f..ee076c367ca72db60f88bf275fa29a18db9fbedd 100644 --- a/extreme_estimator/extreme_models/margin_model/smooth_margin_model.py +++ b/extreme_estimator/extreme_models/margin_model/smooth_margin_model.py @@ -103,7 +103,7 @@ class LinearMarginModelExample(LinearMarginModel): class LinearAllParametersAllDimsMarginModel(LinearMarginModel): def load_margin_functions(self, margin_function_class: type = None, gev_param_name_to_linear_dims=None): - all_dims = list(range(1, self.coordinates.nb_columns + 1)) + all_dims = list(range(1, self.coordinates.nb_coordinates + 1)) super().load_margin_functions({GevParams.GEV_SHAPE: all_dims.copy(), GevParams.GEV_LOC: all_dims.copy(), GevParams.GEV_SCALE: all_dims.copy()}) diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index ff5e939dc8ed0113a6971cdca6540e72054709d6..326ac94a127c6c03782919c40f028c497139759f 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -6,10 +6,11 @@ import numpy as np import pandas as pd from mpl_toolkits.mplot3d import Axes3D +from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer, df_sliced from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer -from spatio_temporal_dataset.slicer.split import s_split_from_ratio, TEST_SPLIT_STR, \ - TRAIN_SPLIT_STR, train_ind_from_s_split, Split +from spatio_temporal_dataset.slicer.split import s_split_from_df, TEST_SPLIT_STR, \ + TRAIN_SPLIT_STR, ind_train_from_s_split, Split from spatio_temporal_dataset.slicer.temporal_slicer import TemporalSlicer @@ -18,36 +19,43 @@ class AbstractCoordinates(object): COORDINATE_X = 'coord_x' COORDINATE_Y = 'coord_y' COORDINATE_Z = 'coord_z' - COORDINATE_NAMES = [COORDINATE_X, COORDINATE_Y, COORDINATE_Z] - COORDINATE_SPATIAL_SPLIT = 'coord_spatial_split' + COORDINATE_SPATIAL_NAMES = [COORDINATE_X, COORDINATE_Y, COORDINATE_Z] + SPATIAL_SPLIT = 'spatial_split' # Temporal columns COORDINATE_T = 'coord_t' - COORDINATE_TEMPORAL_SPLIT = 'coord_temporal_split' + TEMPORAL_SPLIT = 'coord_temporal_split' + COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T] - def __init__(self, df_coord: pd.DataFrame, s_spatial_split: pd.Series = None): + def __init__(self, df_coord: pd.DataFrame, s_split_spatial: pd.Series = None, s_split_temporal: pd.Series = None, + slicer_class: type = SpatialSlicer): self.df_all_coordinates = df_coord # type: pd.DataFrame - self.s_spatial_split = s_spatial_split # type: pd.Series + self.s_split_spatial = s_split_spatial # type: pd.Series + self.s_split_temporal = s_split_temporal # type: pd.Series + self.slicer = slicer_class(ind_train_spatial=self.ind_train_spatial, + ind_train_temporal=self.ind_train_temporal) # type: AbstractSlicer + assert isinstance(self.slicer, AbstractSlicer) # ClassMethod constructor @classmethod - def from_df(cls, df: pd.DataFrame, train_split_ratio: float = None): - # X and coordinates must be defined - assert cls.COORDINATE_X in df.columns - # Create a split based on the train_split_ratio - if train_split_ratio is not None: - assert cls.COORDINATE_SPATIAL_SPLIT not in df.columns, "A split has already been defined" - s_split = s_split_from_ratio(index=df.index, train_split_ratio=train_split_ratio) - df[cls.COORDINATE_SPATIAL_SPLIT] = s_split - # Potentially, a split column can be specified directly in df - if cls.COORDINATE_SPATIAL_SPLIT not in df.columns: - df_coord = df - s_split = None + def from_df(cls, df: pd.DataFrame, train_split_ratio: float = None, slicer_class: type = SpatialSlicer): + """ + train_split_ratio is shared between the spatial part of the data, and the temporal part + """ + # All the index should be unique + assert len(set(df.index)) == len(df) + + # Create a spatial split + s_split_spatial = s_split_from_df(df, cls.COORDINATE_X, cls.SPATIAL_SPLIT, train_split_ratio, concat=False) + + # Create a temporal split + if slicer_class is SpatioTemporalSlicer: + s_split_temporal = s_split_from_df(df, cls.COORDINATE_T, cls.TEMPORAL_SPLIT, train_split_ratio, concat=True) else: - df_coord = df.loc[:, cls.coordinates_spatial_columns(df)] - s_split = df[cls.COORDINATE_SPATIAL_SPLIT] - assert s_split.isin([TRAIN_SPLIT_STR, TEST_SPLIT_STR]).all() - return cls(df_coord=df_coord, s_spatial_split=s_split) + s_split_temporal = None + + return cls(df_coord=df, s_split_spatial=s_split_spatial, s_split_temporal=s_split_temporal, + slicer_class=slicer_class) @classmethod def from_csv(cls, csv_path: str = None): @@ -56,7 +64,7 @@ class AbstractCoordinates(object): df = pd.read_csv(csv_path) # Index correspond to the first column index_column_name = df.columns[0] - assert index_column_name not in cls.coordinates_spatial_columns(df) + assert index_column_name not in cls.COORDINATE_SPATIAL_NAMES df.set_index(index_column_name, inplace=True) return cls.from_df(df) @@ -72,58 +80,62 @@ class AbstractCoordinates(object): df_sample = pd.DataFrame.sample(coordinates.df_merged, n=nb_points) return cls.from_df(df=df_sample, train_split_ratio=train_split_ratio) - @classmethod - def coordinates_spatial_columns(cls, df_coord: pd.DataFrame) -> List[str]: - coord_columns = [cls.COORDINATE_X] - for additional_coord in [cls.COORDINATE_Y, cls.COORDINATE_Z]: - if additional_coord in df_coord.columns: - coord_columns.append(additional_coord) - return coord_columns - - @property - def columns(self): - return self.coordinates_spatial_columns(df_coord=self.df_all_coordinates) - - @property - def nb_columns(self): - return len(self.columns) - @property def index(self): - # todo: this should be replace whenever possible by coordinates_index return self.df_all_coordinates.index - - @property def df_merged(self) -> pd.DataFrame: # Merged DataFrame of df_coord and s_split - return self.df_all_coordinates if self.s_spatial_split is None else self.df_all_coordinates.join(self.s_spatial_split) + return self.df_all_coordinates if self.s_split_spatial is None else self.df_all_coordinates.join( + self.s_split_spatial) + + # Split def df_coordinates(self, split: Split = Split.all) -> pd.DataFrame: - if self.ind_train_spatial is None: - return self.df_all_coordinates + return df_sliced(df=self.df_all_coordinates, split=split, slicer=self.slicer) - if split is Split.all: - return self.df_all_coordinates + def coordinates_values(self, split: Split = Split.all) -> np.ndarray: + return self.df_coordinates(split).values - if split in [Split.train_temporal, Split.test_temporal]: - return self.df_all_coordinates + def coordinates_index(self, split: Split = Split.all) -> pd.Index: + return self.df_coordinates(split).index - elif split in [Split.train_spatial, Split.train_spatiotemporal, Split.test_spatiotemporal_temporal]: - return self.df_all_coordinates.loc[self.ind_train_spatial] + @property + def ind_train_spatial(self) -> pd.Series: + return ind_train_from_s_split(s_split=self.s_split_spatial) - elif split in [Split.test_spatial, Split.test_spatiotemporal, Split.test_spatiotemporal_spatial]: - return self.df_all_coordinates.loc[~self.ind_train_spatial] + @property + def ind_train_temporal(self) -> pd.Series: + return ind_train_from_s_split(s_split=self.s_split_temporal) - else: - raise NotImplementedError('Unknown split: {}'.format(split)) + # Columns - def coordinates_values(self, split: Split = Split.all) -> np.ndarray: - return self.df_coordinates(split).values + @property + def coordinates_names(self) -> List[str]: + return self.coordinates_spatial_names + self.coordinates_temporal_names - def coordinate_index(self, split: Split = Split.all) -> pd.Index: - return self.df_coordinates(split).index + @property + def nb_coordinates(self) -> int: + return len(self.coordinates_names) + + @property + def coordinates_spatial_names(self) -> List[str]: + return [name for name in self.COORDINATE_SPATIAL_NAMES if name in self.df_all_coordinates.columns] + + @property + def nb_coordinates_spatial(self) -> int: + return len(self.coordinates_spatial_names) + + @property + def coordinates_temporal_names(self) -> List[str]: + return [self.COORDINATE_T] if self.COORDINATE_T in self.df_all_coordinates else [] + + @property + def nb_coordinates_temporal(self) -> int: + return len(self.coordinates_temporal_names) + + # Visualization @property def x_coordinates(self) -> np.ndarray: @@ -134,41 +146,38 @@ class AbstractCoordinates(object): return self.df_all_coordinates[self.COORDINATE_Y].values.copy() @property - def ind_train_spatial(self) -> pd.Series: - return train_ind_from_s_split(s_split=self.s_spatial_split) + def z_coordinates(self) -> np.ndarray: + return self.df_all_coordinates[self.COORDINATE_Z].values.copy() - # Visualization + @property + def t_coordinates(self): + return self.df_all_coordinates[self.COORDINATE_T].values.copy() def visualize(self): - nb_coordinates_columns = len(self.coordinates_spatial_columns(self.df_all_coordinates)) - if nb_coordinates_columns == 1: + if self.nb_coordinates_spatial == 1: self.visualization_1D() - elif nb_coordinates_columns == 2: + elif self.nb_coordinates_spatial == 2: self.visualization_2D() else: self.visualization_3D() def visualization_1D(self): - assert len(self.coordinates_spatial_columns(self.df_all_coordinates)) >= 1 - x = self.coordinates_values()[:] + assert self.nb_coordinates_spatial >= 1 + x = self.x_coordinates y = np.zeros(len(x)) plt.scatter(x, y) plt.show() def visualization_2D(self): - assert len(self.coordinates_spatial_columns(self.df_all_coordinates)) >= 2 - coordinates_values = self.coordinates_values() - x, y = coordinates_values[:, 0], coordinates_values[:, 1] - plt.scatter(x, y) + assert self.nb_coordinates_spatial >= 2 + plt.scatter(self.x_coordinates, self.y_coordinates) plt.show() def visualization_3D(self): - assert len(self.coordinates_spatial_columns(self.df_all_coordinates)) == 3 - coordinates_values = self.coordinates_values() - x, y, z = coordinates_values[:, 0], coordinates_values[:, 1], coordinates_values[:, 2] + assert self.nb_coordinates_spatial == 3 fig = plt.figure() ax = fig.add_subplot(111, projection='3d') # type: Axes3D - ax.scatter(x, y, z, marker='^') + ax.scatter(self.x_coordinates, self.y_coordinates, self.z_coordinates, marker='^') plt.show() # Magic Methods diff --git a/spatio_temporal_dataset/coordinates/spatial_coordinates/generated_spatial_coordinates.py b/spatio_temporal_dataset/coordinates/spatial_coordinates/generated_spatial_coordinates.py index fc600e268fff3bd5a20f6dd4f004b5bfcf3a84f9..be2be12012e05f6d24593356dac336f0ad463715 100644 --- a/spatio_temporal_dataset/coordinates/spatial_coordinates/generated_spatial_coordinates.py +++ b/spatio_temporal_dataset/coordinates/spatial_coordinates/generated_spatial_coordinates.py @@ -10,13 +10,17 @@ import matplotlib.pyplot as plt class CircleCoordinates(AbstractCoordinates): @classmethod - def from_nb_points(cls, nb_points, train_split_ratio: float = None, max_radius=1.0): + def df_spatial(cls, nb_points, max_radius=1.0): # Sample uniformly inside the circle angles = np.array(r.runif(nb_points, max=2 * math.pi)) radius = np.sqrt(np.array(r.runif(nb_points, max=max_radius))) df = pd.DataFrame.from_dict({cls.COORDINATE_X: radius * np.cos(angles), cls.COORDINATE_Y: radius * np.sin(angles)}) - return cls.from_df(df, train_split_ratio) + return df + + @classmethod + def from_nb_points(cls, nb_points, train_split_ratio: float = None, max_radius=1.0): + return cls.from_df(cls.df_spatial(nb_points, max_radius), train_split_ratio) def visualization_2D(self): r = 1.0 @@ -32,4 +36,3 @@ class CircleCoordinatesRadius2(CircleCoordinates): @classmethod def from_nb_points(cls, nb_points, train_split_ratio: float = None, max_radius=1.0): return 2 * super().from_nb_points(nb_points, train_split_ratio, max_radius) - diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/alps_station_2D_1D_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/alps_station_2D_1D_coordinates.py new file mode 100644 index 0000000000000000000000000000000000000000..be156a602cca021e1c1d7cc56afbb042893492e4 --- /dev/null +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/alps_station_2D_1D_coordinates.py @@ -0,0 +1,3 @@ + + +# todo: important the index should be different, it shuold be the name of the satation + the year for isntance \ No newline at end of file diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py new file mode 100644 index 0000000000000000000000000000000000000000..7211d1c21b7937308661bfa8e9a11b532ce4e8f1 --- /dev/null +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py @@ -0,0 +1,22 @@ +import pandas as pd + +from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import CircleCoordinates +from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer +from spatio_temporal_dataset.slicer.split import s_split_from_df + + +class CircleTemporalCoordinates(CircleCoordinates): + + @classmethod + def from_nb_points(cls, nb_points, train_split_ratio: float = None, nb_time_steps=1, max_radius=1.0): + assert isinstance(nb_time_steps, int) and nb_time_steps >= 1 + df_spatial = CircleCoordinates.df_spatial(nb_points, max_radius) + df_time_steps = [] + for t in range(nb_time_steps): + df_time_step = df_spatial.copy() + df_time_step[cls.COORDINATE_T] = t + df_time_steps.append(df_time_step) + df_time_steps = pd.concat(df_time_steps, ignore_index=True) + return AbstractCoordinates.from_df(df=df_time_steps, train_split_ratio=train_split_ratio, + slicer_class=SpatioTemporalSlicer) diff --git a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformed_coordinates.py b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformed_coordinates.py index fe34e194331ccc2baee08834509bd2d08a8dd8c1..fb99da7973c1da1b8e1825efd0f8ae6128eac9b4 100644 --- a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformed_coordinates.py +++ b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformed_coordinates.py @@ -9,6 +9,6 @@ class TransformedCoordinates(AbstractCoordinates): transformation_function: AbstractTransformation): df_coordinates_transformed = coordinates.df_all_coordinates.copy() df_coordinates_transformed = transformation_function.transform(df_coord=df_coordinates_transformed) - return cls(df_coord=df_coordinates_transformed, s_spatial_split=coordinates.s_spatial_split) + return cls(df_coord=df_coordinates_transformed, s_split_spatial=coordinates.s_split_spatial) diff --git a/spatio_temporal_dataset/dataset/abstract_dataset.py b/spatio_temporal_dataset/dataset/abstract_dataset.py index ed9bc236aca23d4fa91cc88a443da59582f60646..4564bfa21f0620eb0e1eb8e4416fe07d2be20ae7 100644 --- a/spatio_temporal_dataset/dataset/abstract_dataset.py +++ b/spatio_temporal_dataset/dataset/abstract_dataset.py @@ -6,8 +6,6 @@ import numpy as np import pandas as pd from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates -from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer -from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer from spatio_temporal_dataset.slicer.split import Split from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ AbstractSpatioTemporalObservations @@ -15,15 +13,14 @@ from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_tempor class AbstractDataset(object): - def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates, - slicer_class: type = SpatialSlicer): + def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates): assert pd.Index.equals(observations.index, coordinates.index) - assert isinstance(slicer_class, type) self.observations = observations self.coordinates = coordinates - self.slicer = slicer_class(coordinates_train_ind=self.coordinates.ind_train_spatial, - observations_train_ind=self.observations.train_ind) # type: AbstractSlicer - assert isinstance(self.slicer, AbstractSlicer) + + @property + def slicer(self): + return self.coordinates.slicer @classmethod def from_csv(cls, csv_path: str): @@ -65,7 +62,7 @@ class AbstractDataset(object): return self.coordinates.coordinates_values(split=split) def coordinates_index(self, split: Split= Split.all) -> pd.Index: - return self.coordinates.coordinate_index(split=split) + return self.coordinates.coordinates_index(split=split) # Slicer wrapper diff --git a/spatio_temporal_dataset/dataset/simulation_dataset.py b/spatio_temporal_dataset/dataset/simulation_dataset.py index 11e8ed5657997858015ebe78b2875ccdf2390fcb..e888cf1c82243d521fde59f875c2c202e78f8dd9 100644 --- a/spatio_temporal_dataset/dataset/simulation_dataset.py +++ b/spatio_temporal_dataset/dataset/simulation_dataset.py @@ -17,10 +17,9 @@ class SimulatedDataset(AbstractDataset): def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates, - slicer_class: type = SpatialSlicer, max_stable_model: AbstractMaxStableModel = None, margin_model: AbstractMarginModel = None): - super().__init__(observations, coordinates, slicer_class) + super().__init__(observations, coordinates) assert margin_model is not None or max_stable_model is not None self.margin_model = margin_model # type: AbstractMarginModel self.max_stable_model = max_stable_model # type: AbstractMaxStableModel @@ -29,21 +28,17 @@ class SimulatedDataset(AbstractDataset): class MaxStableDataset(SimulatedDataset): @classmethod - def from_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel, coordinates: AbstractCoordinates, - train_split_ratio: float = None, slicer_class: type = SpatialSlicer): - observations = MaxStableAnnualMaxima.from_sampling(nb_obs, max_stable_model, coordinates, train_split_ratio) - return cls(observations=observations, coordinates=coordinates, slicer_class=slicer_class, - max_stable_model=max_stable_model) + def from_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel, coordinates: AbstractCoordinates): + observations = MaxStableAnnualMaxima.from_sampling(nb_obs, max_stable_model, coordinates) + return cls(observations=observations, coordinates=coordinates, max_stable_model=max_stable_model) class MarginDataset(SimulatedDataset): @classmethod - def from_sampling(cls, nb_obs: int, margin_model: AbstractMarginModel, coordinates: AbstractCoordinates, - train_split_ratio: float = None, slicer_class: type = SpatialSlicer): - observations = MarginAnnualMaxima.from_sampling(nb_obs, coordinates, margin_model, train_split_ratio) - return cls(observations=observations, coordinates=coordinates, slicer_class=slicer_class, - margin_model=margin_model) + def from_sampling(cls, nb_obs: int, margin_model: AbstractMarginModel, coordinates: AbstractCoordinates): + observations = MarginAnnualMaxima.from_sampling(nb_obs, coordinates, margin_model) + return cls(observations=observations, coordinates=coordinates, margin_model=margin_model) class FullSimulatedDataset(SimulatedDataset): @@ -51,10 +46,8 @@ class FullSimulatedDataset(SimulatedDataset): @classmethod def from_double_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel, coordinates: AbstractCoordinates, - margin_model: AbstractMarginModel, - train_split_ratio: float = None, - slicer_class: type = SpatialSlicer): + margin_model: AbstractMarginModel): observations = FullAnnualMaxima.from_double_sampling(nb_obs, max_stable_model, - coordinates, margin_model, train_split_ratio) - return cls(observations=observations, coordinates=coordinates, slicer_class=slicer_class, + coordinates, margin_model) + return cls(observations=observations, coordinates=coordinates, max_stable_model=max_stable_model, margin_model=margin_model) diff --git a/spatio_temporal_dataset/slicer/abstract_slicer.py b/spatio_temporal_dataset/slicer/abstract_slicer.py index ab2673cdfc35e39e3b5e5891d6d574be0d029352..f2877b921849786a82887608bd2c2eaf924e5807 100644 --- a/spatio_temporal_dataset/slicer/abstract_slicer.py +++ b/spatio_temporal_dataset/slicer/abstract_slicer.py @@ -7,39 +7,38 @@ from spatio_temporal_dataset.slicer.split import Split class AbstractSlicer(object): - def __init__(self, coordinates_train_ind: Union[None, pd.Series], observations_train_ind: Union[None, pd.Series]): - self.index_train_ind = coordinates_train_ind # type: Union[None, pd.Series] - self.column_train_ind = observations_train_ind # type: Union[None, pd.Series] + def __init__(self, ind_train_spatial: Union[None, pd.Series], ind_train_temporal: Union[None, pd.Series]): + self.ind_train_spatial = ind_train_spatial # type: Union[None, pd.Series] + self.ind_train_temporal = ind_train_temporal # type: Union[None, pd.Series] @property - def train_split(self) -> Split: - pass + def ind_test_spatial(self) -> pd.Series: + return ~self.ind_train_spatial @property - def test_split(self) -> Split: - pass - - @property - def splits(self) -> List[Split]: - pass + def ind_test_temporal(self) -> pd.Series: + return ~self.ind_train_temporal + def loc_split(self, df: pd.DataFrame, split: Split): + # split should belong to the list of split accepted by the slicer + assert isinstance(split, Split) - @property - def index_test_ind(self) -> pd.Series: - return ~self.index_train_ind + if split is Split.all: + return df - # todo: test should be the same as train when we don't care about that in the split - @property - def column_test_ind(self) -> pd.Series: - return ~self.column_train_ind + assert split in self.splits, "split:{}, slicer_type:{}".format(split, type(self)) - @property - def some_required_ind_are_not_defined(self): - pass + # By default, some required splits are not defined + # instead of crashing, we return all the data for all the split + # This is the default behavior, when the required splits has been defined + if self.some_required_ind_are_not_defined: + return df + else: + return self.specialized_loc_split(df=df, split=split) def summary(self, show=True): msg = '' - for s, global_name in [(self.index_train_ind, "Spatial"), (self.column_train_ind, "Temporal")]: + for s, global_name in [(self.ind_train_spatial, "Spatial"), (self.ind_train_temporal, "Temporal")]: msg += global_name + ': ' if s is None: msg += 'Not handled by this slicer' @@ -51,29 +50,29 @@ class AbstractSlicer(object): print(msg) return msg - def loc_split(self, df: pd.DataFrame, split: Split): - # split should belong to the list of split accepted by the slicer - assert isinstance(split, Split) + # Methods that need to be defined in the child class - if split is Split.all: - return df + def specialized_loc_split(self, df: pd.DataFrame, split: Split): + return None - assert split in self.splits, "split:{}, slicer_type:{}".format(split, type(self)) + @property + def some_required_ind_are_not_defined(self): + pass - # By default, some required splits are not defined - # instead of crashing, we return all the data for all the split - # This is the default behavior, when the required splits has been defined - if self.some_required_ind_are_not_defined: - return df - else: - return self.specialized_loc_split(df=df, split=split) + @property + def train_split(self) -> Split: + pass - def specialized_loc_split(self, df: pd.DataFrame, split: Split): - # This method should be defined in the child class - return None + @property + def test_split(self) -> Split: + pass + + @property + def splits(self) -> List[Split]: + pass -def slice(df: pd.DataFrame, split: Split = Split.all, slicer: AbstractSlicer = None) -> pd.DataFrame: +def df_sliced(df: pd.DataFrame, split: Split = Split.all, slicer: AbstractSlicer = None) -> pd.DataFrame: if slicer is None: assert split is Split.all return df diff --git a/spatio_temporal_dataset/slicer/spatial_slicer.py b/spatio_temporal_dataset/slicer/spatial_slicer.py index c6c22a9a674d5f02137150fd010fe588dc7a31d4..06b77cdca881c2f98eb33338e744ec23d80cfaad 100644 --- a/spatio_temporal_dataset/slicer/spatial_slicer.py +++ b/spatio_temporal_dataset/slicer/spatial_slicer.py @@ -9,8 +9,8 @@ from spatio_temporal_dataset.slicer.split import Split class SpatialSlicer(AbstractSlicer): SPLITS = [Split.train_spatial, Split.test_spatial] - def __init__(self, coordinates_train_ind: Union[None, pd.Series], observations_train_ind: Union[None, pd.Series]): - super().__init__(coordinates_train_ind, None) + def __init__(self, ind_train_spatial: Union[None, pd.Series], ind_train_temporal: Union[None, pd.Series]): + super().__init__(ind_train_spatial, None) @property def splits(self) -> List[Split]: @@ -26,11 +26,11 @@ class SpatialSlicer(AbstractSlicer): @property def some_required_ind_are_not_defined(self): - return self.index_train_ind is None + return self.ind_train_spatial is None def specialized_loc_split(self, df: pd.DataFrame, split: Split): - assert pd.Index.equals(df.index, self.index_train_ind.index) + assert pd.Index.equals(df.index, self.ind_train_spatial.index) if split is Split.train_spatial: - return df.loc[self.index_train_ind, :] + return df.loc[self.ind_train_spatial] elif split is Split.test_spatial: - return df.loc[self.index_test_ind, :] + return df.loc[self.ind_test_spatial] diff --git a/spatio_temporal_dataset/slicer/spatio_temporal_slicer.py b/spatio_temporal_dataset/slicer/spatio_temporal_slicer.py index 47a71610020d763d057b01a4780b4fa541eb2cfb..58e8ed5d42cbffb6042906638ba2603763d08638 100644 --- a/spatio_temporal_dataset/slicer/spatio_temporal_slicer.py +++ b/spatio_temporal_dataset/slicer/spatio_temporal_slicer.py @@ -8,9 +8,9 @@ from spatio_temporal_dataset.slicer.split import Split class SpatioTemporalSlicer(AbstractSlicer): SPLITS = [Split.train_spatiotemporal, - Split.test_spatiotemporal, - Split.test_spatiotemporal_spatial, - Split.test_spatiotemporal_temporal] + Split.test_spatiotemporal, + Split.test_spatiotemporal_spatial, + Split.test_spatiotemporal_temporal] @property def splits(self) -> List[Split]: @@ -26,16 +26,16 @@ class SpatioTemporalSlicer(AbstractSlicer): @property def some_required_ind_are_not_defined(self): - return self.index_train_ind is None or self.column_train_ind is None + return self.ind_train_spatial is None or self.ind_train_temporal is None def specialized_loc_split(self, df: pd.DataFrame, split: Split): - assert pd.Index.equals(df.columns, self.column_train_ind.index) - assert pd.Index.equals(df.index, self.index_train_ind.index) + assert pd.Index.equals(df.index, self.ind_train_temporal.index) + assert pd.Index.equals(df.index, self.ind_train_spatial.index) if split is Split.train_spatiotemporal: - return df.loc[self.index_train_ind, self.column_train_ind] + return df.loc[self.ind_train_spatial & self.ind_train_temporal] elif split is Split.test_spatiotemporal: - return df.loc[self.index_test_ind, self.column_test_ind] + return df.loc[self.ind_test_spatial & self.ind_test_temporal] elif split is Split.test_spatiotemporal_spatial: - return df.loc[self.index_test_ind, self.column_train_ind] + return df.loc[self.ind_test_spatial & self.ind_train_temporal] elif split is Split.test_spatiotemporal_temporal: - return df.loc[self.index_train_ind, self.column_test_ind] + return df.loc[self.ind_train_spatial & self.ind_test_temporal] diff --git a/spatio_temporal_dataset/slicer/split.py b/spatio_temporal_dataset/slicer/split.py index 9771bbcaf3b0254e4e54092dd8aee6ea08a1d826..6e33c57f2e77bc34e4146d9ab783e79de84e27f5 100644 --- a/spatio_temporal_dataset/slicer/split.py +++ b/spatio_temporal_dataset/slicer/split.py @@ -25,14 +25,14 @@ TRAIN_SPLIT_STR = 'train_split' TEST_SPLIT_STR = 'test_split' -def train_ind_from_s_split(s_split): +def ind_train_from_s_split(s_split): if s_split is None: return None else: return s_split.isin([TRAIN_SPLIT_STR]) -def s_split_from_ratio(index, train_split_ratio): +def small_s_split_from_ratio(index: pd.Index, train_split_ratio): length = len(index) assert 0 < train_split_ratio < 1 s = pd.Series(TEST_SPLIT_STR, index=index) @@ -42,3 +42,30 @@ def s_split_from_ratio(index, train_split_ratio): assert 0 < len(train_ind) < length, "number of training points:{} length:{}".format(len(train_ind), length) s.loc[train_ind] = TRAIN_SPLIT_STR return s + + +def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, concat): + df = df.copy() # type: pd.DataFrame + # Extract the index + if train_split_ratio is None: + return None + if column not in df: + return None + elif split_column in df: + raise Exception('A split has already been defined') + else: + serie = df.drop_duplicates(subset=[column], keep='first')[column] + + assert len(df) % len(serie) == 0 + multiplication_factor = len(df) // len(serie) + small_s_split = small_s_split_from_ratio(serie.index, train_split_ratio) + if concat: + s_split = pd.concat([small_s_split for _ in range(multiplication_factor)], ignore_index=True).copy() + else: + # dilatjon + s_split = pd.Series(None, index=df.infer_objects()) + for i in range(len(s_split)): + s_split.iloc[i] = small_s_split.iloc[i % len(small_s_split)] + s_split.index = df.index + return s_split + diff --git a/spatio_temporal_dataset/slicer/temporal_slicer.py b/spatio_temporal_dataset/slicer/temporal_slicer.py index 4bfd6d01f8e9e897863e1f038a25d412fdd2fb85..acb70bb6de3efb91a001e3c470ef8a592ed689b5 100644 --- a/spatio_temporal_dataset/slicer/temporal_slicer.py +++ b/spatio_temporal_dataset/slicer/temporal_slicer.py @@ -9,8 +9,8 @@ from spatio_temporal_dataset.slicer.split import Split class TemporalSlicer(AbstractSlicer): SPLITS = [Split.train_temporal, Split.test_temporal] - def __init__(self, coordinates_train_ind: Union[None, pd.Series], observations_train_ind: Union[None, pd.Series]): - super().__init__(None, observations_train_ind) + def __init__(self, ind_train_spatial: Union[None, pd.Series], ind_train_temporal: Union[None, pd.Series]): + super().__init__(None, ind_train_temporal) @property def splits(self) -> List[Split]: @@ -26,11 +26,11 @@ class TemporalSlicer(AbstractSlicer): @property def some_required_ind_are_not_defined(self): - return self.column_train_ind is None + return self.ind_train_temporal is None def specialized_loc_split(self, df: pd.DataFrame, split: Split): - assert pd.Index.equals(df.columns, self.column_train_ind.index) + assert pd.Index.equals(df.index, self.ind_train_temporal.index) if split is Split.train_temporal: - return df.loc[:, self.column_train_ind] + return df.loc[self.ind_train_temporal] elif split is Split.test_temporal: - return df.loc[:, self.column_test_ind] + return df.loc[self.ind_test_temporal] diff --git a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py index e23525b74e52de331314e9475276bb1f52a01159..f1c24cc6f91a569ed5ac7f6d88fa4cbabdc354d7 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/abstract_spatio_temporal_observations.py @@ -2,15 +2,12 @@ import os.path as op import pandas as pd import numpy as np -from spatio_temporal_dataset.slicer.abstract_slicer import slice, AbstractSlicer -from spatio_temporal_dataset.slicer.split import Split, \ - train_ind_from_s_split, TEST_SPLIT_STR, TRAIN_SPLIT_STR, s_split_from_ratio - +from spatio_temporal_dataset.slicer.abstract_slicer import df_sliced, AbstractSlicer +from spatio_temporal_dataset.slicer.split import Split class AbstractSpatioTemporalObservations(object): - def __init__(self, df_maxima_frech: pd.DataFrame = None, df_maxima_gev: pd.DataFrame = None, - s_split: pd.Series = None, train_split_ratio: float = None): + def __init__(self, df_maxima_frech: pd.DataFrame = None, df_maxima_gev: pd.DataFrame = None): """ Main attribute of the class is the DataFrame df_maxima Index are stations index @@ -20,15 +17,6 @@ class AbstractSpatioTemporalObservations(object): self.df_maxima_frech = df_maxima_frech self.df_maxima_gev = df_maxima_gev - if s_split is not None and train_split_ratio is not None: - raise AttributeError('A split is already defined, there is no need to specify a ratio') - elif s_split is not None or train_split_ratio is not None: - if train_split_ratio: - s_split = s_split_from_ratio(index=self._df_maxima.columns, train_split_ratio=train_split_ratio) - assert len(s_split) == len(self._df_maxima.columns) - assert s_split.isin([TRAIN_SPLIT_STR, TEST_SPLIT_STR]).all() - self.s_split = s_split - @classmethod def from_csv(cls, csv_path: str = None): assert csv_path is not None @@ -60,16 +48,12 @@ class AbstractSpatioTemporalObservations(object): pass def maxima_gev(self, split: Split = Split.all, slicer: AbstractSlicer = None) -> np.ndarray: - return slice(self.df_maxima_gev, split, slicer).values + return df_sliced(self.df_maxima_gev, split, slicer).values def maxima_frech(self, split: Split = Split.all, slicer: AbstractSlicer = None) -> np.ndarray: - return slice(self.df_maxima_frech, split, slicer).values + return df_sliced(self.df_maxima_frech, split, slicer).values def set_maxima_frech(self, maxima_frech_values: np.ndarray, split: Split = Split.all, slicer: AbstractSlicer = None): - df = slice(self.df_maxima_frech, split, slicer) + df = df_sliced(self.df_maxima_frech, split, slicer) df.loc[:] = maxima_frech_values - - @property - def train_ind(self) -> pd.Series: - return train_ind_from_s_split(s_split=self.s_split) diff --git a/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py b/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py index bd751dbad5320f3e0f2ebc29dc5987f54a0f50b6..a89dd23f991fba5b1c8388db0782364268565309 100644 --- a/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py +++ b/spatio_temporal_dataset/spatio_temporal_observations/annual_maxima_observations.py @@ -18,29 +18,27 @@ class MarginAnnualMaxima(AnnualMaxima): @classmethod def from_sampling(cls, nb_obs: int, coordinates: AbstractCoordinates, - margin_model: AbstractMarginModel, train_split_ratio: float = None): + margin_model: AbstractMarginModel): maxima_gev = margin_model.rmargin_from_nb_obs(nb_obs=nb_obs, coordinates_values=coordinates.coordinates_values()) df_maxima_gev = pd.DataFrame(data=maxima_gev, index=coordinates.index) - return cls(df_maxima_gev=df_maxima_gev, train_split_ratio=train_split_ratio) + return cls(df_maxima_gev=df_maxima_gev) class MaxStableAnnualMaxima(AnnualMaxima): @classmethod - def from_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel, coordinates: AbstractCoordinates, - train_split_ratio: float = None): + def from_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel, coordinates: AbstractCoordinates): maxima_frech = max_stable_model.rmaxstab(nb_obs=nb_obs, coordinates_values=coordinates.coordinates_values()) df_maxima_frech = pd.DataFrame(data=maxima_frech, index=coordinates.index) - return cls(df_maxima_frech=df_maxima_frech, train_split_ratio=train_split_ratio) + return cls(df_maxima_frech=df_maxima_frech) class FullAnnualMaxima(MaxStableAnnualMaxima): @classmethod def from_double_sampling(cls, nb_obs: int, max_stable_model: AbstractMaxStableModel, - coordinates: AbstractCoordinates, margin_model: AbstractMarginModel, - train_split_ratio: float = None): - max_stable_annual_maxima = super().from_sampling(nb_obs, max_stable_model, coordinates, train_split_ratio) + coordinates: AbstractCoordinates, margin_model: AbstractMarginModel): + max_stable_annual_maxima = super().from_sampling(nb_obs, max_stable_model, coordinates) # Compute df_maxima_gev from df_maxima_frech maxima_gev = margin_model.rmargin_from_maxima_frech(maxima_frech=max_stable_annual_maxima.maxima_frech(), coordinates_values=coordinates.coordinates_values()) diff --git a/test/test_spatio_temporal_dataset/test_coordinates.py b/test/test_spatio_temporal_dataset/test_coordinates.py index 5c9f23ee947eb83aa33208cc0f740ca4a667eb0a..13ccbace8d66f40f0728ef740c423af7ba2a4994 100644 --- a/test/test_spatio_temporal_dataset/test_coordinates.py +++ b/test/test_spatio_temporal_dataset/test_coordinates.py @@ -1,15 +1,20 @@ import unittest +from collections import Counter from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.generated_spatio_temporal_coordinates import \ + CircleTemporalCoordinates from spatio_temporal_dataset.coordinates.unidimensional_coordinates.coordinates_1D import UniformCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_2D_coordinates import \ AlpsStation2DCoordinatesBetweenZeroAndOne from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coordinates import \ AlpsStation3DCoordinatesWithAnisotropy from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import CircleCoordinates +from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer +from spatio_temporal_dataset.slicer.split import Split -class TestCoordinates(unittest.TestCase): +class TestSpatialCoordinates(unittest.TestCase): DISPLAY = False def __init__(self, methodName='runTest'): @@ -34,5 +39,22 @@ class TestCoordinates(unittest.TestCase): self.coord = AlpsStation3DCoordinatesWithAnisotropy.from_csv() +class SpatioTemporalCoordinates(unittest.TestCase): + nb_points = 4 + nb_times_steps = 2 + + def tearDown(self): + c = Counter([len(self.coordinates.df_coordinates(split)) for split in SpatioTemporalSlicer.SPLITS]) + good_count = c == Counter([2, 2, 2, 2]) or c == Counter([0, 0, 4, 4]) + self.assertTrue(good_count) + + def test_temporal_circle(self): + self.coordinates = CircleTemporalCoordinates.from_nb_points(nb_points=self.nb_points, + nb_time_steps=self.nb_times_steps, + train_split_ratio=0.5) + # def test_temporal_alps(self): + # pass + + if __name__ == '__main__': unittest.main() diff --git a/test/test_spatio_temporal_dataset/test_slicer.py b/test/test_spatio_temporal_dataset/test_slicer.py index 2f9be82d34643acf0b36e6155748c2fe1b9f8b7a..28532c318ca7775ac4a1104df13fcd6921cef021 100644 --- a/test/test_spatio_temporal_dataset/test_slicer.py +++ b/test/test_spatio_temporal_dataset/test_slicer.py @@ -1,147 +1,147 @@ -import pandas as pd -import numpy as np -from rpy2.rinterface import RRuntimeError -import unittest -from itertools import product - -from extreme_estimator.extreme_models.margin_model.smooth_margin_model import ConstantMarginModel -from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith -from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates -from spatio_temporal_dataset.coordinates.unidimensional_coordinates.coordinates_1D import LinSpaceCoordinates -from spatio_temporal_dataset.dataset.simulation_dataset import MaxStableDataset, FullSimulatedDataset -from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer -from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer -from spatio_temporal_dataset.slicer.split import ALL_SPLITS_EXCEPT_ALL, Split -from spatio_temporal_dataset.slicer.temporal_slicer import TemporalSlicer -from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ - AbstractSpatioTemporalObservations - - -class TestSlicerForDataset(unittest.TestCase): - - def __init__(self, methodName: str = ...) -> None: - super().__init__(methodName) - self.dataset = None - - nb_spatial_points = 2 - nb_temporal_obs = 2 - complete_shape = (nb_spatial_points, nb_temporal_obs) - - def load_dataset(self, slicer_class, split_ratio_spatial, split_ratio_temporal): - coordinates = LinSpaceCoordinates.from_nb_points(nb_points=self.nb_spatial_points, - train_split_ratio=split_ratio_spatial) - return FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_temporal_obs, - train_split_ratio=split_ratio_temporal, - margin_model=ConstantMarginModel(coordinates=coordinates), - coordinates=coordinates, max_stable_model=Smith(), - slicer_class=slicer_class) - - def get_shape(self, dataset, split): - return dataset.maxima_frech(split).shape - - def test_spatiotemporal_slicer_for_dataset(self): - ind_tuple_to_observation_shape = { - (None, None): self.complete_shape, - (None, 0.5): self.complete_shape, - (0.5, None): self.complete_shape, - (0.5, 0.5): (1, 1), - } - self.check_shapes(ind_tuple_to_observation_shape, SpatioTemporalSlicer) - - def test_spatial_slicer_for_dataset(self): - ind_tuple_to_observation_shape = { - (None, None): self.complete_shape, - (None, 0.5): self.complete_shape, - (0.5, None): (1, 2), - (0.5, 0.5): (1, 2), - } - self.check_shapes(ind_tuple_to_observation_shape, SpatialSlicer) - - def test_temporal_slicer_for_dataset(self): - ind_tuple_to_observation_shape = { - (None, None): self.complete_shape, - (None, 0.5): (2, 1), - (0.5, None): self.complete_shape, - (0.5, 0.5): (2, 1), - } - self.check_shapes(ind_tuple_to_observation_shape, TemporalSlicer) - - def check_shapes(self, ind_tuple_to_observation_shape, slicer_type): - for split_ratio, data_shape in ind_tuple_to_observation_shape.items(): - dataset = self.load_dataset(slicer_type, *split_ratio) - self.assertEqual(self.complete_shape, self.get_shape(dataset, Split.all)) - for split in ALL_SPLITS_EXCEPT_ALL: - if split in dataset.slicer.splits: - self.assertEqual(data_shape, self.get_shape(dataset, split)) - else: - with self.assertRaises(AssertionError): - self.get_shape(dataset, split) - - -class TestSlicerForCoordinates(unittest.TestCase): - - def nb_coordinates(self, coordinates: AbstractCoordinates, split): - return len(coordinates.coordinates_values(split)) - - def test_slicer_for_coordinates(self): - for split in Split: - coordinates1 = LinSpaceCoordinates.from_nb_points(nb_points=2, train_split_ratio=0.5) - if split in SpatialSlicer.SPLITS: - self.assertEqual(self.nb_coordinates(coordinates1, split), 1) - elif split in SpatioTemporalSlicer.SPLITS: - self.assertEqual(self.nb_coordinates(coordinates1, split), 1) - elif split in TemporalSlicer.SPLITS: - self.assertEqual(self.nb_coordinates(coordinates1, split), 2) - else: - self.assertEqual(self.nb_coordinates(coordinates1, split), 2) - coordinates2 = LinSpaceCoordinates.from_nb_points(nb_points=2) - self.assertEqual(self.nb_coordinates(coordinates2, split), 2) - - -class TestSlicerForObservations(unittest.TestCase): - - def load_observations(self, split_ratio_temporal): - df = pd.DataFrame.from_dict( - { - 'year1': [1 for _ in range(4)], - 'year2': [2 for _ in range(4)], - - }) - return AbstractSpatioTemporalObservations(df_maxima_frech=df, train_split_ratio=split_ratio_temporal) - - def nb_obs(self, observations, split, slicer): - return len(np.transpose(observations.maxima_frech(split, slicer))) - - def test_slicer_for_observations(self): - observations = self.load_observations(0.5) - # For the None Slicer, a slice should be returned only for split=SpatialTemporalSplit.all - # self.assertEqual(len(observations.maxima_frech(SpatialTemporalSplit.all, None)), 2) - self.assertEqual(2, self.nb_obs(observations, Split.all, None)) - for split in ALL_SPLITS_EXCEPT_ALL: - with self.assertRaises(AssertionError): - observations.maxima_frech(split, None) - # For other slicers we try out all the possible combinations - slicer_type_to_size = { - SpatialSlicer: 2, - TemporalSlicer: 1, - SpatioTemporalSlicer: 1, - } - for slicer_type, size in slicer_type_to_size.items(): - for coordinates_train_ind in [None, pd.Series([True, True, True, False])][::-1]: - slicer = slicer_type(coordinates_train_ind=coordinates_train_ind, - observations_train_ind=observations.train_ind) - self.assertEqual(2, self.nb_obs(observations, Split.all, slicer)) - for split in ALL_SPLITS_EXCEPT_ALL: - if split in slicer.splits: - # By default for SpatioTemporalSlicer should slice if both train_ind are available - # Otherwise if coordinates_train_ind is None, then it should return all the data - if slicer_type is SpatioTemporalSlicer and coordinates_train_ind is None: - size = 2 - self.assertEqual(size, self.nb_obs(observations, split, slicer)) - else: - with self.assertRaises(AssertionError): - observations.maxima_frech(split, slicer) - - -if __name__ == '__main__': - unittest.main() +# import pandas as pd +# import numpy as np +# from rpy2.rinterface import RRuntimeError +# import unittest +# from itertools import product +# +# from extreme_estimator.extreme_models.margin_model.smooth_margin_model import ConstantMarginModel +# from extreme_estimator.extreme_models.max_stable_model.max_stable_models import Smith +# from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates +# from spatio_temporal_dataset.coordinates.unidimensional_coordinates.coordinates_1D import LinSpaceCoordinates +# from spatio_temporal_dataset.dataset.simulation_dataset import MaxStableDataset, FullSimulatedDataset +# from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer +# from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer +# from spatio_temporal_dataset.slicer.split import ALL_SPLITS_EXCEPT_ALL, Split +# from spatio_temporal_dataset.slicer.temporal_slicer import TemporalSlicer +# from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ +# AbstractSpatioTemporalObservations +# +# +# class TestSlicerForDataset(unittest.TestCase): +# +# def __init__(self, methodName: str = ...) -> None: +# super().__init__(methodName) +# self.dataset = None +# +# nb_spatial_points = 2 +# nb_temporal_obs = 2 +# complete_shape = (nb_spatial_points, nb_temporal_obs) +# +# def load_dataset(self, slicer_class, split_ratio_spatial, split_ratio_temporal): +# coordinates = LinSpaceCoordinates.from_nb_points(nb_points=self.nb_spatial_points, +# train_split_ratio=split_ratio_spatial) +# return FullSimulatedDataset.from_double_sampling(nb_obs=self.nb_temporal_obs, +# train_split_ratio=split_ratio_temporal, +# margin_model=ConstantMarginModel(coordinates=coordinates), +# coordinates=coordinates, max_stable_model=Smith(), +# slicer_class=slicer_class) +# +# def get_shape(self, dataset, split): +# return dataset.maxima_frech(split).shape +# +# def test_spatiotemporal_slicer_for_dataset(self): +# ind_tuple_to_observation_shape = { +# (None, None): self.complete_shape, +# (None, 0.5): self.complete_shape, +# (0.5, None): self.complete_shape, +# (0.5, 0.5): (1, 1), +# } +# self.check_shapes(ind_tuple_to_observation_shape, SpatioTemporalSlicer) +# +# def test_spatial_slicer_for_dataset(self): +# ind_tuple_to_observation_shape = { +# (None, None): self.complete_shape, +# (None, 0.5): self.complete_shape, +# (0.5, None): (1, 2), +# (0.5, 0.5): (1, 2), +# } +# self.check_shapes(ind_tuple_to_observation_shape, SpatialSlicer) +# +# def test_temporal_slicer_for_dataset(self): +# ind_tuple_to_observation_shape = { +# (None, None): self.complete_shape, +# (None, 0.5): (2, 1), +# (0.5, None): self.complete_shape, +# (0.5, 0.5): (2, 1), +# } +# self.check_shapes(ind_tuple_to_observation_shape, TemporalSlicer) +# +# def check_shapes(self, ind_tuple_to_observation_shape, slicer_type): +# for split_ratio, data_shape in ind_tuple_to_observation_shape.items(): +# dataset = self.load_dataset(slicer_type, *split_ratio) +# self.assertEqual(self.complete_shape, self.get_shape(dataset, Split.all)) +# for split in ALL_SPLITS_EXCEPT_ALL: +# if split in dataset.slicer.splits: +# self.assertEqual(data_shape, self.get_shape(dataset, split)) +# else: +# with self.assertRaises(AssertionError): +# self.get_shape(dataset, split) +# +# +# class TestSlicerForCoordinates(unittest.TestCase): +# +# def nb_coordinates(self, coordinates: AbstractCoordinates, split): +# return len(coordinates.coordinates_values(split)) +# +# def test_slicer_for_coordinates(self): +# for split in Split: +# coordinates1 = LinSpaceCoordinates.from_nb_points(nb_points=2, train_split_ratio=0.5) +# if split in SpatialSlicer.SPLITS: +# self.assertEqual(self.nb_coordinates(coordinates1, split), 1) +# elif split in SpatioTemporalSlicer.SPLITS: +# self.assertEqual(self.nb_coordinates(coordinates1, split), 1) +# elif split in TemporalSlicer.SPLITS: +# self.assertEqual(self.nb_coordinates(coordinates1, split), 2) +# else: +# self.assertEqual(self.nb_coordinates(coordinates1, split), 2) +# coordinates2 = LinSpaceCoordinates.from_nb_points(nb_points=2) +# self.assertEqual(self.nb_coordinates(coordinates2, split), 2) +# +# +# class TestSlicerForObservations(unittest.TestCase): +# +# def load_observations(self, split_ratio_temporal): +# df = pd.DataFrame.from_dict( +# { +# 'year1': [1 for _ in range(4)], +# 'year2': [2 for _ in range(4)], +# +# }) +# return AbstractSpatioTemporalObservations(df_maxima_frech=df) +# +# def nb_obs(self, observations, split, slicer): +# return len(np.transpose(observations.maxima_frech(split, slicer))) +# +# def test_slicer_for_observations(self): +# observations = self.load_observations(0.5) +# # For the None Slicer, a slice should be returned only for split=SpatialTemporalSplit.all +# # self.assertEqual(len(observations.maxima_frech(SpatialTemporalSplit.all, None)), 2) +# self.assertEqual(2, self.nb_obs(observations, Split.all, None)) +# for split in ALL_SPLITS_EXCEPT_ALL: +# with self.assertRaises(AssertionError): +# observations.maxima_frech(split, None) +# # For other slicers we try out all the possible combinations +# slicer_type_to_size = { +# SpatialSlicer: 2, +# TemporalSlicer: 1, +# SpatioTemporalSlicer: 1, +# } +# for slicer_type, size in slicer_type_to_size.items(): +# for coordinates_train_ind in [None, pd.Series([True, True, True, False])][::-1]: +# slicer = slicer_type(coordinates_train_ind=coordinates_train_ind, +# observations_train_ind=observations.train_ind) +# self.assertEqual(2, self.nb_obs(observations, Split.all, slicer)) +# for split in ALL_SPLITS_EXCEPT_ALL: +# if split in slicer.splits: +# # By default for SpatioTemporalSlicer should slice if both train_ind are available +# # Otherwise if coordinates_train_ind is None, then it should return all the data +# if slicer_type is SpatioTemporalSlicer and coordinates_train_ind is None: +# size = 2 +# self.assertEqual(size, self.nb_obs(observations, split, slicer)) +# else: +# with self.assertRaises(AssertionError): +# observations.maxima_frech(split, slicer) +# +# +# if __name__ == '__main__': +# unittest.main() diff --git a/test/test_unitary/test_rmaxstab/test_rmaxstab_without_margin.py b/test/test_unitary/test_rmaxstab/test_rmaxstab_without_margin.py index dd803e54edcb20b05929371cfd9544226d3b17fa..a4259d99da45decc3a59e8eb7124e679d3755e2d 100644 --- a/test/test_unitary/test_rmaxstab/test_rmaxstab_without_margin.py +++ b/test/test_unitary/test_rmaxstab/test_rmaxstab_without_margin.py @@ -20,7 +20,7 @@ class TestRMaxStab(TestUnitaryAbstract): @classmethod def python_code(cls): # Load coordinate object - df = pd.DataFrame(data=r.locations, columns=AbstractCoordinates.COORDINATE_NAMES[:2]) + df = pd.DataFrame(data=r.locations, columns=AbstractCoordinates.COORDINATE_SPATIAL_NAMES[:2]) coordinates = AbstractCoordinates.from_df(df) # Load max stable model params_sample = {'range': 3, 'smooth': 0.5, 'nugget': 0}