diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/non_stationary_trends.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/non_stationary_trends.py index 822268e9f8145ae01a006595e7fcdd29c33d5408..33b0031b6f2f86f5237d00c8d8c5d35a083b4063 100644 --- a/experiment/meteo_france_SCM_study/visualization/study_visualization/non_stationary_trends.py +++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/non_stationary_trends.py @@ -33,6 +33,9 @@ class AbstractNonStationaryTrendTest(object): if (margin_model_class, starting_point) not in self._margin_model_class_and_starting_point_to_estimator: margin_model = margin_model_class(coordinates=self.dataset.coordinates, starting_point=starting_point) estimator = self._load_estimator(margin_model) + estimator_name = get_display_name_from_object_type(estimator) + margin_model_name = get_display_name_from_object_type(margin_model) + print('Fitting {} with margin: {} for starting_point={}'.format(estimator_name, margin_model_name, starting_point)) estimator.fit() self._margin_model_class_and_starting_point_to_estimator[(margin_model_class, starting_point)] = estimator return self._margin_model_class_and_starting_point_to_estimator[(margin_model_class, starting_point)] @@ -76,11 +79,12 @@ class AbstractNonStationaryTrendTest(object): mu1_trends = [self.get_mu1(starting_point=year) for year in years] ax2 = ax.twinx() color_mu1 = 'c' + print(mu1_trends) ax2.plot(years, mu1_trends, color_mu1 + 'o-') ax2.set_ylabel('mu1 parameter', color=color_mu1) ax.set_xlabel('starting year for the linear trend of mu1') - align_yaxis_on_zero(ax, ax2) + # align_yaxis_on_zero(ax, ax2) title = self.display_name ax.set_title(title) ax.legend() diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index a51fe21862f8a473ee6a0bd5c21c5ce66ea51057..523d51310dcf1d28569eeca21c363e7ffa3550ad 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -47,10 +47,8 @@ class AbstractCoordinates(object): # Sort coordinates according to a specified order sorted_coordinates_columns = [c for c in self.COORDINATES_NAMES if c in coordinate_columns] self.df_all_coordinates = df.loc[:, sorted_coordinates_columns].copy() # type: pd.DataFrame - # Check the data type of the coordinate columns - accepted_dtypes = ['int'] - assert len(self.df_all_coordinates.select_dtypes(include=self.ALL_COORDINATES_ACCEPTED_TYPES).columns) \ - == len(coordinate_columns), 'coordinates columns dtypes should belong to {}'.format(accepted_dtypes) + # Cast coordinates + self.df_all_coordinates = self.df_all_coordinates.astype(self.COORDINATE_TYPE) # type: pd.DataFrame # Slicing attributes self.s_split_spatial = s_split_spatial # type: pd.Series @@ -60,6 +58,10 @@ class AbstractCoordinates(object): # Transformation attribute if transformation_class is None: transformation_class = IdentityTransformation + # Transformation only works for float coordinates + accepted_dtypes = [self.COORDINATE_TYPE] + assert len(self.df_all_coordinates.select_dtypes(include=accepted_dtypes).columns) \ + == len(coordinate_columns), 'coordinates columns dtypes should belong to {}'.format(accepted_dtypes) # Transformation class is instantiated with all coordinates self.transformation = transformation_class(self.df_all_coordinates) assert isinstance(self.transformation, AbstractTransformation) @@ -137,8 +139,7 @@ class AbstractCoordinates(object): # Split def df_coordinates(self, split: Split = Split.all) -> pd.DataFrame: - df_all_coordinate_as_float = self.df_all_coordinates.astype(self.COORDINATE_TYPE) # type: pd.DataFrame - df_transformed_coordinates = self.transformation.transform_df(df_all_coordinate_as_float) + df_transformed_coordinates = self.transformation.transform_df(self.df_all_coordinates) return df_sliced(df=df_transformed_coordinates, split=split, slicer=self.slicer) def coordinates_values(self, split: Split = Split.all) -> np.ndarray: diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py index dc86c8c9f760af0757fc22f339558920db20ead5..26bb7e0b1421a995539045eb129981ee8e4adfdb 100644 --- a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py @@ -17,15 +17,17 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates): transformation_class: type = None, spatial_coordinates: AbstractSpatialCoordinates = None, temporal_coordinates: AbstractTemporalCoordinates = None): - super().__init__(df, slicer_class, s_split_spatial, s_split_temporal, transformation_class) + super().__init__(df, slicer_class, s_split_spatial, s_split_temporal, None) # Spatial coordinates' if spatial_coordinates is None: - self.spatial_coordinates = AbstractSpatialCoordinates.from_df(df=self.df_spatial_coordinates()) + self.spatial_coordinates = AbstractSpatialCoordinates.from_df(df=self.df_spatial_coordinates(), + transformation_class=transformation_class) else: self.spatial_coordinates = spatial_coordinates # Temporal coordinates if temporal_coordinates is None: - self.temporal_coordinates = AbstractTemporalCoordinates.from_df(df=self.df_temporal_coordinates()) + self.temporal_coordinates = AbstractTemporalCoordinates.from_df(df=self.df_temporal_coordinates(), + transformation_class=transformation_class) else: self.temporal_coordinates = temporal_coordinates @@ -58,28 +60,33 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates): spatial_coordinates=spatial_coordinates, temporal_coordinates=temporal_coordinates) @classmethod - def from_df(cls, df: pd.DataFrame, train_split_ratio: float = None): + def from_df(cls, df: pd.DataFrame, train_split_ratio: float = None, transformation_class: type = None): assert cls.COORDINATE_T in df.columns assert cls.COORDINATE_X in df.columns # Assert that the time steps are in the good order with respect to the coordinates nb_points = len(set(df[cls.COORDINATE_X])) first_time_step_for_all_points = df.iloc[:nb_points][cls.COORDINATE_T] assert len(set(first_time_step_for_all_points)) == 1 - return super().from_df_and_slicer(df, SpatioTemporalSlicer, train_split_ratio) + return super().from_df_and_slicer(df, SpatioTemporalSlicer, train_split_ratio, transformation_class) @classmethod - def from_df_spatial_and_coordinate_t_values(cls, df_spatial, coordinate_t_values, train_split_ratio: float = None): + def from_df_spatial_and_coordinate_t_values(cls, df_spatial, coordinate_t_values, train_split_ratio: float = None, + transformation_class: type = None): df_time_steps = cls.get_df_from_df_spatial_and_coordinate_t_values(coordinate_t_values, df_spatial) - return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio) - + return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio, + transformation_class=transformation_class) @classmethod - def from_df_spatial_and_nb_steps(cls, df_spatial, nb_steps, train_split_ratio: float = None, start=0): + def from_df_spatial_and_nb_steps(cls, df_spatial, nb_steps, train_split_ratio: float = None, start=0, + transformation_class: type = None): coordinate_t_values = [start + t for t in range(nb_steps)] - return cls.from_df_spatial_and_coordinate_t_values(df_spatial, coordinate_t_values, train_split_ratio) + return cls.from_df_spatial_and_coordinate_t_values(df_spatial, coordinate_t_values, train_split_ratio, + transformation_class) @classmethod - def from_df_spatial_and_df_temporal(cls, df_spatial, df_temporal, train_split_ratio: float = None): + def from_df_spatial_and_df_temporal(cls, df_spatial, df_temporal, train_split_ratio: float = None, + transformation_class: type = None): nb_steps = len(df_temporal) coordinate_t_values = [df_temporal.iloc[t].values[0] for t in range(nb_steps)] - return cls.from_df_spatial_and_coordinate_t_values(df_spatial, coordinate_t_values, train_split_ratio) + return cls.from_df_spatial_and_coordinate_t_values(df_spatial, coordinate_t_values, train_split_ratio, + transformation_class) diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py index b29738fc6377a453a9f381c50811511822530a54..3c461629cb694c6b247652a022ac98ccb1117120 100644 --- a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py +++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/generated_spatio_temporal_coordinates.py @@ -10,17 +10,28 @@ class GeneratedSpatioTemporalCoordinates(AbstractSpatioTemporalCoordinates): SPATIAL_COORDINATES_CLASS = None @classmethod - def from_nb_points_and_nb_steps(cls, nb_points, nb_steps, train_split_ratio: float = None): + def from_nb_points_and_nb_steps(cls, nb_points, nb_steps, train_split_ratio: float = None, + transformation_class: type = None): assert isinstance(nb_steps, int) and nb_steps >= 1 - assert cls.SPATIAL_COORDINATES_CLASS is not None - assert hasattr(cls.SPATIAL_COORDINATES_CLASS, 'df_spatial') - df_spatial = cls.SPATIAL_COORDINATES_CLASS.df_spatial(nb_points=nb_points) - return cls.from_df_spatial_and_nb_steps(df_spatial, nb_steps, train_split_ratio) + assert hasattr(cls.spatial_coordinate_class(), 'df_spatial') + df_spatial = cls.spatial_coordinate_class().df_spatial(nb_points=nb_points) + return cls.from_df_spatial_and_nb_steps(df_spatial, nb_steps, train_split_ratio, + transformation_class=transformation_class) + + @classmethod + def spatial_coordinate_class(cls): + raise NotImplementedError class UniformSpatioTemporalCoordinates(GeneratedSpatioTemporalCoordinates): - SPATIAL_COORDINATES_CLASS = UniformSpatialCoordinates + + @classmethod + def spatial_coordinate_class(cls): + return UniformSpatialCoordinates class LinSpaceSpatial2DSpatioTemporalCoordinates(GeneratedSpatioTemporalCoordinates): - SPATIAL_COORDINATES_CLASS = LinSpaceSpatial2DCoordinates + + @classmethod + def spatial_coordinate_class(cls): + return LinSpaceSpatial2DCoordinates diff --git a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/abstract_transformation.py b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/abstract_transformation.py index 4f6bdc9ca930006495dc7a44d4ddcddae2573853..e4092e0fc3c17d11cccdd34586bc2dbc4af4958c 100644 --- a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/abstract_transformation.py +++ b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/abstract_transformation.py @@ -5,7 +5,7 @@ import numpy as np class AbstractTransformation(object): def __init__(self, df_coordinates): - self.df_coordinates = df_coordinates + self.df_coordinates = df_coordinates.copy() @property def nb_dimensions(self): @@ -19,6 +19,7 @@ class AbstractTransformation(object): return pd.Series(self.transform_array(s_coord.values), index=s_coord.index) def transform_df(self, df_coord: pd.DataFrame) -> pd.DataFrame: + df_coord = df_coord.copy() data = [self.transform_serie(r) for _, r in df_coord.iterrows()] return pd.DataFrame(data, index=df_coord.index, columns=df_coord.columns) diff --git a/test/test_spatio_temporal_dataset/test_transformation.py b/test/test_spatio_temporal_dataset/test_transformation.py index 5c8cdedaa4023763795eb42af9a8caf3065a8016..c6f2342ce1985786bdda5adaafb6d9033b9f6a88 100644 --- a/test/test_spatio_temporal_dataset/test_transformation.py +++ b/test/test_spatio_temporal_dataset/test_transformation.py @@ -2,6 +2,8 @@ import unittest import numpy as np +from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.generated_spatio_temporal_coordinates import \ + GeneratedSpatioTemporalCoordinates, UniformSpatioTemporalCoordinates from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \ ConsecutiveTemporalCoordinates from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.uniform_normalization import \ @@ -25,6 +27,22 @@ class TestTransformation(unittest.TestCase): self.assertTrue(equals.all(), msg="expected: {}, res:{}".format(expected_coordinates, normalized_coordinates)) + def test_spatio_temporal_normalization(self): + + transformation_class_to_expected = {BetweenZeroAndOneNormalization: [0.0, 1.0], + BetweenMinusOneAndOneNormalization: [-1.0, 1.0]} + + for transformation_class, expected in transformation_class_to_expected.items(): + coordinates = UniformSpatioTemporalCoordinates.from_nb_points_and_nb_steps(nb_points=2, nb_steps=50, + transformation_class=transformation_class) + temporal_coordinates = coordinates.temporal_coordinates + normalized_coordinates = temporal_coordinates.df_coordinates().iloc[:, 0].values + normalized_coordinates = np.array([normalized_coordinates[0], normalized_coordinates[-1]]) + expected_coordinates = np.array(expected) + equals = normalized_coordinates == expected_coordinates + self.assertTrue(equals.all(), + msg="expected: {}, res:{}".format(expected_coordinates, normalized_coordinates)) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 170988f3da51be4b46620aac92c09580a2e89b2c..072bd796382121dc76042e95bdb38bbb19e62a9f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -89,9 +89,10 @@ def load_test_temporal_coordinates(nb_steps, train_split_ratio=None): TEST_TEMPORAL_COORDINATES] -def load_test_spatiotemporal_coordinates(nb_points, nb_steps, train_split_ratio=None): +def load_test_spatiotemporal_coordinates(nb_points, nb_steps, train_split_ratio=None, transformation_class: type = None): return [coordinate_class.from_nb_points_and_nb_steps(nb_points=nb_points, nb_steps=nb_steps, - train_split_ratio=train_split_ratio) + train_split_ratio=train_split_ratio, + transformation_class=transformation_class) for coordinate_class in TEST_SPATIO_TEMPORAL_COORDINATES]