From fc9c8bf78d83f9476715d513e45dd6ca66e67bba Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Mon, 23 Mar 2020 17:00:17 +0100 Subject: [PATCH] [contrasting project] fix memory issue that was happening while loading many precipitation studies. issue #300 --- .../coordinates/abstract_coordinates.py | 17 ++++++++++++---- .../abstract_temporal_covariate_for_fit.py | 19 ++++++++++++++++++ .../test_coordinates.py | 20 ++++++++++++++++--- test/test_utils.py | 4 +++- 4 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index af07c111..3d4a3fb0 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -6,6 +6,8 @@ import numpy as np import pandas as pd from mpl_toolkits.mplot3d import Axes3D +from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \ + AbstractTemporalCovariateForFit from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \ AbstractTransformation, IdentityTransformation from spatio_temporal_dataset.coordinates.utils import get_index_without_spatio_temporal_index_suffix @@ -240,9 +242,11 @@ class AbstractCoordinates(object): return self.df_coordinates(split, transformed=transformed).loc[:, self.temporal_coordinates_names] \ .drop_duplicates() - def df_temporal_coordinates_for_fit(self, split=Split.all, starting_point=None) -> pd.DataFrame: + def df_temporal_coordinates_for_fit(self, split=Split.all, starting_point=None, + temporal_covariate_for_fit: Union[None, type] = None) -> pd.DataFrame: + # Load time covariate if starting_point is None: - return self.df_temporal_coordinates(split=split, transformed=True) + df = self.df_temporal_coordinates(split=split, transformed=True) else: # Load the un transformed coordinates df_temporal_coordinates = self.df_temporal_coordinates(split=split, transformed=False) @@ -252,7 +256,7 @@ class AbstractCoordinates(object): ind_to_modify = df_temporal_coordinates.iloc[:, 0] <= starting_point # type: pd.Series # Assert that some coordinates are selected but not all msg = '{} First year={} Last_year={}'.format(sum(ind_to_modify), df_temporal_coordinates.iloc[0, 0], - df_temporal_coordinates.iloc[-1, 0]) + df_temporal_coordinates.iloc[-1, 0]) assert 0 < sum(ind_to_modify) < len(ind_to_modify), msg # Modify the temporal coordinates to enforce the stationarity df_temporal_coordinates.loc[ind_to_modify] = starting_point @@ -260,7 +264,12 @@ class AbstractCoordinates(object): temporal_transformation = self.temporal_coordinates.transformation_class( df_temporal_coordinates) # type: AbstractTransformation # Return the result of the temporal transformation - return temporal_transformation.transform_df(df_temporal_coordinates) + df = temporal_transformation.transform_df(df_temporal_coordinates) + # Potentially transform the time covariate into another covariate + if temporal_covariate_for_fit is not None: + assert issubclass(temporal_covariate_for_fit, AbstractTemporalCovariateForFit) + df = df.apply(temporal_covariate_for_fit.get_temporal_covariate) + return df @property def temporal_coordinates(self): diff --git a/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py new file mode 100644 index 00000000..7ce27a40 --- /dev/null +++ b/spatio_temporal_dataset/coordinates/temporal_coordinates/abstract_temporal_covariate_for_fit.py @@ -0,0 +1,19 @@ +class AbstractTemporalCovariateForFit(object): + + @classmethod + def get_temporal_covariate(cls, t): + raise NotImplementedError + + +class TimeTemporalCovariate(AbstractTemporalCovariateForFit): + + @classmethod + def get_temporal_covariate(cls, t): + return t + + +class MeanGlobalTemperatureCovariate(AbstractTemporalCovariateForFit): + + @classmethod + def get_temporal_covariate(cls, t): + pass diff --git a/test/test_spatio_temporal_dataset/test_coordinates.py b/test/test_spatio_temporal_dataset/test_coordinates.py index 43b12db5..5319c9d7 100644 --- a/test/test_spatio_temporal_dataset/test_coordinates.py +++ b/test/test_spatio_temporal_dataset/test_coordinates.py @@ -17,6 +17,8 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coo AlpsStation3DCoordinatesWithAnisotropy from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \ CircleSpatialCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_covariate_for_fit import \ + AbstractTemporalCovariateForFit, TimeTemporalCovariate from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import \ CenteredScaledNormalization from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.uniform_normalization import \ @@ -137,18 +139,30 @@ class TestCoordinatesWithTransformedStartingPoint(unittest.TestCase): transformation_class=BetweenZeroAndOneNormalization)[ 0] temporal_coordinates = \ - load_test_temporal_coordinates(nb_steps=self.nb_steps, transformation_class=CenteredScaledNormalization)[0] + load_test_temporal_coordinates(nb_steps=self.nb_steps, transformation_class=CenteredScaledNormalization)[0] coordinates = AbstractSpatioTemporalCoordinates.from_spatial_coordinates_and_temporal_coordinates( spatial_coordinates=spatial_coordinate, temporal_coordinates=temporal_coordinates) # Check that df_all_coordinates have not yet been normalized self.assertEqual(coordinates.df_temporal_coordinates(transformed=False).iloc[-1, 0], 49.0) # Check that the normalization is working - self.assertAlmostEqual(coordinates.df_temporal_coordinates_for_fit(starting_point=None).iloc[0, 0], -1.697749375254331) - self.assertAlmostEqual(coordinates.df_temporal_coordinates_for_fit(starting_point=2).iloc[2, 0], -1.5739459974625107) + self.assertAlmostEqual(coordinates.df_temporal_coordinates_for_fit(starting_point=None).iloc[0, 0], + -1.697749375254331) + self.assertAlmostEqual(coordinates.df_temporal_coordinates_for_fit(starting_point=2).iloc[2, 0], + -1.5739459974625107) self.assertNotEqual(coordinates.df_temporal_coordinates_for_fit(starting_point=2).iloc[2, 0], coordinates.df_temporal_coordinates_for_fit(starting_point=2).iloc[3, 0]) +class TestCoordinatesWithModifiedCovariate(unittest.TestCase): + + def test_time_covariate(self): + coordinates = load_test_temporal_coordinates(nb_steps=10)[0] + old_df = coordinates.df_temporal_coordinates_for_fit().copy() + new_df = coordinates.df_temporal_coordinates_for_fit(temporal_covariate_for_fit=TimeTemporalCovariate) + pd.testing.assert_frame_equal(old_df, new_df) + # pd.as.assertEqual(old_df, new_df) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 169983c9..49e95c9b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -28,6 +28,8 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_c from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.generated_spatio_temporal_coordinates import \ UniformSpatioTemporalCoordinates, LinSpaceSpatial2DSpatioTemporalCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates +from spatio_temporal_dataset.coordinates.temporal_coordinates.abstract_temporal_coordinates import \ + AbstractTemporalCoordinates from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \ ConsecutiveTemporalCoordinates @@ -105,7 +107,7 @@ def load_test_3D_spatial_coordinates(nb_points, transformation_class=None) -> Li transformation_class=transformation_class) -def load_test_temporal_coordinates(nb_steps, train_split_ratio=None, transformation_class=None): +def load_test_temporal_coordinates(nb_steps, train_split_ratio=None, transformation_class=None) -> List[AbstractTemporalCoordinates]: return [coordinate_class.from_nb_temporal_steps(nb_temporal_steps=nb_steps, train_split_ratio=train_split_ratio, transformation_class=transformation_class) -- GitLab