diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py index b269d60232718d3da23dcb1e22087a293b052326..84afada107c7fa706208564fbc39cf4e873d8e69 100644 --- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py @@ -35,6 +35,8 @@ class AbstractCoordinates(object): TEMPORAL_SPLIT = 'temporal_split' # Coordinates columns COORDINATES_NAMES = COORDINATE_SPATIAL_NAMES + [COORDINATE_T] + # Coordinate type + COORDINATE_TYPE = 'float64' def __init__(self, df: pd.DataFrame, slicer_class: type, s_split_spatial: pd.Series = None, s_split_temporal: pd.Series = None, transformation_class: type = None): @@ -44,10 +46,8 @@ class AbstractCoordinates(object): # Sort coordinates according to a specified order sorted_coordinates_columns = [c for c in self.COORDINATES_NAMES if c in coordinate_columns] self.df_all_coordinates = df.loc[:, sorted_coordinates_columns].copy() # type: pd.DataFrame - # Check the data type of the coordinate columns - accepted_dtypes = ['float64', 'int64'] - assert len(self.df_all_coordinates.select_dtypes(include=accepted_dtypes).columns) == len(coordinate_columns), \ - 'coordinates columns dtypes should belong to {}'.format(accepted_dtypes) + # Cast df_all_coordinates to the desired type + self.df_all_coordinates = self.df_all_coordinates.astype(self.COORDINATE_TYPE) # Slicing attributes self.s_split_spatial = s_split_spatial # type: pd.Series @@ -133,7 +133,6 @@ class AbstractCoordinates(object): # Split def df_coordinates(self, split: Split = Split.all) -> pd.DataFrame: - print(type(self.transformation)) df_transformed_coordinates = self.transformation.transform_df(df_coord=self.df_all_coordinates) return df_sliced(df=df_transformed_coordinates, split=split, slicer=self.slicer) diff --git a/spatio_temporal_dataset/coordinates/temporal_coordinates/generated_temporal_coordinates.py 
b/spatio_temporal_dataset/coordinates/temporal_coordinates/generated_temporal_coordinates.py index 766cd5e409c69c94b0533a87fa42f3c71601976e..dda74952f49e6ced2f8dfd13ec8082a5b8d7cae8 100644 --- a/spatio_temporal_dataset/coordinates/temporal_coordinates/generated_temporal_coordinates.py +++ b/spatio_temporal_dataset/coordinates/temporal_coordinates/generated_temporal_coordinates.py @@ -11,11 +11,9 @@ class ConsecutiveTemporalCoordinates(AbstractTemporalCoordinates): def from_nb_temporal_steps(cls, nb_temporal_steps, train_split_ratio: float = None, start=0, transformation_class: type = None): df = cls.df_temporal(nb_temporal_steps, start) - print(df.dtypes) return cls.from_df(df, train_split_ratio, transformation_class=transformation_class) @classmethod def df_temporal(cls, nb_temporal_steps, start=0): df = pd.DataFrame.from_dict({cls.COORDINATE_T: list(range(start, start + nb_temporal_steps))}) - # df = df.astype return df diff --git a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/abstract_transformation.py b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/abstract_transformation.py index 77bfcebda3dc9564d37cea72a04da08c92c0f6b1..4f6bdc9ca930006495dc7a44d4ddcddae2573853 100644 --- a/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/abstract_transformation.py +++ b/spatio_temporal_dataset/coordinates/transformed_coordinates/transformation/abstract_transformation.py @@ -19,7 +19,8 @@ class AbstractTransformation(object): return pd.Series(self.transform_array(s_coord.values), index=s_coord.index) def transform_df(self, df_coord: pd.DataFrame) -> pd.DataFrame: - return df_coord.apply(self.transform_serie, axis=1) + data = [self.transform_serie(r) for _, r in df_coord.iterrows()] + return pd.DataFrame(data, index=df_coord.index, columns=df_coord.columns) class IdentityTransformation(AbstractTransformation): diff --git a/test/test_experiment/test_weird_bug.py 
b/test/test_experiment/test_weird_bug.py new file mode 100644 index 0000000000000000000000000000000000000000..77ad18699b2ad6489b21c1ec2b7f7f2e580aa71d --- /dev/null +++ b/test/test_experiment/test_weird_bug.py @@ -0,0 +1,40 @@ +import unittest + +from experiment.meteo_france_SCM_study.crocus.crocus import CrocusSwe +from experiment.meteo_france_SCM_study.visualization.study_visualization.main_study_visualizer import \ + study_iterator_global +from experiment.meteo_france_SCM_study.visualization.study_visualization.non_stationary_trends import \ + ConditionalIndedendenceLocationTrendTest +from experiment.meteo_france_SCM_study.visualization.study_visualization.study_visualizer import StudyVisualizer +from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.uniform_normalization import \ + BetweenZeroAndOneNormalization, BetweenMinusOneAndOneNormalization +from utils import get_display_name_from_object_type + + +class TestCoordinateSensitivity(unittest.TestCase): + + def test_weird(self):  # exploratory: prints the coordinates for each normalization class, makes no assertions + # todo: maybe the code does not like negative coordinates + # todo: maybe now that the signs of the x coordinates are all negative and the others are all positive, it is easier to find the perfect spatial structure + altitudes = [3000] + transformation_classes = [BetweenZeroAndOneNormalization, BetweenMinusOneAndOneNormalization][:] + for transformation_class in transformation_classes: + study_classes = [CrocusSwe] + for study in study_iterator_global(study_classes, altitudes=altitudes, verbose=False): + print('\n\n') + study_visualizer = StudyVisualizer(study, transformation_class=transformation_class) + study_visualizer.temporal_non_stationarity = True + print(study_visualizer.coordinates) + # trend_test = ConditionalIndedendenceLocationTrendTest(study_visualizer.dataset) + # # years = [1960, 1990] + # # mu1s = [trend_test.get_mu1(year) for year in years] + # # print('Stationary') + # # print(trend_test.get_estimator(trend_test.stationary_margin_model_class, 
starting_point=None).margin_function_fitted.coef_dict) + # print('Non Stationary') + # print(trend_test.get_estimator(trend_test.non_stationary_margin_model_class, starting_point=1960).margin_function_fitted.coef_dict) + # # print(get_display_name_from_object_type(type(transformation_2D)), 'mu1s: ', mu1s) + # # self.assertTrue(0.0 not in mu1s) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_spatio_temporal_dataset/test_transformation.py b/test/test_spatio_temporal_dataset/test_transformation.py new file mode 100644 index 0000000000000000000000000000000000000000..5c8cdedaa4023763795eb42af9a8caf3065a8016 --- /dev/null +++ b/test/test_spatio_temporal_dataset/test_transformation.py @@ -0,0 +1,30 @@ +import unittest + +import numpy as np + +from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \ + ConsecutiveTemporalCoordinates +from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.uniform_normalization import \ + BetweenZeroAndOneNormalization, BetweenMinusOneAndOneNormalization + + +class TestTransformation(unittest.TestCase): + + def test_temporal_normalization(self):  # 3 consecutive temporal steps starting at 1950 must map onto the expected normalized values + nb_steps = 3 + start = 1950 + transformation_class_to_expected = {BetweenZeroAndOneNormalization: [0.0, 0.5, 1.0], + BetweenMinusOneAndOneNormalization: [-1.0, 0.0, 1.0]} + for transformation_class, expected in transformation_class_to_expected.items(): + temporal_coordinates = ConsecutiveTemporalCoordinates.from_nb_temporal_steps(nb_temporal_steps=nb_steps, + start=start, + transformation_class=transformation_class) + normalized_coordinates = temporal_coordinates.df_coordinates().iloc[:, 0].values + expected_coordinates = np.array(expected) + equals = np.isclose(normalized_coordinates, expected_coordinates)  # tolerant comparison: exact == on floats is fragile after normalization arithmetic + self.assertTrue(equals.all(), + msg="expected: {}, res:{}".format(expected_coordinates, normalized_coordinates)) + + +if __name__ == '__main__': + unittest.main()