diff --git a/spatio_temporal_dataset/spatial_coordinates/abstract_coordinates.py b/spatio_temporal_dataset/spatial_coordinates/abstract_coordinates.py index 31bf1787b8361bb49542a6be14d016b99c780f14..ed6bca63c2df83ec20002f8e0da95e24692841fe 100644 --- a/spatio_temporal_dataset/spatial_coordinates/abstract_coordinates.py +++ b/spatio_temporal_dataset/spatial_coordinates/abstract_coordinates.py @@ -1,4 +1,5 @@ import os.path as op +import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -31,31 +32,34 @@ class AbstractSpatialCoordinates(object): df = pd.read_csv(csv_path) return cls.from_df(df) - def df_coord_split(self, split_str): + def coord_x_y_values(self, df_coord: pd.DataFrame) -> np.ndarray: + return df_coord.loc[:, [self.COORD_X, self.COORD_Y]].values + + def df_coord_split(self, split_str: str) -> pd.DataFrame: assert self.s_split is not None ind = self.s_split == split_str return self.df_coord.loc[ind] @property - def df_coord_train(self): - return self.df_coord_split(self.TRAIN_SPLIT_STR) - - @property - def df_coord_test(self): - return self.df_coord_split(self.TEST_SPLIT_STR) + def coord(self) -> np.ndarray: + return self.coord_x_y_values(df_coord=self.df_coord) @property - def nb_points(self): - return len(self.df_coord) + def coord_train(self) -> np.ndarray: + return self.coord_x_y_values(df_coord=self.df_coord_split(self.TRAIN_SPLIT_STR)) @property - def coord(self): - return self.df_coord.values + def coord_test(self) -> np.ndarray: + return self.coord_x_y_values(df_coord=self.df_coord_split(self.TEST_SPLIT_STR)) @property def index(self): return self.df_coord.index + @property + def nb_points(self): + return len(self.df_coord) + def visualization(self): x, y = self.coord[:, 0], self.coord[:, 1] plt.scatter(x, y) diff --git a/spatio_temporal_dataset/spatial_coordinates/normalized_coordinates.py b/spatio_temporal_dataset/spatial_coordinates/normalized_coordinates.py index 
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..1ef1de44e78d5fd68924654d412e9d164a9b4ad8 100644
--- a/spatio_temporal_dataset/spatial_coordinates/normalized_coordinates.py
+++ b/spatio_temporal_dataset/spatial_coordinates/normalized_coordinates.py
@@ -0,0 +1,84 @@
+import pandas as pd
+
+from spatio_temporal_dataset.spatial_coordinates.abstract_coordinates import AbstractSpatialCoordinates
+from spatio_temporal_dataset.spatial_coordinates.alps_station_coordinates import AlpsStationCoordinate
+
+
+class AbstractNormalizingFunction(object):
+    """Base class of the normalizing functions: validate the input and return it unchanged."""
+
+    def normalize(self, df_coord: pd.DataFrame) -> pd.DataFrame:
+        """Check that df_coord has exactly two columns, then return it as is.
+
+        :raises ValueError: if df_coord does not have exactly two columns
+        """
+        # Explicit exception rather than an assert, which is stripped when running with -O
+        if len(df_coord.columns) != 2:
+            raise ValueError('Expected exactly 2 coordinate columns, got {}'.format(len(df_coord.columns)))
+        return df_coord
+
+
+class NormalizedCoordinates(AbstractSpatialCoordinates):
+    """Spatial coordinates obtained, through from_coordinates, by normalizing the X and Y columns."""
+
+    @classmethod
+    def from_coordinates(cls, spatial_coordinates: AbstractSpatialCoordinates,
+                         normalizing_function: AbstractNormalizingFunction) -> 'NormalizedCoordinates':
+        """Build normalized coordinates from existing ones; the split of the input is reused as is."""
+        # Work on a copy so that the coordinates given as input are left untouched
+        df_coord_normalized = spatial_coordinates.df_coord.copy()
+        coord_XY = [spatial_coordinates.COORD_X, spatial_coordinates.COORD_Y]
+        # Plain column assignment replaces the two columns, so their dtype can change
+        # (e.g. integer coordinates becoming floats once normalized)
+        df_coord_normalized[coord_XY] = normalizing_function.normalize(df_coord_normalized[coord_XY])
+        return cls(df_coord=df_coord_normalized, s_split=spatial_coordinates.s_split)
+
+
+# Define various types of normalizing functions
+
+
+class UniformNormalization(AbstractNormalizingFunction):
+    """Normalize the X and Y axes with the same function so as to conserve proportional distances"""
+
+    def normalize(self, df_coord: pd.DataFrame) -> pd.DataFrame:
+        """Return a new DataFrame where uniform_normalization has been applied to each column."""
+        # Copy first: df_coord may be a slice of the caller's DataFrame and must not be modified in place
+        df_coord_normalized = super().normalize(df_coord).copy()
+        for column in df_coord_normalized.columns:
+            df_coord_normalized[column] = self.uniform_normalization(df_coord_normalized[column])
+        return df_coord_normalized
+
+    def uniform_normalization(self, s_coord: pd.Series) -> pd.Series:
+        """Identity by default; subclasses override it with the actual normalization."""
+        return s_coord
+
+
+class BetweenZeroAndOneNormalization(UniformNormalization):
+    """Normalize such that min(coord) >= (0,0) and max(coord) <= (1,1)"""
+
+    def __init__(self) -> None:
+        # Global extrema over both axes, set by normalize() at each call
+        self.min_coord = None
+        self.max_coord = None
+
+    def normalize(self, df_coord: pd.DataFrame) -> pd.DataFrame:
+        # Compute the min and max globally
+        self.min_coord, self.max_coord = df_coord.min().min(), df_coord.max().max()
+        # Then, call the super method that will call the uniform_normalization method
+        return super().normalize(df_coord)
+
+    def uniform_normalization(self, s_coord: pd.Series) -> pd.Series:
+        """Shift and scale s_coord with the global extrema computed in normalize()."""
+        s_coord_shifted = s_coord - self.min_coord
+        coord_range = self.max_coord - self.min_coord
+        if coord_range == 0:
+            # All the coordinates are identical: dividing would produce NaN, so map them to 0
+            return s_coord_shifted.astype(float)
+        return s_coord_shifted / coord_range
+
+
+if __name__ == '__main__':
+    coord = AlpsStationCoordinate.from_csv()
+    normalized_coord = NormalizedCoordinates.from_coordinates(spatial_coordinates=coord,
+                                                              normalizing_function=BetweenZeroAndOneNormalization())
+    normalized_coord.visualization()