From e148d01c8fe21a3d6eec01cfffb602b664d997e0 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Mon, 5 Nov 2018 15:03:55 +0100
Subject: [PATCH] add normalized coordinates

---
 .../abstract_coordinates.py                   | 26 ++++----
 .../normalized_coordinates.py                 | 66 +++++++++++++++++++
 2 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/spatio_temporal_dataset/spatial_coordinates/abstract_coordinates.py b/spatio_temporal_dataset/spatial_coordinates/abstract_coordinates.py
index 31bf1787..ed6bca63 100644
--- a/spatio_temporal_dataset/spatial_coordinates/abstract_coordinates.py
+++ b/spatio_temporal_dataset/spatial_coordinates/abstract_coordinates.py
@@ -1,4 +1,5 @@
 import os.path as op
+import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 
@@ -31,31 +32,34 @@ class AbstractSpatialCoordinates(object):
         df = pd.read_csv(csv_path)
         return cls.from_df(df)
 
-    def df_coord_split(self, split_str):
+    def coord_x_y_values(self, df_coord: pd.DataFrame) -> np.ndarray:
+        return df_coord.loc[:, [self.COORD_X, self.COORD_Y]].values
+
+    def df_coord_split(self, split_str: str) -> pd.DataFrame:
         assert self.s_split is not None
         ind = self.s_split == split_str
         return self.df_coord.loc[ind]
 
     @property
-    def df_coord_train(self):
-        return self.df_coord_split(self.TRAIN_SPLIT_STR)
-
-    @property
-    def df_coord_test(self):
-        return self.df_coord_split(self.TEST_SPLIT_STR)
+    def coord(self) -> np.ndarray:
+        return self.coord_x_y_values(df_coord=self.df_coord)
 
     @property
-    def nb_points(self):
-        return len(self.df_coord)
+    def coord_train(self) -> np.ndarray:
+        return self.coord_x_y_values(df_coord=self.df_coord_split(self.TRAIN_SPLIT_STR))
 
     @property
-    def coord(self):
-        return self.df_coord.values
+    def coord_test(self) -> np.ndarray:
+        return self.coord_x_y_values(df_coord=self.df_coord_split(self.TEST_SPLIT_STR))
 
     @property
     def index(self):
         return self.df_coord.index
 
+    @property
+    def nb_points(self):
+        return len(self.df_coord)
+
     def visualization(self):
         x, y = self.coord[:, 0], self.coord[:, 1]
         plt.scatter(x, y)
diff --git a/spatio_temporal_dataset/spatial_coordinates/normalized_coordinates.py b/spatio_temporal_dataset/spatial_coordinates/normalized_coordinates.py
index e69de29b..1ef1de44 100644
--- a/spatio_temporal_dataset/spatial_coordinates/normalized_coordinates.py
+++ b/spatio_temporal_dataset/spatial_coordinates/normalized_coordinates.py
@@ -0,0 +1,66 @@
+import pandas as pd
+
+from spatio_temporal_dataset.spatial_coordinates.abstract_coordinates import AbstractSpatialCoordinates
+from spatio_temporal_dataset.spatial_coordinates.alps_station_coordinates import AlpsStationCoordinate
+
+
+class AbstractNormalizingFunction(object):
+
+    def normalize(self, df_coord: pd.DataFrame) -> pd.DataFrame:
+        assert len(df_coord.columns) == 2
+        return df_coord
+
+
+class NormalizedCoordinates(AbstractSpatialCoordinates):
+
+    @classmethod
+    def from_coordinates(cls, spatial_coordinates: AbstractSpatialCoordinates,
+                         normalizing_function: AbstractNormalizingFunction):
+        df_coord_normalized = spatial_coordinates.df_coord.copy()
+        coord_XY = [spatial_coordinates.COORD_X, spatial_coordinates.COORD_Y]
+        df_coord_normalized.loc[:, coord_XY] = normalizing_function.normalize(df_coord_normalized.loc[:, coord_XY])
+        return cls(df_coord=df_coord_normalized, s_split=spatial_coordinates.s_split)
+
+
+"""
+Define various types of normalizing functions
+"""
+
+
+class UniformNormalization(AbstractNormalizingFunction):
+    """Normalize the X and Y axes with one shared function so that relative distances are preserved"""
+
+    def normalize(self, df_coord: pd.DataFrame) -> pd.DataFrame:
+        df_coord = super().normalize(df_coord)
+        for i in range(2):
+            df_coord.iloc[:, i] = self.uniform_normalization(df_coord.iloc[:, i])
+        return df_coord
+
+    def uniform_normalization(self, s_coord: pd.Series) -> pd.Series:
+        return s_coord
+
+
+class BetweenZeroAndOneNormalization(UniformNormalization):
+    """Rescale with the global min and max (taken over both axes) so that every coordinate lies in [0, 1]"""
+
+    def __init__(self) -> None:
+        self.min_coord = None
+        self.max_coord = None
+
+    def normalize(self, df_coord: pd.DataFrame) -> pd.DataFrame:
+        # Compute the min and max globally, i.e. over both coordinate columns at once
+        self.min_coord, self.max_coord = df_coord.min().min(), df_coord.max().max()
+        # Then call the super method, which applies uniform_normalization to each column
+        return super().normalize(df_coord)
+
+    def uniform_normalization(self, s_coord: pd.Series) -> pd.Series:
+        s_coord_shifted = s_coord - self.min_coord
+        s_coord_scaled = s_coord_shifted / (self.max_coord - self.min_coord)
+        return s_coord_scaled
+
+
+if __name__ == '__main__':
+    coord = AlpsStationCoordinate.from_csv()
+    normalized_coord = NormalizedCoordinates.from_coordinates(spatial_coordinates=coord,
+                                                              normalizing_function=BetweenZeroAndOneNormalization())
+    normalized_coord.visualization()
-- 
GitLab