From 70baaeed7428a8199f8b440e21fc4526d6e34182 Mon Sep 17 00:00:00 2001
From: Le Roux Erwan <erwan.le-roux@irstea.fr>
Date: Fri, 10 May 2019 10:10:15 +0200
Subject: [PATCH] [COORDINATE][SPATIO TEMPORAL INDEX] add rule for spatio
 temporal index suffix. update test.

---
 .../main_study_visualizer.py                  |  1 +
 .../coordinates/abstract_coordinates.py       |  8 +++++-
 .../abstract_spatio_temporal_coordinates.py   |  8 +++---
 spatio_temporal_dataset/coordinates/utils.py  | 23 ++++++++++++++++
 .../dataset/abstract_dataset.py               |  4 ++-
 .../test_coordinates.py                       | 11 +++++---
 .../test_dataset.py                           | 27 +++++++++++++++++--
 7 files changed, 71 insertions(+), 11 deletions(-)
 create mode 100644 spatio_temporal_dataset/coordinates/utils.py

diff --git a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py
index 24c9c8c0..53144ef8 100644
--- a/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py
+++ b/experiment/meteo_france_SCM_study/visualization/study_visualization/main_study_visualizer.py
@@ -113,6 +113,7 @@ def complete_analysis(only_first_one=False):
 def trend_analysis():
     save_to_file = True
     only_first_one = False
+    # Full list of altitudes, to test the other ones: [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900, 4200, 4500, 4800]
     altitudes = [300, 1200, 2100, 3000][:]
     study_classes = [CrocusSwe, CrocusDepth, SafranSnowfall, SafranRainfall, SafranTemperature]
     for study in study_iterator_global(study_classes, only_first_one=only_first_one, altitudes=altitudes):
diff --git a/spatio_temporal_dataset/coordinates/abstract_coordinates.py b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
index 1a22f396..7b6ae41b 100644
--- a/spatio_temporal_dataset/coordinates/abstract_coordinates.py
+++ b/spatio_temporal_dataset/coordinates/abstract_coordinates.py
@@ -6,6 +6,7 @@ import numpy as np
 import pandas as pd
 from mpl_toolkits.mplot3d import Axes3D
 
+from spatio_temporal_dataset.coordinates.utils import get_index_without_spatio_temporal_index_suffix
 from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer, df_sliced
 from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer
 from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
@@ -176,7 +177,12 @@ class AbstractCoordinates(object):
             return self.df_coordinates(split).loc[:, self.coordinates_spatial_names].drop_duplicates()
 
     def spatial_index(self, split: Split = Split.all) -> pd.Index:
-        return self.df_spatial_coordinates(split).index
+        df_spatial = self.df_spatial_coordinates(split)
+        if self.has_spatio_temporal_coordinates:
+            # Remove the spatio temporal index suffix
+            return get_index_without_spatio_temporal_index_suffix(df_spatial)
+        else:
+            return df_spatial.index
 
     # Temporal attributes
 
diff --git a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py
index 485368ec..53e4af8a 100644
--- a/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py
+++ b/spatio_temporal_dataset/coordinates/spatio_temporal_coordinates/abstract_spatio_temporal_coordinates.py
@@ -1,6 +1,7 @@
 import pandas as pd
 
 from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
+from spatio_temporal_dataset.coordinates.utils import get_index_with_spatio_temporal_index_suffix
 from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
 
 
@@ -19,13 +20,12 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates):
     @classmethod
     def from_df_spatial_and_nb_steps(cls, df_spatial, nb_steps, train_split_ratio: float = None, start=0):
         df_time_steps = []
-        index_type = type(df_spatial.index[0])
         for t in range(nb_steps):
             df_time_step = df_spatial.copy()
             df_time_step[cls.COORDINATE_T] = start + t
-            index_suffix = index_type(t * len(df_spatial))
-            time_step_index = [i + index_suffix for i in df_spatial.index]
-            df_time_step.index = time_step_index
+            df_time_step.index = get_index_with_spatio_temporal_index_suffix(df_spatial, t)
             df_time_steps.append(df_time_step)
         df_time_steps = pd.concat(df_time_steps)
         return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio)
+
+
diff --git a/spatio_temporal_dataset/coordinates/utils.py b/spatio_temporal_dataset/coordinates/utils.py
new file mode 100644
index 00000000..5261eafb
--- /dev/null
+++ b/spatio_temporal_dataset/coordinates/utils.py
@@ -0,0 +1,23 @@
+# Helpers to add/remove the suffix that differentiates a spatio-temporal index from a spatial index
+import pandas as pd
+import numpy as np
+
+
+def get_index_suffix(df_spatial: pd.DataFrame, t):
+    index_type = type(df_spatial.index[0])
+    assert index_type in [int, float, str, np.int64, np.float64], index_type
+    return index_type(t * len(df_spatial))
+
+
+def get_index_with_spatio_temporal_index_suffix(df_spatial: pd.DataFrame, t):
+    index_suffix = get_index_suffix(df_spatial, t)
+    return pd.Index([i + index_suffix for i in df_spatial.index])
+
+
+def get_index_without_spatio_temporal_index_suffix(df_spatial: pd.DataFrame):
+    index_suffix = get_index_suffix(df_spatial, 0)
+    if isinstance(index_suffix, str):
+        return df_spatial.index.str.split(index_suffix).str.join('')
+    else:
+        return df_spatial.index - index_suffix
+
diff --git a/spatio_temporal_dataset/dataset/abstract_dataset.py b/spatio_temporal_dataset/dataset/abstract_dataset.py
index 1b2996c8..2dbb6476 100644
--- a/spatio_temporal_dataset/dataset/abstract_dataset.py
+++ b/spatio_temporal_dataset/dataset/abstract_dataset.py
@@ -56,7 +56,9 @@ class AbstractDataset(object):
     def transform_maxima_for_spatial_extreme_package(self, maxima_function, split) -> np.ndarray:
         array = maxima_function(split)
         if self.coordinates.has_spatio_temporal_coordinates:
-            return array.reshape(self.coordinates.spatio_temporal_shape(split)[::-1])
+            inverted_shape = list(self.coordinates.spatio_temporal_shape(split)[::-1])
+            inverted_shape[0] *= self.observations.nb_obs
+            return array.reshape(inverted_shape)
         else:
             return np.transpose(array)
 
diff --git a/test/test_spatio_temporal_dataset/test_coordinates.py b/test/test_spatio_temporal_dataset/test_coordinates.py
index a2c0b7b4..5ee85578 100644
--- a/test/test_spatio_temporal_dataset/test_coordinates.py
+++ b/test/test_spatio_temporal_dataset/test_coordinates.py
@@ -13,6 +13,7 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coo
     AlpsStation3DCoordinatesWithAnisotropy
 from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \
     CircleSpatialCoordinates
+from spatio_temporal_dataset.coordinates.utils import get_index_with_spatio_temporal_index_suffix
 from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
 
 
@@ -65,14 +66,18 @@ class SpatioTemporalCoordinates(unittest.TestCase):
             # the uniqueness of each spatio temporal index is not garanteed by the current algo
             # it will work in classical cases, and raise an assert when uniqueness is needed (when using a slicer)
             index1 = pd.Series(spatial_coordinates.spatial_index())
-            # Add the suffix to the index1
-            suffix = '0' if isinstance(df_spatial.index[0], str) else 0
-            index1 += suffix
             index2 = pd.Series(coordinates.spatial_index())
             ind = index1 != index2  # type: pd.Series
             self.assertEqual(sum(ind), 0, msg="spatial_coordinates:\n{} \n!= spatio_temporal_coordinates \n{}".
                              format(index1.loc[ind], index2.loc[ind]))
 
+            index1 = get_index_with_spatio_temporal_index_suffix(spatial_coordinates.df_spatial_coordinates(), t=0)
+            index1 = pd.Series(index1)
+            index2 = pd.Series(coordinates.df_spatial_coordinates().index)
+            ind = index1 != index2  # type: pd.Series
+            self.assertEqual(sum(ind), 0, msg="spatial_coordinates:\n{} \n!= spatio_temporal_coordinates \n{}".
+                             format(index1.loc[ind], index2.loc[ind]))
+
     def test_ordered_coordinates(self):
         # Order coordinates, to ensure that the first dimension/the second dimension and so on..
         # Always are in the same order to a given type (e.g. spatio_temporal= of coordinates
diff --git a/test/test_spatio_temporal_dataset/test_dataset.py b/test/test_spatio_temporal_dataset/test_dataset.py
index 3c97f370..9aea31b2 100644
--- a/test/test_spatio_temporal_dataset/test_dataset.py
+++ b/test/test_spatio_temporal_dataset/test_dataset.py
@@ -53,11 +53,13 @@ class TestSpatioTemporalDataset(unittest.TestCase):
                                                    coordinates=self.coordinates)
 
     def test_spatio_temporal_array(self):
+        # This check could have been done for a given station, but we chose to do it for a given time step.
         self.load_dataset(nb_obs=1)
 
         # Load observation for time 0
-        ind_time_0 = self.dataset.coordinates.ind_of_df_all_coordinates(coordinate_name=AbstractCoordinates.COORDINATE_T,
-                                                                        value=0)
+        ind_time_0 = self.dataset.coordinates.ind_of_df_all_coordinates(
+            coordinate_name=AbstractCoordinates.COORDINATE_T,
+            value=0)
         observation_at_time_0_v1 = self.dataset.observations.df_maxima_gev.loc[ind_time_0].values.flatten()
 
         # Load observation correspond to time 0
@@ -70,7 +72,28 @@ class TestSpatioTemporalDataset(unittest.TestCase):
                                                                              observation_at_time_0_v2))
 
     def test_spatio_temporal_case_to_resolve(self):
+        # With 2 observations per coordinate, check that both ways of reading the maxima give the same observations
         self.load_dataset(nb_obs=2)
+
+        # Load the observations for the station whose x coordinate equals -1
+        ind_station_0 = self.dataset.coordinates.ind_of_df_all_coordinates(
+            coordinate_name=AbstractCoordinates.COORDINATE_X,
+            value=-1)
+        observation_at_station_0_v1 = self.dataset.observations.df_maxima_gev.loc[ind_station_0].values.flatten()
+
+        # Load the corresponding observations from the array built for the spatial extremes package
+        maxima_gev = self.dataset.maxima_gev_for_spatial_extremes_package()
+        self.assertEqual(maxima_gev.shape[1], self.nb_points)
+        maxima_gev = np.transpose(maxima_gev)
+        self.assertEqual(maxima_gev.shape, (3, 2 * 2))
+        observation_at_time_0_v2 = maxima_gev[1, :]
+        self.assertEqual(len(observation_at_time_0_v2), 4, msg='{}'.format(observation_at_time_0_v2))
+
+        # The order does not really matter here but we check it anyway
+        self.assertTrue(np.equal(observation_at_station_0_v1, observation_at_time_0_v2).all(),
+                        msg='v1={} is different from v2={}'.format(observation_at_station_0_v1,
+                                                                   observation_at_time_0_v2))
+
         print(self.dataset.maxima_gev_for_spatial_extremes_package())
 
 
-- 
GitLab