Commit 70baaeed authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[COORDINATE][SPATIO TEMPORAL INDEX] add rule for spatio temporal index suffix. update test.

parent b9fb5cbc
No related merge requests found
Showing with 71 additions and 11 deletions
+71 -11
...@@ -113,6 +113,7 @@ def complete_analysis(only_first_one=False): ...@@ -113,6 +113,7 @@ def complete_analysis(only_first_one=False):
def trend_analysis(): def trend_analysis():
save_to_file = True save_to_file = True
only_first_one = False only_first_one = False
# [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000, 3300, 3600, 3900, 4200, 4500, 4800] to test for others
altitudes = [300, 1200, 2100, 3000][:] altitudes = [300, 1200, 2100, 3000][:]
study_classes = [CrocusSwe, CrocusDepth, SafranSnowfall, SafranRainfall, SafranTemperature] study_classes = [CrocusSwe, CrocusDepth, SafranSnowfall, SafranRainfall, SafranTemperature]
for study in study_iterator_global(study_classes, only_first_one=only_first_one, altitudes=altitudes): for study in study_iterator_global(study_classes, only_first_one=only_first_one, altitudes=altitudes):
......
...@@ -6,6 +6,7 @@ import numpy as np ...@@ -6,6 +6,7 @@ import numpy as np
import pandas as pd import pandas as pd
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D
from spatio_temporal_dataset.coordinates.utils import get_index_without_spatio_temporal_index_suffix
from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer, df_sliced from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer, df_sliced
from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer
from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
...@@ -176,7 +177,12 @@ class AbstractCoordinates(object): ...@@ -176,7 +177,12 @@ class AbstractCoordinates(object):
return self.df_coordinates(split).loc[:, self.coordinates_spatial_names].drop_duplicates() return self.df_coordinates(split).loc[:, self.coordinates_spatial_names].drop_duplicates()
def spatial_index(self, split: Split = Split.all) -> pd.Index: def spatial_index(self, split: Split = Split.all) -> pd.Index:
return self.df_spatial_coordinates(split).index df_spatial = self.df_spatial_coordinates(split)
if self.has_spatio_temporal_coordinates:
# Remove the spatio temporal index suffix
return get_index_without_spatio_temporal_index_suffix(df_spatial)
else:
return df_spatial.index
# Temporal attributes # Temporal attributes
......
import pandas as pd import pandas as pd
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.coordinates.utils import get_index_with_spatio_temporal_index_suffix
from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
...@@ -19,13 +20,12 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates): ...@@ -19,13 +20,12 @@ class AbstractSpatioTemporalCoordinates(AbstractCoordinates):
@classmethod @classmethod
def from_df_spatial_and_nb_steps(cls, df_spatial, nb_steps, train_split_ratio: float = None, start=0): def from_df_spatial_and_nb_steps(cls, df_spatial, nb_steps, train_split_ratio: float = None, start=0):
df_time_steps = [] df_time_steps = []
index_type = type(df_spatial.index[0])
for t in range(nb_steps): for t in range(nb_steps):
df_time_step = df_spatial.copy() df_time_step = df_spatial.copy()
df_time_step[cls.COORDINATE_T] = start + t df_time_step[cls.COORDINATE_T] = start + t
index_suffix = index_type(t * len(df_spatial)) df_time_step.index = get_index_with_spatio_temporal_index_suffix(df_spatial, t)
time_step_index = [i + index_suffix for i in df_spatial.index]
df_time_step.index = time_step_index
df_time_steps.append(df_time_step) df_time_steps.append(df_time_step)
df_time_steps = pd.concat(df_time_steps) df_time_steps = pd.concat(df_time_steps)
return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio) return cls.from_df(df=df_time_steps, train_split_ratio=train_split_ratio)
# Suffix to differentiate between spatio temporal index and spatial index
import pandas as pd
import numpy as np
def get_index_suffix(df_spatial: pd.DataFrame, t):
    """Compute the suffix that marks index labels of time step *t*.

    The suffix has the same type as the first label of ``df_spatial.index``
    and its value is ``t * len(df_spatial)`` converted to that type
    (e.g. ``'6'`` for a string index, ``6`` for an integer index).

    :param df_spatial: DataFrame whose index holds the spatial labels.
    :param t: time step number used to scale the suffix.
    :return: the suffix, as an instance of the index label type.
    :raises AssertionError: if the label type is not one of the supported
        int / float / str / numpy int64 / numpy float64 types.
    """
    first_label = df_spatial.index[0]
    index_type = type(first_label)
    supported_types = [int, float, str, np.int64, np.float64]
    # The suffix is later combined with the labels through "+", so only
    # label types from the supported list are accepted
    assert index_type in supported_types, index_type
    nb_spatial_labels = len(df_spatial)
    return index_type(t * nb_spatial_labels)
def get_index_with_spatio_temporal_index_suffix(df_spatial: pd.DataFrame, t):
    """Build the index of time step *t* from the spatial index.

    Every label of ``df_spatial.index`` is combined (with ``+``) with the
    suffix returned by ``get_index_suffix(df_spatial, t)``.

    :param df_spatial: DataFrame whose index holds the spatial labels.
    :param t: time step number for which the index is built.
    :return: a new ``pd.Index`` containing the suffixed labels.
    """
    suffix_for_step = get_index_suffix(df_spatial, t)
    suffixed_labels = []
    for label in df_spatial.index:
        suffixed_labels.append(label + suffix_for_step)
    return pd.Index(suffixed_labels)
def get_index_without_spatio_temporal_index_suffix(df_spatial: pd.DataFrame):
    """Return the index of *df_spatial* without the suffix of time step 0.

    The suffix is the one given by ``get_index_suffix(df_spatial, 0)``:
    the string ``'0'`` for a string index, the number zero otherwise.

    For a string index, only a trailing occurrence of the suffix is removed.
    Splitting each label on the suffix and joining the pieces would also
    delete every other occurrence of the suffix inside the label
    (e.g. ``'100'`` would become ``'1'`` instead of ``'10'``), which can make
    distinct spatial labels collide.

    :param df_spatial: DataFrame whose index labels carry the suffix.
    :return: a ``pd.Index`` with the suffix removed from the labels.
    """
    index_suffix = get_index_suffix(df_spatial, 0)
    if isinstance(index_suffix, str):
        suffix_length = len(index_suffix)
        # Labels that do not end with the suffix (or are not strings) are kept untouched
        stripped_labels = [
            label[:-suffix_length]
            if isinstance(label, str) and label.endswith(index_suffix)
            else label
            for label in df_spatial.index
        ]
        return pd.Index(stripped_labels, name=df_spatial.index.name)
    # Numeric index: the suffix was added arithmetically, so subtract it
    return df_spatial.index - index_suffix
...@@ -56,7 +56,9 @@ class AbstractDataset(object): ...@@ -56,7 +56,9 @@ class AbstractDataset(object):
def transform_maxima_for_spatial_extreme_package(self, maxima_function, split) -> np.ndarray: def transform_maxima_for_spatial_extreme_package(self, maxima_function, split) -> np.ndarray:
array = maxima_function(split) array = maxima_function(split)
if self.coordinates.has_spatio_temporal_coordinates: if self.coordinates.has_spatio_temporal_coordinates:
return array.reshape(self.coordinates.spatio_temporal_shape(split)[::-1]) inverted_shape = list(self.coordinates.spatio_temporal_shape(split)[::-1])
inverted_shape[0] *= self.observations.nb_obs
return array.reshape(inverted_shape)
else: else:
return np.transpose(array) return np.transpose(array)
......
...@@ -13,6 +13,7 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coo ...@@ -13,6 +13,7 @@ from spatio_temporal_dataset.coordinates.spatial_coordinates.alps_station_3D_coo
AlpsStation3DCoordinatesWithAnisotropy AlpsStation3DCoordinatesWithAnisotropy
from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \ from spatio_temporal_dataset.coordinates.spatial_coordinates.generated_spatial_coordinates import \
CircleSpatialCoordinates CircleSpatialCoordinates
from spatio_temporal_dataset.coordinates.utils import get_index_with_spatio_temporal_index_suffix
from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
...@@ -65,14 +66,18 @@ class SpatioTemporalCoordinates(unittest.TestCase): ...@@ -65,14 +66,18 @@ class SpatioTemporalCoordinates(unittest.TestCase):
# the uniqueness of each spatio temporal index is not guaranteed by the current algo # the uniqueness of each spatio temporal index is not guaranteed by the current algo
# it will work in classical cases, and raise an assert when uniqueness is needed (when using a slicer) # it will work in classical cases, and raise an assert when uniqueness is needed (when using a slicer)
index1 = pd.Series(spatial_coordinates.spatial_index()) index1 = pd.Series(spatial_coordinates.spatial_index())
# Add the suffix to the index1
suffix = '0' if isinstance(df_spatial.index[0], str) else 0
index1 += suffix
index2 = pd.Series(coordinates.spatial_index()) index2 = pd.Series(coordinates.spatial_index())
ind = index1 != index2 # type: pd.Series ind = index1 != index2 # type: pd.Series
self.assertEqual(sum(ind), 0, msg="spatial_coordinates:\n{} \n!= spatio_temporal_coordinates \n{}". self.assertEqual(sum(ind), 0, msg="spatial_coordinates:\n{} \n!= spatio_temporal_coordinates \n{}".
format(index1.loc[ind], index2.loc[ind])) format(index1.loc[ind], index2.loc[ind]))
index1 = get_index_with_spatio_temporal_index_suffix(spatial_coordinates.df_spatial_coordinates(), t=0)
index1 = pd.Series(index1)
index2 = pd.Series(coordinates.df_spatial_coordinates().index)
ind = index1 != index2 # type: pd.Series
self.assertEqual(sum(ind), 0, msg="spatial_coordinates:\n{} \n!= spatio_temporal_coordinates \n{}".
format(index1.loc[ind], index2.loc[ind]))
def test_ordered_coordinates(self): def test_ordered_coordinates(self):
# Order coordinates, to ensure that the first dimension/the second dimension and so on.. # Order coordinates, to ensure that the first dimension/the second dimension and so on..
# Always are in the same order to a given type (e.g. spatio_temporal= of coordinates # Always are in the same order to a given type (e.g. spatio_temporal= of coordinates
......
...@@ -53,11 +53,13 @@ class TestSpatioTemporalDataset(unittest.TestCase): ...@@ -53,11 +53,13 @@ class TestSpatioTemporalDataset(unittest.TestCase):
coordinates=self.coordinates) coordinates=self.coordinates)
def test_spatio_temporal_array(self): def test_spatio_temporal_array(self):
# The test could have been on a given station. But we decided to do it for a given time step.
self.load_dataset(nb_obs=1) self.load_dataset(nb_obs=1)
# Load observation for time 0 # Load observation for time 0
ind_time_0 = self.dataset.coordinates.ind_of_df_all_coordinates(coordinate_name=AbstractCoordinates.COORDINATE_T, ind_time_0 = self.dataset.coordinates.ind_of_df_all_coordinates(
value=0) coordinate_name=AbstractCoordinates.COORDINATE_T,
value=0)
observation_at_time_0_v1 = self.dataset.observations.df_maxima_gev.loc[ind_time_0].values.flatten() observation_at_time_0_v1 = self.dataset.observations.df_maxima_gev.loc[ind_time_0].values.flatten()
# Load observation correspond to time 0 # Load observation correspond to time 0
...@@ -70,7 +72,28 @@ class TestSpatioTemporalDataset(unittest.TestCase): ...@@ -70,7 +72,28 @@ class TestSpatioTemporalDataset(unittest.TestCase):
observation_at_time_0_v2)) observation_at_time_0_v2))
def test_spatio_temporal_case_to_resolve(self): def test_spatio_temporal_case_to_resolve(self):
# In this case, we must check that the observations are the same
self.load_dataset(nb_obs=2) self.load_dataset(nb_obs=2)
# Load observation for time 0
ind_station_0 = self.dataset.coordinates.ind_of_df_all_coordinates(
coordinate_name=AbstractCoordinates.COORDINATE_X,
value=-1)
observation_at_station_0_v1 = self.dataset.observations.df_maxima_gev.loc[ind_station_0].values.flatten()
# Load observation correspond to time 0
maxima_gev = self.dataset.maxima_gev_for_spatial_extremes_package()
self.assertEqual(maxima_gev.shape[1], self.nb_points)
maxima_gev = np.transpose(maxima_gev)
self.assertEqual(maxima_gev.shape, (3, 2 * 2))
observation_at_time_0_v2 = maxima_gev[1, :]
self.assertEqual(len(observation_at_time_0_v2), 4, msg='{}'.format(observation_at_time_0_v2))
# The order does not really matter here but we check it anyway
self.assertTrue(np.equal(observation_at_station_0_v1, observation_at_time_0_v2).all(),
msg='v1={} is different from v2={}'.format(observation_at_station_0_v1,
observation_at_time_0_v2))
print(self.dataset.maxima_gev_for_spatial_extremes_package()) print(self.dataset.maxima_gev_for_spatial_extremes_package())
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment