Commit da63d6c9 authored by Le Roux Erwan
Browse files

[SLICER] refactor slicer

parent 3d41f0ec
No related merge requests found
Showing with 75 additions and 77 deletions
+75 -77
...@@ -69,7 +69,8 @@ class AbstractEstimator(object): ...@@ -69,7 +69,8 @@ class AbstractEstimator(object):
# Methods to override in the child class # Methods to override in the child class
def _fit(self): def _fit(self):
pass raise NotImplementedError
def _error(self, true_max_stable_params: dict): def _error(self, true_max_stable_params: dict):
pass raise NotImplementedError
...@@ -15,8 +15,7 @@ class FullEstimatorInASingleStepWithSmoothMargin_LinearAllParametersAllDims_Smit ...@@ -15,8 +15,7 @@ class FullEstimatorInASingleStepWithSmoothMargin_LinearAllParametersAllDims_Smit
max_stable_model=Smith()) max_stable_model=Smith())
class FullEstimatorInASingleStepWithSmoothMargin_Constant_Smith( class FullEstimatorInASingleStepWithSmoothMargin_Constant_Smith(FullEstimatorInASingleStepWithSmoothMargin):
FullEstimatorInASingleStepWithSmoothMargin):
@classmethod @classmethod
def from_dataset(cls, dataset: AbstractDataset): def from_dataset(cls, dataset: AbstractDataset):
......
...@@ -71,6 +71,6 @@ class AbstractMarginModel(AbstractModel): ...@@ -71,6 +71,6 @@ class AbstractMarginModel(AbstractModel):
def fitmargin_from_maxima_gev(self, maxima_gev: np.ndarray, df_coordinates: pd.DataFrame) \ def fitmargin_from_maxima_gev(self, maxima_gev: np.ndarray, df_coordinates: pd.DataFrame) \
-> AbstractMarginFunction: -> AbstractMarginFunction:
pass raise NotImplementedError
...@@ -9,12 +9,15 @@ from mpl_toolkits.mplot3d import Axes3D ...@@ -9,12 +9,15 @@ from mpl_toolkits.mplot3d import Axes3D
from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer, df_sliced from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer, df_sliced
from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer
from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
from spatio_temporal_dataset.slicer.split import s_split_from_df, TEST_SPLIT_STR, \ from spatio_temporal_dataset.slicer.split import s_split_from_df, ind_train_from_s_split, Split
TRAIN_SPLIT_STR, ind_train_from_s_split, Split
from spatio_temporal_dataset.slicer.temporal_slicer import TemporalSlicer from spatio_temporal_dataset.slicer.temporal_slicer import TemporalSlicer
class AbstractCoordinates(object): class AbstractCoordinates(object):
"""
So far, the train_split_ratio is the same between the spatial part of the data, and the temporal part
"""
# Spatial columns # Spatial columns
COORDINATE_X = 'coord_x' COORDINATE_X = 'coord_x'
COORDINATE_Y = 'coord_y' COORDINATE_Y = 'coord_y'
...@@ -32,9 +35,17 @@ class AbstractCoordinates(object): ...@@ -32,9 +35,17 @@ class AbstractCoordinates(object):
self.df_all_coordinates = df_coord # type: pd.DataFrame self.df_all_coordinates = df_coord # type: pd.DataFrame
self.s_split_spatial = s_split_spatial # type: pd.Series self.s_split_spatial = s_split_spatial # type: pd.Series
self.s_split_temporal = s_split_temporal # type: pd.Series self.s_split_temporal = s_split_temporal # type: pd.Series
self.slicer = slicer_class(ind_train_spatial=self.ind_train_spatial, self.slicer = None # type: AbstractSlicer
ind_train_temporal=self.ind_train_temporal) # type: AbstractSlicer
assert isinstance(self.slicer, AbstractSlicer) # Load the slicer
if slicer_class is TemporalSlicer:
self.slicer = TemporalSlicer(self.ind_train_temporal)
elif slicer_class is SpatialSlicer:
self.slicer = SpatialSlicer(self.ind_train_spatial)
elif slicer_class is SpatioTemporalSlicer:
self.slicer = SpatioTemporalSlicer(self.ind_train_spatial, self.ind_train_temporal)
else:
raise ValueError("Unknown slicer_class: {}".format(slicer_class))
# ClassMethod constructor # ClassMethod constructor
...@@ -43,10 +54,12 @@ class AbstractCoordinates(object): ...@@ -43,10 +54,12 @@ class AbstractCoordinates(object):
# Extract df_coordinate # Extract df_coordinate
coordinate_columns = [c for c in df.columns if c in cls.COORDINATES_NAMES] coordinate_columns = [c for c in df.columns if c in cls.COORDINATES_NAMES]
df_coord = df.loc[:, coordinate_columns].copy() df_coord = df.loc[:, coordinate_columns].copy()
# Extract the split # Extract the split
split_columns = [c for c in df.columns if c in [cls.SPATIAL_SPLIT, cls.TEMPORAL_SPLIT]] split_columns = [c for c in df.columns if c in [cls.SPATIAL_SPLIT, cls.TEMPORAL_SPLIT]]
s_split_spatial = df[cls.SPATIAL_SPLIT].copy() if cls.SPATIAL_SPLIT in df.columns else None s_split_spatial = df[cls.SPATIAL_SPLIT].copy() if cls.SPATIAL_SPLIT in df.columns else None
s_split_temporal = df[cls.TEMPORAL_SPLIT].copy() if cls.TEMPORAL_SPLIT in df.columns else None s_split_temporal = df[cls.TEMPORAL_SPLIT].copy() if cls.TEMPORAL_SPLIT in df.columns else None
# Infer the slicer class # Infer the slicer class
if s_split_temporal is None and s_split_spatial is None: if s_split_temporal is None and s_split_spatial is None:
raise ValueError('Both split are unspecified') raise ValueError('Both split are unspecified')
...@@ -56,6 +69,7 @@ class AbstractCoordinates(object): ...@@ -56,6 +69,7 @@ class AbstractCoordinates(object):
slicer_class = SpatialSlicer slicer_class = SpatialSlicer
else: else:
slicer_class = SpatioTemporalSlicer slicer_class = SpatioTemporalSlicer
# Remove all the columns used from df # Remove all the columns used from df
columns_used = coordinate_columns + split_columns columns_used = coordinate_columns + split_columns
df.drop(columns_used, axis=1, inplace=True) df.drop(columns_used, axis=1, inplace=True)
...@@ -64,22 +78,13 @@ class AbstractCoordinates(object): ...@@ -64,22 +78,13 @@ class AbstractCoordinates(object):
@classmethod @classmethod
def from_df_and_slicer(cls, df: pd.DataFrame, slicer_class: type, train_split_ratio: float = None): def from_df_and_slicer(cls, df: pd.DataFrame, slicer_class: type, train_split_ratio: float = None):
# So far, the train_split_ratio is the same between the spatial part of the data, and the temporal part
# All the index should be unique # All the index should be unique
assert len(set(df.index)) == len(df) assert len(set(df.index)) == len(df)
# Create a spatial split # Create a spatial split
if slicer_class in [SpatialSlicer, SpatioTemporalSlicer]: s_split_spatial = s_split_from_df(df, cls.COORDINATE_X, cls.SPATIAL_SPLIT, train_split_ratio, True)
s_split_spatial = s_split_from_df(df, cls.COORDINATE_X, cls.SPATIAL_SPLIT, train_split_ratio, True)
else:
s_split_spatial = None
# Create a temporal split # Create a temporal split
if slicer_class in [TemporalSlicer, SpatioTemporalSlicer]: s_split_temporal = s_split_from_df(df, cls.COORDINATE_T, cls.TEMPORAL_SPLIT, train_split_ratio, False)
s_split_temporal = s_split_from_df(df, cls.COORDINATE_T, cls.TEMPORAL_SPLIT, train_split_ratio, False)
else:
s_split_temporal = None
return cls(df_coord=df, slicer_class=slicer_class, return cls(df_coord=df, slicer_class=slicer_class,
s_split_spatial=s_split_spatial, s_split_temporal=s_split_temporal) s_split_spatial=s_split_spatial, s_split_temporal=s_split_temporal)
...@@ -217,4 +222,3 @@ class AbstractCoordinates(object): ...@@ -217,4 +222,3 @@ class AbstractCoordinates(object):
def __eq__(self, other): def __eq__(self, other):
return self.df_merged.equals(other.df_merged) return self.df_merged.equals(other.df_merged)
import pandas as pd
import numpy as np import numpy as np
import pandas as pd
from rpy2.robjects import r from rpy2.robjects import r
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
AbstractSpatialCoordinates AbstractSpatialCoordinates
......
import math import math
import matplotlib.pyplot as plt
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from rpy2.robjects import r from rpy2.robjects import r
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
import matplotlib.pyplot as plt
from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \ from spatio_temporal_dataset.coordinates.spatial_coordinates.abstract_spatial_coordinates import \
AbstractSpatialCoordinates AbstractSpatialCoordinates
...@@ -26,10 +25,10 @@ class CircleSpatialCoordinates(AbstractSpatialCoordinates): ...@@ -26,10 +25,10 @@ class CircleSpatialCoordinates(AbstractSpatialCoordinates):
return cls.from_df(cls.df_spatial(nb_points, max_radius), train_split_ratio) return cls.from_df(cls.df_spatial(nb_points, max_radius), train_split_ratio)
def visualization_2D(self): def visualization_2D(self):
r = 1.0 radius = 1.0
circle1 = plt.Circle((0, 0), r, color='r', fill=False) circle1 = plt.Circle((0, 0), radius, color='r', fill=False)
plt.gcf().gca().set_xlim((-r, r)) plt.gcf().gca().set_xlim((-radius, radius))
plt.gcf().gca().set_ylim((-r, r)) plt.gcf().gca().set_ylim((-radius, radius))
plt.gcf().gca().add_artist(circle1) plt.gcf().gca().add_artist(circle1)
super().visualization_2D() super().visualization_2D()
......
import pandas as pd import pandas as pd
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates from spatio_temporal_dataset.coordinates.spatial_coordinates.coordinates_1D import UniformSpatialCoordinates
from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \ from spatio_temporal_dataset.coordinates.spatio_temporal_coordinates.abstract_spatio_temporal_coordinates import \
AbstractSpatioTemporalCoordinates AbstractSpatioTemporalCoordinates
from spatio_temporal_dataset.coordinates.temporal_coordinates.generated_temporal_coordinates import \
ConsecutiveTemporalCoordinates
from spatio_temporal_dataset.slicer.spatio_temporal_slicer import SpatioTemporalSlicer
class UniformSpatioTemporalCoordinates(AbstractSpatioTemporalCoordinates): class UniformSpatioTemporalCoordinates(AbstractSpatioTemporalCoordinates):
......
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import AbstractTransformation from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation \
import AbstractTransformation
import math import math
......
from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import AbstractTransformation from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation \
import AbstractTransformation
import pandas as pd import pandas as pd
......
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation import AbstractTransformation from spatio_temporal_dataset.coordinates.transformed_coordinates.transformation.abstract_transformation \
import AbstractTransformation
class TransformedCoordinates(AbstractCoordinates): class TransformedCoordinates(AbstractCoordinates):
......
...@@ -7,6 +7,7 @@ import numpy as np ...@@ -7,6 +7,7 @@ import numpy as np
import pandas as pd import pandas as pd
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.slicer.abstract_slicer import AbstractSlicer
from spatio_temporal_dataset.slicer.split import Split from spatio_temporal_dataset.slicer.split import Split
from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \
AbstractSpatioTemporalObservations AbstractSpatioTemporalObservations
...@@ -16,14 +17,10 @@ class AbstractDataset(object): ...@@ -16,14 +17,10 @@ class AbstractDataset(object):
def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates): def __init__(self, observations: AbstractSpatioTemporalObservations, coordinates: AbstractCoordinates):
assert pd.Index.equals(observations.index, coordinates.index) assert pd.Index.equals(observations.index, coordinates.index)
self.observations = observations self.observations = observations # type: AbstractSpatioTemporalObservations
self.coordinates = coordinates self.coordinates = coordinates # type: AbstractCoordinates
self.subset_id_to_column_idxs = None # type: Dict[int, List[int]] self.subset_id_to_column_idxs = None # type: Dict[int, List[int]]
@property
def slicer(self):
return self.coordinates.slicer
@classmethod @classmethod
def from_csv(cls, csv_path: str): def from_csv(cls, csv_path: str):
assert op.exists(csv_path) assert op.exists(csv_path)
...@@ -67,6 +64,10 @@ class AbstractDataset(object): ...@@ -67,6 +64,10 @@ class AbstractDataset(object):
# Slicer wrapper # Slicer wrapper
@property
def slicer(self) -> AbstractSlicer:
return self.coordinates.slicer
@property @property
def train_split(self) -> Split: def train_split(self) -> Split:
return self.slicer.train_split return self.slicer.train_split
...@@ -99,4 +100,3 @@ def get_subset_dataset(dataset: AbstractDataset, subset_id) -> AbstractDataset: ...@@ -99,4 +100,3 @@ def get_subset_dataset(dataset: AbstractDataset, subset_id) -> AbstractDataset:
if observations.df_maxima_frech is not None: if observations.df_maxima_frech is not None:
observations.df_maxima_frech = observations.df_maxima_frech.iloc[:, columns_idxs] observations.df_maxima_frech = observations.df_maxima_frech.iloc[:, columns_idxs]
return subset_dataset return subset_dataset
...@@ -2,7 +2,6 @@ from extreme_estimator.extreme_models.margin_model.abstract_margin_model import ...@@ -2,7 +2,6 @@ from extreme_estimator.extreme_models.margin_model.abstract_margin_model import
from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model import AbstractMaxStableModel from extreme_estimator.extreme_models.max_stable_model.abstract_max_stable_model import AbstractMaxStableModel
from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates from spatio_temporal_dataset.coordinates.abstract_coordinates import AbstractCoordinates
from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset from spatio_temporal_dataset.dataset.abstract_dataset import AbstractDataset
from spatio_temporal_dataset.slicer.spatial_slicer import SpatialSlicer
from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \ from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \
AbstractSpatioTemporalObservations AbstractSpatioTemporalObservations
from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import \ from spatio_temporal_dataset.spatio_temporal_observations.annual_maxima_observations import \
......
...@@ -19,7 +19,7 @@ class AbstractSlicer(object): ...@@ -19,7 +19,7 @@ class AbstractSlicer(object):
def ind_test_temporal(self) -> pd.Series: def ind_test_temporal(self) -> pd.Series:
return ~self.ind_train_temporal return ~self.ind_train_temporal
def loc_split(self, df: pd.DataFrame, split: Split): def loc_split(self, df: pd.DataFrame, split: Split) -> pd.DataFrame:
# split should belong to the list of split accepted by the slicer # split should belong to the list of split accepted by the slicer
assert isinstance(split, Split) assert isinstance(split, Split)
...@@ -52,24 +52,24 @@ class AbstractSlicer(object): ...@@ -52,24 +52,24 @@ class AbstractSlicer(object):
# Methods that need to be defined in the child class # Methods that need to be defined in the child class
def specialized_loc_split(self, df: pd.DataFrame, split: Split): def specialized_loc_split(self, df: pd.DataFrame, split: Split) -> pd.DataFrame:
return raise NotImplementedError
@property @property
def some_required_ind_are_not_defined(self) -> bool: def some_required_ind_are_not_defined(self) -> bool:
return raise NotImplementedError
@property @property
def train_split(self) -> Split: def train_split(self) -> Split:
return raise NotImplementedError
@property @property
def test_split(self) -> Split: def test_split(self) -> Split:
return raise NotImplementedError
@property @property
def splits(self) -> List[Split]: def splits(self) -> List[Split]:
return raise NotImplementedError
def df_sliced(df: pd.DataFrame, split: Split = Split.all, slicer: AbstractSlicer = None) -> pd.DataFrame: def df_sliced(df: pd.DataFrame, split: Split = Split.all, slicer: AbstractSlicer = None) -> pd.DataFrame:
......
...@@ -9,7 +9,7 @@ from spatio_temporal_dataset.slicer.split import Split ...@@ -9,7 +9,7 @@ from spatio_temporal_dataset.slicer.split import Split
class SpatialSlicer(AbstractSlicer): class SpatialSlicer(AbstractSlicer):
SPLITS = [Split.train_spatial, Split.test_spatial] SPLITS = [Split.train_spatial, Split.test_spatial]
def __init__(self, ind_train_spatial: Union[None, pd.Series], ind_train_temporal: Union[None, pd.Series]): def __init__(self, ind_train_spatial: Union[None, pd.Series]):
super().__init__(ind_train_spatial, None) super().__init__(ind_train_spatial, None)
@property @property
...@@ -25,10 +25,10 @@ class SpatialSlicer(AbstractSlicer): ...@@ -25,10 +25,10 @@ class SpatialSlicer(AbstractSlicer):
return Split.test_spatial return Split.test_spatial
@property @property
def some_required_ind_are_not_defined(self): def some_required_ind_are_not_defined(self) -> bool:
return self.ind_train_spatial is None return self.ind_train_spatial is None
def specialized_loc_split(self, df: pd.DataFrame, split: Split): def specialized_loc_split(self, df: pd.DataFrame, split: Split) -> pd.DataFrame:
assert pd.Index.equals(df.index, self.ind_train_spatial.index) assert pd.Index.equals(df.index, self.ind_train_spatial.index)
if split is Split.train_spatial: if split is Split.train_spatial:
return df.loc[self.ind_train_spatial] return df.loc[self.ind_train_spatial]
......
...@@ -25,10 +25,10 @@ class SpatioTemporalSlicer(AbstractSlicer): ...@@ -25,10 +25,10 @@ class SpatioTemporalSlicer(AbstractSlicer):
return Split.test_spatiotemporal return Split.test_spatiotemporal
@property @property
def some_required_ind_are_not_defined(self): def some_required_ind_are_not_defined(self) -> bool:
return self.ind_train_spatial is None or self.ind_train_temporal is None return self.ind_train_spatial is None or self.ind_train_temporal is None
def specialized_loc_split(self, df: pd.DataFrame, split: Split): def specialized_loc_split(self, df: pd.DataFrame, split: Split) -> pd.DataFrame:
assert pd.Index.equals(df.index, self.ind_train_temporal.index) assert pd.Index.equals(df.index, self.ind_train_temporal.index)
assert pd.Index.equals(df.index, self.ind_train_spatial.index) assert pd.Index.equals(df.index, self.ind_train_spatial.index)
if split is Split.train_spatiotemporal: if split is Split.train_spatiotemporal:
......
...@@ -34,7 +34,7 @@ def split_to_display_kwargs(split: Split): ...@@ -34,7 +34,7 @@ def split_to_display_kwargs(split: Split):
marker = '^' marker = '^'
else: else:
marker = '>' marker = '>'
return {'marker': marker, 'linewidth': linewidth, 'gridsize':gridsize} return {'marker': marker, 'linewidth': linewidth, 'gridsize': gridsize}
ALL_SPLITS_EXCEPT_ALL = [split for split in Split if split is not Split.all] ALL_SPLITS_EXCEPT_ALL = [split for split in Split if split is not Split.all]
...@@ -73,15 +73,15 @@ def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, s ...@@ -73,15 +73,15 @@ def s_split_from_df(df: pd.DataFrame, column, split_column, train_split_ratio, s
elif split_column in df: elif split_column in df:
raise Exception('A split has already been defined') raise Exception('A split has already been defined')
else: else:
serie = df.drop_duplicates(subset=[column], keep='first')[column] s = df.drop_duplicates(subset=[column], keep='first')[column]
assert len(df) % len(serie) == 0 assert len(df) % len(s) == 0
multiplication_factor = len(df) // len(serie) multiplication_factor = len(df) // len(s)
small_s_split = small_s_split_from_ratio(serie.index, train_split_ratio) small_s_split = small_s_split_from_ratio(s.index, train_split_ratio)
if spatial_split: if spatial_split:
# concatenation for spatial_split # concatenation for spatial_split
s_split = pd.concat([small_s_split for _ in range(multiplication_factor)], ignore_index=True).copy() s_split = pd.concat([small_s_split for _ in range(multiplication_factor)], ignore_index=True).copy()
else: else:
# dilatjon for the temporal split # dilatation for the temporal split
s_split = pd.Series(None, index=df.index) s_split = pd.Series(None, index=df.index)
for i in range(len(s_split)): for i in range(len(s_split)):
s_split.iloc[i] = small_s_split.iloc[i // multiplication_factor] s_split.iloc[i] = small_s_split.iloc[i // multiplication_factor]
......
...@@ -9,7 +9,7 @@ from spatio_temporal_dataset.slicer.split import Split ...@@ -9,7 +9,7 @@ from spatio_temporal_dataset.slicer.split import Split
class TemporalSlicer(AbstractSlicer): class TemporalSlicer(AbstractSlicer):
SPLITS = [Split.train_temporal, Split.test_temporal] SPLITS = [Split.train_temporal, Split.test_temporal]
def __init__(self, ind_train_spatial: Union[None, pd.Series], ind_train_temporal: Union[None, pd.Series]): def __init__(self, ind_train_temporal: Union[None, pd.Series]):
super().__init__(None, ind_train_temporal) super().__init__(None, ind_train_temporal)
@property @property
...@@ -25,10 +25,10 @@ class TemporalSlicer(AbstractSlicer): ...@@ -25,10 +25,10 @@ class TemporalSlicer(AbstractSlicer):
return Split.test_temporal return Split.test_temporal
@property @property
def some_required_ind_are_not_defined(self): def some_required_ind_are_not_defined(self) -> bool:
return self.ind_train_temporal is None return self.ind_train_temporal is None
def specialized_loc_split(self, df: pd.DataFrame, split: Split): def specialized_loc_split(self, df: pd.DataFrame, split: Split) -> pd.DataFrame:
assert pd.Index.equals(df.index, self.ind_train_temporal.index) assert pd.Index.equals(df.index, self.ind_train_temporal.index)
if split is Split.train_temporal: if split is Split.train_temporal:
return df.loc[self.ind_train_temporal] return df.loc[self.ind_train_temporal]
......
...@@ -20,8 +20,8 @@ class AbstractSpatioTemporalObservations(object): ...@@ -20,8 +20,8 @@ class AbstractSpatioTemporalObservations(object):
assert df_maxima_gev is not None or df_maxima_frech is not None assert df_maxima_gev is not None or df_maxima_frech is not None
if df_maxima_gev is not None and df_maxima_frech is not None: if df_maxima_gev is not None and df_maxima_frech is not None:
assert pd.Index.equals(df_maxima_gev.index, df_maxima_frech.index) assert pd.Index.equals(df_maxima_gev.index, df_maxima_frech.index)
self.df_maxima_gev = df_maxima_gev self.df_maxima_gev = df_maxima_gev # type: pd.DataFrame
self.df_maxima_frech = df_maxima_frech self.df_maxima_frech = df_maxima_frech # type: pd.DataFrame
@classmethod @classmethod
def from_csv(cls, csv_path: str = None): def from_csv(cls, csv_path: str = None):
...@@ -38,9 +38,10 @@ class AbstractSpatioTemporalObservations(object): ...@@ -38,9 +38,10 @@ class AbstractSpatioTemporalObservations(object):
return self.df_maxima_gev return self.df_maxima_gev
@property @property
def df_maxima_merged(self): def df_maxima_merged(self) -> pd.DataFrame:
df_maxima_list = [] df_maxima_list = []
for df, suffix in [(self.df_maxima_gev, self.OBSERVATIONS_GEV), (self.df_maxima_frech, self.OBSERVATIONS_FRECH)]: for df, suffix in [(self.df_maxima_gev, self.OBSERVATIONS_GEV),
(self.df_maxima_frech, self.OBSERVATIONS_FRECH)]:
if df is not None: if df is not None:
df_maxima = df.copy() df_maxima = df.copy()
df_maxima.columns = [str(c) + ' ' + suffix for c in df_maxima.columns] df_maxima.columns = [str(c) + ' ' + suffix for c in df_maxima.columns]
......
import os.path as op import os.path as op
import pandas as pd from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import \
AbstractSpatioTemporalObservations
from spatio_temporal_dataset.spatio_temporal_observations.abstract_spatio_temporal_observations import AbstractSpatioTemporalObservations
from utils import get_full_path from utils import get_full_path
class AlpsPrecipitationObservations(AbstractSpatioTemporalObservations): class AlpsPrecipitationObservations(AbstractSpatioTemporalObservations):
RELATIVE_PATH = r'local/spatio_temporal_datasets/Gilles - precipitations' RELATIVE_PATH = r'local/spatio_temporal_datasets/Gilles - precipitations'
FULL_PATH = get_full_path(relative_path=RELATIVE_PATH) FULL_PATH = get_full_path(relative_path=RELATIVE_PATH)
...@@ -35,4 +33,4 @@ class AlpsPrecipitationObservations(AbstractSpatioTemporalObservations): ...@@ -35,4 +33,4 @@ class AlpsPrecipitationObservations(AbstractSpatioTemporalObservations):
# print(df.head()) # print(df.head())
# filepath = op.join(cls.FULL_PATH, 'max_precip_3j.csv') # filepath = op.join(cls.FULL_PATH, 'max_precip_3j.csv')
# assert not op.exists(filepath) # assert not op.exists(filepath)
# df.to_csv(filepath) # df.to_csv(filepath)
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment