Commit 38b2bd8a authored by Le Roux Erwan
Browse files

Initial main commit

parent ac99ffcc
No related merge requests found
Showing with 301 additions and 0 deletions
+301 -0
import os.path as op
import pandas as pd
import matplotlib.pyplot as plt
class AbstractSpatialCoordinates(object):
    """Wrap a DataFrame of 2D coordinates and an optional split label Series.

    The alternate constructors (``from_df`` / ``from_csv``) guarantee that
    ``df_coord`` holds exactly the ``COORD_X`` and ``COORD_Y`` columns, in that
    order. ``s_split`` is compared against ``TRAIN_SPLIT_STR`` /
    ``TEST_SPLIT_STR`` to select the rows of each subset.
    """

    # Columns
    COORD_X = 'coord_x'
    COORD_Y = 'coord_y'
    COORD_SPLIT = 'coord_split'
    # Constants
    TRAIN_SPLIT_STR = 'train_split'
    TEST_SPLIT_STR = 'test_split'

    def __init__(self, df_coord: pd.DataFrame, s_split: pd.Series = None):
        """Store the coordinates and, when provided, the split labels."""
        self.df_coord = df_coord
        self.s_split = s_split

    @classmethod
    def from_df(cls, df):
        """Build an instance from a DataFrame that has both coordinate columns.

        The split column is optional; when it is missing ``s_split`` is None.
        """
        coordinate_columns = [cls.COORD_X, cls.COORD_Y]
        assert all(column in df.columns for column in coordinate_columns)
        if cls.COORD_SPLIT in df.columns:
            split_labels = df[cls.COORD_SPLIT]
        else:
            split_labels = None
        return cls(df_coord=df.loc[:, coordinate_columns], s_split=split_labels)

    @classmethod
    def from_csv(cls, csv_path):
        """Read ``csv_path`` with pandas and delegate to ``from_df``."""
        assert op.exists(csv_path)
        return cls.from_df(pd.read_csv(csv_path))

    def df_coord_split(self, split_str):
        """Return the coordinate rows whose split label equals ``split_str``."""
        assert self.s_split is not None
        row_mask = self.s_split == split_str
        return self.df_coord.loc[row_mask]

    @property
    def df_coord_train(self):
        """Coordinate rows labelled with ``TRAIN_SPLIT_STR``."""
        return self.df_coord_split(self.TRAIN_SPLIT_STR)

    @property
    def df_coord_test(self):
        """Coordinate rows labelled with ``TEST_SPLIT_STR``."""
        return self.df_coord_split(self.TEST_SPLIT_STR)

    @property
    def nb_points(self):
        """Number of rows in ``df_coord``."""
        return len(self.df_coord)

    @property
    def coord(self):
        """Coordinates as the array returned by ``df_coord.values``."""
        return self.df_coord.values

    @property
    def index(self):
        """Index of ``df_coord``."""
        return self.df_coord.index

    def visualization(self):
        """Scatter-plot column 0 against column 1 of ``coord`` and show it."""
        xs = self.coord[:, 0]
        ys = self.coord[:, 1]
        plt.scatter(xs, ys)
        plt.show()
import math
import numpy as np
import pandas as pd
from extreme_estimator.R_fit.utils import get_loaded_r
from spatio_temporal_dataset.spatial_coordinates.abstract_coordinate import AbstractSpatialCoordinates
import matplotlib.pyplot as plt
class SimulatedCoordinates(AbstractSpatialCoordinates):
    """
    Shared base for coordinates that are generated rather than loaded
    """

    def __init__(self, df_coord, s_split=None):
        """Forward both arguments unchanged to the parent constructor."""
        super().__init__(df_coord=df_coord, s_split=s_split)

    @classmethod
    def from_nb_points(cls, nb_points, **kwargs):
        """Placeholder constructor: does nothing here and returns None.

        Subclasses override it to generate ``nb_points`` coordinates.
        """
        return None
class CircleCoordinates(SimulatedCoordinates):
    """Coordinates sampled uniformly inside a disk centred on the origin."""

    # Radius of the disk drawn by ``visualization``. ``from_nb_points``
    # overrides it on the instance it returns; instances built any other way
    # keep this default.
    max_radius = 1.0

    @classmethod
    def from_nb_points(cls, nb_points, **kwargs):
        """Sample ``nb_points`` points uniformly inside a disk.

        Keyword arguments:
            max_radius: radius of the disk (default 1.0).

        Returns an instance whose ``max_radius`` attribute records the radius
        that was used.
        """
        max_radius = kwargs.get('max_radius', 1.0)
        # Sample uniformly inside the circle
        # NOTE(review): get_loaded_r presumably returns an rpy2 handle to an R
        # session, so ``runif`` is R's uniform sampler -- confirm.
        r = get_loaded_r()
        angles = np.array(r.runif(nb_points, max=2 * math.pi))
        # For a uniform density over the disk *area*, the squared radius must
        # be uniform on [0, max_radius^2]. Taking the square root of a draw on
        # [0, max_radius] is only correct when max_radius == 1.
        radius = np.sqrt(np.array(r.runif(nb_points, max=max_radius ** 2)))
        df = pd.DataFrame.from_dict({cls.COORD_X: radius * np.cos(angles),
                                     cls.COORD_Y: radius * np.sin(angles)})
        coordinates = cls.from_df(df)
        coordinates.max_radius = max_radius
        return coordinates

    def visualization(self):
        """Draw the bounding circle, then the scatter plot of the points."""
        r = self.max_radius
        circle1 = plt.Circle((0, 0), r, color='r', fill=False)
        ax = plt.gcf().gca()
        # Fit the axes to the disk so that no sampled point is clipped.
        ax.set_xlim((-r, r))
        ax.set_ylim((-r, r))
        ax.add_artist(circle1)
        super().visualization()
if __name__ == '__main__':
    # Manual check: sample 500 points in the unit disk and display them.
    CircleCoordinates.from_nb_points(nb_points=500, max_radius=1).visualization()
from typing import List
import pandas as pd
from spatio_temporal_dataset.marginals.abstract_marginals import AbstractMarginals
from spatio_temporal_dataset.marginals.spatial_marginals import SpatialMarginal
from spatio_temporal_dataset.stations.station import Station, load_stations_from_dataframe
from spatio_temporal_dataset.stations.station_distance import EuclideanDistance2D, StationDistance
import pickle
import os.path as op
from itertools import combinations
class SpatioTemporalDataHandler(object):
    """Hold a list of stations, their pairwise distances and their marginals."""

    def __init__(self, marginals_class: type, stations: List[Station], station_distance: StationDistance):
        """Fill the stations' distance tables and instantiate the marginals.

        Args:
            marginals_class: class called with the list of stations to build
                ``self.marginals``.
            stations: stations handled by this object.
            station_distance: object whose ``compute_distance`` is called once
                for every unordered pair of stations.
        """
        self.stations = stations
        # Each pair is visited once; the result is stored in both stations'
        # ``distance`` dict, keyed by the other station.
        for first_station, second_station in combinations(self.stations, 2):
            pair_distance = station_distance.compute_distance(first_station, second_station)
            first_station.distance[second_station] = pair_distance
            second_station.distance[first_station] = pair_distance
        # Build the marginals from the stations
        self.marginals = marginals_class(self.stations)  # type: AbstractMarginals
        # TODO: define the max stable process (self.max_stable)
        print(self.marginals.gev_parameters)

    @classmethod
    def from_dataframe(cls, df):
        """Alias of ``from_spatial_dataframe``."""
        return cls.from_spatial_dataframe(df)

    @classmethod
    def from_spatial_dataframe(cls, df):
        """Build a handler with spatial marginals and 2D Euclidean distances.

        Stations are loaded from ``df`` with ``load_stations_from_dataframe``.
        """
        return cls(marginals_class=SpatialMarginal,
                   stations=load_stations_from_dataframe(df),
                   station_distance=EuclideanDistance2D())
def get_spatio_temporal_data_handler(pickle_path: str, load_pickle: bool = True, dump_pickle: bool = False, *args) \
        -> SpatioTemporalDataHandler:
    """Load a pickled SpatioTemporalDataHandler, or build one and pickle it.

    Exactly one of ``load_pickle`` / ``dump_pickle`` must be true.

    Args:
        pickle_path: path of the pickle file. It must exist when loading and
            must not exist when dumping.
        load_pickle: read the handler from ``pickle_path``.
        dump_pickle: build a new handler and write it to ``pickle_path``.
        *args: positional arguments forwarded to ``SpatioTemporalDataHandler``
            when dumping. They follow the two flags in the signature, so both
            flags have to be passed positionally to reach them.

    Returns:
        The loaded or newly built handler.

    Raises:
        AssertionError: if the flag combination or the file state is invalid.
    """
    # Either load or dump pickle of a SpatioTemporalDataHandler object
    assert load_pickle or dump_pickle
    if load_pickle:
        assert op.exists(pickle_path) and not dump_pickle
        # pickle.load needs an open binary file object, not a path string.
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(pickle_path, 'rb') as pickle_file:
            spatio_temporal_experiment = pickle.load(pickle_file)
    else:
        assert not op.exists(pickle_path)
        spatio_temporal_experiment = SpatioTemporalDataHandler(*args)
        # pickle.dump likewise needs a writable binary file object.
        with open(pickle_path, 'wb') as pickle_file:
            pickle.dump(spatio_temporal_experiment, file=pickle_file)
    return spatio_temporal_experiment
if __name__ == '__main__':
    # Manual check on a constant dummy dataset: 2 stations x 18 columns.
    # The labels are '2000'..'2009' followed by '20010'..'20017'.
    column_labels = ['200' + str(i) for i in range(18)]
    station_names = ['station1', 'station2']
    df = pd.DataFrame(1, index=station_names, columns=column_labels)
    xp = SpatioTemporalDataHandler.from_dataframe(df)
import pandas as pd
import numpy as np
class Station(object):
    """A named station with its series of annual maxima and its location."""

    def __init__(self, name: str, annual_maxima: pd.Series, longitude=np.nan, latitude=np.nan, altitude=np.nan):
        """Store the station description.

        Args:
            name: identifier of the station.
            annual_maxima: Series of maxima; its index is copied into
                ``year_of_study`` as a list.
            longitude, latitude, altitude: location, NaN when not provided.
        """
        self.name = name
        self.annual_maxima = annual_maxima
        self.year_of_study = list(annual_maxima.index)
        # Location
        self.longitude = longitude
        self.latitude = latitude
        self.altitude = altitude
        # Per-instance dict, empty at construction
        self.distance = dict()
def load_stations_from_dataframe(df):
    """Return one Station per row of ``df``.

    The row label becomes the station name and the row itself (a Series
    indexed by the columns of ``df``) becomes its annual maxima.
    """
    stations = []
    for row_label, row_values in df.iterrows():
        stations.append(Station(name=row_label, annual_maxima=row_values))
    return stations
def load_station_from_two_dataframe(df, location_df):
    """Placeholder: not implemented yet, ignores its arguments and returns None."""
    return None
if __name__ == '__main__':
    # Manual check: build two constant dummy stations and print the first one.
    column_labels = ['200' + str(i) for i in range(9)]
    df = pd.DataFrame(1, index=['station1', 'station2'], columns=column_labels)
    stations = load_stations_from_dataframe(df)
    station = stations[0]
    for attribute in (station.name, station.annual_maxima, station.year_of_study):
        print(attribute)
from spatio_temporal_dataset.stations.station import Station
import numpy as np
class StationDistance(object):
    """Base class for distances between two stations."""

    @classmethod
    def compute_distance(cls, station1: Station, station2: Station) -> float:
        """Return the distance between the two stations.

        This base implementation defines no metric and always returns NaN;
        subclasses override it.
        """
        # The implicit first argument of a classmethod is the class, hence
        # ``cls`` (callers are unaffected by the name).
        return np.nan
def euclidean_distance(arr1: np.ndarray, arr2: np.ndarray) -> float:
    """Return the norm (``np.linalg.norm`` default) of ``arr1 - arr2``."""
    difference = arr1 - arr2
    return np.linalg.norm(difference)
class EuclideanDistance2D(StationDistance):
    """Euclidean distance computed on the (latitude, longitude) of two stations."""

    @classmethod
    def compute_distance(cls, station1: Station, station2: Station) -> float:
        """Return the Euclidean distance between the stations' coordinates.

        Each station is reduced to the array ``[latitude, longitude]``. The
        result is NaN when a coordinate is NaN.
        """
        # The leftover debug print of station1.latitude was removed: this
        # method runs once per pair of stations and flooded the output.
        stations_coordinates = [np.array([station.latitude, station.longitude])
                                for station in (station1, station2)]
        return euclidean_distance(*stations_coordinates)
import pandas as pd
class TemporalMaxima(object):
    """Thin wrapper around a DataFrame of maxima."""

    def __init__(self, df_maxima: pd.DataFrame):
        """
        Keep df_maxima as the single attribute of the instance
        Rows are indexed by station, columns by the year of the maxima
        """
        self.df_maxima = df_maxima

    @classmethod
    def from_df(cls, df):
        """Placeholder constructor: not implemented yet, returns None."""
        return None

    @property
    def index(self):
        """Index of ``df_maxima``."""
        maxima_frame = self.df_maxima
        return maxima_frame.index

    @property
    def maxima(self):
        """Maxima as the array returned by ``df_maxima.values``."""
        maxima_frame = self.df_maxima
        return maxima_frame.values
# todo: add the transformed_maxima and the non-transformed maxima
import unittest
import pandas as pd
from spatio_temporal_dataset.spatio_temporal_data_handler import SpatioTemporalDataHandler
class TestPipeline(unittest.TestCase):
    """Outline of the estimation pipelines plus a smoke test of the data handler.

    Only the ``test_*`` methods are collected by unittest. The other methods
    are placeholders whose comments sketch pipelines that remain to be written.
    """

    def main_pipeline(self):
        # Select a type of marginals (either spatial, spatio temporal, temporal)
        # this will define the dimension of the climatic space of interest

        # Select the max stable

        # Define an optimization process
        # The algo: In 1 time, in 2 times, ..., or more complex patterns
        # This algo has at least main procedures (that might be repeated several times)

        # For each procedure, we shall define:
        # - The loss
        # - The optimization method for each part of the process
        pass

    def blanchet_smooth_pipeline(self):
        # Spatial marginal
        # NO MAX STABLE
        # Procedure:
        # Optimization of a single likelihood process that sums up the likelihood of all the terms.
        pass

    def padoan_extreme_pipeline(self):
        # Spatial marginal
        # todo: question, when we are optimizing the full Pairwise loss, are we just optimizing the relations?
        # or ideally do we need to add the term of order 1
        pass

    def gaume(self):
        # Combining the 2
        pass

    def test_pipeline_spatial(self):
        # Sample from a
        # Fit the spatio temporal experiment margin
        # Fit the max stable process
        pass

    def test_dataframe_fit_unitary(self):
        """Build a handler from a constant 2-station DataFrame (smoke test)."""
        df = pd.DataFrame(1, index=['station1', 'station2'], columns=['200' + str(i) for i in range(18)])
        xp = SpatioTemporalDataHandler.from_dataframe(df)
        # One station is expected per row of the DataFrame.
        self.assertEqual(2, len(xp.stations))
if __name__ == '__main__':
    # Manual run of the smoke scenario, outside of the unittest runner.
    column_labels = ['200' + str(i) for i in range(18)]
    station_names = ['station1', 'station2']
    df = pd.DataFrame(1, index=station_names, columns=column_labels)
    xp = SpatioTemporalDataHandler.from_dataframe(df)
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment