Commit b3b42c64 authored by eudesyawog

adding plots pdf, update existing code and news files

parent 6933432e
import pandas as pd
import numpy as np
import os
def _normalize(values, norm):
    """Rescale ``values`` using statistics computed over the whole array.

    ``"meanstd"`` subtracts the global mean and divides by the global standard
    deviation; ``"minmax"`` maps the global [min, max] range onto [0, 1]. Any
    other value of ``norm`` returns ``values`` unchanged.
    """
    if norm == "meanstd":
        offset = values.mean()
        scale = values.std()
    elif norm == "minmax":
        offset = values.min()
        scale = values.max() - offset
    else:
        return values
    # A constant series has zero spread; dividing by it would fill the output
    # with NaN/inf, so fall back to centring only.
    if scale == 0:
        scale = 1.0
    return (values - offset) / scale


def _starts_with_band(column, band):
    """Return True when the column name begins with the band name."""
    return column.startswith(band)


def _first_token_is_band(column, band):
    """Return True when the text before the first '_' equals the band name."""
    # Exact match on the first token keeps e.g. "B8" from also picking "B8A".
    return column.split("_")[0] == band


def _build_sequence(df, bands, norm, is_band_column):
    """Build the normalised, timestamp-major feature matrix for ``bands``.

    For every band the matching ``*Mean`` columns are sorted by name,
    normalised together, and the bands are then interleaved so that the output
    columns are ordered ``t0_b0, t0_b1, ..., t1_b0, t1_b1, ...``.

    Raises:
        ValueError: if ``bands`` is empty, a band has no matching column, or
            the bands do not all have the same number of columns.
    """
    per_band = []
    for band in bands:
        columns = sorted(
            col for col in df.columns
            if is_band_column(col, band) and col.endswith("Mean")
        )
        if not columns:
            raise ValueError(
                "no column ending with 'Mean' found for band {!r}".format(band)
            )
        per_band.append(_normalize(df[columns].values, norm))

    if not per_band:
        raise ValueError("at least one band is required")
    if len(set(values.shape[1] for values in per_band)) > 1:
        raise ValueError(
            "all bands must have the same number of timestamps, got {}".format(
                [values.shape[1] for values in per_band]
            )
        )

    # (samples, timestamps, bands) flattened row-major gives, for each sample,
    # all bands of timestamp 0, then all bands of timestamp 1, and so on.
    stacked = np.stack(per_band, axis=2)
    n_rows, n_timestamps, n_bands = stacked.shape
    return stacked.reshape(n_rows, n_timestamps * n_bands)


def normalize_time_series(radar_file, opt_file, norm="meanstd", radar_bands=("VV", "VH"),
                          opt_bands=("B2", "B3", "B4", "B8", "B5", "B6", "B7", "B8A", "B11", "B12"),
                          opt_indices=("NDVI", "NDWI", "EVI", "MSAVI2", "GDVI", "CIGreen", "CIRedEdge")):
    """Normalise radar/optical time series from CSV and save them as ``.npy``.

    Each band is normalised independently over all of its samples and
    timestamps, then the bands are interleaved per timestamp. Results are
    written to ``./data/<prefix>_rad_seq.npy``, ``<prefix>_opt_seq.npy`` and
    ``<prefix>_indices_seq.npy``, where ``<prefix>`` is the first two
    ``_``-separated tokens of the radar file name. When both tables have an
    ``ID`` column and the radar table has ``Rdt_s``, that column (restricted
    to IDs present in both tables) is saved to ``<prefix>_yields.npy``.

    Args:
        radar_file: CSV whose radar columns start with a band name and end
            with ``Mean``.
        opt_file: CSV holding both the optical band and the index columns.
        norm: ``"meanstd"`` or ``"minmax"``; any other value skips scaling.
        radar_bands: radar band names (matched as column-name prefix).
        opt_bands: optical band names (matched on the token before the first
            ``_``).
        opt_indices: spectral index names (matched as column-name prefix).

    Raises:
        ValueError: if a band has no matching column or the bands of one
            sensor have different numbers of timestamps.
    """
    radar_df = pd.read_csv(radar_file)
    # Optical bands and indices live in the same file: read it only once.
    opt_df = pd.read_csv(opt_file)

    radar_seq = _build_sequence(radar_df, radar_bands, norm, _starts_with_band)
    opt_seq = _build_sequence(opt_df, opt_bands, norm, _first_token_is_band)
    indices_seq = _build_sequence(opt_df, opt_indices, norm, _starts_with_band)

    name_parts = os.path.basename(radar_file).split('_')
    ptrn = name_parts[0] + "_" + name_parts[1]

    if not os.path.exists("./data"):
        os.makedirs("./data")
    np.save("./data/{}_rad_seq.npy".format(ptrn), radar_seq)
    np.save("./data/{}_opt_seq.npy".format(ptrn), opt_seq)
    np.save("./data/{}_indices_seq.npy".format(ptrn), indices_seq)

    # Yields are optional: report the problem and carry on when the expected
    # columns are missing or cannot be merged, but let other errors surface.
    try:
        rdt_df = radar_df.merge(opt_df[["ID"]], on="ID")
        rdt = rdt_df[["Rdt_s"]].values
    except (KeyError, ValueError) as error:
        print(error)
    else:
        np.save("./data/{}_yields.npy".format(ptrn), rdt)
if __name__ == '__main__':
    # (radar CSV, optical CSV) pairs, processed in the listed order.
    campaigns = (
        # Niakhar 2017
        ("./data/niakhar_2017_radar_notree.csv", "./data/niakhar_2017_opt_gapf_notree.csv"),
        # Niakhar 2018
        ("./data/niakhar_2018_radar_notree.csv", "./data/niakhar_2018_opt_gapf_notree.csv"),
        # Nioro 2018
        ("./data/nioro_2018_radar_notree.csv", "./data/nioro_2018_opt_gapf_notree.csv"),
    )
    for radar_file, opt_file in campaigns:
        normalize_time_series(radar_file, opt_file, norm="meanstd")
\ No newline at end of file
import random
from datetime import datetime
import numpy as np
import os
def writeSplit(idx, data, outFileName):
    """Save the rows of ``data`` selected by ``idx`` to ``outFileName``."""
    np.save(outFileName, data[idx])


def split_data(radar_seq, opt_seq, indices_seq, yields, n_folds=3, n_random=10, seed=None):
    """Write repeated random k-fold train/test splits of four aligned arrays.

    The four ``.npy`` files are loaded and, for each of ``n_random``
    repetitions, the sample indices are shuffled and cut into ``n_folds``
    test folds of ``int(n / n_folds)`` samples each; all remaining samples
    form the matching training set. When the sample count is not a multiple
    of ``n_folds`` the leftover samples are never used for testing and appear
    in every training set.

    Files are written to ``splits/<prefix>/`` as
    ``{train,test}_{radar,opt,indices,yields}_<repetition>_fold-<fold>.npy``,
    where ``<prefix>`` is the first two ``_``-separated tokens of the radar
    file name. The same row selection is applied to all four arrays.

    Args:
        radar_seq: path to the radar sequence array.
        opt_seq: path to the optical sequence array.
        indices_seq: path to the spectral indices sequence array.
        yields: path to the yields array; its first dimension defines the
            number of samples.
        n_folds: number of folds per repetition.
        n_random: number of shuffled repetitions.
        seed: optional seed for the ``random`` module; when ``None`` the
            current microsecond is used, so splits differ between runs.
    """
    name_parts = os.path.basename(radar_seq).split("_")
    ptrn = name_parts[0] + "_" + name_parts[1]
    out_dir = os.path.join("splits", ptrn)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    print(ptrn)

    y = np.load(yields)
    datasets = (
        ("radar", np.load(radar_seq)),
        ("opt", np.load(opt_seq)),
        ("indices", np.load(indices_seq)),
        ("yields", y),
    )

    idx = np.arange(y.shape[0])
    n_samples = int(y.shape[0] / n_folds)

    random.seed(datetime.now().microsecond if seed is None else seed)

    for j in range(n_random):
        # One fresh permutation per repetition; the folds below partition it.
        random.shuffle(idx)
        for i in range(n_folds):
            start, stop = i * n_samples, (i + 1) * n_samples
            # Use the shuffled sample ids themselves as row selectors. Looking
            # up their positions inside ``idx`` would always give back the
            # contiguous range start..stop and discard the shuffle.
            test_idx = idx[start:stop].copy()
            train_idx = np.concatenate((idx[:start], idx[stop:]))
            for name, data in datasets:
                writeSplit(train_idx, data, os.path.join(
                    out_dir, "train_{}_{}_fold-{}.npy".format(name, j + 1, i + 1)))
                writeSplit(test_idx, data, os.path.join(
                    out_dir, "test_{}_{}_fold-{}.npy".format(name, j + 1, i + 1)))
        print("========= split {} =========".format(j + 1))
        print("%d-folds : %d, %d over %d" % (n_folds, train_idx.shape[0], test_idx.shape[0], len(idx)))
if __name__ == "__main__":
    # (radar, optical, indices, yields) array paths, processed in the listed order.
    campaigns = (
        # Niakhar 2017
        ("./data/niakhar_2017_rad_seq.npy",
         "./data/niakhar_2017_opt_seq.npy",
         "./data/niakhar_2017_indices_seq.npy",
         "./data/niakhar_2017_yields.npy"),
        # Niakhar 2018
        ("./data/niakhar_2018_rad_seq.npy",
         "./data/niakhar_2018_opt_seq.npy",
         "./data/niakhar_2018_indices_seq.npy",
         "./data/niakhar_2018_yields.npy"),
        # Nioro 2018
        ("./data/nioro_2018_rad_seq.npy",
         "./data/nioro_2018_opt_seq.npy",
         "./data/nioro_2018_indices_seq.npy",
         "./data/nioro_2018_yields.npy"),
    )
    for radar_seq, opt_seq, indices_seq, yields in campaigns:
        split_data(radar_seq, opt_seq, indices_seq, yields)
\ No newline at end of file