include PCA and ndvi max variables

parent d1a5b53a
import pandas as pd
import numpy as np
import os
from sklearn.decomposition import PCA
def normalize_time_series (radar_file, opt_file, norm="minmax", radar_bands=["VH","VV"],
def pca(X, pvar=95):
    """Reduce X with PCA, keeping the fewest components that reach pvar % variance.

    A first PCA is fitted with ``min(X.shape)`` components to obtain the
    cumulative explained-variance ratio. A second PCA is then fitted with
    the smallest number of leading components whose cumulative ratio is
    greater than or equal to ``pvar / 100``.

    Parameters
    ----------
    X : 2-D array
        Data matrix handed to scikit-learn's ``PCA``; must expose ``.shape``.
    pvar : float, default 95
        Target cumulative explained variance, expressed in percent.

    Returns
    -------
    Z : ndarray
        ``X`` projected by the second PCA (one column per kept component).
    """
    # First pass only needs the fitted variance spectrum, so ``fit`` is
    # enough; the projection it would produce was previously discarded.
    spectrum_model = PCA(n_components=min(X.shape))
    spectrum_model.fit(X)
    cum_ratio = spectrum_model.explained_variance_ratio_.cumsum()

    # ``cum_ratio`` is non-decreasing, so a left-sided binary search returns
    # the index of the first entry >= threshold (same as the np.where form).
    first_hit = int(np.searchsorted(cum_ratio, pvar / 100, side="left"))

    # Clamp to the number of available components: floating-point rounding
    # can leave the last cumulative value just below 1.0, in which case
    # pvar=100 finds no matching entry and used to raise IndexError.
    ncomp = min(first_hit + 1, cum_ratio.size)

    reduced_model = PCA(n_components=ncomp)
    return reduced_model.fit_transform(X)
def normalize_time_series (radar_file, opt_file, norm="minmax", project=None, radar_bands=["VH","VV"],
opt_bands=["B2","B3","B4","B8","B5","B6","B7","B8A","B11","B12"],
opt_indices = ["NDVI","NDWI","EVI","MSAVI2","GDVI","CIGreen","CIRedEdge"]):
radar_df = pd.read_csv(radar_file)
if project is not None:
radar_df = radar_df.loc[radar_df["Projet"]==project]
radar_array = None
pca_radar = None
for band in radar_bands :
columns = [col for col in radar_df.columns if col.startswith(band)]
columns.sort()
......@@ -17,7 +34,12 @@ def normalize_time_series (radar_file, opt_file, norm="minmax", radar_bands=["VH
ts_values = (ts_values - ts_values.mean()) / ts_values.std()
elif norm == "minmax":
ts_values = (ts_values - ts_values.min()) / (ts_values.max() - ts_values.min())
if pca_radar is None :
pca_radar = pca(ts_values)
else :
pca_radar = np.vstack((pca_radar,pca(ts_values)))
if radar_array is None :
radar_array = ts_values
else :
......@@ -38,7 +60,11 @@ def normalize_time_series (radar_file, opt_file, norm="minmax", radar_bands=["VH
radar_seq = np.hstack((radar_seq,np.stack(lst, axis=1)))
opt_df = pd.read_csv(opt_file)
if project is not None:
opt_df = opt_df.loc[opt_df["Projet"]==project]
opt_array = None
pca_opt = None
for band in opt_bands :
columns = [col for col in opt_df.columns if col.split("_")[0]==band]
columns.sort()
......@@ -47,6 +73,12 @@ def normalize_time_series (radar_file, opt_file, norm="minmax", radar_bands=["VH
ts_values = (ts_values - ts_values.mean()) / ts_values.std()
elif norm == "minmax":
ts_values = (ts_values - ts_values.min()) / (ts_values.max() - ts_values.min())
if pca_opt is None :
pca_opt = pca(ts_values)
else :
pca_opt = np.vstack((pca_opt,pca(ts_values)))
if opt_array is None :
opt_array = ts_values
else :
......@@ -67,15 +99,30 @@ def normalize_time_series (radar_file, opt_file, norm="minmax", radar_bands=["VH
opt_seq = np.hstack((opt_seq,np.stack(lst, axis=1)))
indices_df = pd.read_csv(opt_file)
if project is not None:
indices_df = opt_df.loc[opt_df["Projet"]==project]
indices_array = None
pca_indices = None
for band in opt_indices :
columns = [col for col in indices_df.columns if col.startswith(band)]
columns.sort()
ts_values = indices_df[columns].values
if band == "NDVI":
ndvi_max = np.max(ts_values,axis=1)
ndvi_max = ndvi_max.reshape(ndvi_max.shape[0],1)
if norm == "meanstd":
ts_values = (ts_values - ts_values.mean()) / ts_values.std()
elif norm == "minmax":
ts_values = (ts_values - ts_values.min()) / (ts_values.max() - ts_values.min())
if pca_indices is None :
pca_indices = pca(ts_values)
else :
pca_indices = np.vstack((pca_indices,pca(ts_values)))
if indices_array is None :
indices_array = ts_values
else :
......@@ -97,10 +144,20 @@ def normalize_time_series (radar_file, opt_file, norm="minmax", radar_bands=["VH
if not os.path.exists("./data"):
os.makedirs("./data")
ptrn = os.path.basename(radar_file).split('_')[0]+"_"+os.path.basename(radar_file).split('_')[1]
np.save("./data/{}_rad_seq.npy".format(ptrn),radar_seq)
np.save("./data/{}_opt_seq.npy".format(ptrn),opt_seq)
np.save("./data/{}_indices_seq.npy".format(ptrn),indices_seq)
if project is None :
ptrn = os.path.basename(radar_file).split('_')[0]+"_"+os.path.basename(radar_file).split('_')[1]
else:
ptrn = os.path.basename(radar_file).split('_')[0]+"-%s_"%project.lower()+os.path.basename(radar_file).split('_')[1]
np.save("./data/{}_rad.npy".format(ptrn),radar_seq)
np.save("./data/{}_opt.npy".format(ptrn),opt_seq)
np.save("./data/{}_indices.npy".format(ptrn),indices_seq)
np.save("./data/{}_ndvimax.npy".format(ptrn),ndvi_max)
np.save("./data/{}_pca-rad.npy".format(ptrn),pca_radar)
np.save("./data/{}_pca-opt.npy".format(ptrn),pca_opt)
np.save("./data/{}_pca-indices.npy".format(ptrn),pca_indices)
try:
rdt_df = radar_df.merge(opt_df[["ID"]],on="ID")
......@@ -112,26 +169,21 @@ def normalize_time_series (radar_file, opt_file, norm="minmax", radar_bands=["VH
if __name__ == '__main__' :
# Niakhar 2017
radar_file = "./interpolate/niakhar_2017_radar_notree_interpolate.csv"
opt_file = "./interpolate/niakhar_2017_opt_gapf_notree_interpolate.csv"
radar_file = "./interpolate/niakhar_2017_radar_interpolate.csv"
opt_file = "./interpolate/niakhar_2017_opt-gapf_interpolate.csv"
normalize_time_series(radar_file,opt_file,norm="meanstd")
# Niakhar 2018
radar_file = "./interpolate/niakhar_2018_radar_notree_interpolate.csv"
opt_file = "./interpolate/niakhar_2018_opt_gapf_notree_interpolate.csv"
normalize_time_series(radar_file,opt_file,norm="meanstd")
# Niakhar 2018 SIMCO
radar_file = "./interpolate/niakhar_2018_radar_interpolate.csv"
opt_file = "./interpolate/niakhar_2018_opt-gapf_interpolate.csv"
normalize_time_series(radar_file,opt_file,norm="meanstd",project="SIMCO")
# Niakhar 2018
radar_file = "./interpolate/niakhar-serena_2018_radar_notree_interpolate.csv"
opt_file = "./interpolate/niakhar-serena_2018_opt_gapf_notree_interpolate.csv"
normalize_time_series(radar_file,opt_file,norm="meanstd")
# Niakhar 2018
radar_file = "./interpolate/niakhar-simco_2018_radar_notree_interpolate.csv"
opt_file = "./interpolate/niakhar-simco_2018_opt_gapf_notree_interpolate.csv"
normalize_time_series(radar_file,opt_file,norm="meanstd")
# Niakhar 2018 SERENA
radar_file = "./interpolate/niakhar_2018_radar_interpolate.csv"
opt_file = "./interpolate/niakhar_2018_opt-gapf_interpolate.csv"
normalize_time_series(radar_file,opt_file,norm="meanstd",project="SERENA")
# Nioro 2018
radar_file = "./interpolate/nioro_2018_radar_notree_interpolate.csv"
opt_file = "./interpolate/nioro_2018_opt_gapf_notree_interpolate.csv"
normalize_time_series(radar_file,opt_file,norm="meanstd")
\ No newline at end of file
# # Nioro 2018
# radar_file = "./interpolate/nioro_2018_radar_notree_interpolate.csv"
# opt_file = "./interpolate/nioro_2018_opt_gapf_notree_interpolate.csv"
# normalize_time_series(radar_file,opt_file,norm="meanstd")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment