# Commit 5627cc24, authored by Ienco Dino
# Browse files
#
# add varying length time series management
#
# parent 8444a01c
# No related merge requests found
# Showing with 259 additions and 0 deletions
# +259 -0
import numpy as np
import tensorflow as tf
import os
import sys
from sklearn.metrics import f1_score, r2_score
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
import time
from sklearn.manifold import TSNE
import matplotlib.pyplot as pyplot
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score
from active_semi_clustering.semi_supervised.pairwise_constraints import MPCKMeans, PCKMeans, COPKMeans
from model import RNNAE
def generateConstraints(idxLabelledData, labels):
    """Split every unordered pair of labelled positions into two constraint lists.

    Positions ``p < q`` range over ``len(idxLabelledData)``; ``labels`` is
    indexed by those same positions (not by the values stored in
    ``idxLabelledData``).

    Returns:
        ``(ml, cl)``: ``ml`` holds ``[p, q]`` pairs whose labels are equal
        (must-link), ``cl`` holds the pairs whose labels differ (cannot-link).
    """
    n_labelled = len(idxLabelledData)
    must_link, cannot_link = [], []
    for first in range(n_labelled):
        for second in range(first + 1, n_labelled):
            same_label = labels[first] == labels[second]
            bucket = must_link if same_label else cannot_link
            bucket.append([first, second])
    return must_link, cannot_link
def getBatch(X, i, batch_size):
    """Return the ``i``-th consecutive slice of ``batch_size`` rows of ``X``.

    Args:
        X: array-like exposing ``.shape[0]`` and supporting slicing on its
            first axis.
        i: zero-based batch index.
        batch_size: number of rows per batch.

    Returns:
        ``X[i * batch_size : min((i + 1) * batch_size, X.shape[0])]``; the
        last batch may be shorter, and an index past the end yields an empty
        slice.
    """
    start_id = i * batch_size
    # Clamp the upper bound so the final, possibly partial, batch stops at
    # the end of the data.
    end_id = min(start_id + batch_size, X.shape[0])
    return X[start_id:end_id]
def buildPair(x_train, labels, valid_mask):
    """Build all unordered pairs of labelled examples for contrastive training.

    For every ``a < b`` in ``range(labels.shape[0])`` the pair
    ``(x_train[a], x_train[b])`` is emitted together with the matching
    masks and a target that is ``0`` when both labels are equal and ``1``
    otherwise.

    Returns:
        A 5-tuple ``(first, first_mask, second, second_mask, targets)``;
        the first four are stacked along a new leading axis with
        ``np.stack`` and ``targets`` is a 1-D array.
    """
    n_examples = labels.shape[0]
    index_pairs = [
        (a, b)
        for a in range(n_examples)
        for b in range(a + 1, n_examples)
    ]

    targets = [0 if labels[a] == labels[b] else 1 for a, b in index_pairs]
    first_series = [x_train[a] for a, _ in index_pairs]
    first_masks = [valid_mask[a] for a, _ in index_pairs]
    second_series = [x_train[b] for _, b in index_pairs]
    second_masks = [valid_mask[b] for _, b in index_pairs]

    return (
        np.stack(first_series, axis=0),
        np.stack(first_masks, axis=0),
        np.stack(second_series, axis=0),
        np.stack(second_masks, axis=0),
        np.array(targets),
    )
def trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_object, optimizer, BATCH_SIZE, e):
    """Run one pass over the labelled pairs: contrastive + reconstruction loss.

    The pair arrays are shuffled jointly, then processed in mini-batches.
    For each batch the loss is the mean contrastive term (margin 1.0) built
    from ``model.siameseDistance`` plus four reconstruction terms computed
    with ``loss_object`` on the second and third outputs of ``model`` for
    each pair member, multiplied by that member's mask.

    Args:
        model: object exposing ``siameseDistance([a, b], training=True)``,
            ``__call__(x, training=True)`` returning four values, and
            ``trainable_variables``.
        f_data, s_data: first / second member of every pair.
        f_data_mask, s_data_mask: masks multiplied into the reconstructions.
        y_val: per-pair target; 0 feeds the "equal" term, 1 the margin term.
        loss_object: callable ``(target, prediction) -> loss``.
        optimizer: optimizer providing ``apply_gradients``.
        BATCH_SIZE: number of pairs per mini-batch.
        e: epoch index (not used by this function).

    Returns:
        The batch losses summed and divided by the number of batches.
    """
    margin = 1.0
    tot_loss = 0.0
    f_data, f_data_mask, s_data, s_data_mask, y_val = shuffle(
        f_data, f_data_mask, s_data, s_data_mask, y_val
    )

    # Integer ceil-division: the mean returned below must be taken over the
    # number of batches actually processed, not a fractional count.
    n_pairs = f_data.shape[0]
    iterations = (n_pairs + BATCH_SIZE - 1) // BATCH_SIZE

    for ibatch in range(iterations):
        batch_f = getBatch(f_data, ibatch, BATCH_SIZE)
        batch_f_mask = getBatch(f_data_mask, ibatch, BATCH_SIZE)
        batch_s = getBatch(s_data, ibatch, BATCH_SIZE)
        batch_s_mask = getBatch(s_data_mask, ibatch, BATCH_SIZE)
        batch_y = getBatch(y_val, ibatch, BATCH_SIZE)

        with tf.GradientTape() as tape:
            d_w = model.siameseDistance([batch_f, batch_s], training=True)
            # Contrastive loss: pairs with target 0 are penalised by d_w,
            # pairs with target 1 only while d_w is below the margin.
            equal_loss = (.5 * (1 - batch_y) * d_w)
            neg_loss = (.5 * batch_y * tf.math.maximum(0, margin - d_w))
            loss = tf.reduce_mean(equal_loss + neg_loss)

            _, reco_f, reco_fR, _ = model(batch_f, training=True)
            _, reco_s, reco_sR, _ = model(batch_s, training=True)
            loss += loss_object(batch_f, reco_f * batch_f_mask)
            loss += loss_object(batch_f, reco_fR * batch_f_mask)
            loss += loss_object(batch_s, reco_s * batch_s_mask)
            # Both reconstructions of the second series are scored against
            # the second series itself (batch_s), mirroring the first one.
            loss += loss_object(batch_s, reco_sR * batch_s_mask)

        grads = tape.gradient(loss, model.trainable_variables)
        # Variables untouched by this loss yield None gradients; replace
        # them with zeros so apply_gradients receives a value for each.
        grads = [
            grad if grad is not None else tf.zeros_like(var)
            for var, grad in zip(model.trainable_variables, grads)
        ]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss

    return (tot_loss / iterations)
def trainStepStrech(model, x_train, valid_mask, centers, loss_object, optimizer, BATCH_SIZE, e):
    """Run one pass of reconstruction training plus a pull towards ``centers``.

    For each mini-batch the loss is the sum of two reconstruction terms
    (second and third outputs of ``model``, multiplied by the batch mask and
    scored with ``loss_object`` against the input) and the batch mean of the
    squared distance between the first model output and the matching rows
    of ``centers``.

    Args:
        model: callable returning four values for ``model(x, training=True)``
            and exposing ``trainable_variables``.
        x_train: training series, batched along the first axis.
        valid_mask: mask batched in step with ``x_train``.
        centers: one target row per training example, aligned with
            ``x_train``; cast to float32 before use.
        loss_object: callable ``(target, prediction) -> loss``.
        optimizer: optimizer providing ``apply_gradients``.
        BATCH_SIZE: number of examples per mini-batch.
        e: epoch index (not used by this function).

    Returns:
        The batch losses summed and divided by the number of batches.
    """
    tot_loss = 0.0

    # Integer ceil-division: the mean returned below must be taken over the
    # number of batches actually processed, not a fractional count.
    n_examples = x_train.shape[0]
    iterations = (n_examples + BATCH_SIZE - 1) // BATCH_SIZE

    centers = centers.astype("float32")
    for ibatch in range(iterations):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        batch_mask = getBatch(valid_mask, ibatch, BATCH_SIZE)
        batch_c = getBatch(centers, ibatch, BATCH_SIZE)

        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco * batch_mask)
            loss_rec += loss_object(batch_x, recoR * batch_mask)
            # Squared Euclidean distance of each embedding to its assigned
            # centre, averaged over the batch.
            loss_rec += tf.reduce_mean(tf.reduce_sum(tf.square(batch_c - emb), axis=1))

        grads = tape.gradient(loss_rec, model.trainable_variables)
        # Variables untouched by this loss yield None gradients; replace
        # them with zeros so apply_gradients receives a value for each.
        grads = [
            grad if grad is not None else tf.zeros_like(var)
            for var, grad in zip(model.trainable_variables, grads)
        ]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec

    return (tot_loss / iterations)
def trainStep(model, x_train, valid_mask, loss_object, optimizer, BATCH_SIZE, e):
    """Run one pass of pure reconstruction training over ``x_train``.

    For each mini-batch the loss is the sum of ``loss_object`` applied to
    the input and to each of the second and third outputs of ``model``,
    after multiplying those outputs by the batch mask.

    Args:
        model: callable returning four values for ``model(x, training=True)``
            and exposing ``trainable_variables``.
        x_train: training series, batched along the first axis.
        valid_mask: mask batched in step with ``x_train``.
        loss_object: callable ``(target, prediction) -> loss``.
        optimizer: optimizer providing ``apply_gradients``.
        BATCH_SIZE: number of examples per mini-batch.
        e: epoch index (not used by this function).

    Returns:
        The batch losses summed and divided by the number of batches.
    """
    tot_loss = 0.0

    # Integer ceil-division: the mean returned below must be taken over the
    # number of batches actually processed, not a fractional count.
    n_examples = x_train.shape[0]
    iterations = (n_examples + BATCH_SIZE - 1) // BATCH_SIZE

    for ibatch in range(iterations):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        batch_mask = getBatch(valid_mask, ibatch, BATCH_SIZE)

        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco * batch_mask)
            loss_rec += loss_object(batch_x, recoR * batch_mask)

        grads = tape.gradient(loss_rec, model.trainable_variables)
        # Variables untouched by this loss yield None gradients; replace
        # them with zeros so apply_gradients receives a value for each.
        grads = [
            grad if grad is not None else tf.zeros_like(var)
            for var, grad in zip(model.trainable_variables, grads)
        ]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec

    return (tot_loss / iterations)
def trainRNNAE(model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs):
    """Train ``model`` in two stages and return it.

    Stage 1 (fixed 40 epochs): each epoch shuffles the pair arrays and the
    full data set, then runs ``trainStep`` on the data followed by
    ``trainStepL`` on the pairs.

    Between the stages the first output of ``model(data)`` is clustered with
    KMeans (``nClasses`` clusters) and every example is assigned the centre
    of its cluster.

    Stage 2 (``n_epochs - 40`` epochs): each epoch shuffles data, centres and
    mask together, then runs ``trainStepStrech`` followed by ``trainStepL``.

    ``optimizer`` drives the data-wide steps, ``optimizer2`` the pair steps;
    ``loss_huber`` is passed to every step as its reconstruction loss.
    """
    warmup_epochs = 40

    print("PRETRAINING STAGE : AE + CONTRASTIVE LOSS")
    for epoch in range(warmup_epochs):
        f_data, f_data_mask, s_data, s_data_mask, y_val = shuffle(
            f_data, f_data_mask, s_data, s_data_mask, y_val
        )
        data, valid_mask = shuffle(data, valid_mask)
        reconstruction_loss = trainStep(
            model, data, valid_mask, loss_huber, optimizer, BATCH_SIZE, epoch
        )
        pair_loss = trainStepL(
            model, f_data, f_data_mask, s_data, s_data_mask, y_val,
            loss_huber, optimizer2, BATCH_SIZE, epoch
        )
        print("epoch %d with loss %f" % (epoch, reconstruction_loss + pair_loss))

    print("COMPUTE INTERMEDIATE CLUSTERING ASSIGNMENT")
    emb, _, _, _ = model(data)
    km = KMeans(n_clusters=nClasses)
    km.fit(emb)
    # One centre per example, in the same order as `data`.
    centers = np.array([km.cluster_centers_[assigned] for assigned in km.labels_])

    print("REFINEMENT STEP alternating AE + MANIFOLD STRETCH TOWARDS CENTROIDS and AE + CONTRASTIVE LOSS")
    for epoch in range(n_epochs - warmup_epochs):
        # Shuffle jointly so each example stays aligned with its centre/mask.
        data, centers, valid_mask = shuffle(data, centers, valid_mask)
        stretch_loss = trainStepStrech(
            model, data, valid_mask, centers, loss_huber, optimizer, BATCH_SIZE, epoch
        )
        pair_loss = trainStepL(
            model, f_data, f_data_mask, s_data, s_data_mask, y_val,
            loss_huber, optimizer2, BATCH_SIZE, epoch
        )
        print("epoch %d with loss %f" % (epoch, stretch_loss + pair_loss))

    return model
def plot2DFeatures(data, labels):
    """Project ``data`` to 2-D with t-SNE and scatter-plot one series per class.

    Args:
        data: feature matrix passed to ``TSNE(n_components=2).fit_transform``.
        labels: one class label per row of ``data``.

    The figure is drawn, kept on screen for 10 seconds via ``pyplot.pause``
    and then cleared.
    """
    X_embedded = TSNE(n_components=2).fit_transform(data)
    labels = np.asarray(labels)
    # Iterate over the label values that actually occur instead of assuming
    # they are exactly 0..n_classes-1; np.unique returns them sorted, so the
    # plotting order for 0-based contiguous labels is unchanged.
    for class_value in np.unique(labels):
        rows = np.where(labels == class_value)[0]
        pyplot.scatter(X_embedded[rows, 0], X_embedded[rows, 1])
    pyplot.draw()
    pyplot.pause(10)
    pyplot.clf()
def getExtractLabelSet(data, labels, nSamples):
    """Randomly draw up to ``nSamples`` examples of every class.

    Args:
        data: array indexed along its first axis by example.
        labels: one class label per example.
        nSamples: maximum number of examples to draw per class.

    Returns:
        ``(labelledData, labelsSmall)``: the selected rows of ``data``
        concatenated class by class (classes in ``np.unique`` order) and an
        array with one label per selected row.
    """
    labelledData = []
    labelsSmall = []
    for val in np.unique(labels):
        idx = np.where(labels == val)[0]
        idx = shuffle(idx)[0:nSamples]
        labelledData.append(data[idx])
        # Emit one label per row actually selected: a class with fewer than
        # nSamples members yields fewer rows, and data/labels must stay aligned.
        labelsSmall.extend([val] * len(idx))
    labelledData = np.concatenate(labelledData, axis=0)
    return labelledData, np.array(labelsSmall)
def createMaskTensor(data, valid_lengths):
    """Build a 0/1 mask with the shape of ``data`` marking valid time steps.

    ``data`` must be 3-dimensional. For row ``r`` the first
    ``valid_lengths[r]`` entries along the second axis are set to 1 across
    the whole third axis; everything else stays 0.

    Returns:
        A new array created with ``np.zeros(data.shape)``.
    """
    mask = np.zeros(data.shape)
    n_series, _, _ = mask.shape
    for row in range(n_series):
        n_valid = valid_lengths[row]
        for step in range(n_valid):
            mask[row, step, :] = 1.0
    return mask
def main(argv):
    """Train the RNN auto-encoder on a data set and print the clustering NMI.

    Args:
        argv: command-line vector; ``argv[1]`` is the data directory,
            ``argv[2]`` and ``argv[3]`` are the two string tokens that form
            the labelled-index file name ``<argv[2]>_<argv[3]>.npy``.

    The directory must contain ``data.npy``, ``class.npy`` and
    ``seqLength.npy`` next to the labelled-index file.
    """
    dataDir, nSamples, runId = argv[1], argv[2], argv[3]

    data = np.load(dataDir+"/data.npy")
    labels = np.load(dataDir+"/class.npy")
    valid_lengths = np.load(dataDir+"/seqLength.npy")
    valid_mask = createMaskTensor(data, valid_lengths)

    # Subset of examples whose labels are available for pair construction.
    idxLabelledData = np.load(dataDir+"/"+nSamples+"_"+runId+".npy")
    labelledData = data[idxLabelledData]
    labelsSmall = labels[idxLabelledData]
    labelledValidMask = valid_mask[idxLabelledData]
    pair_arrays = buildPair(labelledData, labelsSmall, labelledValidMask)
    f_data, f_data_mask, s_data, s_data_mask, y_val = pair_arrays

    print("labelledData.shape ",labelledData.shape)
    print("labelsSmall.shape ",labelsSmall.shape)

    # Copy kept in the original order for the final embedding.
    origData = np.array(data)
    nClasses = len(np.unique(labels))
    RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)

    # Loss functions and optimizers used in the training phase.
    loss_huber = tf.keras.losses.Huber()
    # Currently not passed to any training routine.
    loss_object2 = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)

    BATCH_SIZE = 32
    n_epochs = 100
    RNNAE_model = trainRNNAE(
        RNNAE_model, nClasses, data, valid_mask,
        f_data, f_data_mask, s_data, s_data_mask, y_val,
        loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs
    )

    # Cluster the learned embeddings and score them against the true labels.
    emb, _, _, _ = RNNAE_model(origData)
    emb = emb.numpy()
    km = KMeans(n_clusters=nClasses)
    km.fit(emb)
    nmi = normalized_mutual_info_score(labels, km.labels_)
    print("nmi %f" % nmi)
# Script entry point: forward the raw command-line vector to main(), which
# reads argv[1] (data directory), argv[2] and argv[3].
if __name__ == "__main__":
    main(sys.argv)
#plot2DFeatures(emb, labels)
# Supports Markdown
# 0% or .
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# Please register or to comment