Commit 5627cc24 authored by Ienco Dino

add varying length time series management

parent 8444a01c
import sys

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as pyplot
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.metrics import normalized_mutual_info_score
from sklearn.utils import shuffle

from model import RNNAE
def generateConstraints(idxLabelledData, labels):
    # Build must-link (ml) and cannot-link (cl) constraints from the labelled
    # subset: two samples sharing a label are must-link, otherwise cannot-link.
    ml = []
    cl = []
    for i in range(len(idxLabelledData)):
        for j in range(i + 1, len(idxLabelledData)):
            if labels[i] == labels[j]:
                ml.append([i, j])
            else:
                cl.append([i, j])
    return ml, cl
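# Illustrative example (this helper is not called in the pipeline below):
# with labels [0, 0, 1], generateConstraints yields the must-link list
# ml = [[0, 1]] and the cannot-link list cl = [[0, 2], [1, 2]].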
def getBatch(X, i, batch_size):
    # Return the i-th mini-batch; the last batch may be smaller than batch_size.
    start_id = i * batch_size
    end_id = min((i + 1) * batch_size, X.shape[0])
    return X[start_id:end_id]
def buildPair(x_train, labels, valid_mask):
    # Build all pairs of labelled samples together with their validity masks.
    # The pair label is 0 when both samples share a class (similar pair)
    # and 1 otherwise (dissimilar pair), as expected by the contrastive loss.
    f_data = []
    f_data_mask = []
    s_data = []
    s_data_mask = []
    y_val = []
    n_examples = labels.shape[0]
    for i in range(n_examples):
        for j in range(i + 1, n_examples):
            if labels[i] == labels[j]:
                y_val.append(0)
            else:
                y_val.append(1)
            f_data.append(x_train[i])
            f_data_mask.append(valid_mask[i])
            s_data.append(x_train[j])
            s_data_mask.append(valid_mask[j])
    return np.stack(f_data, axis=0), np.stack(f_data_mask, axis=0), np.stack(s_data, axis=0), np.stack(s_data_mask, axis=0), np.array(y_val)
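# For instance, three labelled samples with labels [0, 0, 1] produce the
# n*(n-1)/2 = 3 pairs (0,1), (0,2), (1,2) with pair labels y_val = [0, 1, 1].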
def trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_object, optimizer, BATCH_SIZE, e):
    # One epoch over the labelled pairs: contrastive loss on the siamese
    # distance plus masked reconstruction losses on both members of each pair.
    tot_loss = 0.0
    margin = 1.0
    f_data, f_data_mask, s_data, s_data_mask, y_val = shuffle(f_data, f_data_mask, s_data, s_data_mask, y_val)
    iterations = f_data.shape[0] // BATCH_SIZE
    if f_data.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    for ibatch in range(iterations):
        batch_f = getBatch(f_data, ibatch, BATCH_SIZE)
        batch_f_mask = getBatch(f_data_mask, ibatch, BATCH_SIZE)
        batch_s = getBatch(s_data, ibatch, BATCH_SIZE)
        batch_s_mask = getBatch(s_data_mask, ibatch, BATCH_SIZE)
        # Cast pair labels to float so they combine with the float distance tensor.
        batch_y = getBatch(y_val, ibatch, BATCH_SIZE).astype("float32")
        with tf.GradientTape() as tape:
            # Contrastive loss: pull similar pairs (y=0) together, push
            # dissimilar pairs (y=1) at least `margin` apart.
            d_w = model.siameseDistance([batch_f, batch_s], training=True)
            equal_loss = .5 * (1 - batch_y) * d_w
            neg_loss = .5 * batch_y * tf.math.maximum(0., margin - d_w)
            loss = tf.reduce_mean(equal_loss + neg_loss)
            # Masked reconstruction losses: the validity masks zero out padded
            # time steps so they do not contribute to the loss.
            _, reco_f, reco_fR, _ = model(batch_f, training=True)
            _, reco_s, reco_sR, _ = model(batch_s, training=True)
            loss += loss_object(batch_f, reco_f * batch_f_mask)
            loss += loss_object(batch_f, reco_fR * batch_f_mask)
            loss += loss_object(batch_s, reco_s * batch_s_mask)
            loss += loss_object(batch_s, reco_sR * batch_s_mask)
        grads = tape.gradient(loss, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss
    return tot_loss / iterations
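# In equation form, the pair loss above is the standard contrastive loss
#   L(y, d_w) = (1 - y) * d_w / 2 + y * max(0, margin - d_w) / 2
# where d_w is the siamese distance between the two embedded series.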
def trainStepStrech(model, x_train, valid_mask, centers, loss_object, optimizer, BATCH_SIZE, e):
    # One epoch of the refinement stage: masked reconstruction losses plus a
    # term pulling each embedding towards its assigned cluster centroid.
    tot_loss = 0.0
    iterations = x_train.shape[0] // BATCH_SIZE
    if x_train.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    centers = centers.astype("float32")
    for ibatch in range(iterations):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        batch_mask = getBatch(valid_mask, ibatch, BATCH_SIZE)
        batch_c = getBatch(centers, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco * batch_mask)
            loss_rec += loss_object(batch_x, recoR * batch_mask)
            # Squared Euclidean distance between embeddings and their centroids.
            loss_rec += tf.reduce_mean(tf.reduce_sum(tf.square(batch_c - emb), axis=1))
        grads = tape.gradient(loss_rec, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return tot_loss / iterations
def trainStep(model, x_train, valid_mask, loss_object, optimizer, BATCH_SIZE, e):
    # One epoch of plain masked autoencoding over the full dataset.
    tot_loss = 0.0
    iterations = x_train.shape[0] // BATCH_SIZE
    if x_train.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    for ibatch in range(iterations):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        batch_mask = getBatch(valid_mask, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco * batch_mask)
            loss_rec += loss_object(batch_x, recoR * batch_mask)
        grads = tape.gradient(loss_rec, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return tot_loss / iterations
def trainRNNAE(model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs):
    n_epochs_warmUp = 40
    print("PRETRAINING STAGE : AE + CONTRASTIVE LOSS")
    for e in range(n_epochs_warmUp):
        f_data, f_data_mask, s_data, s_data_mask, y_val = shuffle(f_data, f_data_mask, s_data, s_data_mask, y_val)
        data, valid_mask = shuffle(data, valid_mask)
        trainLoss = trainStep(model, data, valid_mask, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))
    print("COMPUTE INTERMEDIATE CLUSTERING ASSIGNMENT")
    emb, _, _, _ = model(data)
    km = KMeans(n_clusters=nClasses)
    km.fit(emb.numpy())
    # Associate each sample with the centroid of its assigned cluster.
    centers = np.array([km.cluster_centers_[val] for val in km.labels_])
    print("REFINEMENT STEP alternating AE + MANIFOLD STRETCH TOWARDS CENTROIDS and AE + CONTRASTIVE LOSS")
    for e in range(n_epochs - n_epochs_warmUp):
        data, centers, valid_mask = shuffle(data, centers, valid_mask)
        trainLoss = trainStepStrech(model, data, valid_mask, centers, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))
    return model
def plot2DFeatures(data, labels):
    # Visual check of the embedding space: t-SNE projection coloured by class.
    X_embedded = TSNE(n_components=2).fit_transform(data)
    nclasses = len(np.unique(labels))
    for i in range(nclasses):
        idx = np.where(labels == i)
        pyplot.scatter(X_embedded[idx[0], 0], X_embedded[idx[0], 1])
    pyplot.draw()
    pyplot.pause(10)
    pyplot.clf()
def getExtractLabelSet(data, labels, nSamples):
    # Randomly draw nSamples examples per class to act as the labelled subset.
    labelledData = []
    labelsSmall = []
    for val in np.unique(labels):
        idx = np.where(labels == val)
        idx = shuffle(idx[0])[0:nSamples]
        labelledData.append(data[idx])
        for j in range(nSamples):
            labelsSmall.append(val)
    labelledData = np.concatenate(labelledData, axis=0)
    return labelledData, np.array(labelsSmall)
def createMaskTensor(data, valid_lengths):
    # Build a {0,1} tensor with the same shape as data: entry (i, t, :) is 1
    # when time step t is within the i-th series' valid length, 0 for padding.
    mask = np.zeros(data.shape)
    nrow, nt, ndim = mask.shape
    for i in range(nrow):
        for j in range(valid_lengths[i]):
            mask[i, j, :] = np.ones(ndim)
    return mask
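# Illustrative example: for two univariate series padded to length 4 with
# valid lengths [2, 4], createMaskTensor returns
#   [[[1], [1], [0], [0]],
#    [[1], [1], [1], [1]]]
# so multiplying a reconstruction by the mask silences the padded steps.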
def main(argv):
    dataDir = argv[1]
    nSamples = argv[2]
    runId = argv[3]
    data = np.load(dataDir + "/data.npy")
    labels = np.load(dataDir + "/class.npy")
    valid_lengths = np.load(dataDir + "/seqLength.npy")
    valid_mask = createMaskTensor(data, valid_lengths)
    idxLabelledData = np.load(dataDir + "/" + nSamples + "_" + runId + ".npy")
    labelledData = data[idxLabelledData]
    labelsSmall = labels[idxLabelledData]
    labelledValidMask = valid_mask[idxLabelledData]
    f_data, f_data_mask, s_data, s_data_mask, y_val = buildPair(labelledData, labelsSmall, labelledValidMask)
    print("labelledData.shape ", labelledData.shape)
    print("labelsSmall.shape ", labelsSmall.shape)
    origData = np.array(data)
    nClasses = len(np.unique(labels))
    RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)
    # Define the loss function and the optimizers used in the training phase.
    loss_huber = tf.keras.losses.Huber()
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)
    BATCH_SIZE = 32
    n_epochs = 100
    RNNAE_model = trainRNNAE(RNNAE_model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs)
    emb, _, _, _ = RNNAE_model(origData)
    emb = emb.numpy()
    km = KMeans(n_clusters=nClasses)
    km.fit(emb)
    nmi = normalized_mutual_info_score(labels, km.labels_)
    print("nmi %f" % nmi)
    #plot2DFeatures(emb, labels)
if __name__ == "__main__":
    main(sys.argv)