Commit 513a2882 authored by Ienco Dino

Clean repository

parent 5627cc24
Showing with 32 additions and 397 deletions
import tensorflow as tf

class RNNAE(tf.keras.Model):
    def __init__(self, filters, outputDim, dropout_rate=0.0, hidden_activation='relu', output_activation='softmax',
                 name='convNetwork2',
                 **kwargs):
        # call the constructor of the parent class, Model
        super(RNNAE, self).__init__(name=name, **kwargs)
        self.encoderR = tf.keras.layers.LSTM(filters, go_backwards=True)
        self.encoder = tf.keras.layers.LSTM(filters)
        self.decoder = tf.keras.layers.LSTM(filters, return_sequences=True)
        self.decoder2 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))
        self.decoderR = tf.keras.layers.LSTM(filters, return_sequences=True)
        self.decoder2R = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))

    def call(self, inputs, training=False):
        t = inputs.get_shape()
        # forward encoding and reconstruction
        enc = self.encoder(inputs)
        emb = enc
        seq_emb = tf.keras.layers.RepeatVector(t[1])(emb)
        dec = self.decoder(seq_emb)
        dec = self.decoder2(dec)
        # backward encoding and reconstruction (sequence read in reverse)
        encR = self.encoderR(inputs)
        embR = encR
        seq_embR = tf.keras.layers.RepeatVector(t[1])(embR)
        decR = self.decoderR(seq_embR)
        decR = self.decoder2R(decR)
        decR = tf.reverse(decR, axis=[1])
        return dec, decR, tf.concat((emb, embR), axis=1)
        #(dec+decR)/2, tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1)
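A minimal sketch of how this bidirectional LSTM autoencoder could be exercised on dummy data (batch size, sequence length and feature dimension below are assumptions for illustration only):

import tensorflow as tf

# hypothetical shapes: 8 sequences, 24 time steps, 10 features per step
x = tf.random.normal((8, 24, 10))
model = RNNAE(filters=64, outputDim=10)
dec, decR, emb = model(x)                 # forward/backward reconstructions + concatenated embedding
print(dec.shape, decR.shape, emb.shape)   # (8, 24, 10) (8, 24, 10) (8, 128)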
import numpy as np
import tensorflow as tf
import os
import sys
from sklearn.metrics import f1_score, r2_score
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
import time
from sklearn.manifold import TSNE
import matplotlib.pyplot as pyplot
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score
from active_semi_clustering.semi_supervised.pairwise_constraints import MPCKMeans, PCKMeans, COPKMeans
from model import RNNAE
def generateConstraints(idxLabelledData, labels):
    ml = []
    cl = []
    for i in range(len(idxLabelledData)):
        for j in range(i+1, len(idxLabelledData)):
            if labels[i] == labels[j]:
                ml.append([i, j])
            else:
                cl.append([i, j])
    return ml, cl
def getBatch(X, i, batch_size):
    start_id = i * batch_size
    end_id = min((i+1) * batch_size, X.shape[0])
    batch_x = X[start_id:end_id]
    return batch_x
def buildPair(x_train, labels):
    f_data = []
    s_data = []
    y_val = []
    n_examples = labels.shape[0]
    for i in range(n_examples):
        for j in range(i+1, n_examples):
            # target 0 for a must-link (same class) pair, 1 for a cannot-link (different class) pair
            if labels[i] == labels[j]:
                y_val.append(0)
            else:
                y_val.append(1)
            f_data.append(x_train[i])
            s_data.append(x_train[j])
    return np.stack(f_data, axis=0), np.stack(s_data, axis=0), np.array(y_val)
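A small illustrative example of the pair construction (shapes and values assumed): for three labelled series with classes [0, 0, 1], buildPair yields three pairs whose targets mark must-link (0) and cannot-link (1) relations.

import numpy as np

x_small = np.random.rand(3, 24, 10).astype("float32")   # hypothetical labelled series
y_small = np.array([0, 0, 1])
f_small, s_small, y_pairs = buildPair(x_small, y_small)
print(y_pairs)                        # [0 1 1] -> (0,1) same class, (0,2) and (1,2) different classes
print(f_small.shape, s_small.shape)   # (3, 24, 10) (3, 24, 10)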
def trainStepL(model, f_data, s_data, y_val, loss_object, optimizer, BATCH_SIZE, e):
    tot_loss = 0.0
    margin = 1.0
    f_data, s_data, y_val = shuffle(f_data, s_data, y_val)
    iterations = f_data.shape[0] / BATCH_SIZE
    if f_data.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    for ibatch in range(int(iterations)):
        batch_f = getBatch(f_data, ibatch, BATCH_SIZE)
        batch_s = getBatch(s_data, ibatch, BATCH_SIZE)
        batch_y = getBatch(y_val, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            # contrastive loss on the embedding distance of each pair
            d_w = model.siameseDistance([batch_f, batch_s], training=True)
            equal_loss = (.5 * (1 - batch_y) * d_w)
            neg_loss = (.5 * batch_y * tf.math.maximum(0., margin - d_w))
            loss = equal_loss + neg_loss
            loss = tf.reduce_mean(loss)
            # reconstruction losses for both elements of the pair
            _, reco_f, reco_fR, _ = model(batch_f, training=True)
            _, reco_s, reco_sR, _ = model(batch_s, training=True)
            loss += loss_object(batch_f, reco_f)
            loss += loss_object(batch_f, reco_fR)
            loss += loss_object(batch_s, reco_s)
            loss += loss_object(batch_s, reco_sR)
        grads = tape.gradient(loss, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss
    return (tot_loss / iterations)
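For reference, the contrastive term above reduces to 0.5 * d_w for a must-link pair (y = 0) and to 0.5 * max(0, margin - d_w) for a cannot-link pair (y = 1); a toy check with assumed distances:

import numpy as np

d_w = np.array([0.2, 0.8], dtype="float32")   # hypothetical embedding distances for two pairs
y = np.array([0.0, 1.0], dtype="float32")     # first pair must-link, second cannot-link
margin = 1.0
loss = 0.5 * (1 - y) * d_w + 0.5 * y * np.maximum(0.0, margin - d_w)
print(loss)   # [0.1 0.1] -> must-link pairs are pulled together, cannot-link pairs pushed beyond the margin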
def trainStepStrech(model, x_train, centers, loss_object, optimizer, BATCH_SIZE, e):
    tot_loss = 0.0
    iterations = x_train.shape[0] / BATCH_SIZE
    if x_train.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    centers = centers.astype("float32")
    for ibatch in range(int(iterations)):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        batch_c = getBatch(centers, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            # reconstruction losses plus a term pulling each embedding towards its assigned centroid
            loss_rec = loss_object(batch_x, reco)
            loss_rec += loss_object(batch_x, recoR)
            loss_rec += tf.reduce_mean(tf.reduce_sum(tf.square(batch_c - emb), axis=1))
        grads = tape.gradient(loss_rec, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return (tot_loss / iterations)
def trainStep(model, x_train, loss_object, optimizer, BATCH_SIZE, e):
    tot_loss = 0.0
    iterations = x_train.shape[0] / BATCH_SIZE
    if x_train.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    for ibatch in range(int(iterations)):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco)
            loss_rec += loss_object(batch_x, recoR)
        grads = tape.gradient(loss_rec, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return (tot_loss / iterations)
def trainRNNAE(model, nClasses, data, f_data, s_data, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs):
    n_epochs_warmUp = 40
    centers = None
    print("PRETRAINING STAGE : AE + CONTRASTIVE LOSS")
    for e in range(n_epochs_warmUp):
        f_data, s_data, y_val = shuffle(f_data, s_data, y_val)
        data = shuffle(data)
        trainLoss = trainStep(model, data, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, f_data, s_data, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))
    print("COMPUTE INTERMEDIATE CLUSTERING ASSIGNMENT")
    emb, _, _, _ = model(data)
    km = KMeans(n_clusters=nClasses)
    km.fit(emb)
    # associate each sample with the centroid of the cluster it was assigned to
    centers = []
    for val in km.labels_:
        centers.append(km.cluster_centers_[val])
    centers = np.array(centers)
    print("REFINEMENT STEP alternating AE + MANIFOLD STRETCH TOWARDS CENTROIDS and AE + CONTRASTIVE LOSS")
    for e in range(n_epochs - n_epochs_warmUp):
        data, centers = shuffle(data, centers)
        trainLoss = trainStepStrech(model, data, centers, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, f_data, s_data, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))
    return model
def plot2DFeatures(data, labels):
    X_embedded = TSNE(n_components=2).fit_transform(data)
    nclasses = len(np.unique(labels))
    for i in range(nclasses):
        idx = np.where(labels == i)
        pyplot.scatter(X_embedded[idx[0], 0], X_embedded[idx[0], 1])
    pyplot.draw()
    pyplot.pause(10)
    pyplot.clf()
def getExtractLabelSet(data, labels, nSamples):
    labelledData = []
    labelsSmall = []
    for val in np.unique(labels):
        idx = np.where(labels == val)
        idx = shuffle(idx[0])[0:nSamples]
        labelledData.append(data[idx])
        for j in range(nSamples):
            labelsSmall.append(val)
    labelledData = np.concatenate(labelledData, axis=0)
    return labelledData, np.array(labelsSmall)
def main(argv):
    # directory in which data are stored
    dataDir = argv[1]
    # number of labelled samples used to access the constraint information
    nSamples = argv[2]
    # run identifier used to build the constraint file name
    runId = argv[3]
    data = np.load(dataDir + "/data.npy")
    labels = np.load(dataDir + "/class.npy")
    idxLabelledData = np.load(dataDir + "/" + nSamples + "_" + runId + ".npy")
    labelledData = data[idxLabelledData]
    labelsSmall = labels[idxLabelledData]
    # from the labelled examples build the whole set of must-link / cannot-link pairs
    f_data, s_data, y_val = buildPair(labelledData, labelsSmall)
    print("labelledData.shape ", labelledData.shape)
    print("labelsSmall.shape ", labelsSmall.shape)
    origData = np.array(data)
    nClasses = len(np.unique(labels))
    RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)
    # loss function and optimizers used in the training phase
    loss_huber = tf.keras.losses.Huber()
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)
    BATCH_SIZE = 32
    n_epochs = 100
    RNNAE_model = trainRNNAE(RNNAE_model, nClasses, data, f_data, s_data, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs)
    emb, _, _, _ = RNNAE_model(origData)
    emb = emb.numpy()
    km = KMeans(n_clusters=nClasses)
    km.fit(emb)
    nmi = normalized_mutual_info_score(labels, km.labels_)
    print("nmi %f" % nmi)

if __name__ == "__main__":
    main(sys.argv)
    #plot2DFeatures(emb, labels)
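The script is meant to be launched with the data directory, the number of labelled samples and a run identifier; a hypothetical invocation (script and directory names below are placeholders, not taken from the repository):

# assumed layout: ./myDataset/data.npy, ./myDataset/class.npy and ./myDataset/20_1.npy (indices of the labelled samples)
#   python main_script.py ./myDataset 20 1
# prints the per-epoch losses and the final NMI between the K-Means partition of the embedding and the ground-truth classes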
@@ -14,9 +14,12 @@ from sklearn.cluster import KMeans
 from sklearn.metrics import normalized_mutual_info_score
 from active_semi_clustering.semi_supervised.pairwise_constraints import MPCKMeans, PCKMeans, COPKMeans
 from model import RNNAE
+#gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.45)
+#sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
 def generateConstraints(idxLabelledData, labels):
     ml = []
     cl = []
@@ -80,8 +83,8 @@ def trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_obje
             loss = equal_loss + neg_loss
             loss = tf.reduce_mean(loss)
-            _, reco_f, reco_fR, _ = model(batch_f, training=True)
-            _, reco_s, reco_sR, _ = model(batch_s, training=True)
+            _, reco_f, reco_fR = model(batch_f, training=True)
+            _, reco_s, reco_sR = model(batch_s, training=True)
             loss += loss_object(batch_f, reco_f*batch_f_mask)
             loss += loss_object(batch_f, reco_fR*batch_f_mask)
@@ -98,7 +101,6 @@ def trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_obje
 def trainStepStrech(model, x_train, valid_mask, centers, loss_object, optimizer, BATCH_SIZE, e):
     loss_iteration = 0
     tot_loss = 0.0
-    cosineSim = tf.keras.losses.CosineSimilarity(reduction=tf.keras.losses.Reduction.NONE)
     iterations = x_train.shape[0] / BATCH_SIZE
     if x_train.shape[0] % BATCH_SIZE != 0:
         iterations += 1
@@ -109,7 +111,7 @@ def trainStepStrech(model, x_train, valid_mask, centers, loss_object, optimizer,
         batch_mask = getBatch(valid_mask, ibatch, BATCH_SIZE)
         batch_c = getBatch(centers, ibatch, BATCH_SIZE)
         with tf.GradientTape() as tape:
-            emb, reco, recoR, classif = model(batch_x, training=True)
+            emb, reco, recoR = model(batch_x, training=True)
             loss_rec = loss_object(batch_x, reco*batch_mask)
             loss_rec += loss_object(batch_x, recoR*batch_mask)
             loss_rec += tf.reduce_mean(tf.reduce_sum(tf.square(batch_c - emb), axis=1))
@@ -131,7 +133,7 @@ def trainStep(model, x_train, valid_mask, loss_object, optimizer, BATCH_SIZE, e)
         batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
         batch_mask = getBatch(valid_mask, ibatch, BATCH_SIZE)
         with tf.GradientTape() as tape:
-            emb, reco, recoR, classif = model(batch_x, training=True)
+            emb, reco, recoR = model(batch_x, training=True)
             loss_rec = loss_object(batch_x, reco*batch_mask)
             loss_rec += loss_object(batch_x, recoR*batch_mask)
         grads = tape.gradient(loss_rec, model.trainable_variables)
@@ -142,9 +144,9 @@ def trainStep(model, x_train, valid_mask, loss_object, optimizer, BATCH_SIZE, e)
 def trainRNNAE(model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs):
-    #th = 40
     n_epochs_warmUp = 40
     centers = None
     print("PRETRAINING STAGE : AE + CONTRASTIVE LOSS")
     for e in range(n_epochs_warmUp):
         f_data, f_data_mask, s_data, s_data_mask, y_val = shuffle(f_data, f_data_mask, s_data, s_data_mask, y_val)
@@ -155,7 +157,7 @@ def trainRNNAE(model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s
     print("COMPUTE INTERMEDIATE CLUSTERING ASSIGNMENT")
-    emb, _, _, _ = model(data)
+    emb, _, _ = model(data)
     km = KMeans(n_clusters=nClasses)
     km.fit(emb)
     centers = []
@@ -166,38 +168,15 @@ def trainRNNAE(model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s
     print("REFINEMENT STEP alternating AE + MANIFOLD STRETCH TOWARDS CENTROIDS and AE + CONTRASTIVE LOSS")
     for e in range(n_epochs - n_epochs_warmUp):
-        #labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
         data, centers, valid_mask = shuffle(data, centers, valid_mask)
-        #STRECHING THE EMBEDDING TOWARDS CENTROIDS
         trainLoss = trainStepStrech(model, data, valid_mask, centers, loss_huber, optimizer, BATCH_SIZE, e)
-        #FORCING EMBEDDING TO MATCH CONSTRAINTS
         trainLoss += trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
         print("epoch %d with loss %f" % (e, trainLoss))
     return model
-def plot2DFeatures(data, labels):
-    X_embedded = TSNE(n_components=2).fit_transform(data)
-    nclasses = len(np.unique(labels))
-    for i in range(nclasses):
-        idx = np.where(labels == i)
-        pyplot.scatter(X_embedded[idx[0],0], X_embedded[idx[0],1])
-    pyplot.draw()
-    pyplot.pause(10)
-    pyplot.clf()
-def getExtractLabelSet(data, labels, nSamples):
-    labelledData = []
-    labelsSmall = []
-    for val in np.unique(labels):
-        idx = np.where(labels == val)
-        idx = shuffle(idx[0])[0:nSamples]
-        labelledData.append(data[idx])
-        for j in range(nSamples):
-            labelsSmall.append(val)
-    labelledData = np.concatenate(labelledData, axis=0)
-    return labelledData, np.array(labelsSmall)
 def createMaskTensor(data, valid_lengths):
     mask = np.zeros(data.shape)
     nrow, nt, ndim = mask.shape
@@ -207,27 +186,31 @@ def createMaskTensor(data, valid_lengths):
     return mask
 def main(argv):
+    #Directory in which data are stored
     dataDir = argv[1]
+    #number of labelled samples to access data information
     nSamples = argv[2]
+    #run identifier to add to the output file name
     runId = argv[3]
+    newDir = dataDir+"/OUR_VL"
+    if not os.path.exists(newDir):
+        os.makedirs(newDir)
     data = np.load(dataDir+"/data.npy")
     labels = np.load(dataDir+"/class.npy")
     valid_lengths = np.load(dataDir+"/seqLength.npy")
     valid_mask = createMaskTensor(data, valid_lengths)
     idxLabelledData = np.load(dataDir+"/"+nSamples+"_"+runId+".npy")
     labelledData = data[idxLabelledData]
     labelsSmall = labels[idxLabelledData]
     labelledValidMask = valid_mask[idxLabelledData]
+    #FROM THE LABELLED EXAMPLES BUILD THE WHOLE SET OF MUST AND CANNOT LINK CONSTRAINTS
     f_data, f_data_mask, s_data, s_data_mask, y_val = buildPair(labelledData, labelsSmall, labelledValidMask)
-    print("labelledData.shape ", labelledData.shape)
-    print("labelsSmall.shape ", labelsSmall.shape)
     origData = np.array(data)
     nClasses = len(np.unique(labels))
@@ -235,25 +218,25 @@ def main(argv):
     RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)
     """ defining loss function and the optimizer to use in the training phase """
     loss_huber = tf.keras.losses.Huber()
-    loss_object2 = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)#MeanAbsoluteError()#
     optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
     optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)
     BATCH_SIZE = 32
+    #Total number of epochs
     n_epochs = 100
     RNNAE_model = trainRNNAE(RNNAE_model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs)
-    emb, _, _, _ = RNNAE_model(origData)
+    emb, _, _ = RNNAE_model(origData)
     emb = emb.numpy()
     km = KMeans(n_clusters=nClasses)
     km.fit(emb)
     nmi = normalized_mutual_info_score(labels, km.labels_)
     print("nmi %f" % nmi)
+    #Save the clustering results obtained via the K-Means algorithm applied on the embedding generated by our approach
+    np.save(newDir+"/res_"+nSamples+"_"+runId+".npy", km.labels_)
+    #Save the embedding generated by our framework
+    np.save(newDir+"/emb_"+nSamples+"_"+runId+".npy", emb)
 if __name__ == "__main__":
     main(sys.argv)
-    #plot2DFeatures(emb, labels)
 import tensorflow as tf
-class AttentionLayer(tf.keras.layers.Layer):
-    def __init__(self, ch_output):
-        super(AttentionLayer, self).__init__()
-        self.ch_output = ch_output
-        self.activation = tf.math.tanh #tf.nn.leaky_relu
-        self.output_activation = tf.keras.activations.softmax
-    def build(self, input_shape):
-        '''
-        print(input_shape)
-        if len(input_shape) > 1:
-            input_dim = input_shape[1]
-        else:
-            input_dim = input_shape
-        print(input_dim)
-        exit()
-        '''
-        input_dim = input_shape[-1]
-        self.A = self.add_weight(name="a_weight_matrix", shape=(self.ch_output, 1))
-        self.W = self.add_weight(name="W_target_nodes_weights", shape=[self.ch_output, self.ch_output])
-        self.tgt_node_b = self.add_weight(name='bias_target', shape=(self.ch_output,), initializer='zeros')
-        self.neigh_b = self.add_weight(name='bias_neigh', shape=(self.ch_output,), initializer='zeros')
-    def call(self, inputs, **kwargs):
-        #hi = inputs[0]
-        # target_nodes shape: batch_size x features_size F
-        # hj shape: batch_size x max(|N(x)|) x features_size F
-        #mask = tf.dtypes.cast(kwargs.get('mask'), tf.float32)
-        # mask shape: batch_size x max(|N(x)|)
-        #whi = tf.nn.bias_add(tf.tensordot(hi, self.W, axes=1), self.tgt_node_b)
-        # whi shape: batch_size x features_output F'
-        whj = tf.nn.bias_add(tf.tensordot(inputs, self.W, axes=1), self.neigh_b)
-        # whj shape: batch_size x max(|N(x)|) x features_output F'
-        multiply_dim = len(whj[0])
-        #whi = tf.tile(tf.expand_dims(whi, 1), multiples=(1, multiply_dim, 1))
-        # whi shape for concat: batch_size x features_output F'
-        #concat = tf.concat([whi, whj], axis=2)
-        # concat shape: batch_size x max(|N(x)|) x 2F'
-        scores = self.activation(tf.tensordot(whj, self.A, axes=1))
-        scores = tf.squeeze(scores, axis=-1)
-        # scores shape: batch_size x max(|N(x)|)
-        #masked_scores = scores * mask
-        alphas = self.output_activation(scores)
-        hj = inputs * tf.expand_dims(alphas, -1)
-        # hj shape: batch_size x max(|N(x)|) x features_output F'
-        output = tf.reduce_sum(hj, axis=1)
-        # output shape: (batch_size x features_output F')
-        return output, alphas
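For reference, the removed attention layer pools a (batch, time, features) tensor into one vector per sequence; a minimal sketch of how it would be applied (shapes are assumptions, and the input feature size must equal ch_output for the weight shapes above to line up):

import tensorflow as tf

layer = AttentionLayer(ch_output=64)
h = tf.random.normal((8, 24, 64))        # hypothetical sequence of hidden states
pooled, alphas = layer(h)
print(pooled.shape, alphas.shape)        # (8, 64) attention-pooled vector, (8, 24) attention weights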
 class RNNAE(tf.keras.Model):
     def __init__(self, filters, outputDim, n_cluster, dropout_rate=0.0, hidden_activation='relu', output_activation='softmax',
-                 name='convNetwork2',
+                 name='RNNAE',
                  **kwargs):
         # call the constructor of the parent class, Model
         super(RNNAE, self).__init__(name=name, **kwargs)
-        self.attention = AttentionLayer(filters)
-        self.attentionR = AttentionLayer(filters)
-        self.gate = tf.keras.layers.Dense(filters, activation='sigmoid')
-        self.gateR = tf.keras.layers.Dense(filters, activation='sigmoid')
         self.encoder = tf.keras.layers.GRU(filters, return_sequences=True)
         self.encoderR = tf.keras.layers.GRU(filters, go_backwards=True, return_sequences=True)
-        self.classif = tf.keras.layers.Dense(n_cluster, activation='softmax')
         self.decoder = tf.keras.layers.GRU(filters, return_sequences=True)
         self.decoder2 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))
         self.decoderR = tf.keras.layers.GRU(filters, return_sequences=True)
         self.decoder2R = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))
-        #self.TDclassif = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=n_cluster, activation='softmax'))
     def siameseDistance(self, inputs, training=False):
         first_elements = inputs[0]
         second_elements = inputs[1]
@@ -94,10 +29,7 @@ class RNNAE(tf.keras.Model):
         seqEmbR = self.encoderR(inputs)
         emb = tf.unstack(seqEmb, axis=1)[-1]
         embR = tf.unstack(seqEmbR, axis=1)[-1]
-        #emb = self.gate(emb) * emb
-        #embR = self.gate(embR) * embR
         return emb + embR
-        #return tf.concat([emb,embR],axis=1)#emb+embR
     def decF(self, seq_emb, emb, training=False):
         dec = self.decoder(seq_emb)
@@ -105,19 +37,15 @@ class RNNAE(tf.keras.Model):
         dec = self.decoder2(dec)
         decR = self.decoder2R(decR)
-        pred = self.classif(emb)
-        #print(decR.get_shape())
         decR = tf.reverse(decR, axis=[1])
-        #exit()
-        return dec, decR, pred
+        return dec, decR
     def call(self, inputs, training=False):
         t = inputs.get_shape()
         emb = self.encF(inputs, training)
         seq_emb = tf.keras.layers.RepeatVector(t[1])(emb)
-        dec, decR, pred = self.decF(seq_emb, emb, training)
-        return emb, dec, decR, pred
+        dec, decR = self.decF(seq_emb, emb, training)
+        return emb, dec, decR
         #(dec+decR)/2, tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1)