diff --git a/__pycache__/model.cpython-38.pyc b/__pycache__/model.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9e6caa4ddd9ae61d8929a42fc3dc5cfbf1341bae
Binary files /dev/null and b/__pycache__/model.cpython-38.pyc differ
diff --git a/main.py b/main.py
index e0106d3c75fad96273686af4059cbbd4454bec99..ef7676823df473b16c5fbf04096e342e81672349 100644
--- a/main.py
+++ b/main.py
@@ -52,42 +52,41 @@ def buildPair(x_train, labels):
     return np.stack(f_data, axis=0), np.stack(s_data, axis=0), np.array(y_val)


-def trainStepL(model, x_train, labels, loss_object, optimizer, BATCH_SIZE, e):
-    loss_iteration = 0
-    tot_loss = 0.0
-    margin = 1.0
-    f_data, s_data, y_val = buildPair(x_train, labels)
+def trainStepL(model, f_data, s_data, y_val, loss_object, optimizer, BATCH_SIZE, e):
+    loss_iteration = 0
+    tot_loss = 0.0
+    margin = 1.0

-    f_data, s_data, y_val = shuffle(f_data, s_data, y_val)
-    iterations = f_data.shape[0] / BATCH_SIZE
-    if f_data.shape[0] % BATCH_SIZE != 0:
-        iterations += 1
+    f_data, s_data, y_val = shuffle(f_data, s_data, y_val)
+    iterations = f_data.shape[0] / BATCH_SIZE
+    if f_data.shape[0] % BATCH_SIZE != 0:
+        iterations += 1

-    for ibatch in range(int(iterations)):
-        batch_f = getBatch(f_data, ibatch, BATCH_SIZE)
-        batch_s = getBatch(s_data, ibatch, BATCH_SIZE)
-        batch_y = getBatch(y_val, ibatch, BATCH_SIZE)
-        with tf.GradientTape() as tape:
-            d_w = model.siameseDistance([batch_f, batch_s], training=True)
-            equal_loss = (.5* (1-batch_y) * d_w)
-            neg_loss = (.5* batch_y * tf.math.maximum(0 , margin - d_w) )
+    for ibatch in range(int(iterations)):
+        batch_f = getBatch(f_data, ibatch, BATCH_SIZE)
+        batch_s = getBatch(s_data, ibatch, BATCH_SIZE)
+        batch_y = getBatch(y_val, ibatch, BATCH_SIZE)
+        with tf.GradientTape() as tape:
+            d_w = model.siameseDistance([batch_f, batch_s], training=True)
+            equal_loss = (.5* (1-batch_y) * d_w)
+            neg_loss = (.5* batch_y * tf.math.maximum(0 , margin - d_w) )

-            loss = equal_loss + neg_loss
-            loss = tf.reduce_mean(loss)
-            _, reco_f, reco_fR, _ = model(batch_f, training=True)
-            _, reco_s, reco_sR, _ = model(batch_s, training=True)
+            loss = equal_loss + neg_loss
+            loss = tf.reduce_mean(loss)
+            _, reco_f, reco_fR, _ = model(batch_f, training=True)
+            _, reco_s, reco_sR, _ = model(batch_s, training=True)
             loss+= loss_object(batch_f, reco_f)
-            loss+= loss_object(batch_f, reco_fR)
+            loss+= loss_object(batch_f, reco_fR)

-            loss+= loss_object(batch_s, reco_s)
-            loss+= loss_object(batch_f, reco_sR)
-        grads = tape.gradient(loss, model.trainable_variables)
-        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
-        optimizer.apply_gradients(zip(grads, model.trainable_variables))
-        tot_loss+=loss
+            loss+= loss_object(batch_s, reco_s)
+            loss+= loss_object(batch_f, reco_sR)
+        grads = tape.gradient(loss, model.trainable_variables)
+        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
+        optimizer.apply_gradients(zip(grads, model.trainable_variables))
+        tot_loss+=loss

-    return (tot_loss / iterations)
+    return (tot_loss / iterations)

 def trainStepStrech(model, x_train, centers, loss_object, optimizer, BATCH_SIZE, e):
     loss_iteration = 0
@@ -133,15 +132,15 @@ def trainStep(model, x_train, loss_object, optimizer, BATCH_SIZE, e):

     return (tot_loss / iterations)

-def trainRNNAE(model, nClasses, data, labelledData, labelsSmall, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs):
+def trainRNNAE(model, nClasses, data, f_data, s_data, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs):
     #th = 40
     n_epochs_warmUp = 40
     centers = None
     for e in range(n_epochs_warmUp):
-        labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
+        f_data, s_data, y_val, = shuffle(f_data, s_data, y_val)
         data = shuffle(data)
         trainLoss = trainStep(model, data, loss_huber, optimizer, BATCH_SIZE, e)
-        trainLoss += trainStepL(model, labelledData, labelsSmall, loss_huber, optimizer2, BATCH_SIZE, e)
+        trainLoss += trainStepL(model, f_data, s_data, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
         print("epoch %d with loss %f" % (e, trainLoss))

     emb, _, _, _ = model(data)
@@ -153,10 +152,10 @@ def trainRNNAE(model, nClasses, data, labelledData, labelsSmall, loss_huber, opt
     centers = np.array(centers)

     for e in range(n_epochs - n_epochs_warmUp):
-        labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
+        #labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
         data, centers = shuffle(data, centers)
         trainLoss = trainStepStrech(model, data, centers, loss_huber, optimizer, BATCH_SIZE, e)
-        trainLoss += trainStepL(model, labelledData, labelsSmall, loss_huber, optimizer2, BATCH_SIZE, e)
+        trainLoss += trainStepL(model, f_data, s_data, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
         print("epoch %d with loss %f" % (e, trainLoss))

     return model
@@ -185,45 +184,50 @@ def getExtractLabelSet(data, labels, nSamples):
     labelledData = np.concatenate(labelledData, axis=0)
     return labelledData, np.array(labelsSmall)

-dataDir = sys.argv[1]
-nSamples = sys.argv[2]
-runId = sys.argv[3]
-#Dordogne 23
-data = np.load(dataDir+"/data.npy")
-labels = np.load(dataDir+"/class.npy")
+def main(argv):
+    dataDir = argv[1]
+    nSamples = argv[2]
+    runId = argv[3]
+
+    data = np.load(dataDir+"/data.npy")
+    labels = np.load(dataDir+"/class.npy")
+
+    idxLabelledData = np.load(dataDir+"/"+nSamples+"_"+runId+".npy")

-idxLabelledData = np.load(dataDir+"/"+nSamples+"_"+runId+".npy")
+    labelledData = data[idxLabelledData]
+    labelsSmall = labels[idxLabelledData]

-labelledData = data[idxLabelledData]
-labelsSmall = labels[idxLabelledData]
+    f_data, s_data, y_val = buildPair(labelledData, labelsSmall)
+    print("labelledData.shape ",labelledData.shape)
+    print("labelsSmall.shape ",labelsSmall.shape)
+    origData = np.array(data)

-print("labelledData.shape ",labelledData.shape)
-print("labelsSmall.shape ",labelsSmall.shape)
-origData = np.array(data)
+    nClasses = len(np.unique(labels))

-nClasses = len(np.unique(labels))
+    RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)
+    """ defining loss function and the optimizer to use in the training phase """
+    loss_huber = tf.keras.losses.Huber()
+    loss_object2 = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)#MeanAbsoluteError()#
+    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
+    optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)

-RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)
-""" defining loss function and the optimizer to use in the training phase """
-loss_huber = tf.keras.losses.Huber()
-loss_object2 = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)#MeanAbsoluteError()#
-optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
-optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)
+    BATCH_SIZE = 32
+    n_epochs = 100

-BATCH_SIZE = 32
-n_epochs = 100
+    RNNAE_model = trainRNNAE(RNNAE_model, nClasses, data, f_data, s_data, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs)
+    emb, _, _, _ = RNNAE_model(origData)

-RNNAE_model = trainRNNAE(RNNAE_model, nClasses, data, labelledData, labelsSmall, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs)
-emb, _, _, _ = RNNAE_model(origData)
+    emb = emb.numpy()
+    km = KMeans(n_clusters=nClasses)
+    km.fit(emb)
+    nmi = normalized_mutual_info_score(labels, km.labels_)
+    print("nmi %f" % nmi)

-emb = emb.numpy()
-km = KMeans(n_clusters=nClasses)
-km.fit(emb)
-nmi = normalized_mutual_info_score(labels, km.labels_)
-print("nmi %f" % nmi)
+if __name__ == "__main__":
+    main(sys.argv)

 #plot2DFeatures(emb, labels)
diff --git a/model.py b/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..06e7b84013e2249e7bda8df45696c8120cd5623e
--- /dev/null
+++ b/model.py
@@ -0,0 +1,123 @@
+import tensorflow as tf
+
+class AttentionLayer(tf.keras.layers.Layer):
+    def __init__(self, ch_output):
+        super(AttentionLayer, self).__init__()
+        self.ch_output = ch_output
+        self.activation = tf.math.tanh #tf.nn.leaky_relu
+        self.output_activation = tf.keras.activations.softmax
+
+    def build(self, input_shape):
+        '''
+        print(input_shape)
+        if len(input_shape) > 1:
+            input_dim = input_shape[1]
+        else:
+            input_dim = input_shape
+        print(input_dim)
+        exit()
+        '''
+        input_dim = input_shape[-1]
+        self.A = self.add_weight(name="a_weight_matrix", shape=(self.ch_output, 1))
+        self.W = self.add_weight(name="W_target_nodes_weights", shape=[self.ch_output, self.ch_output])
+        self.tgt_node_b = self.add_weight(name='bias_target', shape=(self.ch_output,), initializer='zeros')
+        self.neigh_b = self.add_weight(name='bias_neigh', shape=(self.ch_output,), initializer='zeros')
+
+    def call(self, inputs, **kwargs):
+        #hi = inputs[0]
+        # target_nodes shape: batch_size x features_size F
+        # hj shape: batch_size x max(|N(x)|) x features_size F
+        #mask = tf.dtypes.cast(kwargs.get('mask'), tf.float32)
+        # mask shape: batch_size x max(|N(x)|)
+
+        #whi = tf.nn.bias_add(tf.tensordot(hi, self.W, axes=1), self.tgt_node_b)
+        # whi shape: batch_size x features_output F'
+        #print(inputs.get_shape())
+        #print(self.W.get_shape())
+        whj = tf.nn.bias_add(tf.tensordot(inputs, self.W, axes=1), self.neigh_b)
+        #print("whj ",whj.get_shape())
+        # whj shape: batch_size x max(|N(x)|) x features_output F'
+        multiply_dim = len(whj[0])
+        #whi = tf.tile(tf.expand_dims(whi, 1), multiples=(1, multiply_dim, 1))
+        # whi shape for concat: batch_size x features_output F'
+        #concat = whj
+        #concat = tf.concat([whi, whj], axis=2)
+        # concat shape: batch_size x max(|N(x)|) x 2F'
+        scores = self.activation(tf.tensordot(whj, self.A, axes=1))
+        scores = tf.squeeze(scores, axis=-1)
+        # scores shape: batch_size x max(|N(x)|)
+        #masked_scores = scores * mask
+        alphas = self.output_activation(scores)
+        hj = inputs * tf.expand_dims(alphas, -1)
+        # hj shape: batch_size x max(|N(x)|) x features_output F'
+        output = tf.reduce_sum(hj, axis=1)
+        # output shape: (batch_size x features_output F')
+        return output, alphas
+
+
+
+
+class RNNAE(tf.keras.Model):
+    def __init__(self, filters, outputDim, n_cluster, dropout_rate = 0.0, hidden_activation='relu', output_activation='softmax',
+                 name='convNetwork2',
+                 **kwargs):
+        # call the constructor of the parent class, Model
+        super(RNNAE, self).__init__(name=name, **kwargs)
+        self.attention = AttentionLayer(filters)
+        self.attentionR = AttentionLayer(filters)
+        self.gate = tf.keras.layers.Dense(filters, activation='sigmoid')
+        self.gateR = tf.keras.layers.Dense(filters, activation='sigmoid')
+
+        self.encoder = tf.keras.layers.GRU(filters, return_sequences=True)
+        self.encoderR = tf.keras.layers.GRU(filters, go_backwards=True, return_sequences=True)
+        self.classif = tf.keras.layers.Dense(n_cluster, activation='softmax')
+        self.decoder = tf.keras.layers.GRU(filters, return_sequences=True)
+        self.decoder2 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))
+
+        self.decoderR = tf.keras.layers.GRU(filters, return_sequences=True)
+        self.decoder2R = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))
+
+        #self.TDclassif = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=n_cluster, activation='softmax'))
+
+
+    def siameseDistance(self, inputs, training=False):
+        first_elements = inputs[0]
+        second_elements = inputs[1]
+
+        femb = self.encF(first_elements, training=training)
+        semb = self.encF(second_elements, training=training)
+        d_W = tf.reduce_sum( tf.square(femb - semb), axis=1)
+        return d_W
+
+    def encF(self, inputs, training=False):
+        seqEmb = self.encoder(inputs)
+        seqEmbR = self.encoderR(inputs)
+        emb = tf.unstack(seqEmb,axis=1)[-1]
+        embR = tf.unstack(seqEmbR,axis=1)[-1]
+        #emb = self.gate(emb) * emb
+        #embR = self.gate(embR) * embR
+        return emb+embR
+        #return tf.concat([emb,embR],axis=1)#emb+embR
+
+    def decF(self, seq_emb, emb, training=False):
+        dec = self.decoder(seq_emb)
+        decR = self.decoderR(seq_emb)
+
+        dec = self.decoder2(dec)
+        decR = self.decoder2R(decR)
+
+        pred = self.classif(emb)
+        #print(decR.get_shape())
+
+        decR = tf.reverse(decR, axis=[1])
+        #exit()
+        return dec, decR, pred
+
+    def call(self, inputs, training=False):
+        t = inputs.get_shape()
+        emb = self.encF(inputs, training)
+        seq_emb = tf.keras.layers.RepeatVector(t[1])(emb)
+        dec, decR, pred = self.decF(seq_emb, emb, training)
+        return emb, dec, decR, pred
+
+        #(dec+decR)/2, tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1)
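
Not part of the patch itself: a minimal smoke-test sketch for the new model.py added above. It assumes TensorFlow 2.x is installed and that model.py is importable from the working directory; the batch size, sequence length, and feature count are arbitrary toy values chosen only for illustration.

# Illustrative smoke test only -- not part of the diff above.
# Assumes TensorFlow 2.x and that the new model.py is on the import path;
# the toy shapes below (batch=8, time steps=23, features=6) are arbitrary.
import numpy as np
import tensorflow as tf

from model import RNNAE

x = np.random.rand(8, 23, 6).astype("float32")   # (batch, time steps, features)

model = RNNAE(64, x.shape[-1], n_cluster=5, dropout_rate=0.2)

# call() returns (embedding, forward reconstruction, reversed reconstruction, class scores)
emb, dec, decR, pred = model(x)
print(emb.shape, dec.shape, decR.shape, pred.shape)   # (8, 64) (8, 23, 6) (8, 23, 6) (8, 5)

# siameseDistance() consumes a pair of batches, as the refactored trainStepL does
d_w = model.siameseDistance([x, x])
print(d_w.shape)   # (8,)

The refactored entry point in main.py is then run as: python main.py <dataDir> <nSamples> <runId>, where <dataDir> is expected to contain data.npy, class.npy, and the <nSamples>_<runId>.npy index file loaded above.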