Commit 2648ecb9 authored by Ienco Dino's avatar Ienco Dino
Browse files

add initial file

parents
import tensorflow as tf
class RNNAE(tf.keras.Model):
    """Recurrent autoencoder with a forward and a time-reversed LSTM branch.

    Each branch encodes the whole input sequence into one vector with an
    LSTM, repeats that vector once per time step, and decodes it with a
    second LSTM followed by a per-time-step linear Dense layer.

    NOTE(review): ``dropout_rate``, ``hidden_activation`` and
    ``output_activation`` are accepted by the constructor but are not used
    anywhere in this class.
    """

    def __init__(self, filters, outputDim, dropout_rate=0.0,
                 hidden_activation='relu', output_activation='softmax',
                 name='convNetwork2', **kwargs):
        # Call the constructor of the parent class, Model.
        super().__init__(name=name, **kwargs)

        # Encoders: both return only their last state; encoderR reads the
        # sequence backwards (go_backwards=True).
        self.encoderR = tf.keras.layers.LSTM(filters, go_backwards=True)
        self.encoder = tf.keras.layers.LSTM(filters)

        # Forward decoder: sequence-returning LSTM + linear projection to
        # ``outputDim`` applied at every time step.
        self.decoder = tf.keras.layers.LSTM(filters, return_sequences=True)
        self.decoder2 = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(units=outputDim, activation=None)
        )

        # Backward decoder: same structure, separate weights.
        self.decoderR = tf.keras.layers.LSTM(filters, return_sequences=True)
        self.decoder2R = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(units=outputDim, activation=None)
        )

    def call(self, inputs, training=False):
        """Run both branches and return their reconstructions and the code.

        Returns a 3-tuple ``(forward_reconstruction,
        backward_reconstruction, embedding)`` where the embedding is the
        concatenation of the two encoder outputs along axis 1 and the
        backward reconstruction has been flipped along axis 1.

        ``training`` is not explicitly forwarded to the sub-layers.
        """
        # Axis 1 of the input is used as the number of steps to repeat the
        # encoded vector for.
        n_steps = inputs.get_shape()[1]

        # Forward branch.
        code_fwd = self.encoder(inputs)
        repeated_fwd = tf.keras.layers.RepeatVector(n_steps)(code_fwd)
        reco_fwd = self.decoder2(self.decoder(repeated_fwd))

        # Backward branch; its output is reversed along axis 1 before being
        # returned.
        code_bwd = self.encoderR(inputs)
        repeated_bwd = tf.keras.layers.RepeatVector(n_steps)(code_bwd)
        reco_bwd = self.decoder2R(self.decoderR(repeated_bwd))
        reco_bwd = tf.reverse(reco_bwd, axis=[1])

        return reco_fwd, reco_bwd, tf.concat((code_fwd, code_bwd), axis=1)
        # Earlier alternative return kept for reference:
        # (dec+decR)/2, tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1)
import numpy as np
import tensorflow as tf
import os
import sys
from sklearn.metrics import f1_score, r2_score
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
import time
from sklearn.manifold import TSNE
import matplotlib.pyplot as pyplot
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score
from active_semi_clustering.semi_supervised.pairwise_constraints import MPCKMeans, PCKMeans, COPKMeans
from model import RNNAE
def generateConstraints(idxLabelledData, labels):
    """Sort every unordered pair of positions into must-link / cannot-link.

    Positions run over ``range(len(idxLabelledData))``; ``labels`` is indexed
    with those same positions (not with the values stored in
    ``idxLabelledData``).

    Returns:
        ``(ml, cl)``: two lists of ``[i, j]`` position pairs with ``i < j``.
        ``ml`` holds the pairs whose labels compare equal, ``cl`` the others.
    """
    must_link = []
    cannot_link = []
    n_items = len(idxLabelledData)
    for first in range(n_items):
        for second in range(first + 1, n_items):
            same_label = labels[first] == labels[second]
            bucket = must_link if same_label else cannot_link
            bucket.append([first, second])
    return must_link, cannot_link
def getBatch(X, i, batch_size):
    """Return the ``i``-th consecutive slice of ``batch_size`` rows of ``X``.

    The slice is taken along the first axis and is truncated at
    ``X.shape[0]``, so the last batch may be shorter (or empty when ``i`` is
    past the end).
    """
    first = i * batch_size
    last = min(first + batch_size, X.shape[0])
    return X[first:last]
def buildPair(x_train, labels):
    """Build every unordered pair of examples with a same/different target.

    Pairs ``(i, j)`` with ``i < j`` are enumerated in row-major order (all
    pairs for ``i = 0`` first, then ``i = 1``, ...), the same order a nested
    ``for i / for j in range(i + 1, n)`` loop would produce.

    Args:
        x_train: array-like whose first axis indexes the examples.
        labels: 1-D array-like with one label per example; its length
            determines how many examples are paired.

    Returns:
        ``(f_data, s_data, y_val)`` where ``f_data[k]`` / ``s_data[k]`` are
        the first / second example of pair ``k`` and ``y_val[k]`` is ``0``
        when the two labels are equal and ``1`` otherwise. With fewer than
        two examples the three arrays are empty (first axis of length 0)
        instead of raising from stacking an empty list.
    """
    x_train = np.asarray(x_train)
    labels = np.asarray(labels)

    # Index pairs of the strict upper triangle: exactly the (i, j), i < j
    # combinations, already sorted by i then j.
    first_idx, second_idx = np.triu_indices(labels.shape[0], k=1)

    y_val = (labels[first_idx] != labels[second_idx]).astype(int)
    return x_train[first_idx], x_train[second_idx], y_val
def trainStepL(model, x_train, labels, loss_object, optimizer, BATCH_SIZE, e):
    """Run one epoch of pairwise (contrastive) + reconstruction training.

    All pairs of labelled examples are built with ``buildPair``, shuffled and
    processed in mini-batches. For each batch the loss is the mean
    contrastive term (margin 1.0) plus ``loss_object`` applied to the two
    reconstructions of each side of the pair.

    Args:
        model: object exposing ``siameseDistance([a, b], training=True)`` and
            callable as ``model(x, training=True)`` returning four values, of
            which the second and third are used as reconstructions.
            (assumes ``siameseDistance`` returns one distance per pair --
            TODO confirm against the model implementation)
        x_train: labelled examples, first axis indexes the examples.
        labels: one label per labelled example.
        loss_object: callable ``loss_object(target, prediction)``.
        optimizer: optimizer whose ``apply_gradients`` is called per batch.
        BATCH_SIZE: number of pairs per mini-batch (integer).
        e: epoch index; not used by this function.

    Returns:
        The sum of the per-batch losses divided by the number of batches
        actually processed, or ``0.0`` when there is no pair to train on.
    """
    margin = 1.0
    tot_loss = 0.0

    f_data, s_data, y_val = buildPair(x_train, labels)
    n_pairs = f_data.shape[0]
    # Ceil division: the number of batches is an integer so that the final
    # average is taken over the batches really processed (a float count such
    # as 3.5 would bias the reported mean).
    n_batches = (n_pairs + BATCH_SIZE - 1) // BATCH_SIZE
    if n_batches == 0:
        return tot_loss

    f_data, s_data, y_val = shuffle(f_data, s_data, y_val)

    for ibatch in range(n_batches):
        batch_f = getBatch(f_data, ibatch, BATCH_SIZE)
        batch_s = getBatch(s_data, ibatch, BATCH_SIZE)
        batch_y = getBatch(y_val, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            d_w = model.siameseDistance([batch_f, batch_s], training=True)
            # batch_y is 0 for same-label pairs and 1 otherwise: same-label
            # pairs are penalised by their distance, different-label pairs
            # by how far they fall short of the margin.
            equal_loss = (.5 * (1 - batch_y) * d_w)
            neg_loss = (.5 * batch_y * tf.math.maximum(0, margin - d_w))
            loss = tf.reduce_mean(equal_loss + neg_loss)

            _, reco_f, reco_fR, _ = model(batch_f, training=True)
            _, reco_s, reco_sR, _ = model(batch_s, training=True)
            loss += loss_object(batch_f, reco_f)
            loss += loss_object(batch_f, reco_fR)
            loss += loss_object(batch_s, reco_s)
            # The second reconstruction of batch_s is compared with batch_s
            # itself (it used to be compared with batch_f).
            loss += loss_object(batch_s, reco_sR)
        grads = tape.gradient(loss, model.trainable_variables)
        # Variables that did not take part in the loss get a zero gradient
        # instead of None so that apply_gradients accepts every pair.
        grads = [grad if grad is not None else tf.zeros_like(var)
                 for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss
    return (tot_loss / n_batches)
def trainStepStrech(model, x_train, centers, loss_object, optimizer, BATCH_SIZE, e):
    """Run one epoch of reconstruction training with a pull towards centers.

    For each mini-batch the loss is ``loss_object`` on the two
    reconstructions plus the batch mean of the squared Euclidean distance
    between each embedding and the matching row of ``centers``.

    Args:
        model: callable as ``model(x, training=True)`` returning
            ``(emb, reco, recoR, classif)``.
        x_train: training examples, first axis indexes the examples.
        centers: array with one target row per example, aligned with
            ``x_train`` (row ``k`` of ``centers`` belongs to example ``k``);
            cast to float32 before use.
        loss_object: callable ``loss_object(target, prediction)``.
        optimizer: optimizer whose ``apply_gradients`` is called per batch.
        BATCH_SIZE: number of examples per mini-batch (integer).
        e: epoch index; not used by this function.

    Returns:
        The sum of the per-batch losses divided by the number of batches
        actually processed, or ``0.0`` when ``x_train`` is empty.
    """
    tot_loss = 0.0
    # Ceil division keeps the batch count an integer, so the average below is
    # over the batches really processed rather than a fractional count.
    n_batches = (x_train.shape[0] + BATCH_SIZE - 1) // BATCH_SIZE
    if n_batches == 0:
        return tot_loss

    centers = centers.astype("float32")
    for ibatch in range(n_batches):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        batch_c = getBatch(centers, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco)
            loss_rec += loss_object(batch_x, recoR)
            # Pull every embedding towards its assigned center.
            loss_rec += tf.reduce_mean(tf.reduce_sum(tf.square(batch_c - emb), axis=1))
        grads = tape.gradient(loss_rec, model.trainable_variables)
        # Replace missing gradients by zeros so every variable has one.
        grads = [grad if grad is not None else tf.zeros_like(var)
                 for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return (tot_loss / n_batches)
def trainStep(model, x_train, loss_object, optimizer, BATCH_SIZE, e):
    """Run one epoch of pure reconstruction training.

    For each mini-batch the loss is ``loss_object`` applied to the two
    reconstructions returned by the model.

    Args:
        model: callable as ``model(x, training=True)`` returning
            ``(emb, reco, recoR, classif)``.
        x_train: training examples, first axis indexes the examples.
        loss_object: callable ``loss_object(target, prediction)``.
        optimizer: optimizer whose ``apply_gradients`` is called per batch.
        BATCH_SIZE: number of examples per mini-batch (integer).
        e: epoch index; not used by this function.

    Returns:
        The sum of the per-batch losses divided by the number of batches
        actually processed, or ``0.0`` when ``x_train`` is empty.
    """
    tot_loss = 0.0
    # Ceil division keeps the batch count an integer, so the average below is
    # over the batches really processed rather than a fractional count.
    n_batches = (x_train.shape[0] + BATCH_SIZE - 1) // BATCH_SIZE
    if n_batches == 0:
        return tot_loss

    for ibatch in range(n_batches):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco)
            loss_rec += loss_object(batch_x, recoR)
        grads = tape.gradient(loss_rec, model.trainable_variables)
        # Replace missing gradients by zeros so every variable has one.
        grads = [grad if grad is not None else tf.zeros_like(var)
                 for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return (tot_loss / n_batches)
def trainRNNAE(model, nClasses, data, labelledData, labelsSmall, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs, n_epochs_warmUp=40):
    """Train the autoencoder in two phases and return the trained model.

    Phase 1 (warm-up): each epoch runs ``trainStep`` on all the data, then
    ``trainStepL`` on the labelled subset.

    Between the phases the embeddings of ``data`` are clustered with KMeans
    and every example is associated with the center of its cluster.

    Phase 2: each epoch runs ``trainStepStrech`` (reconstruction + pull
    towards the assigned center), then ``trainStepL``.

    Args:
        model: the model to train; called as ``model(data)`` and expected to
            return four values, the first being the embedding.
        nClasses: number of KMeans clusters.
        data: all training examples.
        labelledData: labelled examples used by ``trainStepL``.
        labelsSmall: labels of ``labelledData``.
        loss_huber: loss callable forwarded to the training steps.
        optimizer: optimizer for ``trainStep`` / ``trainStepStrech``.
        optimizer2: optimizer for ``trainStepL``.
        BATCH_SIZE: mini-batch size.
        n_epochs: total number of epochs over both phases.
        n_epochs_warmUp: number of warm-up epochs (default 40). It is capped
            at ``n_epochs`` so that the total never exceeds what was asked.

    Returns:
        The same ``model`` object, after training.
    """
    n_warmup = max(0, min(n_epochs_warmUp, n_epochs))

    for e in range(n_warmup):
        labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
        data = shuffle(data)
        trainLoss = trainStep(model, data, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, labelledData, labelsSmall, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))

    emb, _, _, _ = model(data)
    km = KMeans(n_clusters=nClasses)
    km.fit(np.asarray(emb))
    # One center row per example, in the same order as ``data``: indexing the
    # center table with the cluster assignment of every example.
    centers = km.cluster_centers_[km.labels_]

    for e in range(n_epochs - n_warmup):
        labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
        # data and centers are shuffled together to keep them aligned.
        data, centers = shuffle(data, centers)
        trainLoss = trainStepStrech(model, data, centers, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, labelledData, labelsSmall, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))
    return model
def plot2DFeatures(data, labels):
    """Show a 2-D t-SNE projection of ``data`` with one scatter per label.

    Args:
        data: feature matrix passed to ``TSNE(n_components=2)``.
        labels: array with one label per row of ``data``.

    The figure is drawn, kept on screen for 10 seconds through
    ``pyplot.pause`` and then cleared.
    """
    X_embedded = TSNE(n_components=2).fit_transform(data)
    # Iterate over the label values actually present, so labels that are not
    # exactly 0..k-1 (e.g. 1-based or non-contiguous) are still plotted.
    for label_value in np.unique(labels):
        members = np.where(labels == label_value)[0]
        pyplot.scatter(X_embedded[members, 0], X_embedded[members, 1])
    pyplot.draw()
    pyplot.pause(10)
    pyplot.clf()
def getExtractLabelSet(data, labels, nSamples):
    """Draw up to ``nSamples`` random examples from every class.

    Args:
        data: array whose first axis indexes the examples.
        labels: array with one label per example.
        nSamples: maximum number of examples to take per class (integer).

    Returns:
        ``(labelledData, labelsSmall)``: the selected rows of ``data``,
        grouped class by class in the order of ``np.unique(labels)``, and
        the matching labels. Both always have the same length: a class with
        fewer than ``nSamples`` members contributes only the members it has.

    Raises:
        ValueError: from ``np.concatenate`` when ``labels`` is empty.
    """
    picked_rows = []
    picked_labels = []
    for val in np.unique(labels):
        members = np.where(labels == val)[0]
        chosen = np.random.permutation(members)[0:nSamples]
        picked_rows.append(data[chosen])
        # One label per row actually selected, so data and labels stay
        # aligned even when the class is smaller than nSamples.
        picked_labels.append(np.full(chosen.shape[0], val))
    labelledData = np.concatenate(picked_rows, axis=0)
    return labelledData, np.concatenate(picked_labels, axis=0)
# ---------------------------------------------------------------------------
# Script entry: load the data set, train the autoencoder, cluster the learned
# embeddings with KMeans and print the NMI against the ground-truth labels.
#
# Command-line arguments:
#   1: directory containing data.npy and class.npy
#   2: number-of-samples tag (kept as a string, only used in a file name)
#   3: run id (only used in a file name)
# ---------------------------------------------------------------------------
dataDir = sys.argv[1]
nSamples = sys.argv[2]
runId = sys.argv[3]
#Dordogne 23
data = np.load(dataDir+"/data.npy")
labels = np.load(dataDir+"/class.npy")
# Indices of the labelled subset, read from <dataDir>/<nSamples>_<runId>.npy.
idxLabelledData = np.load(dataDir+"/"+nSamples+"_"+runId+".npy")
labelledData = data[idxLabelledData]
labelsSmall = labels[idxLabelledData]
print("labelledData.shape ",labelledData.shape)
print("labelsSmall.shape ",labelsSmall.shape)
# Copy of the data taken before training; used for the final embedding pass.
origData = np.array(data)
nClasses = len(np.unique(labels))
# NOTE(review): three positional arguments plus dropout_rate= are passed
# here; confirm this matches the constructor of the RNNAE class imported from
# model (a signature whose third positional parameter is dropout_rate would
# reject this call).
RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)
""" defining loss function and the optimizer to use in the training phase """
loss_huber = tf.keras.losses.Huber()
# NOTE(review): loss_object2 is not referenced again in the visible script.
loss_object2 = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)#MeanAbsoluteError()#
# Two separate Adam optimizers: trainRNNAE forwards the first one to
# trainStep / trainStepStrech and the second one to trainStepL.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)
BATCH_SIZE = 32
n_epochs = 100
RNNAE_model = trainRNNAE(RNNAE_model, nClasses, data, labelledData, labelsSmall, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs)
# The model output is unpacked as four values; the first one is used as the
# embedding. NOTE(review): confirm the model really returns four values.
emb, _, _, _ = RNNAE_model(origData)
emb = emb.numpy()
# Cluster the embeddings into as many groups as there are distinct labels and
# score the clustering with normalized mutual information.
km = KMeans(n_clusters=nClasses)
km.fit(emb)
nmi = normalized_mutual_info_score(labels, km.labels_)
print("nmi %f" % nmi)
#plot2DFeatures(emb, labels)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment