From 9a1ff9ce1c36f79f8f7739daff66c97e71862d7b Mon Sep 17 00:00:00 2001
From: Benedetti Paola <paole.benedetti@irstea.fr>
Date: Fri, 23 Feb 2018 16:02:08 +0100
Subject: [PATCH] update last

---
 REUNION/Attention.py |  48 +++++++---
 REUNION/Bgru.py      | 207 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 242 insertions(+), 13 deletions(-)
 create mode 100644 REUNION/Bgru.py

diff --git a/REUNION/Attention.py b/REUNION/Attention.py
index a59b241..083702f 100644
--- a/REUNION/Attention.py
+++ b/REUNION/Attention.py
@@ -23,24 +23,25 @@ def RnnAttention( x, nunits, nlayer, n_timestamps, choice, dropout ):
         cells=[]
         for _ in range(nlayer):
             cell = rnn.GRUCell(nunits)
-            if choice == 'output':
-                cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)
-            if choice == 'state':
-                cell = tf.nn.rnn_cell.DropoutWrapper(cell, state_keep_prob=dropout)
+            cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)
             cells.append(cell)
         cell = tf.contrib.rnn.MultiRNNCell(cells)
     #SIGNLE LAYER: single GRUCell, nunits hidden units each
-
-    cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)
+    else:
+        cell = rnn.GRUCell(nunits)
+        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)
     outputs,_=rnn.static_rnn(cell, x, dtype="float32")
     outputs = tf.stack(outputs, axis=1)
-
+    print "output", outputs.get_shape()
     # Trainable parameters
     attention_size = int(nunits / 2)
+    print "units",nunits
+    print "att size", attention_size
     W_omega = tf.Variable(tf.random_normal([nunits, attention_size], stddev=0.1))
+    print "w_omega",W_omega.get_shape()
     b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
     u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

@@ -53,13 +54,16 @@ def RnnAttention( x, nunits, nlayer, n_timestamps, choice, dropout ):
     # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
     output = tf.reduce_sum(outputs * tf.expand_dims(alphas, -1), 1)
+    print "output", output.get_shape()
     output = tf.reshape(output, [-1, nunits])
-
+    print "output", output.get_shape()
+    exit()
     return output

 #def getPrediction(x_rnn, x_rnn_b, x_cnn, nunits, nlayer, n_classes, choice):
-def getPrediction(x, nunits, nlayer, n_classes, choice, dropout, is_training, n_timestamps):
+def getPrediction(x, nunits, nlayer, n_classes, choice, dropout, is_training, n_timestamps, batchsz):
+    n_filters = 512
     prediction = None
     output = None

@@ -67,11 +71,28 @@ def getPrediction(x, nunits, nlayer, n_classes, choice, dropout, is_training, n_
     output = RnnAttention(x, nunits, nlayer, n_timestamps, choice, dropout)
     print 'output:',output.get_shape()

-    prediction = tf.layers.dense(output, n_classes, name='prediction')
+
+    prediction = tf.layers.dense(output, n_classes, activation=None, name='prediction')
     print 'prediction:',prediction.get_shape()

     return prediction

+#apply fully connected layer to the input in order to extract more features before feeding the network
+def featuredx( x, timesteps ):
+    print "input:",x.get_shape()
+
+    x = tf.unstack(x,timesteps,1)
+    #array (?,16)
+    new_x = []
+    for i in range(len(x)):
+        print "xi.get_shape"
+        print x[i].get_shape()
+        fc = tf.layers.dense( x[i], 64, activation=tf.nn.relu )
+        print "fc.get_shape"
+        print fc.get_shape()
+        new_x.append(fc)
+    #array (?,64)
+    return new_x

 def getBatch(X, Y, i, batch_size):

@@ -132,6 +153,7 @@ path_in_ts = sys.argv[3]
 path_in_gt = sys.argv[4]
 #if wrapper on state or input gate
 choice = sys.argv[5]
+drop_val = float(sys.argv[6])
 g_path = './dataset/TS/%dx%d/'%( patch_window, patch_window )

 train_x = np.load( path_in_ts+"train_x%d_%d.npy"%( itr, p_split ) )
@@ -141,7 +163,7 @@ n_classes = np.bincount( train_y ).shape[0]-1
 train_y = getLabelFormat( train_y )
 train_x = getRNNFormat( train_x )
-directory = g_path+'AttentionI_nofc_%s_%d_%d/'%( choice, p_split, batchsz)
+directory = g_path+'AttentionI_nofc_do%s_%s_%d_%d/'%( str(drop_val) ,choice, p_split, batchsz)
 if not os.path.exists(directory):
     os.makedirs(directory)

@@ -161,7 +183,7 @@ dropout = tf.placeholder(tf.float32, shape=(), name="drop_rate")

 sess = tf.InteractiveSession()

-prediction = getPrediction(x, nunits, n_levels_lstm, n_classes, choice, dropout, is_training_ph, n_timestamps)
+prediction = getPrediction(x, nunits, n_levels_lstm, n_classes, choice, dropout, is_training_ph, n_timestamps, batchsz)

 cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=prediction) )
 optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

@@ -193,7 +215,7 @@ for e in range(hm_epochs):
         acc,_,loss = sess.run([accuracy,optimizer,cost],feed_dict={x:batch_x,
                                 y:batch_y,
                                 is_training_ph:True,
-                                dropout:0.7,
+                                dropout:drop_val,
                                 learning_rate:0.0002})
         lossi+=loss
         accS+=acc

diff --git a/REUNION/Bgru.py b/REUNION/Bgru.py
new file mode 100644
index 0000000..1f8a908
--- /dev/null
+++ b/REUNION/Bgru.py
@@ -0,0 +1,207 @@
+import sys
+import os
+import numpy as np
+import math
+from operator import itemgetter, attrgetter, methodcaller
+import tensorflow as tf
+from tensorflow.contrib import rnn
+import random
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import precision_recall_fscore_support
+from sklearn.metrics import f1_score
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.utils import shuffle
+from sklearn.metrics import confusion_matrix
+
+def Bgru(x, nunits, nlayer, timesteps, nclasses, dropout):
+
+    #Processing input tensor array of (?,16)
+    x = tf.unstack(x,timesteps,1)
+
+    b_cell = None
+    f_cell = None
+
+    b_cell = rnn.GRUCell(nunits)
+    f_cell = rnn.GRUCell(nunits)
+
+    f_cell = tf.nn.rnn_cell.DropoutWrapper(f_cell, output_keep_prob=dropout)
+    b_cell = tf.nn.rnn_cell.DropoutWrapper(b_cell, output_keep_prob=dropout)
+
+    outputs,_,_=rnn.static_bidirectional_rnn(f_cell, b_cell, x, dtype=tf.float32)
+
+    outputs = tf.stack(outputs, axis=1)
+    return outputs
+
+#def getPrediction(x_rnn, x_rnn_b, x_cnn, nunits, nlayer, n_classes, choice):
+def getPrediction(x, nunits, nlayer, n_classes, dropout, is_training):
+    n_timetamps = 34
+    prediction = None
+    features = None
+    features = Bgru( x, nunits, nlayer, n_timetamps, n_classes, dropout )
+    # Trainable parameters
+    print "output",features.get_shape()
+    attention_size = int(nunits)
+    print "units",nunits
+    print "att size",attention_size
+    W_omega = tf.Variable(tf.random_normal([nunits*2, attention_size], stddev=0.1))
+    print "womega",W_omega.get_shape()
+    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
+    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
+
+    # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
+    # the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
+    v = tf.tanh(tf.tensordot(features, W_omega, axes=1) + b_omega)
+
+    # For each of the timestamps its vector of size A from `v` is reduced with `u` vector
+    vu = tf.tensordot(v, u_omega, axes=1)  # (B,T) shape
+
+    alphas = tf.nn.softmax(vu)  # (B,T) shape also
+    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
+    features = tf.reduce_sum(features * tf.expand_dims(alphas, -1), 1)
+    print "output",features.get_shape()
+    features = tf.reshape(features, [-1, nunits*2])
+    print "output",features.get_shape()
+    prediction = tf.layers.dense( features, n_classes, activation=None, name='prediction')
+    print "prediction",prediction.get_shape()
+    return prediction
+
+def getBatch(X, Y, i, batch_size):
+
+    start_id = i*batch_size
+    end_id = min( (i+1) * batch_size, X.shape[0])
+    batch_x = X[start_id:end_id]
+    batch_y = Y[start_id:end_id]
+
+    return batch_x, batch_y
+
+def getLabelFormat(Y):
+
+    vals = np.unique( np.array(Y) )
+    sorted(vals)
+    hash_val = {}
+
+    for el in vals:
+        hash_val[el] = len(hash_val.keys())
+
+    new_Y = []
+
+    for el in Y:
+        t = np.zeros(len(vals))
+        t[hash_val[el]] = 1.0
+        new_Y.append(t)
+
+    return np.array(new_Y)
+
+
+def getRNNFormat(X):
+
+    #print X.shape
+    new_X = []
+    for row in X:
+        new_X.append( np.split(row, 34) )
+
+    return np.array(new_X)
+
+#main
+#Model parameters
+
+batchsz = 64
+hm_epochs = 400
+n_levels_lstm = 1
+drop_val = 0.6
+#dropout = 0.2
+
+#Data information
+n_timestamps = 34
+n_dims = 16
+patch_window = 25
+n_channels = 5
+
+itr = int( sys.argv[1] )
+p_split = 100*float( sys.argv[2] )
+nunits = int(sys.argv[3])
+path_in_ts = sys.argv[4]
+path_in_gt = sys.argv[5]
+g_path = './dataset/TS/%dx%d/'%( patch_window, patch_window )
+
+train_x = np.load( path_in_ts+"train_x%d_%d.npy"%( itr, p_split ) )
+train_y = np.load( path_in_gt+"train_y%d_%d.npy"%( itr, p_split ) )
+
+n_classes = np.bincount( train_y ).shape[0]-1
+
+train_y = getLabelFormat( train_y )
+train_x = getRNNFormat( train_x )
+directory = g_path+'Bgru_Attention_output0.6_%d_%d_%d/'%( p_split, batchsz, nunits)
+if not os.path.exists(directory):
+    os.makedirs(directory)
+
+path_out_model = directory+'modelTT%d/'%itr
+if not os.path.exists(path_out_model):
+    os.makedirs(path_out_model)
+
+#log File
+flog = open( directory+"log.txt","a" )
+
+
+x = tf.placeholder("float",[None,n_timestamps,n_dims],name="x_rnn")
+y = tf.placeholder("float",[None,n_classes],name="y")
+learning_rate = tf.placeholder(tf.float32, shape=(), name="learning_rate")
+is_training_ph = tf.placeholder(tf.bool, shape=(), name="is_training")
+dropout = tf.placeholder(tf.float32, shape=(), name="drop_rate")
+
+sess = tf.InteractiveSession()
+
+prediction = getPrediction(x, nunits, n_levels_lstm, n_classes, dropout, is_training_ph)
+
+cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=prediction) )
+optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
+
+correct = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
+accuracy = tf.reduce_mean(tf.cast(correct,tf.float64))
+
+tf.global_variables_initializer().run()
+
+# Add ops to save and restore all the variables.
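+# The training loop below shuffles the training set at every epoch, accumulates the
+# per-batch loss and accuracy, and writes a checkpoint only when the mean epoch loss improves.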
+saver = tf.train.Saver()
+iterations = train_x.shape[0] / batchsz
+
+if train_x.shape[0] % batchsz != 0:
+    iterations+=1
+
+best_loss = sys.float_info.max
+
+for e in range(hm_epochs):
+    lossi = 0
+    accS = 0
+
+    train_x, train_y = shuffle(train_x, train_y, random_state=0)
+
+    for ibatch in range(iterations):
+    #for ibatch in range(10):
+        #BATCH_X BATCH_Y: i-th batches of train_indices_x and train_y
+        batch_x, batch_y = getBatch(train_x, train_y, ibatch, batchsz)
+
+        acc,_,loss = sess.run([accuracy,optimizer,cost],feed_dict={x:batch_x,
+                                #x_rnn_b:batch_rnn_x_b,
+                                #x_cnn:batch_cnn_x,
+                                y:batch_y,
+                                is_training_ph:True,
+                                dropout:drop_val,
+                                learning_rate:0.0002})
+        lossi+=loss
+        accS+=acc
+
+        del batch_x
+        del batch_y
+
+    c_loss = float(lossi/iterations)
+    c_acc = float(accS/iterations)
+    print "Epoch:",e,"Train loss:",c_loss,"| Accuracy:",c_acc
+    flog.write("Epoch %d Train loss:%f | Accuracy: %f\n"%( e, c_loss, c_acc ))
+
+    if c_loss < best_loss:
+        best_loss = c_loss
+
+        save_path = saver.save( sess, path_out_model+'model', global_step = itr )
+
+flog.close()
--
GitLab