Bgru.py

import sys
import os
import numpy as np
import math
from operator import itemgetter, attrgetter, methodcaller
import tensorflow as tf
from tensorflow.contrib import rnn
import random
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix

def Bgru(x, nunits, nlayer, timesteps, nclasses, dropout):

    #Processing input tensor array of (?,16)
	x = tf.unstack(x,timesteps,1)

	b_cell = None
	f_cell = None

	b_cell = rnn.GRUCell(nunits)
	f_cell = rnn.GRUCell(nunits)

	f_cell = tf.nn.rnn_cell.DropoutWrapper(f_cell, output_keep_prob=dropout)
	b_cell = tf.nn.rnn_cell.DropoutWrapper(b_cell, output_keep_prob=dropout)

	outputs,_,_=rnn.static_bidirectional_rnn(f_cell, b_cell, x, dtype=tf.float32)

	outputs = tf.stack(outputs, axis=1)
	return outputs

#def getPrediction(x_rnn, x_rnn_b, x_cnn, nunits, nlayer, n_classes, choice):
def getPrediction(x, nunits, nlayer, n_classes, dropout, is_training):
	n_timetamps = 34
	features = Bgru( x, nunits, nlayer, n_timetamps, n_classes, dropout )
	# Trainable parameters
	print "output ",features.get_shape()
	attention_size = int(nunits)
	print "units ",nunits
	print "att size",attention_size
	W_omega = tf.Variable(tf.random_normal([nunits*2, attention_size], stddev=0.1))
	print "womega ",W_omega.get_shape()
	b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
	u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
	# Applying fully connected layer with non-linear activation to each of the B*T timestamps;
	#  the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
	v = tf.tanh(tf.tensordot(features, W_omega, axes=1) + b_omega)
	# For each of the timestamps its vector of size A from `v` is reduced with `u` vector
	vu = tf.tensordot(v, u_omega, axes=1)   # (B,T) shape
	alphas = tf.nn.softmax(vu)              # (B,T) shape also
	# Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
	features = tf.reduce_sum(features * tf.expand_dims(alphas, -1), 1)
	print "output", features.get_shape()
	features = tf.reshape(features, [-1, nunits*2])
	print "output ",features.get_shape()
	prediction = tf.layers.dense( features, n_classes, activation=None, name='prediction')
	print "prediction ",prediction.get_shape()

	return prediction

def getBatch(X, Y, i, batch_size):

    start_id = i*batch_size
    end_id = min( (i+1) * batch_size, X.shape[0])
    batch_x = X[start_id:end_id]
    batch_y = Y[start_id:end_id]

    return batch_x, batch_y

def getLabelFormat(Y):

    vals = np.unique( np.array(Y) )
    sorted(vals)
    hash_val = {}

    for el in vals:
        hash_val[el] = len(hash_val.keys())

    new_Y = []

    for el in Y:
        t = np.zeros(len(vals))
        t[hash_val[el]] = 1.0
        new_Y.append(t)

    return np.array(new_Y)


def getRNNFormat(X):

    #print X.shape
    new_X = []
    for row in X:
        new_X.append( np.split(row, 34) )

    return np.array(new_X)

#main
#Model parameters

batchsz = 64
hm_epochs = 400
n_levels_lstm = 1
drop_val = 0.6
#dropout = 0.2

#Data INformation
n_timestamps = 34
n_dims = 16
patch_window = 25
n_channels = 5

itr = int( sys.argv[1] )
p_split = 100*float( sys.argv[2] )
nunits = int(sys.argv[3])
path_in_ts = sys.argv[4]
path_in_gt = sys.argv[5]
g_path = './dataset/TS/%dx%d/'%( patch_window, patch_window )

train_x = np.load( path_in_ts+"train_x%d_%d.npy"%( itr, p_split ) )
train_y = np.load( path_in_gt+"train_y%d_%d.npy"%( itr, p_split ) )

n_classes = np.bincount( train_y ).shape[0]-1

train_y = getLabelFormat( train_y )
train_x = getRNNFormat( train_x )
directory = g_path+'Bgru_Attention_output0.6_%d_%d_%d/'%( p_split, batchsz, nunits)
if not os.path.exists(directory):
    os.makedirs(directory)

path_out_model = directory+'modelTT%d/'%itr
if not os.path.exists(path_out_model):
    os.makedirs(path_out_model)

#log File
flog = open( directory+"log.txt","a" )


x = tf.placeholder("float",[None,n_timestamps,n_dims],name="x_rnn")
y = tf.placeholder("float",[None,n_classes],name="y")
learning_rate = tf.placeholder(tf.float32, shape=(), name="learning_rate")
is_training_ph = tf.placeholder(tf.bool, shape=(), name="is_training")
dropout = tf.placeholder(tf.float32, shape=(), name="drop_rate")

sess = tf.InteractiveSession()

prediction = getPrediction(x, nunits, n_levels_lstm, n_classes, dropout, is_training_ph)

cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=prediction)  )
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

correct = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct,tf.float64))

tf.global_variables_initializer().run()

# Add ops to save and restore all the variables.
saver = tf.train.Saver()
iterations = train_x.shape[0] / batchsz

if train_x.shape[0] % batchsz != 0:
    iterations+=1

best_loss = sys.float_info.max

for e in range(hm_epochs):
    lossi = 0
    accS = 0

    train_x, train_y = shuffle(train_x, train_y, random_state=0)

    for ibatch in range(iterations):
        #for ibatch in range(10):
        #BATCH_X BATCH_Y: i-th batches of train_indices_x and train_y
        batch_x, batch_y = getBatch(train_x, train_y, ibatch, batchsz)

        acc,_,loss = sess.run([accuracy,optimizer,cost],feed_dict={x:batch_x,
                                                                    #x_rnn_b:batch_rnn_x_b,
                                                                    #x_cnn:batch_cnn_x,
                                                                    y:batch_y,
                                                                    is_training_ph:True,
                                                                    dropout:drop_val,
                                                                    learning_rate:0.0002})
        lossi+=loss
        accS+=acc

        del batch_x
        del batch_y

    c_loss = float(lossi/iterations)
    c_acc = float(accS/iterations)
    print "Epoch:",e,"Train loss:",c_loss,"| Accuracy:",c_acc
    flog.write("Epoch %d Train loss:%f | Accuracy: %f\n"%( e, c_loss, c_acc ))

    if c_loss < best_loss:
        best_loss = c_loss

        save_path = saver.save( sess, path_out_model+'model', global_step = itr )

flog.close()