diff --git a/REUNION/preProcData.py b/REUNION/preProcData.py
index e139774b2a7f33fbd4501cb3c0c25b5e349edd43..34742914741bb33712fbb6b686b4cfde4fec8952 100644
--- a/REUNION/preProcData.py
+++ b/REUNION/preProcData.py
@@ -8,7 +8,7 @@ def obtainVHSRData(vhsr_fileName, valid_pixels):
     # I know that the minimum value of the BAND is 1
     min_val = 1
     scaling_factor = 5
-    buffer_window = 6
+    buffer_window = 2 #6 if 65x65
     data_vhsr = []
     for k in range(len(valid_pixels)):
         data_vhsr.append([])
diff --git a/REUNION/simpleCNN.py b/REUNION/simpleCNN.py
index e6cc8a6d8d99068bfde751c29445205d51f946fd..b2c75cde75b0a40fa09173b3ba3cf422d8ac7768 100644
--- a/REUNION/simpleCNN.py
+++ b/REUNION/simpleCNN.py
@@ -26,105 +26,95 @@ def getLabelFormat(Y):
     return np.array(new_Y)
 
 def CNN( x,height,width,n_filter,n_classes):
-    conv1 = tf.layers.conv2d( inputs=x,
-                              filters=32,
-                              kernel_size=[5, 5],
-                              padding="same",
-                              activation=tf.nn.relu)
-
-    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
-
-    conv2 = tf.layers.conv2d( inputs=pool1,
-                              filters=64,
-                              kernel_size=[5, 5],
-                              padding="same",
-                              activation=tf.nn.relu)
-
-    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
+    pred = None
+
+    conv1 = tf.layers.conv2d( inputs = x, filters = n_filter, kernel_size = [ 7, 7 ], strides = (2, 2), padding = "same", activation = tf.nn.relu )
+    bn1 = tf.layers.batch_normalization(conv1)
+    n_filter += 48
+    conv2 = tf.layers.conv2d( inputs = bn1,
+                              filters = n_filter,
+                              kernel_size = [ 4, 4 ],
+                              padding = "same",
+                              activation = tf.nn.relu )
+    bn2 = tf.layers.batch_normalization(conv2)
+    n_filter -= 32
+    conv3 = tf.layers.conv2d( inputs = bn2,
+                              filters = n_filter,
+                              kernel_size = [ 3, 3 ],
+                              padding = "same",
+                              activation = tf.nn.relu )
+    bn3 = tf.layers.batch_normalization(conv3)
     #vs max pool
-    #pool2_flat = tf.reduce_max(conv2, [1,2])
-    features = tf.reduce_max(pool2, [1,2])
-
-    #features = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
-    output = tf.layers.dense(inputs=features, units=n_classes)
-    return output, features
-
-def checkTest( test_x, test_y, batch_size, sess ):
-    iterations = test_x.shape[0]/batch_size
-
-    if test_x.shape[0]%batch_size!=0:
-        iterations+=1
-
-    testacc = 0
-    tot_pred = []
-    for batch_id in range(iterations):
-
-        batch_x, batch_y = getBatch(test_x, test_y, batch_size, batch_id )
-        pred_temp = sess.run( prediction, feed_dict={ x:batch_x } )
-
-        #TOT_PRED: accumulate max argument of each test prediciton evaluated by batch
-        for el in pred_temp:
-            tot_pred.append( np.argmax(el) )
-
-    #GT: contains max argument of each testsplitRandDino.py ground truth
-    gt = []
-    for el in test_y:
-        gt.append( np.argmax(el))
-
-    tot_pred = tot_pred[0:len(gt)]
-
-    test_KAPPA = cohen_kappa_score( gt, tot_pred )
-    test_fs = f1_score( gt, tot_pred )
-    test_acc = accuracy_score(gt, tot_pred, average='micro' )
-    print "Test Accuracy ", test_acc
-    return test_acc,test_fs,test_KAPPA
-
-
-
+    if tag_pool == "mean":
+        features = tf.reduce_mean(bn3, [1,2], name="features")
+    if tag_pool == "max":
+        features = tf.reduce_max(bn3, [1,2], name="features")
+
+    pred = tf.layers.dense(inputs=features, units=n_classes, name="prediction")
+    print "prediction",pred.get_shape()
+    #exit()
+    return pred, features
+
+# MAIN
+# Parameters
 itr = int( sys.argv[1] )
 p_split = 100*float( sys.argv[2] )
 batch_size = int( sys.argv[3] )
 hm_epochs= int( sys.argv[4] )
-
-n_filter = 32
+path_in_val = sys.argv[5]
+path_in_gt = sys.argv[6]
+tag_pool = sys.argv[7]
 
 g_path = 'dataset/'
 #g_path = '../../../reunion_script/splits/'
+n_filter = 64
 
-f = open("logCNN%dbatch.txt"%batch_size,"a")
-var_train_x = g_path + 'VHSR/train_x%d_%d.npy'%(itr,p_split)
-var_train_y = g_path+'ground_truth/train_y%d_%d.npy'%(itr,p_split)
-
-var_test_x = g_path+'VHSR/test_x%d_%d.npy'%(itr,p_split)
-var_test_y = g_path+'ground_truth/test_y%d_%d.npy'%(itr,p_split)
+#paths
+var_train_x = path_in_val + 'train_x%d_%d.npy'%(itr,p_split)
+var_train_y = path_in_gt+'train_y%d_%d.npy'%(itr,p_split)
 
+#load dataset
 #(90857,4,65,65)
 train_x = np.load(var_train_x)
 train_y = np.load(var_train_y)
 
-test_x = np.load(var_test_x)
-test_y = np.load(var_test_y)
+if path_in_val == './dataset/VHSR/':
 
-height = train_x.shape[2]
-width = height
-band = train_x.shape[1]
+    tag_band = '_'
+    # Network Parameters
+    height = train_x.shape[2]
+    width = height
+    band = train_x.shape[1]
+    n_classes = np.bincount( train_y ).shape[0]-1
+    #(90857,65,65,4)
+    train_x = np.swapaxes(train_x, 1, 3)
+else:
+    tag_band = 'NDVI_'
+    height = train_x.shape[1]
+    width = height
+    band = train_x.shape[3]
+    n_classes = np.bincount( train_y ).shape[0]-1
 
-print "train shape",train_x.shape
-print "width:%d"%width
-print "height%d"%height
-print "bands:%d"%band
+print "classes %d"%n_classes
 
-n_classes = np.bincount( test_y ).shape[0]-1
-print "n_classes:%d"%n_classes
+#format data
+train_y = getLabelFormat(train_y)
 
-#Graph input
-#drop_rate = tf.placeholder( tf.float32, [], name = "drop_rate" )
-#learning_rate = tf.placeholder(tf.float32, shape=[], name="learning_rate")
+directory = g_path+'SimpCnn%s_%s%d_%d/'%(tag_band, tag_pool, p_split, batch_size )
+if not os.path.exists(directory):
+    os.makedirs(directory)
 
-x = tf.placeholder("float",[None,height,width,band],name="x_cnn")
-y = tf.placeholder( tf.float32, [ None, n_classes ], name = "y" )
+path_out_model = directory+'modelTT%d/'%itr
+if not os.path.exists(path_out_model):
+    os.makedirs(path_out_model)
+
+#log file
+flog = open(directory+"log.txt","a")
+learning_rate = 0.001
+#tf_is_traing_pl = tf.placeholder(tf.bool, shape=(), name="is_training")
+x = tf.placeholder("float",[None,height,width,band],name="x")
+y = tf.placeholder( tf.float32, [ None, n_classes ], name = "y" )
 
 sess = tf.InteractiveSession()
@@ -133,23 +123,20 @@ prediction, feat = CNN( x, height, width, n_filter, n_classes )
 
 tensor1d = tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=prediction)
 cost = tf.reduce_mean(tensor1d)
-optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
+optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
 
 correct = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
 accuracy = tf.reduce_mean(tf.cast(correct,tf.float64))
 
-tf.global_variables_initializer().run()
+#to have tensorboard
+tf.summary.scalar("cost function", cost)
+writer = tf.summary.FileWriter(path_out_model+"histogram_example")
+summaries = tf.summary.merge_all()
 
-train_y = getLabelFormat(train_y)
-test_y = getLabelFormat(test_y)
-print "train_y shape",train_y.shape
-print "test_y shape",test_y.shape
+tf.global_variables_initializer().run()
 
-#(90857,65,65,4)
-train_x = np.swapaxes(train_x, 1, 3)
-test_x = np.swapaxes(test_x, 1, 3)
-print "train_x shape",train_x.shape
-print "test_x shape",test_x.shape
+#initialise to save model
+saver = tf.train.Saver()
 
 iterations = train_x.shape[0] / batch_size
@@ -157,6 +144,8 @@ if train_x.shape[0] % batch_size != 0:
     iterations+=1
 
+best_loss = 50
+j = 0
 for e in range(hm_epochs):
     #shuffle input
     train_x, train_y = shuffle(train_x, train_y)
@@ -165,20 +154,23 @@ for e in range(hm_epochs):
     accuracy_ = 0
     for batch_id in range( iterations ):
-        #if batch_id%100==0:
-        #    print "iter %d su %d"%(batch_id, iterations)
+
         batch_x, batch_y = getBatch(train_x, train_y, batch_size, batch_id )
-        #print "batch_x = ",batch_x.shape
-        #print "batch_y = ",batch_y.shape
-        acc,_,c = sess.run([accuracy,optimizer,cost], feed_dict={x: batch_x, y:batch_y})
+        summ, acc, _, c = sess.run([summaries, accuracy, optimizer, cost], feed_dict={x: batch_x, y:batch_y})
         accuracy_ += acc
         lossfunc += c
+        writer.add_summary( summ, j )
+        j += 1
+
+    loss_epoch = float(lossfunc/iterations)
+    acc_epoch = float(accuracy_/iterations)
+    print "epoch %d Train loss:%f| Accuracy: %f"%( e, loss_epoch, acc_epoch )
+    flog.write( "epoch %d Train loss: %f| Accuracy: %f\n"%( e, loss_epoch, acc_epoch ) )
+
+    if loss_epoch < best_loss:
+        best_loss = loss_epoch
 
-    print "epoch %d Train loss:%f| Accuracy: %f"%(e,lossfunc/iterations,accuracy_/iterations)
-    #f.write("Train loss:%f| %f\n"%(lossfunc/iterations,accuracy_/iterations))
+        saver.save( sess, path_out_model+'model', global_step = itr )
 
-test_acc,test_fs,test_KAPPA = checkTest(test_x, test_y, batch_size, sess)
-print "%d\t%f\t%f\t%f\n"%( itr, test_acc, test_fs, test_KAPPA )
-f.write("%d\t%f\t%f\t%f\n"%( itr, test_acc, test_fs, test_KAPPA ))
-f.close()
+flog.close()