Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
import sys
import glob
from scipy import sparse
from gbssl import LGC,HMN,PARW,MAD,OMNIProp,CAMLP
from sklearn.neighbors import kneighbors_graph
from scipy.sparse import coo_matrix
from sklearn.preprocessing import normalize
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score
import os.path
def extractKNNGraph(knn, k):
    """Build the mutual k-nearest-neighbour graph from a neighbour ranking.

    Parameters
    ----------
    knn : 2-D integer array
        Row ``i`` lists node indices ordered by increasing distance from
        node ``i``. Column 0 is skipped; it is assumed to hold the node
        itself (true for an argsort of a distance matrix when no other
        point lies at distance zero -- verify for data with duplicates).
    k : int
        Number of neighbours taken per node (columns ``1 .. k``).

    Returns
    -------
    scipy.sparse.lil_matrix
        ``nrow x nrow`` matrix holding 1 at ``(i, j)`` only when ``j`` is
        among the k neighbours of ``i`` AND ``i`` is among the k neighbours
        of ``j``; 0 elsewhere.
    """
    knn = np.asarray(knn)
    nrow = knn.shape[0]

    # Directed k-NN edges i -> j, built in one shot instead of assigning
    # element by element.
    neighbours = knn[:, 1:k + 1]
    rows = np.repeat(np.arange(nrow), neighbours.shape[1])
    cols = neighbours.ravel()
    directed = sparse.csr_matrix(
        (np.ones(cols.shape[0]), (rows, cols)), shape=(nrow, nrow)
    )
    # Repeated (i, j) pairs are summed during construction; clamp back to 1
    # so the matrix stays a 0/1 adjacency matrix.
    directed.data[:] = 1.0

    # MUTUAL KNN GRAPH: for 0/1 matrices the element-wise product with the
    # transpose equals the element-wise minimum, and it stays sparse, so no
    # dense n x n matrix is ever materialised.
    mutual = directed.multiply(directed.T)
    return sparse.lil_matrix(mutual)
def getKNNEucl(X):
    """Rank, for every row of X, all rows by Euclidean distance.

    The condensed pairwise distances from ``pdist`` are expanded to a square
    matrix, and each row of that matrix is argsorted.

    Returns an integer array with one row per sample: entry ``[i, r]`` is the
    index of the sample with the r-th smallest distance to sample ``i``.
    """
    pairwise = squareform(pdist(X, 'euclidean'))
    return pairwise.argsort(axis=1)
def classify(directoy, embFileName, labelFileName, numberOfNearestNeighbors):
    """Predict a class for every sample by label propagation on a k-NN graph.

    Steps: load the data representation, min-max scale it, build the mutual
    k-NN graph over Euclidean distances, fit a CAMLP model on the labeled
    nodes and return the arg-max class for every node.

    Parameters
    ----------
    directoy : unused by this function; kept so existing callers keep working.
    embFileName : path given to ``np.load`` for the data representation.
    labelFileName : path given to ``np.load`` for the labeled examples.
        Column 0 is read as the sample index, column 1 as its class; both
        are cast to int.
    numberOfNearestNeighbors : neighbourhood size for the k-NN graph.

    Returns
    -------
    Array with one predicted class index per sample (arg-max over the
    per-class scores returned by ``predict_proba``).
    """
    # Rescale the features with a default-configured MinMaxScaler before
    # computing distances.
    embedding = MinMaxScaler().fit_transform(np.load(embFileName))

    ranking = getKNNEucl(embedding)
    graph = extractKNNGraph(ranking, numberOfNearestNeighbors)
    n_nodes = graph.shape[0]

    label_table = np.load(labelFileName)
    seed_ids = np.array(label_table[:, 0].astype("int"))
    seed_classes = np.array(label_table[:, 1].astype("int"))

    model = CAMLP(graph=graph)
    model.fit(seed_ids, seed_classes)

    # Score every node in the graph, labeled ones included.
    class_scores = model.predict_proba(np.arange(n_nodes))
    return np.argmax(class_scores, axis=1)
# Neighbourhood size used when building the k-NN graph.
numberOfNearestNeighbors = 20

# Command-line arguments, in order:
#   1. Name of the directory in which the data are stored.
#   2. File containing the new representation learned with the SESAM
#      approach, or any other data representation.
#   3. File (in the directory/labels folder) with the label information.
#      It has as many rows as there are labeled examples; each row holds two
#      values: the position of the labeled example w.r.t. the data file
#      data.npy, and the associated label.
directory = sys.argv[1]
embFileName = sys.argv[2]
labelFileName = sys.argv[3]

prediction = classify(
    directory,
    embFileName,
    labelFileName,
    numberOfNearestNeighbors,
)
print(prediction)