Ienco Dino / cMTS-Clustering

Commit 513a2882, authored May 14, 2021 by Ienco Dino
Clean repository
parent 5627cc24
Changes: 4 files
BIRNNAE.py (deleted, mode 100755 → 0)
import tensorflow as tf


class RNNAE(tf.keras.Model):
    def __init__(self, filters, outputDim, dropout_rate=0.0, hidden_activation='relu', output_activation='softmax', name='convNetwork2', **kwargs):
        # call the constructor of the parent class, Model
        super(RNNAE, self).__init__(name=name, **kwargs)
        # forward and backward LSTM encoders, plus one decoder branch per reading direction
        self.encoderR = tf.keras.layers.LSTM(filters, go_backwards=True)
        self.encoder = tf.keras.layers.LSTM(filters)
        self.decoder = tf.keras.layers.LSTM(filters, return_sequences=True)
        self.decoder2 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))
        self.decoderR = tf.keras.layers.LSTM(filters, return_sequences=True)
        self.decoder2R = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))

    def call(self, inputs, training=False):
        t = inputs.get_shape()
        # forward branch: encode, repeat the embedding along the time axis, decode
        enc = self.encoder(inputs)
        emb = enc
        seq_emb = tf.keras.layers.RepeatVector(t[1])(emb)
        dec = self.decoder(seq_emb)
        dec = self.decoder2(dec)
        # backward branch: same scheme on the reversed reading of the sequence
        encR = self.encoderR(inputs)
        embR = encR
        seq_embR = tf.keras.layers.RepeatVector(t[1])(embR)
        decR = self.decoderR(seq_embR)
        decR = self.decoder2R(decR)
        decR = tf.reverse(decR, axis=[1])
        return dec, decR, tf.concat((emb, embR), axis=1)
        #(dec+decR)/2, tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1)
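For reference, a minimal sketch of how the RNNAE class above can be exercised (the batch size, sequence length and number of variables are made-up values; filters=64 mirrors the setting used in main.py below):

import numpy as np

# toy batch: 8 multivariate series, 20 timesteps, 3 variables (hypothetical sizes)
x = np.random.rand(8, 20, 3).astype("float32")
model = RNNAE(filters=64, outputDim=3)
dec, decR, emb = model(x)
# dec and decR reconstruct the input read forwards and backwards;
# emb concatenates the two 64-dimensional LSTM embeddings.
print(dec.shape, decR.shape, emb.shape)  # (8, 20, 3) (8, 20, 3) (8, 128)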
main.py (deleted, mode 100644 → 0)
import numpy as np
import tensorflow as tf
import os
import sys
from sklearn.metrics import f1_score, r2_score
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
import time
from sklearn.manifold import TSNE
import matplotlib.pyplot as pyplot
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score
from active_semi_clustering.semi_supervised.pairwise_constraints import MPCKMeans, PCKMeans, COPKMeans
from model import RNNAE


def generateConstraints(idxLabelledData, labels):
    ml = []
    cl = []
    for i in range(len(idxLabelledData)):
        for j in range(i + 1, len(idxLabelledData)):
            if labels[i] == labels[j]:
                ml.append([i, j])
            else:
                cl.append([i, j])
    return ml, cl


def getBatch(X, i, batch_size):
    start_id = i * batch_size
    t = (i + 1) * batch_size
    end_id = min((i + 1) * batch_size, X.shape[0])
    batch_x = X[start_id:end_id]
    return batch_x


def buildPair(x_train, labels):
    f_data = []
    s_data = []
    y_val = []
    n_examples = labels.shape[0]
    for i in range(n_examples):
        for j in range(i + 1, n_examples):
            if labels[i] == labels[j]:
                y_val.append(0)
            else:
                y_val.append(1)
            f_data.append(x_train[i])
            s_data.append(x_train[j])
    return np.stack(f_data, axis=0), np.stack(s_data, axis=0), np.array(y_val)
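# Illustration only (not part of the original main.py): buildPair labels each pair
# with 0 for a must-link (same class) and 1 for a cannot-link (different classes);
# these pair labels drive the contrastive loss in trainStepL below.
def _buildPair_example():
    toy_x = np.random.rand(4, 10, 2).astype("float32")  # 4 toy series (made-up shapes)
    toy_y = np.array([0, 0, 1, 1])                       # two samples per class
    f_data, s_data, y_val = buildPair(toy_x, toy_y)
    print(y_val)  # [0 1 1 1 1 0]: pairs (0,1) and (2,3) are must-link, the rest cannot-link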
def trainStepL(model, f_data, s_data, y_val, loss_object, optimizer, BATCH_SIZE, e):
    loss_iteration = 0
    tot_loss = 0.0
    margin = 1.0
    f_data, s_data, y_val = shuffle(f_data, s_data, y_val)
    iterations = f_data.shape[0] / BATCH_SIZE
    if f_data.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    for ibatch in range(int(iterations)):
        batch_f = getBatch(f_data, ibatch, BATCH_SIZE)
        batch_s = getBatch(s_data, ibatch, BATCH_SIZE)
        batch_y = getBatch(y_val, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            d_w = model.siameseDistance([batch_f, batch_s], training=True)
            # contrastive loss: pull must-link pairs (y=0) together, push cannot-link pairs (y=1) beyond the margin
            equal_loss = (.5 * (1 - batch_y) * d_w)
            neg_loss = (.5 * batch_y * tf.math.maximum(0, margin - d_w))
            loss = equal_loss + neg_loss
            loss = tf.reduce_mean(loss)
            _, reco_f, reco_fR, _ = model(batch_f, training=True)
            _, reco_s, reco_sR, _ = model(batch_s, training=True)
            loss += loss_object(batch_f, reco_f)
            loss += loss_object(batch_f, reco_fR)
            loss += loss_object(batch_s, reco_s)
            loss += loss_object(batch_f, reco_sR)
        grads = tape.gradient(loss, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss
    return (tot_loss / iterations)
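# Illustration only (not part of the original main.py): the two terms of the
# contrastive loss above, evaluated on made-up distances d_w and pair labels y
# with the same margin of 1.0.
def _contrastive_loss_example():
    margin = 1.0
    d_w = tf.constant([0.2, 0.9, 1.4])      # hypothetical siamese distances
    y = tf.constant([0.0, 1.0, 1.0])        # 0 = must-link pair, 1 = cannot-link pair
    equal_loss = .5 * (1 - y) * d_w                          # penalises distant must-link pairs
    neg_loss = .5 * y * tf.math.maximum(0.0, margin - d_w)   # penalises cannot-link pairs closer than the margin
    return tf.reduce_mean(equal_loss + neg_loss)             # (0.1 + 0.05 + 0.0) / 3 = 0.05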
def trainStepStrech(model, x_train, centers, loss_object, optimizer, BATCH_SIZE, e):
    loss_iteration = 0
    tot_loss = 0.0
    cosineSim = tf.keras.losses.CosineSimilarity(reduction=tf.keras.losses.Reduction.NONE)
    iterations = x_train.shape[0] / BATCH_SIZE
    if x_train.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    centers = centers.astype("float32")
    for ibatch in range(int(iterations)):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        batch_c = getBatch(centers, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco)
            loss_rec += loss_object(batch_x, recoR)
            # pull each embedding towards the centroid of its current cluster
            loss_rec += tf.reduce_mean(tf.reduce_sum(tf.square(batch_c - emb), axis=1))
        grads = tape.gradient(loss_rec, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return (tot_loss / iterations)


def trainStep(model, x_train, loss_object, optimizer, BATCH_SIZE, e):
    loss_iteration = 0
    tot_loss = 0.0
    iterations = x_train.shape[0] / BATCH_SIZE
    if x_train.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    for ibatch in range(int(iterations)):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco)
            loss_rec += loss_object(batch_x, recoR)
        grads = tape.gradient(loss_rec, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return (tot_loss / iterations)


def trainRNNAE(model, nClasses, data, f_data, s_data, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs):
    #th = 40
    n_epochs_warmUp = 40
    centers = None
    print("PRETRAINING STAGE : AE + CONTRASTIVE LOSS")
    for e in range(n_epochs_warmUp):
        f_data, s_data, y_val = shuffle(f_data, s_data, y_val)
        data = shuffle(data)
        trainLoss = trainStep(model, data, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, f_data, s_data, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))
    print("COMPUTE INTERMEDIATE CLUSTERING ASSIGNMENT")
    emb, _, _, _ = model(data)
    km = KMeans(n_clusters=nClasses)
    km.fit(emb)
    # one target centroid per sample: the centre of the cluster K-Means assigned it to
    centers = []
    for val in km.labels_:
        centers.append(km.cluster_centers_[val])
    centers = np.array(centers)
    print("REFINEMENT STEP alternating AE + MANIFOLD STRETCH TOWARDS CENTROIDS and AE + CONTRASTIVE LOSS")
    for e in range(n_epochs - n_epochs_warmUp):
        #labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
        data, centers = shuffle(data, centers)
        trainLoss = trainStepStrech(model, data, centers, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, f_data, s_data, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))
    return model


def plot2DFeatures(data, labels):
    X_embedded = TSNE(n_components=2).fit_transform(data)
    nclasses = len(np.unique(labels))
    for i in range(nclasses):
        idx = np.where(labels == i)
        pyplot.scatter(X_embedded[idx[0], 0], X_embedded[idx[0], 1])
    pyplot.draw()
    pyplot.pause(10)
    pyplot.clf()


def getExtractLabelSet(data, labels, nSamples):
    labelledData = []
    labelsSmall = []
    for val in np.unique(labels):
        idx = np.where(labels == val)
        idx = shuffle(idx[0])[0:nSamples]
        labelledData.append(data[idx])
        for j in range(nSamples):
            labelsSmall.append(val)
    labelledData = np.concatenate(labelledData, axis=0)
    return labelledData, np.array(labelsSmall)


def main(argv):
    dataDir = argv[1]
    nSamples = argv[2]
    runId = argv[3]
    data = np.load(dataDir + "/data.npy")
    labels = np.load(dataDir + "/class.npy")
    idxLabelledData = np.load(dataDir + "/" + nSamples + "_" + runId + ".npy")
    labelledData = data[idxLabelledData]
    labelsSmall = labels[idxLabelledData]
    f_data, s_data, y_val = buildPair(labelledData, labelsSmall)
    print("labelledData.shape ", labelledData.shape)
    print("labelsSmall.shape ", labelsSmall.shape)
    origData = np.array(data)
    nClasses = len(np.unique(labels))
    RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)
    """ defining loss function and the optimizer to use in the training phase """
    loss_huber = tf.keras.losses.Huber()
    loss_object2 = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)  #MeanAbsoluteError()#
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)
    BATCH_SIZE = 32
    n_epochs = 100
    RNNAE_model = trainRNNAE(RNNAE_model, nClasses, data, f_data, s_data, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs)
    emb, _, _, _ = RNNAE_model(origData)
    emb = emb.numpy()
    km = KMeans(n_clusters=nClasses)
    km.fit(emb)
    nmi = normalized_mutual_info_score(labels, km.labels_)
    print("nmi %f" % nmi)


if __name__ == "__main__":
    main(sys.argv)
    #plot2DFeatures(emb, labels)
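A minimal sketch of how main is driven (the dataset directory and run identifier are hypothetical; the expected files data.npy, class.npy and <nSamples>_<runId>.npy follow from the loads above):

# Equivalent to running: python main.py ./myDataset 5 0
# where ./myDataset contains data.npy, class.npy and 5_0.npy (indices of the labelled samples).
main(["main.py", "./myDataset", "5", "0"])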
main_varyingLength.py
@@ -14,9 +14,12 @@ from sklearn.cluster import KMeans
 from sklearn.metrics import normalized_mutual_info_score
 from active_semi_clustering.semi_supervised.pairwise_constraints import MPCKMeans, PCKMeans, COPKMeans
 from model import RNNAE
+#gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.45)
+#sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
 def generateConstraints(idxLabelledData, labels):
     ml = []
     cl = []
@@ -80,8 +83,8 @@ def trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_obje
             loss = equal_loss + neg_loss
             loss = tf.reduce_mean(loss)
-            _, reco_f, reco_fR, _ = model(batch_f, training=True)
-            _, reco_s, reco_sR, _ = model(batch_s, training=True)
+            _, reco_f, reco_fR = model(batch_f, training=True)
+            _, reco_s, reco_sR = model(batch_s, training=True)
             loss += loss_object(batch_f, reco_f * batch_f_mask)
             loss += loss_object(batch_f, reco_fR * batch_f_mask)
@@ -98,7 +101,6 @@ def trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_obje
 def trainStepStrech(model, x_train, valid_mask, centers, loss_object, optimizer, BATCH_SIZE, e):
     loss_iteration = 0
     tot_loss = 0.0
     cosineSim = tf.keras.losses.CosineSimilarity(reduction=tf.keras.losses.Reduction.NONE)
     iterations = x_train.shape[0] / BATCH_SIZE
     if x_train.shape[0] % BATCH_SIZE != 0:
         iterations += 1
@@ -109,7 +111,7 @@ def trainStepStrech(model, x_train, valid_mask, centers, loss_object, optimizer,
         batch_mask = getBatch(valid_mask, ibatch, BATCH_SIZE)
         batch_c = getBatch(centers, ibatch, BATCH_SIZE)
         with tf.GradientTape() as tape:
-            emb, reco, recoR, classif = model(batch_x, training=True)
+            emb, reco, recoR = model(batch_x, training=True)
             loss_rec = loss_object(batch_x, reco * batch_mask)
             loss_rec += loss_object(batch_x, recoR * batch_mask)
             loss_rec += tf.reduce_mean(tf.reduce_sum(tf.square(batch_c - emb), axis=1))
@@ -131,7 +133,7 @@ def trainStep(model, x_train, valid_mask, loss_object, optimizer, BATCH_SIZE, e)
         batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
         batch_mask = getBatch(valid_mask, ibatch, BATCH_SIZE)
         with tf.GradientTape() as tape:
-            emb, reco, recoR, classif = model(batch_x, training=True)
+            emb, reco, recoR = model(batch_x, training=True)
             loss_rec = loss_object(batch_x, reco * batch_mask)
             loss_rec += loss_object(batch_x, recoR * batch_mask)
         grads = tape.gradient(loss_rec, model.trainable_variables)
@@ -142,9 +144,9 @@ def trainStep(model, x_train, valid_mask, loss_object, optimizer, BATCH_SIZE, e)
 def trainRNNAE(model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs):
     #th = 40
     n_epochs_warmUp = 40
     centers = None
     print("PRETRAINING STAGE : AE + CONTRASTIVE LOSS")
     for e in range(n_epochs_warmUp):
         f_data, f_data_mask, s_data, s_data_mask, y_val = shuffle(f_data, f_data_mask, s_data, s_data_mask, y_val)
@@ -155,7 +157,7 @@ def trainRNNAE(model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s
     print("COMPUTE INTERMEDIATE CLUSTERING ASSIGNMENT")
-    emb, _, _, _ = model(data)
+    emb, _, _ = model(data)
     km = KMeans(n_clusters=nClasses)
     km.fit(emb)
     centers = []
@@ -166,38 +168,15 @@ def trainRNNAE(model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s
     print("REFINEMENT STEP alternating AE + MANIFOLD STRETCH TOWARDS CENTROIDS and AE + CONTRASTIVE LOSS")
     for e in range(n_epochs - n_epochs_warmUp):
         #labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
         data, centers, valid_mask = shuffle(data, centers, valid_mask)
         #STRETCHING THE EMBEDDING TOWARDS CENTROIDS
         trainLoss = trainStepStrech(model, data, valid_mask, centers, loss_huber, optimizer, BATCH_SIZE, e)
         #FORCING EMBEDDING TO MATCH CONSTRAINTS
         trainLoss += trainStepL(model, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer2, BATCH_SIZE, e)
         print("epoch %d with loss %f" % (e, trainLoss))
     return model
 def plot2DFeatures(data, labels):
     X_embedded = TSNE(n_components=2).fit_transform(data)
     nclasses = len(np.unique(labels))
     for i in range(nclasses):
         idx = np.where(labels == i)
         pyplot.scatter(X_embedded[idx[0], 0], X_embedded[idx[0], 1])
     pyplot.draw()
     pyplot.pause(10)
     pyplot.clf()
 def getExtractLabelSet(data, labels, nSamples):
     labelledData = []
     labelsSmall = []
     for val in np.unique(labels):
         idx = np.where(labels == val)
         idx = shuffle(idx[0])[0:nSamples]
         labelledData.append(data[idx])
         for j in range(nSamples):
             labelsSmall.append(val)
     labelledData = np.concatenate(labelledData, axis=0)
     return labelledData, np.array(labelsSmall)
 def createMaskTensor(data, valid_lengths):
     mask = np.zeros(data.shape)
     nrow, nt, ndim = mask.shape
@@ -207,27 +186,31 @@ def createMaskTensor(data, valid_lengths):
     return mask
 def main(argv):
     #Directory in which data are stored
     dataDir = argv[1]
     #number of labelled samples to access data information
     nSamples = argv[2]
     #run identifier to add to the output file name
     runId = argv[3]
     newDir = dataDir + "/OUR_VL"
     if not os.path.exists(newDir):
         os.makedirs(newDir)
     data = np.load(dataDir + "/data.npy")
     labels = np.load(dataDir + "/class.npy")
     valid_lengths = np.load(dataDir + "/seqLength.npy")
     valid_mask = createMaskTensor(data, valid_lengths)
     idxLabelledData = np.load(dataDir + "/" + nSamples + "_" + runId + ".npy")
     labelledData = data[idxLabelledData]
     labelsSmall = labels[idxLabelledData]
     labelledValidMask = valid_mask[idxLabelledData]
     #FROM THE LABELLED EXAMPLES BUILD THE WHOLE SET OF MUST AND CANNOT LINK CONSTRAINTS
     f_data, f_data_mask, s_data, s_data_mask, y_val = buildPair(labelledData, labelsSmall, labelledValidMask)
     print("labelledData.shape ", labelledData.shape)
     print("labelsSmall.shape ", labelsSmall.shape)
     origData = np.array(data)
     nClasses = len(np.unique(labels))
@@ -235,25 +218,25 @@ def main(argv):
     RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)
     """ defining loss function and the optimizer to use in the training phase """
     loss_huber = tf.keras.losses.Huber()
     loss_object2 = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)  #MeanAbsoluteError()#
     optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
     optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)
     BATCH_SIZE = 32
     #Total number of epochs
     n_epochs = 100
     RNNAE_model = trainRNNAE(RNNAE_model, nClasses, data, valid_mask, f_data, f_data_mask, s_data, s_data_mask, y_val, loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs)
-    emb, _, _, _ = RNNAE_model(origData)
+    emb, _, _ = RNNAE_model(origData)
     emb = emb.numpy()
     km = KMeans(n_clusters=nClasses)
     km.fit(emb)
     nmi = normalized_mutual_info_score(labels, km.labels_)
     print("nmi %f" % nmi)
     #Save the clustering results obtained via the K-Means algorithm applied on the embedding generated by our approach
     np.save(newDir + "/res_" + nSamples + "_" + runId + ".npy", km.labels_)
     #Save the embedding generated by our framework
     np.save(newDir + "/emb_" + nSamples + "_" + runId + ".npy", emb)
 if __name__ == "__main__":
     main(sys.argv)
     #plot2DFeatures(emb, labels)
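The body of createMaskTensor is collapsed in the diff above, so the following is only a sketch of the kind of validity mask the masked losses (reco * batch_mask) rely on, under the assumption that observed timesteps are marked with 1 and padded ones with 0; it is not the repository's implementation:

import numpy as np

def create_mask_tensor_sketch(data, valid_lengths):
    # hypothetical re-implementation for illustration only
    mask = np.zeros(data.shape)
    nrow, nt, ndim = mask.shape
    for i in range(nrow):
        mask[i, 0:valid_lengths[i], :] = 1.0
    return mask

# Multiplying a reconstruction by such a mask zeroes the padded timesteps, so the
# Huber loss only scores the valid part of each variable-length series.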
model.py
 import tensorflow as tf
 class AttentionLayer(tf.keras.layers.Layer):
     def __init__(self, ch_output):
         super(AttentionLayer, self).__init__()
         self.ch_output = ch_output
         self.activation = tf.math.tanh  #tf.nn.leaky_relu
         self.output_activation = tf.keras.activations.softmax
     def build(self, input_shape):
         '''
         print(input_shape)
         if len(input_shape) > 1:
             input_dim = input_shape[1]
         else:
             input_dim = input_shape
         print(input_dim)
         exit()
         '''
         input_dim = input_shape[-1]
         self.A = self.add_weight(name="a_weight_matrix", shape=(self.ch_output, 1))
         self.W = self.add_weight(name="W_target_nodes_weights", shape=[self.ch_output, self.ch_output])
         self.tgt_node_b = self.add_weight(name='bias_target', shape=(self.ch_output,), initializer='zeros')
         self.neigh_b = self.add_weight(name='bias_neigh', shape=(self.ch_output,), initializer='zeros')
     def call(self, inputs, **kwargs):
         #hi = inputs[0]
         # target_nodes shape: batch_size x features_size F
         # hj shape: batch_size x max(|N(x)|) x features_size F
         #mask = tf.dtypes.cast(kwargs.get('mask'), tf.float32)
         # mask shape: batch_size x max(|N(x)|)
         #whi = tf.nn.bias_add(tf.tensordot(hi, self.W, axes=1), self.tgt_node_b)
         # whi shape: batch_size x features_output F'
         #print(inputs.get_shape())
         #print(self.W.get_shape())
         whj = tf.nn.bias_add(tf.tensordot(inputs, self.W, axes=1), self.neigh_b)
         #print("whj ", whj.get_shape())
         # whj shape: batch_size x max(|N(x)|) x features_output F'
         multiply_dim = len(whj[0])
         #whi = tf.tile(tf.expand_dims(whi, 1), multiples=(1, multiply_dim, 1))
         # whi shape for concat: batch_size x features_output F'
         #concat = whj
         #concat = tf.concat([whi, whj], axis=2)
         # concat shape: batch_size x max(|N(x)|) x 2F'
         scores = self.activation(tf.tensordot(whj, self.A, axes=1))
         scores = tf.squeeze(scores, axis=-1)
         # scores shape: batch_size x max(|N(x)|)
         #masked_scores = scores * mask
         alphas = self.output_activation(scores)
         hj = inputs * tf.expand_dims(alphas, -1)
         # hj shape: batch_size x max(|N(x)|) x features_output F'
         output = tf.reduce_sum(hj, axis=1)
         # output shape: (batch_size x features_output F')
         return output, alphas
 class RNNAE(tf.keras.Model):
-    def __init__(self, filters, outputDim, n_cluster, dropout_rate=0.0, hidden_activation='relu', output_activation='softmax', name='convNetwork2', **kwargs):
+    def __init__(self, filters, outputDim, n_cluster, dropout_rate=0.0, hidden_activation='relu', output_activation='softmax', name='RNNAE', **kwargs):
         # call the constructor of the parent class, Model
         super(RNNAE, self).__init__(name=name, **kwargs)
         self.attention = AttentionLayer(filters)
         self.attentionR = AttentionLayer(filters)
         self.gate = tf.keras.layers.Dense(filters, activation='sigmoid')
         self.gateR = tf.keras.layers.Dense(filters, activation='sigmoid')
         self.encoder = tf.keras.layers.GRU(filters, return_sequences=True)
         self.encoderR = tf.keras.layers.GRU(filters, go_backwards=True, return_sequences=True)
         self.classif = tf.keras.layers.Dense(n_cluster, activation='softmax')
         self.decoder = tf.keras.layers.GRU(filters, return_sequences=True)
         self.decoder2 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))
         self.decoderR = tf.keras.layers.GRU(filters, return_sequences=True)
         self.decoder2R = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=outputDim, activation=None))
         #self.TDclassif = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=n_cluster, activation='softmax'))
     def siameseDistance(self, inputs, training=False):
         first_elements = inputs[0]
         second_elements = inputs[1]
@@ -94,10 +29,7 @@ class RNNAE(tf.keras.Model):
         seqEmbR = self.encoderR(inputs)
         emb = tf.unstack(seqEmb, axis=1)[-1]
         embR = tf.unstack(seqEmbR, axis=1)[-1]
         #emb = self.gate(emb) * emb
         #embR = self.gate(embR) * embR
         return emb + embR
         #return tf.concat([emb,embR],axis=1)#emb+embR
     def decF(self, seq_emb, emb, training=False):
         dec = self.decoder(seq_emb)
@@ -105,19 +37,15 @@ class RNNAE(tf.keras.Model):
         dec = self.decoder2(dec)
         decR = self.decoder2R(decR)
-        pred = self.classif(emb)
         #print(decR.get_shape())
         decR = tf.reverse(decR, axis=[1])
         #exit()
-        return dec, decR, pred
+        return dec, decR
     def call(self, inputs, training=False):
         t = inputs.get_shape()
         emb = self.encF(inputs, training)
         seq_emb = tf.keras.layers.RepeatVector(t[1])(emb)
-        dec, decR, pred = self.decF(seq_emb, emb, training)
-        return emb, dec, decR, pred
+        dec, decR = self.decF(seq_emb, emb, training)
+        return emb, dec, decR
         #(dec+decR)/2, tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1)
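To make the weighting computed by AttentionLayer.call concrete, a standalone sketch that mirrors its operations with plain TensorFlow ops (the input sizes and the random stand-ins for the learned weights W, neigh_b and A are made up; the real layer learns them via add_weight):

import tensorflow as tf

inputs = tf.random.normal((2, 5, 4))   # toy batch: 2 sequences, 5 timesteps, 4 features
W = tf.random.normal((4, 4))           # stands in for self.W
b = tf.zeros((4,))                     # stands in for self.neigh_b
A = tf.random.normal((4, 1))           # stands in for self.A

whj = tf.nn.bias_add(tf.tensordot(inputs, W, axes=1), b)              # (2, 5, 4)
scores = tf.squeeze(tf.math.tanh(tf.tensordot(whj, A, axes=1)), -1)   # (2, 5) one score per timestep
alphas = tf.keras.activations.softmax(scores)                         # attention weights, sum to 1 over time
output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), axis=1)   # (2, 4) weighted sum over timesteps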