Ienco Dino / cMTS-Clustering

Commit 2648ecb9, authored Mar 10, 2021 by Ienco Dino

    add initial file

3 changed files
BIRNNAE.py (new file, mode 100755)
import tensorflow as tf


class RNNAE(tf.keras.Model):
    def __init__(self, filters, outputDim, nClasses, dropout_rate=0.0,
                 hidden_activation='relu', output_activation='softmax',
                 name='convNetwork2', **kwargs):
        # NOTE: nClasses is accepted here to match the call in main.py
        # (RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)); without it
        # that call raises a TypeError. It is not used by this autoencoder
        # variant, and dropout_rate, hidden_activation and output_activation
        # are likewise accepted but unused below.
        # call the parent class (Model) constructor
        super(RNNAE, self).__init__(name=name, **kwargs)
        # forward encoder and backward encoder (the latter reads the input
        # sequence in reverse, go_backwards=True)
        self.encoderR = tf.keras.layers.LSTM(filters, go_backwards=True)
        self.encoder = tf.keras.layers.LSTM(filters)
        # forward decoder: LSTM over the repeated embedding, followed by a
        # time-distributed linear layer mapping back to the input dimension
        self.decoder = tf.keras.layers.LSTM(filters, return_sequences=True)
        self.decoder2 = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(units=outputDim, activation=None))
        # backward decoder: same structure, reconstructs the reversed sequence
        self.decoderR = tf.keras.layers.LSTM(filters, return_sequences=True)
        self.decoder2R = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(units=outputDim, activation=None))
    def call(self, inputs, training=False):
        t = inputs.get_shape()
        # forward branch: encode, repeat the embedding along the time axis,
        # then decode back to a sequence of outputDim-dimensional vectors
        enc = self.encoder(inputs)
        emb = enc
        seq_emb = tf.keras.layers.RepeatVector(t[1])(emb)
        dec = self.decoder(seq_emb)
        dec = self.decoder2(dec)
        # backward branch: same scheme on the reversed reading of the input;
        # the reconstruction is flipped back to the original time order
        encR = self.encoderR(inputs)
        embR = encR
        seq_embR = tf.keras.layers.RepeatVector(t[1])(embR)
        decR = self.decoderR(seq_embR)
        decR = self.decoder2R(decR)
        decR = tf.reverse(decR, axis=[1])
        # main.py unpacks four values (emb, reco, recoR, classif), while this
        # file originally returned only (dec, decR, concat(emb, embR)), which
        # would fail to unpack. The order below matches main.py; classif is
        # None since no classification head is defined in this file.
        return tf.concat((emb, embR), axis=1), dec, decR, None
        # alternative returns left by the author:
        # (dec+decR)/2, tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1), tf.concat((emb,embR),axis=1)

    def siameseDistance(self, inputs, training=False):
        # main.py calls model.siameseDistance([batch_f, batch_s], ...) but no
        # such method exists in this commit; this is a minimal sketch assuming
        # the Euclidean distance between the two samples' embeddings.
        first, second = inputs
        emb_f, _, _, _ = self.call(first, training=training)
        emb_s, _, _, _ = self.call(second, training=training)
        return tf.norm(emb_f - emb_s, axis=1)
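
A quick smoke test of the model above (not part of the commit, and relying on the fixed four-value return): on a dummy batch of 8 series of length 20 with 4 features, both reconstructions keep the input shape and the embedding concatenates the two 64-unit LSTM states.

import numpy as np
import tensorflow as tf
from BIRNNAE import RNNAE

model = RNNAE(64, 4, 3)                      # filters=64, outputDim=4, nClasses=3
x = np.random.rand(8, 20, 4).astype("float32")
emb, reco, recoR, _ = model(x)
print(emb.shape)    # (8, 128): forward + backward embeddings concatenated
print(reco.shape)   # (8, 20, 4): forward reconstruction
print(recoR.shape)  # (8, 20, 4): backward reconstruction, re-reversed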
README.md (new file, mode 100644) — contents not shown in this view
main.py (new file, mode 100644)
import numpy as np
import tensorflow as tf
import os
import sys
from sklearn.metrics import f1_score, r2_score
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
import time
from sklearn.manifold import TSNE
import matplotlib.pyplot as pyplot
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score
from active_semi_clustering.semi_supervised.pairwise_constraints import MPCKMeans, PCKMeans, COPKMeans
# the original line was "from model import RNNAE", but no model.py is part of
# this commit; the class lives in BIRNNAE.py
from BIRNNAE import RNNAE
# note: several imports above (f1_score, r2_score, RandomForestRegressor,
# LinearRegression, KFold, time, MPCKMeans, PCKMeans, COPKMeans) are unused
# in this script
def generateConstraints(idxLabelledData, labels):
    # build must-link (ml) / cannot-link (cl) pair constraints from the
    # labelled subset; not called anywhere in this script
    ml = []
    cl = []
    for i in range(len(idxLabelledData)):
        for j in range(i + 1, len(idxLabelledData)):
            if labels[i] == labels[j]:
                ml.append([i, j])
            else:
                cl.append([i, j])
    return ml, cl
def getBatch(X, i, batch_size):
    # slice out the i-th mini-batch; the last batch may be smaller
    start_id = i * batch_size
    end_id = min((i + 1) * batch_size, X.shape[0])
    batch_x = X[start_id:end_id]
    return batch_x
def buildPair(x_train, labels):
    # enumerate all sample pairs: y_val is 0 for a same-class (similar) pair
    # and 1 for a different-class (dissimilar) pair
    f_data = []
    s_data = []
    y_val = []
    n_examples = labels.shape[0]
    for i in range(n_examples):
        for j in range(i + 1, n_examples):
            if labels[i] == labels[j]:
                y_val.append(0)
            else:
                y_val.append(1)
            f_data.append(x_train[i])
            s_data.append(x_train[j])
    return np.stack(f_data, axis=0), np.stack(s_data, axis=0), np.array(y_val)
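
# Worked example for buildPair (illustrative, not part of the commit): with
# labels [0, 0, 1] the 3 samples yield the 3 pairs (0,1), (0,2), (1,2) and
# y_val = [0, 1, 1]; in general n samples produce n*(n-1)/2 pairs, which is
# why this is only applied to the small labelled subset.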
def trainStepL(model, x_train, labels, loss_object, optimizer, BATCH_SIZE, e):
    # supervised step on the labelled subset: contrastive (siamese) loss on
    # all pairs, plus the reconstruction losses of both branches
    tot_loss = 0.0
    margin = 1.0
    f_data, s_data, y_val = buildPair(x_train, labels)
    f_data, s_data, y_val = shuffle(f_data, s_data, y_val)
    iterations = f_data.shape[0] / BATCH_SIZE
    if f_data.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    for ibatch in range(int(iterations)):
        batch_f = getBatch(f_data, ibatch, BATCH_SIZE)
        batch_s = getBatch(s_data, ibatch, BATCH_SIZE)
        batch_y = getBatch(y_val, ibatch, BATCH_SIZE)
        # cast the 0/1 integer pair labels to float32 so they can be mixed
        # with the float32 distance tensor below
        batch_y = batch_y.astype("float32")
        with tf.GradientTape() as tape:
            d_w = model.siameseDistance([batch_f, batch_s], training=True)
            equal_loss = (.5 * (1 - batch_y) * d_w)
            neg_loss = (.5 * batch_y * tf.math.maximum(0., margin - d_w))
            loss = equal_loss + neg_loss
            loss = tf.reduce_mean(loss)
            _, reco_f, reco_fR, _ = model(batch_f, training=True)
            _, reco_s, reco_sR, _ = model(batch_s, training=True)
            loss += loss_object(batch_f, reco_f)
            loss += loss_object(batch_f, reco_fR)
            loss += loss_object(batch_s, reco_s)
            # the original compared reco_sR against batch_f, presumably a
            # copy-paste slip; batch_s is the matching input
            loss += loss_object(batch_s, reco_sR)
        grads = tape.gradient(loss, model.trainable_variables)
        # replace missing gradients with zeros so apply_gradients never sees None
        grads = [grad if grad is not None else tf.zeros_like(var)
                 for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss
    return (tot_loss / iterations)
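
# The pairwise term above is a contrastive loss: with y = 0 for similar pairs
# and y = 1 for dissimilar ones,
#   L(y, d_w) = 0.5 * (1 - y) * d_w + 0.5 * y * max(0, margin - d_w)
# i.e. similar pairs are pulled together (small d_w) while dissimilar pairs
# are pushed at least `margin` apart in the embedding space.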
def trainStepStrech(model, x_train, centers, loss_object, optimizer, BATCH_SIZE, e):
    # unsupervised refinement step: reconstruction losses plus a squared
    # distance pulling each embedding towards its current k-means centroid
    tot_loss = 0.0
    cosineSim = tf.keras.losses.CosineSimilarity(reduction=tf.keras.losses.Reduction.NONE)  # defined but unused
    iterations = x_train.shape[0] / BATCH_SIZE
    if x_train.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    centers = centers.astype("float32")
    for ibatch in range(int(iterations)):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        batch_c = getBatch(centers, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco)
            loss_rec += loss_object(batch_x, recoR)
            # center-stretching term: squared Euclidean distance between each
            # embedding and the centroid of its assigned cluster
            loss_rec += tf.reduce_mean(tf.reduce_sum(tf.square(batch_c - emb), axis=1))
        grads = tape.gradient(loss_rec, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var)
                 for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return (tot_loss / iterations)
def trainStep(model, x_train, loss_object, optimizer, BATCH_SIZE, e):
    # plain autoencoder step: reconstruction loss for both the forward and
    # the backward branch
    tot_loss = 0.0
    iterations = x_train.shape[0] / BATCH_SIZE
    if x_train.shape[0] % BATCH_SIZE != 0:
        iterations += 1
    for ibatch in range(int(iterations)):
        batch_x = getBatch(x_train, ibatch, BATCH_SIZE)
        with tf.GradientTape() as tape:
            emb, reco, recoR, classif = model(batch_x, training=True)
            loss_rec = loss_object(batch_x, reco)
            loss_rec += loss_object(batch_x, recoR)
        grads = tape.gradient(loss_rec, model.trainable_variables)
        grads = [grad if grad is not None else tf.zeros_like(var)
                 for var, grad in zip(model.trainable_variables, grads)]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        tot_loss += loss_rec
    return (tot_loss / iterations)
def trainRNNAE(model, nClasses, data, labelledData, labelsSmall, loss_huber,
               optimizer, optimizer2, BATCH_SIZE, n_epochs):
    #th = 40
    # phase 1 (warm-up): reconstruction on all the data plus the contrastive
    # loss on the labelled subset
    n_epochs_warmUp = 40
    centers = None
    for e in range(n_epochs_warmUp):
        labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
        data = shuffle(data)
        trainLoss = trainStep(model, data, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, labelledData, labelsSmall, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))
    # cluster the warm-up embeddings with k-means and assign each sample the
    # centroid of its cluster
    emb, _, _, _ = model(data)
    km = KMeans(n_clusters=nClasses)
    km.fit(emb)
    centers = []
    for val in km.labels_:
        centers.append(km.cluster_centers_[val])
    centers = np.array(centers)
    # phase 2: keep training while stretching the embeddings towards their
    # assigned centroids
    for e in range(n_epochs - n_epochs_warmUp):
        labelledData, labelsSmall = shuffle(labelledData, labelsSmall)
        data, centers = shuffle(data, centers)
        trainLoss = trainStepStrech(model, data, centers, loss_huber, optimizer, BATCH_SIZE, e)
        trainLoss += trainStepL(model, labelledData, labelsSmall, loss_huber, optimizer2, BATCH_SIZE, e)
        print("epoch %d with loss %f" % (e, trainLoss))
    return model
def plot2DFeatures(data, labels):
    # project the embeddings to 2-D with t-SNE and scatter-plot them per class
    X_embedded = TSNE(n_components=2).fit_transform(data)
    nclasses = len(np.unique(labels))
    for i in range(nclasses):
        idx = np.where(labels == i)
        pyplot.scatter(X_embedded[idx[0], 0], X_embedded[idx[0], 1])
    pyplot.draw()
    pyplot.pause(10)
    pyplot.clf()
def getExtractLabelSet(data, labels, nSamples):
    # draw nSamples random examples per class; not called in this script (the
    # labelled indices are loaded from file instead)
    labelledData = []
    labelsSmall = []
    for val in np.unique(labels):
        idx = np.where(labels == val)
        idx = shuffle(idx[0])[0:nSamples]
        labelledData.append(data[idx])
        for j in range(nSamples):
            labelsSmall.append(val)
    labelledData = np.concatenate(labelledData, axis=0)
    return labelledData, np.array(labelsSmall)
# command-line arguments: data directory, number of labelled samples per class
# (used only to build the index file name), and run identifier
dataDir = sys.argv[1]
nSamples = sys.argv[2]
runId = sys.argv[3]

#Dordogne 23
data = np.load(dataDir + "/data.npy")
labels = np.load(dataDir + "/class.npy")
idxLabelledData = np.load(dataDir + "/" + nSamples + "_" + runId + ".npy")
labelledData = data[idxLabelledData]
labelsSmall = labels[idxLabelledData]
print("labelledData.shape ", labelledData.shape)
print("labelsSmall.shape ", labelsSmall.shape)

origData = np.array(data)
nClasses = len(np.unique(labels))
RNNAE_model = RNNAE(64, data.shape[-1], nClasses, dropout_rate=0.2)

""" defining loss function and the optimizer to use in the training phase """
loss_huber = tf.keras.losses.Huber()
loss_object2 = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)  #MeanAbsoluteError()#  (unused below)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.0005)
BATCH_SIZE = 32
n_epochs = 100

RNNAE_model = trainRNNAE(RNNAE_model, nClasses, data, labelledData, labelsSmall,
                         loss_huber, optimizer, optimizer2, BATCH_SIZE, n_epochs)

# embed the full dataset, cluster with k-means and score the clustering
# against the ground-truth labels with normalized mutual information
emb, _, _, _ = RNNAE_model(origData)
emb = emb.numpy()
km = KMeans(n_clusters=nClasses)
km.fit(emb)
nmi = normalized_mutual_info_score(labels, km.labels_)
print("nmi %f" % nmi)
#plot2DFeatures(emb, labels)
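
To run the script, the data directory needs data.npy (the multivariate time series, samples × timesteps × features), class.npy (the labels) and an index file named <nSamples>_<runId>.npy holding the indices of the labelled subset. An invocation might look like this (the directory name and run id are hypothetical; "Dordogne 23" only appears as a comment in the source):

python main.py ./Dordogne 23 0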