Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Fize Jacques
GMatch4py
Commits
c486d941
Commit
c486d941
authored
Feb 22, 2019
by
Fize Jacques
Browse files
Shortest path kernel work!
parent
02c1bea1
Changes
4
Hide whitespace changes
Inline
Side-by-side
README.md
View file @
c486d941
...
...
@@ -84,7 +84,6 @@ ged.distance(result)
*
Shortest Path Kernel [3]
*
Weisfeiler-Lehman Kernel [4]
*
Subtree Kernel
*
Edge Kernel
*
Graph Edit Distance [5]
*
Approximated Graph Edit Distance
*
Hausdorff Graph Edit Distance
...
...
gmatch4py/kernels/adjacency.pyx
0 → 100644
View file @
c486d941
import
networkx
as
nx
import
numpy
as
np
def
get_adjacency
(
G1
,
G2
):
"""
Return adjacency matrices of two graph based on nodes present in both of them.
Parameters
----------
G1 : nx.Graph
first graph
G2 : nx.Graph
second graph
Returns
-------
tuple of np.array
adjacency matrices of G1 and G2
"""
# Extract nodes
nodes_G1
=
list
(
G1
.
nodes
())
nodes_G2
=
list
(
G2
.
nodes
())
# Get Adjacency Matrix for each graph
adj_original_G1
=
nx
.
convert_matrix
.
to_numpy_matrix
(
G1
,
nodes_G1
)
adj_original_G2
=
nx
.
convert_matrix
.
to_numpy_matrix
(
G2
,
nodes_G2
)
# Get old index
index_node_G1
=
{
node
:
ix
for
ix
,
node
in
enumerate
(
nodes_G1
)}
index_node_G2
=
{
node
:
ix
for
ix
,
node
in
enumerate
(
nodes_G2
)}
# Building new indices
nodes_unique
=
list
(
set
(
nodes_G1
).
union
(
nodes_G2
))
new_node_index
=
{
node
:
i
for
i
,
node
in
enumerate
(
nodes_unique
)}
n
=
len
(
nodes_unique
)
#Generate new adjacent matrices
new_adj_G1
=
np
.
zeros
((
n
,
n
))
new_adj_G2
=
np
.
zeros
((
n
,
n
))
# Filling old values
for
n1
in
nodes_unique
:
for
n2
in
nodes_unique
:
if
n1
in
G1
.
nodes
()
and
n2
in
G1
.
nodes
():
new_adj_G1
[
new_node_index
[
n1
],
new_node_index
[
n2
]]
=
adj_original_G1
[
index_node_G1
[
n1
],
index_node_G1
[
n2
]]
if
n1
in
G2
.
nodes
()
and
n2
in
G2
.
nodes
():
new_adj_G2
[
new_node_index
[
n1
],
new_node_index
[
n2
]]
=
adj_original_G2
[
index_node_G2
[
n1
],
index_node_G2
[
n2
]]
return
new_adj_G1
,
new_adj_G2
gmatch4py/kernels/shortest_path_kernel.pyx
View file @
c486d941
...
...
@@ -12,15 +12,21 @@ Modified by : Jacques Fize
import
networkx
as
nx
import
numpy
as
np
cimport
numpy
as
np
from
scipy.sparse.csgraph
import
floyd_warshall
from
.adjacency
import
get_adjacency
from
cython.parallel
cimport
prange
,
parallel
from
..helpers.general
import
parsenx2graph
from
..base
cimport
Base
class
ShortestPathGraphKernel
:
cdef
class
ShortestPathGraphKernel
(
Base
):
"""
Shorthest path graph kernel.
"""
__type__
=
"sim"
@
staticmethod
def
compare
(
g_1
,
g_2
,
verbose
=
False
):
def
__init__
(
self
):
Base
.
__init__
(
self
,
0
,
True
)
def
compare_two
(
self
,
g_1
,
g_2
):
"""Compute the kernel value (similarity) between two graphs.
Parameters
----------
...
...
@@ -34,15 +40,18 @@ class ShortestPathGraphKernel:
"""
# Diagonal superior matrix of the floyd warshall shortest
# paths:
fwm1
=
np
.
array
(
nx
.
floyd_warshall_numpy
(
g_1
))
fwm1
=
np
.
where
(
fwm1
==
np
.
inf
,
0
,
fwm1
)
fwm1
=
np
.
where
(
fwm1
==
np
.
nan
,
0
,
fwm1
)
if
isinstance
(
g_1
,
nx
.
Graph
)
and
isinstance
(
g_2
,
nx
.
Graph
):
g_1
,
g_2
=
get_adjacency
(
g_1
,
g_2
)
fwm1
=
np
.
array
(
floyd_warshall
(
g_1
))
fwm1
[
np
.
isinf
(
fwm1
)]
=
0
fwm1
[
np
.
isnan
(
fwm1
)]
=
0
fwm1
=
np
.
triu
(
fwm1
,
k
=
1
)
bc1
=
np
.
bincount
(
fwm1
.
reshape
(
-
1
).
astype
(
int
))
fwm2
=
np
.
array
(
nx
.
floyd_warshall
_numpy
(
g_2
))
fwm2
=
np
.
where
(
fwm2
==
np
.
inf
,
0
,
fwm2
)
fwm2
=
np
.
where
(
fwm2
==
np
.
nan
,
0
,
fwm2
)
fwm2
=
np
.
array
(
floyd_warshall
(
g_2
))
fwm2
[
np
.
isinf
(
fwm2
)]
=
0
fwm2
[
np
.
isnan
(
fwm2
)]
=
0
fwm2
=
np
.
triu
(
fwm2
,
k
=
1
)
bc2
=
np
.
bincount
(
fwm2
.
reshape
(
-
1
).
astype
(
int
))
...
...
@@ -57,8 +66,7 @@ class ShortestPathGraphKernel:
return
np
.
sum
(
v1
*
v2
)
@
staticmethod
def
compare_list
(
graph_list
,
verbose
=
False
):
cpdef
np
.
ndarray
compare
(
self
,
list
graph_list
,
list
selected
):
"""Compute the all-pairs kernel values for a list of graphs.
This function can be used to directly compute the kernel
matrix for a list of graphs. The direct computation of the
...
...
@@ -73,16 +81,69 @@ class ShortestPathGraphKernel:
K: numpy.array, shape = (len(graph_list), len(graph_list))
The similarity matrix of all graphs in graph_list.
"""
n
=
len
(
graph_list
)
k
=
np
.
zeros
((
n
,
n
))
cdef
int
n
=
len
(
graph_list
)
cdef
double
[:,:]
k
=
np
.
zeros
((
n
,
n
))
cdef
int
cpu_count
=
self
.
cpu_count
cdef
list
adjacency_matrices
=
[[
None
for
i
in
range
(
n
)]
for
j
in
range
(
n
)]
cdef
int
i
,
j
for
i
in
range
(
n
):
for
j
in
range
(
i
,
n
):
k
[
i
,
j
]
=
ShortestPathGraphKernel
.
compare
(
graph_list
[
i
],
graph_list
[
j
])
k
[
j
,
i
]
=
k
[
i
,
j
]
adjacency_matrices
[
i
][
j
]
=
get_adjacency
(
graph_list
[
i
],
graph_list
[
j
])
adjacency_matrices
[
j
][
i
]
=
adjacency_matrices
[
i
][
j
]
with
nogil
,
parallel
(
num_threads
=
cpu_count
):
for
i
in
prange
(
n
,
schedule
=
'static'
):
for
j
in
range
(
i
,
n
):
with
gil
:
if
len
(
graph_list
[
i
])
>
0
and
len
(
graph_list
[
j
])
>
0
:
a
,
b
=
adjacency_matrices
[
i
][
j
]
k
[
i
][
j
]
=
self
.
compare_two
(
a
,
b
)
k
[
j
][
i
]
=
k
[
i
][
j
]
k_norm
=
np
.
zeros
((
n
,
n
))
for
i
in
range
(
n
):
for
j
in
range
(
i
,
n
):
k_norm
[
i
,
j
]
=
k
[
i
][
j
]
/
np
.
sqrt
(
k
[
i
][
i
]
*
k
[
j
][
j
])
k_norm
[
j
,
i
]
=
k_norm
[
i
,
j
]
k_norm
=
np
.
zeros
(
k
.
shape
)
for
i
in
range
(
k
.
shape
[
0
]):
for
j
in
range
(
k
.
shape
[
1
]):
k_norm
[
i
,
j
]
=
k
[
i
,
j
]
/
np
.
sqrt
(
k
[
i
,
i
]
*
k
[
j
,
j
])
return
np
.
nan_to_num
(
k_norm
)
return
k_norm
\ No newline at end of file
cpdef
np
.
ndarray
compare_single_core
(
self
,
list
graph_list
,
list
selected
):
"""Compute the all-pairs kernel values for a list of graphs.
This function can be used to directly compute the kernel
matrix for a list of graphs. The direct computation of the
kernel matrix is faster than the computation of all individual
pairwise kernel values.
Parameters
----------
graph_list: list
A list of graphs (list of networkx graphs)
Return
------
K: numpy.array, shape = (len(graph_list), len(graph_list))
The similarity matrix of all graphs in graph_list.
"""
cdef
int
n
=
len
(
graph_list
)
cdef
double
[:,:]
k
=
np
.
zeros
((
n
,
n
))
cdef
list
adjacency_matrices
=
[[
None
for
i
in
range
(
n
)]
for
j
in
range
(
n
)]
cdef
int
i
,
j
for
i
in
range
(
n
):
for
j
in
range
(
i
,
n
):
adjacency_matrices
[
i
][
j
]
=
get_adjacency
(
graph_list
[
i
],
graph_list
[
j
])
adjacency_matrices
[
j
][
i
]
=
adjacency_matrices
[
i
][
j
]
for
i
in
range
(
n
):
for
j
in
range
(
i
,
n
):
if
len
(
graph_list
[
i
])
>
0
and
len
(
graph_list
[
j
])
>
0
:
a
,
b
=
adjacency_matrices
[
i
][
j
]
k
[
i
][
j
]
=
self
.
compare_two
(
a
,
b
)
k
[
j
][
i
]
=
k
[
i
][
j
]
k_norm
=
np
.
zeros
((
n
,
n
))
for
i
in
range
(
n
):
for
j
in
range
(
i
,
n
):
k_norm
[
i
,
j
]
=
k
[
i
][
j
]
/
np
.
sqrt
(
k
[
i
][
i
]
*
k
[
j
][
j
])
k_norm
[
j
,
i
]
=
k_norm
[
i
,
j
]
return
np
.
nan_to_num
(
k_norm
)
\ No newline at end of file
setup.py
View file @
c486d941
...
...
@@ -70,7 +70,7 @@ setup(
cmdclass
=
{
'build_ext'
:
build_ext
},
setup_requires
=
[
"numpy"
,
"networkx"
,
"scipy"
,
'scikit-learn'
],
install_requires
=
[
"numpy"
,
"networkx"
,
"scipy"
,
'scikit-learn'
],
version
=
"0.2.4
alph
a"
,
version
=
"0.2.4
.2bet
a"
,
classifiers
=
[
"Programming Language :: Python :: 3"
,
"License :: OSI Approved :: MIT License"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment