Commit c486d941 authored by Fize Jacques

Shortest path kernel work!

parent 02c1bea1
...@@ -84,7 +84,6 @@ ged.distance(result) ...@@ -84,7 +84,6 @@ ged.distance(result)
* Shortest Path Kernel [3] * Shortest Path Kernel [3]
* Weisfeiler-Lehman Kernel [4] * Weisfeiler-Lehman Kernel [4]
* Subtree Kernel * Subtree Kernel
* Edge Kernel
* Graph Edit Distance [5] * Graph Edit Distance [5]
* Approximated Graph Edit Distance * Approximated Graph Edit Distance
* Hausdorff Graph Edit Distance * Hausdorff Graph Edit Distance
......
import networkx as nx
import numpy as np
def get_adjacency(G1, G2):
    """
    Return the adjacency matrices of two graphs aligned on a shared node index.

    Both matrices are square, with one row/column per node of the *union* of
    the two node sets, so entry ``[i, j]`` designates the same pair of nodes
    in both matrices. Rows and columns of a node that is absent from a graph
    stay at zero in that graph's matrix.

    Parameters
    ----------
    G1 : nx.Graph
        first graph
    G2 : nx.Graph
        second graph

    Returns
    -------
    tuple of np.array
        adjacency matrices of G1 and G2, each of shape (n, n) where n is the
        number of distinct nodes across both graphs
    """
    nodes_G1 = list(G1.nodes())
    nodes_G2 = list(G2.nodes())

    # Shared index: G1's nodes first, then the nodes found only in G2.
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # layout is reproducible from run to run (a plain set() is not, because
    # of string hash randomisation).
    shared_nodes = dict.fromkeys(nodes_G1 + nodes_G2)
    new_node_index = {node: i for i, node in enumerate(shared_nodes)}
    n = len(new_node_index)

    aligned = []
    for graph, nodes in ((G1, nodes_G1), (G2, nodes_G2)):
        new_adj = np.zeros((n, n))
        # An empty graph contributes nothing; skip the conversion entirely.
        if nodes:
            # to_numpy_array replaces to_numpy_matrix, which no longer exists
            # in NetworkX >= 3.0.
            adj_original = nx.to_numpy_array(graph, nodelist=nodes)
            positions = [new_node_index[node] for node in nodes]
            # Scatter the whole original matrix into its rows/columns of the
            # shared layout in a single block assignment.
            new_adj[np.ix_(positions, positions)] = adj_original
        aligned.append(new_adj)

    return aligned[0], aligned[1]
...@@ -12,15 +12,21 @@ Modified by : Jacques Fize ...@@ -12,15 +12,21 @@ Modified by : Jacques Fize
import networkx as nx import networkx as nx
import numpy as np import numpy as np
cimport numpy as np
from scipy.sparse.csgraph import floyd_warshall
from .adjacency import get_adjacency
from cython.parallel cimport prange,parallel
from ..helpers.general import parsenx2graph
from ..base cimport Base
cdef class ShortestPathGraphKernel(Base):
class ShortestPathGraphKernel:
""" """
Shorthest path graph kernel. Shorthest path graph kernel.
""" """
__type__ = "sim" def __init__(self):
@staticmethod Base.__init__(self,0,True)
def compare( g_1, g_2, verbose=False):
def compare_two(self,g_1, g_2):
"""Compute the kernel value (similarity) between two graphs. """Compute the kernel value (similarity) between two graphs.
Parameters Parameters
---------- ----------
...@@ -34,15 +40,18 @@ class ShortestPathGraphKernel: ...@@ -34,15 +40,18 @@ class ShortestPathGraphKernel:
""" """
# Diagonal superior matrix of the floyd warshall shortest # Diagonal superior matrix of the floyd warshall shortest
# paths: # paths:
fwm1 = np.array(nx.floyd_warshall_numpy(g_1)) if isinstance(g_1,nx.Graph) and isinstance(g_2,nx.Graph):
fwm1 = np.where(fwm1 == np.inf, 0, fwm1) g_1,g_2= get_adjacency(g_1,g_2)
fwm1 = np.where(fwm1 == np.nan, 0, fwm1)
fwm1 = np.array(floyd_warshall(g_1))
fwm1[np.isinf(fwm1)] = 0
fwm1[np.isnan(fwm1)] = 0
fwm1 = np.triu(fwm1, k=1) fwm1 = np.triu(fwm1, k=1)
bc1 = np.bincount(fwm1.reshape(-1).astype(int)) bc1 = np.bincount(fwm1.reshape(-1).astype(int))
fwm2 = np.array(nx.floyd_warshall_numpy(g_2)) fwm2 = np.array(floyd_warshall(g_2))
fwm2 = np.where(fwm2 == np.inf, 0, fwm2) fwm2[np.isinf(fwm2)] = 0
fwm2 = np.where(fwm2 == np.nan, 0, fwm2) fwm2[np.isnan(fwm2)] = 0
fwm2 = np.triu(fwm2, k=1) fwm2 = np.triu(fwm2, k=1)
bc2 = np.bincount(fwm2.reshape(-1).astype(int)) bc2 = np.bincount(fwm2.reshape(-1).astype(int))
...@@ -57,8 +66,7 @@ class ShortestPathGraphKernel: ...@@ -57,8 +66,7 @@ class ShortestPathGraphKernel:
return np.sum(v1 * v2) return np.sum(v1 * v2)
@staticmethod cpdef np.ndarray compare(self,list graph_list, list selected):
def compare_list(graph_list, verbose=False):
"""Compute the all-pairs kernel values for a list of graphs. """Compute the all-pairs kernel values for a list of graphs.
This function can be used to directly compute the kernel This function can be used to directly compute the kernel
matrix for a list of graphs. The direct computation of the matrix for a list of graphs. The direct computation of the
...@@ -73,16 +81,69 @@ class ShortestPathGraphKernel: ...@@ -73,16 +81,69 @@ class ShortestPathGraphKernel:
K: numpy.array, shape = (len(graph_list), len(graph_list)) K: numpy.array, shape = (len(graph_list), len(graph_list))
The similarity matrix of all graphs in graph_list. The similarity matrix of all graphs in graph_list.
""" """
n = len(graph_list) cdef int n = len(graph_list)
k = np.zeros((n, n)) cdef double[:,:] k = np.zeros((n, n))
cdef int cpu_count = self.cpu_count
cdef list adjacency_matrices = [[None for i in range(n)]for j in range(n)]
cdef int i,j
for i in range(n): for i in range(n):
for j in range(i, n): for j in range(i, n):
k[i, j] = ShortestPathGraphKernel.compare(graph_list[i], graph_list[j]) adjacency_matrices[i][j] = get_adjacency(graph_list[i],graph_list[j])
k[j, i] = k[i, j] adjacency_matrices[j][i] = adjacency_matrices[i][j]
with nogil, parallel(num_threads=cpu_count):
for i in prange(n,schedule='static'):
for j in range(i, n):
with gil:
if len(graph_list[i]) > 0 and len(graph_list[j]) >0:
a,b=adjacency_matrices[i][j]
k[i][j] = self.compare_two(a,b)
k[j][i] = k[i][j]
k_norm = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
k_norm[i, j] = k[i][j] / np.sqrt(k[i][i] * k[j][j])
k_norm[j, i] = k_norm[i, j]
k_norm = np.zeros(k.shape) return np.nan_to_num(k_norm)
for i in range(k.shape[0]):
for j in range(k.shape[1]):
k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j])
return k_norm cpdef np.ndarray compare_single_core(self,list graph_list, list selected):
\ No newline at end of file """Compute the all-pairs kernel values for a list of graphs.
This function can be used to directly compute the kernel
matrix for a list of graphs. The direct computation of the
kernel matrix is faster than the computation of all individual
pairwise kernel values.
Parameters
----------
graph_list: list
A list of graphs (list of networkx graphs)
Return
------
K: numpy.array, shape = (len(graph_list), len(graph_list))
The similarity matrix of all graphs in graph_list.
"""
cdef int n = len(graph_list)
cdef double[:,:] k = np.zeros((n, n))
cdef list adjacency_matrices = [[None for i in range(n)]for j in range(n)]
cdef int i,j
for i in range(n):
for j in range(i, n):
adjacency_matrices[i][j] = get_adjacency(graph_list[i],graph_list[j])
adjacency_matrices[j][i] = adjacency_matrices[i][j]
for i in range(n):
for j in range(i, n):
if len(graph_list[i]) > 0 and len(graph_list[j]) >0:
a,b=adjacency_matrices[i][j]
k[i][j] = self.compare_two(a,b)
k[j][i] = k[i][j]
k_norm = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
k_norm[i, j] = k[i][j] / np.sqrt(k[i][i] * k[j][j])
k_norm[j, i] = k_norm[i, j]
return np.nan_to_num(k_norm)
\ No newline at end of file
...@@ -70,7 +70,7 @@ setup( ...@@ -70,7 +70,7 @@ setup(
cmdclass={'build_ext': build_ext}, cmdclass={'build_ext': build_ext},
setup_requires=["numpy","networkx","scipy",'scikit-learn'], setup_requires=["numpy","networkx","scipy",'scikit-learn'],
install_requires=["numpy","networkx","scipy",'scikit-learn'], install_requires=["numpy","networkx","scipy",'scikit-learn'],
version="0.2.4alpha", version="0.2.4.2beta",
classifiers=[ classifiers=[
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License", "License :: OSI Approved :: MIT License",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment