From c486d94183e72c148a55f771bc98a62435b07d97 Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Fri, 22 Feb 2019 17:01:56 +0100 Subject: [PATCH] Shortest path kernel work! --- README.md | 1 - gmatch4py/kernels/adjacency.pyx | 52 ++++++++++ gmatch4py/kernels/shortest_path_kernel.pyx | 105 ++++++++++++++++----- setup.py | 2 +- 4 files changed, 136 insertions(+), 24 deletions(-) create mode 100644 gmatch4py/kernels/adjacency.pyx diff --git a/README.md b/README.md index 31f1278..332cc17 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,6 @@ ged.distance(result) * Shortest Path Kernel [3] * Weisfeiler-Lehman Kernel [4] * Subtree Kernel - * Edge Kernel * Graph Edit Distance [5] * Approximated Graph Edit Distance * Hausdorff Graph Edit Distance diff --git a/gmatch4py/kernels/adjacency.pyx b/gmatch4py/kernels/adjacency.pyx new file mode 100644 index 0000000..67b3cb5 --- /dev/null +++ b/gmatch4py/kernels/adjacency.pyx @@ -0,0 +1,52 @@ +import networkx as nx +import numpy as np + +def get_adjacency(G1,G2): + """ + Return adjacency matrices of two graph based on nodes present in both of them. + + Parameters + ---------- + G1 : nx.Graph + first graph + G2 : nx.Graph + second graph + + Returns + ------- + tuple of np.array + adjacency matrices of G1 and G2 + """ + + # Extract nodes + nodes_G1=list(G1.nodes()) + nodes_G2=list(G2.nodes()) + + # Get Adjacency Matrix for each graph + adj_original_G1 = nx.convert_matrix.to_numpy_matrix(G1,nodes_G1) + adj_original_G2 = nx.convert_matrix.to_numpy_matrix(G2,nodes_G2) + + # Get old index + index_node_G1={node: ix for ix,node in enumerate(nodes_G1)} + index_node_G2={node: ix for ix,node in enumerate(nodes_G2)} + + # Building new indices + nodes_unique = list(set(nodes_G1).union(nodes_G2)) + new_node_index = {node:i for i,node in enumerate(nodes_unique)} + + n=len(nodes_unique) + + #Generate new adjacent matrices + new_adj_G1= np.zeros((n,n)) + new_adj_G2= np.zeros((n,n)) + + # Filling old values + for n1 in nodes_unique: + for n2 in nodes_unique: + if n1 in G1.nodes() and n2 in G1.nodes(): + new_adj_G1[new_node_index[n1],new_node_index[n2]]=adj_original_G1[index_node_G1[n1],index_node_G1[n2]] + if n1 in G2.nodes() and n2 in G2.nodes(): + new_adj_G2[new_node_index[n1],new_node_index[n2]]=adj_original_G2[index_node_G2[n1],index_node_G2[n2]] + + return new_adj_G1,new_adj_G2 + diff --git a/gmatch4py/kernels/shortest_path_kernel.pyx b/gmatch4py/kernels/shortest_path_kernel.pyx index e7e7444..8c5c93d 100644 --- a/gmatch4py/kernels/shortest_path_kernel.pyx +++ b/gmatch4py/kernels/shortest_path_kernel.pyx @@ -12,15 +12,21 @@ Modified by : Jacques Fize import networkx as nx import numpy as np +cimport numpy as np +from scipy.sparse.csgraph import floyd_warshall +from .adjacency import get_adjacency +from cython.parallel cimport prange,parallel +from ..helpers.general import parsenx2graph +from ..base cimport Base - -class ShortestPathGraphKernel: +cdef class ShortestPathGraphKernel(Base): """ Shorthest path graph kernel. """ - __type__ = "sim" - @staticmethod - def compare( g_1, g_2, verbose=False): + def __init__(self): + Base.__init__(self,0,True) + + def compare_two(self,g_1, g_2): """Compute the kernel value (similarity) between two graphs. Parameters ---------- @@ -34,15 +40,18 @@ class ShortestPathGraphKernel: """ # Diagonal superior matrix of the floyd warshall shortest # paths: - fwm1 = np.array(nx.floyd_warshall_numpy(g_1)) - fwm1 = np.where(fwm1 == np.inf, 0, fwm1) - fwm1 = np.where(fwm1 == np.nan, 0, fwm1) + if isinstance(g_1,nx.Graph) and isinstance(g_2,nx.Graph): + g_1,g_2= get_adjacency(g_1,g_2) + + fwm1 = np.array(floyd_warshall(g_1)) + fwm1[np.isinf(fwm1)] = 0 + fwm1[np.isnan(fwm1)] = 0 fwm1 = np.triu(fwm1, k=1) bc1 = np.bincount(fwm1.reshape(-1).astype(int)) - fwm2 = np.array(nx.floyd_warshall_numpy(g_2)) - fwm2 = np.where(fwm2 == np.inf, 0, fwm2) - fwm2 = np.where(fwm2 == np.nan, 0, fwm2) + fwm2 = np.array(floyd_warshall(g_2)) + fwm2[np.isinf(fwm2)] = 0 + fwm2[np.isnan(fwm2)] = 0 fwm2 = np.triu(fwm2, k=1) bc2 = np.bincount(fwm2.reshape(-1).astype(int)) @@ -57,8 +66,7 @@ class ShortestPathGraphKernel: return np.sum(v1 * v2) - @staticmethod - def compare_list(graph_list, verbose=False): + cpdef np.ndarray compare(self,list graph_list, list selected): """Compute the all-pairs kernel values for a list of graphs. This function can be used to directly compute the kernel matrix for a list of graphs. The direct computation of the @@ -73,16 +81,69 @@ class ShortestPathGraphKernel: K: numpy.array, shape = (len(graph_list), len(graph_list)) The similarity matrix of all graphs in graph_list. """ - n = len(graph_list) - k = np.zeros((n, n)) + cdef int n = len(graph_list) + cdef double[:,:] k = np.zeros((n, n)) + cdef int cpu_count = self.cpu_count + cdef list adjacency_matrices = [[None for i in range(n)]for j in range(n)] + cdef int i,j for i in range(n): for j in range(i, n): - k[i, j] = ShortestPathGraphKernel.compare(graph_list[i], graph_list[j]) - k[j, i] = k[i, j] + adjacency_matrices[i][j] = get_adjacency(graph_list[i],graph_list[j]) + adjacency_matrices[j][i] = adjacency_matrices[i][j] + + with nogil, parallel(num_threads=cpu_count): + for i in prange(n,schedule='static'): + for j in range(i, n): + with gil: + if len(graph_list[i]) > 0 and len(graph_list[j]) >0: + a,b=adjacency_matrices[i][j] + k[i][j] = self.compare_two(a,b) + k[j][i] = k[i][j] + + k_norm = np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + k_norm[i, j] = k[i][j] / np.sqrt(k[i][i] * k[j][j]) + k_norm[j, i] = k_norm[i, j] - k_norm = np.zeros(k.shape) - for i in range(k.shape[0]): - for j in range(k.shape[1]): - k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) + return np.nan_to_num(k_norm) - return k_norm \ No newline at end of file + cpdef np.ndarray compare_single_core(self,list graph_list, list selected): + """Compute the all-pairs kernel values for a list of graphs. + This function can be used to directly compute the kernel + matrix for a list of graphs. The direct computation of the + kernel matrix is faster than the computation of all individual + pairwise kernel values. + Parameters + ---------- + graph_list: list + A list of graphs (list of networkx graphs) + Return + ------ + K: numpy.array, shape = (len(graph_list), len(graph_list)) + The similarity matrix of all graphs in graph_list. + """ + cdef int n = len(graph_list) + cdef double[:,:] k = np.zeros((n, n)) + + cdef list adjacency_matrices = [[None for i in range(n)]for j in range(n)] + cdef int i,j + for i in range(n): + for j in range(i, n): + adjacency_matrices[i][j] = get_adjacency(graph_list[i],graph_list[j]) + adjacency_matrices[j][i] = adjacency_matrices[i][j] + + for i in range(n): + for j in range(i, n): + if len(graph_list[i]) > 0 and len(graph_list[j]) >0: + a,b=adjacency_matrices[i][j] + k[i][j] = self.compare_two(a,b) + k[j][i] = k[i][j] + + k_norm = np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + k_norm[i, j] = k[i][j] / np.sqrt(k[i][i] * k[j][j]) + k_norm[j, i] = k_norm[i, j] + + return np.nan_to_num(k_norm) \ No newline at end of file diff --git a/setup.py b/setup.py index 71fcff8..c380167 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( cmdclass={'build_ext': build_ext}, setup_requires=["numpy","networkx","scipy",'scikit-learn'], install_requires=["numpy","networkx","scipy",'scikit-learn'], - version="0.2.4alpha", + version="0.2.4.2beta", classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", -- GitLab