Shortest path kernel work!

c486d941 · Fize Jacques · 02c1bea1 · c486d941 · c486d941 · c486d941
Commit c486d941 authored 6 years ago by Fize Jacques
Hide whitespace changes
Inline Side-by-side

Showing

with 136 additions and 24 deletions
+136 -24
--- a/README.md
+++ b/README.md
@@ -84,7 +84,6 @@ ged.distance(result)
    * Shortest Path Kernel [3]
    * Weisfeiler-Lehman Kernel [4]
        * Subtree Kernel 
-        * Edge Kernel
 * Graph Edit Distance [5]
    * Approximated Graph Edit Distance 
    * Hausdorff Graph Edit Distance 

--- a/gmatch4py/kernels/adjacency.pyx
+++ b/gmatch4py/kernels/adjacency.pyx
+import networkx as nx
+import numpy as np
+def get_adjacency(G1,G2):
+    """
+    Return adjacency matrices of two graphs, both re-indexed over the union of their nodes.
+    Parameters
+    ----------
+    G1 : nx.Graph
+        first graph
+    G2 : nx.Graph
+        second graph
+    Returns
+    -------
+    tuple of np.array
+        (n, n) adjacency matrices of G1 and G2, n = number of distinct nodes; a node absent from a graph keeps zero entries
+    """
+    # Node lists fix the row/column order of the original matrices
+    nodes_G1=list(G1.nodes())
+    nodes_G2=list(G2.nodes())
+    # Adjacency matrix of each graph in its own node order. NOTE(review): to_numpy_matrix is deprecated in newer NetworkX - verify against the pinned version
+    adj_original_G1 = nx.convert_matrix.to_numpy_matrix(G1,nodes_G1)
+    adj_original_G2 = nx.convert_matrix.to_numpy_matrix(G2,nodes_G2)
+    # Map each node to its row/column in the original matrix
+    index_node_G1={node: ix for ix,node in enumerate(nodes_G1)}
+    index_node_G2={node: ix for ix,node in enumerate(nodes_G2)}
+    # Shared index over the union of both node sets (set order is arbitrary, but the same index is used for both outputs)
+    nodes_unique = list(set(nodes_G1).union(nodes_G2))
+    new_node_index = {node:i for i,node in enumerate(nodes_unique)}
+    n=len(nodes_unique)
+    # Allocate the zero-filled (n, n) output matrices
+    new_adj_G1= np.zeros((n,n))
+    new_adj_G2= np.zeros((n,n))
+    # Copy each original entry to its position in the shared index; pairs with a node missing from the graph stay 0
+    for n1 in nodes_unique:
+        for n2 in nodes_unique:
+            if n1 in G1.nodes() and n2 in G1.nodes():
+                new_adj_G1[new_node_index[n1],new_node_index[n2]]=adj_original_G1[index_node_G1[n1],index_node_G1[n2]]
+            if n1 in G2.nodes() and n2 in G2.nodes():
+                new_adj_G2[new_node_index[n1],new_node_index[n2]]=adj_original_G2[index_node_G2[n1],index_node_G2[n2]]
+    return new_adj_G1,new_adj_G2
--- a/gmatch4py/kernels/shortest_path_kernel.pyx
+++ b/gmatch4py/kernels/shortest_path_kernel.pyx
@@ -12,15 +12,21 @@ Modified by : Jacques Fize
 import networkx as nx
 import numpy as np
+cimport numpy as np
+from scipy.sparse.csgraph import floyd_warshall
+from .adjacency import get_adjacency
+from cython.parallel cimport prange,parallel
+from ..helpers.general import parsenx2graph
+from ..base cimport Base
+cdef class ShortestPathGraphKernel(Base):
-class ShortestPathGraphKernel:
    """
    Shortest path graph kernel.
    """
-    __type__ = "sim"
+    def __init__(self):
-    @staticmethod
+        Base.__init__(self,0,True)
-    def compare( g_1, g_2, verbose=False):
+    def compare_two(self,g_1, g_2):
        """Compute the kernel value (similarity) between two graphs.
        Parameters
        ----------
@@ -34,15 +40,18 @@ class ShortestPathGraphKernel:
        """
        # Diagonal superior matrix of the floyd warshall shortest
        # paths:
-        fwm1 = np.array(nx.floyd_warshall_numpy(g_1))
+        if isinstance(g_1,nx.Graph) and isinstance(g_2,nx.Graph):
-        fwm1 = np.where(fwm1 == np.inf, 0, fwm1)
+            g_1,g_2= get_adjacency(g_1,g_2)
-        fwm1 = np.where(fwm1 == np.nan, 0, fwm1)
+        fwm1 = np.array(floyd_warshall(g_1))
+        fwm1[np.isinf(fwm1)] = 0
+        fwm1[np.isnan(fwm1)] = 0 
        fwm1 = np.triu(fwm1, k=1)
        bc1 = np.bincount(fwm1.reshape(-1).astype(int))
-        fwm2 = np.array(nx.floyd_warshall_numpy(g_2))
+        fwm2 = np.array(floyd_warshall(g_2))
-        fwm2 = np.where(fwm2 == np.inf, 0, fwm2)
+        fwm2[np.isinf(fwm2)] = 0
-        fwm2 = np.where(fwm2 == np.nan, 0, fwm2)
+        fwm2[np.isnan(fwm2)] = 0 
        fwm2 = np.triu(fwm2, k=1)
        bc2 = np.bincount(fwm2.reshape(-1).astype(int))
@@ -57,8 +66,7 @@ class ShortestPathGraphKernel:
        return np.sum(v1 * v2)
-    @staticmethod
+    cpdef np.ndarray compare(self,list graph_list, list selected):
-    def compare_list(graph_list, verbose=False):
        """Compute the all-pairs kernel values for a list of graphs.
        This function can be used to directly compute the kernel
        matrix for a list of graphs. The direct computation of the
@@ -73,16 +81,69 @@ class ShortestPathGraphKernel:
        K: numpy.array, shape = (len(graph_list), len(graph_list))
        The similarity matrix of all graphs in graph_list.
        """
-        n = len(graph_list)
+        cdef int n = len(graph_list)
-        k = np.zeros((n, n))
+        cdef double[:,:] k = np.zeros((n, n))
+        cdef int cpu_count = self.cpu_count
+        cdef list adjacency_matrices = [[None for i in range(n)]for j in range(n)]
+        cdef int i,j
        for i in range(n):
            for j in range(i, n):
-                k[i, j] = ShortestPathGraphKernel.compare(graph_list[i], graph_list[j])
+                adjacency_matrices[i][j] = get_adjacency(graph_list[i],graph_list[j])
-                k[j, i] = k[i, j]
+                adjacency_matrices[j][i] = adjacency_matrices[i][j]
+        with nogil, parallel(num_threads=cpu_count):
+            for i in prange(n,schedule='static'):
+                for j in range(i, n):
+                    with gil:
+                        if len(graph_list[i]) > 0 and len(graph_list[j]) >0: 
+                            a,b=adjacency_matrices[i][j]
+                            k[i][j] = self.compare_two(a,b)
+                    k[j][i] = k[i][j]
+        k_norm = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                k_norm[i, j] = k[i][j] / np.sqrt(k[i][i] * k[j][j])
+                k_norm[j, i] = k_norm[i, j]
-        k_norm = np.zeros(k.shape)
+        return np.nan_to_num(k_norm)
-        for i in range(k.shape[0]):
-            for j in range(k.shape[1]):
-                k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j])
-        return k_norm
+    cpdef np.ndarray compare_single_core(self,list graph_list, list selected):
\ No newline at end of file
+        """Compute the normalised all-pairs kernel matrix for a list of graphs.
+        Sequential (plain ``range`` loops) version: each pair (i, j), j >= i, is
+        aligned by ``get_adjacency``, scored by ``compare_two``, then divided by
+        sqrt(k[i][i] * k[j][j]); the result goes through ``np.nan_to_num``.
+        ``selected`` is accepted but never read in this method.
+        Parameters
+        ----------
+        graph_list: list
+            A list of graphs (list of networkx graphs)
+        Return
+        ------
+        K: numpy.array, shape = (len(graph_list), len(graph_list))
+        The similarity matrix of all graphs in graph_list.
+        """
+        cdef int n = len(graph_list)
+        cdef double[:,:] k = np.zeros((n, n))  # raw (un-normalised) kernel values
+        cdef list adjacency_matrices = [[None for i in range(n)]for j in range(n)]
+        cdef int i,j
+        for i in range(n):
+            for j in range(i, n):
+                adjacency_matrices[i][j] = get_adjacency(graph_list[i],graph_list[j])
+                adjacency_matrices[j][i] = adjacency_matrices[i][j]  # mirror entry; only [i][j] with j >= i is read below
+        for i in range(n):
+            for j in range(i, n):
+                if len(graph_list[i]) > 0 and len(graph_list[j]) >0:  # pairs involving an empty graph keep k = 0
+                    a,b=adjacency_matrices[i][j]
+                    k[i][j] = self.compare_two(a,b)
+                k[j][i] = k[i][j]
+        k_norm = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                k_norm[i, j] = k[i][j] / np.sqrt(k[i][i] * k[j][j])  # normalise by the two self-similarities
+                k_norm[j, i] = k_norm[i, j]
+        return np.nan_to_num(k_norm)
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
@@ -70,7 +70,7 @@ setup(
    cmdclass={'build_ext': build_ext},
    setup_requires=["numpy","networkx","scipy",'scikit-learn'],
    install_requires=["numpy","networkx","scipy",'scikit-learn'],
-    version="0.2.4alpha",
+    version="0.2.4.2beta",
    classifiers=[
            "Programming Language :: Python :: 3",
            "License :: OSI Approved :: MIT License",