From c486d94183e72c148a55f771bc98a62435b07d97 Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Fri, 22 Feb 2019 17:01:56 +0100
Subject: [PATCH] Shortest path kernel work!

---
 README.md                                  |   1 -
 gmatch4py/kernels/adjacency.pyx            |  52 ++++++++++
 gmatch4py/kernels/shortest_path_kernel.pyx | 105 ++++++++++++++++-----
 setup.py                                   |   2 +-
 4 files changed, 136 insertions(+), 24 deletions(-)
 create mode 100644 gmatch4py/kernels/adjacency.pyx

diff --git a/README.md b/README.md
index 31f1278..332cc17 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,6 @@ ged.distance(result)
     * Shortest Path Kernel [3]
     * Weisfeiler-Lehman Kernel [4]
         * Subtree Kernel 
-        * Edge Kernel
  * Graph Edit Distance [5]
     * Approximated Graph Edit Distance 
     * Hausdorff Graph Edit Distance 
diff --git a/gmatch4py/kernels/adjacency.pyx b/gmatch4py/kernels/adjacency.pyx
new file mode 100644
index 0000000..67b3cb5
--- /dev/null
+++ b/gmatch4py/kernels/adjacency.pyx
@@ -0,0 +1,52 @@
+import networkx as nx
+import numpy as np
+
+def get_adjacency(G1,G2):
+    """
+    Return the adjacency matrices of two graphs, aligned on a common node ordering built from the union of their node sets.
+    
+    Parameters
+    ----------
+    G1 : nx.Graph
+        first graph
+    G2 : nx.Graph
+        second graph
+    
+    Returns
+    -------
+    tuple of np.ndarray
+        aligned adjacency matrices of G1 and G2, indexed over the union of their nodes
+    """
+
+    # Extract nodes
+    nodes_G1=list(G1.nodes())
+    nodes_G2=list(G2.nodes())
+
+    # Get Adjacency Matrix for each graph
+    adj_original_G1 = nx.convert_matrix.to_numpy_matrix(G1,nodes_G1)
+    adj_original_G2 = nx.convert_matrix.to_numpy_matrix(G2,nodes_G2)
+
+    # Map each node to its index in its original adjacency matrix
+    index_node_G1={node: ix for ix,node in enumerate(nodes_G1)}
+    index_node_G2={node: ix for ix,node in enumerate(nodes_G2)}
+
+    # Build a common node index over the union of both node sets
+    nodes_unique = list(set(nodes_G1).union(nodes_G2))
+    new_node_index = {node:i for i,node in enumerate(nodes_unique)}
+
+    n=len(nodes_unique)
+    
+    # Generate the new, aligned adjacency matrices
+    new_adj_G1= np.zeros((n,n))
+    new_adj_G2= np.zeros((n,n))
+
+    # Copy the original adjacency values into the aligned matrices
+    for n1 in nodes_unique:
+        for n2 in nodes_unique:
+            if n1 in index_node_G1 and n2 in index_node_G1:
+                new_adj_G1[new_node_index[n1],new_node_index[n2]]=adj_original_G1[index_node_G1[n1],index_node_G1[n2]]
+            if n1 in index_node_G2 and n2 in index_node_G2:
+                new_adj_G2[new_node_index[n1],new_node_index[n2]]=adj_original_G2[index_node_G2[n1],index_node_G2[n2]]
+
+    return new_adj_G1,new_adj_G2
+
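A minimal usage sketch of the new helper, for illustration only; it assumes the module builds and is importable as gmatch4py.kernels.adjacency:

    import networkx as nx
    from gmatch4py.kernels.adjacency import get_adjacency

    # Two small graphs sharing the node "b": both returned matrices are
    # indexed over the union of nodes {"a", "b", "c"}, in the same order.
    G1 = nx.Graph([("a", "b")])
    G2 = nx.Graph([("b", "c")])
    A1, A2 = get_adjacency(G1, G2)
    print(A1.shape, A2.shape)  # (3, 3) (3, 3)

Because both matrices share one node ordering, they can be fed to the same shortest-path computation without any re-indexing.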
diff --git a/gmatch4py/kernels/shortest_path_kernel.pyx b/gmatch4py/kernels/shortest_path_kernel.pyx
index e7e7444..8c5c93d 100644
--- a/gmatch4py/kernels/shortest_path_kernel.pyx
+++ b/gmatch4py/kernels/shortest_path_kernel.pyx
@@ -12,15 +12,21 @@ Modified by : Jacques Fize
 
 import networkx as nx
 import numpy as np
+cimport numpy as np
+from scipy.sparse.csgraph import floyd_warshall
+from .adjacency import get_adjacency
+from cython.parallel cimport prange,parallel
+from ..helpers.general import parsenx2graph
+from ..base cimport Base
 
-
-class ShortestPathGraphKernel:
+cdef class ShortestPathGraphKernel(Base):
     """
     Shortest path graph kernel.
     """
-    __type__ = "sim"
-    @staticmethod
-    def compare( g_1, g_2, verbose=False):
+    def __init__(self):
+        Base.__init__(self,0,True)
+    
+    def compare_two(self,g_1, g_2):
         """Compute the kernel value (similarity) between two graphs.
         Parameters
         ----------
@@ -34,15 +40,18 @@ class ShortestPathGraphKernel:
         """
         # Diagonal superior matrix of the floyd warshall shortest
         # paths:
-        fwm1 = np.array(nx.floyd_warshall_numpy(g_1))
-        fwm1 = np.where(fwm1 == np.inf, 0, fwm1)
-        fwm1 = np.where(fwm1 == np.nan, 0, fwm1)
+        if isinstance(g_1,nx.Graph) and isinstance(g_2,nx.Graph):
+            g_1,g_2= get_adjacency(g_1,g_2)
+
+        fwm1 = np.array(floyd_warshall(g_1))
+        fwm1[np.isinf(fwm1)] = 0
+        fwm1[np.isnan(fwm1)] = 0 
         fwm1 = np.triu(fwm1, k=1)
         bc1 = np.bincount(fwm1.reshape(-1).astype(int))
 
-        fwm2 = np.array(nx.floyd_warshall_numpy(g_2))
-        fwm2 = np.where(fwm2 == np.inf, 0, fwm2)
-        fwm2 = np.where(fwm2 == np.nan, 0, fwm2)
+        fwm2 = np.array(floyd_warshall(g_2))
+        fwm2[np.isinf(fwm2)] = 0
+        fwm2[np.isnan(fwm2)] = 0 
         fwm2 = np.triu(fwm2, k=1)
         bc2 = np.bincount(fwm2.reshape(-1).astype(int))
 
@@ -57,8 +66,7 @@ class ShortestPathGraphKernel:
         return np.sum(v1 * v2)
 
 
-    @staticmethod
-    def compare_list(graph_list, verbose=False):
+    cpdef np.ndarray compare(self,list graph_list, list selected):
         """Compute the all-pairs kernel values for a list of graphs.
         This function can be used to directly compute the kernel
         matrix for a list of graphs. The direct computation of the
@@ -73,16 +81,69 @@ class ShortestPathGraphKernel:
         K: numpy.array, shape = (len(graph_list), len(graph_list))
         The similarity matrix of all graphs in graph_list.
         """
-        n = len(graph_list)
-        k = np.zeros((n, n))
+        cdef int n = len(graph_list)
+        cdef double[:,:] k = np.zeros((n, n))
+        cdef int cpu_count = self.cpu_count
+        cdef list adjacency_matrices = [[None for i in range(n)] for j in range(n)]
+        cdef int i,j
         for i in range(n):
             for j in range(i, n):
-                k[i, j] = ShortestPathGraphKernel.compare(graph_list[i], graph_list[j])
-                k[j, i] = k[i, j]
+                adjacency_matrices[i][j] = get_adjacency(graph_list[i],graph_list[j])
+                adjacency_matrices[j][i] = adjacency_matrices[i][j]
+        
+        with nogil, parallel(num_threads=cpu_count):
+            for i in prange(n,schedule='static'):
+                for j in range(i, n):
+                    with gil:
+                        if len(graph_list[i]) > 0 and len(graph_list[j]) > 0:
+                            a, b = adjacency_matrices[i][j]
+                            k[i, j] = self.compare_two(a, b)
+                    k[j, i] = k[i, j]
+
+        k_norm = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j])
+                k_norm[j, i] = k_norm[i, j]
 
-        k_norm = np.zeros(k.shape)
-        for i in range(k.shape[0]):
-            for j in range(k.shape[1]):
-                k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j])
+        return np.nan_to_num(k_norm)
 
-        return k_norm
\ No newline at end of file
+    cpdef np.ndarray compare_single_core(self,list graph_list, list selected):
+        """Compute the all-pairs kernel values for a list of graphs.
+        This function can be used to directly compute the kernel
+        matrix for a list of graphs. The direct computation of the
+        kernel matrix is faster than the computation of all individual
+        pairwise kernel values.
+        Parameters
+        ----------
+        graph_list: list
+            A list of graphs (list of networkx graphs)
+        Return
+        ------
+        K: numpy.array, shape = (len(graph_list), len(graph_list))
+        The similarity matrix of all graphs in graph_list.
+        """
+        cdef int n = len(graph_list)
+        cdef double[:,:] k = np.zeros((n, n))
+        
+        cdef list adjacency_matrices = [[None for i in range(n)] for j in range(n)]
+        cdef int i,j
+        for i in range(n):
+            for j in range(i, n):
+                adjacency_matrices[i][j] = get_adjacency(graph_list[i],graph_list[j])
+                adjacency_matrices[j][i] = adjacency_matrices[i][j]
+
+        for i in range(n):
+            for j in range(i, n):
+                if len(graph_list[i]) > 0 and len(graph_list[j]) > 0:
+                    a, b = adjacency_matrices[i][j]
+                    k[i, j] = self.compare_two(a, b)
+                k[j, i] = k[i, j]
+
+        k_norm = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j])
+                k_norm[j, i] = k_norm[i, j]
+        
+        return np.nan_to_num(k_norm)
\ No newline at end of file
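A minimal usage sketch of the reworked kernel class, for illustration only; it assumes the extension compiles and that passing None for the selected argument compares all pairs (an assumption, not verified against the Base API):

    import networkx as nx
    from gmatch4py.kernels.shortest_path_kernel import ShortestPathGraphKernel

    graphs = [nx.cycle_graph(4), nx.path_graph(4), nx.complete_graph(4)]
    kernel = ShortestPathGraphKernel()
    K = kernel.compare(graphs, None)  # normalized similarity matrix, shape (3, 3)
    print(K)

As in the README example (ged.distance(result)), the similarity matrix can then be turned into a distance matrix through the shared Base helpers.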
diff --git a/setup.py b/setup.py
index 71fcff8..c380167 100644
--- a/setup.py
+++ b/setup.py
@@ -70,7 +70,7 @@ setup(
     cmdclass={'build_ext': build_ext},
     setup_requires=["numpy","networkx","scipy",'scikit-learn'],
     install_requires=["numpy","networkx","scipy",'scikit-learn'],
-    version="0.2.4alpha",
+    version="0.2.4.2beta",
     classifiers=[
             "Programming Language :: Python :: 3",
             "License :: OSI Approved :: MIT License",
-- 
GitLab