Add Graph kernels + Change template Graph Viewer

4eaadc56 · Pokiros · 957a7e13 · 957a7e13 · 4eaadc56 · 4eaadc56
Commit 4eaadc56 authored 7 years ago by Pokiros
Hide whitespace changes
Inline Side-by-side

Showing

with 184 additions and 151 deletions
+184 -151
--- a/ged4py/kernels/GKextCPy.py
+++ b/ged4py/kernels/GKextCPy.py
-# coding = utf-8
-import numpy as np
-from scipy.sparse import bsr_matrix
-def selectLinearGaussian(h1,h2,h3,sigma):
-    k=0
-    if sigma >0:
-        for i in range(len(h1)):
-            k+=h1[i]*h2[i]
-    else:
-        for i in range(len(h1)):
-            k+=(h1[i]-h2[i])**2
-        k=np.exp(-1*k/(2*(sigma**2)))
-    return k
-def productMapping(e1,e2,v1_label,v2_label,H):
-    nv_x=0
-    for i in range(len(v1_label)):
-        for j in range(len(v2_label)):
-            if v1_label[i]==v2_label[j]:
-                H[i][j]=nv_x
-                nv_x+=1
-    return nv_x
-def productAdjacency(e1,e2,v1_label,v2_label,H):
-    n_vx = len(v1_label) * len(v2_label);
-    Ax =bsr_matrix((n_vx,n_vx))
-    #dAx
-    v=[]
-    for i in range(len(e1)):
-        for j in range(len(e2)):
-            if (v1_label[e1[i][0]] == v2_label[e2[j][0]]
-                and v1_label[e1(i, 1)] == v2_label[e2(j, 1)]
-                and e1(i, 2) == e2(j, 2)):
-                v.append((H(e1(i, 0), e2(j, 0)), H(e1(i, 1), e2(j, 1)), 1.0));
-                v.append((H(e1(i, 1), e2(j, 1)), H(e1(i, 0), e2(j, 0)), 1.0));
-            if (v1_label[e1[i][0]] == v2_label[e2[j][0]]
-                and v1_label[e1(i, 1)] == v2_label[e2(j, 0)]
-                and e1(i, 2) == e2(j, 2)) :
-                v.append(T(H(e1(i, 0), e2(j, 1)), H(e1(i, 1), e2(j, 0)), 1.0));
-                v.append(T(H(e1(i, 1), e2(j, 0)), H(e1(i, 0), e2(j, 1)), 1.0));
--- a/ged4py/kernels/shortest_path_kernel.py
+++ b/ged4py/kernels/shortest_path_kernel.py
+# coding = utf-8
+"""Shortest-Path graph kernel.
+Python implementation based on: "Shortest-path kernels on graphs", by
+Borgwardt, K.M.; Kriegel, H.-P., in Data Mining, Fifth IEEE
+International Conference on , vol., no., pp.8 pp.-, 27-30 Nov. 2005
+doi: 10.1109/ICDM.2005.132
+Author : Sandro Vega-Pons, Emanuele Olivetti
+Modified by : Jacques Fize
+"""
+import numpy as np
+import networkx as nx
+class ShortestPathGraphKernel:
+    """
+    Shortest path graph kernel.
+
+    Each graph is summarised by a histogram of its pairwise shortest-path
+    lengths; the kernel value of two graphs is the dot product of their
+    histograms.
+    """
+
+    @staticmethod
+    def _path_length_histogram(graph):
+        """Count the shortest paths of each positive length in ``graph``.
+
+        Parameters
+        ----------
+        graph : networkx.Graph
+            Graph to summarise.
+
+        Returns
+        -------
+        numpy.ndarray
+            Integer array whose entry ``d - 1`` is the number of node pairs
+            (upper triangle of the distance matrix) at distance ``d``.
+            Distances are truncated to integers before counting.
+        """
+        dist = np.array(nx.floyd_warshall_numpy(graph), dtype=float)
+        # Unreachable pairs come back as inf. NaN never compares equal to
+        # itself, so an ``== np.nan`` test cannot catch it; np.isfinite
+        # handles both cases in one pass.
+        dist = np.where(np.isfinite(dist), dist, 0)
+        # Keep each unordered pair once and drop the diagonal.
+        dist = np.triu(dist, k=1)
+        counts = np.bincount(dist.reshape(-1).astype(int))
+        # Bin 0 only holds the zeroed-out entries, not real paths.
+        return counts[1:]
+
+    @staticmethod
+    def _histogram_product(hist_1, hist_2):
+        """Return the dot product of two histograms of unequal length.
+
+        Bins missing from the shorter histogram count as zero, so only the
+        common prefix contributes.
+        """
+        size = min(len(hist_1), len(hist_2))
+        return np.sum(hist_1[:size].astype(np.float64) * hist_2[:size])
+
+    def compare(self, g_1, g_2, verbose=False):
+        """Compute the kernel value (similarity) between two graphs.
+
+        Parameters
+        ----------
+        g_1 : networkx.Graph
+            First graph.
+        g_2 : networkx.Graph
+            Second graph.
+        verbose : bool
+            Unused; kept for interface compatibility.
+
+        Returns
+        -------
+        k : The (unnormalized) similarity value between g_1 and g_2.
+        """
+        return self._histogram_product(self._path_length_histogram(g_1),
+                                       self._path_length_histogram(g_2))
+
+    def compare_normalized(self, g_1, g_2, verbose=False):
+        """Compute the normalized kernel value between two graphs.
+
+        A normalized version of the kernel is given by the equation:
+        k_norm(g1, g2) = k(g1, g2) / sqrt(k(g1,g1) * k(g2,g2))
+
+        Parameters
+        ----------
+        g_1 : networkx.Graph
+            First graph.
+        g_2 : networkx.Graph
+            Second graph.
+        verbose : bool
+            Unused; kept for interface compatibility.
+
+        Returns
+        -------
+        k : The similarity value between g_1 and g_2. It is 0.0 when either
+            graph has no path at all (its self-similarity is zero), instead
+            of the NaN a plain division would produce.
+        """
+        hist_1 = self._path_length_histogram(g_1)
+        hist_2 = self._path_length_histogram(g_2)
+        denominator = np.sqrt(self._histogram_product(hist_1, hist_1) *
+                              self._histogram_product(hist_2, hist_2))
+        if denominator == 0:
+            return 0.0
+        return self._histogram_product(hist_1, hist_2) / denominator
+
+    def compare_list(self, graph_list, verbose=False):
+        """Compute the all-pairs normalized kernel values for a list of graphs.
+
+        The shortest-path histogram of every graph is computed once and
+        reused for all pairs, which is what makes this faster than calling
+        ``compare_normalized`` on each pair.
+
+        Parameters
+        ----------
+        graph_list: list
+            A list of graphs (list of networkx graphs)
+        verbose : bool
+            Unused; kept for interface compatibility.
+
+        Return
+        ------
+        K: numpy.array, shape = (len(graph_list), len(graph_list))
+        The normalized similarity matrix of all graphs in graph_list.
+        Entries involving a graph without any path are 0.0.
+        """
+        histograms = [self._path_length_histogram(g) for g in graph_list]
+        n = len(histograms)
+        width = max((len(hist) for hist in histograms), default=0)
+        # One zero-padded feature row per graph.
+        features = np.zeros((n, width))
+        for row, hist in zip(features, histograms):
+            row[:len(hist)] = hist
+        k = np.dot(features, features.transpose())
+        self_similarity = np.diag(k)
+        denominator = np.sqrt(np.outer(self_similarity, self_similarity))
+        # Leave 0.0 wherever the denominator vanishes rather than emit NaN.
+        return np.divide(k, denominator, out=np.zeros_like(k),
+                         where=denominator > 0)
\ No newline at end of file
--- a/ged4py/weisfeiler_lehman.py
+++ b/ged4py/weisfeiler_lehman.py
@@ -8,25 +8,23 @@ Mehlhorn, Karsten M. Borgwardt, JMLR, 2012.
 http://jmlr.csail.mit.edu/papers/v12/shervashidze11a.html
 Author : Sandro Vega-Pons, Emanuele Olivetti
+Source : https://github.com/emanuele/jstsp2015/blob/master/gk_weisfeiler_lehman.py
+Modified by : Jacques Fize
 """
 import numpy as np
 import networkx as nx
 import copy
+class WeisfeleirLehmanKernel(object):
-class GK_WL():
+    @staticmethod
-    """
+    def compare(self,graph_list,h=2):
-    Weisfeiler_Lehman graph kernel.
-    """
-    def compare_list(self, graph_list, h=1, node_label=True):
        """Compute the all-pairs kernel values for a list of graphs.
        This function can be used to directly compute the kernel
        matrix for a list of graphs. The direct computation of the
        kernel matrix is faster than the computation of all individual
        pairwise kernel values.
        Parameters
        ----------
        graph_list: list
@@ -37,16 +35,13 @@ class GK_WL():
            Whether to use original node labels. True for using node labels
            saved in the attribute 'node_label'. False for using the node
            degree of each node as node attribute.
        Return
        ------
        K: numpy.array, shape = (len(graph_list), len(graph_list))
        The similarity matrix of all graphs in graph_list.
        """
        self.graphs = graph_list
        n = len(graph_list)
-        lists = [0] * n
        k = [0] * (h + 1)
        n_nodes = 0
        n_max = 0
@@ -54,14 +49,13 @@ class GK_WL():
        # Compute adjacency lists and n_nodes, the total number of
        # nodes in the dataset.
        for i in range(n):
-            lists[i] = graph_list[i].adjacency_list()
+            n_nodes += graph_list[i].number_of_nodes()
-            n_nodes = n_nodes + len(graph_list[i])
            # Computing the maximum number of nodes in the graphs. It
            # will be used in the computation of vectorial
            # representation.
-            if(n_max < len(graph_list[i])):
+            if (n_max < graph_list[i].number_of_nodes()):
-                n_max = len(graph_list[i])
+                n_max = graph_list[i].number_of_nodes()
        phi = np.zeros((n_max, n), dtype=np.uint64)
@@ -76,63 +70,50 @@ class GK_WL():
        # mapping from multiset labels (strings) to short labels
        # (integers)
-        if node_label is True:
+        for i in range(n):
-            for i in range(n):
+            nodes = graph_list[i].nodes()
-                l_aux = nx.get_node_attributes(graph_list[i],
+            # It is assumed that the graph has an attribute
-                                               'label').values()
+            # 'node_label'
-                l_aux = list(l_aux)
+            labels[i] = np.zeros(len(nodes), dtype=np.int32)
-                # It is assumed that the graph has an attribute
+            for j in range(len(nodes)):
-                # 'node_label'
+                if not (nodes[j] in label_lookup):
-                labels[i] = np.zeros(len(l_aux), dtype=np.int32)
+                    label_lookup[nodes[j]] = str(label_counter)
+                    labels[i][j] = label_counter
-                for j in range(len(l_aux)):
+                    label_counter += 1
-                    if not (l_aux[j] in label_lookup):
+                else:
-                        label_lookup[l_aux[j]] = label_counter
+                    labels[i][j] = label_lookup[nodes[j]]
-                        labels[i][j] = label_counter
+                # labels are associated to a natural number
-                        label_counter += 1
+                # starting with 0.
-                    else:
+                phi[labels[i][j], i] += 1
-                        labels[i][j] = label_lookup[l_aux[j]]
-                    # labels are associated to a natural number
+            graph_list[i]=nx.relabel_nodes(graph_list[i],label_lookup)
-                    # starting with 0.
+        k = np.dot(phi.transpose(), phi).astype(np.float64)
-                    phi[labels[i][j], i] += 1
-        else:
-            for i in range(n):
-                labels[i] = np.array(list(graph_list[i].degree().values()))
-                for j in range(len(labels[i])):
-                    phi[labels[i][j], i] += 1
-        print(phi)
-        # Simplified vectorial representation of graphs (just taking
-        # the vectors before the kernel iterations), i.e., it is just
-        # the original nodes degree.
-        self.vectors = np.copy(phi.transpose())
-        k = np.dot(phi.transpose(), phi)
        # MAIN LOOP
        it = 0
-        new_labels = copy.deepcopy(labels)
+        new_labels = copy.deepcopy(labels) # Can't work without it !!!
        while it < h:
            # create an empty lookup table
            label_lookup = {}
            label_counter = 0
-            phi = np.zeros((n_nodes, n), dtype=np.uint64)
+            phi = np.zeros((n_nodes, n))
            for i in range(n):
-                for v in range(len(lists[i])):
+                nodes = graph_list[i].nodes()
+                for v in range(len(nodes)):
                    # form a multiset label of the node v of the i'th graph
                    # and convert it to a string
-                    long_label = np.concatenate((np.array([labels[i][v]]),
+                    long_label = []
-                                                 np.sort(labels[i]
+                    long_label.extend(nx.neighbors(graph_list[i],nodes[v]))
-                                                 [lists[i][v]])))
-                    long_label_string = str(long_label)
+                    long_label_string = "".join(long_label)
                    # if the multiset label has not yet occurred, add it to the
                    # lookup table and assign a number to it
                    if not (long_label_string in label_lookup):
-                        label_lookup[long_label_string] = label_counter
+                        label_lookup[long_label_string] = str(label_counter)
                        new_labels[i][v] = label_counter
                        label_counter += 1
                    else:
@@ -142,7 +123,6 @@ class GK_WL():
                phi[new_labels[i], i] += aux[new_labels[i]]
            k += np.dot(phi.transpose(), phi)
-            labels = copy.deepcopy(new_labels)
            it = it + 1
        # Compute the normalized version of the kernel
@@ -151,30 +131,4 @@ class GK_WL():
            for j in range(k.shape[1]):
                k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j])
        return k_norm
\ No newline at end of file
-    def compare(self, g_1, g_2, h=1, node_label=True):
-        """Compute the kernel value (similarity) between two graphs.
-        The kernel is normalized to [0,1] by the equation:
-        k_norm(g1, g2) = k(g1, g2) / sqrt(k(g1,g1) * k(g2,g2))
-        Parameters
-        ----------
-        g_1 : networkx.Graph
-            First graph.
-        g_2 : networkx.Graph
-            Second graph.
-        h : interger
-            Number of iterations.
-        node_label : boolean
-            Whether to use the values under the graph attribute 'node_label'
-            as node labels. If False, the degree of the nodes are used as
-            labels.
-        Returns
-        -------
-        k : The similarity value between g1 and g2.
-        """
-        gl = [g_1, g_2]
-        return self.compare_list(gl, h, node_label)[0, 1]
--- a/generate_eval_support.py
+++ b/generate_eval_support.py
 # coding: utf-8
-from ner.gate_annie import GateAnnie
+import glob
-from ner.nltk import NLTK
+import json
-from pipeline import *
+import numpy as np
+from progressbar import ProgressBar, Timer, Bar, ETA
-from pos_tagger.tagger import Tagger
 # Disa
-from disambiguator.pagerank import *
 from disambiguator.geodict_gaurav import *
+from ged4py.exception import NotFoundDistance
+from ged4py.geo_bp2 import GeoBP2
 # Graph Edit Distance Algorithm Import
-from ged4py.algorithm import graph_edit_dist as ged
 from ged4py.geo_ged import GeoGED
 from ged4py.geo_hed import GeoHED
-from ged4py.hausdorff_edit_distance import HED
-from ged4py.bipartite_graph_matching_2 import BP_2
 from ged4py.greedy_edit_distance import GreedyEditDistance
-from ged4py.geo_bp2 import GeoBP2
+from ged4py.hausdorff_edit_distance import HED
+from ged4py.kernels.weisfeiler_lehman import *
+from pipeline import *
-from ged4py.exception import NotFoundDistance
+from pos_tagger.tagger import Tagger
-import numpy as np
-import glob, json, argparse
-from progressbar import ProgressBar,Timer,Bar,ETA
 # Similarity Function between graph and a set of graphs
+grap_kernel_results=[]
+graph_lookup={}
 def compareGED(id_,graphs):
    g=graphs[id_]
    sc=np.zeros(len(graphs))
@@ -65,6 +58,24 @@ def compareBP2(id_,graphs):
        else:
            sc[id_] = np.inf
    return sc
+def compareSubTreeKernel(id_,graphs):
+    """Return Weisfeiler-Lehman subtree-kernel distances from one graph to all.
+
+    The normalized kernel matrix is computed once, on the first call, and
+    cached in the module-level ``grap_kernel_results``; later calls only
+    read a row of it. The cache is not invalidated, so every call is
+    expected to receive the same ``graphs`` collection.
+
+    Parameters
+    ----------
+    id_ : int
+        Key of the reference graph in ``graphs``.
+    graphs : dict
+        Mapping from integer key to graph. Keys are used directly as list
+        positions, so they must cover ``0 .. len(graphs) - 1``.
+
+    Returns
+    -------
+    numpy.ndarray
+        ``1 - similarity`` between graph ``id_`` and every graph, indexed
+        by graph key.
+    """
+    global grap_kernel_results
+    if len(grap_kernel_results)<1:
+        graphs_array=[None for i in range(len(graphs))]
+        for i,g in graphs.items():
+            graphs_array[i]=g
+        kernel=WeisfeleirLehmanKernel()
+        # ``compare`` is declared as a staticmethod that still takes
+        # ``self``, so the instance has to be passed explicitly; calling
+        # through the class works for a static and a regular method alike.
+        grap_kernel_results=WeisfeleirLehmanKernel.compare(kernel,graphs_array,h=3)
+    # The matrix is symmetric, so row ``id_`` holds the similarity to every
+    # graph; turn it into a distance in one step.
+    return 1 - np.asarray(grap_kernel_results,dtype=float)[id_]
 def compareGEOBP2(id_,graphs):
    bp2=GeoBP2()
    g = graphs[id_]
@@ -118,7 +129,8 @@ funcDict={
    "GEOBP2":compareGEOBP2,
    "HED":compareHED,
    "GEOHED":compareGEOHED,
-    "GREEDY":compareGreedy
+    "GREEDY":compareGreedy,
+    "WLSUBTREE":compareSubTreeKernel
 }

--- a/graph_viewer/server.py
+++ b/graph_viewer/server.py
@@ -12,7 +12,7 @@ for fn in dataFiles:
 print(data_.keys())
 @app.route("/<gmmeasure>")
-def index(gmmeasure):
+def index(gmmeasure="GED"):
    if not gmmeasure in data_.keys():
        gmmeasure="GED"
    return render_template("index.html",data=json.dumps(json.load(open(data_[gmmeasure]))),measureAvailable=list(data_.keys()))

--- a/graph_viewer/templates/index.html
+++ b/graph_viewer/templates/index.html
@@ -78,12 +78,25 @@
 <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta.2/js/bootstrap.min.js" integrity="sha384-alpBpkh1PFOepccYVYDB4do5UnbKysX5WZXm3XxPqe5iKTfUKjNkCk9SaVuEZflJ" crossorigin="anonymous"></script>
 <script type="text/javascript">
+  /***  little hack starts here ***/
  function generate_map(id_tiles, locations, edges) {
+     L.Map = L.Map.extend({
+        openPopup: function(popup) {
+            //        this.closePopup();  // just comment this
+            this._popup = popup;
+            return this.addLayer(popup).fire('popupopen', {
+                popup: this._popup
+            });
+        }
+    }); /***  end of hack ***/
    var map = L.map(id_tiles).setView([0, 0], 13);
    var markers = [];
    locations.forEach(function(loc) {
-      var mark = L.marker([loc[0], loc[1]]).addTo(map).bindPopup(loc[2]);
+      var mark = L.marker([loc[0], loc[1]]).addTo(map).bindPopup(loc[2]).openPopup();
      markers.push(mark);
    });