Commit 4eaadc56 authored by Pokiros's avatar Pokiros
Browse files

Add Graph kernels + Change template Graph Viewer

parent 957a7e13
No related merge requests found
Showing with 184 additions and 151 deletions
+184 -151
# coding = utf-8
import numpy as np
from scipy.sparse import bsr_matrix
def selectLinearGaussian(h1, h2, h3, sigma):
    """Return a linear or Gaussian kernel value between two histograms.

    Parameters
    ----------
    h1, h2 : sequence of numbers
        Feature histograms. Every index of ``h1`` is also read from ``h2``,
        so ``h2`` must be at least as long as ``h1``.
    h3 : object
        Unused; kept so that existing call sites keep working.
    sigma : float
        Kernel selector and bandwidth. A negative value selects the linear
        kernel ``sum(h1[i] * h2[i])``. A positive value selects the Gaussian
        kernel ``exp(-sum((h1[i] - h2[i]) ** 2) / (2 * sigma ** 2))``.

    Returns
    -------
    k : number
        The kernel value.

    Raises
    ------
    ValueError
        If ``sigma`` is zero (the Gaussian bandwidth would be degenerate).
    """
    if sigma < 0:
        # Negative sigma is the "no bandwidth" marker: plain dot product.
        return sum(a * h2[i] for i, a in enumerate(h1))
    if sigma == 0:
        raise ValueError("sigma must be non-zero: negative selects the "
                         "linear kernel, positive the Gaussian kernel")
    # Gaussian (RBF) kernel on the squared Euclidean distance.
    squared_distance = sum((a - h2[i]) ** 2 for i, a in enumerate(h1))
    return np.exp(-1 * squared_distance / (2 * (sigma ** 2)))
def productMapping(e1, e2, v1_label, v2_label, H):
    """Number the vertex pairs of two graphs that share the same label.

    Every pair ``(i, j)`` with ``v1_label[i] == v2_label[j]`` receives the
    next consecutive index (starting at 0, scanning ``i`` first, then ``j``),
    which is written to ``H[i][j]``. Other entries of ``H`` are left alone.

    Parameters
    ----------
    e1, e2 : object
        Not read by this function.
    v1_label, v2_label : sequence
        Vertex labels of the first and second graph.
    H : mutable 2-D container indexable as ``H[i][j]``
        Receives the pair indices; modified in place.

    Returns
    -------
    int
        The number of matching pairs that were numbered.
    """
    next_index = 0
    for row, label_1 in enumerate(v1_label):
        for col, label_2 in enumerate(v2_label):
            if label_1 != label_2:
                continue
            H[row][col] = next_index
            next_index += 1
    return next_index
def productAdjacency(e1, e2, v1_label, v2_label, H):
    """Build the sparse adjacency matrix of the direct product graph.

    Two edges (one per graph) are compatible when their third component
    (the edge label/weight) is equal and their endpoint labels agree, in
    either orientation. Each compatible pair contributes a symmetric pair of
    entries with value 1.0; repeated entries are summed.

    Parameters
    ----------
    e1, e2 : sequence of ``(source, target, label)`` triples
        Edge lists of the first and second graph; endpoints index into
        ``v1_label`` / ``v2_label``.
    v1_label, v2_label : sequence
        Vertex labels of the first and second graph.
    H : 2-D container indexable as ``H[i][j]``
        Product-vertex index of the vertex pair ``(i, j)``.

    Returns
    -------
    scipy.sparse.bsr_matrix
        Matrix of shape ``(n_vx, n_vx)`` with
        ``n_vx = len(v1_label) * len(v2_label)``.
    """
    n_vx = len(v1_label) * len(v2_label)
    rows = []
    cols = []

    def _link(a, b):
        # The product graph is undirected: store both (a, b) and (b, a).
        rows.extend((a, b))
        cols.extend((b, a))

    for edge_1 in e1:
        s1, t1, w1 = edge_1[0], edge_1[1], edge_1[2]
        for edge_2 in e2:
            s2, t2, w2 = edge_2[0], edge_2[1], edge_2[2]
            if w1 != w2:
                continue
            # Same orientation: source<->source, target<->target.
            if v1_label[s1] == v2_label[s2] and v1_label[t1] == v2_label[t2]:
                _link(H[s1][s2], H[t1][t2])
            # Reversed orientation: source<->target, target<->source.
            if v1_label[s1] == v2_label[t2] and v1_label[t1] == v2_label[s2]:
                _link(H[s1][t2], H[t1][s2])

    if not rows:
        # No compatible edge pair: return an all-zero matrix.
        return bsr_matrix((n_vx, n_vx))
    data = np.ones(len(rows))
    return bsr_matrix((data, (rows, cols)), shape=(n_vx, n_vx))
# coding = utf-8
"""Shortest-Path graph kernel.
Python implementation based on: "Shortest-path kernels on graphs", by
Borgwardt, K.M.; Kriegel, H.-P., in Data Mining, Fifth IEEE
International Conference on , vol., no., pp.8 pp.-, 27-30 Nov. 2005
doi: 10.1109/ICDM.2005.132
Author : Sandro Vega-Pons, Emanuele Olivetti
Modified by : Jacques Fize
"""
import numpy as np
import networkx as nx
class ShortestPathGraphKernel:
    """Shortest-path graph kernel.

    Two graphs are compared through the histograms of their all-pairs
    shortest-path lengths: the kernel value is the dot product of the two
    histograms, ignoring length 0 and unreachable pairs.
    """

    @staticmethod
    def _path_length_counts(graph):
        """Return ``counts`` where ``counts[d]`` is the number of vertex
        pairs (upper triangle only) whose shortest-path length is ``d``."""
        dist = np.array(nx.floyd_warshall_numpy(graph))
        # Unreachable pairs are reported as inf. NaN never compares equal to
        # anything (itself included), so it has to be detected with
        # isfinite/isnan rather than ``== np.nan``.
        dist = np.where(np.isfinite(dist), dist, 0)
        # Keep each unordered pair once and drop the diagonal.
        dist = np.triu(dist, k=1)
        return np.bincount(dist.reshape(-1).astype(int))

    def compare(self, g_1, g_2, verbose=False):
        """Compute the kernel value (similarity) between two graphs.

        Parameters
        ----------
        g_1 : networkx.Graph
            First graph.
        g_2 : networkx.Graph
            Second graph.
        verbose : bool
            Currently unused.

        Returns
        -------
        k : The similarity value between g_1 and g_2.
        """
        bc1 = self._path_length_counts(g_1)
        bc2 = self._path_length_counts(g_2)
        # Only lengths present in both histograms contribute to the product;
        # bin 0 (same vertex / unreachable / lower triangle) is skipped.
        common = min(len(bc1), len(bc2))
        return np.dot(bc1[1:common].astype(float),
                      bc2[1:common].astype(float))

    def compare_normalized(self, g_1, g_2, verbose=False):
        """Compute the normalized kernel value between two graphs.

        A normalized version of the kernel is given by the equation:
        k_norm(g1, g2) = k(g1, g2) / sqrt(k(g1,g1) * k(g2,g2))

        The result is not finite when either graph has a zero
        self-similarity (no non-zero finite path length).

        Parameters
        ----------
        g_1 : networkx.Graph
            First graph.
        g_2 : networkx.Graph
            Second graph.
        verbose : bool
            Currently unused.

        Returns
        -------
        k : The normalized similarity value between g_1 and g_2.
        """
        self_similarity = self.compare(g_1, g_1) * self.compare(g_2, g_2)
        return self.compare(g_1, g_2) / np.sqrt(self_similarity)

    def compare_list(self, graph_list, verbose=False):
        """Compute the all-pairs normalized kernel values for a list of graphs.

        Parameters
        ----------
        graph_list : list
            A list of graphs (list of networkx graphs).
        verbose : bool
            Currently unused.

        Return
        ------
        K : numpy.array, shape = (len(graph_list), len(graph_list))
            The normalized similarity matrix of all graphs in graph_list.
        """
        n = len(graph_list)
        k = np.zeros((n, n))
        # The kernel is symmetric: compute the upper triangle and mirror it.
        for i in range(n):
            for j in range(i, n):
                k[i, j] = self.compare(graph_list[i], graph_list[j])
                k[j, i] = k[i, j]

        # k_norm[i, j] = k[i, j] / sqrt(k[i, i] * k[j, j])
        self_similarity = np.diag(k)
        return k / np.sqrt(np.outer(self_similarity, self_similarity))
\ No newline at end of file
...@@ -8,25 +8,23 @@ Mehlhorn, Karsten M. Borgwardt, JMLR, 2012. ...@@ -8,25 +8,23 @@ Mehlhorn, Karsten M. Borgwardt, JMLR, 2012.
http://jmlr.csail.mit.edu/papers/v12/shervashidze11a.html http://jmlr.csail.mit.edu/papers/v12/shervashidze11a.html
Author : Sandro Vega-Pons, Emanuele Olivetti Author : Sandro Vega-Pons, Emanuele Olivetti
Source : https://github.com/emanuele/jstsp2015/blob/master/gk_weisfeiler_lehman.py
Modified by : Jacques Fize
""" """
import numpy as np import numpy as np
import networkx as nx import networkx as nx
import copy import copy
class WeisfeleirLehmanKernel(object):
class GK_WL(): @staticmethod
""" def compare(self,graph_list,h=2):
Weisfeiler_Lehman graph kernel.
"""
def compare_list(self, graph_list, h=1, node_label=True):
"""Compute the all-pairs kernel values for a list of graphs. """Compute the all-pairs kernel values for a list of graphs.
This function can be used to directly compute the kernel This function can be used to directly compute the kernel
matrix for a list of graphs. The direct computation of the matrix for a list of graphs. The direct computation of the
kernel matrix is faster than the computation of all individual kernel matrix is faster than the computation of all individual
pairwise kernel values. pairwise kernel values.
Parameters Parameters
---------- ----------
graph_list: list graph_list: list
...@@ -37,16 +35,13 @@ class GK_WL(): ...@@ -37,16 +35,13 @@ class GK_WL():
Whether to use original node labels. True for using node labels Whether to use original node labels. True for using node labels
saved in the attribute 'node_label'. False for using the node saved in the attribute 'node_label'. False for using the node
degree of each node as node attribute. degree of each node as node attribute.
Return Return
------ ------
K: numpy.array, shape = (len(graph_list), len(graph_list)) K: numpy.array, shape = (len(graph_list), len(graph_list))
The similarity matrix of all graphs in graph_list. The similarity matrix of all graphs in graph_list.
""" """
self.graphs = graph_list self.graphs = graph_list
n = len(graph_list) n = len(graph_list)
lists = [0] * n
k = [0] * (h + 1) k = [0] * (h + 1)
n_nodes = 0 n_nodes = 0
n_max = 0 n_max = 0
...@@ -54,14 +49,13 @@ class GK_WL(): ...@@ -54,14 +49,13 @@ class GK_WL():
# Compute adjacency lists and n_nodes, the total number of # Compute adjacency lists and n_nodes, the total number of
# nodes in the dataset. # nodes in the dataset.
for i in range(n): for i in range(n):
lists[i] = graph_list[i].adjacency_list() n_nodes += graph_list[i].number_of_nodes()
n_nodes = n_nodes + len(graph_list[i])
# Computing the maximum number of nodes in the graphs. It # Computing the maximum number of nodes in the graphs. It
# will be used in the computation of vectorial # will be used in the computation of vectorial
# representation. # representation.
if(n_max < len(graph_list[i])): if (n_max < graph_list[i].number_of_nodes()):
n_max = len(graph_list[i]) n_max = graph_list[i].number_of_nodes()
phi = np.zeros((n_max, n), dtype=np.uint64) phi = np.zeros((n_max, n), dtype=np.uint64)
...@@ -76,63 +70,50 @@ class GK_WL(): ...@@ -76,63 +70,50 @@ class GK_WL():
# mapping from multiset labels (strings) to short labels # mapping from multiset labels (strings) to short labels
# (integers) # (integers)
if node_label is True: for i in range(n):
for i in range(n): nodes = graph_list[i].nodes()
l_aux = nx.get_node_attributes(graph_list[i], # It is assumed that the graph has an attribute
'label').values() # 'node_label'
l_aux = list(l_aux) labels[i] = np.zeros(len(nodes), dtype=np.int32)
# It is assumed that the graph has an attribute for j in range(len(nodes)):
# 'node_label' if not (nodes[j] in label_lookup):
labels[i] = np.zeros(len(l_aux), dtype=np.int32) label_lookup[nodes[j]] = str(label_counter)
labels[i][j] = label_counter
for j in range(len(l_aux)): label_counter += 1
if not (l_aux[j] in label_lookup): else:
label_lookup[l_aux[j]] = label_counter labels[i][j] = label_lookup[nodes[j]]
labels[i][j] = label_counter # labels are associated to a natural number
label_counter += 1 # starting with 0.
else: phi[labels[i][j], i] += 1
labels[i][j] = label_lookup[l_aux[j]]
# labels are associated to a natural number graph_list[i]=nx.relabel_nodes(graph_list[i],label_lookup)
# starting with 0. k = np.dot(phi.transpose(), phi).astype(np.float64)
phi[labels[i][j], i] += 1
else:
for i in range(n):
labels[i] = np.array(list(graph_list[i].degree().values()))
for j in range(len(labels[i])):
phi[labels[i][j], i] += 1
print(phi)
# Simplified vectorial representation of graphs (just taking
# the vectors before the kernel iterations), i.e., it is just
# the original nodes degree.
self.vectors = np.copy(phi.transpose())
k = np.dot(phi.transpose(), phi)
# MAIN LOOP # MAIN LOOP
it = 0 it = 0
new_labels = copy.deepcopy(labels) new_labels = copy.deepcopy(labels) # Can't work without it !!!
while it < h: while it < h:
# create an empty lookup table # create an empty lookup table
label_lookup = {} label_lookup = {}
label_counter = 0 label_counter = 0
phi = np.zeros((n_nodes, n), dtype=np.uint64) phi = np.zeros((n_nodes, n))
for i in range(n): for i in range(n):
for v in range(len(lists[i])): nodes = graph_list[i].nodes()
for v in range(len(nodes)):
# form a multiset label of the node v of the i'th graph # form a multiset label of the node v of the i'th graph
# and convert it to a string # and convert it to a string
long_label = np.concatenate((np.array([labels[i][v]]), long_label = []
np.sort(labels[i] long_label.extend(nx.neighbors(graph_list[i],nodes[v]))
[lists[i][v]])))
long_label_string = str(long_label) long_label_string = "".join(long_label)
# if the multiset label has not yet occurred, add it to the # if the multiset label has not yet occurred, add it to the
# lookup table and assign a number to it # lookup table and assign a number to it
if not (long_label_string in label_lookup): if not (long_label_string in label_lookup):
label_lookup[long_label_string] = label_counter label_lookup[long_label_string] = str(label_counter)
new_labels[i][v] = label_counter new_labels[i][v] = label_counter
label_counter += 1 label_counter += 1
else: else:
...@@ -142,7 +123,6 @@ class GK_WL(): ...@@ -142,7 +123,6 @@ class GK_WL():
phi[new_labels[i], i] += aux[new_labels[i]] phi[new_labels[i], i] += aux[new_labels[i]]
k += np.dot(phi.transpose(), phi) k += np.dot(phi.transpose(), phi)
labels = copy.deepcopy(new_labels)
it = it + 1 it = it + 1
# Compute the normalized version of the kernel # Compute the normalized version of the kernel
...@@ -151,30 +131,4 @@ class GK_WL(): ...@@ -151,30 +131,4 @@ class GK_WL():
for j in range(k.shape[1]): for j in range(k.shape[1]):
k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j])
return k_norm return k_norm
\ No newline at end of file
def compare(self, g_1, g_2, h=1, node_label=True):
    """Return the kernel similarity between two graphs.

    The pair is handed to ``compare_list`` as a two-element list and the
    off-diagonal entry of the returned matrix is the result. The kernel is
    normalized by the equation:
    k_norm(g1, g2) = k(g1, g2) / sqrt(k(g1,g1) * k(g2,g2))

    Parameters
    ----------
    g_1 : networkx.Graph
        First graph.
    g_2 : networkx.Graph
        Second graph.
    h : integer
        Number of iterations.
    node_label : boolean
        Forwarded unchanged to ``compare_list``; selects between stored
        node labels (True) and node degrees (False) as labels.

    Returns
    -------
    k : The similarity value between g_1 and g_2.
    """
    kernel_matrix = self.compare_list([g_1, g_2], h, node_label)
    # Entry (0, 1) is the first graph compared against the second.
    return kernel_matrix[0, 1]
# coding: utf-8 # coding: utf-8
from ner.gate_annie import GateAnnie import glob
from ner.nltk import NLTK import json
from pipeline import * import numpy as np
from progressbar import ProgressBar, Timer, Bar, ETA
from pos_tagger.tagger import Tagger
# Disa # Disa
from disambiguator.pagerank import *
from disambiguator.geodict_gaurav import * from disambiguator.geodict_gaurav import *
from ged4py.exception import NotFoundDistance
from ged4py.geo_bp2 import GeoBP2
# Graph Edit Distance Algorithm Import # Graph Edit Distance Algorithm Import
from ged4py.algorithm import graph_edit_dist as ged
from ged4py.geo_ged import GeoGED from ged4py.geo_ged import GeoGED
from ged4py.geo_hed import GeoHED from ged4py.geo_hed import GeoHED
from ged4py.hausdorff_edit_distance import HED
from ged4py.bipartite_graph_matching_2 import BP_2
from ged4py.greedy_edit_distance import GreedyEditDistance from ged4py.greedy_edit_distance import GreedyEditDistance
from ged4py.geo_bp2 import GeoBP2 from ged4py.hausdorff_edit_distance import HED
from ged4py.kernels.weisfeiler_lehman import *
from pipeline import *
from ged4py.exception import NotFoundDistance from pos_tagger.tagger import Tagger
import numpy as np
import glob, json, argparse
from progressbar import ProgressBar,Timer,Bar,ETA
# Similarity Function between graph and a set of graphs # Similarity Function between graph and a set of graphs
grap_kernel_results=[]
graph_lookup={}
def compareGED(id_,graphs): def compareGED(id_,graphs):
g=graphs[id_] g=graphs[id_]
sc=np.zeros(len(graphs)) sc=np.zeros(len(graphs))
...@@ -65,6 +58,24 @@ def compareBP2(id_,graphs): ...@@ -65,6 +58,24 @@ def compareBP2(id_,graphs):
else: else:
sc[id_] = np.inf sc[id_] = np.inf
return sc return sc
def compareSubTreeKernel(id_, graphs):
    """Return Weisfeiler-Lehman subtree distances from one graph to all others.

    The full kernel matrix is computed once, on the first call, and cached
    in the module-level ``grap_kernel_results``; later calls only read the
    row for ``id_``. The cache is never invalidated, so it reflects the
    ``graphs`` passed on the first call.

    Parameters
    ----------
    id_ : int
        Index of the reference graph.
    graphs : dict
        Graphs keyed by their integer position (0 .. len(graphs) - 1).

    Returns
    -------
    numpy.ndarray
        ``1 - similarity`` between graph ``id_`` and every graph, i.e. a
        distance where smaller means more similar.
    """
    global grap_kernel_results
    if len(grap_kernel_results) < 1:
        # Lay the graphs out in a list ordered by their integer key.
        graphs_array = [None] * len(graphs)
        for i, g in graphs.items():
            graphs_array[i] = g
        # NOTE(review): in the visible diff `compare` is declared as a
        # @staticmethod that still takes `self` -- confirm this call binds
        # `graph_list` as intended.
        grap_kernel_results = WeisfeleirLehmanKernel().compare(graphs_array, h=3)
    # The kernel matrix is symmetric, so one row holds every similarity for
    # graph id_; converting similarity to distance covers all indices.
    return 1 - np.asarray(grap_kernel_results)[id_]
def compareGEOBP2(id_,graphs): def compareGEOBP2(id_,graphs):
bp2=GeoBP2() bp2=GeoBP2()
g = graphs[id_] g = graphs[id_]
...@@ -118,7 +129,8 @@ funcDict={ ...@@ -118,7 +129,8 @@ funcDict={
"GEOBP2":compareGEOBP2, "GEOBP2":compareGEOBP2,
"HED":compareHED, "HED":compareHED,
"GEOHED":compareGEOHED, "GEOHED":compareGEOHED,
"GREEDY":compareGreedy "GREEDY":compareGreedy,
"WLSUBTREE":compareSubTreeKernel
} }
......
...@@ -12,7 +12,7 @@ for fn in dataFiles: ...@@ -12,7 +12,7 @@ for fn in dataFiles:
print(data_.keys()) print(data_.keys())
@app.route("/<gmmeasure>") @app.route("/<gmmeasure>")
def index(gmmeasure): def index(gmmeasure="GED"):
if not gmmeasure in data_.keys(): if not gmmeasure in data_.keys():
gmmeasure="GED" gmmeasure="GED"
return render_template("index.html",data=json.dumps(json.load(open(data_[gmmeasure]))),measureAvailable=list(data_.keys())) return render_template("index.html",data=json.dumps(json.load(open(data_[gmmeasure]))),measureAvailable=list(data_.keys()))
......
...@@ -78,12 +78,25 @@ ...@@ -78,12 +78,25 @@
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta.2/js/bootstrap.min.js" integrity="sha384-alpBpkh1PFOepccYVYDB4do5UnbKysX5WZXm3XxPqe5iKTfUKjNkCk9SaVuEZflJ" crossorigin="anonymous"></script> <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta.2/js/bootstrap.min.js" integrity="sha384-alpBpkh1PFOepccYVYDB4do5UnbKysX5WZXm3XxPqe5iKTfUKjNkCk9SaVuEZflJ" crossorigin="anonymous"></script>
<script type="text/javascript"> <script type="text/javascript">
/*** little hack starts here ***/
function generate_map(id_tiles, locations, edges) { function generate_map(id_tiles, locations, edges) {
L.Map = L.Map.extend({
openPopup: function(popup) {
// this.closePopup(); // just comment this
this._popup = popup;
return this.addLayer(popup).fire('popupopen', {
popup: this._popup
});
}
}); /*** end of hack ***/
var map = L.map(id_tiles).setView([0, 0], 13); var map = L.map(id_tiles).setView([0, 0], 13);
var markers = []; var markers = [];
locations.forEach(function(loc) { locations.forEach(function(loc) {
var mark = L.marker([loc[0], loc[1]]).addTo(map).bindPopup(loc[2]); var mark = L.marker([loc[0], loc[1]]).addTo(map).bindPopup(loc[2]).openPopup();
markers.push(mark); markers.push(mark);
}); });
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment