Commit 01d33886 authored by Fize Jacques's avatar Fize Jacques

- Major changes and debugging of the algorithms: GED, MCS, VEO, VertexRanking, etc.

parent e49469d0
......@@ -9,3 +9,7 @@ cdef class Base:
cpdef np.ndarray compare(self,list graph_list, list selected)
cpdef np.ndarray distance(self, np.ndarray matrix)
cpdef np.ndarray similarity(self, np.ndarray matrix)
cpdef bint isAccepted(self,G,index,selected)
cpdef intersection(G,H)
cpdef union_(G,H)
......@@ -2,6 +2,7 @@
import numpy as np
cimport numpy as np
import networkx as nx
cdef np.ndarray minmax_scale(np.ndarray matrix):
"""
......@@ -14,6 +15,70 @@ cdef np.ndarray minmax_scale(np.ndarray matrix):
max_=np.max(matrix)
return matrix/(max_-min_)
cpdef intersection(G, H):
    """
    Build a graph made of the edges found in both G and H.

    Unlike the NetworkX function this was adapted from, G and H are allowed
    to have different node sets.

    Parameters
    ----------
    G, H : graph
        NetworkX graphs. They must either both be multigraphs or both be
        simple graphs.

    Returns
    -------
    GH : graph
        A graph created with ``nx.create_empty_copy(G)``, holding every edge
        present in both inputs, from which the nodes of G that are absent
        from H have been removed.

    Raises
    ------
    nx.NetworkXError
        If exactly one of G and H is a multigraph.

    Notes
    -----
    Edges are inserted with ``add_edge(*e)``, so edge attributes are not
    carried over to the result.
    """
    result = nx.create_empty_copy(G)
    if G.is_multigraph() != H.is_multigraph():
        raise nx.NetworkXError('G and H must both be graphs or multigraphs.')

    # Iterate over the graph with fewer edges and probe the other one.
    if G.number_of_edges() <= H.number_of_edges():
        smaller, larger = G, H
    else:
        smaller, larger = H, G

    # Multigraph edges are identified by (u, v, key).
    if smaller.is_multigraph():
        candidates = smaller.edges(keys=True)
    else:
        candidates = smaller.edges()

    for edge in candidates:
        if larger.has_edge(*edge):
            result.add_edge(*edge)

    # The empty copy started with all of G's nodes: drop those H lacks.
    only_in_g = set(G.nodes()) - set(H.nodes())
    result.remove_nodes_from(list(only_in_g))
    return result
cpdef union_(G, H):
    """
    Build a graph containing the nodes and edges of both G and H.

    Parameters
    ----------
    G, H : graph
        NetworkX graphs.

    Returns
    -------
    R : graph
        A graph created with ``nx.create_empty_copy(G)`` to which the nodes
        of H, then the edges of G, then the edges of H are added together
        with their attribute data.

    Notes
    -----
    The previous implementation added the edges of G twice and never read H,
    so the result was only a copy of G.
    NOTE(review): edges are added without multigraph keys; for multigraph
    inputs an edge present in both graphs presumably ends up as parallel
    edges -- confirm whether that is wanted.
    """
    R = nx.create_empty_copy(G)
    # Nodes of H are added explicitly so that isolated nodes of H, which no
    # edge would bring in, are part of the union too.
    R.add_nodes_from(H.nodes(data=True))
    R.add_edges_from(G.edges(data=True))
    R.add_edges_from(H.edges(data=True))
    return R
cdef class Base:
def __cinit__(self):
......@@ -64,3 +129,14 @@ cdef class Base:
R=g1.copy()
R.remove_nodes_from(n for n in g1 if n not in g2)
return R
cpdef bint isAccepted(self, G, index, selected):
    """
    Tell whether a graph should take part in a comparison.

    :param G: graph to check; rejected when falsy or of length 0
    :param index: position of the graph in the compared list
    :param selected: container of accepted indices; when it is empty or
        falsy, no filtering on ``index`` is applied
    :return: True if G is usable and ``index`` passes the selection filter
    """
    if not G or len(G) == 0:
        return False
    if selected and index not in selected:
        return False
    return True
# coding = utf-8
\ No newline at end of file
# coding = utf-8
from termcolor import colored
class NotFoundDistance(Exception):
    """
    Raised when a requested edit distance is not among the implemented ones.

    :param dd: the distance identifier that was requested
    :param distanceFunctionDict: mapping whose keys are the available
        distance identifiers; they are listed in the error message
    """

    def __init__(self, dd, distanceFunctionDict):
        # Keys are converted with str() so that non-string identifiers do not
        # make the join (and therefore the error report itself) fail.
        available = ",".join(str(name) for name in distanceFunctionDict.keys())
        message = "{0} is not an edit distance implemented ! Select a distance from : {1}".format(dd, available)
        # super() must name this class: super(Exception, self) starts the
        # lookup after Exception and skips it in the MRO.
        super(NotFoundDistance, self).__init__(colored(message, "red"))
import numpy as np
cimport numpy as np
from ..base cimport Base
cdef class AbstractGraphEditDistance(Base):
    # Costs of the elementary edit operations on nodes and edges.
    cdef double node_del, node_ins
    cdef double edge_del, edge_ins

    # Cost matrix attribute, typed as a NumPy array.
    cdef np.ndarray cost_matrix

    # Distance computation between two graphs G and H.
    cpdef double distance_ged(self, G, H)
    cdef list edit_costs(self, G, H)
    cpdef np.ndarray create_cost_matrix(self, G, H)

    # Per-operation cost functions.
    cdef double insert_cost(self, int i, int j, nodesH, H)
    cdef double delete_cost(self, int i, int j, nodesG, G)
    cpdef double substitute_cost(self, node1, node2, G, H)
......@@ -6,63 +6,43 @@ import sys
import numpy as np
from scipy.optimize import linear_sum_assignment
cimport numpy as np
from ..base cimport Base
cdef class AbstractGraphEditDistance(Base):
class AbstractGraphEditDistance(object):
def __init__(self, node_del,node_ins,edge_del,edge_ins):
Base.__init__(self,1,False)
def __init__(self, g1, g2,debug=False,**kwargs):
self.g1 = g1
self.g2 = g2
self.debug=debug
self.node_del = node_del
self.node_ins = node_ins
self.edge_del = edge_del
self.edge_ins = edge_ins
self.node_del = kwargs.get("node_del",1)
self.node_ins = kwargs.get("node_ins",1)
self.edge_del = kwargs.get("edge_del",1)
self.edge_ins = kwargs.get("edge_ins",1)
cpdef double distance_ged(self, G, H):
    """
    Compute the edit distance between G and H.

    :param G: first graph
    :param H: second graph
    :return: the sum of the costs returned by ``edit_costs(G, H)``
    """
    return np.sum(self.edit_costs(G, H))
def distance(self):
    """
    Return the total of the costs produced by ``edit_costs()``.

    When ``self.debug`` is set, the list of costs is printed first.
    """
    path_costs = self.edit_costs()
    if not self.debug:
        return sum(path_costs)
    print("Edit path for ", str(self.__class__.__name__), "\n", path_costs)
    return sum(path_costs)
def print_operations(self, cost_matrix, row_ind, col_ind):
    """
    Print the edit operation selected for each assigned (row, column) pair.

    With n nodes in ``self.g1`` and m nodes in ``self.g2``:
    row < n and column < m is printed as SUB, row >= n and column < m as ADD,
    row < n and column >= m as DEL. Pairs with row >= n and column >= m
    print nothing.

    :param cost_matrix: matrix indexed as ``cost_matrix[row][column]``
    :param row_ind: row indices of the assignment
    :param col_ind: column indices of the assignment, parallel to ``row_ind``
    """
    cdef list nodes1 = list(self.g1.nodes)
    cdef list nodes2 = list(self.g2.nodes)
    dn1 = self.g1.nodes
    dn2 = self.g2.nodes
    cdef int n = len(nodes1)
    cdef int m = len(nodes2)
    cdef int x, y, i
    for i in range(len(row_ind)):
        y, x = row_ind[i], col_ind[i]
        val = cost_matrix[y][x]
        if x < m and y < n:
            print("SUB {0} to {1} cost = {2}".format(dn1[nodes1[y]]["label"], dn2[nodes2[x]]["label"], val))
        elif x < m and y >= n:
            # The column index is the position of the inserted node in g2.
            print("ADD {0} cost = {1}".format(dn2[nodes2[x]]["label"], val))
        elif x >= m and y < n:
            # The row index is the position of the deleted node in g1; the
            # former ``m - x`` was never positive here and picked the wrong
            # node through negative indexing.
            print("DEL {0} cost = {1}".format(dn1[nodes1[y]]["label"], val))
def edit_costs(self):
cdef np.ndarray cost_matrix = self.create_cost_matrix()
if self.debug:
np.set_printoptions(precision=3)
print("Cost Matrix for ",str(self.__class__.__name__),"\n",cost_matrix)
cdef list edit_costs(self, G, H):
"""
Return the optimal path edit cost list, to transform G into H
:return:
"""
cdef np.ndarray cost_matrix = self.create_cost_matrix(G,H).astype(float)
row_ind,col_ind = linear_sum_assignment(cost_matrix)
if self.debug:
self.print_operations(cost_matrix,row_ind,col_ind)
cdef int f=len(row_ind)
return [cost_matrix[row_ind[i]][col_ind[i]] for i in range(f)]
def create_cost_matrix(self):
cpdef np.ndarray create_cost_matrix(self, G, H):
"""
Creates a |N+M| X |N+M| cost matrix between all nodes in
graphs g1 and g2
graphs G and H
Each cost represents the cost of substituting,
deleting or inserting a node
The cost matrix consists of four regions:
......@@ -73,46 +53,47 @@ class AbstractGraphEditDistance(object):
The delete -> delete region is filled with zeros
"""
cdef int n = len(self.g1)
cdef int m = len(self.g2)
cdef int n = G.number_of_nodes()
cdef int m = H.number_of_nodes()
cdef np.ndarray cost_matrix = np.zeros((n+m,n+m))
#cost_matrix = [[0 for i in range(n + m)] for j in range(n + m)]
cdef list nodes1 = list(self.g1.nodes)
cdef list nodes2 = list(self.g2.nodes)
cdef list nodes1 = list(G.nodes())
cdef list nodes2 = list(H.nodes())
cdef int i,j
for i in range(n):
for j in range(m):
cost_matrix[i,j] = self.substitute_cost(nodes1[i], nodes2[j])
cost_matrix[i,j] = self.substitute_cost(nodes1[i], nodes2[j], G, H)
for i in range(m):
for j in range(m):
cost_matrix[i+n,j] = self.insert_cost(i, j, nodes2)
cost_matrix[i+n,j] = self.insert_cost(i, j, nodes2, H)
for i in range(n):
for j in range(n):
cost_matrix[j,i+m] = self.delete_cost(i, j, nodes1)
cost_matrix[j,i+m] = self.delete_cost(i, j, nodes1, G)
self.cost_matrix = cost_matrix
return cost_matrix
def insert_cost(self, int i, int j):
cdef double insert_cost(self, int i, int j, nodesH, H):
raise NotImplementedError
def delete_cost(self, int i, int j):
cdef double delete_cost(self, int i, int j, nodesG, G):
raise NotImplementedError
def substitute_cost(self, nodes1, nodes2):
cpdef double substitute_cost(self, node1, node2, G, H):
raise NotImplementedError
def print_matrix(self):
print("cost matrix:")
print(list(self.g1.nodes))
print(list(self.g2.nodes))
print(np.array(self.create_cost_matrix()))
for column in self.create_cost_matrix():
for row in column:
if row == sys.maxsize:
print ("inf\t")
cpdef np.ndarray compare(self,list listgs, list selected):
cdef int n = len(listgs)
cdef np.ndarray comparison_matrix = np.zeros((n, n)).astype(float)
cdef int i,j
for i in range(n):
for j in range(i, n):
g1,g2=listgs[i],listgs[j]
f=self.isAccepted(g1,i,selected) & self.isAccepted(g2,j,selected)
if f:
comparison_matrix[i, j] = self.distance_ged(g1, g2)
else:
print ("%.2f\t" % float(row))
print("")
comparison_matrix[i, j] = np.inf
comparison_matrix[j, i] = comparison_matrix[i, j]
np.fill_diagonal(comparison_matrix,0)
return comparison_matrix
import sys
from .abstract_graph_edit_dist import AbstractGraphEditDistance
class EdgeEditDistance(AbstractGraphEditDistance):
    """
    Edit distance computed over two sets of edges.

    Here the edge collection around a node plays the role of a graph, and
    each edge plays the role of a node.
    """

    def __init__(self, g1, g2, **kwargs):
        AbstractGraphEditDistance.__init__(self, g1, g2, **kwargs)

    def insert_cost(self, int i, int j, nodes2):
        """Return ``edge_ins`` when i equals j, else ``sys.maxsize``."""
        return self.edge_ins if i == j else sys.maxsize

    def delete_cost(self, int i, int j, nodes1):
        """Return ``edge_del`` when i equals j, else ``sys.maxsize``."""
        return self.edge_del if i == j else sys.maxsize

    def substitute_cost(self, edge1, edge2):
        """Return 0 for equal edges, else one deletion plus one insertion."""
        return 0. if edge1 == edge2 else self.edge_del + self.edge_ins
# -*- coding: UTF-8 -*-
import sys
import networkx as nx
from .abstract_graph_edit_dist import AbstractGraphEditDistance
from .edge_edit_dist import EdgeEditDistance
from ..graph.edge_graph import EdgeGraph
def compare(g1, g2, print_details=False):
    """
    Compute the graph edit distance between g1 and g2.

    :param g1: first graph
    :param g2: second graph
    :param print_details: forwarded to ``GraphEditDistance`` as its debug flag
    :return: the value of ``GraphEditDistance(g1, g2, print_details).distance()``
    """
    return GraphEditDistance(g1, g2, print_details).distance()
class GraphEditDistance(AbstractGraphEditDistance):
    """
    Edit distance between the two graphs given to the constructor.

    Node substitution combines a relabel cost with an edge difference;
    deleting or inserting a node also counts the node's degree.
    """

    def __init__(self, g1, g2, debug=False, **kwargs):
        AbstractGraphEditDistance.__init__(self, g1, g2, debug, **kwargs)

    def substitute_cost(self, node1, node2):
        """Return the relabel cost plus the edge difference of the two nodes."""
        relabel = self.relabel_cost(node1, node2)
        return relabel + self.edge_diff(node1, node2)

    def relabel_cost(self, node1, node2):
        """
        Return the cost of turning node1 into node2.

        Different nodes cost one insertion plus one deletion. For equal
        nodes the cost is the number of edge labels around node2 in g2 that
        are missing around node1 in g1.
        """
        if not node1 == node2:
            return self.node_ins + self.node_del
        labels1 = set(self.get_edge_multigraph(self.g1, node1))
        labels2 = set(self.get_edge_multigraph(self.g2, node2))
        return len(labels2 - labels1)

    def delete_cost(self, int i, int j, nodes1):
        """Return the deletion cost on the diagonal, ``sys.maxsize`` elsewhere."""
        if i != j:
            return sys.maxsize
        # Deleting a node also removes its incident edges.
        return self.node_del + self.g1.degree(nodes1[i])

    def insert_cost(self, int i, int j, nodes2):
        """Return the insertion cost on the diagonal, ``sys.maxsize`` elsewhere."""
        if i != j:
            return sys.maxsize
        degree = self.g2.degree(nodes2[j])
        if isinstance(degree, dict):
            degree = 0
        return self.node_ins + degree

    def get_edge_multigraph(self, g, node):
        """
        Return one string per edge stored under ``g.edges[node]``.

        Each string is the entry key followed by the edge's "color" value.
        An entry containing the key 0 is walked value by value (presumably
        one value per parallel edge -- verify against the graph type used).
        """
        cdef list labels = []
        for key, attrs in g.edges[node].items():
            if 0 in attrs:
                for parallel in attrs.values():
                    labels.append(str(key) + parallel["color"])
            else:
                labels.append(str(key) + attrs["color"])
        return labels

    def edge_diff(self, node1, node2):
        """
        Return the edit distance between the edges around node1 and node2.

        If either node has no edge, the result is the larger edge count;
        otherwise the two edge lists are compared with ``EdgeEditDistance``.
        """
        cdef list edges1, edges2
        if isinstance(self.g1, nx.MultiDiGraph):
            edges1 = self.get_edge_multigraph(self.g1, node1)
            edges2 = self.get_edge_multigraph(self.g2, node2)
        else:
            edges1 = list(self.g1.edges[node1].keys())
            edges2 = list(self.g2.edges[node2].keys())
        if not edges1 or not edges2:
            return max(len(edges1), len(edges2))
        edge_problem = EdgeEditDistance(
            EdgeGraph(node1, edges1),
            EdgeGraph(node2, edges2),
            edge_del=self.edge_del,
            edge_ins=self.edge_ins,
            node_ins=self.node_ins,
            node_del=self.node_del
        )
        return edge_problem.distance()
# coding = utf-8
import numpy as np
from .algorithm.graph_edit_dist import GraphEditDistance
from cython.parallel import prange
class ApproximateGraphEditDistance():
    """Pairwise graph edit distance over a list of graphs."""

    __type__ = "dist"

    @staticmethod
    def compare(listgs, selected, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1):
        """
        Fill a symmetric matrix with the edit distance of every graph pair.

        A pair (i, j) gets ``np.inf`` when either graph is falsy or has
        length 0, or when ``selected`` is non-empty and does not contain i
        (only the row index is checked).

        :param listgs: list of graphs
        :param selected: indices to keep; empty/falsy disables the filter
        :param c_del_node: node deletion cost
        :param c_del_edge: edge deletion cost
        :param c_ins_node: node insertion cost
        :param c_ins_edge: edge insertion cost
        :return: the n x n ``comparison_matrix`` memoryview
        """
        cdef int n = len(listgs)
        cdef double[:, :] comparison_matrix = np.zeros((n, n))
        cdef int i, j
        for i in prange(n, nogil=True):
            for j in range(i, n):
                with gil:
                    f = True
                    if selected and i not in selected:
                        f = False
                    elif not listgs[i] or not listgs[j]:
                        f = False
                    elif len(listgs[i]) == 0 or len(listgs[j]) == 0:
                        f = False
                    if not f:
                        comparison_matrix[i, j] = np.inf
                    else:
                        comparison_matrix[i, j] = GraphEditDistance(listgs[i],listgs[j],False,node_del=c_del_node,node_ins=c_ins_node,edge_del=c_del_edge,edge_ins=c_ins_edge).distance()
                    # Mirror the value into the lower triangle.
                    comparison_matrix[j, i] = comparison_matrix[i, j]
        return comparison_matrix
\ No newline at end of file
# coding = utf-8
import numpy as np
cimport numpy as np
from ..base cimport Base
cdef class BP_2():
cdef class BP_2(Base):
"""
"""
__type__="dist"
cdef int node_del
cdef int node_ins
cdef int edge_del
cdef int edge_ins
@staticmethod
def compare(listgs,selected, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1):
def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1):
"""Constructor for HED"""
Base.__init__(self,1,False)
self.node_del = node_del
self.node_ins = node_ins
self.edge_del = edge_del
self.edge_ins = edge_ins
cpdef np.ndarray compare(self,list listgs, list selected):
cdef int n = len(listgs)
comparator = BP_2(c_del_node, c_ins_node, c_del_edge, c_ins_edge)
cdef np.ndarray comparison_matrix = np.zeros((n, n))
cdef np.ndarray comparison_matrix = np.zeros((n, n)).astype(float)
cdef int i,j
for i in range(n):
for j in range(i, n):
f=True
if not listgs[i] or not listgs[j]:
f=False
elif len(listgs[i])== 0 or len(listgs[j]) == 0:
f=False
if selected:
if not i in selected:
f=False
g1,g2=listgs[i],listgs[j]
f=self.isAccepted(g1,i,selected) & self.isAccepted(g2,j,selected)
if f:
comparison_matrix[i, j] = comparator.bp2(listgs[i], listgs[j])
comparison_matrix[i, j] = self.bp2(g1, g2)
else:
comparison_matrix[i, j] = np.inf
comparison_matrix[j, i] = comparison_matrix[i, j]
return comparison_matrix
def __init__(self, node_del=1, node_ins=1, edge_del=1, edge_ins=1):
"""Constructor for HED"""
self.node_del = node_del
......@@ -43,16 +45,16 @@ cdef class BP_2():
self.edge_del = edge_del
self.edge_ins = edge_ins
def bp2(self, g1, g2):
cdef double bp2(self, g1, g2):
"""
Compute de Hausdorff Edit Distance
:param g1: first graph
:param g2: second graph
:return:
"""
return np.min(self.distance(self.psi(g1,g2)),self.distance(self.psi(g2,g1)))
return np.min([self.distance_bp2(self.psi(g1,g2)),self.distance_bp2(self.psi(g2,g1))])
def distance(self,e):
cdef double distance_bp2(self,e):
return np.sum(e)
cdef list psi(self,g1,g2):
......@@ -75,6 +77,25 @@ cdef class BP_2():
return psi_
cdef float sum_fuv(self, g1, g2):
    """
    Sum, over the nodes of g1, the cheapest ``fuv`` cost against g2.

    Every node of g1 is compared with each node of g2 and with ``None``;
    the smallest of these costs is kept per node and the minima are summed.
    :param g1: First Graph
    :param g2: Second Graph
    :return: sum of the per-node minimum costs
    """
    sources = list(g1.nodes)
    targets = list(g2.nodes) + [None]
    cdef np.ndarray best = np.zeros(len(g1))
    for i, u in enumerate(sources):
        best[i] = np.min(np.array([self.fuv(g1, g2, u, v) for v in targets]))
    return np.sum(best)
cdef float fuv(self, g1, g2, n1, n2):
"""
Compute the Node Distance function
......@@ -85,12 +106,12 @@ cdef class BP_2():
:return:
"""
if n2 == None: # Del
return self.node_del + ((self.edge_del / 2) * g1.degree(n1))
return self.node_del + ((self.edge_del / 2.) * g1.degree(n1))
if n1 == None: # Insert
return self.node_ins + ((self.edge_ins / 2) * g2.degree(n2))
return self.node_ins + ((self.edge_ins / 2.) * g2.degree(n2))
else:
if n1 == n2:
return 0.
return 0
return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2
cdef float hed_edge(self, g1, g2, n1, n2):
......@@ -104,24 +125,6 @@ cdef class BP_2():
"""
return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2)
cdef list get_edge_multigraph(self, g, node):
    """
    List the edges around a node as "u-v" strings.

    :param g: multigraph
    :param node: node in the multigraph
    :return: one ``"{0}-{1}"`` string per edge returned by ``g.edges(node)``,
        built from the edge's first two items; an empty list if there is none
    """
    # list() is required: when g.edges(...) returns a view object instead of
    # a list, assigning it directly to a typed ``list`` raises TypeError.
    cdef list originals_ = list(g.edges(node))
    return ["{0}-{1}".format(edge[0], edge[1]) for edge in originals_]
cdef float sum_gpq(self, g1, n1, g2, n2):
"""
......@@ -132,10 +135,14 @@ cdef class BP_2():
:param n2: node in the second graph
:return:
"""
cdef list edges1 = self.get_edge_multigraph(g1, n1)
cdef list edges2 = self.get_edge_multigraph(g2, n2)
edges2.extend([None])
#if isinstance(g1, nx.MultiDiGraph):
cdef list edges1 = list(g1.edges(n1)) if n1 else []
cdef list edges2 = list(g2.edges(n2)) if n2 else []
cdef np.ndarray min_sum = np.zeros(len(edges1))
edges2.extend([None])
cdef np.ndarray min_i
for i in range(len(edges1)):
min_i = np.zeros(len(edges2))
for j in range(len(edges2)):
......@@ -143,7 +150,7 @@ cdef class BP_2():
min_sum[i] = np.min(min_i)
return np.sum(min_sum)
cdef float gpq(self, e1, e2):
cdef float gpq(self, tuple e1, tuple e2):
"""
Compute the edge distance function
:param e1: edge1
......@@ -156,6 +163,5 @@ cdef class BP_2():
return self.edge_ins
else:
if e1 == e2:
return 0.
return (self.edge_del + self.edge_ins) / 2
return 0
return (self.edge_del + self.edge_ins) / 2.
# coding = utf-8
\ No newline at end of file
# -*- coding: UTF-8 -*-
class EdgeGraph():
    """
    Minimal graph-like wrapper around the edges attached to one node.

    :param init_node: the node the edges belong to
    :param nodes: collection of edges, exposed as the "nodes" of this graph
    """

    def __init__(self, init_node, nodes):
        self.init_node = init_node
        self.nodes_ = nodes
        self.edge = nodes

    def nodes(self):
        """Return the stored collection."""
        return self.nodes_

    def size(self):
        """Return the number of stored elements."""
        # ``self.nodes`` is the accessor method above, so taking len() of it
        # raised TypeError; the length comes from the stored collection.
        return len(self.nodes_)

    def __len__(self):
        return len(self.nodes_)
import numpy as np
cimport numpy as np
from .abstract_graph_edit_dist cimport AbstractGraphEditDistance
cdef class GraphEditDistance(AbstractGraphEditDistance):
    # Cost functions of the concrete graph edit distance; G and H are the
    # two graphs being compared.
    cpdef double substitute_cost(self, node1, node2, G, H)
    cdef double delete_cost(self, int i, int j, nodesG, G)
    cdef double insert_cost(self, int i, int j, nodesH, H)
\ No newline at end of file
# -*- coding: UTF-8 -*-
import sys