diff --git a/gmatch4py/base.pxd b/gmatch4py/base.pxd index bb9e239196b691097c6a6a6edbe282889f6f263b..f36f2a903ab4a1de9a13f189be4187972bed8903 100644 --- a/gmatch4py/base.pxd +++ b/gmatch4py/base.pxd @@ -9,3 +9,7 @@ cdef class Base: cpdef np.ndarray compare(self,list graph_list, list selected) cpdef np.ndarray distance(self, np.ndarray matrix) cpdef np.ndarray similarity(self, np.ndarray matrix) + cpdef bint isAccepted(self,G,index,selected) + +cpdef intersection(G,H) +cpdef union_(G,H) diff --git a/gmatch4py/base.pyx b/gmatch4py/base.pyx index 361f9f45a0ed39388fe20a5d153965e41b78f31f..df9a3c726e5652c761e057c60469e2a7f0eb0424 100644 --- a/gmatch4py/base.pyx +++ b/gmatch4py/base.pyx @@ -2,6 +2,7 @@ import numpy as np cimport numpy as np +import networkx as nx cdef np.ndarray minmax_scale(np.ndarray matrix): """ @@ -14,6 +15,70 @@ cdef np.ndarray minmax_scale(np.ndarray matrix): max_=np.max(matrix) return matrix/(max_-min_) + + +cpdef intersection(G, H): + """ + Return a new graph that contains only the edges and nodes that exist in + both G and H. + + The node sets of H and G must be the same. + + Parameters + ---------- + G,H : graph + A NetworkX graph. G and H must have the same node sets. + + Returns + ------- + GH : A new graph with the same type as G. + + Notes + ----- + Attributes from the graph, nodes, and edges are not copied to the new + graph. If you want a new graph of the intersection of G and H + with the attributes (including edge data) from G use remove_nodes_from() + as follows + + >>> G=nx.path_graph(3) + >>> H=nx.path_graph(5) + >>> R=G.copy() + >>> R.remove_nodes_from(n for n in G if n not in H) + + Modified so it can be used with two graphs with different nodes set + """ + # create new graph + R = nx.create_empty_copy(G) + + if not G.is_multigraph() == H.is_multigraph(): + raise nx.NetworkXError('G and H must both be graphs or multigraphs.') + if G.number_of_edges() <= H.number_of_edges(): + if G.is_multigraph(): + edges = G.edges(keys=True) + else: + edges = G.edges() + for e in edges: + if H.has_edge(*e): + R.add_edge(*e) + else: + if H.is_multigraph(): + edges = H.edges(keys=True) + else: + edges = H.edges() + for e in edges: + if G.has_edge(*e): + R.add_edge(*e) + nodes_g=set(G.nodes()) + nodes_h=set(H.nodes()) + R.remove_nodes_from(list(nodes_g - nodes_h)) + return R + +cpdef union_(G, H): + R = nx.create_empty_copy(G) + R.add_edges_from(G.edges(data=True)) + R.add_edges_from(G.edges(data=True)) + return R + cdef class Base: def __cinit__(self): @@ -64,3 +129,14 @@ cdef class Base: R=g1.copy() R.remove_nodes_from(n for n in g1 if n not in g2) return R + + cpdef bint isAccepted(self,G,index,selected): + f=True + if not G: + f=False + elif len(G)== 0: + f=False + if selected: + if not index in selected: + f=False + return f diff --git a/gmatch4py/exception/__init__.py b/gmatch4py/exception/__init__.py deleted file mode 100644 index 950f6351f2b5c063c306b42eb677bb9695abdd58..0000000000000000000000000000000000000000 --- a/gmatch4py/exception/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py/exception/__init__.pyx b/gmatch4py/exception/__init__.pyx deleted file mode 100644 index 1d997a2148b1f47f536017587f0c8da2b2cf1f9f..0000000000000000000000000000000000000000 --- a/gmatch4py/exception/__init__.pyx +++ /dev/null @@ -1,7 +0,0 @@ -# coding = utf-8 -from termcolor import colored -class NotFoundDistance(Exception): - def __init__(self,dd,distanceFunctionDict): - # Call the base class constructor with the parameters it needs - super(Exception, self).__init__(colored("{0} is not an edit distance implemented ! Select a distance from : {1}".format(dd,",".join(distanceFunctionDict.keys())),"red")) - diff --git a/gmatch4py/ged/abstract_graph_edit_dist.pxd b/gmatch4py/ged/abstract_graph_edit_dist.pxd new file mode 100644 index 0000000000000000000000000000000000000000..3f12072ddee12851b057a72866070047cd1b6714 --- /dev/null +++ b/gmatch4py/ged/abstract_graph_edit_dist.pxd @@ -0,0 +1,17 @@ +import numpy as np +cimport numpy as np +from ..base cimport Base + +cdef class AbstractGraphEditDistance(Base): + cdef double node_del + cdef double node_ins + cdef double edge_del + cdef double edge_ins + cdef np.ndarray cost_matrix + + cpdef double distance_ged(self,G,H) + cdef list edit_costs(self,G,H) + cpdef np.ndarray create_cost_matrix(self,G,H) + cdef double insert_cost(self, int i, int j, nodesH, H) + cdef double delete_cost(self, int i, int j, nodesG, G) + cpdef double substitute_cost(self, node1, node2, G, H) diff --git a/gmatch4py/ged/abstract_graph_edit_dist.pyx b/gmatch4py/ged/abstract_graph_edit_dist.pyx new file mode 100644 index 0000000000000000000000000000000000000000..d85adf4dc2b1a8d34a2193fecc27487f6693e00d --- /dev/null +++ b/gmatch4py/ged/abstract_graph_edit_dist.pyx @@ -0,0 +1,99 @@ +# -*- coding: UTF-8 -*- +from __future__ import print_function + +import sys + +import numpy as np +from scipy.optimize import linear_sum_assignment +cimport numpy as np +from ..base cimport Base + +cdef class AbstractGraphEditDistance(Base): + + + def __init__(self, node_del,node_ins,edge_del,edge_ins): + Base.__init__(self,1,False) + + self.node_del = node_del + self.node_ins = node_ins + self.edge_del = edge_del + self.edge_ins = edge_ins + + + cpdef double distance_ged(self,G,H): + """ + Return the distance between G and H + :return: + """ + cdef list opt_path = self.edit_costs(G,H) + return np.sum(opt_path) + + + cdef list edit_costs(self, G, H): + """ + Return the optimal path edit cost list, to transform G into H + :return: + """ + cdef np.ndarray cost_matrix = self.create_cost_matrix(G,H).astype(float) + row_ind,col_ind = linear_sum_assignment(cost_matrix) + cdef int f=len(row_ind) + return [cost_matrix[row_ind[i]][col_ind[i]] for i in range(f)] + + cpdef np.ndarray create_cost_matrix(self, G, H): + """ + Creates a |N+M| X |N+M| cost matrix between all nodes in + graphs G and H + Each cost represents the cost of substituting, + deleting or inserting a node + The cost matrix consists of four regions: + + substitute | insert costs + ------------------------------- + delete | delete -> delete + + The delete -> delete region is filled with zeros + """ + cdef int n = G.number_of_nodes() + cdef int m = H.number_of_nodes() + cdef np.ndarray cost_matrix = np.zeros((n+m,n+m)) + cdef list nodes1 = list(G.nodes()) + cdef list nodes2 = list(H.nodes()) + cdef int i,j + for i in range(n): + for j in range(m): + cost_matrix[i,j] = self.substitute_cost(nodes1[i], nodes2[j], G, H) + + for i in range(m): + for j in range(m): + cost_matrix[i+n,j] = self.insert_cost(i, j, nodes2, H) + + for i in range(n): + for j in range(n): + cost_matrix[j,i+m] = self.delete_cost(i, j, nodes1, G) + + return cost_matrix + + cdef double insert_cost(self, int i, int j, nodesH, H): + raise NotImplementedError + + cdef double delete_cost(self, int i, int j, nodesG, G): + raise NotImplementedError + + cpdef double substitute_cost(self, node1, node2, G, H): + raise NotImplementedError + + cpdef np.ndarray compare(self,list listgs, list selected): + cdef int n = len(listgs) + cdef np.ndarray comparison_matrix = np.zeros((n, n)).astype(float) + cdef int i,j + for i in range(n): + for j in range(i, n): + g1,g2=listgs[i],listgs[j] + f=self.isAccepted(g1,i,selected) & self.isAccepted(g2,j,selected) + if f: + comparison_matrix[i, j] = self.distance_ged(g1, g2) + else: + comparison_matrix[i, j] = np.inf + comparison_matrix[j, i] = comparison_matrix[i, j] + np.fill_diagonal(comparison_matrix,0) + return comparison_matrix diff --git a/gmatch4py/ged/algorithm/__init__.py b/gmatch4py/ged/algorithm/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx b/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx deleted file mode 100644 index f9c09363b74a99678cf71eaf4480b726d7f00cf4..0000000000000000000000000000000000000000 --- a/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: UTF-8 -*- -from __future__ import print_function - -import sys - -import numpy as np -from scipy.optimize import linear_sum_assignment -cimport numpy as np - - -class AbstractGraphEditDistance(object): - - - def __init__(self, g1, g2,debug=False,**kwargs): - self.g1 = g1 - self.g2 = g2 - self.debug=debug - - self.node_del = kwargs.get("node_del",1) - self.node_ins = kwargs.get("node_ins",1) - self.edge_del = kwargs.get("edge_del",1) - self.edge_ins = kwargs.get("edge_ins",1) - - - def distance(self): - opt_path = self.edit_costs() - if self.debug: - print("Edit path for ",str(self.__class__.__name__),"\n",opt_path) - return sum(opt_path) - - def print_operations(self,cost_matrix,row_ind,col_ind): - cdef list nodes1 = list(self.g1.nodes) - cdef list nodes2 = list(self.g2.nodes) - dn1 = self.g1.nodes - dn2 = self.g2.nodes - - cdef int n=len(nodes1) - cdef int m=len(nodes2) - cdef int x,y,i - for i in range(len(row_ind)): - y,x=row_ind[i],col_ind[i] - val=cost_matrix[row_ind[i]][col_ind[i]] - if x<m and y<n: - print("SUB {0} to {1} cost = {2}".format(dn1[nodes1[y]]["label"],dn2[nodes2[x]]["label"],val)) - elif x <m and y>=n: - print("ADD {0} cost = {1}".format(dn2[nodes2[y-n]]["label"],val)) - elif x>=m and y<n: - print("DEL {0} cost = {1}".format(dn1[nodes1[m-x]]["label"],val)) - - def edit_costs(self): - cdef np.ndarray cost_matrix = self.create_cost_matrix() - if self.debug: - np.set_printoptions(precision=3) - print("Cost Matrix for ",str(self.__class__.__name__),"\n",cost_matrix) - - row_ind,col_ind = linear_sum_assignment(cost_matrix) - if self.debug: - self.print_operations(cost_matrix,row_ind,col_ind) - cdef int f=len(row_ind) - return [cost_matrix[row_ind[i]][col_ind[i]] for i in range(f)] - - def create_cost_matrix(self): - """ - Creates a |N+M| X |N+M| cost matrix between all nodes in - graphs g1 and g2 - Each cost represents the cost of substituting, - deleting or inserting a node - The cost matrix consists of four regions: - - substitute | insert costs - ------------------------------- - delete | delete -> delete - - The delete -> delete region is filled with zeros - """ - cdef int n = len(self.g1) - cdef int m = len(self.g2) - cdef np.ndarray cost_matrix = np.zeros((n+m,n+m)) - #cost_matrix = [[0 for i in range(n + m)] for j in range(n + m)] - cdef list nodes1 = list(self.g1.nodes) - cdef list nodes2 = list(self.g2.nodes) - cdef int i,j - for i in range(n): - for j in range(m): - cost_matrix[i,j] = self.substitute_cost(nodes1[i], nodes2[j]) - - for i in range(m): - for j in range(m): - cost_matrix[i+n,j] = self.insert_cost(i, j, nodes2) - - for i in range(n): - for j in range(n): - cost_matrix[j,i+m] = self.delete_cost(i, j, nodes1) - - self.cost_matrix = cost_matrix - return cost_matrix - - def insert_cost(self, int i, int j): - raise NotImplementedError - - def delete_cost(self, int i, int j): - raise NotImplementedError - - def substitute_cost(self, nodes1, nodes2): - raise NotImplementedError - - def print_matrix(self): - print("cost matrix:") - print(list(self.g1.nodes)) - print(list(self.g2.nodes)) - print(np.array(self.create_cost_matrix())) - for column in self.create_cost_matrix(): - for row in column: - if row == sys.maxsize: - print ("inf\t") - else: - print ("%.2f\t" % float(row)) - print("") diff --git a/gmatch4py/ged/algorithm/edge_edit_dist.pyx b/gmatch4py/ged/algorithm/edge_edit_dist.pyx deleted file mode 100644 index 60d6928010178210cdea51e5c4bef03d63d1a01e..0000000000000000000000000000000000000000 --- a/gmatch4py/ged/algorithm/edge_edit_dist.pyx +++ /dev/null @@ -1,29 +0,0 @@ -import sys - -from .abstract_graph_edit_dist import AbstractGraphEditDistance - - -class EdgeEditDistance(AbstractGraphEditDistance): - """ - Calculates the graph edit distance between two edges. - A node in this context is interpreted as a graph, - and edges are interpreted as nodes. - """ - - def __init__(self, g1, g2,**kwargs): - AbstractGraphEditDistance.__init__(self, g1, g2,**kwargs) - - def insert_cost(self, int i, int j, nodes2): - if i == j: - return self.edge_ins - return sys.maxsize - - def delete_cost(self, int i, int j, nodes1): - if i == j: - return self.edge_del - return sys.maxsize - - def substitute_cost(self, edge1, edge2): - if edge1 == edge2: - return 0. - return self.edge_del+self.edge_ins diff --git a/gmatch4py/ged/algorithm/graph_edit_dist.pyx b/gmatch4py/ged/algorithm/graph_edit_dist.pyx deleted file mode 100644 index b9f16f42b57a908549000df50ad8bda3c32b4107..0000000000000000000000000000000000000000 --- a/gmatch4py/ged/algorithm/graph_edit_dist.pyx +++ /dev/null @@ -1,72 +0,0 @@ -# -*- coding: UTF-8 -*- - -import sys - -import networkx as nx - -from .abstract_graph_edit_dist import AbstractGraphEditDistance -from .edge_edit_dist import EdgeEditDistance -from ..graph.edge_graph import EdgeGraph - - -def compare(g1, g2, print_details=False): - ged = GraphEditDistance(g1, g2,print_details) - return ged.distance() - - -class GraphEditDistance(AbstractGraphEditDistance): - - def __init__(self, g1, g2,debug=False,**kwargs): - AbstractGraphEditDistance.__init__(self, g1, g2,debug,**kwargs) - - def substitute_cost(self, node1, node2): - return self.relabel_cost(node1, node2) + self.edge_diff(node1, node2) - - def relabel_cost(self, node1, node2): - if node1 == node2: - edges1=set(self.get_edge_multigraph(self.g1,node1)) - edges2=set(self.get_edge_multigraph(self.g2,node2)) - return abs(len(edges2.difference(edges1))) # Take in account if there is a different number of edges - else: - return self.node_ins+self.node_del - - def delete_cost(self, int i, int j, nodes1): - if i == j: - return self.node_del+self.g1.degree(nodes1[i]) # Deleting a node implicate to delete in and out edges - return sys.maxsize - - def insert_cost(self, int i, int j, nodes2): - if i == j: - deg=self.g2.degree(nodes2[j]) - if isinstance(deg,dict):deg=0 - return self.node_ins+deg - else: - return sys.maxsize - - def get_edge_multigraph(self,g,node): - cdef list edges=[] - for id_,val in g.edges[node].items(): - if not 0 in val: - edges.append(str(id_) + val["color"]) - else: - for _,edge in val.items(): - edges.append(str(id_)+edge["color"]) - return edges - - def edge_diff(self, node1, node2): - cdef list edges1,edges2 - if isinstance(self.g1,nx.MultiDiGraph): - edges1 = self.get_edge_multigraph(self.g1,node1) - edges2 = self.get_edge_multigraph(self.g2,node2) - else: - edges1 = list(self.g1.edges[node1].keys()) - edges2 = list(self.g2.edges[node2].keys()) - if len(edges1) == 0 or len(edges2) == 0: - return max(len(edges1), len(edges2)) - - edit_edit_dist = EdgeEditDistance( - EdgeGraph(node1,edges1), - EdgeGraph(node2,edges2), - edge_del=self.edge_del,edge_ins=self.edge_ins,node_ins=self.node_ins,node_del=self.node_del - ) - return edit_edit_dist.distance() diff --git a/gmatch4py/ged/approximate_ged.pyx b/gmatch4py/ged/approximate_ged.pyx deleted file mode 100644 index 27ea4371ad287ebd14b05a613d9b02025a2f8651..0000000000000000000000000000000000000000 --- a/gmatch4py/ged/approximate_ged.pyx +++ /dev/null @@ -1,33 +0,0 @@ -# coding = utf-8 - -import numpy as np - -from .algorithm.graph_edit_dist import GraphEditDistance -from cython.parallel import prange - -class ApproximateGraphEditDistance(): - __type__ = "dist" - - @staticmethod - def compare(listgs,selected,c_del_node=1,c_del_edge=1,c_ins_node=1,c_ins_edge=1): - cdef int n= len(listgs) - cdef double[:,:] comparison_matrix = np.zeros((n,n)) - cdef int i,j - for i in prange(n,nogil=True): - for j in range(i,n): - with gil: - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - - if f: - comparison_matrix[i][j] = GraphEditDistance(listgs[i],listgs[j],False,node_del=c_del_node,node_ins=c_ins_node,edge_del=c_del_edge,edge_ins=c_ins_edge).distance() - else: - comparison_matrix[i][j] = np.inf - comparison_matrix[j][i] = comparison_matrix[i][j] - return comparison_matrix \ No newline at end of file diff --git a/gmatch4py/ged/bipartite_graph_matching_2.pyx b/gmatch4py/ged/bipartite_graph_matching_2.pyx index b4c7a2f28954d8ad3dabc5b23b3e5bca7a5d302a..af5c8f18c68bafbc96f8d13cc45ad38b37333af2 100644 --- a/gmatch4py/ged/bipartite_graph_matching_2.pyx +++ b/gmatch4py/ged/bipartite_graph_matching_2.pyx @@ -1,41 +1,43 @@ # coding = utf-8 import numpy as np cimport numpy as np +from ..base cimport Base -cdef class BP_2(): +cdef class BP_2(Base): """ """ - __type__="dist" cdef int node_del cdef int node_ins cdef int edge_del cdef int edge_ins - @staticmethod - def compare(listgs,selected, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1): + def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1): + """Constructor for HED""" + Base.__init__(self,1,False) + self.node_del = node_del + self.node_ins = node_ins + self.edge_del = edge_del + self.edge_ins = edge_ins + + cpdef np.ndarray compare(self,list listgs, list selected): cdef int n = len(listgs) - comparator = BP_2(c_del_node, c_ins_node, c_del_edge, c_ins_edge) - cdef np.ndarray comparison_matrix = np.zeros((n, n)) + cdef np.ndarray comparison_matrix = np.zeros((n, n)).astype(float) + cdef int i,j for i in range(n): for j in range(i, n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False + g1,g2=listgs[i],listgs[j] + f=self.isAccepted(g1,i,selected) & self.isAccepted(g2,j,selected) if f: - comparison_matrix[i, j] = comparator.bp2(listgs[i], listgs[j]) + comparison_matrix[i, j] = self.bp2(g1, g2) else: comparison_matrix[i, j] = np.inf comparison_matrix[j, i] = comparison_matrix[i, j] return comparison_matrix + def __init__(self, node_del=1, node_ins=1, edge_del=1, edge_ins=1): """Constructor for HED""" self.node_del = node_del @@ -43,16 +45,16 @@ cdef class BP_2(): self.edge_del = edge_del self.edge_ins = edge_ins - def bp2(self, g1, g2): + cdef double bp2(self, g1, g2): """ Compute de Hausdorff Edit Distance :param g1: first graph :param g2: second graph :return: """ - return np.min(self.distance(self.psi(g1,g2)),self.distance(self.psi(g2,g1))) + return np.min([self.distance_bp2(self.psi(g1,g2)),self.distance_bp2(self.psi(g2,g1))]) - def distance(self,e): + cdef double distance_bp2(self,e): return np.sum(e) cdef list psi(self,g1,g2): @@ -75,6 +77,25 @@ cdef class BP_2(): return psi_ + cdef float sum_fuv(self, g1, g2): + """ + Compute Nearest Neighbour Distance between G1 and G2 + :param g1: First Graph + :param g2: Second Graph + :return: + """ + cdef np.ndarray min_sum = np.zeros(len(g1)) + nodes1 = list(g1.nodes) + nodes2 = list(g2.nodes) + nodes2.extend([None]) + cdef np.ndarray min_i + for i in range(len(nodes1)): + min_i = np.zeros(len(nodes2)) + for j in range(len(nodes2)): + min_i[j] = self.fuv(g1, g2, nodes1[i], nodes2[j]) + min_sum[i] = np.min(min_i) + return np.sum(min_sum) + cdef float fuv(self, g1, g2, n1, n2): """ Compute the Node Distance function @@ -85,12 +106,12 @@ cdef class BP_2(): :return: """ if n2 == None: # Del - return self.node_del + ((self.edge_del / 2) * g1.degree(n1)) + return self.node_del + ((self.edge_del / 2.) * g1.degree(n1)) if n1 == None: # Insert - return self.node_ins + ((self.edge_ins / 2) * g2.degree(n2)) + return self.node_ins + ((self.edge_ins / 2.) * g2.degree(n2)) else: if n1 == n2: - return 0. + return 0 return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2 cdef float hed_edge(self, g1, g2, n1, n2): @@ -104,24 +125,6 @@ cdef class BP_2(): """ return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2) - cdef list get_edge_multigraph(self, g, node): - """ - Get list of edge around a node in a Multigraph - :param g: multigraph - :param node: node in the multigraph - :return: - """ - - cdef list originals_ = g.edges(node, data=True) - cdef int n= len(originals_) - if n == 0: - return [] - - cdef list edges = [""]*n - for i in range(n): - edge=originals_[i] - edges[i]=("{0}-{1}".format(edge[0],edge[1])) - return edges cdef float sum_gpq(self, g1, n1, g2, n2): """ @@ -132,10 +135,14 @@ cdef class BP_2(): :param n2: node in the second graph :return: """ - cdef list edges1 = self.get_edge_multigraph(g1, n1) - cdef list edges2 = self.get_edge_multigraph(g2, n2) - edges2.extend([None]) + + #if isinstance(g1, nx.MultiDiGraph): + cdef list edges1 = list(g1.edges(n1)) if n1 else [] + cdef list edges2 = list(g2.edges(n2)) if n2 else [] + cdef np.ndarray min_sum = np.zeros(len(edges1)) + edges2.extend([None]) + cdef np.ndarray min_i for i in range(len(edges1)): min_i = np.zeros(len(edges2)) for j in range(len(edges2)): @@ -143,7 +150,7 @@ cdef class BP_2(): min_sum[i] = np.min(min_i) return np.sum(min_sum) - cdef float gpq(self, e1, e2): + cdef float gpq(self, tuple e1, tuple e2): """ Compute the edge distance function :param e1: edge1 @@ -156,6 +163,5 @@ cdef class BP_2(): return self.edge_ins else: if e1 == e2: - return 0. - return (self.edge_del + self.edge_ins) / 2 - + return 0 + return (self.edge_del + self.edge_ins) / 2. diff --git a/gmatch4py/ged/graph/__init__.py b/gmatch4py/ged/graph/__init__.py deleted file mode 100644 index 950f6351f2b5c063c306b42eb677bb9695abdd58..0000000000000000000000000000000000000000 --- a/gmatch4py/ged/graph/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py/ged/graph/__init__.pyx b/gmatch4py/ged/graph/__init__.pyx deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/gmatch4py/ged/graph/edge_graph.pyx b/gmatch4py/ged/graph/edge_graph.pyx deleted file mode 100644 index 24b8bda1ed36c399cbe58eb0115b2cee4d85841d..0000000000000000000000000000000000000000 --- a/gmatch4py/ged/graph/edge_graph.pyx +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: UTF-8 -*- - - -class EdgeGraph(): - - def __init__(self, init_node, nodes): - self.init_node=init_node - self.nodes_ = nodes - self.edge=nodes - def nodes(self): - return self.nodes_ - - def size(self): - return len(self.nodes) - def __len__(self): - return len(self.nodes_) diff --git a/gmatch4py/ged/graph_edit_dist.pxd b/gmatch4py/ged/graph_edit_dist.pxd new file mode 100644 index 0000000000000000000000000000000000000000..975f39b1ab05df1b55d8d571afc4867d34b63fcf --- /dev/null +++ b/gmatch4py/ged/graph_edit_dist.pxd @@ -0,0 +1,9 @@ +import numpy as np +cimport numpy as np +from .abstract_graph_edit_dist cimport AbstractGraphEditDistance + + +cdef class GraphEditDistance(AbstractGraphEditDistance): + cpdef double substitute_cost(self, node1, node2, G, H) + cdef double delete_cost(self, int i, int j, nodesG, G) + cdef double insert_cost(self, int i, int j, nodesH, H) \ No newline at end of file diff --git a/gmatch4py/ged/graph_edit_dist.pyx b/gmatch4py/ged/graph_edit_dist.pyx new file mode 100644 index 0000000000000000000000000000000000000000..db351d4abffafebd1699fd916895793161a9c1d2 --- /dev/null +++ b/gmatch4py/ged/graph_edit_dist.pyx @@ -0,0 +1,44 @@ +# -*- coding: UTF-8 -*- + +import sys + +import networkx as nx +import numpy as np +cimport numpy as np +from .abstract_graph_edit_dist cimport AbstractGraphEditDistance +from ..base cimport intersection + + +cdef class GraphEditDistance(AbstractGraphEditDistance): + + def __init__(self,node_del,node_ins,edge_del,edge_ins): + AbstractGraphEditDistance.__init__(self,node_del,node_ins,edge_del,edge_ins) + + cpdef double substitute_cost(self, node1, node2, G, H): + return self.relabel_cost(node1, node2, G, H) + + def relabel_cost(self, node1, node2, G, H): + if node1 != node2: + R = nx.create_empty_copy(G) + R.add_edges_from(G.edges(node1,data=True)) + nx.relabel_nodes(R,{node1:node2},copy=False) + + R2 = nx.create_empty_copy(H) + R2.add_edges_from(H.edges(node2,data=True)) + + return abs(R2.number_of_edges()-intersection(R,R2).number_of_edges()) + else: + return self.node_ins+self.node_del + + cdef double delete_cost(self, int i, int j, nodesG, G): + if i == j: + return self.node_del+(G.degree(nodesG[i])*self.edge_del) # Deleting a node implicate to delete in and out edges + return sys.maxsize + + cdef double insert_cost(self, int i, int j, nodesH, H): + if i == j: + deg=H.degree(nodesH[j]) + if isinstance(deg,dict):deg=0 + return self.node_ins+(deg*self.edge_ins) + else: + return sys.maxsize \ No newline at end of file diff --git a/gmatch4py/ged/greedy_edit_distance.pyx b/gmatch4py/ged/greedy_edit_distance.pyx index 77520305011396a5b41f36f40394d415ea2d5efe..b4908cb2337400eec20fa5d20fa6a704b0a61c36 100644 --- a/gmatch4py/ged/greedy_edit_distance.pyx +++ b/gmatch4py/ged/greedy_edit_distance.pyx @@ -1,10 +1,11 @@ # coding = utf-8 -import numpy as np +import sys -from .algorithm.graph_edit_dist import GraphEditDistance +from .graph_edit_dist cimport GraphEditDistance +import numpy as np cimport numpy as np -class GreedyEditDistance(GraphEditDistance): +cdef class GreedyEditDistance(GraphEditDistance): """ Implementation of the Greedy Edit Distance presented in : @@ -12,39 +13,14 @@ class GreedyEditDistance(GraphEditDistance): Andreas Fischer, Kaspar Riesen, Horst Bunke 2016 """ - __type__ = "dist" - @staticmethod - def compare(listgs, selected, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1): - cdef int n = len(listgs) - cdef np.ndarray comparison_matrix = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - if f: - comparison_matrix[i, j] = GreedyEditDistance(listgs[i], listgs[j],False, node_del=c_del_node, - node_ins=c_ins_node, edge_del=c_del_edge, - edge_ins=c_ins_edge).distance() - else: - comparison_matrix[i, j] = np.inf - comparison_matrix[j, i] = comparison_matrix[i, j] - - - return comparison_matrix - def __init__(self,g1,g2,debug=False,**kwargs): - """Constructor for GreedyEditDistance""" - super().__init__(g1,g2,debug,**kwargs) + def __init__(self,node_del,node_ins,edge_del,edge_ins): + GraphEditDistance.__init__(self,node_del,node_ins,edge_del,edge_ins) - def edit_costs(self): - cdef np.ndarray cost_matrix=self.create_cost_matrix() + cdef list edit_costs(self, G, H): + cdef np.ndarray cost_matrix=self.create_cost_matrix(G,H) + """ cdef np.ndarray cost_matrix_2=cost_matrix.copy() cdef list psi=[] for i in range(len(cost_matrix)): @@ -52,4 +28,11 @@ class GreedyEditDistance(GraphEditDistance): cost_matrix=np.delete(cost_matrix,phi_i,1) psi.append([i,phi_i+i]) #+i to compensate the previous column deletion return [cost_matrix_2[psi[i][0]][psi[i][1]] for i in range(len(psi))] - + """ + cdef np.ndarray cost_matrix_2=cost_matrix.copy().astype(np.double) + cdef list psi=[] + for i in range(len(cost_matrix)): + phi_i=np.argmin(cost_matrix_2[i]) + cost_matrix_2[:,phi_i]=sys.maxsize + psi.append([i,phi_i]) #+i to compensate the previous column deletion + return [cost_matrix[psi[i][0]][psi[i][1]] for i in range(len(psi))] diff --git a/gmatch4py/ged/hausdorff_edit_distance.pyx b/gmatch4py/ged/hausdorff_edit_distance.pyx index fc123625ce80c50c91178d468ecab28e558464d0..49c11c03895133ec48f9fc0f16080a7b18bf459f 100644 --- a/gmatch4py/ged/hausdorff_edit_distance.pyx +++ b/gmatch4py/ged/hausdorff_edit_distance.pyx @@ -2,8 +2,9 @@ import numpy as np cimport numpy as np -#from libcpp.list cimport list as cpplist -cdef class HED: +from ..base cimport Base + +cdef class HED(Base): """ Implementation of Hausdorff Edit Distance described in @@ -17,24 +18,25 @@ cdef class HED: cdef int edge_del cdef int edge_ins - __type__ = "dist" - @staticmethod - def compare(list listgs, selected, int c_del_node=1, int c_del_edge=1, int c_ins_node=1, int c_ins_edge=1): + def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1): + """Constructor for HED""" + Base.__init__(self,1,False) + self.node_del = node_del + self.node_ins = node_ins + self.edge_del = edge_del + self.edge_ins = edge_ins + + + cpdef np.ndarray compare(self,list listgs, list selected): cdef int n = len(listgs) - comparator = HED(c_del_node, c_ins_node, c_del_edge, c_ins_edge) - cdef np.ndarray comparison_matrix = np.zeros((n, n)) + cdef np.ndarray comparison_matrix = np.zeros((n, n)).astype(float) + cdef int i,j for i in range(n): for j in range(i, n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False + g1,g2=listgs[i],listgs[j] + f=self.isAccepted(g1,i,selected) & self.isAccepted(g2,j,selected) if f: - comparison_matrix[i, j] = comparator.hed(listgs[i], listgs[j]) + comparison_matrix[i, j] = self.hed(g1, g2) else: comparison_matrix[i, j] = np.inf comparison_matrix[j, i] = comparison_matrix[i, j] @@ -42,14 +44,7 @@ cdef class HED: return comparison_matrix - def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1): - """Constructor for HED""" - self.node_del = node_del - self.node_ins = node_ins - self.edge_del = edge_del - self.edge_ins = edge_ins - - cpdef float hed(self, g1, g2): + cdef float hed(self, g1, g2): """ Compute de Hausdorff Edit Distance :param g1: first graph @@ -87,9 +82,9 @@ cdef class HED: :return: """ if n2 == None: # Del - return self.node_del + ((self.edge_del / 2) * g1.degree(n1)) + return self.node_del + ((self.edge_del / 2.) * g1.degree(n1)) if n1 == None: # Insert - return self.node_ins + ((self.edge_ins / 2) * g2.degree(n2)) + return self.node_ins + ((self.edge_ins / 2.) * g2.degree(n2)) else: if n1 == n2: return 0 @@ -106,27 +101,8 @@ cdef class HED: """ return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2) - cdef list get_edge_multigraph(self, g, node): - """ - Get list of edge around a node in a Multigraph - :param g: multigraph - :param node: node in the multigraph - :return: - """ - - cdef list originals_ = g.edges(node, data=True) - cdef int n= len(originals_) - if n == 0: - return [] - - cdef list edges = [""]*n - for i in range(n): - edge=originals_[i] - edges[i]=("{0}-{1}".format(edge[0],edge[1])) - return edges - - cdef float sum_gpq(self, g1, n1, g2, n2): + cdef float sum_gpq(self, g1, n1, g2, n2): """ Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2 :param g1: first graph @@ -137,12 +113,8 @@ cdef class HED: """ #if isinstance(g1, nx.MultiDiGraph): - cdef list edges1 = self.get_edge_multigraph(g1, n1) - cdef list edges2 = self.get_edge_multigraph(g2, n2) - - #else: - #edges1 = [str(n1 + "-" + ef) for ef in list(g1.edge[n1].keys())] - #edges2 = [str(n2 + "-" + ef) for ef in list(g2.edge[n2].keys())] + cdef list edges1 = list(g1.edges(n1)) if n1 else [] + cdef list edges2 = list(g2.edges(n2)) if n2 else [] cdef np.ndarray min_sum = np.zeros(len(edges1)) edges2.extend([None]) @@ -154,7 +126,7 @@ cdef class HED: min_sum[i] = np.min(min_i) return np.sum(min_sum) - cdef float gpq(self, str e1, str e2): + cdef float gpq(self, tuple e1, tuple e2): """ Compute the edge distance function :param e1: edge1 @@ -168,4 +140,4 @@ cdef class HED: else: if e1 == e2: return 0 - return (self.edge_del + self.edge_ins) / 2 + return (self.edge_del + self.edge_ins) / 2. diff --git a/gmatch4py/jaccard.pyx b/gmatch4py/jaccard.pyx index a41f4bad8fba9685724a0db5b6bf093fc2de294e..aeea06050b6857a0c6cc112d9d7ee9e1efd1c19c 100644 --- a/gmatch4py/jaccard.pyx +++ b/gmatch4py/jaccard.pyx @@ -4,37 +4,32 @@ import numpy as np cimport numpy as np from .base cimport Base -def intersect(a, b): - return list(set(a) & set(b)) -class Jaccard(Base): - __type__ = "sim" +from .base cimport intersection,union_ - @staticmethod - def compare(listgs,selected): +cdef class Jaccard(Base): + + def __init__(self): + Base.__init__(self,0,True) + + cpdef np.ndarray compare(self,list listgs, list selected): cdef int n = len(listgs) cdef np.ndarray comparison_matrix = np.zeros((n, n)) - cdef i=0 - cdef j=0 + cdef int i,j for i in range(n): for j in range(i,n): - g1 = listgs[i] - g2 = listgs[j] - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False + g1,g2=listgs[i],listgs[j] + f=self.isAccepted(g1,i,selected) & self.isAccepted(g2,j,selected) if f: - inter_ver,inter_ed = Jaccard.intersect_graph(g1,g2) - un_ver,un_edg=Jaccard.union_nodes(g1,g2),Jaccard.union_edges(g1,g2) - if len(un_ver) == 0 or len(un_edg) == 0: + inter_g=intersection(g1,g2) + union_g=union_(g1,g2) + if union_g.number_of_nodes() == 0 or union_g.number_of_edges()== 0: comparison_matrix[i, j] = 0. else: - comparison_matrix[i,j]=(len(inter_ver)/len(un_ver))*(len(inter_ed)/len(un_edg)) + comparison_matrix[i,j]=\ + ((inter_g.number_of_nodes())/(union_g.number_of_nodes()))\ + *\ + ((union_g.number_of_edges())/(union_g.number_of_edges())) else: comparison_matrix[i, j] = 0. @@ -43,52 +38,4 @@ class Jaccard(Base): return comparison_matrix - @staticmethod - def intersect_edges(g1,g2): - cdef list ed1 = Jaccard.transform_edges(list(g1.edges(data=True))) - cdef list ed2 = Jaccard.transform_edges(list(g2.edges(data=True))) - cdef list inter_ed=[] - for e1 in ed1: - for e2 in ed2: - if e1 == e2: - inter_ed.append(e1) - return inter_ed - - @staticmethod - def union_nodes(g1, g2): - cdef set union=set([]) - for n in g1.nodes():union.add(n) - for n in g2.nodes(): union.add(n) - return union - - @staticmethod - def union_edges(g1, g2): - cdef list ed1 = Jaccard.transform_edges(g1.edges(data=True)) - cdef list ed2 = Jaccard.transform_edges(g2.edges(data=True)) - cdef list union = [] - cdef set register=set([]) - trans_=lambda x : "{0}-{1}:{2}".format(x[0],x[1],x[2]["color"]) - for e1 in ed1: - if not trans_(e1) in register: - union.append(e1) - register.add(trans_(e1)) - for e2 in ed2: - if not trans_(e2) in register: - union.append(e2) - register.add(trans_(e2)) - return union - @staticmethod - def intersect_nodes(g1,g2): - return intersect(list(g1.nodes),list(g2.nodes)) - - @staticmethod - def intersect_graph(g1,g2): - return Jaccard.intersect_nodes(g1,g2),Jaccard.intersect_edges(g1,g2) - - @staticmethod - def transform_edges(ed): - for e in range(len(ed)): - if "id" in ed[e][-1]: - del ed[e][-1]["id"] - return ed diff --git a/gmatch4py/mcs.pyx b/gmatch4py/mcs.pyx index d9bb488b1ce31e7ebc6cd402c617b6cc254c1a10..de798e97c54bb7d6cbbb6e136e1efaa08d2fb020 100644 --- a/gmatch4py/mcs.pyx +++ b/gmatch4py/mcs.pyx @@ -17,16 +17,10 @@ cdef class MCS(Base): cdef np.ndarray comparison_matrix = np.zeros((n, n)) for i in range(n): for j in range(i, n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False + g1,g2=listgs[i],listgs[j] + f=self.isAccepted(g1,i,selected) & self.isAccepted(g2,j,selected) if f: - comparison_matrix[i, j] = self.s_mcs(listgs[i],listgs[j]) + comparison_matrix[i, j] = self.s_mcs(g1,g2) else: comparison_matrix[i, j] = 0. comparison_matrix[j, i] = comparison_matrix[i, j] diff --git a/gmatch4py/vertex_edge_overlap.pyx b/gmatch4py/vertex_edge_overlap.pyx index edc3f7252f3a5ffc83f391e6dabe4d46f8e7de7d..2a75277f4f33428b999f04badbdf906ae17fb359 100644 --- a/gmatch4py/vertex_edge_overlap.pyx +++ b/gmatch4py/vertex_edge_overlap.pyx @@ -2,12 +2,9 @@ import numpy as np cimport numpy as np +from .base cimport Base,intersection - -cdef list intersect(a, b): - return list(set(a) & set(b)) -class VertexEdgeOverlap(): - __type__ = "sim" +cdef class VertexEdgeOverlap(Base): """ Vertex/Edge Overlap Algorithm @@ -16,62 +13,28 @@ class VertexEdgeOverlap(): Code Author : Jacques Fize """ + def __init__(self): + Base.__init__(self,0,True) - @staticmethod - def compare(list listgs,selected): + cpdef np.ndarray compare(self,list listgs, list selected): n = len(listgs) cdef np.ndarray comparison_matrix = np.zeros((n, n)) - cdef list inter_ver - cdef list inter_ed - cdef int denom + cdef list inter_ver,inter_ed + cdef int denom,i,j for i in range(n): for j in range(i,n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False + g1,g2 = listgs[i],listgs[j] + f=self.isAccepted(g1,i,selected) & self.isAccepted(g2,j,selected) if f: - g1 = listgs[i] - g2 = listgs[j] - inter_ver,inter_ed = VertexEdgeOverlap.intersect_graph(g1,g2) - denom=len(g1)+len(g2)+len(g1.edges(data=True))+len(g2.edges(data=True)) + inter_g= intersection(g1,g2) + denom=g1.number_of_nodes()+g2.number_of_nodes()+\ + g1.number_of_edges()+g2.number_of_edges() if denom == 0: continue - comparison_matrix[i,j]=2*(len(inter_ver)+len(inter_ed))/denom # Data = True --> For nx.MultiDiGraph - else: - comparison_matrix[i, j] = 0. + comparison_matrix[i,j]=(2*(inter_g.number_of_nodes() + +inter_g.number_of_edges()))/denom # Data = True --> For nx.MultiDiGraph comparison_matrix[j, i] = comparison_matrix[i, j] return comparison_matrix - @staticmethod - def intersect_edges(g1,g2): - cdef list ed1 = VertexEdgeOverlap.transform_edges(list(g1.edges(data=True))) - cdef list ed2 = VertexEdgeOverlap.transform_edges(list(g2.edges(data=True))) - cdef list inter_ed=[] - for e1 in ed1: - for e2 in ed2: - if e1 == e2: - inter_ed.append(e1) - return inter_ed - - - @staticmethod - def intersect_nodes(g1,g2): - return intersect(list(g1.nodes),list(g2.nodes)) - - @staticmethod - def intersect_graph(g1,g2): - return VertexEdgeOverlap.intersect_nodes(g1,g2),VertexEdgeOverlap.intersect_edges(g1,g2) - - @staticmethod - def transform_edges(ed): - for e in range(len(ed)): - if "id" in ed[e][-1]: - del ed[e][-1]["id"] - return ed diff --git a/gmatch4py/vertex_ranking.pyx b/gmatch4py/vertex_ranking.pyx index 42f9130ee4f3aec2a3a2a6bc17d579061993f8b6..8a730e888dbe2e85583a06aafdf03f58ad5a1685 100644 --- a/gmatch4py/vertex_ranking.pyx +++ b/gmatch4py/vertex_ranking.pyx @@ -29,6 +29,8 @@ cdef class VertexRanking(Base): for i in range(n): pager_i=list(page_r[i]) for j in range(i,n): + g1,g2=listgs[i],listgs[j] + f=self.isAccepted(g1,i,selected) & self.isAccepted(g2,j,selected) pager_j=list(page_r[j]) node_intersection=list(set(pager_i) & set(pager_j)) X,Y=[],[] diff --git a/setup.py b/setup.py index 4fef13160a83901846a6b5296f4033a853ff7e10..a9d67f2ba1e43b5298f3a81d9e9a39743388e733 100644 --- a/setup.py +++ b/setup.py @@ -9,12 +9,38 @@ except: print("You don't seem to have Cython installed. Please get a") print("copy from www.cython.org and install it") sys.exit(1) +def scandir(dir, files=[]): + for file in os.listdir(dir): + path = os.path.join(dir, file) + if os.path.isfile(path) and path.endswith(".pyx"): + files.append(path.replace(os.path.sep, ".")[:-4]) + elif os.path.isdir(path): + scandir(path, files) + return files + + +# generate an Extension object from its dotted name +def makeExtension(extName): + extPath = extName.replace(".", os.path.sep)+".pyx" + return Extension( + extName, + [extPath],include_dirs=[np.get_include()],language='c++' + ) + +# get the list of extensions +extNames = scandir("gmatch4py") + +# and build up the set of Extension objects +extensions = cythonize([makeExtension(name) for name in extNames]) setup( name="Gmatch4py", description="A module for graph matching", - packages=["gmatch4py", "gmatch4py.ged", "gmatch4py.kernels"], - ext_modules=cythonize([Extension("*", ["gmatch4py/*.pyx"],include_dirs=[np.get_include()])]), + packages=["gmatch4py"], + #ext_modules=cythonize([ + # Extension("*", ["gmatch4py/*.pyx"],include_dirs=[np.get_include()]) + #]), + ext_modules=extensions, cmdclass={'build_ext': build_ext}, setup_requires=["numpy","networkx"], install_requires=["numpy","networkx"],