diff --git a/.gitignore b/.gitignore
index 894a44cc066a027465cd26d634948d56d13af9af..48dfef6fb278e66f6229ceecefd7bc4bd64ced9f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,6 @@ venv.bak/
 # mypy
 .mypy_cache/
+*.cpp
+*.c
+.DS_Store
\ No newline at end of file
diff --git a/README.md b/README.md
index 6d95cbeb4941b2356303394f46860281a3bf297a..d30da9a417e7af4b4dd83e3990632240e30b0183 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,43 @@
-# GMatch4py
-A python module for graph matching using Networkx
+# Gmatch4py: a graph matching library for Python
+
+Gmatch4py is a library dedicated to graph matching. Graph structures are stored in NetworkX `Graph` objects.
+
+## List of algorithms
+
+ * DeltaCon and DeltaCon0 (*debug needed*) [1]
+ * Vertex Ranking (*debug needed*) [2]
+ * Vertex Edge Overlap [2]
+ * Graph kernels
+   * Random Walk Kernel (*debug needed*) [3]
+     * Geometrical
+     * K-Step
+   * Shortest Path Kernel [3]
+   * Weisfeiler-Lehman Kernel [4]
+     * Subtree Kernel
+     * Edge Kernel
+     * Subtree Geo Kernel [new]
+     * Edge Geo Kernel [new]
+ * Graph Edit Distance [5]
+   * Approximated Graph Edit Distance
+   * Hausdorff Graph Edit Distance
+   * Bipartite Graph Edit Distance
+   * Greedy Edit Distance
+ * MCS [6]
+
+
+## Associated publications
+
+ * [1] Koutra, D., Vogelstein, J. T., & Faloutsos, C. (2013, May). DeltaCon: A principled massive-graph similarity function. In Proceedings of the 2013 SIAM International Conference on Data Mining (pp. 162-170). Society for Industrial and Applied Mathematics.
+ * [2] Papadimitriou, P., Dasdan, A., & Garcia-Molina, H. (2010). Web graph similarity for anomaly detection. Journal of Internet Services and Applications, 1(1), 19-30.
+ * [3] Vishwanathan, S. V. N., Schraudolph, N. N., Kondor, R., & Borgwardt, K. M. (2010). Graph kernels. Journal of Machine Learning Research, 11(Apr), 1201-1242.
+ * [4] Shervashidze, N., Schweitzer, P., Leeuwen, E. J. V., Mehlhorn, K., & Borgwardt, K. M. (2011). Weisfeiler-Lehman graph kernels. Journal of Machine Learning Research, 12(Sep), 2539-2561.
+ * [5] Fischer, A., Riesen, K., & Bunke, H. (2017). Improved quadratic time approximation of graph edit distance by combining Hausdorff matching and greedy assignment. Pattern Recognition Letters, 87, 55-62.
+ * [6] Bunke, H., & Shearer, K. (1998). A graph distance metric based on the maximal common subgraph. Pattern Recognition Letters, 19(3-4), 255-259.
+
+## Authors
+
+Jacques Fize
+
+## TODO
+
+ * Debug the algorithms marked with (*debug needed*)
\ No newline at end of file
diff --git a/gmatch4py/__init__.py b/gmatch4py/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..950f6351f2b5c063c306b42eb677bb9695abdd58
--- /dev/null
+++ b/gmatch4py/__init__.py
@@ -0,0 +1 @@
+# coding = utf-8
\ No newline at end of file
diff --git a/gmatch4py/bag_of_cliques.pyx b/gmatch4py/bag_of_cliques.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..b7c68e4e869deef2ce72e134b01d95957a47e560
--- /dev/null
+++ b/gmatch4py/bag_of_cliques.pyx
@@ -0,0 +1,166 @@
+# coding = utf-8
+
+import copy
+from typing import Sequence
+
+import networkx as nx
+import numpy as np
+cimport numpy as np
+import sys
+
+from networkit import graph
+from networkit.clique import MaximalCliques
+
+def nx2nk(nxG, weightAttr=None):
+    """
+    Convert a networkx.Graph to a NetworKit.Graph
+    :param weightAttr: the edge attribute which should be treated as the edge weight.
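+    :param nxG: the networkx graph to convert
+    :return: the converted NetworKit graph and the mapping from original node ids to numerical ids
+
+    Illustrative sketch (assumes NetworKit is installed):
+
+    >>> import networkx as nx
+    >>> nkG, idmap = nx2nk(nx.path_graph(3))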
+    """
+
+    # map networkx node ids to consecutive numerical node ids
+    idmap = dict((id, u) for (id, u) in zip(list(nxG.nodes), range(nxG.number_of_nodes())))
+    z = max(idmap.values()) + 1
+    # print("z = {0}".format(z))
+
+    if weightAttr is not None:
+        nkG = graph.Graph(z, weighted=True, directed=nxG.is_directed())
+        for (u_, v_) in nxG.edges():
+            u, v = idmap[u_], idmap[v_]
+            w = nxG[u_][v_][weightAttr]
+            nkG.addEdge(u, v, w)
+    else:
+        nkG = graph.Graph(z, directed=nxG.is_directed())
+        for (u_, v_) in nxG.edges():
+            u, v = idmap[u_], idmap[v_]
+            # print(u_, v_, u, v)
+            assert (u < z)
+            assert (v < z)
+            nkG.addEdge(u, v)
+
+    assert (nkG.numberOfNodes() == nxG.number_of_nodes())
+    assert (nkG.numberOfEdges() == nxG.number_of_edges())
+    nkG.removeSelfLoops()  # works in place: returning its result would yield None
+    return nkG, idmap
+
+def getClique(nx_graph):
+    final_cliques = []
+    if len(nx_graph) == 0:
+        return final_cliques
+    netkit_graph, idmap = nx2nk(nx_graph)
+    idmap = {v: k for k, v in idmap.items()}
+    cliques = MaximalCliques(netkit_graph).run().getCliques()
+    for cl in cliques:
+        final_cliques.append(list(map(lambda x: idmap[x], cl)))
+    return final_cliques
+
+class BagOfCliques():
+
+    @staticmethod
+    def compare(graphs, selected):
+        b = BagOfCliques()
+        bog = b.getBagOfCliques(graphs).astype(np.float32)
+        # Compute cosine similarity
+        cdef int n = bog.shape[0]
+        cdef double[:,:] scores = np.zeros((n, n))
+        cdef int i
+        for i in range(len(scores)):
+            if selected and not i in selected:
+                continue
+            for j in range(i, len(scores)):
+                scores[i, j] = (np.dot(bog[i], bog[j]))/(np.sqrt(np.sum(bog[i]**2))*np.sqrt(np.sum(bog[j]**2)))  # cosine similarity between the two bag-of-cliques vectors
+                scores[j, i] = scores[i, j]
+        return scores
+
+    def getUniqueCliques(self, graphs):
+        """
+        Return the unique cliques found in a population of graphs
+        :return:
+        """
+        t = {}
+        c_ = 0
+        cdef list clique_vocab = []
+        cdef list cli_temp
+        cdef list cliques
+        cdef int len_graphs = len(graphs)
+        cdef int km = -1
+        for g in graphs:
+            km += 1
+            if not g:
+                continue
+            sys.stdout.write("\r{0}/{1} -- {2}".format(km, len_graphs, len(g)))
+            try:
+                cliques = list(getClique(nx.Graph(g)))
+            except:
+                # no clique found
+                # print(nx.Graph(g).edges())
+                cliques = []
+            for clique in cliques:
+                cursor = t  # walk the prefix trie from its root for every clique
+                cli_temp = copy.deepcopy(clique)
+                new_clique = False
+                for i in range(len(clique)):
+                    flag = False
+                    v = None  # vertex deleted
+                    for vertex in cli_temp:
+                        if vertex in cursor:
+                            v = vertex
+                            flag = True
+
+                    if not flag:
+                        v = cli_temp[0]
+                        cursor[v] = {}
+                        new_clique = True
+                    cursor = cursor[v]
+                    cli_temp.remove(v)
+
+                if new_clique:
+                    c_ += 1
+                    clique_vocab.append(clique)
+        return clique_vocab
+
+
+    def clique2str(self, cliques):
+        return "".join(sorted(cliques))
+
+    def transform_clique_vocab(self, clique_vocab):
+        cdef dict new_vocab = {}
+        cdef int len_voc = len(clique_vocab)
+        for c in range(len_voc):
+            # print(c)
+            new_vocab[self.clique2str(clique_vocab[c])] = c
+        return new_vocab
+
+
+    def ifHaveMinor(self, clique, dict mapping):
+        """
+        Return 1 if a clique (minor) belongs to the graph vocabulary mapping, else 0
+        :param clique:
+        :return:
+        """
+        if self.clique2str(clique) in mapping:
+            return 1
+        return 0
+
+
+    def getBagOfCliques(self, graphs):
+        """
+        Build the bag-of-cliques matrix: one row per graph, one column per clique of the vocabulary
+        :param graphs:
+        :return:
+        """
+        cdef list clique_vocab = self.getUniqueCliques(graphs)
+        cdef dict map_str_cliques = self.transform_clique_vocab(clique_vocab)
+        cdef int l_v = len(clique_vocab)
+        cdef np.ndarray boc = np.zeros((len(graphs), l_v))
+        cdef np.ndarray vector
+        cdef list cliques
+        for g in range(len(graphs)):
+            sys.stdout.write("\r{0}/{1}".format(g, len(graphs)))
+            gr = graphs[g]
+            vector = np.zeros(l_v)
+            cliques = list(getClique(nx.Graph(gr)))
+            for clique in cliques:
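+                # set to 1 the vector component of any clique already present in the vocabulary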
+ hash=self.clique2str(clique) + if hash in map_str_cliques: + vector[map_str_cliques[hash]] = 1 + boc[g] = vector + return boc \ No newline at end of file diff --git a/gmatch4py/deltacon.pyx b/gmatch4py/deltacon.pyx new file mode 100644 index 0000000000000000000000000000000000000000..a4d01b05816bb3bacf08818f42cf954cb80dc524 --- /dev/null +++ b/gmatch4py/deltacon.pyx @@ -0,0 +1,153 @@ +# coding = utf-8 + +import networkx as nx +import numpy as np +import scipy.sparse + + +class DeltaCon0(): + __type__ = "sim" + + @staticmethod + def compare(list_gs,selected): + n=len(list_gs) + + comparison_matrix = np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + g1,g2=list_gs[i],list_gs[j] + f=True + if not list_gs[i] or not list_gs[j]: + f=False + elif len(list_gs[i])== 0 or len(list_gs[j]) == 0: + f=False + if selected: + if not i in selected: + f=False + if f: + # S1 + epsilon = 1/(1+DeltaCon0.maxDegree(g1)) + D, A = DeltaCon0.degreeAndAdjacencyMatrix(g1) + S1 = np.linalg.inv(np.identity(len(g1))+(epsilon**2)*D -epsilon*A) + + # S2 + D, A = DeltaCon0.degreeAndAdjacencyMatrix(g2) + epsilon = 1 / (1 + DeltaCon0.maxDegree(g2)) + S2 = np.linalg.inv(np.identity(len(g2))+(epsilon**2)*D -epsilon*A) + + + comparison_matrix[i,j] = 1/(1+DeltaCon0.rootED(S1,S2)) + comparison_matrix[j,i] = comparison_matrix[i,j] + else: + comparison_matrix[i, j] = 0. + comparison_matrix[j, i] = comparison_matrix[i, j] + + + return comparison_matrix + + @staticmethod + def rootED(S1,S2): + return np.sqrt(np.sum((S1-S2)**2)) # Long live numpy ! + + @staticmethod + def degreeAndAdjacencyMatrix(G): + """ + Return the Degree(D) and Adjacency Matrix(A) from a graph G. + Inspired of nx.laplacian_matrix(G,nodelist,weight) code proposed by networkx + :param G: + :return: + """ + A = nx.to_scipy_sparse_matrix(G, nodelist=list(G.nodes), weight="weight", + format='csr') + n, m = A.shape + diags = A.sum(axis=1) + D = scipy.sparse.spdiags(diags.flatten(), [0], m, n, format='csr') + + return D, A + @staticmethod + def maxDegree(G): + degree_sequence = sorted(nx.degree(G).values(), reverse=True) # degree sequence + # print "Degree sequence", degree_sequence + dmax = max(degree_sequence) + return dmax + +class DeltaCon(): + __type__ = "sim" + + @staticmethod + def relabel_nodes(graph_list): + label_lookup = {} + label_counter = 0 + n= len(graph_list) + # label_lookup is an associative array, which will contain the + # mapping from multiset labels (strings) to short labels + # (integers) + for i in range(n): + nodes = list(graph_list[i].nodes) + + for j in range(len(nodes)): + if not (nodes[j] in label_lookup): + label_lookup[nodes[j]] = label_counter + label_counter += 1 + + graph_list[i] = nx.relabel_nodes(graph_list[i], label_lookup) + return graph_list + @staticmethod + def compare(list_gs, g=3): + n=len(list_gs) + list_gs=DeltaCon.relabel_nodes(list_gs) + comparison_matrix = np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + g1,g2=list_gs[i],list_gs[j] + + V = list(g1.nodes) + V.extend(list(g2.nodes)) + V=np.unique(V) + + partitions=V.copy() + np.random.shuffle(partitions) + if len(partitions)< g: + partitions=np.array([partitions]) + else: + partitions=np.array_split(partitions,g) + partitions_e_1 = DeltaCon.partitions2e(partitions, list(g1.nodes)) + partitions_e_2 = DeltaCon.partitions2e(partitions, list(g2.nodes)) + S1,S2=[],[] + for k in range(len(partitions)): + s0k1,s0k2=partitions_e_1[k],partitions_e_2[k] + + # S1 + epsilon = 1/(1+DeltaCon0.maxDegree(g1)) + D, A = DeltaCon0.degreeAndAdjacencyMatrix(g1) + s1k = 
np.linalg.inv(np.identity(len(g1))+(epsilon**2)*D -epsilon*A) + s1k=np.linalg.solve(s1k,s0k1).tolist() + + # S2 + D, A = DeltaCon0.degreeAndAdjacencyMatrix(g2) + epsilon = 1 / (1 + DeltaCon0.maxDegree(g2)) + s2k= np.linalg.inv(np.identity(len(g2))+(epsilon**2)*D -epsilon*A) + s2k = np.linalg.solve(s2k, s0k2).tolist() + + + + S1.append(s1k) + S2.append(s2k) + + comparison_matrix[i,j] = 1/(1+DeltaCon0.rootED(np.array(S1),np.array(S2))) + comparison_matrix[j,i] = comparison_matrix[i,j] + + return comparison_matrix + + + @staticmethod + def partitions2e( partitions, V): + e = [ [] for i in range(len(partitions))] + for p in range(len(partitions)): + e[p] = [] + for i in range(len(V)): + if i in partitions[p]: + e[p].append(1.0) + else: + e[p].append(0.0) + return e \ No newline at end of file diff --git a/gmatch4py/exception/__init__.py b/gmatch4py/exception/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..950f6351f2b5c063c306b42eb677bb9695abdd58 --- /dev/null +++ b/gmatch4py/exception/__init__.py @@ -0,0 +1 @@ +# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py/exception/__init__.pyx b/gmatch4py/exception/__init__.pyx new file mode 100644 index 0000000000000000000000000000000000000000..1d997a2148b1f47f536017587f0c8da2b2cf1f9f --- /dev/null +++ b/gmatch4py/exception/__init__.pyx @@ -0,0 +1,7 @@ +# coding = utf-8 +from termcolor import colored +class NotFoundDistance(Exception): + def __init__(self,dd,distanceFunctionDict): + # Call the base class constructor with the parameters it needs + super(Exception, self).__init__(colored("{0} is not an edit distance implemented ! Select a distance from : {1}".format(dd,",".join(distanceFunctionDict.keys())),"red")) + diff --git a/gmatch4py/ged/__init__.py b/gmatch4py/ged/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e5c6c3cf41f050a3a264593b6ecb526318bf65a2 --- /dev/null +++ b/gmatch4py/ged/__init__.py @@ -0,0 +1,2 @@ +# coding = utf-8 + diff --git a/gmatch4py/ged/algorithm/__init__.py b/gmatch4py/ged/algorithm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx b/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx new file mode 100644 index 0000000000000000000000000000000000000000..f9c09363b74a99678cf71eaf4480b726d7f00cf4 --- /dev/null +++ b/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx @@ -0,0 +1,118 @@ +# -*- coding: UTF-8 -*- +from __future__ import print_function + +import sys + +import numpy as np +from scipy.optimize import linear_sum_assignment +cimport numpy as np + + +class AbstractGraphEditDistance(object): + + + def __init__(self, g1, g2,debug=False,**kwargs): + self.g1 = g1 + self.g2 = g2 + self.debug=debug + + self.node_del = kwargs.get("node_del",1) + self.node_ins = kwargs.get("node_ins",1) + self.edge_del = kwargs.get("edge_del",1) + self.edge_ins = kwargs.get("edge_ins",1) + + + def distance(self): + opt_path = self.edit_costs() + if self.debug: + print("Edit path for ",str(self.__class__.__name__),"\n",opt_path) + return sum(opt_path) + + def print_operations(self,cost_matrix,row_ind,col_ind): + cdef list nodes1 = list(self.g1.nodes) + cdef list nodes2 = list(self.g2.nodes) + dn1 = self.g1.nodes + dn2 = self.g2.nodes + + cdef int n=len(nodes1) + cdef int m=len(nodes2) + cdef int x,y,i + for i in range(len(row_ind)): + y,x=row_ind[i],col_ind[i] + val=cost_matrix[row_ind[i]][col_ind[i]] + if x<m and y<n: + 
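+                # row y is a g1 node and column x a g2 node: this assignment is a substitution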
print("SUB {0} to {1} cost = {2}".format(dn1[nodes1[y]]["label"],dn2[nodes2[x]]["label"],val)) + elif x <m and y>=n: + print("ADD {0} cost = {1}".format(dn2[nodes2[y-n]]["label"],val)) + elif x>=m and y<n: + print("DEL {0} cost = {1}".format(dn1[nodes1[m-x]]["label"],val)) + + def edit_costs(self): + cdef np.ndarray cost_matrix = self.create_cost_matrix() + if self.debug: + np.set_printoptions(precision=3) + print("Cost Matrix for ",str(self.__class__.__name__),"\n",cost_matrix) + + row_ind,col_ind = linear_sum_assignment(cost_matrix) + if self.debug: + self.print_operations(cost_matrix,row_ind,col_ind) + cdef int f=len(row_ind) + return [cost_matrix[row_ind[i]][col_ind[i]] for i in range(f)] + + def create_cost_matrix(self): + """ + Creates a |N+M| X |N+M| cost matrix between all nodes in + graphs g1 and g2 + Each cost represents the cost of substituting, + deleting or inserting a node + The cost matrix consists of four regions: + + substitute | insert costs + ------------------------------- + delete | delete -> delete + + The delete -> delete region is filled with zeros + """ + cdef int n = len(self.g1) + cdef int m = len(self.g2) + cdef np.ndarray cost_matrix = np.zeros((n+m,n+m)) + #cost_matrix = [[0 for i in range(n + m)] for j in range(n + m)] + cdef list nodes1 = list(self.g1.nodes) + cdef list nodes2 = list(self.g2.nodes) + cdef int i,j + for i in range(n): + for j in range(m): + cost_matrix[i,j] = self.substitute_cost(nodes1[i], nodes2[j]) + + for i in range(m): + for j in range(m): + cost_matrix[i+n,j] = self.insert_cost(i, j, nodes2) + + for i in range(n): + for j in range(n): + cost_matrix[j,i+m] = self.delete_cost(i, j, nodes1) + + self.cost_matrix = cost_matrix + return cost_matrix + + def insert_cost(self, int i, int j): + raise NotImplementedError + + def delete_cost(self, int i, int j): + raise NotImplementedError + + def substitute_cost(self, nodes1, nodes2): + raise NotImplementedError + + def print_matrix(self): + print("cost matrix:") + print(list(self.g1.nodes)) + print(list(self.g2.nodes)) + print(np.array(self.create_cost_matrix())) + for column in self.create_cost_matrix(): + for row in column: + if row == sys.maxsize: + print ("inf\t") + else: + print ("%.2f\t" % float(row)) + print("") diff --git a/gmatch4py/ged/algorithm/edge_edit_dist.pyx b/gmatch4py/ged/algorithm/edge_edit_dist.pyx new file mode 100644 index 0000000000000000000000000000000000000000..60d6928010178210cdea51e5c4bef03d63d1a01e --- /dev/null +++ b/gmatch4py/ged/algorithm/edge_edit_dist.pyx @@ -0,0 +1,29 @@ +import sys + +from .abstract_graph_edit_dist import AbstractGraphEditDistance + + +class EdgeEditDistance(AbstractGraphEditDistance): + """ + Calculates the graph edit distance between two edges. + A node in this context is interpreted as a graph, + and edges are interpreted as nodes. + """ + + def __init__(self, g1, g2,**kwargs): + AbstractGraphEditDistance.__init__(self, g1, g2,**kwargs) + + def insert_cost(self, int i, int j, nodes2): + if i == j: + return self.edge_ins + return sys.maxsize + + def delete_cost(self, int i, int j, nodes1): + if i == j: + return self.edge_del + return sys.maxsize + + def substitute_cost(self, edge1, edge2): + if edge1 == edge2: + return 0. 
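+        # two distinct edges: substitution is charged as one deletion plus one insertion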
+ return self.edge_del+self.edge_ins diff --git a/gmatch4py/ged/algorithm/graph_edit_dist.pyx b/gmatch4py/ged/algorithm/graph_edit_dist.pyx new file mode 100644 index 0000000000000000000000000000000000000000..b9f16f42b57a908549000df50ad8bda3c32b4107 --- /dev/null +++ b/gmatch4py/ged/algorithm/graph_edit_dist.pyx @@ -0,0 +1,72 @@ +# -*- coding: UTF-8 -*- + +import sys + +import networkx as nx + +from .abstract_graph_edit_dist import AbstractGraphEditDistance +from .edge_edit_dist import EdgeEditDistance +from ..graph.edge_graph import EdgeGraph + + +def compare(g1, g2, print_details=False): + ged = GraphEditDistance(g1, g2,print_details) + return ged.distance() + + +class GraphEditDistance(AbstractGraphEditDistance): + + def __init__(self, g1, g2,debug=False,**kwargs): + AbstractGraphEditDistance.__init__(self, g1, g2,debug,**kwargs) + + def substitute_cost(self, node1, node2): + return self.relabel_cost(node1, node2) + self.edge_diff(node1, node2) + + def relabel_cost(self, node1, node2): + if node1 == node2: + edges1=set(self.get_edge_multigraph(self.g1,node1)) + edges2=set(self.get_edge_multigraph(self.g2,node2)) + return abs(len(edges2.difference(edges1))) # Take in account if there is a different number of edges + else: + return self.node_ins+self.node_del + + def delete_cost(self, int i, int j, nodes1): + if i == j: + return self.node_del+self.g1.degree(nodes1[i]) # Deleting a node implicate to delete in and out edges + return sys.maxsize + + def insert_cost(self, int i, int j, nodes2): + if i == j: + deg=self.g2.degree(nodes2[j]) + if isinstance(deg,dict):deg=0 + return self.node_ins+deg + else: + return sys.maxsize + + def get_edge_multigraph(self,g,node): + cdef list edges=[] + for id_,val in g.edges[node].items(): + if not 0 in val: + edges.append(str(id_) + val["color"]) + else: + for _,edge in val.items(): + edges.append(str(id_)+edge["color"]) + return edges + + def edge_diff(self, node1, node2): + cdef list edges1,edges2 + if isinstance(self.g1,nx.MultiDiGraph): + edges1 = self.get_edge_multigraph(self.g1,node1) + edges2 = self.get_edge_multigraph(self.g2,node2) + else: + edges1 = list(self.g1.edges[node1].keys()) + edges2 = list(self.g2.edges[node2].keys()) + if len(edges1) == 0 or len(edges2) == 0: + return max(len(edges1), len(edges2)) + + edit_edit_dist = EdgeEditDistance( + EdgeGraph(node1,edges1), + EdgeGraph(node2,edges2), + edge_del=self.edge_del,edge_ins=self.edge_ins,node_ins=self.node_ins,node_del=self.node_del + ) + return edit_edit_dist.distance() diff --git a/gmatch4py/ged/approximate_ged.pyx b/gmatch4py/ged/approximate_ged.pyx new file mode 100644 index 0000000000000000000000000000000000000000..27ea4371ad287ebd14b05a613d9b02025a2f8651 --- /dev/null +++ b/gmatch4py/ged/approximate_ged.pyx @@ -0,0 +1,33 @@ +# coding = utf-8 + +import numpy as np + +from .algorithm.graph_edit_dist import GraphEditDistance +from cython.parallel import prange + +class ApproximateGraphEditDistance(): + __type__ = "dist" + + @staticmethod + def compare(listgs,selected,c_del_node=1,c_del_edge=1,c_ins_node=1,c_ins_edge=1): + cdef int n= len(listgs) + cdef double[:,:] comparison_matrix = np.zeros((n,n)) + cdef int i,j + for i in prange(n,nogil=True): + for j in range(i,n): + with gil: + f=True + if not listgs[i] or not listgs[j]: + f=False + elif len(listgs[i])== 0 or len(listgs[j]) == 0: + f=False + if selected: + if not i in selected: + f=False + + if f: + comparison_matrix[i][j] = 
GraphEditDistance(listgs[i],listgs[j],False,node_del=c_del_node,node_ins=c_ins_node,edge_del=c_del_edge,edge_ins=c_ins_edge).distance() + else: + comparison_matrix[i][j] = np.inf + comparison_matrix[j][i] = comparison_matrix[i][j] + return comparison_matrix \ No newline at end of file diff --git a/gmatch4py/ged/bipartite_graph_matching_2.pyx b/gmatch4py/ged/bipartite_graph_matching_2.pyx new file mode 100644 index 0000000000000000000000000000000000000000..b4c7a2f28954d8ad3dabc5b23b3e5bca7a5d302a --- /dev/null +++ b/gmatch4py/ged/bipartite_graph_matching_2.pyx @@ -0,0 +1,161 @@ +# coding = utf-8 +import numpy as np +cimport numpy as np + +cdef class BP_2(): + """ + + """ + __type__="dist" + + cdef int node_del + cdef int node_ins + cdef int edge_del + cdef int edge_ins + + @staticmethod + def compare(listgs,selected, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1): + cdef int n = len(listgs) + comparator = BP_2(c_del_node, c_ins_node, c_del_edge, c_ins_edge) + cdef np.ndarray comparison_matrix = np.zeros((n, n)) + for i in range(n): + for j in range(i, n): + f=True + if not listgs[i] or not listgs[j]: + f=False + elif len(listgs[i])== 0 or len(listgs[j]) == 0: + f=False + if selected: + if not i in selected: + f=False + if f: + comparison_matrix[i, j] = comparator.bp2(listgs[i], listgs[j]) + else: + comparison_matrix[i, j] = np.inf + comparison_matrix[j, i] = comparison_matrix[i, j] + + return comparison_matrix + + def __init__(self, node_del=1, node_ins=1, edge_del=1, edge_ins=1): + """Constructor for HED""" + self.node_del = node_del + self.node_ins = node_ins + self.edge_del = edge_del + self.edge_ins = edge_ins + + def bp2(self, g1, g2): + """ + Compute de Hausdorff Edit Distance + :param g1: first graph + :param g2: second graph + :return: + """ + return np.min(self.distance(self.psi(g1,g2)),self.distance(self.psi(g2,g1))) + + def distance(self,e): + return np.sum(e) + + cdef list psi(self,g1,g2): + cdef list psi_=[] + cdef list nodes1 = list(g1.nodes) + cdef list nodes2 = list(g2.nodes) + for u in nodes1: + v=None + for w in nodes2: + if 2*self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,None) + self.fuv(g1,g2,None,w)\ + and self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,v): + v=w + psi_.append(self.fuv(g1,g2,u,v)) + if u: + nodes1= list(set(nodes1).difference(set([u]))) + if v: + nodes2= list(set(nodes2).difference(set([v]))) + for v in nodes2: + psi_.append(self.fuv(g1,g2,None,v)) + return psi_ + + + cdef float fuv(self, g1, g2, n1, n2): + """ + Compute the Node Distance function + :param g1: first graph + :param g2: second graph + :param n1: node of the first graph + :param n2: node of the second graph + :return: + """ + if n2 == None: # Del + return self.node_del + ((self.edge_del / 2) * g1.degree(n1)) + if n1 == None: # Insert + return self.node_ins + ((self.edge_ins / 2) * g2.degree(n2)) + else: + if n1 == n2: + return 0. 
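+        # different nodes: half of a full relabelling (deletion + insertion) plus the edge term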
+ return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2 + + cdef float hed_edge(self, g1, g2, n1, n2): + """ + Compute HEDistance between edges of n1 and n2, respectively in g1 and g2 + :param g1: first graph + :param g2: second graph + :param n1: node of the first graph + :param n2: node of the second graph + :return: + """ + return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2) + + cdef list get_edge_multigraph(self, g, node): + """ + Get list of edge around a node in a Multigraph + :param g: multigraph + :param node: node in the multigraph + :return: + """ + + cdef list originals_ = g.edges(node, data=True) + cdef int n= len(originals_) + if n == 0: + return [] + + cdef list edges = [""]*n + for i in range(n): + edge=originals_[i] + edges[i]=("{0}-{1}".format(edge[0],edge[1])) + return edges + + cdef float sum_gpq(self, g1, n1, g2, n2): + """ + Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2 + :param g1: first graph + :param n1: node in the first graph + :param g2: second graph + :param n2: node in the second graph + :return: + """ + cdef list edges1 = self.get_edge_multigraph(g1, n1) + cdef list edges2 = self.get_edge_multigraph(g2, n2) + edges2.extend([None]) + cdef np.ndarray min_sum = np.zeros(len(edges1)) + for i in range(len(edges1)): + min_i = np.zeros(len(edges2)) + for j in range(len(edges2)): + min_i[j] = self.gpq(edges1[i], edges2[j]) + min_sum[i] = np.min(min_i) + return np.sum(min_sum) + + cdef float gpq(self, e1, e2): + """ + Compute the edge distance function + :param e1: edge1 + :param e2: edge2 + :return: + """ + if e2 == None: # Del + return self.edge_del + if e1 == None: # Insert + return self.edge_ins + else: + if e1 == e2: + return 0. + return (self.edge_del + self.edge_ins) / 2 + diff --git a/gmatch4py/ged/graph/__init__.py b/gmatch4py/ged/graph/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..950f6351f2b5c063c306b42eb677bb9695abdd58 --- /dev/null +++ b/gmatch4py/ged/graph/__init__.py @@ -0,0 +1 @@ +# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py/ged/graph/__init__.pyx b/gmatch4py/ged/graph/__init__.pyx new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/gmatch4py/ged/graph/edge_graph.pyx b/gmatch4py/ged/graph/edge_graph.pyx new file mode 100644 index 0000000000000000000000000000000000000000..24b8bda1ed36c399cbe58eb0115b2cee4d85841d --- /dev/null +++ b/gmatch4py/ged/graph/edge_graph.pyx @@ -0,0 +1,16 @@ +# -*- coding: UTF-8 -*- + + +class EdgeGraph(): + + def __init__(self, init_node, nodes): + self.init_node=init_node + self.nodes_ = nodes + self.edge=nodes + def nodes(self): + return self.nodes_ + + def size(self): + return len(self.nodes) + def __len__(self): + return len(self.nodes_) diff --git a/gmatch4py/ged/greedy_edit_distance.pyx b/gmatch4py/ged/greedy_edit_distance.pyx new file mode 100644 index 0000000000000000000000000000000000000000..77520305011396a5b41f36f40394d415ea2d5efe --- /dev/null +++ b/gmatch4py/ged/greedy_edit_distance.pyx @@ -0,0 +1,55 @@ +# coding = utf-8 +import numpy as np + +from .algorithm.graph_edit_dist import GraphEditDistance +cimport numpy as np + +class GreedyEditDistance(GraphEditDistance): + """ + Implementation of the Greedy Edit Distance presented in : + + Improved quadratic time approximation of graph edit distance by Hausdorff matching and greedy assignement + Andreas Fischer, Kaspar Riesen, Horst Bunke + 2016 + """ + __type__ = 
"dist" + @staticmethod + def compare(listgs, selected, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1): + cdef int n = len(listgs) + cdef np.ndarray comparison_matrix = np.zeros((n, n)) + for i in range(n): + for j in range(i, n): + f=True + if not listgs[i] or not listgs[j]: + f=False + elif len(listgs[i])== 0 or len(listgs[j]) == 0: + f=False + if selected: + if not i in selected: + f=False + if f: + comparison_matrix[i, j] = GreedyEditDistance(listgs[i], listgs[j],False, node_del=c_del_node, + node_ins=c_ins_node, edge_del=c_del_edge, + edge_ins=c_ins_edge).distance() + else: + comparison_matrix[i, j] = np.inf + comparison_matrix[j, i] = comparison_matrix[i, j] + + + return comparison_matrix + + def __init__(self,g1,g2,debug=False,**kwargs): + """Constructor for GreedyEditDistance""" + super().__init__(g1,g2,debug,**kwargs) + + + def edit_costs(self): + cdef np.ndarray cost_matrix=self.create_cost_matrix() + cdef np.ndarray cost_matrix_2=cost_matrix.copy() + cdef list psi=[] + for i in range(len(cost_matrix)): + phi_i=np.argmin((cost_matrix[i])) + cost_matrix=np.delete(cost_matrix,phi_i,1) + psi.append([i,phi_i+i]) #+i to compensate the previous column deletion + return [cost_matrix_2[psi[i][0]][psi[i][1]] for i in range(len(psi))] + diff --git a/gmatch4py/ged/hausdorff_edit_distance.pyx b/gmatch4py/ged/hausdorff_edit_distance.pyx new file mode 100644 index 0000000000000000000000000000000000000000..fc123625ce80c50c91178d468ecab28e558464d0 --- /dev/null +++ b/gmatch4py/ged/hausdorff_edit_distance.pyx @@ -0,0 +1,171 @@ +# coding = utf-8 + +import numpy as np +cimport numpy as np +#from libcpp.list cimport list as cpplist +cdef class HED: + """ + Implementation of Hausdorff Edit Distance described in + + Improved quadratic time approximation of graph edit distance by Hausdorff matching and greedy assignement + Andreas Fischer, Kaspar Riesen, Horst Bunke + 2016 + """ + + cdef int node_del + cdef int node_ins + cdef int edge_del + cdef int edge_ins + + __type__ = "dist" + @staticmethod + def compare(list listgs, selected, int c_del_node=1, int c_del_edge=1, int c_ins_node=1, int c_ins_edge=1): + cdef int n = len(listgs) + comparator = HED(c_del_node, c_ins_node, c_del_edge, c_ins_edge) + cdef np.ndarray comparison_matrix = np.zeros((n, n)) + for i in range(n): + for j in range(i, n): + f=True + if not listgs[i] or not listgs[j]: + f=False + elif len(listgs[i])== 0 or len(listgs[j]) == 0: + f=False + if selected: + if not i in selected: + f=False + if f: + comparison_matrix[i, j] = comparator.hed(listgs[i], listgs[j]) + else: + comparison_matrix[i, j] = np.inf + comparison_matrix[j, i] = comparison_matrix[i, j] + + return comparison_matrix + + + def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1): + """Constructor for HED""" + self.node_del = node_del + self.node_ins = node_ins + self.edge_del = edge_del + self.edge_ins = edge_ins + + cpdef float hed(self, g1, g2): + """ + Compute de Hausdorff Edit Distance + :param g1: first graph + :param g2: second graph + :return: + """ + return self.sum_fuv(g1, g2) + self.sum_fuv(g2, g1) + + cdef float sum_fuv(self, g1, g2): + """ + Compute Nearest Neighbour Distance between G1 and G2 + :param g1: First Graph + :param g2: Second Graph + :return: + """ + cdef np.ndarray min_sum = np.zeros(len(g1)) + nodes1 = list(g1.nodes) + nodes2 = list(g2.nodes) + nodes2.extend([None]) + cdef np.ndarray min_i + for i in range(len(nodes1)): + min_i = np.zeros(len(nodes2)) + for j in range(len(nodes2)): + min_i[j] = self.fuv(g1, g2, 
nodes1[i], nodes2[j]) + min_sum[i] = np.min(min_i) + return np.sum(min_sum) + + cdef float fuv(self, g1, g2, n1, n2): + """ + Compute the Node Distance function + :param g1: first graph + :param g2: second graph + :param n1: node of the first graph + :param n2: node of the second graph + :return: + """ + if n2 == None: # Del + return self.node_del + ((self.edge_del / 2) * g1.degree(n1)) + if n1 == None: # Insert + return self.node_ins + ((self.edge_ins / 2) * g2.degree(n2)) + else: + if n1 == n2: + return 0 + return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2 + + cdef float hed_edge(self, g1, g2, n1, n2): + """ + Compute HEDistance between edges of n1 and n2, respectively in g1 and g2 + :param g1: first graph + :param g2: second graph + :param n1: node of the first graph + :param n2: node of the second graph + :return: + """ + return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2) + + cdef list get_edge_multigraph(self, g, node): + """ + Get list of edge around a node in a Multigraph + :param g: multigraph + :param node: node in the multigraph + :return: + """ + + cdef list originals_ = g.edges(node, data=True) + cdef int n= len(originals_) + if n == 0: + return [] + + + cdef list edges = [""]*n + for i in range(n): + edge=originals_[i] + edges[i]=("{0}-{1}".format(edge[0],edge[1])) + return edges + + cdef float sum_gpq(self, g1, n1, g2, n2): + """ + Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2 + :param g1: first graph + :param n1: node in the first graph + :param g2: second graph + :param n2: node in the second graph + :return: + """ + + #if isinstance(g1, nx.MultiDiGraph): + cdef list edges1 = self.get_edge_multigraph(g1, n1) + cdef list edges2 = self.get_edge_multigraph(g2, n2) + + #else: + #edges1 = [str(n1 + "-" + ef) for ef in list(g1.edge[n1].keys())] + #edges2 = [str(n2 + "-" + ef) for ef in list(g2.edge[n2].keys())] + + cdef np.ndarray min_sum = np.zeros(len(edges1)) + edges2.extend([None]) + cdef np.ndarray min_i + for i in range(len(edges1)): + min_i = np.zeros(len(edges2)) + for j in range(len(edges2)): + min_i[j] = self.gpq(edges1[i], edges2[j]) + min_sum[i] = np.min(min_i) + return np.sum(min_sum) + + cdef float gpq(self, str e1, str e2): + """ + Compute the edge distance function + :param e1: edge1 + :param e2: edge2 + :return: + """ + if e2 == None: # Del + return self.edge_del + if e1 == None: # Insert + return self.edge_ins + else: + if e1 == e2: + return 0 + return (self.edge_del + self.edge_ins) / 2 diff --git a/gmatch4py/jaccard.pyx b/gmatch4py/jaccard.pyx new file mode 100644 index 0000000000000000000000000000000000000000..db107778db05262b2d67c2d79b91f7ca88f80268 --- /dev/null +++ b/gmatch4py/jaccard.pyx @@ -0,0 +1,93 @@ +# coding = utf-8 + +import numpy as np +cimport numpy as np + +def intersect(a, b): + return list(set(a) & set(b)) +class Jaccard(): + __type__ = "sim" + + + @staticmethod + def compare(listgs,selected): + cdef int n = len(listgs) + cdef np.ndarray comparison_matrix = np.zeros((n, n)) + cdef i=0 + cdef j=0 + for i in range(n): + for j in range(i,n): + g1 = listgs[i] + g2 = listgs[j] + f=True + if not listgs[i] or not listgs[j]: + f=False + elif len(listgs[i])== 0 or len(listgs[j]) == 0: + f=False + if selected: + if not i in selected: + f=False + if f: + inter_ver,inter_ed = Jaccard.intersect_graph(g1,g2) + un_ver,un_edg=Jaccard.union_nodes(g1,g2),Jaccard.union_edges(g1,g2) + if len(un_ver) == 0 or len(un_edg) == 0: + comparison_matrix[i, j] = 0. 
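+                    # an empty union would make the Jaccard ratio divide by zero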
+ else: + comparison_matrix[i,j]=(len(inter_ver)/len(un_ver))*(len(inter_ed)/len(un_edg)) + else: + comparison_matrix[i, j] = 0. + + comparison_matrix[j, i] = comparison_matrix[i, j] + + return comparison_matrix + + + @staticmethod + def intersect_edges(g1,g2): + cdef list ed1 = Jaccard.transform_edges(list(g1.edges(data=True))) + cdef list ed2 = Jaccard.transform_edges(list(g2.edges(data=True))) + cdef list inter_ed=[] + for e1 in ed1: + for e2 in ed2: + if e1 == e2: + inter_ed.append(e1) + return inter_ed + + @staticmethod + def union_nodes(g1, g2): + cdef set union=set([]) + for n in g1.nodes():union.add(n) + for n in g2.nodes(): union.add(n) + return union + + @staticmethod + def union_edges(g1, g2): + cdef list ed1 = Jaccard.transform_edges(g1.edges(data=True)) + cdef list ed2 = Jaccard.transform_edges(g2.edges(data=True)) + cdef list union = [] + cdef set register=set([]) + trans_=lambda x : "{0}-{1}:{2}".format(x[0],x[1],x[2]["color"]) + for e1 in ed1: + if not trans_(e1) in register: + union.append(e1) + register.add(trans_(e1)) + for e2 in ed2: + if not trans_(e2) in register: + union.append(e2) + register.add(trans_(e2)) + return union + @staticmethod + def intersect_nodes(g1,g2): + return intersect(list(g1.nodes),list(g2.nodes)) + + @staticmethod + def intersect_graph(g1,g2): + return Jaccard.intersect_nodes(g1,g2),Jaccard.intersect_edges(g1,g2) + + @staticmethod + def transform_edges(ed): + for e in range(len(ed)): + if "id" in ed[e][-1]: + del ed[e][-1]["id"] + return ed + diff --git a/gmatch4py/kernels/__init__.py b/gmatch4py/kernels/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..950f6351f2b5c063c306b42eb677bb9695abdd58 --- /dev/null +++ b/gmatch4py/kernels/__init__.py @@ -0,0 +1 @@ +# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py/kernels/random_walk_kernel.pyx b/gmatch4py/kernels/random_walk_kernel.pyx new file mode 100644 index 0000000000000000000000000000000000000000..d780eb08273d0c77232d3ebdab446c902e13ac59 --- /dev/null +++ b/gmatch4py/kernels/random_walk_kernel.pyx @@ -0,0 +1,93 @@ +# coding = utf-8 + +import networkx as nx +import numpy as np + +class GeometricRandomWalkKernel(): + __type__ = "sim" + @staticmethod + def maxDegree(G): + degree_sequence = sorted(nx.degree(G).values(), reverse=True) # degree sequence + + # print "Degree sequence", degree_sequence + dmax = max(degree_sequence) + return dmax + @staticmethod + def compare(listgs): + + n = len(listgs) + comparison_matrix=np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + if len(listgs[i]) <1 or len(listgs[j]) <1: + comparison_matrix[i, j] = 0 + comparison_matrix[j, i] = 0 + continue + direct_product_graph=nx.tensor_product(listgs[i],listgs[j]) + Ax = nx.adjacency_matrix(direct_product_graph).todense() + try: + la = 1/ ((GeometricRandomWalkKernel.maxDegree(direct_product_graph)**2)+1) # lambda value + except: + la= pow(1,-6) + eps = pow(10,-10) + I=np.identity(Ax.shape[0]) + I_vec=np.ones(Ax.shape[0]) + x=I_vec.copy() + x_pre=np.zeros(Ax.shape[0]) + c=0 + + while (np.linalg.norm(x-x_pre)) > eps: + if c > 100: + break + x_pre=x + + x= I_vec + la*np.dot(Ax,x_pre.T) + c+=1 + comparison_matrix[i,j]=np.sum(x) + comparison_matrix[j,i]=comparison_matrix[i,j] + print(comparison_matrix) + for i in range(n): + for j in range(i,n): + comparison_matrix[i,j] = (comparison_matrix[i,j]/np.sqrt(comparison_matrix[i,i]*comparison_matrix[j,j])) + comparison_matrix[j,i]=comparison_matrix[i,j] + return comparison_matrix + +class KStepRandomWalkKernel(): + __type__ = 
"sim" + @staticmethod + def maxDegree(G): + degree_sequence = sorted(nx.degree(G).values(), reverse=True) # degree sequence + # print "Degree sequence", degree_sequence + dmax = max(degree_sequence) + return dmax + @staticmethod + def compare(listgs,lambda_list=[1,1,1]): + k=len(lambda_list) + if not len(lambda_list) == k: + raise AttributeError + n = len(listgs) + comparison_matrix=np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + if len(listgs[i]) <1 or len(listgs[j]) <1: + comparison_matrix[i, j] = 0 + comparison_matrix[j, i] = 0 + continue + direct_product_graph=nx.tensor_product(listgs[i],listgs[j]) + Ax = nx.adjacency_matrix(direct_product_graph).todense() + eps = pow(10,-10) + I=np.identity(Ax.shape[0]) + ax_pow = I.copy() + sum_ = lambda_list[0] * I + for kk in range(1, k): + ax_pow *= Ax + sum_ += lambda_list[kk] * ax_pow + + comparison_matrix[i, j] = np.sum(sum_)/(len(listgs[i])**2 * len(listgs[j])**2) + comparison_matrix[j,i] = comparison_matrix[i,j] + + for i in range(n): + for j in range(i,n): + comparison_matrix[i,j] = comparison_matrix[i,j]/np.sqrt(comparison_matrix[i,i]*comparison_matrix[j,j]) + comparison_matrix[j,i]=comparison_matrix[i,j] + return comparison_matrix \ No newline at end of file diff --git a/gmatch4py/kernels/shortest_path_kernel.pyx b/gmatch4py/kernels/shortest_path_kernel.pyx new file mode 100644 index 0000000000000000000000000000000000000000..e7e7444f49afd0a2af30cfff17a9ef3b3a2209d9 --- /dev/null +++ b/gmatch4py/kernels/shortest_path_kernel.pyx @@ -0,0 +1,88 @@ +# coding = utf-8 + +""" +Shortest-Path graph kernel. +Python implementation based on: "Shortest-path kernels on graphs", by +Borgwardt, K.M.; Kriegel, H.-P., in Data Mining, Fifth IEEE +International Conference on , vol., no., pp.8 pp.-, 27-30 Nov. 2005 +doi: 10.1109/ICDM.2005.132 +Author : Sandro Vega-Pons, Emanuele Olivetti +Modified by : Jacques Fize +""" + +import networkx as nx +import numpy as np + + +class ShortestPathGraphKernel: + """ + Shorthest path graph kernel. + """ + __type__ = "sim" + @staticmethod + def compare( g_1, g_2, verbose=False): + """Compute the kernel value (similarity) between two graphs. + Parameters + ---------- + g1 : networkx.Graph + First graph. + g2 : networkx.Graph + Second graph. + Returns + ------- + k : The similarity value between g1 and g2. + """ + # Diagonal superior matrix of the floyd warshall shortest + # paths: + fwm1 = np.array(nx.floyd_warshall_numpy(g_1)) + fwm1 = np.where(fwm1 == np.inf, 0, fwm1) + fwm1 = np.where(fwm1 == np.nan, 0, fwm1) + fwm1 = np.triu(fwm1, k=1) + bc1 = np.bincount(fwm1.reshape(-1).astype(int)) + + fwm2 = np.array(nx.floyd_warshall_numpy(g_2)) + fwm2 = np.where(fwm2 == np.inf, 0, fwm2) + fwm2 = np.where(fwm2 == np.nan, 0, fwm2) + fwm2 = np.triu(fwm2, k=1) + bc2 = np.bincount(fwm2.reshape(-1).astype(int)) + + # Copy into arrays with the same length the non-zero shortests + # paths: + v1 = np.zeros(max(len(bc1), len(bc2)) - 1) + v1[range(0, len(bc1)-1)] = bc1[1:] + + v2 = np.zeros(max(len(bc1), len(bc2)) - 1) + v2[range(0, len(bc2)-1)] = bc2[1:] + + return np.sum(v1 * v2) + + + @staticmethod + def compare_list(graph_list, verbose=False): + """Compute the all-pairs kernel values for a list of graphs. + This function can be used to directly compute the kernel + matrix for a list of graphs. The direct computation of the + kernel matrix is faster than the computation of all individual + pairwise kernel values. 
+ Parameters + ---------- + graph_list: list + A list of graphs (list of networkx graphs) + Return + ------ + K: numpy.array, shape = (len(graph_list), len(graph_list)) + The similarity matrix of all graphs in graph_list. + """ + n = len(graph_list) + k = np.zeros((n, n)) + for i in range(n): + for j in range(i, n): + k[i, j] = ShortestPathGraphKernel.compare(graph_list[i], graph_list[j]) + k[j, i] = k[i, j] + + k_norm = np.zeros(k.shape) + for i in range(k.shape[0]): + for j in range(k.shape[1]): + k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) + + return k_norm \ No newline at end of file diff --git a/gmatch4py/kernels/weisfeiler_lehman.pyx b/gmatch4py/kernels/weisfeiler_lehman.pyx new file mode 100644 index 0000000000000000000000000000000000000000..dd0a1d3feade50cf2ca3c6377dd2eaeb8935cb10 --- /dev/null +++ b/gmatch4py/kernels/weisfeiler_lehman.pyx @@ -0,0 +1,143 @@ +# coding = utf-8 + +"""Weisfeiler_Lehman graph kernel. + +Python implementation based on: "Weisfeiler-Lehman Graph Kernels", by: +Nino Shervashidze, Pascal Schweitzer, Erik J. van Leeuwen, Kurt +Mehlhorn, Karsten M. Borgwardt, JMLR, 2012. +http://jmlr.csail.mit.edu/papers/v12/shervashidze11a.html + +Author : Sandro Vega-Pons, Emanuele Olivetti +Source : https://github.com/emanuele/jstsp2015/blob/master/gk_weisfeiler_lehman.py +Modified by : Jacques Fize +""" + +import copy + +import networkx as nx +import numpy as np +cimport numpy as np + + +class WeisfeleirLehmanKernel(object): + __type__ = "sim" + @staticmethod + def compare(graph_list,selected,h=2): + """Compute the all-pairs kernel values for a list of graphs. + This function can be used to directly compute the kernel + matrix for a list of graphs. The direct computation of the + kernel matrix is faster than the computation of all individual + pairwise kernel values. + Parameters + ---------- + graph_list: list + A list of graphs (list of networkx graphs) + h : interger + Number of iterations. + node_label : boolean + Whether to use original node labels. True for using node labels + saved in the attribute 'node_label'. False for using the node + degree of each node as node attribute. + Return + ------ + K: numpy.array, shape = (len(graph_list), len(graph_list)) + The similarity matrix of all graphs in graph_list. + """ + + cdef int n = len(graph_list) + cdef np.ndarray phi + cdef int n_nodes = 0 + cdef int n_max = 0 + cdef int i,j + # Compute adjacency lists and n_nodes, the total number of + # nodes in the dataset. + for i in range(n): + n_nodes += graph_list[i].number_of_nodes() + + # Computing the maximum number of nodes in the graphs. It + # will be used in the computation of vectorial + # representation. 
+ if (n_max < graph_list[i].number_of_nodes()): + n_max = graph_list[i].number_of_nodes() + + phi = np.zeros((n_nodes, n), dtype=np.uint64) + + # INITIALIZATION: initialize the nodes labels for each graph + # with their labels or with degrees (for unlabeled graphs) + + cdef list labels = [0] * n + cdef dict label_lookup = {} + cdef int label_counter = 0 + + + # label_lookup is an associative array, which will contain the + # mapping from multiset labels (strings) to short labels + # (integers) + + cdef list nodes + for i in range(n): + nodes = list(graph_list[i].nodes) + # It is assumed that the graph has an attribute + # 'node_label' + labels[i] = np.zeros(len(nodes), dtype=np.int32) + + for j in range(len(nodes)): + if not (nodes[j] in label_lookup): + label_lookup[nodes[j]] = str(label_counter) + labels[i][j] = label_counter + label_counter += 1 + else: + labels[i][j] = label_lookup[nodes[j]] + # labels are associated to a natural number + # starting with 0. + + phi[labels[i][j], i] += 1 + + graph_list[i]=nx.relabel_nodes(graph_list[i],label_lookup) + + cdef np.ndarray[np.float64_t] k + k = np.dot(phi.transpose(), phi) + + # MAIN LOOP + cdef int it = 0 + + new_labels = copy.deepcopy(labels) # Can't work without it !!! + + while it < h: + # create an empty lookup table + label_lookup = {} + label_counter = 0 + + phi = np.zeros((n_nodes, n)) + for i in range(n): + nodes = list(graph_list[i].nodes) + for v in range(len(nodes)): + # form a multiset label of the node v of the i'th graph + # and convert it to a string + + long_label = [] + long_label.extend(nx.neighbors(graph_list[i],nodes[v])) + + long_label_string = "".join(long_label) + # if the multiset label has not yet occurred, add it to the + # lookup table and assign a number to it + if not (long_label_string in label_lookup): + label_lookup[long_label_string] = str(label_counter) + new_labels[i][v] = label_counter + label_counter += 1 + else: + new_labels[i][v] = label_lookup[long_label_string] + # fill the column for i'th graph in phi + aux = np.bincount(new_labels[i]) + phi[new_labels[i], i] += aux[new_labels[i]] + + k += np.dot(phi.transpose(), phi) + it = it + 1 + + # Compute the normalized version of the kernel + cdef np.ndarray[np.float64_t] k_norm = np.zeros((k.shape[0],k.shape[1])) + for i in range(k.shape[0]): + for j in range(k.shape[1]): + k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) + + return k_norm \ No newline at end of file diff --git a/gmatch4py/kernels/weisfeiler_lehman_edge.pyx b/gmatch4py/kernels/weisfeiler_lehman_edge.pyx new file mode 100644 index 0000000000000000000000000000000000000000..b2ccbccb0ad9ab3eb33858f058abe3a84c0f19ae --- /dev/null +++ b/gmatch4py/kernels/weisfeiler_lehman_edge.pyx @@ -0,0 +1,189 @@ +# coding = utf-8 + +from helpers.gazeteer_helpers import get_data,get_data_by_wikidata_id + + +"""Weisfeiler_Lehman GEO graph kernel. + +""" + +import numpy as np +import networkx as nx +import copy + +class WeisfeleirLehmanKernelEdge(object): + __type__ = "sim" + + + @staticmethod + def compare(graph_list,h=3): + """Compute the all-pairs kernel values for a list of graphs. + This function can be used to directly compute the kernel + matrix for a list of graphs. The direct computation of the + kernel matrix is faster than the computation of all individual + pairwise kernel values. + Parameters + ---------- + graph_list: list + A list of graphs (list of networkx graphs) + h : interger + Number of iterations. + node_label : boolean + Whether to use original node labels. 
True for using node labels + saved in the attribute 'node_label'. False for using the node + degree of each node as node attribute. + Return + ------ + K: numpy.array, shape = (len(graph_list), len(graph_list)) + The similarity matrix of all graphs in graph_list. + """ + + n = len(graph_list) + k = [0] * (h + 1) + n_nodes = 0 + n_max = 0 + ams=[nx.adjacency_matrix(g).todense() for g in graph_list] + inclusion_dictionnary={} + + # Compute adjacency lists and n_nodes, the total number of + # nodes in the dataset. + for i in range(n): + n_nodes += graph_list[i].number_of_nodes() + + """ + Store Inclusion Informations + """ + for node in graph_list[i].nodes(): + graph_list[i].nodes[node]["id_GD"]=node + if not node in inclusion_dictionnary: + inc_list = [] + try: + inc_list = get_data(node)["inc_P131"] + except: + try: + inc_list=get_data_by_wikidata_id(get_data(node)["continent"])["id"] + except: + pass # No inclusion + if inc_list: + inc_list = inc_list if isinstance(inc_list,list) else [inc_list] + + inclusion_dictionnary[node]=inc_list[0] + for j in range(1,len(inc_list)): + if j+1 < len(inc_list): + inclusion_dictionnary[inc_list[j]]=inc_list[j+1] + + + + + # Computing the maximum number of nodes in the graphs. It + # will be used in the computation of vectorial + # representation. + if (n_max < graph_list[i].number_of_nodes()): + n_max = graph_list[i].number_of_nodes() + + phi = np.zeros((n_nodes, n), dtype=np.uint64) + #print(inclusion_dictionnary) + # INITIALIZATION: initialize the nodes labels for each graph + # with their labels or with degrees (for unlabeled graphs) + + labels = [0] * n + label_lookup = {} + label_counter = 0 + + # label_lookup is an associative array, which will contain the + # mapping from multiset labels (strings) to short labels + # (integers) + for i in range(n): + nodes = list(graph_list[i].nodes) + # It is assumed that the graph has an attribute + # 'node_label' + labels[i] = np.zeros(len(nodes), dtype=np.int32) + + for j in range(len(nodes)): + if not (nodes[j] in label_lookup): + label_lookup[nodes[j]] = str(label_counter) + labels[i][j] = label_counter + label_counter += 1 + else: + labels[i][j] = label_lookup[nodes[j]] + # labels are associated to a natural number + # starting with 0. + + phi[labels[i][j], i] += 1 + + graph_list[i]=nx.relabel_nodes(graph_list[i],label_lookup) + + L=label_counter + print("L1",L) + ed=np.zeros((np.int((L*(L+1))),n)) + # MAIN LOOP + it = 0 + new_labels = copy.deepcopy(labels) # Can't work without it !!! + + for i in range(n): + labels_aux = np.tile(new_labels[i].reshape(-1,1),len(new_labels[i])) + a=np.minimum(labels_aux,labels_aux.T) + b=np.maximum(labels_aux,np.transpose(labels_aux)) + I=np.triu((ams[i] !=0),1) + a_i=np.extract(I,a) + b_i = np.extract(I, b) + Ind=np.abs(np.multiply((a[I]-1),(2*L+2-a[I])/2+b[I]-a[I]+1).astype(int)) + minind=np.min(Ind) + aux=np.bincount(Ind) + ed[Ind,i]=aux[Ind] + + mask=np.sum(ed,1) !=0 + ed= ed[mask] + k=np.dot(ed.T,ed) + + it = 0 + new_labels = copy.deepcopy(new_labels) # Can't work without it !!! 
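+        # each of the h iterations relabels nodes from their neighbourhoods and
+        # rebuilds the edge-pair feature matrix `ed`, accumulated into the kernel k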
+ + while it < h: + label_lookup={} + label_counter=0 + for i in range(n): + nodes = list(graph_list[i].nodes) + for v in range(len(nodes)): + # form a multiset label of the node v of the i'th graph + # and convert it to a string + + long_label = [] + long_label.extend(nx.neighbors(graph_list[i],nodes[v])) + + long_label_string = "".join(long_label) + # if the multiset label has not yet occurred, add it to the + # lookup table and assign a number to it + if not (long_label_string in label_lookup): + label_lookup[long_label_string] = str(label_counter) + new_labels[i][v] = label_counter + label_counter += 1 + else: + new_labels[i][v] = label_lookup[long_label_string] + + L = label_counter + print("L2",L) + ed = np.zeros((np.int((L * (L + 1))), n)) + for i in range(n): + labels_aux = np.tile(new_labels[i].reshape(-1, 1), len(new_labels[i])) + a = np.minimum(labels_aux, labels_aux.T) + b = np.maximum(labels_aux, np.transpose(labels_aux)) + I = np.triu((ams[i] != 0), 1) + a_i = np.extract(I, a) + b_i = np.extract(I, b) + Ind = np.abs(np.multiply((a[I] - 1), (2 * L + 2 - a[I]) / 2 + b[I] - a[I] + 1).astype(int)) + minind = np.min(Ind) + aux = np.bincount(Ind) + ed[Ind, i] = aux[Ind] + + mask = np.sum(ed, 1) != 0 + ed = ed[mask] + k += np.dot(ed.T, ed) + print(k) + it+=1 + k_norm = np.zeros(k.shape) + for i in range(k.shape[0]): + for j in range(k.shape[1]): + k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) + + return k_norm \ No newline at end of file diff --git a/gmatch4py/kernels/weisfeiler_lehman_geo.pyx b/gmatch4py/kernels/weisfeiler_lehman_geo.pyx new file mode 100644 index 0000000000000000000000000000000000000000..117066b8fa0c435c23b47f76a20cef010438be3c --- /dev/null +++ b/gmatch4py/kernels/weisfeiler_lehman_geo.pyx @@ -0,0 +1,165 @@ +# coding = utf-8 + +from helpers.gazeteer_helpers import get_data,get_data_by_wikidata_id + +# coding = utf-8 + +"""Weisfeiler_Lehman GEO graph kernel. + +""" + +import numpy as np +import networkx as nx +import copy + + +class WeisfeleirLehmanKernelGEO(object): + __type__ = "sim" + __depreciated__=True + + @staticmethod + def compare(graph_list,h=2,verbose=False): + """Compute the all-pairs kernel values for a list of graphs. + This function can be used to directly compute the kernel + matrix for a list of graphs. The direct computation of the + kernel matrix is faster than the computation of all individual + pairwise kernel values. + Parameters + ---------- + graph_list: list + A list of graphs (list of networkx graphs) + h : interger + Number of iterations. + node_label : boolean + Whether to use original node labels. True for using node labels + saved in the attribute 'node_label'. False for using the node + degree of each node as node attribute. + Return + ------ + K: numpy.array, shape = (len(graph_list), len(graph_list)) + The similarity matrix of all graphs in graph_list. + """ + + n = len(graph_list) + k = [0] * (h + 1) + n_nodes = 0 + n_max = 0 + + inclusion_dictionnary={} + + # Compute adjacency lists and n_nodes, the total number of + # nodes in the dataset. 
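+        # this pass also fills inclusion_dictionnary with the spatial inclusion
+        # chain of every node (entity -> including entity), fetched from the gazetteer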
+ for i in range(n): + n_nodes += graph_list[i].number_of_nodes() + + """ + Store Inclusion Informations + """ + for node in graph_list[i].nodes(): + graph_list[i].nodes[node]["id_GD"]=node + if not node in inclusion_dictionnary: + inc_list = [] + try: + inc_list = get_data(node)["inc_P131"] + except: + try: + inc_list=get_data_by_wikidata_id(get_data(node)["continent"])["id"] + except: + pass # No inclusion + if inc_list: + inc_list = inc_list if isinstance(inc_list,list) else [inc_list] + + inclusion_dictionnary[node]=inc_list[0] + for j in range(1,len(inc_list)): + if j+1 < len(inc_list): + inclusion_dictionnary[inc_list[j]]=inc_list[j+1] + + + + + # Computing the maximum number of nodes in the graphs. It + # will be used in the computation of vectorial + # representation. + if (n_max < graph_list[i].number_of_nodes()): + n_max = graph_list[i].number_of_nodes() + + phi = np.zeros((n_nodes, n), dtype=np.uint64) + if verbose: print(inclusion_dictionnary) + # INITIALIZATION: initialize the nodes labels for each graph + # with their labels or with degrees (for unlabeled graphs) + + labels = [0] * n + label_lookup = {} + label_counter = 0 + + # label_lookup is an associative array, which will contain the + # mapping from multiset labels (strings) to short labels + # (integers) + for i in range(n): + nodes = list(graph_list[i].nodes) + # It is assumed that the graph has an attribute + # 'node_label' + labels[i] = np.zeros(len(nodes), dtype=np.int32) + + for j in range(len(nodes)): + if not (nodes[j] in label_lookup): + label_lookup[nodes[j]] = str(label_counter) + labels[i][j] = label_counter + label_counter += 1 + else: + labels[i][j] = label_lookup[nodes[j]] + # labels are associated to a natural number + # starting with 0. + + phi[labels[i][j], i] += 1 + + graph_list[i]=nx.relabel_nodes(graph_list[i],label_lookup) + k = np.dot(phi.transpose(), phi).astype(np.float64) + + # MAIN LOOP + it = 0 + new_labels = copy.deepcopy(labels) # Can't work without it !!! 
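+        # each iteration relabels a node with the label of the unit containing it
+        # (via inclusion_dictionnary), weighted by 1/(it+2) in the feature matrix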
+ + while it < h: + # create an empty lookup table + label_lookup = {} + label_counter = 0 + + phi = np.zeros((n_nodes, n)) + for i in range(n): + nodes = list(graph_list[i].nodes) + for v in range(len(nodes)): + # form a multiset label of the node v of the i'th graph + # and convert it to a string + + id_GD = graph_list[i].nodes[nodes[v]]['id_GD'] + if id_GD in inclusion_dictionnary: + + long_label_string = inclusion_dictionnary[id_GD] + graph_list[i].nodes[nodes[v]]['id_GD']=inclusion_dictionnary[id_GD] + else: + long_label_string = id_GD + + + # if the multiset label has not yet occurred, add it to the + # lookup table and assign a number to it + if not (long_label_string in label_lookup): + label_lookup[long_label_string] = str(label_counter) + new_labels[i][v] = label_counter + label_counter += 1 + else: + new_labels[i][v] = label_lookup[long_label_string] + # fill the column for i'th graph in phi + aux = np.bincount(new_labels[i]) + phi[new_labels[i], i] += (1/(it+2))*aux[new_labels[i]] # +2 because it0 =0 + + k += np.dot(phi.transpose(), phi) + it = it + 1 + + # Compute the normalized version of the kernel + k_norm = np.zeros(k.shape) + for i in range(k.shape[0]): + for j in range(k.shape[1]): + k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) + + return k_norm \ No newline at end of file diff --git a/gmatch4py/mcs.pyx b/gmatch4py/mcs.pyx new file mode 100644 index 0000000000000000000000000000000000000000..98d5032e4cd3b644242b201006574ce0f6d0adf9 --- /dev/null +++ b/gmatch4py/mcs.pyx @@ -0,0 +1,74 @@ +# coding = utf-8 +import networkx as nx +import numpy as np +cimport numpy as np + +class MCS(): + """ + *A graph distance metric based on the maximal common subgraph, H. Bunke and K. Shearer, + Pattern Recognition Letters, 1998* + """ + @staticmethod + def compare(listgs,selected): + cdef int n = len(listgs) + cdef np.ndarray comparison_matrix = np.zeros((n, n)) + for i in range(n): + for j in range(i, n): + f=True + if not listgs[i] or not listgs[j]: + f=False + elif len(listgs[i])== 0 or len(listgs[j]) == 0: + f=False + if selected: + if not i in selected: + f=False + if f: + comparison_matrix[i, j] = MCS.s_mcs(listgs[i],listgs[j]) + else: + comparison_matrix[i, j] = 0. 
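+                    # pairs filtered out or empty are scored 0 rather than skipped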
diff --git a/gmatch4py/mcs.pyx b/gmatch4py/mcs.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..98d5032e4cd3b644242b201006574ce0f6d0adf9
--- /dev/null
+++ b/gmatch4py/mcs.pyx
@@ -0,0 +1,74 @@
+# coding = utf-8
+import networkx as nx
+import numpy as np
+cimport numpy as np
+
+class MCS():
+    """
+    *A graph distance metric based on the maximal common subgraph, H. Bunke and K. Shearer,
+    Pattern Recognition Letters, 1998*
+    """
+    @staticmethod
+    def compare(listgs, selected):
+        cdef int n = len(listgs)
+        cdef np.ndarray comparison_matrix = np.zeros((n, n))
+        for i in range(n):
+            for j in range(i, n):
+                f = True
+                if not listgs[i] or not listgs[j]:
+                    f = False
+                elif len(listgs[i]) == 0 or len(listgs[j]) == 0:
+                    f = False
+                if selected and i not in selected:
+                    f = False
+                if f:
+                    comparison_matrix[i, j] = MCS.s_mcs(listgs[i], listgs[j])
+                else:
+                    comparison_matrix[i, j] = 0.
+                comparison_matrix[j, i] = comparison_matrix[i, j]
+        return comparison_matrix
+
+    @staticmethod
+    def intersect(a, b):
+        return list(set(a) & set(b))
+
+    @staticmethod
+    def transform_edges(ed):
+        # drop the "id" attribute so that edges are compared on their
+        # endpoints and remaining attributes only
+        for e in range(len(ed)):
+            if "id" in ed[e][-1]:
+                del ed[e][-1]["id"]
+        return ed
+
+    @staticmethod
+    def intersect_edges(g1, g2):
+        cdef list ed1 = MCS.transform_edges(list(g1.edges(data=True)))
+        cdef list ed2 = MCS.transform_edges(list(g2.edges(data=True)))
+        inter_ed = []
+        for e1 in ed1:
+            for e2 in ed2:
+                if e1 == e2:
+                    inter_ed.append(e1)
+        return inter_ed
+
+    @staticmethod
+    def intersect_nodes(g1, g2):
+        return MCS.intersect(list(g1.nodes), list(g2.nodes))
+
+    @staticmethod
+    def maximum_common_subgraph(g1, g2):
+        """
+        Extract the maximum common subgraph
+        """
+        res = nx.MultiDiGraph()
+        res.add_nodes_from(MCS.intersect_nodes(g1, g2))
+        res.add_edges_from(MCS.intersect_edges(g1, g2))
+        return res
+
+    @staticmethod
+    def s_mcs(g1, g2):
+        return len(MCS.maximum_common_subgraph(g1, g2)) / float(max(len(g1), len(g2)))
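+
+# Minimal usage sketch (illustrative only, not part of the module):
+#
+#   >>> import networkx as nx
+#   >>> g1 = nx.complete_graph(4)   # nodes 0..3
+#   >>> g2 = nx.complete_graph(3)   # nodes 0..2
+#   >>> MCS.compare([g1, g2], selected=None)
+#
+# The off-diagonal score is |mcs(g1, g2)| / max(|g1|, |g2|) = 3/4: the
+# maximum common subgraph keeps the three shared nodes.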
diff --git a/gmatch4py/vertex_edge_overlap.pyx b/gmatch4py/vertex_edge_overlap.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..edc3f7252f3a5ffc83f391e6dabe4d46f8e7de7d
--- /dev/null
+++ b/gmatch4py/vertex_edge_overlap.pyx
@@ -0,0 +1,77 @@
+# coding = utf-8
+
+import numpy as np
+cimport numpy as np
+
+
+cdef list intersect(a, b):
+    return list(set(a) & set(b))
+
+class VertexEdgeOverlap():
+    """
+    Vertex/Edge Overlap Algorithm
+    presented in Web graph similarity for anomaly detection, Journal of Internet
+    Services and Applications, 2010, by P. Papadimitriou, A. Dasdan and H. Garcia-Molina
+
+    Code Author : Jacques Fize
+    """
+
+    __type__ = "sim"
+
+    @staticmethod
+    def compare(list listgs, selected):
+        n = len(listgs)
+        cdef np.ndarray comparison_matrix = np.zeros((n, n))
+        cdef list inter_ver
+        cdef list inter_ed
+        cdef int denom
+        for i in range(n):
+            for j in range(i, n):
+                f = True
+                if not listgs[i] or not listgs[j]:
+                    f = False
+                elif len(listgs[i]) == 0 or len(listgs[j]) == 0:
+                    f = False
+                if selected and i not in selected:
+                    f = False
+                if f:
+                    g1 = listgs[i]
+                    g2 = listgs[j]
+                    inter_ver, inter_ed = VertexEdgeOverlap.intersect_graph(g1, g2)
+                    # data=True so that parallel edges of a nx.MultiDiGraph are counted
+                    denom = len(g1) + len(g2) + len(g1.edges(data=True)) + len(g2.edges(data=True))
+                    if denom == 0:
+                        continue
+                    # VEO similarity: 2*(shared vertices + shared edges) / (|V1|+|V2|+|E1|+|E2|)
+                    comparison_matrix[i, j] = 2 * (len(inter_ver) + len(inter_ed)) / float(denom)
+                else:
+                    comparison_matrix[i, j] = 0.
+                comparison_matrix[j, i] = comparison_matrix[i, j]
+        return comparison_matrix
+
+    @staticmethod
+    def intersect_edges(g1, g2):
+        cdef list ed1 = VertexEdgeOverlap.transform_edges(list(g1.edges(data=True)))
+        cdef list ed2 = VertexEdgeOverlap.transform_edges(list(g2.edges(data=True)))
+        cdef list inter_ed = []
+        for e1 in ed1:
+            for e2 in ed2:
+                if e1 == e2:
+                    inter_ed.append(e1)
+        return inter_ed
+
+    @staticmethod
+    def intersect_nodes(g1, g2):
+        return intersect(list(g1.nodes), list(g2.nodes))
+
+    @staticmethod
+    def intersect_graph(g1, g2):
+        return VertexEdgeOverlap.intersect_nodes(g1, g2), VertexEdgeOverlap.intersect_edges(g1, g2)
+
+    @staticmethod
+    def transform_edges(ed):
+        # drop the "id" attribute so that edges are compared on their
+        # endpoints and remaining attributes only
+        for e in range(len(ed)):
+            if "id" in ed[e][-1]:
+                del ed[e][-1]["id"]
+        return ed
diff --git a/gmatch4py/vertex_ranking.pyx b/gmatch4py/vertex_ranking.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..8f72a4df204b7d5d90159ab58196f09be1e30dbc
--- /dev/null
+++ b/gmatch4py/vertex_ranking.pyx
@@ -0,0 +1,39 @@
+# coding = utf-8
+
+import networkx as nx
+import numpy as np
+cimport numpy as np
+from scipy.stats import spearmanr
+
+
+def intersect(a, b):
+    return list(set(a) & set(b))
+
+class VertexRanking():
+    """
+    Vertex Ranking
+    presented in Web graph similarity for anomaly detection, Journal of Internet
+    Services and Applications, 2010, by P. Papadimitriou, A. Dasdan and H. Garcia-Molina
+
+    Code Author : Jacques Fize
+    """
+
+    __type__ = "sim"
+
+    @staticmethod
+    def compare(listgs):
+        cdef int n = len(listgs)
+        cdef np.ndarray comparison_matrix = np.zeros((n, n))
+        cdef list page_r = [nx.pagerank(nx.DiGraph(g)) for g in listgs]
+        cdef list node_intersection
+        cdef list X
+        cdef list Y
+        for i in range(n):
+            for j in range(i, n):
+                # ranks are compared on the nodes shared by both graphs
+                node_intersection = intersect(list(page_r[i].keys()), list(page_r[j].keys()))
+                X, Y = [], []
+                for node in node_intersection:
+                    X.append(page_r[i][node])
+                    Y.append(page_r[j][node])
+                comparison_matrix[i, j] = spearmanr(X, Y)[0]
+                comparison_matrix[j, i] = comparison_matrix[i, j]
+        return comparison_matrix
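+
+# Minimal usage sketch (illustrative only): Spearman's rho is computed on
+# the PageRank scores of the shared nodes, so two graphs with fewer than
+# two nodes in common yield a NaN entry.
+#
+#   >>> import networkx as nx
+#   >>> graphs = [nx.path_graph(4), nx.star_graph(3)]   # both over nodes 0..3
+#   >>> VertexRanking.compare(graphs)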
Please get a") + print("copy from www.cython.org and install it") + sys.exit(1) + + +# scan the 'dvedit' directory for extension files, converting +# them to extension names in dotted notation +def scandir(dir, files=[]): + for file in os.listdir(dir): + path = os.path.join(dir, file) + if os.path.isfile(path) and path.endswith(".pyx"): + files.append(path.replace(os.path.sep, ".")[:-4]) + elif os.path.isdir(path): + scandir(path, files) + return files + + +# generate an Extension object from its dotted name +def makeExtension(extName): + extPath = extName.replace(".", os.path.sep) + ".pyx" + return Extension( + extName, + [extPath], + language="c++", + extra_compile_args=["-O3", "-Wall", '-std=c++11', '-v'], + ) + + +# get the list of extensions +extNames = scandir("gmatch4py") + +# and build up the set of Extension objects +extensions = cythonize([makeExtension(name) for name in extNames]) + +# finally, we can pass all this to distutils +setup( + name="Gmatch4py", + description="A module for graph matching", + packages=["gmatch4py", "gmatch4py.ged", "gmatch4py.kernels"], + ext_modules=extensions, + cmdclass={'build_ext': build_ext}, + setup_requires=["numpy"], + install_requires=["numpy"] +)