diff --git a/gmatch4py/base.pxd b/gmatch4py/base.pxd index e3dd39ec7a44d163ec0761eb9f99fb2850fa8141..930e12ad66a591460bd8fc38c682ae3a63a67127 100644 --- a/gmatch4py/base.pxd +++ b/gmatch4py/base.pxd @@ -4,14 +4,15 @@ cdef class Base: ## Attribute(s) cdef int type_alg cdef bint normalized - + cdef int cpu_count ## Methods cpdef np.ndarray compare(self,list graph_list, list selected) cpdef np.ndarray compare_old(self,list listgs, list selected) cpdef np.ndarray distance(self, np.ndarray matrix) cpdef np.ndarray similarity(self, np.ndarray matrix) cpdef bint isAccepted(self,G,index,selected) - cpdef list get_selected_array(self,selected,size_corpus) + cpdef np.ndarray get_selected_array(self,selected,size_corpus) + cpdef intersection(G,H) cpdef union_(G,H) diff --git a/gmatch4py/base.pyx b/gmatch4py/base.pyx index ac993092459d1027e90a17e8e3106a41a5a30962..e9d9b0d915c48a828ce18896a39f6c54a3c88151 100644 --- a/gmatch4py/base.pyx +++ b/gmatch4py/base.pyx @@ -3,6 +3,10 @@ import numpy as np cimport numpy as np import networkx as nx +cimport cython +import multiprocessing + + cpdef np.ndarray minmax_scale(np.ndarray matrix): """ @@ -136,17 +140,22 @@ cdef class Base: else: self.type_alg=type_alg self.normalized=normalized + self.cpu_count=multiprocessing.cpu_count() - cpdef list get_selected_array(self,selected,size_corpus): - cdef list selected_test = [True]*size_corpus - if selected: - selected_test = [False]*size_corpus + cpdef np.ndarray get_selected_array(self,selected,size_corpus): + cdef double[:] selected_test = np.zeros(size_corpus) + if not selected == None: for ix in range(len(selected)): - selected_test[ix]=True - return selected + selected_test[selected[ix]]=1 + return np.array(selected_test) + else: + return np.array(selected_test)+1 + cpdef np.ndarray compare_old(self,list listgs, list selected): pass + + @cython.boundscheck(False) cpdef np.ndarray compare(self,list graph_list, list selected): """ Return the similarity/distance matrix using the current algorithm. diff --git a/gmatch4py/ged/abstract_graph_edit_dist.pyx b/gmatch4py/ged/abstract_graph_edit_dist.pyx index f4b25fde928a0b0c497be35ba9df2bd7a033fbdf..683da17616a7ffa8bcb0c16cc91e32dc207f6186 100644 --- a/gmatch4py/ged/abstract_graph_edit_dist.pyx +++ b/gmatch4py/ged/abstract_graph_edit_dist.pyx @@ -115,17 +115,17 @@ cdef class AbstractGraphEditDistance(Base): cdef double[:,:] comparison_matrix = np.zeros((n, n)) listgs=parsenx2graph(listgs) cdef long[:] n_nodes = np.array([g.size() for g in listgs]) - - cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef double[:] selected_test = np.array(self.get_selected_array(selected,n)) cdef int i,j val=np.inf - with nogil, parallel(num_threads=8): + + with nogil, parallel(num_threads=self.cpu_count): for i in prange(n,schedule='static'): for j in range(n): - if n_nodes[i]>0 and n_nodes[j]>0 and selected_test[i] : - with gil: - comparison_matrix[i][j] = self.distance_ged(listgs[i],listgs[j]) - else: - comparison_matrix[i][j] = 0 - #comparison_matrix[j, i] = comparison_matrix[i, j] + if n_nodes[i]>0 and n_nodes[j]>0 and selected_test[i] == 1 : + with gil: + comparison_matrix[i][j] = self.distance_ged(listgs[i],listgs[j]) + else: + comparison_matrix[i][j] = 0 + #comparison_matrix[j, i] = comparison_matrix[i, j] return np.array(comparison_matrix) diff --git a/gmatch4py/ged/bipartite_graph_matching_2.pyx b/gmatch4py/ged/bipartite_graph_matching_2.pyx index 45fd67f625450c68d0579634de6d652430cfdd14..faef03ab776f3b4f122cd147647dc3b976e056f7 100644 --- a/gmatch4py/ged/bipartite_graph_matching_2.pyx +++ b/gmatch4py/ged/bipartite_graph_matching_2.pyx @@ -53,22 +53,22 @@ cdef class BP_2(Base): cdef int n = len(listgs) cdef list new_gs=parsenx2graph(listgs) cdef double[:,:] comparison_matrix = np.zeros((n, n)) - cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef double[:] selected_test = self.get_selected_array(selected,n) cdef int i,j cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) cdef long[:] n_edges = np.array([g.density() for g in new_gs]) - with nogil, parallel(num_threads=4): + with nogil, parallel(num_threads=self.cpu_count): for i in prange(n,schedule='static'): for j in range(i,n): - if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == True: + if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1: with gil: comparison_matrix[i, j] = self.bp2(new_gs[i], new_gs[j]) else: comparison_matrix[i, j] = 0 comparison_matrix[j, i] = comparison_matrix[i, j] - return comparison_matrix + return np.array(comparison_matrix) cdef double bp2(self, g1, g2): @@ -148,18 +148,18 @@ cdef class BP_2(Base): :return: """ cdef np.ndarray min_sum = np.zeros(g1.size()) - nodes1 = list(g1.nodes()) - nodes2 = list(g2.nodes()) + cdef list nodes1 = list(g1.nodes()) + cdef list nodes2 = list(g2.nodes()) nodes2.extend([None]) cdef np.ndarray min_i - for i in range(len(nodes1)): - min_i = np.zeros(len(nodes2)) - for j in range(len(nodes2)): + for i in range(g1.size()): + min_i = np.zeros(g2.size()) + for j in range(g2.size()): min_i[j] = self.fuv(g1, g2, nodes1[i], nodes2[j]) min_sum[i] = np.min(min_i) return np.sum(min_sum) - cdef float fuv(self, g1, g2, n1, n2): + cdef float fuv(self, g1, g2, str n1, str n2): """ Compute the Node Distance function :param g1: first graph @@ -177,7 +177,7 @@ cdef class BP_2(Base): return 0 return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2 - cdef float hed_edge(self, g1, g2, n1, n2): + cdef float hed_edge(self, g1, g2, str n1, str n2): """ Compute HEDistance between edges of n1 and n2, respectively in g1 and g2 :param g1: first graph @@ -189,7 +189,7 @@ cdef class BP_2(Base): return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2) - cdef float sum_gpq(self, g1, n1, g2, n2): + cdef float sum_gpq(self, g1, str n1, g2, str n2): """ Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2 :param g1: first graph @@ -213,7 +213,7 @@ cdef class BP_2(Base): min_sum[i] = np.min(min_i) return np.sum(min_sum) - cdef float gpq(self, tuple e1, tuple e2): + cdef float gpq(self, str e1, str e2): """ Compute the edge distance function :param e1: edge1 diff --git a/gmatch4py/ged/graph_edit_dist.pyx b/gmatch4py/ged/graph_edit_dist.pyx index 3706c283884e6f488124812a0627b8fb5316c16c..b1f9346cf79e3bcf96dad6d7231533a47f8fcb9f 100644 --- a/gmatch4py/ged/graph_edit_dist.pyx +++ b/gmatch4py/ged/graph_edit_dist.pyx @@ -7,7 +7,6 @@ import numpy as np cimport numpy as np from .abstract_graph_edit_dist cimport AbstractGraphEditDistance from ..base cimport intersection,union_ -from ..graph cimport Graph cdef class GraphEditDistance(AbstractGraphEditDistance): diff --git a/gmatch4py/ged/hausdorff_edit_distance.pyx b/gmatch4py/ged/hausdorff_edit_distance.pyx index a83e2a1897bfd9330d2dd9e8beb30d3abd146fa2..bf6b8fbdfbb93c3275e21fec62dc91d7d1d5232d 100644 --- a/gmatch4py/ged/hausdorff_edit_distance.pyx +++ b/gmatch4py/ged/hausdorff_edit_distance.pyx @@ -49,12 +49,12 @@ cdef class HED(Base): cdef int n = len(listgs) cdef list new_gs=parsenx2graph(listgs) cdef double[:,:] comparison_matrix = np.zeros((n, n)) - cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef double[:] selected_test = np.array(self.get_selected_array(selected,n)) cdef int i,j cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) cdef long[:] n_edges = np.array([g.density() for g in new_gs]) - with nogil, parallel(num_threads=4): + with nogil, parallel(num_threads=self.cpu_count): for i in prange(n,schedule='static'): for j in range(i,n): if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == True: @@ -64,7 +64,7 @@ cdef class HED(Base): comparison_matrix[i, j] = 0 comparison_matrix[j, i] = comparison_matrix[i, j] - return comparison_matrix + return np.array(comparison_matrix) cdef float hed(self, g1, g2): @@ -83,19 +83,19 @@ cdef class HED(Base): :param g2: Second Graph :return: """ - cdef np.ndarray min_sum = np.zeros(len(g1)) - nodes1 = list(g1.nodes()) - nodes2 = list(g2.nodes()) + cdef np.ndarray min_sum = np.zeros(g1.size()) + cdef list nodes1 = list(g1.nodes()) + cdef list nodes2 = list(g2.nodes()) nodes2.extend([None]) cdef np.ndarray min_i - for i in range(len(nodes1)): - min_i = np.zeros(len(nodes2)) - for j in range(len(nodes2)): + for i in range(g1.size()): + min_i = np.zeros(g2.size()) + for j in range(g2.size()): min_i[j] = self.fuv(g1, g2, nodes1[i], nodes2[j]) min_sum[i] = np.min(min_i) return np.sum(min_sum) - cdef float fuv(self, g1, g2, n1, n2): + cdef float fuv(self, g1, g2, str n1, str n2): """ Compute the Node Distance function :param g1: first graph @@ -113,7 +113,7 @@ cdef class HED(Base): return 0 return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2 - cdef float hed_edge(self, g1, g2, n1, n2): + cdef float hed_edge(self, g1, g2, str n1, str n2): """ Compute HEDistance between edges of n1 and n2, respectively in g1 and g2 :param g1: first graph @@ -125,7 +125,7 @@ cdef class HED(Base): return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2) - cdef float sum_gpq(self, g1, n1, g2, n2): + cdef float sum_gpq(self, g1, str n1, g2, str n2): """ Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2 :param g1: first graph @@ -149,7 +149,7 @@ cdef class HED(Base): min_sum[i] = np.min(min_i) return np.sum(min_sum) - cdef float gpq(self, tuple e1, tuple e2): + cdef float gpq(self, str e1, str e2): """ Compute the edge distance function :param e1: edge1 diff --git a/gmatch4py/graph.pyx b/gmatch4py/graph.pyx index 4d3b770b1babedcbab3a77c55711ebcff2c20d23..ac03d698de659cf201a0b428d3d11920ca01e32a 100644 --- a/gmatch4py/graph.pyx +++ b/gmatch4py/graph.pyx @@ -207,6 +207,7 @@ cdef class Graph: return [(e1,e2,self.edges_attr_list[self.edges_hash_idx[hash_]])for hash_ in hashes] else: return [(e1,e2,None)] + cpdef list get_edges_no(self,str n): return self.edges_of_nodes[n] diff --git a/gmatch4py/jaccard.pyx b/gmatch4py/jaccard.pyx index 99d8b5d03c7fed243a35fd6d56c29e1a80b35710..d69c0ca7123a1992334592f52f57b89f5b8fa342 100644 --- a/gmatch4py/jaccard.pyx +++ b/gmatch4py/jaccard.pyx @@ -5,7 +5,7 @@ cimport numpy as np from .base cimport Base from .base cimport intersection,union_ -from ..helpers.general import parsenx2graph +from .helpers.general import parsenx2graph from cython.parallel cimport prange,parallel cdef class Jaccard(Base): @@ -47,7 +47,7 @@ cdef class Jaccard(Base): cdef long[:] n_edges = np.array([g.density() for g in new_gs]) cdef int i,j - cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef double[:] selected_test = np.array(self.get_selected_array(selected,n)) cdef double[:,:] intersect_len_nodes = np.zeros((n, n)) cdef double[:,:] intersect_len_edges = np.zeros((n, n)) @@ -59,10 +59,10 @@ cdef class Jaccard(Base): intersect_len_edges[i][j]=new_gs[i].size_edge_intersect(new_gs[j])#len(set(hash_edges[i]).intersection(hash_edges[j])) union_len_nodes[i][j]=new_gs[i].size_node_union(new_gs[j]) union_len_edges[i][j]=new_gs[i].size_node_union(new_gs[j]) - with nogil, parallel(num_threads=4): + with nogil, parallel(num_threads=self.cpu_count): for i in prange(n,schedule='static'): for j in range(i,n): - if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i]: + if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1: if union_len_edges[i][j] >0 and union_len_nodes[i][j] >0: comparison_matrix[i][j]= \ (intersect_len_edges[i][j]/union_len_edges[i][j])*\ diff --git a/gmatch4py/kernels/weisfeiler_lehman.pyx b/gmatch4py/kernels/weisfeiler_lehman.pyx index 93a78cbbc36b49d0f328198fa7f38a59ea1fa22a..e0e4c0edf80b6012f95784bb178620100a2f64c9 100644 --- a/gmatch4py/kernels/weisfeiler_lehman.pyx +++ b/gmatch4py/kernels/weisfeiler_lehman.pyx @@ -105,7 +105,6 @@ cdef class WeisfeleirLehmanKernel(Base): # cdef np.ndarray[np.float64_t] k k = np.dot(phi.transpose(), phi) - print(1) # MAIN LOOP cdef int it = 0 diff --git a/gmatch4py/mcs.pyx b/gmatch4py/mcs.pyx index 9ad07f06926f3568d3a0afb7bbe290ed70581d00..50e09fbba6db257ab1a34350cf7d06f07dbc2f15 100644 --- a/gmatch4py/mcs.pyx +++ b/gmatch4py/mcs.pyx @@ -4,7 +4,7 @@ cimport numpy as np from .graph cimport Graph from .base cimport Base from cython.parallel cimport prange,parallel -from ..helpers.general import parsenx2graph +from .helpers.general import parsenx2graph cdef class MCS(Base): """ @@ -31,7 +31,7 @@ cdef class MCS(Base): cpdef np.ndarray compare(self,list listgs, list selected): cdef int n = len(listgs) cdef double [:,:] comparison_matrix = np.zeros((n, n)) - cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef double[:] selected_test = np.array(self.get_selected_array(selected,n)) cdef list new_gs=parsenx2graph(listgs) cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) cdef double [:,:] intersect_len_nodes = np.zeros((n, n)) @@ -40,10 +40,10 @@ cdef class MCS(Base): for j in range(i,n): intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j]) - with nogil, parallel(num_threads=4): + with nogil, parallel(num_threads=self.cpu_count): for i in prange(n,schedule='static'): for j in range(i, n): - if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i]: + if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1: comparison_matrix[i][j] = intersect_len_nodes[i][j]/max(n_nodes[i],n_nodes[j]) else: comparison_matrix[i][j] = 0. diff --git a/gmatch4py/vertex_edge_overlap.pyx b/gmatch4py/vertex_edge_overlap.pyx index 3bbe9c7ee496eb91bb200c046fdc53ec23770664..b883c9aeac3fb4a20ba48d3ebf0cf55ce1cf596b 100644 --- a/gmatch4py/vertex_edge_overlap.pyx +++ b/gmatch4py/vertex_edge_overlap.pyx @@ -5,7 +5,7 @@ cimport numpy as np from .base cimport Base,intersection from .graph cimport Graph from cython.parallel cimport prange,parallel -from ..helpers.general import parsenx2graph +from .helpers.general import parsenx2graph cdef class VertexEdgeOverlap(Base): @@ -47,7 +47,7 @@ cdef class VertexEdgeOverlap(Base): cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) cdef long[:] n_edges = np.array([g.density() for g in new_gs]) - cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef double[:] selected_test = np.array(self.get_selected_array(selected,n)) cdef double[:,:] intersect_len_nodes = np.zeros((n, n)) cdef double[:,:] intersect_len_edges = np.zeros((n, n)) @@ -56,10 +56,10 @@ cdef class VertexEdgeOverlap(Base): intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j]) intersect_len_edges[i][j]=new_gs[i].size_edge_intersect(new_gs[j])#len(set(hash_edges[i]).intersection(hash_edges[j])) - with nogil, parallel(num_threads=4): + with nogil, parallel(num_threads=self.cpu_count): for i in prange(n,schedule='static'): for j in range(i,n): - if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == True: + if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1: denom=n_nodes[i]+n_nodes[j]+\ n_edges[i]+n_edges[j] if denom == 0: diff --git a/setup.py b/setup.py index 16b9d05a5c4ecb984e4145537bda928a19b553ac..71fcff81e83aed1c91979a981033d1f9c8ddcaef 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( cmdclass={'build_ext': build_ext}, setup_requires=["numpy","networkx","scipy",'scikit-learn'], install_requires=["numpy","networkx","scipy",'scikit-learn'], - version="0.2.2alpha", + version="0.2.4alpha", classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", diff --git a/test/real_test.py b/test/real_test.py index d2753d87a7e48615b9da219afddfc91fd263a6ad..f0fb835c52c5b98928f2211d4dd32e3397320214 100644 --- a/test/real_test.py +++ b/test/real_test.py @@ -1 +1,14 @@ from gmatch4py import * +import networkx as nx + +graphs=[nx.random_tree(10) for i in range(10)] +comparator=None +for class_ in [BagOfNodes,WeisfeleirLehmanKernel,GraphEditDistance, BP_2, GreedyEditDistance, HED, Jaccard, MCS, VertexEdgeOverlap]: + print(class_) + if class_ in (GraphEditDistance, BP_2, GreedyEditDistance, HED): + comparator = class_(1, 1, 1, 1) + elif class_ == WeisfeleirLehmanKernel: + comparator = class_(h=2) + else: + comparator=class_() + matrix = comparator.compare(graphs, [])