diff --git a/gmatch4py/base.pxd b/gmatch4py/base.pxd index f36f2a903ab4a1de9a13f189be4187972bed8903..e3dd39ec7a44d163ec0761eb9f99fb2850fa8141 100644 --- a/gmatch4py/base.pxd +++ b/gmatch4py/base.pxd @@ -7,9 +7,11 @@ cdef class Base: ## Methods cpdef np.ndarray compare(self,list graph_list, list selected) + cpdef np.ndarray compare_old(self,list listgs, list selected) cpdef np.ndarray distance(self, np.ndarray matrix) cpdef np.ndarray similarity(self, np.ndarray matrix) cpdef bint isAccepted(self,G,index,selected) + cpdef list get_selected_array(self,selected,size_corpus) cpdef intersection(G,H) cpdef union_(G,H) diff --git a/gmatch4py/base.pyx b/gmatch4py/base.pyx index f27971f87151c4c67dbc8639a8550dcf8caf3fd7..ac993092459d1027e90a17e8e3106a41a5a30962 100644 --- a/gmatch4py/base.pyx +++ b/gmatch4py/base.pyx @@ -136,6 +136,17 @@ cdef class Base: else: self.type_alg=type_alg self.normalized=normalized + + cpdef list get_selected_array(self,selected,size_corpus): + cdef list selected_test = [True]*size_corpus + if selected: + selected_test = [False]*size_corpus + for ix in range(len(selected)): + selected_test[ix]=True + return selected + + cpdef np.ndarray compare_old(self,list listgs, list selected): + pass cpdef np.ndarray compare(self,list graph_list, list selected): """ Return the similarity/distance matrix using the current algorithm. diff --git a/gmatch4py/ged/abstract_graph_edit_dist.pxd b/gmatch4py/ged/abstract_graph_edit_dist.pxd index 9a4b92de1195a188808a4d5223509d59a3be1922..ee01fe9518f21970f26ff2e1a5cba84fca556e8c 100644 --- a/gmatch4py/ged/abstract_graph_edit_dist.pxd +++ b/gmatch4py/ged/abstract_graph_edit_dist.pxd @@ -16,3 +16,4 @@ cdef class AbstractGraphEditDistance(Base): cdef double insert_cost(self, int i, int j, nodesH, H) cdef double delete_cost(self, int i, int j, nodesG, G) cpdef double substitute_cost(self, node1, node2, G, H) + diff --git a/gmatch4py/ged/abstract_graph_edit_dist.pyx b/gmatch4py/ged/abstract_graph_edit_dist.pyx index 141196589418dfac2c4590a380c52ae3d2850e4b..f4b25fde928a0b0c497be35ba9df2bd7a033fbdf 100644 --- a/gmatch4py/ged/abstract_graph_edit_dist.pyx +++ b/gmatch4py/ged/abstract_graph_edit_dist.pyx @@ -4,14 +4,18 @@ from __future__ import print_function import sys import warnings import numpy as np +cimport numpy as np try: from munkres import munkres except ImportError: warnings.warn("To obtain optimal results install the Cython 'munkres' module at https://github.com/jfrelinger/cython-munkres-wrapper") from scipy.optimize import linear_sum_assignment as munkres -cimport numpy as np + from ..base cimport Base import networkx as nx +from ..helpers.general import parsenx2graph +from cython.parallel cimport prange,parallel + cdef class AbstractGraphEditDistance(Base): @@ -90,7 +94,7 @@ cdef class AbstractGraphEditDistance(Base): cpdef double substitute_cost(self, node1, node2, G, H): raise NotImplementedError - cpdef np.ndarray compare(self,list listgs, list selected): + cpdef np.ndarray compare_old(self,list listgs, list selected): cdef int n = len(listgs) cdef np.ndarray comparison_matrix = np.zeros((n, n)).astype(float) cdef int i,j @@ -105,3 +109,23 @@ cdef class AbstractGraphEditDistance(Base): #comparison_matrix[j, i] = comparison_matrix[i, j] np.fill_diagonal(comparison_matrix,0) return comparison_matrix + + cpdef np.ndarray compare(self,list listgs, list selected): + cdef int n = len(listgs) + cdef double[:,:] comparison_matrix = np.zeros((n, n)) + listgs=parsenx2graph(listgs) + cdef long[:] n_nodes = np.array([g.size() for g in listgs]) + + cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef int i,j + val=np.inf + with nogil, parallel(num_threads=8): + for i in prange(n,schedule='static'): + for j in range(n): + if n_nodes[i]>0 and n_nodes[j]>0 and selected_test[i] : + with gil: + comparison_matrix[i][j] = self.distance_ged(listgs[i],listgs[j]) + else: + comparison_matrix[i][j] = 0 + #comparison_matrix[j, i] = comparison_matrix[i, j] + return np.array(comparison_matrix) diff --git a/gmatch4py/ged/bipartite_graph_matching_2.pyx b/gmatch4py/ged/bipartite_graph_matching_2.pyx index 59e33e08781c00775e36269ca056b48b771f90ca..45fd67f625450c68d0579634de6d652430cfdd14 100644 --- a/gmatch4py/ged/bipartite_graph_matching_2.pyx +++ b/gmatch4py/ged/bipartite_graph_matching_2.pyx @@ -2,7 +2,8 @@ import numpy as np cimport numpy as np from ..base cimport Base - +from cython.parallel cimport prange,parallel +from ..helpers.general import parsenx2graph cdef class BP_2(Base): @@ -32,7 +33,7 @@ cdef class BP_2(Base): self.edge_del = edge_del self.edge_ins = edge_ins - cpdef np.ndarray compare(self,list listgs, list selected): + cpdef np.ndarray compare_old(self,list listgs, list selected): cdef int n = len(listgs) cdef np.ndarray comparison_matrix = np.zeros((n, n)).astype(float) cdef int i,j @@ -48,6 +49,27 @@ cdef class BP_2(Base): return comparison_matrix + cpdef np.ndarray compare(self,list listgs, list selected): + cdef int n = len(listgs) + cdef list new_gs=parsenx2graph(listgs) + cdef double[:,:] comparison_matrix = np.zeros((n, n)) + cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef int i,j + cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) + cdef long[:] n_edges = np.array([g.density() for g in new_gs]) + + with nogil, parallel(num_threads=4): + for i in prange(n,schedule='static'): + for j in range(i,n): + if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == True: + with gil: + comparison_matrix[i, j] = self.bp2(new_gs[i], new_gs[j]) + else: + comparison_matrix[i, j] = 0 + comparison_matrix[j, i] = comparison_matrix[i, j] + + return comparison_matrix + cdef double bp2(self, g1, g2): """ @@ -100,8 +122,8 @@ cdef class BP_2(Base): list containing costs from the optimal edit path """ cdef list psi_=[] - cdef list nodes1 = list(g1.nodes) - cdef list nodes2 = list(g2.nodes) + cdef list nodes1 = list(g1.nodes()) + cdef list nodes2 = list(g2.nodes()) for u in nodes1: v=None for w in nodes2: @@ -125,9 +147,9 @@ cdef class BP_2(Base): :param g2: Second Graph :return: """ - cdef np.ndarray min_sum = np.zeros(len(g1)) - nodes1 = list(g1.nodes) - nodes2 = list(g2.nodes) + cdef np.ndarray min_sum = np.zeros(g1.size()) + nodes1 = list(g1.nodes()) + nodes2 = list(g2.nodes()) nodes2.extend([None]) cdef np.ndarray min_i for i in range(len(nodes1)): @@ -178,8 +200,8 @@ cdef class BP_2(Base): """ #if isinstance(g1, nx.MultiDiGraph): - cdef list edges1 = list(g1.edges(n1)) if n1 else [] - cdef list edges2 = list(g2.edges(n2)) if n2 else [] + cdef list edges1 = g1.get_edges_no(n1) if n1 else [] + cdef list edges2 = g2.get_edges_no(n2) if n2 else [] cdef np.ndarray min_sum = np.zeros(len(edges1)) edges2.extend([None]) diff --git a/gmatch4py/ged/graph_edit_dist.pxd b/gmatch4py/ged/graph_edit_dist.pxd index 975f39b1ab05df1b55d8d571afc4867d34b63fcf..18020dab052797e7bef10bc24837b4d8802e3f0a 100644 --- a/gmatch4py/ged/graph_edit_dist.pxd +++ b/gmatch4py/ged/graph_edit_dist.pxd @@ -4,6 +4,7 @@ from .abstract_graph_edit_dist cimport AbstractGraphEditDistance cdef class GraphEditDistance(AbstractGraphEditDistance): + cpdef object relabel_cost(self, node1, node2, G, H) cpdef double substitute_cost(self, node1, node2, G, H) cdef double delete_cost(self, int i, int j, nodesG, G) cdef double insert_cost(self, int i, int j, nodesH, H) \ No newline at end of file diff --git a/gmatch4py/ged/graph_edit_dist.pyx b/gmatch4py/ged/graph_edit_dist.pyx index 331d0bb05fa53831b001f25742bf76818846e21e..3706c283884e6f488124812a0627b8fb5316c16c 100644 --- a/gmatch4py/ged/graph_edit_dist.pyx +++ b/gmatch4py/ged/graph_edit_dist.pyx @@ -7,6 +7,7 @@ import numpy as np cimport numpy as np from .abstract_graph_edit_dist cimport AbstractGraphEditDistance from ..base cimport intersection,union_ +from ..graph cimport Graph cdef class GraphEditDistance(AbstractGraphEditDistance): @@ -14,52 +15,43 @@ cdef class GraphEditDistance(AbstractGraphEditDistance): def __init__(self,node_del,node_ins,edge_del,edge_ins,weighted=False): AbstractGraphEditDistance.__init__(self,node_del,node_ins,edge_del,edge_ins) self.weighted=weighted + cpdef double substitute_cost(self, node1, node2, G, H): return self.relabel_cost(node1, node2, G, H) - def add_edges(self,node1,node2,G): - R=nx.create_empty_copy(G) - try: - R.add_edges_from(G.edges(node1,node2)) - except Exception as e: - # To counter bug with a None for attribute... weird ?? - arr_=G.edges(node1,node2) - new_list=[] - for item in arr_: - new_list.append((item[0],item[1])) - R.add_edges_from(new_list) - return R - - def relabel_cost(self, node1, node2, G, H): + cpdef object relabel_cost(self, node1, node2, G, H): ## Si deux noeuds égaux if node1 == node2 and G.degree(node1) == H.degree(node2): return 0.0 elif node1 == node2 and G.degree(node1) != H.degree(node2): - R = self.add_edges(node1,node2,G) - R2 = self.add_edges(node1,node2,H) - inter_=intersection(R,R2).number_of_edges() - add_diff=abs(R2.number_of_edges()-inter_) - del_diff=abs(R.number_of_edges()-inter_) + #R = Graph(self.add_edges(node1,node2,G),G.get_node_key(),G.get_egde_key()) + #R2 = Graph(self.add_edges(node1,node2,H),H.get_node_key(),H.get_egde_key()) + #inter_= R.size_edge_intersect(R2) + R=set(G.get_edges_no(node1)) + R2=set(H.get_edges_no(node2)) + inter_=R.intersection(R2) + add_diff=abs(len(R2)-len(inter_))#abs(R2.density()-inter_) + del_diff=abs(len(R)-len(inter_))#abs(R.density()-inter_) return (add_diff*self.edge_ins)+(del_diff*self.edge_del) #si deux noeuds connectés - if (node1,node2) in G.edges() or (node2,node1) in G.edges(): + if G.has_edge(node1,node2) or G.has_edge(node2,node1): return self.node_ins+self.node_del - if not node2 in G: - nodesH=list(H.nodes()) - index=nodesH.index(node2) + if not node2 in G.nodes(): + nodesH=H.nodes() + index=list(nodesH).index(node2) return self.node_del+self.node_ins+self.insert_cost(index,index,nodesH,H) return sys.maxsize cdef double delete_cost(self, int i, int j, nodesG, G): if i == j: - return self.node_del+(G.degree(nodesG[i],weight=("weight" if self.weighted else None))*self.edge_del) # Deleting a node implicate to delete in and out edges + return self.node_del+(G.degree(nodesG[i],weight=True)*self.edge_del) # Deleting a node implicate to delete in and out edges return sys.maxsize cdef double insert_cost(self, int i, int j, nodesH, H): if i == j: - deg=H.degree(nodesH[j],weight=("weight" if self.weighted else None)) + deg=H.degree(nodesH[j],weight=True) if isinstance(deg,dict):deg=0 return self.node_ins+(deg*self.edge_ins) else: diff --git a/gmatch4py/ged/graph_edit_dist_2.pyx b/gmatch4py/ged/graph_edit_dist_2.pyx deleted file mode 100644 index 9120da34ef0d31a70c424bb196a712698ce5d7c1..0000000000000000000000000000000000000000 --- a/gmatch4py/ged/graph_edit_dist_2.pyx +++ /dev/null @@ -1,68 +0,0 @@ -# -*- coding: UTF-8 -*- - -import sys - -import networkx as nx -import numpy as np -cimport numpy as np -from .abstract_graph_edit_dist cimport AbstractGraphEditDistance -from ..base cimport intersection,union_ -from ..graph cimport Graph - - -cdef class GraphEditDistance(AbstractGraphEditDistance): - - def __init__(self,node_del,node_ins,edge_del,edge_ins,weighted=False): - AbstractGraphEditDistance.__init__(self,node_del,node_ins,edge_del,edge_ins) - self.weighted=weighted - - cpdef double substitute_cost(self, node1, node2, G, H): - return self.relabel_cost(node1, node2, G, H) - - def add_edges(self,node1,node2,G): - R=nx.create_empty_copy(G.get_nx()) - try: - R.add_edges_from(G.edges(node1,node2)) - except Exception as e: - # To counter bug with a None for attribute... weird ?? - arr_=G.edges(node1,node2) - new_list=[] - for item in arr_: - new_list.append((item[0],item[1])) - R.add_edges_from(new_list) - return R - - cpdef relabel_cost(self, node1, node2, G, H): - ## Si deux noeuds égaux - if node1 == node2 and G.degree(node1) == H.degree(node2): - return 0.0 - elif node1 == node2 and G.degree(node1) != H.degree(node2): - R = Graph(self.add_edges(node1,node2,G),G.get_node_key(),G.get_egde_key()) - R2 = Graph(self.add_edges(node1,node2,H),H.get_node_key(),H.get_egde_key()) - inter_= R.size_edge_intersect(R2) - add_diff=abs(R2.density()-inter_) - del_diff=abs(R.density()-inter_) - return (add_diff*self.edge_ins)+(del_diff*self.edge_del) - - - #si deux noeuds connectés - if G.has_edge(*(node1,node2)) or G.has_edge(*(node2,node1)): - return self.node_ins+self.node_del - if not node2 in G: - nodesH=H.nodes() - index=list(nodesH).index(node2) - return self.node_del+self.node_ins+self.insert_cost(index,index,nodesH,H) - return sys.maxsize - - cdef double delete_cost(self, int i, int j, nodesG, G): - if i == j: - return self.node_del+(G.degree(nodesG[i],weight=True)*self.edge_del) # Deleting a node implicate to delete in and out edges - return sys.maxsize - - cdef double insert_cost(self, int i, int j, nodesH, H): - if i == j: - deg=H.degree(nodesH[j],weight=True) - if isinstance(deg,dict):deg=0 - return self.node_ins+(deg*self.edge_ins) - else: - return sys.maxsize \ No newline at end of file diff --git a/gmatch4py/ged/greedy_edit_distance.pyx b/gmatch4py/ged/greedy_edit_distance.pyx index b4908cb2337400eec20fa5d20fa6a704b0a61c36..9bdd2c47c9103a2c1a07d5845bbede52beb47b72 100644 --- a/gmatch4py/ged/greedy_edit_distance.pyx +++ b/gmatch4py/ged/greedy_edit_distance.pyx @@ -4,6 +4,7 @@ import sys from .graph_edit_dist cimport GraphEditDistance import numpy as np cimport numpy as np +from cython.parallel cimport prange,parallel cdef class GreedyEditDistance(GraphEditDistance): """ @@ -20,15 +21,6 @@ cdef class GreedyEditDistance(GraphEditDistance): cdef list edit_costs(self, G, H): cdef np.ndarray cost_matrix=self.create_cost_matrix(G,H) - """ - cdef np.ndarray cost_matrix_2=cost_matrix.copy() - cdef list psi=[] - for i in range(len(cost_matrix)): - phi_i=np.argmin((cost_matrix[i])) - cost_matrix=np.delete(cost_matrix,phi_i,1) - psi.append([i,phi_i+i]) #+i to compensate the previous column deletion - return [cost_matrix_2[psi[i][0]][psi[i][1]] for i in range(len(psi))] - """ cdef np.ndarray cost_matrix_2=cost_matrix.copy().astype(np.double) cdef list psi=[] for i in range(len(cost_matrix)): diff --git a/gmatch4py/ged/hausdorff_edit_distance.pyx b/gmatch4py/ged/hausdorff_edit_distance.pyx index d2327f6b35da425c0c302eb12b5964a7f9a81450..a83e2a1897bfd9330d2dd9e8beb30d3abd146fa2 100644 --- a/gmatch4py/ged/hausdorff_edit_distance.pyx +++ b/gmatch4py/ged/hausdorff_edit_distance.pyx @@ -3,6 +3,8 @@ import numpy as np cimport numpy as np from ..base cimport Base +from cython.parallel cimport prange,parallel +from ..helpers.general import parsenx2graph cdef class HED(Base): """ @@ -27,7 +29,7 @@ cdef class HED(Base): self.edge_ins = edge_ins - cpdef np.ndarray compare(self,list listgs, list selected): + cpdef np.ndarray compare_old(self,list listgs, list selected): cdef int n = len(listgs) cdef np.ndarray comparison_matrix = np.zeros((n, n)).astype(float) cdef int i,j @@ -42,6 +44,27 @@ cdef class HED(Base): comparison_matrix[j, i] = comparison_matrix[i, j] return comparison_matrix + + cpdef np.ndarray compare(self,list listgs, list selected): + cdef int n = len(listgs) + cdef list new_gs=parsenx2graph(listgs) + cdef double[:,:] comparison_matrix = np.zeros((n, n)) + cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef int i,j + cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) + cdef long[:] n_edges = np.array([g.density() for g in new_gs]) + + with nogil, parallel(num_threads=4): + for i in prange(n,schedule='static'): + for j in range(i,n): + if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == True: + with gil: + comparison_matrix[i, j] = self.hed(new_gs[i], new_gs[j]) + else: + comparison_matrix[i, j] = 0 + comparison_matrix[j, i] = comparison_matrix[i, j] + + return comparison_matrix cdef float hed(self, g1, g2): @@ -61,8 +84,8 @@ cdef class HED(Base): :return: """ cdef np.ndarray min_sum = np.zeros(len(g1)) - nodes1 = list(g1.nodes) - nodes2 = list(g2.nodes) + nodes1 = list(g1.nodes()) + nodes2 = list(g2.nodes()) nodes2.extend([None]) cdef np.ndarray min_i for i in range(len(nodes1)): @@ -113,8 +136,8 @@ cdef class HED(Base): """ #if isinstance(g1, nx.MultiDiGraph): - cdef list edges1 = list(g1.edges(n1)) if n1 else [] - cdef list edges2 = list(g2.edges(n2)) if n2 else [] + cdef list edges1 = g1.get_edges_no(n1) if n1 else [] + cdef list edges2 = g2.get_edges_no(n2) if n2 else [] cdef np.ndarray min_sum = np.zeros(len(edges1)) edges2.extend([None]) diff --git a/gmatch4py/graph.pxd b/gmatch4py/graph.pxd index 4cb6bba0d491f2d1df1f4bcb381f0bb1439f2325..e1ef555680d968f2f84d573b08754cac0749706e 100644 --- a/gmatch4py/graph.pxd +++ b/gmatch4py/graph.pxd @@ -38,6 +38,7 @@ cdef class Graph: cdef dict degree_per_attr # degree information per attr val cdef dict degree_per_attr_weighted # degree information per attr val cdef list attr_nodes # list of attr(dict) values for each node + cdef dict edges_of_nodes # list of egdes connected to each node # EDGES ATTRIBUTES ################## @@ -107,7 +108,8 @@ cdef class Graph: ## GETTER ######### - cpdef list get_edges_(self,e1,e2) + cpdef list get_edges_ed(self,str e1, str e2) + cpdef list get_edges_no(self,str n) cpdef set get_edges_hash(self) cpdef set get_nodes_hash(self) diff --git a/gmatch4py/graph.pyx b/gmatch4py/graph.pyx index 45397f6e07bb1d42c467d9beb672ef50ae1c27c1..4d3b770b1babedcbab3a77c55711ebcff2c20d23 100644 --- a/gmatch4py/graph.pyx +++ b/gmatch4py/graph.pyx @@ -59,7 +59,9 @@ cdef class Graph: self.degree_per_attr_weighted={attr_v:{n:{"in":0,"out":0} for n in self.nodes_list} for attr_v in self.unique_edge_attr_vals} # Retrieving Degree Information + self.edges_of_nodes={} for n in self.nodes_list: + self.edges_of_nodes[n]=[self.hash_edge_attr(e1,e2,attr_dict[self.edge_attr_key]) if self.is_edge_attr else self.hash_edge(e1,e2) for e1,e2,attr_dict in G.edges(n,data=True)] degree_all.append(G.degree(n)) degree_all_weighted.append(G.degree(n,weight="weight")) if self.is_directed: @@ -161,12 +163,12 @@ cdef class Graph: cpdef bint has_edge(self,str n_id1,str n_id2): if self.is_directed: - if n_id1 in self.edges_hash_map and n_id2 in self.edges_hash_map[n_id1][n_id2]: + if n_id1 in self.edges_hash_map and n_id2 in self.edges_hash_map[n_id1]: return True else: - if n_id1 in self.edges_hash_map and n_id2 in self.edges_hash_map[n_id1][n_id2]: + if n_id1 in self.edges_hash_map and n_id2 in self.edges_hash_map[n_id1]: return True - if n_id2 in self.edges_hash_map and n_id1 in self.edges_hash_map[n_id2][n_id1]: + if n_id2 in self.edges_hash_map and n_id1 in self.edges_hash_map[n_id2]: return True return False @@ -199,12 +201,14 @@ cdef class Graph: else: return self.edges_list - cpdef list get_edges_(self,e1,e2): + cpdef list get_edges_ed(self,str e1,str e2): if self.is_edge_attr: hashes=self.edges_hash_map[e1][e2] return [(e1,e2,self.edges_attr_list[self.edges_hash_idx[hash_]])for hash_ in hashes] else: return [(e1,e2,None)] + cpdef list get_edges_no(self,str n): + return self.edges_of_nodes[n] cpdef dict get_edge_attr(self,edge_hash): return self.edges_attr_list[self.edges_hash_idx[edge_hash]] diff --git a/gmatch4py/helpers/general.pyx b/gmatch4py/helpers/general.pyx new file mode 100644 index 0000000000000000000000000000000000000000..0bb88f726ef6c7f42a80ef923221c0059b4a3417 --- /dev/null +++ b/gmatch4py/helpers/general.pyx @@ -0,0 +1,7 @@ +from ..graph cimport Graph +import networkx as nx + +def parsenx2graph(list_gs): + new_gs=[nx.relabel_nodes(g,{node:str(node) for node in list(g.nodes)},copy=True) for g in list_gs] + new_gs=[Graph(g) for g in new_gs] + return new_gs diff --git a/gmatch4py/jaccard.pyx b/gmatch4py/jaccard.pyx index a987192ab183d27f438f1920edaec774830f29a8..99d8b5d03c7fed243a35fd6d56c29e1a80b35710 100644 --- a/gmatch4py/jaccard.pyx +++ b/gmatch4py/jaccard.pyx @@ -5,14 +5,16 @@ cimport numpy as np from .base cimport Base from .base cimport intersection,union_ - +from ..helpers.general import parsenx2graph +from cython.parallel cimport prange,parallel cdef class Jaccard(Base): def __init__(self): Base.__init__(self,0,True) - cpdef np.ndarray compare(self,list listgs, list selected): + + cpdef np.ndarray compare_old(self,list listgs, list selected): cdef int n = len(listgs) cdef np.ndarray comparison_matrix = np.zeros((n, n)) cdef int i,j @@ -37,5 +39,38 @@ cdef class Jaccard(Base): return comparison_matrix + cpdef np.ndarray compare(self,list listgs, list selected): + cdef int n = len(listgs) + cdef list new_gs=parsenx2graph(listgs) + cdef double[:,:] comparison_matrix = np.zeros((n, n)) + cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) + cdef long[:] n_edges = np.array([g.density() for g in new_gs]) + cdef int i,j + + cdef bint[:] selected_test = self.get_selected_array(selected,n) + + cdef double[:,:] intersect_len_nodes = np.zeros((n, n)) + cdef double[:,:] intersect_len_edges = np.zeros((n, n)) + cdef double[:,:] union_len_nodes = np.zeros((n, n)) + cdef double[:,:] union_len_edges = np.zeros((n, n)) + for i in range(n): + for j in range(i,n): + intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j]) + intersect_len_edges[i][j]=new_gs[i].size_edge_intersect(new_gs[j])#len(set(hash_edges[i]).intersection(hash_edges[j])) + union_len_nodes[i][j]=new_gs[i].size_node_union(new_gs[j]) + union_len_edges[i][j]=new_gs[i].size_node_union(new_gs[j]) + with nogil, parallel(num_threads=4): + for i in prange(n,schedule='static'): + for j in range(i,n): + if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i]: + if union_len_edges[i][j] >0 and union_len_nodes[i][j] >0: + comparison_matrix[i][j]= \ + (intersect_len_edges[i][j]/union_len_edges[i][j])*\ + (intersect_len_nodes[i][j]/union_len_nodes[i][j]) + + else: + comparison_matrix[i][j] = 0. + comparison_matrix[j][i] = comparison_matrix[i][j] + return np.array(comparison_matrix) diff --git a/gmatch4py/mcs.pyx b/gmatch4py/mcs.pyx index d2742e4f762477587335b3f39021b182a2de4a69..9ad07f06926f3568d3a0afb7bbe290ed70581d00 100644 --- a/gmatch4py/mcs.pyx +++ b/gmatch4py/mcs.pyx @@ -1,8 +1,10 @@ # coding = utf-8 import numpy as np cimport numpy as np - +from .graph cimport Graph from .base cimport Base +from cython.parallel cimport prange,parallel +from ..helpers.general import parsenx2graph cdef class MCS(Base): """ @@ -12,7 +14,7 @@ cdef class MCS(Base): def __init__(self): Base.__init__(self,0,True) - cpdef np.ndarray compare(self,list listgs, list selected): + cpdef np.ndarray compare_old(self,list listgs, list selected): cdef int n = len(listgs) cdef np.ndarray comparison_matrix = np.zeros((n, n)) for i in range(n): @@ -25,6 +27,28 @@ cdef class MCS(Base): comparison_matrix[i, j] = 0. comparison_matrix[j, i] = comparison_matrix[i, j] return comparison_matrix + + cpdef np.ndarray compare(self,list listgs, list selected): + cdef int n = len(listgs) + cdef double [:,:] comparison_matrix = np.zeros((n, n)) + cdef bint[:] selected_test = self.get_selected_array(selected,n) + cdef list new_gs=parsenx2graph(listgs) + cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) + cdef double [:,:] intersect_len_nodes = np.zeros((n, n)) + cdef int i,j + for i in range(n): + for j in range(i,n): + intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j]) + + with nogil, parallel(num_threads=4): + for i in prange(n,schedule='static'): + for j in range(i, n): + if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i]: + comparison_matrix[i][j] = intersect_len_nodes[i][j]/max(n_nodes[i],n_nodes[j]) + else: + comparison_matrix[i][j] = 0. + comparison_matrix[j][i] = comparison_matrix[i][j] + return np.array(comparison_matrix) def s_mcs(self,G, H): """ diff --git a/gmatch4py/vertex_edge_overlap.pyx b/gmatch4py/vertex_edge_overlap.pyx index 51f365f477ffe9edee3058245ecdbfac912d7ca2..3bbe9c7ee496eb91bb200c046fdc53ec23770664 100644 --- a/gmatch4py/vertex_edge_overlap.pyx +++ b/gmatch4py/vertex_edge_overlap.pyx @@ -5,6 +5,7 @@ cimport numpy as np from .base cimport Base,intersection from .graph cimport Graph from cython.parallel cimport prange,parallel +from ..helpers.general import parsenx2graph cdef class VertexEdgeOverlap(Base): @@ -18,19 +19,35 @@ cdef class VertexEdgeOverlap(Base): def __init__(self): Base.__init__(self,0,True) + cpdef np.ndarray compare_old(self,list listgs, list selected): + n = len(listgs) + cdef np.ndarray comparison_matrix = np.zeros((n, n)) + cdef list inter_ver,inter_ed + cdef int denom,i,j + for i in range(n): + for j in range(i,n): + g1,g2 = listgs[i],listgs[j] + f=self.isAccepted(g1,i,selected) + if f: + inter_g= intersection(g1,g2) + denom=g1.number_of_nodes()+g2.number_of_nodes()+\ + g1.number_of_edges()+g2.number_of_edges() + if denom == 0: + continue + comparison_matrix[i,j]=(2*(inter_g.number_of_nodes() + +inter_g.number_of_edges()))/denom # Data = True --> For nx.MultiDiGraph + comparison_matrix[j, i] = comparison_matrix[i, j] + return comparison_matrix + cpdef np.ndarray compare(self,list listgs, list selected): cdef int n = len(listgs) - cdef list new_gs=[Graph(g) for g in listgs] + cdef list new_gs=parsenx2graph(listgs) cdef double[:,:] comparison_matrix = np.zeros((n, n)) - cdef list inter_ver,inter_ed cdef int denom,i,j - cdef bint f cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) cdef long[:] n_edges = np.array([g.density() for g in new_gs]) - #print(type(test[0,0])) - #cdef str[:,:] hash_edges = test - cdef bint[:] selected_test + cdef bint[:] selected_test = self.get_selected_array(selected,n) cdef double[:,:] intersect_len_nodes = np.zeros((n, n)) cdef double[:,:] intersect_len_edges = np.zeros((n, n)) @@ -42,7 +59,7 @@ cdef class VertexEdgeOverlap(Base): with nogil, parallel(num_threads=4): for i in prange(n,schedule='static'): for j in range(i,n): - if n_nodes[i] > 0 and n_nodes[j] > 0 : + if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == True: denom=n_nodes[i]+n_nodes[j]+\ n_edges[i]+n_edges[j] if denom == 0: diff --git a/setup.py b/setup.py index ecdce15995f8921517ea3ad83a10672aad40fd76..16b9d05a5c4ecb984e4145537bda928a19b553ac 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( cmdclass={'build_ext': build_ext}, setup_requires=["numpy","networkx","scipy",'scikit-learn'], install_requires=["numpy","networkx","scipy",'scikit-learn'], - version="0.2.2", + version="0.2.2alpha", classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", @@ -78,7 +78,7 @@ setup( ] ) #Clean cpp and compiled file -f=False +f=True if f: if os.path.exists("build"): shutil.rmtree("build") diff --git a/gmatch4py/test/test.py b/test/test.py similarity index 100% rename from gmatch4py/test/test.py rename to test/test.py