Commit 31bf928c by Fize Jacques

### Graph class almost done

parent d8645375
 ... ... @@ -2,11 +2,16 @@ from __future__ import print_function import sys import warnings import numpy as np from scipy.optimize import linear_sum_assignment try: from munkres import munkres except ImportError: warnings.warn("To obtain optimal results install the Cython 'munkres' module at https://github.com/jfrelinger/cython-munkres-wrapper") from scipy.optimize import linear_sum_assignment as munkres cimport numpy as np from ..base cimport Base import networkx as nx cdef class AbstractGraphEditDistance(Base): ... ... @@ -35,9 +40,7 @@ cdef class AbstractGraphEditDistance(Base): :return: """ cdef np.ndarray cost_matrix = self.create_cost_matrix(G,H).astype(float) row_ind,col_ind = linear_sum_assignment(cost_matrix) cdef int f=len(row_ind) return [cost_matrix[row_ind[i]][col_ind[i]] for i in range(f)] return cost_matrix[munkres(cost_matrix)].tolist() cpdef np.ndarray create_cost_matrix(self, G, H): """ ... ... @@ -53,8 +56,13 @@ cdef class AbstractGraphEditDistance(Base): The delete -> delete region is filled with zeros """ cdef int n = G.number_of_nodes() cdef int m = H.number_of_nodes() cdef int n,m try: n = G.number_of_nodes() m = H.number_of_nodes() except: n = G.size() m = H.size() cdef np.ndarray cost_matrix = np.zeros((n+m,n+m)) cdef list nodes1 = list(G.nodes()) cdef list nodes2 = list(H.nodes()) ... ... @@ -89,7 +97,7 @@ cdef class AbstractGraphEditDistance(Base): for i in range(n): for j in range(n): g1,g2=listgs[i],listgs[j] f=self.isAccepted(g1,i,selected) f=self.isAccepted(g1 if isinstance(g1,nx.Graph) else g1.get_nx(),i,selected) if f: comparison_matrix[i, j] = self.distance_ged(g1, g2) else: ... ...
 # -*- coding: UTF-8 -*-
import sys

import networkx as nx
import numpy as np
cimport numpy as np

from .abstract_graph_edit_dist cimport AbstractGraphEditDistance
from ..base cimport intersection,union_
from ..graph cimport Graph


cdef class GraphEditDistance(AbstractGraphEditDistance):
    """Graph edit distance whose edit costs are derived from node degrees.

    The four base costs (``node_del``, ``node_ins``, ``edge_del``,
    ``edge_ins``) are forwarded to ``AbstractGraphEditDistance``; the
    ``weighted`` flag is only stored on the instance by this class.
    """

    def __init__(self, node_del, node_ins, edge_del, edge_ins, weighted=False):
        AbstractGraphEditDistance.__init__(self, node_del, node_ins, edge_del, edge_ins)
        self.weighted = weighted

    cpdef double substitute_cost(self, node1, node2, G, H):
        """Return the cost of substituting ``node1`` (in G) by ``node2`` (in H).

        Substitution is priced exactly like a relabelling.
        """
        return self.relabel_cost(node1, node2, G, H)

    def add_edges(self, node1, node2, G):
        """Build an edge-less copy of G's networkx graph and add selected edges.

        The edges come from ``G.edges(node1, node2)``. If adding them as-is
        raises, they are added again reduced to bare ``(source, target)``
        pairs.

        :return: the new networkx graph
        """
        skeleton = nx.create_empty_copy(G.get_nx())
        try:
            skeleton.add_edges_from(G.edges(node1, node2))
        except Exception:
            # Works around a failure seen when an edge attribute is None:
            # retry with every edge stripped down to its two endpoints.
            endpoints_only = [(edge[0], edge[1]) for edge in G.edges(node1, node2)]
            skeleton.add_edges_from(endpoints_only)
        return skeleton

    cpdef relabel_cost(self, node1, node2, G, H):
        """Return the cost of relabelling ``node1`` (in G) as ``node2`` (in H).

        * same node, same degree in both graphs: ``0.0``
        * same node, different degree: priced from the edges that would have
          to be inserted / deleted around the node
        * ``node1`` and ``node2`` linked by an edge of G (either direction):
          ``node_ins + node_del``
        * ``node2`` not in G: ``node_del + node_ins`` plus the insertion cost
          of ``node2`` in H
        * anything else: ``sys.maxsize``
        """
        # Case 1: the two nodes are equal
        if node1 == node2:
            if G.degree(node1) == H.degree(node2):
                return 0.0
            sub_g = Graph(self.add_edges(node1, node2, G), G.get_node_key(), G.get_egde_key())
            sub_h = Graph(self.add_edges(node1, node2, H), H.get_node_key(), H.get_egde_key())
            shared_edges = sub_g.size_edge_intersect(sub_h)
            to_insert = abs(sub_h.density() - shared_edges)
            to_delete = abs(sub_g.density() - shared_edges)
            return (to_insert * self.edge_ins) + (to_delete * self.edge_del)

        # Case 2: the two nodes are connected in G
        if G.has_edge(node1, node2) or G.has_edge(node2, node1):
            return self.node_ins + self.node_del

        # Case 3: node2 is unknown to G
        if node2 not in G:
            nodesH = H.nodes()
            position = list(nodesH).index(node2)
            return self.node_del + self.node_ins + self.insert_cost(position, position, nodesH, H)

        return sys.maxsize

    cdef double delete_cost(self, int i, int j, nodesG, G):
        """Return the cost of deleting node ``nodesG[i]`` from G.

        Only the diagonal (``i == j``) is a valid deletion; every other cell
        is priced ``sys.maxsize``.
        """
        if i != j:
            return sys.maxsize
        # Deleting a node also means deleting its in and out edges.
        return self.node_del + (G.degree(nodesG[i], weight=True) * self.edge_del)

    cdef double insert_cost(self, int i, int j, nodesH, H):
        """Return the cost of inserting node ``nodesH[j]`` into H.

        Only the diagonal (``i == j``) is a valid insertion; every other cell
        is priced ``sys.maxsize``.
        """
        if i != j:
            return sys.maxsize
        deg = H.degree(nodesH[j], weight=True)
        # A dict result is treated as "no degree".
        if isinstance(deg, dict):
            deg = 0
        return self.node_ins + (deg * self.edge_ins)
\ No newline at end of file
 cimport numpy as np

# Declaration (.pxd) of the Graph extension type: the C-level attributes and
# the cpdef methods that other Cython modules can cimport.
cdef class Graph:
    ##################################
    # ATTRIBUTES
    ##################################

    # GRAPH PROPERTY ATTRIBUTES
    ###########################
    cdef bint is_directed # If the graph is directed
    cdef bint is_multi # If the graph is a Multi-Graph
    cdef bint is_node_attr # presumably: a node attribute key is in use -- TODO confirm
    cdef bint is_edge_attr # presumably: an edge attribute key is in use -- TODO confirm

    # ATTR VAL ATTRIBUTES
    #####################
    cdef str node_attr_key # Key that contains the main attr value for a node
    cdef str edge_attr_key # Key that contains the main attr value for an edge
    cdef set unique_node_attr_vals # set of the distinct node attr values
    cdef set unique_edge_attr_vals # set of the distinct edge attr values

    ## NODE ATTRIBUTES
    #################
    cdef list nodes_list # list of nodes ids
    cdef list nodes_attr_list # list of attr value for each node (following nodes_list order)
    cdef list nodes_hash # hash representation of every node
    cdef set nodes_hash_set # hash representation of every node (set version for intersection and union operation)
    cdef dict nodes_idx # index of each node in `nodes_list`
    cdef list nodes_weight # list that contains each node's weight (following nodes_list order)
    cdef long[:] nodes_degree # degree list
    cdef long[:] nodes_degree_in # in degree list
    cdef long[:] nodes_degree_out # out degree list
    cdef long[:] nodes_degree_weighted # weighted version of nodes_degree
    cdef long[:] nodes_degree_in_weighted # weighted version of nodes_degree_in
    cdef long[:] nodes_degree_out_weighted # weighted version of nodes_degree_out
    cdef dict degree_per_attr # degree information per attr val: attr val -> node id -> {"in", "out"}
    cdef dict degree_per_attr_weighted # weighted version of degree_per_attr
    cdef list attr_nodes # list of attr(dict) values for each node

    # EDGES ATTRIBUTES
    ##################
    cdef list edges_list # edge list
    cdef list edges_attr_list # list of attr value for each edge (following edges_list order)
    cdef dict edges_hash_idx # edge hash -> index in edges_list and edges_attr_list
    cdef list edges_hash # hash representation of every edge -- TODO: to be reviewed
    cdef set edges_hash_set # set of hash representation of every edge (set version for intersection and union operation)
    cdef dict edges_weight # edge hash -> edge weight
    cdef dict edges_hash_map # nested mapping: id1 -> id2 -> hash
    cdef list attr_edges # list of attr(dict) values for each edge

    # SIZE ATTRIBUTES
    ###############
    cdef long number_of_nodes # number of nodes
    cdef long number_of_edges # number of edges
    cdef dict number_of_edges_per_attr # number of edges per attr value
    cdef dict number_of_nodes_per_attr # number of nodes per attr value

    cdef object nx_g # the graph object this instance was built from

    ##################################
    # METHODS
    ##################################

    # DIMENSION GETTER
    ##################
    cpdef long size(self)
    cpdef int size_attr(self, attr_val)
    cpdef long density(self)
    cpdef int density_attr(self, str attr_val)

    # HASH FUNCTION
    ###############
    cpdef str hash_node(self,str n1)
    cpdef str hash_edge(self,str n1,str n2)
    cpdef str hash_node_attr(self,str n1, str attr_value)
    cpdef str hash_edge_attr(self,str n1,str n2, str attr_value)

    ## EXIST FUNCTION
    ###############
    cpdef bint has_node(self,str n_id)
    cpdef bint has_edge(self,str n_id1,str n_id2)

    ## LEN FUNCTION
    ###############
    cpdef int size_node_intersect(self,Graph G)
    cpdef int size_node_union(self,Graph G)
    cpdef int size_edge_intersect(self,Graph G)
    cpdef int size_edge_union(self,Graph G)

    # DEGREE FUNCTION
    #################
    # `weight=*` declares an optional argument; its default value is given
    # by the implementation, not by this declaration.
    cpdef int degree(self,str n_id, bint weight=*)
    cpdef int in_degree(self,str n_id, bint weight=*)
    cpdef int out_degree(self,str n_id, bint weight=*)
    cpdef int in_degree_attr(self,str n_id,str attr_val, bint weight=*)
    cpdef int out_degree_attr(self,str n_id,str attr_val, bint weight=*)
    cpdef int degree_attr(self,str n_id,str attr_val, bint weight=*)

    ## GETTER
    #########
    cpdef list get_edges_(self,e1,e2)
    cpdef set get_edges_hash(self)
    cpdef set get_nodes_hash(self)
    cpdef str get_node_key(self)
    # NOTE(review): "egde" is a misspelling of "edge"; renaming it would
    # change the public interface, so the name is left as declared.
    cpdef str get_egde_key(self)
    cpdef dict get_edge_attrs(self,edge_hash)
    cpdef dict get_node_attrs(self, node_hash)
    cpdef dict get_node_attr(self, node_hash)
    cpdef dict get_edge_attr(self,edge_hash)
\ No newline at end of file
 ... ... @@ -8,57 +8,6 @@ import networkx as nx cdef class Graph: # GRAPH PROPERTY ATTRIBUTES ########################### cdef bint is_directed # If the graph is directed cdef bint is_multi # If the graph is a Multi-Graph cdef bint is_node_attr cdef bint is_edge_attr # ATTR VAL ATTRIBUTES ##################### cdef str node_attr_key # Key that contains the main attr value for a node cdef str edge_attr_key # Key that contains the main attr value for an edge cdef set unique_node_attr_vals # list cdef set unique_edge_attr_vals # list ## NODE ATTRIBUTES ################# cdef list nodes_list # list of nodes ids cdef list nodes_attr_list # list of attr value for each node (following nodes list order) cdef list nodes_hash # hash representation of every node cdef set nodes_hash_set # hash representation of every node (set version for intersection and union operation) cdef dict nodes_idx # index of each node in `nodes_list` cdef list nodes_weight # list that contains each node's weight (following nodes_list order) cdef long[:] nodes_degree # degree list cdef long[:] nodes_degree_in # in degree list cdef long[:] nodes_degree_out # out degree list cdef dict degree_per_attr # degree information per attr val cdef list attr_nodes # list of attr(dict) values for each node # EDGES ATTRIBUTES ################## cdef list edges_list # edge list cdef list edges_attr_list # list of attr value for each edge (following nodes list order) cdef list edges_hash # hash representation of every edges ## A VOIR ! 
cdef set edges_hash_set # set of hash representation of every edges (set version for intersection and union operation) cdef dict edges_weight # list that contains each node's weight (following nodes_list order) cdef dict edges_hash_map #[id1,[id2,hash]] cdef list attr_edges # list of attr(dict) values for each edge # SIZE INDICATOR ############### cdef long number_of_nodes # number of nodes cdef long number_of_edges # number of edges cdef dict number_of_edges_per_attr # number of nodes per attr value cdef dict number_of_nodes_per_attr # number of edges per attr value cdef object nx_g def __init__(self,G, node_attr_key="",edge_attr_key=""): self.nx_g=G ... ... @@ -101,53 +50,81 @@ cdef class Graph: degree_all=[] degree_in=[] degree_out=[] degree_all_weighted=[] degree_in_weighted=[] degree_out_weighted=[] if self.is_edge_attr: self.degree_per_attr={attr_v:{n:{"in":0,"out":0} for n in self.nodes_list} for attr_v in self.unique_edge_attr_vals} self.degree_per_attr_weighted={attr_v:{n:{"in":0,"out":0} for n in self.nodes_list} for attr_v in self.unique_edge_attr_vals} # Retrieving Degree Information for n in self.nodes_list: degree_all.append(G.degree(n)) degree_all_weighted.append(G.degree(n,weight="weight")) if self.is_directed: degree_in.append(G.in_degree(n)) degree_in_weighted.append(G.in_degree(n,weight="weight")) degree_out.append(G.out_degree(n)) degree_out_weighted.append(G.out_degree(n)) else: degree_in.append(degree_all[-1]) degree_in_weighted.append(degree_all_weighted[-1]) degree_out.append(degree_all[-1]) degree_out_weighted.append(degree_all_weighted[-1]) if self.is_edge_attr: if self.is_directed: in_edge=list(G.in_edges(n,data=True)) out_edge=list(G.in_edges(n,data=True)) for n1,n2,attr_dict in in_edge: self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["in"]+=1 self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["in"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 ) for n1,n2,attr_dict in out_edge: 
self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["out"]+=1 self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["out"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 ) else: edges=G.edges(n,data=True) for n1,n2,attr_dict in edges: self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["in"]+=1 self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["out"]+=1 self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["in"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 ) self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["out"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 ) self.nodes_degree=np.array(degree_all) self.nodes_degree_in=np.array(degree_in) self.nodes_degree_out=np.array(degree_out) self.nodes_degree_weighted=np.array(degree_all_weighted) self.nodes_degree_in_weighted=np.array(degree_in_weighted) self.nodes_degree_out_weighted=np.array(degree_out_weighted) # EDGE INFO INIT ################# self.edges_hash=[] self.edges_hash_map = {} self.edges_hash_idx = {} for ix, ed in enumerate(self.edges_list): e1,e2=ed if not e1 in self.edges_hash_map:self.edges_hash_map[e1]={} self.edges_hash_map[e1][e2]=self.hash_edge_attr(e1,e2,self.edges_attr_list[ix]) if self.is_edge_attr else self.hash_edge(e1,e2) self.edges_hash.append(self.edges_hash_map[e1][e2]) hash_=self.hash_edge_attr(e1,e2,self.edges_attr_list[ix]) if self.is_edge_attr else self.hash_edge(e1,e2) if self.is_multi: if not e2 in self.edges_hash_map[e1]:self.edges_hash_map[e1][e2]={} self.edges_hash_map[e1][e2][self.edges_attr_list[ix]]=hash_ else: self.edges_hash_map[e1][e2]=hash_ self.edges_hash_idx[hash_]=ix self.edges_hash.append(hash_) self.edges_hash_set=set(self.edges_hash) self.edges_weight={} for e1,e2,attr_dict in list(G.edges(data=True)): self.edges_hash_map[e1][e2]=attr_dict["weight"] if "weight" in attr_dict else 1 hash_=self.hash_edge_attr(e1,e2,attr_dict[self.edge_attr_key]) if self.is_edge_attr else self.hash_edge(e1,e2) 
self.edges_weight[hash_]=attr_dict["weight"] if "weight" in attr_dict else 1 self.number_of_edges = len(self.edges_list) self.number_of_nodes = len(self.nodes_list) ... ... @@ -177,12 +154,12 @@ cdef class Graph: return "_".join([n1,n2,attr_value]) ## EXIST FUNCTION cdef bint has_node(self,str n_id): cpdef bint has_node(self,str n_id): if n_id in self.nodes_list: return True return False cdef bint has_edge(self,str n_id1,str n_id2): cpdef bint has_edge(self,str n_id1,str n_id2): if self.is_directed: if n_id1 in self.edges_hash_map and n_id2 in self.edges_hash_map[n_id1][n_id2]: return True ... ... @@ -205,6 +182,10 @@ cdef class Graph: return len(self.edges_hash_set.union(G.edges_hash_set)) ## GETTER def get_nx(self): return self.nx_g def nodes(self,data=False): if data: return self.nodes_list,self.attr_nodes ... ... @@ -217,11 +198,39 @@ cdef class Graph: return self.edges_list,self.attr_edges else: return self.edges_list cpdef list get_edges_(self,e1,e2): if self.is_edge_attr: hashes=self.edges_hash_map[e1][e2] return [(e1,e2,self.edges_attr_list[self.edges_hash_idx[hash_]])for hash_ in hashes] else: return [(e1,e2,None)] cpdef dict get_edge_attr(self,edge_hash): return self.edges_attr_list[self.edges_hash_idx[edge_hash]] cpdef dict get_node_attr(self, node_hash): return self.edges_attr_list[self.edges_hash_idx[node_hash]] cpdef dict get_edge_attrs(self,edge_hash): return self.attr_edges[self.edges_hash_idx[edge_hash]] cpdef dict get_node_attrs(self, node_hash): return self.attr_nodes[self.edges_hash_idx[node_hash]] cpdef set get_edges_hash(self): return self.edges_hash_set cpdef set get_nodes_hash(self): return self.nodes_hash_set cpdef str get_node_key(self): return self.node_attr_key cpdef str get_egde_key(self): return self.edge_attr_key ##### cpdef long size(self): return self.number_of_nodes ... ... 
@@ -234,28 +243,44 @@ cdef class Graph: cpdef int density_attr(self, str attr_val): return self.number_of_edges_per_attr[attr_val] cpdef int degree(self,str n_id): cpdef int degree(self,str n_id, bint weight=False): if weight: return self.nodes_degree_weighted[self.nodes_idx[n_id]] return self.nodes_degree[self.nodes_idx[n_id]] cpdef int in_degree(self,str n_id): cpdef int in_degree(self,str n_id, bint weight=False): if weight: return self.nodes_degree_in_weighted[self.nodes_idx[n_id]] return self.nodes_degree_in[self.nodes_idx[n_id]] cpdef int out_degree(self,str n_id): cpdef int out_degree(self,str n_id, bint weight=False): if weight: return self.nodes_degree_out_weighted[self.nodes_idx[n_id]] return self.nodes_degree_out[self.nodes_idx[n_id]] cpdef int in_degree_attr(self,str n_id,str attr_val): cpdef int in_degree_attr(self,str n_id,str attr_val, bint weight=False): if not self.is_edge_attr and not self.is_directed: raise AttributeError("No edge attribute have been defined") if weight: return self.degree_per_attr_weighted[attr_val][n_id]["in"] return self.degree_per_attr[attr_val][n_id]["in"] cpdef int out_degree_attr(self,str n_id,str attr_val): cpdef int out_degree_attr(self,str n_id,str attr_val, bint weight=False): if not self.is_edge_attr and not self.is_directed: raise AttributeError("No edge attribute have been defined") if weight: return self.degree_per_attr_weighted[attr_val][n_id]["out"] return self.degree_per_attr[attr_val][n_id]["out"] cpdef int degree_attr(self,str n_id,str attr_val): cpdef int degree_attr(self,str n_id,str attr_val, bint weight=False): if not self.is_edge_attr: raise AttributeError("No edge attribute have been defined") if not self.is_directed: if weight: return self.degree_per_attr_weighted[attr_val][n_id]["out"] return self.degree_per_attr[attr_val][n_id]["out"] if weight: return self.degree_per_attr_weighted[attr_val][n_id]["in"] + self.degree_per_attr_weighted[attr_val][n_id]["out"] return 
self.degree_per_attr[attr_val][n_id]["out"] + self.degree_per_attr[attr_val][n_id]["in"] #GRAPH SETTER ... ...
 ... ... @@ -3,6 +3,8 @@ import numpy as np cimport numpy as np from .base cimport Base,intersection from .graph cimport Graph from cython.parallel cimport prange,parallel cdef class VertexEdgeOverlap(Base): ... ... @@ -17,24 +19,39 @@ cdef class VertexEdgeOverlap(Base): Base.__init__(self,0,True) cpdef np.ndarray compare(self,list listgs, list selected): n = len(listgs) cdef np.ndarray comparison_matrix = np.zeros((n, n)) cdef int n = len(listgs) cdef list new_gs=[Graph(g) for g in listgs] cdef double[:,:] comparison_matrix = np.zeros((n, n)) cdef list inter_ver,inter_ed cdef int denom,i,j cdef bint f cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) cdef long[:] n_edges = np.array([g.density() for g in new_gs]) #print(type(test[0,0])) #cdef str[:,:] hash_edges = test cdef bint[:] selected_test cdef double[:,:] intersect_len_nodes = np.zeros((n, n)) cdef double[:,:] intersect_len_edges = np.zeros((n, n)) for i in range(n): for j in range(i,n): g1,g2 = listgs[i],listgs[j] f=self.isAccepted(g1,i,selected) if f: inter_g= intersection(g1,g2) denom=g1.number_of_nodes()+g2.number_of_nodes()+\ g1.number_of_edges()+g2.number_of_edges() if denom == 0: continue comparison_matrix[i,j]=(2*(inter_g.number_of_nodes() +inter_g.number_of_edges()))/denom # Data = True --> For nx.MultiDiGraph comparison_matrix[j, i] = comparison_matrix[i, j] return comparison_matrix intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j]) intersect_len_edges[i][j]=new_gs[i].size_edge_intersect(new_gs[j])#len(set(hash_edges[i]).intersection(hash_edges[j])) with nogil, parallel(num_threads=4): for i in prange(n,schedule='static'): for j in range(i,n): if n_nodes[i] > 0 and n_nodes[j] > 0 : denom=n_nodes[i]+n_nodes[j]+\ n_edges[i]+n_edges[j] if denom == 0: continue comparison_matrix[i][j]=(2*(intersect_len_nodes[i][j] +intersect_len_edges[i][j]))/denom # Data = True --> For nx.MultiDiGraph comparison_matrix[i][j] = comparison_matrix[i][j] return np.array(comparison_matrix)
 ... ... @@ -42,7 +42,9 @@ def makeExtension(extName): return Extension( extName, [extPath],include_dirs=[np.get_include()],language='c++',libraries=libs [extPath],include_dirs=[np.get_include()],language='c++',libraries=libs, #extra_compile_args = ["-O0", "-fopenmp"],extra_link_args=['-fopenmp'] ) # get the list of extensions ... ... @@ -76,7 +78,7 @@ setup( ] ) #Clean cpp and compiled file f=True f=False if f: if os.path.exists("build"): shutil.rmtree("build") ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!