Commit 31bf928c authored by Fize Jacques
Browse files

Graph class almost done

parent d8645375
......@@ -2,11 +2,16 @@
from __future__ import print_function
import sys
import warnings
import numpy as np
from scipy.optimize import linear_sum_assignment
try:
from munkres import munkres
except ImportError:
warnings.warn("To obtain optimal results install the Cython 'munkres' module at https://github.com/jfrelinger/cython-munkres-wrapper")
from scipy.optimize import linear_sum_assignment as munkres
cimport numpy as np
from ..base cimport Base
import networkx as nx
cdef class AbstractGraphEditDistance(Base):
......@@ -35,9 +40,7 @@ cdef class AbstractGraphEditDistance(Base):
:return:
"""
cdef np.ndarray cost_matrix = self.create_cost_matrix(G,H).astype(float)
row_ind,col_ind = linear_sum_assignment(cost_matrix)
cdef int f=len(row_ind)
return [cost_matrix[row_ind[i]][col_ind[i]] for i in range(f)]
return cost_matrix[munkres(cost_matrix)].tolist()
cpdef np.ndarray create_cost_matrix(self, G, H):
"""
......@@ -53,8 +56,13 @@ cdef class AbstractGraphEditDistance(Base):
The delete -> delete region is filled with zeros
"""
cdef int n = G.number_of_nodes()
cdef int m = H.number_of_nodes()
cdef int n,m
try:
n = G.number_of_nodes()
m = H.number_of_nodes()
except:
n = G.size()
m = H.size()
cdef np.ndarray cost_matrix = np.zeros((n+m,n+m))
cdef list nodes1 = list(G.nodes())
cdef list nodes2 = list(H.nodes())
......@@ -89,7 +97,7 @@ cdef class AbstractGraphEditDistance(Base):
for i in range(n):
for j in range(n):
g1,g2=listgs[i],listgs[j]
f=self.isAccepted(g1,i,selected)
f=self.isAccepted(g1 if isinstance(g1,nx.Graph) else g1.get_nx(),i,selected)
if f:
comparison_matrix[i, j] = self.distance_ged(g1, g2)
else:
......
# -*- coding: UTF-8 -*-
import sys
import networkx as nx
import numpy as np
cimport numpy as np
from .abstract_graph_edit_dist cimport AbstractGraphEditDistance
from ..base cimport intersection,union_
from ..graph cimport Graph
cdef class GraphEditDistance(AbstractGraphEditDistance):
    """Graph edit distance driven by four unit costs.

    The node/edge deletion and insertion costs are forwarded to
    ``AbstractGraphEditDistance``; ``weighted`` is stored on the instance.
    """

    def __init__(self,node_del,node_ins,edge_del,edge_ins,weighted=False):
        AbstractGraphEditDistance.__init__(self,node_del,node_ins,edge_del,edge_ins)
        self.weighted=weighted

    cpdef double substitute_cost(self, node1, node2, G, H):
        """Cost of substituting ``node1`` (in G) by ``node2`` (in H).

        Thin wrapper that delegates to :meth:`relabel_cost`.
        """
        return self.relabel_cost(node1, node2, G, H)

    def add_edges(self,node1,node2,G):
        """Return an edge-less copy of G's networkx graph, refilled with
        whatever ``G.edges(node1, node2)`` yields."""
        skeleton=nx.create_empty_copy(G.get_nx())
        try:
            skeleton.add_edges_from(G.edges(node1,node2))
        except Exception:
            # Workaround kept from the original author: an edge entry whose
            # attribute is None makes add_edges_from fail, so retry with bare
            # (source, target) pairs.
            skeleton.add_edges_from(
                [(item[0],item[1]) for item in G.edges(node1,node2)]
            )
        return skeleton

    cpdef relabel_cost(self, node1, node2, G, H):
        """Cost of turning ``node1`` of G into ``node2`` of H.

        * same node, same degree in both graphs: ``0.0``;
        * same node, different degree: edge insertions/deletions weighted by
          how far each side's edge count is from the shared edge count;
        * the two nodes are linked in G (either direction):
          ``node_ins + node_del``;
        * ``node2`` absent from G: ``node_del + node_ins`` plus the cost of
          inserting ``node2`` as found in H;
        * anything else: ``sys.maxsize``.
        """
        # Case 1: both labels designate the same node.
        if node1 == node2:
            if G.degree(node1) == H.degree(node2):
                return 0.0
            sub_g = Graph(self.add_edges(node1,node2,G),G.get_node_key(),G.get_egde_key())
            sub_h = Graph(self.add_edges(node1,node2,H),H.get_node_key(),H.get_egde_key())
            shared= sub_g.size_edge_intersect(sub_h)
            add_diff=abs(sub_h.density()-shared)
            del_diff=abs(sub_g.density()-shared)
            return (add_diff*self.edge_ins)+(del_diff*self.edge_del)

        # Case 2: the two nodes are connected in G.
        if G.has_edge(node1,node2) or G.has_edge(node2,node1):
            return self.node_ins+self.node_del

        # Case 3: node2 only exists on the H side.
        if node2 not in G:
            nodes_h=H.nodes()
            index=list(nodes_h).index(node2)
            return self.node_del+self.node_ins+self.insert_cost(index,index,nodes_h,H)

        return sys.maxsize

    cdef double delete_cost(self, int i, int j, nodesG, G):
        """Cost of deleting node ``nodesG[i]``; off-diagonal cells
        (``i != j``) get ``sys.maxsize``."""
        if i != j:
            return sys.maxsize
        # Removing a node also removes its incident edges, hence the
        # weighted-degree term.
        return self.node_del+(G.degree(nodesG[i],weight=True)*self.edge_del)

    cdef double insert_cost(self, int i, int j, nodesH, H):
        """Cost of inserting node ``nodesH[j]``; off-diagonal cells
        (``i != j``) get ``sys.maxsize``."""
        if i != j:
            return sys.maxsize
        deg=H.degree(nodesH[j],weight=True)
        # A dict-valued degree is treated as "no incident edge".
        if isinstance(deg,dict):
            deg=0
        return self.node_ins+(deg*self.edge_ins)
\ No newline at end of file
cimport numpy as np
# Declaration of the Graph extension type: cached, hash-based view of a
# networkx graph. The attribute and method order below defines the C-level
# layout and must stay in sync with the implementation file.
cdef class Graph:
##################################
# ATTRIBUTES
##################################
# GRAPH PROPERTY ATTRIBUTES
###########################
cdef bint is_directed # True if the graph is directed
cdef bint is_multi # True if the graph is a multigraph
# presumably True when a node attribute key was supplied -- TODO confirm
cdef bint is_node_attr
# When True, edge hashes include the edge attribute value and
# per-attribute degree tables are maintained.
cdef bint is_edge_attr
# ATTR VAL ATTRIBUTES
#####################
cdef str node_attr_key # Key that contains the main attr value for a node
cdef str edge_attr_key # Key that contains the main attr value for an edge
cdef set unique_node_attr_vals # set of node attr values (presumably the distinct ones -- TODO confirm)
cdef set unique_edge_attr_vals # set of edge attr values; keys of the per-attribute degree tables
## NODE ATTRIBUTES
#################
cdef list nodes_list # list of nodes ids
cdef list nodes_attr_list # list of attr value for each node (following nodes list order)
cdef list nodes_hash # hash representation of every node
cdef set nodes_hash_set # hash representation of every node (set version for intersection and union operation)
cdef dict nodes_idx # index of each node in `nodes_list`
cdef list nodes_weight # list that contains each node's weight (following nodes_list order)
cdef long[:] nodes_degree # degree of each node (looked up through nodes_idx)
cdef long[:] nodes_degree_in # in-degree of each node
cdef long[:] nodes_degree_out # out-degree of each node
cdef long[:] nodes_degree_weighted # weighted version of nodes_degree
cdef long[:] nodes_degree_in_weighted # weighted version of nodes_degree_in
cdef long[:] nodes_degree_out_weighted # weighted version of nodes_degree_out
cdef dict degree_per_attr # {edge attr value: {node id: {"in": count, "out": count}}}
cdef dict degree_per_attr_weighted # same layout as degree_per_attr, counts scaled by edge weight
cdef list attr_nodes # list of attr(dict) values for each node
# EDGES ATTRIBUTES
##################
cdef list edges_list # edge list
cdef list edges_attr_list # list of attr value for each edge (following edges_list order)
cdef dict edges_hash_idx # edge hash -> index in edges_list and edges_attr_list
cdef list edges_hash # hash representation of every edge ## TODO: to be reviewed
cdef set edges_hash_set # set of hash representation of every edges (set version for intersection and union operation)
cdef dict edges_weight # edge hash -> edge weight (1 when the edge has no "weight" attribute)
cdef dict edges_hash_map # {id1: {id2: hash}}; for multigraphs {id1: {id2: {attr value: hash}}}
cdef list attr_edges # list of attr(dict) values for each edge
# SIZE ATTRIBUTES
###############
cdef long number_of_nodes # number of nodes
cdef long number_of_edges # number of edges
cdef dict number_of_edges_per_attr # number of edges per attr value
cdef dict number_of_nodes_per_attr # number of nodes per attr value
cdef object nx_g # the wrapped graph object passed to __init__
##################################
# METHODS
##################################
# DIMENSION GETTER
##################
# size() returns number_of_nodes; density() is used by callers as the edge
# count; the *_attr variants restrict the count to one attribute value.
cpdef long size(self)
cpdef int size_attr(self, attr_val)
cpdef long density(self)
cpdef int density_attr(self, str attr_val)
# HASH FUNCTION
###############
cpdef str hash_node(self,str n1)
cpdef str hash_edge(self,str n1,str n2)
cpdef str hash_node_attr(self,str n1, str attr_value)
cpdef str hash_edge_attr(self,str n1,str n2, str attr_value)
## EXIST FUNCTION
###############
cpdef bint has_node(self,str n_id)
cpdef bint has_edge(self,str n_id1,str n_id2)
## LEN FUNCTION
###############
# Sizes of the intersection / union of this graph's node or edge hash set
# with those of G.
cpdef int size_node_intersect(self,Graph G)
cpdef int size_node_union(self,Graph G)
cpdef int size_edge_intersect(self,Graph G)
cpdef int size_edge_union(self,Graph G)
# DEGREE FUNCTION
#################
# `weight=*` marks an optional argument whose default (False) is given in
# the implementation file.
cpdef int degree(self,str n_id, bint weight=*)
cpdef int in_degree(self,str n_id, bint weight=*)
cpdef int out_degree(self,str n_id, bint weight=*)
cpdef int in_degree_attr(self,str n_id,str attr_val, bint weight=*)
cpdef int out_degree_attr(self,str n_id,str attr_val, bint weight=*)
cpdef int degree_attr(self,str n_id,str attr_val, bint weight=*)
## GETTER
#########
cpdef list get_edges_(self,e1,e2)
cpdef set get_edges_hash(self)
cpdef set get_nodes_hash(self)
cpdef str get_node_key(self)
# NOTE: "egde" is a typo for "edge", but the name is part of the public
# interface and is called under this spelling elsewhere.
cpdef str get_egde_key(self)
cpdef dict get_edge_attrs(self,edge_hash)
cpdef dict get_node_attrs(self, node_hash)
cpdef dict get_node_attr(self, node_hash)
cpdef dict get_edge_attr(self,edge_hash)
\ No newline at end of file
......@@ -8,57 +8,6 @@ import networkx as nx
cdef class Graph:
# GRAPH PROPERTY ATTRIBUTES
###########################
cdef bint is_directed # If the graph is directed
cdef bint is_multi # If the graph is a Multi-Graph
cdef bint is_node_attr
cdef bint is_edge_attr
# ATTR VAL ATTRIBUTES
#####################
cdef str node_attr_key # Key that contains the main attr value for a node
cdef str edge_attr_key # Key that contains the main attr value for an edge
cdef set unique_node_attr_vals # list
cdef set unique_edge_attr_vals # list
## NODE ATTRIBUTES
#################
cdef list nodes_list # list of nodes ids
cdef list nodes_attr_list # list of attr value for each node (following nodes list order)
cdef list nodes_hash # hash representation of every node
cdef set nodes_hash_set # hash representation of every node (set version for intersection and union operation)
cdef dict nodes_idx # index of each node in `nodes_list`
cdef list nodes_weight # list that contains each node's weight (following nodes_list order)
cdef long[:] nodes_degree # degree list
cdef long[:] nodes_degree_in # in degree list
cdef long[:] nodes_degree_out # out degree list
cdef dict degree_per_attr # degree information per attr val
cdef list attr_nodes # list of attr(dict) values for each node
# EDGES ATTRIBUTES
##################
cdef list edges_list # edge list
cdef list edges_attr_list # list of attr value for each edge (following nodes list order)
cdef list edges_hash # hash representation of every edges ## A VOIR !
cdef set edges_hash_set # set of hash representation of every edges (set version for intersection and union operation)
cdef dict edges_weight # list that contains each node's weight (following nodes_list order)
cdef dict edges_hash_map #[id1,[id2,hash]]
cdef list attr_edges # list of attr(dict) values for each edge
# SIZE INDICATOR
###############
cdef long number_of_nodes # number of nodes
cdef long number_of_edges # number of edges
cdef dict number_of_edges_per_attr # number of nodes per attr value
cdef dict number_of_nodes_per_attr # number of edges per attr value
cdef object nx_g
def __init__(self,G, node_attr_key="",edge_attr_key=""):
self.nx_g=G
......@@ -101,53 +50,81 @@ cdef class Graph:
degree_all=[]
degree_in=[]
degree_out=[]
degree_all_weighted=[]
degree_in_weighted=[]
degree_out_weighted=[]
if self.is_edge_attr:
self.degree_per_attr={attr_v:{n:{"in":0,"out":0} for n in self.nodes_list} for attr_v in self.unique_edge_attr_vals}
self.degree_per_attr_weighted={attr_v:{n:{"in":0,"out":0} for n in self.nodes_list} for attr_v in self.unique_edge_attr_vals}
# Retrieving Degree Information
for n in self.nodes_list:
degree_all.append(G.degree(n))
degree_all_weighted.append(G.degree(n,weight="weight"))
if self.is_directed:
degree_in.append(G.in_degree(n))
degree_in_weighted.append(G.in_degree(n,weight="weight"))
degree_out.append(G.out_degree(n))
degree_out_weighted.append(G.out_degree(n))
else:
degree_in.append(degree_all[-1])
degree_in_weighted.append(degree_all_weighted[-1])
degree_out.append(degree_all[-1])
degree_out_weighted.append(degree_all_weighted[-1])
if self.is_edge_attr:
if self.is_directed:
in_edge=list(G.in_edges(n,data=True))
out_edge=list(G.in_edges(n,data=True))
for n1,n2,attr_dict in in_edge:
self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["in"]+=1
self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["in"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 )
for n1,n2,attr_dict in out_edge:
self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["out"]+=1
self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["out"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 )
else:
edges=G.edges(n,data=True)
for n1,n2,attr_dict in edges:
self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["in"]+=1
self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["out"]+=1
self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["in"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 )
self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["out"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 )
self.nodes_degree=np.array(degree_all)
self.nodes_degree_in=np.array(degree_in)
self.nodes_degree_out=np.array(degree_out)
self.nodes_degree_weighted=np.array(degree_all_weighted)
self.nodes_degree_in_weighted=np.array(degree_in_weighted)
self.nodes_degree_out_weighted=np.array(degree_out_weighted)
# EDGE INFO INIT
#################
self.edges_hash=[]
self.edges_hash_map = {}
self.edges_hash_idx = {}
for ix, ed in enumerate(self.edges_list):
e1,e2=ed
if not e1 in self.edges_hash_map:self.edges_hash_map[e1]={}
self.edges_hash_map[e1][e2]=self.hash_edge_attr(e1,e2,self.edges_attr_list[ix]) if self.is_edge_attr else self.hash_edge(e1,e2)
self.edges_hash.append(self.edges_hash_map[e1][e2])
hash_=self.hash_edge_attr(e1,e2,self.edges_attr_list[ix]) if self.is_edge_attr else self.hash_edge(e1,e2)
if self.is_multi:
if not e2 in self.edges_hash_map[e1]:self.edges_hash_map[e1][e2]={}
self.edges_hash_map[e1][e2][self.edges_attr_list[ix]]=hash_
else:
self.edges_hash_map[e1][e2]=hash_
self.edges_hash_idx[hash_]=ix
self.edges_hash.append(hash_)
self.edges_hash_set=set(self.edges_hash)
self.edges_weight={}
for e1,e2,attr_dict in list(G.edges(data=True)):
self.edges_hash_map[e1][e2]=attr_dict["weight"] if "weight" in attr_dict else 1
hash_=self.hash_edge_attr(e1,e2,attr_dict[self.edge_attr_key]) if self.is_edge_attr else self.hash_edge(e1,e2)
self.edges_weight[hash_]=attr_dict["weight"] if "weight" in attr_dict else 1
self.number_of_edges = len(self.edges_list)
self.number_of_nodes = len(self.nodes_list)
......@@ -177,12 +154,12 @@ cdef class Graph:
return "_".join([n1,n2,attr_value])
## EXIST FUNCTION
cdef bint has_node(self,str n_id):
cpdef bint has_node(self,str n_id):
    """Tell whether ``n_id`` is one of the node ids held in ``nodes_list``."""
    return n_id in self.nodes_list
cdef bint has_edge(self,str n_id1,str n_id2):
cpdef bint has_edge(self,str n_id1,str n_id2):
if self.is_directed:
if n_id1 in self.edges_hash_map and n_id2 in self.edges_hash_map[n_id1][n_id2]:
return True
......@@ -205,6 +182,10 @@ cdef class Graph:
return len(self.edges_hash_set.union(G.edges_hash_set))
## GETTER
# Return the graph object this instance was built from (stored as nx_g).
def get_nx(self):
return self.nx_g
def nodes(self,data=False):
if data:
return self.nodes_list,self.attr_nodes
......@@ -217,11 +198,39 @@ cdef class Graph:
return self.edges_list,self.attr_edges
else:
return self.edges_list
cpdef list get_edges_(self,e1,e2):
    """Return the edge(s) going from ``e1`` to ``e2`` as a list of triples.

    Each triple is ``(e1, e2, attr_value)``. Without an edge attribute key
    a single ``(e1, e2, None)`` triple is returned.

    Raises:
        KeyError: if no edge ``e1 -> e2`` is registered in ``edges_hash_map``
            (only when an edge attribute key is in use).
    """
    if not self.is_edge_attr:
        return [(e1,e2,None)]

    entry=self.edges_hash_map[e1][e2]
    # edges_hash_map stores one hash string per (e1, e2) for simple graphs
    # and an {attr value: hash} dict for multigraphs. Iterating the entry
    # directly would walk the characters of the string (or the dict keys)
    # instead of the hashes, so normalise to a list of hashes first.
    if self.is_multi:
        hashes=list(entry.values())
    else:
        hashes=[entry]
    return [(e1,e2,self.edges_attr_list[self.edges_hash_idx[hash_]]) for hash_ in hashes]
# Return the entry of edges_attr_list stored for the edge whose hash is
# `edge_hash`; raises KeyError when the hash is unknown.
# NOTE(review): the return type is declared `dict`, yet edges_attr_list
# elements are passed as `str` to hash_edge_attr elsewhere in this class --
# confirm the intended return type (changing it also requires updating the
# .pxd declaration).
cpdef dict get_edge_attr(self,edge_hash):
return self.edges_attr_list[self.edges_hash_idx[edge_hash]]
# NOTE(review): despite its name this getter reads the *edge* tables
# (edges_attr_list / edges_hash_idx), so a node hash will normally raise
# KeyError. It presumably should read the node tables instead -- confirm
# which index maps a node hash to its position before changing it.
cpdef dict get_node_attr(self, node_hash):
return self.edges_attr_list[self.edges_hash_idx[node_hash]]
# Return the attribute dict (from attr_edges) of the edge whose hash is
# `edge_hash`; raises KeyError when the hash is not in edges_hash_idx.
cpdef dict get_edge_attrs(self,edge_hash):
return self.attr_edges[self.edges_hash_idx[edge_hash]]
cpdef dict get_node_attrs(self, node_hash):
    """Return the attribute dict of the node whose hash is ``node_hash``.

    The previous implementation resolved the position through
    ``edges_hash_idx``, which is keyed by *edge* hashes, so node hashes
    could not be found.

    Raises:
        KeyError: if ``node_hash`` is not a known node hash.
    """
    # NOTE(review): assumes nodes_hash follows the same order as attr_nodes
    # (i.e. the nodes_list order) -- confirm against __init__.
    try:
        position=self.nodes_hash.index(node_hash)
    except ValueError:
        # Keep the exception type callers got from the former dict lookup.
        raise KeyError(node_hash)
    return self.attr_nodes[position]
# Return the set of edge hashes (the internal set itself, not a copy).
cpdef set get_edges_hash(self):
return self.edges_hash_set
# Return the set of node hashes (the internal set itself, not a copy).
cpdef set get_nodes_hash(self):
return self.nodes_hash_set
# Return the attribute key used as the main value of a node.
cpdef str get_node_key(self):
return self.node_attr_key
# Return the attribute key used as the main value of an edge.
# NOTE: the method name is misspelled ("egde"), but callers rely on it.
cpdef str get_egde_key(self):
return self.edge_attr_key
#####
# Return the number of nodes of the graph.
cpdef long size(self):
return self.number_of_nodes
......@@ -234,28 +243,44 @@ cdef class Graph:
# Return the number of edges recorded for the attribute value `attr_val`;
# raises KeyError when that value is not in number_of_edges_per_attr.
cpdef int density_attr(self, str attr_val):
return self.number_of_edges_per_attr[attr_val]
cpdef int degree(self,str n_id):
cpdef int degree(self,str n_id, bint weight=False):
    """Degree of node ``n_id``; the weighted degree when ``weight`` is set."""
    position=self.nodes_idx[n_id]
    if not weight:
        return self.nodes_degree[position]
    return self.nodes_degree_weighted[position]
cpdef int in_degree(self,str n_id):
cpdef int in_degree(self,str n_id, bint weight=False):
    """In-degree of node ``n_id``; the weighted one when ``weight`` is set."""
    position=self.nodes_idx[n_id]
    if not weight:
        return self.nodes_degree_in[position]
    return self.nodes_degree_in_weighted[position]
cpdef int out_degree(self,str n_id):
cpdef int out_degree(self,str n_id, bint weight=False):
    """Out-degree of node ``n_id``; the weighted one when ``weight`` is set."""
    position=self.nodes_idx[n_id]
    if not weight:
        return self.nodes_degree_out[position]
    return self.nodes_degree_out_weighted[position]
cpdef int in_degree_attr(self,str n_id,str attr_val):
cpdef int in_degree_attr(self,str n_id,str attr_val, bint weight=False):
    """In-degree of ``n_id`` counting only edges whose attribute is ``attr_val``.

    With ``weight`` set, the edge weights are summed instead of counting
    the edges.

    Raises:
        AttributeError: if the graph was built without an edge attribute key.
        KeyError: if ``attr_val`` or ``n_id`` is unknown.
    """
    # The per-attribute tables only exist when an edge attribute key is
    # defined, whatever the directedness. The former test also required the
    # graph to be undirected, which let directed graphs without edge
    # attributes fall through to an unset table.
    if not self.is_edge_attr:
        raise AttributeError("No edge attribute have been defined")
    if weight:
        return self.degree_per_attr_weighted[attr_val][n_id]["in"]
    return self.degree_per_attr[attr_val][n_id]["in"]
cpdef int out_degree_attr(self,str n_id,str attr_val):
cpdef int out_degree_attr(self,str n_id,str attr_val, bint weight=False):
    """Out-degree of ``n_id`` counting only edges whose attribute is ``attr_val``.

    With ``weight`` set, the edge weights are summed instead of counting
    the edges.

    Raises:
        AttributeError: if the graph was built without an edge attribute key.
        KeyError: if ``attr_val`` or ``n_id`` is unknown.
    """
    # The per-attribute tables only exist when an edge attribute key is
    # defined, whatever the directedness. The former test also required the
    # graph to be undirected, which let directed graphs without edge
    # attributes fall through to an unset table.
    if not self.is_edge_attr:
        raise AttributeError("No edge attribute have been defined")
    if weight:
        return self.degree_per_attr_weighted[attr_val][n_id]["out"]
    return self.degree_per_attr[attr_val][n_id]["out"]
cpdef int degree_attr(self,str n_id,str attr_val):
cpdef int degree_attr(self,str n_id,str attr_val, bint weight=False):
    """Degree of ``n_id`` restricted to edges whose attribute is ``attr_val``.

    Undirected graphs report the "out" counter; directed graphs report
    "in" + "out". ``weight`` switches to the weighted counters.

    Raises:
        AttributeError: if the graph was built without an edge attribute key.
    """
    if not self.is_edge_attr:
        raise AttributeError("No edge attribute have been defined")
    table=self.degree_per_attr_weighted if weight else self.degree_per_attr
    counters=table[attr_val][n_id]
    if self.is_directed:
        return counters["in"] + counters["out"]
    return counters["out"]
#GRAPH SETTER
......
......@@ -3,6 +3,8 @@
import numpy as np
cimport numpy as np
from .base cimport Base,intersection
from .graph cimport Graph
from cython.parallel cimport prange,parallel
cdef class VertexEdgeOverlap(Base):
......@@ -17,24 +19,39 @@ cdef class VertexEdgeOverlap(Base):
Base.__init__(self,0,True)
cpdef np.ndarray compare(self,list listgs, list selected):
    """Compute the pairwise vertex/edge overlap of the graphs in ``listgs``.

    For every pair (i, j) the value is
    ``2 * (|shared nodes| + |shared edges|) / (|V_i| + |V_j| + |E_i| + |E_j|)``.
    Pairs involving a graph without nodes are left at 0.

    Args:
        listgs: graphs to compare; each one is wrapped in ``Graph``.
        selected: NOTE(review): accepted but not consulted by this
            implementation. The previous version filtered rows through
            ``self.isAccepted(g1, i, selected)`` -- confirm whether that
            filter should be restored.

    Returns:
        An ``(n, n)`` symmetric array of overlap values.
    """
    cdef int n = len(listgs)
    cdef list new_gs=[Graph(g) for g in listgs]
    cdef double[:,:] comparison_matrix = np.zeros((n, n))
    cdef int denom,i,j
    # Explicit C-long dtype: np.array([]) would otherwise be float64 and be
    # rejected by the long[:] buffers when listgs is empty.
    cdef long[:] n_nodes = np.array([g.size() for g in new_gs], dtype="l")
    cdef long[:] n_edges = np.array([g.density() for g in new_gs], dtype="l")
    cdef double[:,:] intersect_len_nodes = np.zeros((n, n))
    cdef double[:,:] intersect_len_edges = np.zeros((n, n))

    # Set intersections go through Python objects, so they are computed
    # up front, before the GIL is released.
    for i in range(n):
        for j in range(i,n):
            intersect_len_nodes[i, j]=new_gs[i].size_node_intersect(new_gs[j])
            intersect_len_edges[i, j]=new_gs[i].size_edge_intersect(new_gs[j])

    with nogil, parallel(num_threads=4):
        for i in prange(n,schedule='static'):
            for j in range(i,n):
                if n_nodes[i] > 0 and n_nodes[j] > 0 :
                    denom=n_nodes[i]+n_nodes[j]+\
                          n_edges[i]+n_edges[j]
                    if denom == 0:
                        continue
                    comparison_matrix[i, j]=(2*(intersect_len_nodes[i, j]
                                                +intersect_len_edges[i, j]))/denom
                    # Mirror into the lower triangle. The former line
                    # assigned cell [i][j] to itself and left it empty.
                    comparison_matrix[j, i] = comparison_matrix[i, j]
    return np.array(comparison_matrix)
......@@ -42,7 +42,9 @@ def makeExtension(extName):
return Extension(
extName,
[extPath],include_dirs=[np.get_include()],language='c++',libraries=libs
[extPath],include_dirs=[np.get_include()],language='c++',libraries=libs,
#extra_compile_args = ["-O0", "-fopenmp"],extra_link_args=['-fopenmp']
)
# get the list of extensions
......@@ -76,7 +78,7 @@ setup(
]
)
#Clean cpp and compiled file
f=True
f=False
if f:
if os.path.exists("build"):
shutil.rmtree("build")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment