An error occurred while loading the file. Please try again.
-
Dumoulin Nicolas authored11afd67a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# coding = utf-8
import numpy as np
cimport numpy as np
from ..base cimport Base
from cython.parallel cimport prange,parallel
from ..helpers.general import parsenx2graph
cimport cython
cdef class BP_2(Base):
cdef int node_del
cdef int node_ins
cdef int edge_del
cdef int edge_ins
def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1):
"""
BP_2 Constructor
Parameters
----------
node_del :int
Node deletion cost
node_ins : int
Node insertion cost
edge_del : int
Edge Deletion cost
edge_ins : int
Edge Insertion cost
"""
Base.__init__(self,1,False)
self.node_del = node_del
self.node_ins = node_ins
self.edge_del = edge_del
self.edge_ins = edge_ins
@cython.boundscheck(False)
cpdef np.ndarray compare(self,list listgs, list selected):
cdef int n = len(listgs)
cdef list new_gs=parsenx2graph(listgs)
cdef double[:,:] comparison_matrix = np.zeros((n, n))
cdef double[:] selected_test = self.get_selected_array(selected,n)
cdef int i,j
cdef long[:] n_nodes = np.array([g.size() for g in new_gs])
cdef long[:] n_edges = np.array([g.density() for g in new_gs])
with nogil, parallel(num_threads=self.cpu_count):
for i in prange(n,schedule='static'):
for j in range(i,n):
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1:
with gil:
comparison_matrix[i, j] = self.bp2(new_gs[i], new_gs[j])
else:
comparison_matrix[i, j] = 0
comparison_matrix[j, i] = comparison_matrix[i, j]
return np.array(comparison_matrix)
cdef double bp2(self, g1, g2):
"""
Compute the BP2 similarity value between two `networkx.Graph`
Parameters
----------
g1 : gmatch4py.Graph
First Graph
g2 : gmatch4py.Graph
Second Graph
Returns
-------
float
similarity value
"""
return np.min([self.distance_bp2(self.psi(g1,g2)),self.distance_bp2(self.psi(g2,g1))])
cdef double distance_bp2(self,e):
"""
Return the distance based on the edit path found.
Parameters
----------
e : list
Contains the edit path costs
Returns
-------
double
Return sum of the costs from the edit path
"""
return np.sum(e)
cdef list psi(self,g1,g2):
"""
Return the optimal edit path :math:`\psi` based on BP2 algorithm.
Parameters
----------
g1 : networkx.Graph
First Graph
g2 : networkx.Graph
Second Graph
Returns
-------
list
list containing costs from the optimal edit path
"""
cdef list psi_=[]
cdef list nodes1 = list(g1.nodes())
cdef list nodes2 = list(g2.nodes())
for u in nodes1:
v=None
for w in nodes2:
if 2*self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,None) + self.fuv(g1,g2,None,w)\
and self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,v):
v=w
psi_.append(self.fuv(g1,g2,u,v))
if u:
nodes1= list(set(nodes1).difference(set([u])))
if v:
nodes2= list(set(nodes2).difference(set([v])))
for v in nodes2:
psi_.append(self.fuv(g1,g2,None,v))
return psi_
cdef float fuv(self, g1, g2, str n1, str n2):
"""
Compute the Node Distance function
Parameters
----------
g1 : gmatch4py.Graph
First graph
g2 : gmatch4py.Graph
Second graph
n1 : int or str
identifier of the first node
n2 : int or str
identifier of the second node
Returns
-------
float
node distance
"""
if n2 == None: # Del
return self.node_del + ((self.edge_del / 2.) * g1.degree(n1))
if n1 == None: # Insert
return self.node_ins + ((self.edge_ins / 2.) * g2.degree(n2))
else:
if n1 == n2:
return 0
return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2
cdef float hed_edge(self, g1, g2, str n1, str n2):
"""
Compute HEDistance between edges of n1 and n2, respectively in g1 and g2
Parameters
----------
g1 : gmatch4py.Graph
First graph
g2 : gmatch4py.Graph
Second graph
n1 : int or str
identifier of the first node
n2 : int or str
identifier of the second node
Returns
-------
float
HEDistance between g1 and g2
"""
return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2)
cdef float sum_gpq(self, g1, str n1, g2, str n2):
"""
Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2
Parameters
----------
g1 : gmatch4py.Graph
First graph
g2 : gmatch4py.Graph
Second graph
n1 : int or str
identifier of the first node
n2 : int or str
identifier of the second node
Returns
-------
float
Nearest Neighbour Distance
"""
#if isinstance(g1, nx.MultiDiGraph):
cdef list edges1 = g1.get_edges_no(n1) if n1 else []
cdef list edges2 = g2.get_edges_no(n2) if n2 else []
cdef np.ndarray min_sum = np.zeros(len(edges1))
edges2.extend([None])
cdef np.ndarray min_i
for i in range(len(edges1)):
min_i = np.zeros(len(edges2))
for j in range(len(edges2)):
min_i[j] = self.gpq(edges1[i], edges2[j])
min_sum[i] = np.min(min_i)
return np.sum(min_sum)
cdef float gpq(self, str e1, str e2):
"""
Compute the edge distance function
Parameters
----------
e1 : str
first edge identifier
e2
second edge indentifier
Returns
-------
float
edge distance
"""
if e2 == None: # Del
return self.edge_del
if e1 == None: # Insert
return self.edge_ins
else:
if e1 == e2:
return 0
return (self.edge_del + self.edge_ins) / 2.