Commit c1bfdd5e authored by Fize Jacques

Almost done! Parallelization and Graph seem to work

parent 47125f55
......@@ -4,14 +4,15 @@ cdef class Base:
## Attribute(s)
cdef int type_alg
cdef bint normalized
cdef int cpu_count
## Methods
cpdef np.ndarray compare(self,list graph_list, list selected)
cpdef np.ndarray compare_old(self,list listgs, list selected)
cpdef np.ndarray distance(self, np.ndarray matrix)
cpdef np.ndarray similarity(self, np.ndarray matrix)
cpdef bint isAccepted(self,G,index,selected)
cpdef list get_selected_array(self,selected,size_corpus)
cpdef np.ndarray get_selected_array(self,selected,size_corpus)
cpdef intersection(G,H)
cpdef union_(G,H)
......@@ -3,6 +3,10 @@
import numpy as np
cimport numpy as np
import networkx as nx
cimport cython
import multiprocessing
cpdef np.ndarray minmax_scale(np.ndarray matrix):
"""
......@@ -136,17 +140,22 @@ cdef class Base:
else:
self.type_alg=type_alg
self.normalized=normalized
self.cpu_count=multiprocessing.cpu_count()
cpdef list get_selected_array(self,selected,size_corpus):
cdef list selected_test = [True]*size_corpus
if selected:
selected_test = [False]*size_corpus
cpdef np.ndarray get_selected_array(self,selected,size_corpus):
cdef double[:] selected_test = np.zeros(size_corpus)
if not selected == None:
for ix in range(len(selected)):
selected_test[ix]=True
return selected
selected_test[selected[ix]]=1
return np.array(selected_test)
else:
return np.array(selected_test)+1
cpdef np.ndarray compare_old(self,list listgs, list selected):
pass
@cython.boundscheck(False)
cpdef np.ndarray compare(self,list graph_list, list selected):
"""
Return the similarity/distance matrix using the current algorithm.
......
......@@ -115,17 +115,17 @@ cdef class AbstractGraphEditDistance(Base):
cdef double[:,:] comparison_matrix = np.zeros((n, n))
listgs=parsenx2graph(listgs)
cdef long[:] n_nodes = np.array([g.size() for g in listgs])
cdef bint[:] selected_test = self.get_selected_array(selected,n)
cdef double[:] selected_test = np.array(self.get_selected_array(selected,n))
cdef int i,j
val=np.inf
with nogil, parallel(num_threads=8):
with nogil, parallel(num_threads=self.cpu_count):
for i in prange(n,schedule='static'):
for j in range(n):
if n_nodes[i]>0 and n_nodes[j]>0 and selected_test[i] :
with gil:
comparison_matrix[i][j] = self.distance_ged(listgs[i],listgs[j])
else:
comparison_matrix[i][j] = 0
#comparison_matrix[j, i] = comparison_matrix[i, j]
if n_nodes[i]>0 and n_nodes[j]>0 and selected_test[i] == 1 :
with gil:
comparison_matrix[i][j] = self.distance_ged(listgs[i],listgs[j])
else:
comparison_matrix[i][j] = 0
#comparison_matrix[j, i] = comparison_matrix[i, j]
return np.array(comparison_matrix)
......@@ -53,22 +53,22 @@ cdef class BP_2(Base):
cdef int n = len(listgs)
cdef list new_gs=parsenx2graph(listgs)
cdef double[:,:] comparison_matrix = np.zeros((n, n))
cdef bint[:] selected_test = self.get_selected_array(selected,n)
cdef double[:] selected_test = self.get_selected_array(selected,n)
cdef int i,j
cdef long[:] n_nodes = np.array([g.size() for g in new_gs])
cdef long[:] n_edges = np.array([g.density() for g in new_gs])
with nogil, parallel(num_threads=4):
with nogil, parallel(num_threads=self.cpu_count):
for i in prange(n,schedule='static'):
for j in range(i,n):
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == True:
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1:
with gil:
comparison_matrix[i, j] = self.bp2(new_gs[i], new_gs[j])
else:
comparison_matrix[i, j] = 0
comparison_matrix[j, i] = comparison_matrix[i, j]
return comparison_matrix
return np.array(comparison_matrix)
cdef double bp2(self, g1, g2):
......@@ -148,18 +148,18 @@ cdef class BP_2(Base):
:return:
"""
cdef np.ndarray min_sum = np.zeros(g1.size())
nodes1 = list(g1.nodes())
nodes2 = list(g2.nodes())
cdef list nodes1 = list(g1.nodes())
cdef list nodes2 = list(g2.nodes())
nodes2.extend([None])
cdef np.ndarray min_i
for i in range(len(nodes1)):
min_i = np.zeros(len(nodes2))
for j in range(len(nodes2)):
for i in range(g1.size()):
min_i = np.zeros(g2.size())
for j in range(g2.size()):
min_i[j] = self.fuv(g1, g2, nodes1[i], nodes2[j])
min_sum[i] = np.min(min_i)
return np.sum(min_sum)
cdef float fuv(self, g1, g2, n1, n2):
cdef float fuv(self, g1, g2, str n1, str n2):
"""
Compute the Node Distance function
:param g1: first graph
......@@ -177,7 +177,7 @@ cdef class BP_2(Base):
return 0
return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2
cdef float hed_edge(self, g1, g2, n1, n2):
cdef float hed_edge(self, g1, g2, str n1, str n2):
"""
Compute HEDistance between edges of n1 and n2, respectively in g1 and g2
:param g1: first graph
......@@ -189,7 +189,7 @@ cdef class BP_2(Base):
return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2)
cdef float sum_gpq(self, g1, n1, g2, n2):
cdef float sum_gpq(self, g1, str n1, g2, str n2):
"""
Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2
:param g1: first graph
......@@ -213,7 +213,7 @@ cdef class BP_2(Base):
min_sum[i] = np.min(min_i)
return np.sum(min_sum)
cdef float gpq(self, tuple e1, tuple e2):
cdef float gpq(self, str e1, str e2):
"""
Compute the edge distance function
:param e1: edge1
......
......@@ -7,7 +7,6 @@ import numpy as np
cimport numpy as np
from .abstract_graph_edit_dist cimport AbstractGraphEditDistance
from ..base cimport intersection,union_
from ..graph cimport Graph
cdef class GraphEditDistance(AbstractGraphEditDistance):
......
......@@ -49,12 +49,12 @@ cdef class HED(Base):
cdef int n = len(listgs)
cdef list new_gs=parsenx2graph(listgs)
cdef double[:,:] comparison_matrix = np.zeros((n, n))
cdef bint[:] selected_test = self.get_selected_array(selected,n)
cdef double[:] selected_test = np.array(self.get_selected_array(selected,n))
cdef int i,j
cdef long[:] n_nodes = np.array([g.size() for g in new_gs])
cdef long[:] n_edges = np.array([g.density() for g in new_gs])
with nogil, parallel(num_threads=4):
with nogil, parallel(num_threads=self.cpu_count):
for i in prange(n,schedule='static'):
for j in range(i,n):
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == True:
......@@ -64,7 +64,7 @@ cdef class HED(Base):
comparison_matrix[i, j] = 0
comparison_matrix[j, i] = comparison_matrix[i, j]
return comparison_matrix
return np.array(comparison_matrix)
cdef float hed(self, g1, g2):
......@@ -83,19 +83,19 @@ cdef class HED(Base):
:param g2: Second Graph
:return:
"""
cdef np.ndarray min_sum = np.zeros(len(g1))
nodes1 = list(g1.nodes())
nodes2 = list(g2.nodes())
cdef np.ndarray min_sum = np.zeros(g1.size())
cdef list nodes1 = list(g1.nodes())
cdef list nodes2 = list(g2.nodes())
nodes2.extend([None])
cdef np.ndarray min_i
for i in range(len(nodes1)):
min_i = np.zeros(len(nodes2))
for j in range(len(nodes2)):
for i in range(g1.size()):
min_i = np.zeros(g2.size())
for j in range(g2.size()):
min_i[j] = self.fuv(g1, g2, nodes1[i], nodes2[j])
min_sum[i] = np.min(min_i)
return np.sum(min_sum)
cdef float fuv(self, g1, g2, n1, n2):
cdef float fuv(self, g1, g2, str n1, str n2):
"""
Compute the Node Distance function
:param g1: first graph
......@@ -113,7 +113,7 @@ cdef class HED(Base):
return 0
return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2
cdef float hed_edge(self, g1, g2, n1, n2):
cdef float hed_edge(self, g1, g2, str n1, str n2):
"""
Compute HEDistance between edges of n1 and n2, respectively in g1 and g2
:param g1: first graph
......@@ -125,7 +125,7 @@ cdef class HED(Base):
return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2)
cdef float sum_gpq(self, g1, n1, g2, n2):
cdef float sum_gpq(self, g1, str n1, g2, str n2):
"""
Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2
:param g1: first graph
......@@ -149,7 +149,7 @@ cdef class HED(Base):
min_sum[i] = np.min(min_i)
return np.sum(min_sum)
cdef float gpq(self, tuple e1, tuple e2):
cdef float gpq(self, str e1, str e2):
"""
Compute the edge distance function
:param e1: edge1
......
......@@ -207,6 +207,7 @@ cdef class Graph:
return [(e1,e2,self.edges_attr_list[self.edges_hash_idx[hash_]])for hash_ in hashes]
else:
return [(e1,e2,None)]
cpdef list get_edges_no(self,str n):
return self.edges_of_nodes[n]
......
......@@ -5,7 +5,7 @@ cimport numpy as np
from .base cimport Base
from .base cimport intersection,union_
from ..helpers.general import parsenx2graph
from .helpers.general import parsenx2graph
from cython.parallel cimport prange,parallel
cdef class Jaccard(Base):
......@@ -47,7 +47,7 @@ cdef class Jaccard(Base):
cdef long[:] n_edges = np.array([g.density() for g in new_gs])
cdef int i,j
cdef bint[:] selected_test = self.get_selected_array(selected,n)
cdef double[:] selected_test = np.array(self.get_selected_array(selected,n))
cdef double[:,:] intersect_len_nodes = np.zeros((n, n))
cdef double[:,:] intersect_len_edges = np.zeros((n, n))
......@@ -59,10 +59,10 @@ cdef class Jaccard(Base):
intersect_len_edges[i][j]=new_gs[i].size_edge_intersect(new_gs[j])#len(set(hash_edges[i]).intersection(hash_edges[j]))
union_len_nodes[i][j]=new_gs[i].size_node_union(new_gs[j])
union_len_edges[i][j]=new_gs[i].size_node_union(new_gs[j])
with nogil, parallel(num_threads=4):
with nogil, parallel(num_threads=self.cpu_count):
for i in prange(n,schedule='static'):
for j in range(i,n):
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i]:
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1:
if union_len_edges[i][j] >0 and union_len_nodes[i][j] >0:
comparison_matrix[i][j]= \
(intersect_len_edges[i][j]/union_len_edges[i][j])*\
......
......@@ -105,7 +105,6 @@ cdef class WeisfeleirLehmanKernel(Base):
# cdef np.ndarray[np.float64_t] k
k = np.dot(phi.transpose(), phi)
print(1)
# MAIN LOOP
cdef int it = 0
......
......@@ -4,7 +4,7 @@ cimport numpy as np
from .graph cimport Graph
from .base cimport Base
from cython.parallel cimport prange,parallel
from ..helpers.general import parsenx2graph
from .helpers.general import parsenx2graph
cdef class MCS(Base):
"""
......@@ -31,7 +31,7 @@ cdef class MCS(Base):
cpdef np.ndarray compare(self,list listgs, list selected):
cdef int n = len(listgs)
cdef double [:,:] comparison_matrix = np.zeros((n, n))
cdef bint[:] selected_test = self.get_selected_array(selected,n)
cdef double[:] selected_test = np.array(self.get_selected_array(selected,n))
cdef list new_gs=parsenx2graph(listgs)
cdef long[:] n_nodes = np.array([g.size() for g in new_gs])
cdef double [:,:] intersect_len_nodes = np.zeros((n, n))
......@@ -40,10 +40,10 @@ cdef class MCS(Base):
for j in range(i,n):
intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j])
with nogil, parallel(num_threads=4):
with nogil, parallel(num_threads=self.cpu_count):
for i in prange(n,schedule='static'):
for j in range(i, n):
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i]:
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1:
comparison_matrix[i][j] = intersect_len_nodes[i][j]/max(n_nodes[i],n_nodes[j])
else:
comparison_matrix[i][j] = 0.
......
......@@ -5,7 +5,7 @@ cimport numpy as np
from .base cimport Base,intersection
from .graph cimport Graph
from cython.parallel cimport prange,parallel
from ..helpers.general import parsenx2graph
from .helpers.general import parsenx2graph
cdef class VertexEdgeOverlap(Base):
......@@ -47,7 +47,7 @@ cdef class VertexEdgeOverlap(Base):
cdef long[:] n_nodes = np.array([g.size() for g in new_gs])
cdef long[:] n_edges = np.array([g.density() for g in new_gs])
cdef bint[:] selected_test = self.get_selected_array(selected,n)
cdef double[:] selected_test = np.array(self.get_selected_array(selected,n))
cdef double[:,:] intersect_len_nodes = np.zeros((n, n))
cdef double[:,:] intersect_len_edges = np.zeros((n, n))
......@@ -56,10 +56,10 @@ cdef class VertexEdgeOverlap(Base):
intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j])
intersect_len_edges[i][j]=new_gs[i].size_edge_intersect(new_gs[j])#len(set(hash_edges[i]).intersection(hash_edges[j]))
with nogil, parallel(num_threads=4):
with nogil, parallel(num_threads=self.cpu_count):
for i in prange(n,schedule='static'):
for j in range(i,n):
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == True:
if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1:
denom=n_nodes[i]+n_nodes[j]+\
n_edges[i]+n_edges[j]
if denom == 0:
......
......@@ -70,7 +70,7 @@ setup(
cmdclass={'build_ext': build_ext},
setup_requires=["numpy","networkx","scipy",'scikit-learn'],
install_requires=["numpy","networkx","scipy",'scikit-learn'],
version="0.2.2alpha",
version="0.2.4alpha",
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
......
"""Smoke test: instantiate each gmatch4py comparator and run it on random trees."""
import networkx as nx
from gmatch4py import (
    BP_2,
    HED,
    MCS,
    BagOfNodes,
    GraphEditDistance,
    GreedyEditDistance,
    Jaccard,
    VertexEdgeOverlap,
    WeisfeleirLehmanKernel,
)

# Small corpus: ten random trees with ten nodes each.
graphs = [nx.random_tree(10) for _ in range(10)]

# These classes are constructed with four positional arguments
# (presumably the node/edge deletion and insertion costs -- confirm
# against their constructors).
FOUR_ARG_CLASSES = (GraphEditDistance, BP_2, GreedyEditDistance, HED)

for class_ in [BagOfNodes, WeisfeleirLehmanKernel, GraphEditDistance, BP_2,
               GreedyEditDistance, HED, Jaccard, MCS, VertexEdgeOverlap]:
    print(class_)
    if class_ in FOUR_ARG_CLASSES:
        comparator = class_(1, 1, 1, 1)
    elif class_ is WeisfeleirLehmanKernel:
        comparator = class_(h=2)
    else:
        comparator = class_()
    # Pass None rather than []: Base.get_selected_array treats any non-None
    # `selected` as an explicit list of indices, so an empty list marks no
    # graph as selected and the parallel loops only write zeros. None takes
    # the "select everything" branch, so the algorithms actually run.
    matrix = comparator.compare(graphs, None)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment