Commit 37b712ed authored by Fize Jacques's avatar Fize Jacques

Adding a bit of documentation

Optimize Bag of Clique by using a sparse matrix
parent c6bacf41
......@@ -6,6 +6,7 @@ from typing import Sequence
import networkx as nx
import numpy as np
cimport numpy as np
from scipy.sparse import csr_matrix,lil_matrix
import sys
from .base cimport Base,intersection
......@@ -20,6 +21,7 @@ cdef class BagOfCliques(Base):
cpdef np.ndarray compare(self,list listgs, list selected):
b=BagOfCliques()
bog=b.getBagOfCliques(listgs).astype(np.float32)
print(bog.shape)
#Compute cosine similarity
cdef int n=bog.shape[0]
cdef np.ndarray scores = np.zeros((n,n))
......@@ -28,8 +30,10 @@ cdef class BagOfCliques(Base):
if selected:
if not i in selected:
continue
bog_i=np.asarray(bog[i].todense())
for j in range(i,len(scores)):
scores[i,j]=(np.dot(bog[i],bog[j]))/(np.sqrt(np.sum(bog[i]**2))*np.sqrt(np.sum(bog[j]**2))) # Can be computed in one line
bog_j=np.asarray(bog[j].todense())
scores[i,j]=(np.dot(bog_i,bog_j.T))/(np.sqrt(np.sum(bog_i**2))*np.sqrt(np.sum(bog_j**2))) # Can be computed in one line
scores[j,i]=scores[i,j]
return scores
......@@ -114,9 +118,11 @@ cdef class BagOfCliques(Base):
cdef list clique_vocab=self.getUniqueCliques(graphs)
cdef dict map_str_cliques=self.transform_clique_vocab(clique_vocab)
cdef int l_v=len(clique_vocab)
cdef np.ndarray boc = np.zeros((len(graphs), l_v))
boc = lil_matrix((len(graphs), l_v))
cdef np.ndarray vector
cdef list cliques
cdef str hash
#print(1)
for g in range(len(graphs)):
#sys.stdout.write("\r{0}/{1}".format(g,len(graphs)))
gr = graphs[g]
......
......@@ -74,18 +74,60 @@ cpdef intersection(G, H):
return R
cpdef union_(G, H):
    """
    Build a graph holding the nodes and edges of both G and H.

    The result starts as an edge-less copy of G, then receives the nodes
    of H, the edges of G and finally the edges of H (data included).

    Parameters
    ----------
    G : networkx.Graph
        First graph
    H : networkx.Graph
        Second graph

    Returns
    -------
    networkx.Graph
        A new graph with the same type as G.
    """
    merged = nx.create_empty_copy(G)
    merged.add_nodes_from(H.nodes(data=True))
    # Edges of G are inserted before those of H, as in two consecutive calls.
    for source in (G, H):
        merged.add_edges_from(source.edges(data=True))
    return merged
cdef class Base:
"""
This class defines the methods common to all graph matching algorithms.
Attributes
----------
type_alg : int
Indicate the type of measure returned by the algorithm :
* 0 : similarity
* 1 : distance
normalized : bool
Indicates whether the algorithm returns normalized results (between 0 and 1)
"""
def __cinit__(self):
    """Set the default attribute values: measure type 0 and not normalized."""
    self.normalized=False
    self.type_alg=0
def __init__(self,type_alg,normalized):
"""
Constructor of Base
Parameters
----------
type_alg : int
Indicate the type of measure returned by the algorithm :
* **0** : similarity
* **1** : distance
normalized : bool
Indicates whether the algorithm returns normalized results (between 0 and 1)
"""
if type_alg <0:
self.type_alg=0
elif type_alg >1 :
......@@ -94,12 +136,40 @@ cdef class Base:
self.type_alg=type_alg
self.normalized=normalized
cpdef np.ndarray compare(self,list graph_list, list selected):
"""
Return the similarity/distance matrix using the current algorithm.

In this base class the body is an empty placeholder (``pass``); subclasses
such as BagOfCliques define their own ``compare``.

>>> Base(0, False).compare([nx.Graph(), nx.Graph()], None)
>>> Base(0, False).compare([nx.Graph(), nx.Graph()], [0, 1])

Parameters
----------
graph_list : networkx.Graph array
Contains the graphs to compare
selected : int array
Sometimes you only want to compute the similarity of some graphs to every
other graph. If so, give their indices in `graph_list`; otherwise pass None.

Returns
-------
np.array
distance/similarity matrix
"""
pass
cpdef np.ndarray distance(self, np.ndarray matrix):
"""
Return the distance matrix between the graphs
:return: np.ndarray
Return a normalized distance matrix
Parameters
----------
matrix : np.array
Similarity/distance matrix you want to transform
Returns
-------
np.array
distance matrix
"""
if self.type_alg == 1:
if not self.normalized:
......@@ -111,8 +181,16 @@ cdef class Base:
return 1-matrix
cpdef np.ndarray similarity(self, np.ndarray matrix):
"""
Return the similarity value between the graphs
:return:
Return a normalized similarity matrix
Parameters
----------
matrix : np.array
Similarity/distance matrix you want to transform
Returns
-------
np.array
similarity matrix
"""
if self.type_alg == 0:
return matrix
......@@ -121,18 +199,42 @@ cdef class Base:
matrix=minmax_scale(matrix)
return 1-matrix
def mcs(self, G, H):
    """
    Return the Most Common Subgraph (MCS) of G and H.

    The result is a copy of G from which every node that is not also a
    node of H has been removed. G and H themselves are left untouched.

    Parameters
    ----------
    G : networkx.Graph
        First graph
    H : networkx.Graph
        Second graph

    Returns
    -------
    networkx.Graph
        Copy of G restricted to the nodes it shares with H
    """
    R = G.copy()
    # Walk G rather than R so the graph being modified is never the one iterated.
    R.remove_nodes_from(n for n in G if n not in H)
    return R
cpdef bint isAccepted(self,G,index,selected):
"""
Indicate whether the graph will be compared to the others. A graph is "accepted" if:
* G exists (is not None) and is not empty (|vertices(G)| > 0)
* if graphs to compare were selected, G is among the selected ones
Parameters
----------
G : networkx.Graph
Graph
index : int
index in the graph list parameter in `Base.compare()`
selected : int array
`selected` parameter value in `Base.compare()`
Returns
-------
bool :
if is accepted
"""
f=True
if not G:
f=False
......
......@@ -4,9 +4,7 @@ cimport numpy as np
from ..base cimport Base
cdef class BP_2(Base):
"""
"""
cdef int node_del
cdef int node_ins
......@@ -14,7 +12,20 @@ cdef class BP_2(Base):
cdef int edge_ins
def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1):
"""Constructor for HED"""
"""
BP_2 Constructor
Parameters
----------
node_del :int
Node deletion cost
node_ins : int
Node insertion cost
edge_del : int
Edge Deletion cost
edge_ins : int
Edge Insertion cost
"""
Base.__init__(self,1,False)
self.node_del = node_del
self.node_ins = node_ins
......@@ -38,26 +49,56 @@ cdef class BP_2(Base):
return comparison_matrix
def __init__(self, node_del=1, node_ins=1, edge_del=1, edge_ins=1):
    """
    Store the four edit costs on the instance.

    NOTE(review): a typed ``__init__`` with the same parameters appears
    earlier in BP_2; this untyped one looks like the superseded version.
    Confirm which of the two should remain.

    Parameters
    ----------
    node_del : int
        Node deletion cost
    node_ins : int
        Node insertion cost
    edge_del : int
        Edge deletion cost
    edge_ins : int
        Edge insertion cost
    """
    self.edge_ins = edge_ins
    self.edge_del = edge_del
    self.node_ins = node_ins
    self.node_del = node_del
cdef double bp2(self, g1, g2):
    """
    Compute the BP2 value between two `networkx.Graph`.

    The edit path is evaluated in both directions (g1 -> g2 and g2 -> g1)
    and the smaller of the two total costs is returned.

    Parameters
    ----------
    g1 : networkx.Graph
        First Graph
    g2 : networkx.Graph
        Second Graph

    Returns
    -------
    float
        Smallest of the two directed edit-path costs
    """
    forward_cost = self.distance_bp2(self.psi(g1, g2))
    backward_cost = self.distance_bp2(self.psi(g2, g1))
    return np.min([forward_cost, backward_cost])
cdef double distance_bp2(self,e):
    """
    Turn an edit path into a single distance value.

    Parameters
    ----------
    e : list
        Costs of the operations making up the edit path

    Returns
    -------
    double
        Sum of all the costs in `e`
    """
    total_cost = np.sum(e)
    return total_cost
cdef list psi(self,g1,g2):
"""
Return the optimal edit path :math:`\psi` based on BP2 algorithm.
Parameters
----------
g1 : networkx.Graph
First Graph
g2 : networkx.Graph
Second Graph
Returns
-------
list
list containing costs from the optimal edit path
"""
cdef list psi_=[]
cdef list nodes1 = list(g1.nodes)
cdef list nodes2 = list(g2.nodes)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment