Commit 37b712ed by Fize Jacques

### Adding a bit of documentation

Optimize Bag of Cliques by using a sparse matrix
parent c6bacf41
 ... ... @@ -6,6 +6,7 @@ from typing import Sequence import networkx as nx import numpy as np cimport numpy as np from scipy.sparse import csr_matrix,lil_matrix import sys from .base cimport Base,intersection ... ... @@ -20,6 +21,7 @@ cdef class BagOfCliques(Base): cpdef np.ndarray compare(self,list listgs, list selected): b=BagOfCliques() bog=b.getBagOfCliques(listgs).astype(np.float32) print(bog.shape) #Compute cosine similarity cdef int n=bog.shape[0] cdef np.ndarray scores = np.zeros((n,n)) ... ... @@ -28,8 +30,10 @@ cdef class BagOfCliques(Base): if selected: if not i in selected: continue bog_i=np.asarray(bog[i].todense()) for j in range(i,len(scores)): scores[i,j]=(np.dot(bog[i],bog[j]))/(np.sqrt(np.sum(bog[i]**2))*np.sqrt(np.sum(bog[j]**2))) # Can be computed in one line bog_j=np.asarray(bog[j].todense()) scores[i,j]=(np.dot(bog_i,bog_j.T))/(np.sqrt(np.sum(bog_i**2))*np.sqrt(np.sum(bog_j**2))) # Can be computed in one line scores[j,i]=scores[i,j] return scores ... ... @@ -114,9 +118,11 @@ cdef class BagOfCliques(Base): cdef list clique_vocab=self.getUniqueCliques(graphs) cdef dict map_str_cliques=self.transform_clique_vocab(clique_vocab) cdef int l_v=len(clique_vocab) cdef np.ndarray boc = np.zeros((len(graphs), l_v)) boc = lil_matrix((len(graphs), l_v)) cdef np.ndarray vector cdef list cliques cdef str hash #print(1) for g in range(len(graphs)): #sys.stdout.write("\r{0}/{1}".format(g,len(graphs))) gr = graphs[g] ... ...
 ... ... @@ -74,18 +74,60 @@ cpdef intersection(G, H): return R cpdef union_(G, H): """ Return a graph that contains nodes and edges from both graph G and H. Parameters ---------- G : networkx.Graph First graph H : networkx.Graph Second graph Returns ------- networkx.Graph A new graph with the same type as G. """ R = nx.create_empty_copy(G) R.add_nodes_from(H.nodes(data=True)) R.add_edges_from(G.edges(data=True)) R.add_edges_from(H.edges(data=True)) return R cdef class Base: """ This class define the common methods to all Graph Matching algorithm. Attributes ---------- type_alg : int Indicate the type of measure returned by the algorithm : * 0 : similarity * 1 : distance normalized : bool Indicate if the algorithm return normalized results (between 0 and 1) """ def __cinit__(self): self.type_alg=0 self.normalized=False def __init__(self,type_alg,normalized): """ Constructor of Base Parameters ---------- type_alg : int Indicate the type of measure returned by the algorithm : * **0** : similarity * **1** : distance normalized : bool Indicate if the algorithm return normalized results (between 0 and 1) """ if type_alg <0: self.type_alg=0 elif type_alg >1 : ... ... @@ -94,12 +136,40 @@ cdef class Base: self.type_alg=type_alg self.normalized=normalized cpdef np.ndarray compare(self,list graph_list, list selected): """ Return the similarity/distance matrix using the current algorithm. >>>Base.compare([nx.Graph(),nx.Graph()],None) >>>Base.compare([nx.Graph(),nx.Graph()],[0,1]) Parameters ---------- graph_list : networkx.Graph array Contains the graphs to compare selected : int array Sometimes, you only wants to compute similarity of some graphs to every graphs. If so, indicate their indices in graph_list, else, put the None value. 
the None value Returns ------- np.array distance/similarity matrix """ pass cpdef np.ndarray distance(self, np.ndarray matrix): """ Return the distance matrix between the graphs :return: np.ndarray Return a normalized distance matrix Parameters ---------- matrix : np.array Similarity/distance matrix you want to transform Returns ------- np.array distance matrix """ if self.type_alg == 1: if not self.normalized: ... ... @@ -111,8 +181,16 @@ cdef class Base: return 1-matrix cpdef np.ndarray similarity(self, np.ndarray matrix): """ Return a the similarity value between the graphs :return: Return a normalized similarity matrix Parameters ---------- matrix : np.array Similarity/distance matrix you want to transform Returns ------- np.array similarity matrix """ if self.type_alg == 0: return matrix ... ... @@ -121,18 +199,42 @@ cdef class Base: matrix=minmax_scale(matrix) return 1-matrix def mcs(self,g1,g2): def mcs(self, G, H): """ Return the Most Common Subgraph :param g1: graph1 :param g2: graph2 :return: np.ndarray Return the Most Common Subgraph of Parameters ---------- G : networkx.Graph First Graph H : networkx.Graph Second Graph Returns ------- networkx.Graph Most common Subgrah """ R=g1.copy() R.remove_nodes_from(n for n in g1 if n not in g2) return R cpdef bint isAccepted(self,G,index,selected): """ Indicate if the graph will be compared to the other. A graph is "accepted" if : * G exists(!= None) and not empty (|vertices(G)| >0) * If selected graph to compare were indicated, check if G exists in selected Parameters ---------- G : networkx.Graph Graph index : int index in the graph list parameter in Base.compare() selected : int array selected parameter value in Base.compare() Returns ------- bool : if is accepted """ f=True if not G: f=False ... ...
 ... ... @@ -4,9 +4,7 @@ cimport numpy as np from ..base cimport Base cdef class BP_2(Base): """ """ cdef int node_del cdef int node_ins ... ... @@ -14,7 +12,20 @@ cdef class BP_2(Base): cdef int edge_ins def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1): """Constructor for HED""" """ BP_2 Constructor Parameters ---------- node_del :int Node deletion cost node_ins : int Node insertion cost edge_del : int Edge Deletion cost edge_ins : int Edge Insertion cost """ Base.__init__(self,1,False) self.node_del = node_del self.node_ins = node_ins ... ... @@ -38,26 +49,56 @@ cdef class BP_2(Base): return comparison_matrix def __init__(self, node_del=1, node_ins=1, edge_del=1, edge_ins=1): """Constructor for HED""" self.node_del = node_del self.node_ins = node_ins self.edge_del = edge_del self.edge_ins = edge_ins cdef double bp2(self, g1, g2): """ Compute de Hausdorff Edit Distance :param g1: first graph :param g2: second graph :return: Compute the BP2 similarity value between two networkx.Graph Parameters ---------- g1 : networkx.Graph First Graph g2 : networkx.Graph Second Graph Returns ------- float similarity value """ return np.min([self.distance_bp2(self.psi(g1,g2)),self.distance_bp2(self.psi(g2,g1))]) cdef double distance_bp2(self,e): """ Return the distance based on the edit path found. Parameters ---------- e : list Contains the edit path costs Returns ------- double Return sum of the costs from the edit path """ return np.sum(e) cdef list psi(self,g1,g2): """ Return the optimal edit path :math:\psi based on BP2 algorithm. Parameters ---------- g1 : networkx.Graph First Graph g2 : networkx.Graph Second Graph Returns ------- list list containing costs from the optimal edit path """ cdef list psi_=[] cdef list nodes1 = list(g1.nodes) cdef list nodes2 = list(g2.nodes) ... ...
