From 37b712ed88ed6cc3b33931480f0069c3b7d6e1d5 Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Thu, 23 Aug 2018 15:03:08 +0200 Subject: [PATCH] Adding a bit of documentation Optimize Bag of Clique by using a sparse matrix --- gmatch4py/bag_of_cliques.pyx | 10 +- gmatch4py/base.pyx | 126 +++++++++++++++++-- gmatch4py/ged/bipartite_graph_matching_2.pyx | 69 +++++++--- 3 files changed, 177 insertions(+), 28 deletions(-) diff --git a/gmatch4py/bag_of_cliques.pyx b/gmatch4py/bag_of_cliques.pyx index e304598..65cb720 100644 --- a/gmatch4py/bag_of_cliques.pyx +++ b/gmatch4py/bag_of_cliques.pyx @@ -6,6 +6,7 @@ from typing import Sequence import networkx as nx import numpy as np cimport numpy as np +from scipy.sparse import csr_matrix,lil_matrix import sys from .base cimport Base,intersection @@ -20,6 +21,7 @@ cdef class BagOfCliques(Base): cpdef np.ndarray compare(self,list listgs, list selected): b=BagOfCliques() bog=b.getBagOfCliques(listgs).astype(np.float32) + print(bog.shape) #Compute cosine similarity cdef int n=bog.shape[0] cdef np.ndarray scores = np.zeros((n,n)) @@ -28,8 +30,10 @@ cdef class BagOfCliques(Base): if selected: if not i in selected: continue + bog_i=np.asarray(bog[i].todense()) for j in range(i,len(scores)): - scores[i,j]=(np.dot(bog[i],bog[j]))/(np.sqrt(np.sum(bog[i]**2))*np.sqrt(np.sum(bog[j]**2))) # Can be computed in one line + bog_j=np.asarray(bog[j].todense()) + scores[i,j]=(np.dot(bog_i,bog_j.T))/(np.sqrt(np.sum(bog_i**2))*np.sqrt(np.sum(bog_j**2))) # Can be computed in one line scores[j,i]=scores[i,j] return scores @@ -114,9 +118,11 @@ cdef class BagOfCliques(Base): cdef list clique_vocab=self.getUniqueCliques(graphs) cdef dict map_str_cliques=self.transform_clique_vocab(clique_vocab) cdef int l_v=len(clique_vocab) - cdef np.ndarray boc = np.zeros((len(graphs), l_v)) + boc = lil_matrix((len(graphs), l_v)) cdef np.ndarray vector cdef list cliques + cdef str hash + #print(1) for g in range(len(graphs)): 
#sys.stdout.write("\r{0}/{1}".format(g,len(graphs))) gr = graphs[g] diff --git a/gmatch4py/base.pyx b/gmatch4py/base.pyx index 5f699ec..bb6cef8 100644 --- a/gmatch4py/base.pyx +++ b/gmatch4py/base.pyx @@ -74,18 +74,60 @@ cpdef intersection(G, H): return R cpdef union_(G, H): + """ + Return a graph that contains the nodes and edges from both graphs G and H. + + Parameters + ---------- + G : networkx.Graph + First graph + H : networkx.Graph + Second graph + + Returns + ------- + networkx.Graph + A new graph with the same type as G. + """ R = nx.create_empty_copy(G) + R.add_nodes_from(H.nodes(data=True)) R.add_edges_from(G.edges(data=True)) R.add_edges_from(H.edges(data=True)) return R cdef class Base: + """ + This class defines the methods common to all Graph Matching algorithms. + + Attributes + ---------- + type_alg : int + Indicates the type of measure returned by the algorithm : + * 0 : similarity + * 1 : distance + normalized : bool + Indicates if the algorithm returns normalized results (between 0 and 1) + + """ def __cinit__(self): self.type_alg=0 self.normalized=False def __init__(self,type_alg,normalized): + """ + Constructor of Base + + Parameters + ---------- + type_alg : int + Indicates the type of measure returned by the algorithm : + + * **0** : similarity + * **1** : distance + normalized : bool + Indicates if the algorithm returns normalized results (between 0 and 1) + """ if type_alg <0: self.type_alg=0 elif type_alg >1 : @@ -94,12 +136,40 @@ cdef class Base: self.type_alg=type_alg self.normalized=normalized cpdef np.ndarray compare(self,list graph_list, list selected): + """ + Return the similarity/distance matrix using the current algorithm. + + >>>Base.compare([nx.Graph(),nx.Graph()],None) + >>>Base.compare([nx.Graph(),nx.Graph()],[0,1]) + + Parameters + ---------- + graph_list : networkx.Graph array + Contains the graphs to compare + selected : int array + Sometimes, you only want to compute the similarity of some graphs to every graph. 
If so, indicate their indices in + `graph_list`; otherwise, pass + the None value. + Returns + ------- + np.array + distance/similarity matrix + + """ pass cpdef np.ndarray distance(self, np.ndarray matrix): """ - Return the distance matrix between the graphs - :return: np.ndarray + Return a normalized distance matrix + Parameters + ---------- + matrix : np.array + Similarity/distance matrix you want to transform + + Returns + ------- + np.array + distance matrix """ if self.type_alg == 1: if not self.normalized: @@ -111,8 +181,16 @@ cdef class Base: return 1-matrix cpdef np.ndarray similarity(self, np.ndarray matrix): """ - Return a the similarity value between the graphs - :return: + Return a normalized similarity matrix + Parameters + ---------- + matrix : np.array + Similarity/distance matrix you want to transform + + Returns + ------- + np.array + similarity matrix """ if self.type_alg == 0: return matrix @@ -121,18 +199,42 @@ cdef class Base: matrix=minmax_scale(matrix) return 1-matrix - def mcs(self,g1,g2): + def mcs(self, G, H): """ - Return the Most Common Subgraph - :param g1: graph1 - :param g2: graph2 - :return: np.ndarray + Return the Most Common Subgraph of G and H + Parameters + ---------- + G : networkx.Graph + First Graph + H : networkx.Graph + Second Graph + + Returns + ------- + networkx.Graph + Most common Subgraph """ - R=g1.copy() - R.remove_nodes_from(n for n in g1 if n not in g2) - return R cpdef bint isAccepted(self,G,index,selected): + """ + Indicate if the graph will be compared to the others. 
A graph is "accepted" if : + * G exists (!= None) and is not empty (|vertices(G)| > 0) + * If selected graphs to compare were indicated, check if G exists in selected + + Parameters + ---------- + G : networkx.Graph + Graph + index : int + index in the graph list parameter in `Base.compare()` + selected : int array + `selected` parameter value in `Base.compare()` + + Returns + ------- + bool : + True if G is accepted + """ f=True if not G: f=False diff --git a/gmatch4py/ged/bipartite_graph_matching_2.pyx b/gmatch4py/ged/bipartite_graph_matching_2.pyx index a614c13..59e33e0 100644 --- a/gmatch4py/ged/bipartite_graph_matching_2.pyx +++ b/gmatch4py/ged/bipartite_graph_matching_2.pyx @@ -4,9 +4,7 @@ cimport numpy as np from ..base cimport Base cdef class BP_2(Base): - """ - """ cdef int node_del cdef int node_ins @@ -14,7 +12,20 @@ cdef class BP_2(Base): cdef int edge_ins def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1): - """Constructor for HED""" + """ + BP_2 Constructor + + Parameters + ---------- + node_del : int + Node deletion cost + node_ins : int + Node insertion cost + edge_del : int + Edge deletion cost + edge_ins : int + Edge insertion cost + """ Base.__init__(self,1,False) self.node_del = node_del self.node_ins = node_ins @@ -38,26 +49,56 @@ cdef class BP_2(Base): return comparison_matrix - def __init__(self, node_del=1, node_ins=1, edge_del=1, edge_ins=1): - """Constructor for HED""" - self.node_del = node_del - self.node_ins = node_ins - self.edge_del = edge_del - self.edge_ins = edge_ins - cdef double bp2(self, g1, g2): """ - Compute de Hausdorff Edit Distance - :param g1: first graph - :param g2: second graph - :return: + Compute the BP2 distance value between two `networkx.Graph` + + Parameters + ---------- + g1 : networkx.Graph + First Graph + g2 : networkx.Graph + Second Graph + + Returns + ------- + float + distance value """ return np.min([self.distance_bp2(self.psi(g1,g2)),self.distance_bp2(self.psi(g2,g1))]) cdef double 
distance_bp2(self,e): + """ + Return the distance based on the edit path found. + Parameters + ---------- + e : list + Contains the edit path costs + + Returns + ------- + double + Return sum of the costs from the edit path + """ return np.sum(e) cdef list psi(self,g1,g2): + """ + Return the optimal edit path :math:`\psi` based on BP2 algorithm. + + + Parameters + ---------- + g1 : networkx.Graph + First Graph + g2 : networkx.Graph + Second Graph + + Returns + ------- + list + list containing costs from the optimal edit path + """ cdef list psi_=[] cdef list nodes1 = list(g1.nodes) cdef list nodes2 = list(g2.nodes) -- GitLab