From 37b712ed88ed6cc3b33931480f0069c3b7d6e1d5 Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Thu, 23 Aug 2018 15:03:08 +0200
Subject: [PATCH] Adding a bit of documentation

Optimize Bag of Clique by using a sparse matrix
---
 gmatch4py/bag_of_cliques.pyx                 |  10 +-
 gmatch4py/base.pyx                           | 126 +++++++++++++++++--
 gmatch4py/ged/bipartite_graph_matching_2.pyx |  69 +++++++---
 3 files changed, 177 insertions(+), 28 deletions(-)

diff --git a/gmatch4py/bag_of_cliques.pyx b/gmatch4py/bag_of_cliques.pyx
index e304598..65cb720 100644
--- a/gmatch4py/bag_of_cliques.pyx
+++ b/gmatch4py/bag_of_cliques.pyx
@@ -6,6 +6,7 @@ from typing import Sequence
 import networkx as nx
 import numpy as np
 cimport numpy as np
+from scipy.sparse import csr_matrix,lil_matrix
 import sys
 
 from .base cimport Base,intersection
@@ -20,6 +21,7 @@ cdef class BagOfCliques(Base):
     cpdef np.ndarray compare(self,list listgs, list selected):
         b=BagOfCliques()
         bog=b.getBagOfCliques(listgs).astype(np.float32)
+        print(bog.shape)
         #Compute cosine similarity
         cdef int n=bog.shape[0]
         cdef np.ndarray scores = np.zeros((n,n))
@@ -28,8 +30,10 @@ cdef class BagOfCliques(Base):
             if selected:
                 if not i in selected:
                     continue
+            bog_i=np.asarray(bog[i].todense())
             for j in range(i,len(scores)):
-                scores[i,j]=(np.dot(bog[i],bog[j]))/(np.sqrt(np.sum(bog[i]**2))*np.sqrt(np.sum(bog[j]**2))) # Can be computed in one line
+                bog_j=np.asarray(bog[j].todense())
+                scores[i,j]=(np.dot(bog_i,bog_j.T))/(np.sqrt(np.sum(bog_i**2))*np.sqrt(np.sum(bog_j**2))) # Can be computed in one line
                 scores[j,i]=scores[i,j]
         return scores
 
@@ -114,9 +118,11 @@ cdef class BagOfCliques(Base):
         cdef list clique_vocab=self.getUniqueCliques(graphs)
         cdef dict map_str_cliques=self.transform_clique_vocab(clique_vocab)
         cdef int l_v=len(clique_vocab)
-        cdef np.ndarray boc = np.zeros((len(graphs), l_v))
+        boc = lil_matrix((len(graphs), l_v))
         cdef np.ndarray vector
         cdef list cliques
+        cdef str hash
+        #print(1)
         for g in range(len(graphs)):
             #sys.stdout.write("\r{0}/{1}".format(g,len(graphs)))
             gr = graphs[g]
diff --git a/gmatch4py/base.pyx b/gmatch4py/base.pyx
index 5f699ec..bb6cef8 100644
--- a/gmatch4py/base.pyx
+++ b/gmatch4py/base.pyx
@@ -74,18 +74,60 @@ cpdef intersection(G, H):
     return R
 
 cpdef union_(G, H):
+    """
+    Return a graph that contains the nodes and edges of both graphs G and H.
+    
+    Parameters
+    ----------
+    G : networkx.Graph
+        First graph
+    H : networkx.Graph 
+        Second graph
+
+    Returns
+    -------
+    networkx.Graph
+        A new graph with the same type as G.
+    """
     R = nx.create_empty_copy(G)
+    R.add_nodes_from(H.nodes(data=True))
     R.add_edges_from(G.edges(data=True))
     R.add_edges_from(H.edges(data=True))
     return R
 
 cdef class Base:
+    """
+    This class defines the methods common to all graph matching algorithms.
+
+    Attributes
+    ----------
+    type_alg : int
+        Indicate the type of measure returned by the algorithm :
 
+         * 0 : similarity
+         * 1 : distance
+    normalized : bool
+        Indicates whether the algorithm returns normalized results (between 0 and 1)
+
+    """
     def __cinit__(self):
         self.type_alg=0
         self.normalized=False
 
     def __init__(self,type_alg,normalized):
+        """
+        Constructor of Base
+
+        Parameters
+        ----------
+        type_alg : int
+            Indicate the type of measure returned by the algorithm :
+
+             * **0** : similarity
+             * **1** : distance
+        normalized : bool
+            Indicates whether the algorithm returns normalized results (between 0 and 1)
+        """
         if type_alg <0:
             self.type_alg=0
         elif type_alg >1 :
@@ -94,12 +136,40 @@ cdef class Base:
             self.type_alg=type_alg
         self.normalized=normalized
     cpdef np.ndarray compare(self,list graph_list, list selected):
+        """
+        Return the similarity/distance matrix using the current algorithm.
+        
+        >>>Base.compare([nx.Graph(),nx.Graph()],None)
+        >>>Base.compare([nx.Graph(),nx.Graph()],[0,1])
+        
+        Parameters
+        ----------
+        graph_list : networkx.Graph array
+            Contains the graphs to compare
+        selected : int array
+            Sometimes you only want to compute the similarity of some graphs to every graph. If so, give their
+            indices in `graph_list`; otherwise, pass None.
+
+        Returns
+        -------
+        np.array
+            distance/similarity matrix
+            
+        """
         pass
 
     cpdef np.ndarray distance(self, np.ndarray matrix):
         """
-        Return the distance matrix between the graphs
-        :return: np.ndarray
+        Return a normalized distance matrix
+        Parameters
+        ----------
+        matrix : np.array
+            Similarity/distance matrix you want to transform
+
+        Returns
+        -------
+        np.array
+            distance matrix
         """
         if self.type_alg == 1:
             if not self.normalized:
@@ -111,8 +181,16 @@ cdef class Base:
             return 1-matrix
     cpdef np.ndarray similarity(self, np.ndarray matrix):
         """
-        Return a the similarity value between the graphs 
-        :return: 
+        Return a normalized similarity matrix
+        Parameters
+        ----------
+        matrix : np.array
+            Similarity/distance matrix you want to transform
+
+        Returns
+        -------
+        np.array
+            similarity matrix
         """
         if self.type_alg == 0:
             return matrix
@@ -121,18 +199,42 @@ cdef class Base:
                 matrix=minmax_scale(matrix)
             return 1-matrix
 
-    def mcs(self,g1,g2):
+    def mcs(self, G, H):
         """
-        Return the Most Common Subgraph
-        :param g1: graph1
-        :param g2: graph2
-        :return: np.ndarray
+        Return the Most Common Subgraph of G and H.
+        Parameters
+        ----------
+        G : networkx.Graph
+            First Graph
+        H : networkx.Graph
+            Second Graph
+
+        Returns
+        -------
+        networkx.Graph
+            Most common subgraph
         """
-        R=g1.copy()
-        R.remove_nodes_from(n for n in g1 if n not in g2)
-        return R
 
     cpdef bint isAccepted(self,G,index,selected):
+        """
+        Indicate whether the graph will be compared to the others. A graph is "accepted" if:
+         * G exists (is not None) and is not empty (|vertices(G)| > 0)
+         * If a selection of graphs to compare was given, G is part of that selection
+        
+        Parameters
+        ----------
+        G : networkx.Graph
+            Graph
+        index : int
+            index in the graph list parameter in `Base.compare()`
+        selected : int array
+            `selected` parameter value in `Base.compare()`
+
+        Returns
+        -------
+        bool
+            Whether the graph is accepted
+        """
         f=True
         if not G:
             f=False
diff --git a/gmatch4py/ged/bipartite_graph_matching_2.pyx b/gmatch4py/ged/bipartite_graph_matching_2.pyx
index a614c13..59e33e0 100644
--- a/gmatch4py/ged/bipartite_graph_matching_2.pyx
+++ b/gmatch4py/ged/bipartite_graph_matching_2.pyx
@@ -4,9 +4,7 @@ cimport numpy as np
 from ..base cimport Base
 
 cdef class BP_2(Base):
-    """
 
-    """
 
     cdef int node_del
     cdef int node_ins
@@ -14,7 +12,20 @@ cdef class BP_2(Base):
     cdef int edge_ins
 
     def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1):
-        """Constructor for HED"""
+        """
+        BP_2 Constructor
+
+        Parameters
+        ----------
+        node_del : int
+            Node deletion cost
+        node_ins : int
+            Node insertion cost
+        edge_del : int
+            Edge Deletion cost
+        edge_ins : int
+            Edge Insertion cost
+        """
         Base.__init__(self,1,False)
         self.node_del = node_del
         self.node_ins = node_ins
@@ -38,26 +49,56 @@ cdef class BP_2(Base):
         return comparison_matrix
 
 
-    def __init__(self, node_del=1, node_ins=1, edge_del=1, edge_ins=1):
-        """Constructor for HED"""
-        self.node_del = node_del
-        self.node_ins = node_ins
-        self.edge_del = edge_del
-        self.edge_ins = edge_ins
-
     cdef double bp2(self, g1, g2):
         """
-        Compute de Hausdorff Edit Distance
-        :param g1: first graph
-        :param g2: second graph
-        :return:
+        Compute the BP2 distance value between two `networkx.Graph` objects
+        
+        Parameters
+        ----------
+        g1 : networkx.Graph
+            First Graph
+        g2 : networkx.Graph
+            Second Graph
+
+        Returns
+        -------
+        float
+            distance value
         """
         return np.min([self.distance_bp2(self.psi(g1,g2)),self.distance_bp2(self.psi(g2,g1))])
 
     cdef double distance_bp2(self,e):
+        """
+        Return the distance based on the edit path found.
+        Parameters
+        ----------
+        e : list
+            Contains the edit path costs
+
+        Returns
+        -------
+        double
+            Return sum of the costs from the edit path
+        """
         return np.sum(e)
 
     cdef list psi(self,g1,g2):
+        """
+        Return the optimal edit path :math:`\psi` based on BP2 algorithm.
+        
+        
+        Parameters
+        ----------
+        g1 : networkx.Graph
+            First Graph
+        g2 : networkx.Graph
+            Second Graph
+
+        Returns
+        -------
+        list
+            list containing costs from the optimal edit path
+        """
         cdef list psi_=[]
         cdef list nodes1 = list(g1.nodes)
         cdef list nodes2 = list(g2.nodes)
-- 
GitLab