From aec78a2ca76f4098d867dc30384ce843e7cf027a Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Thu, 9 May 2019 14:48:35 +0200
Subject: [PATCH] add edit_path function+ debug graph class + add tqdm progress
 bar when using import_dir

---
 README.md                                  |  6 +++++-
 gmatch4py/ged/abstract_graph_edit_dist.pyx | 20 ++++++++++++++++++++
 gmatch4py/graph.pxd                        | 18 +++++++++---------
 gmatch4py/graph.pyx                        | 22 +++++++++++-----------
 gmatch4py/helpers/reader.pyx               |  3 ++-
 5 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 3c7bf65..2d915f9 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,6 @@ print(result)
 
 The output is a similarity/distance matrix :
 ```python
-Out[10]:
 array([[0., 14.],
        [10., 0.]])
 ```
@@ -126,6 +125,11 @@ each code is associated with a reference to the original.**
 
 
 ## CHANGELOG
+### 7.05.2019
+
+ * Fix bugs with float edge weights (weighted degrees are now stored and returned as doubles)
+ * Add the `AbstractGraphEditDistance.edit_path(G,H)` method, which returns the edit path, the cost matrix and the selected cost indices in the cost matrix
+ * Add a tqdm progress bar for the `gmatch4py.helpers.reader.import_dir()` function
 
 ### 12.03.2019
 
diff --git a/gmatch4py/ged/abstract_graph_edit_dist.pyx b/gmatch4py/ged/abstract_graph_edit_dist.pyx
index 95ba8d4..b597b13 100644
--- a/gmatch4py/ged/abstract_graph_edit_dist.pyx
+++ b/gmatch4py/ged/abstract_graph_edit_dist.pyx
@@ -51,6 +51,26 @@ cdef class AbstractGraphEditDistance(Base):
         cdef list opt_path = self.edit_costs(G,H)
         return np.sum(opt_path)
 
+    def edit_path(self,G,H):
+        """
+        Return the edit path together with the cost matrix and the indices selected by ``munkres``.
+        
+        Parameters
+        ----------
+        G : nx.Graph
+            first graph
+        H : nx.Graph
+            second graph
+        
+        Returns
+        -------
+        edit_path, cost_matrix, index_path
+            ``cost_matrix[index_path]``, the float cost matrix built for (G,H), and the raw ``munkres(cost_matrix)`` result (np.array(2D) if munkres, np.array(1,2) if scipy -- depends on which backend ``munkres`` is bound to)
+        """
+        cost_matrix = self.create_cost_matrix(G,H).astype(float)
+        index_path= munkres(cost_matrix)
+        return cost_matrix[index_path], cost_matrix, index_path
+    
 
     cdef list edit_costs(self, G, H):
         """
diff --git a/gmatch4py/graph.pxd b/gmatch4py/graph.pxd
index e1ef555..d35c42d 100644
--- a/gmatch4py/graph.pxd
+++ b/gmatch4py/graph.pxd
@@ -32,9 +32,9 @@ cdef class Graph:
     cdef long[:] nodes_degree # degree list
     cdef long[:] nodes_degree_in # in degree list
     cdef long[:] nodes_degree_out # out degree list
-    cdef long[:] nodes_degree_weighted #weighted vers. of nodes_degree
-    cdef long[:] nodes_degree_in_weighted #weighted vers. of nodes_degree_in
-    cdef long[:] nodes_degree_out_weighted #weighted vers. of nodes_degree_out
+    cdef double[:] nodes_degree_weighted #weighted vers. of nodes_degree
+    cdef double[:] nodes_degree_in_weighted #weighted vers. of nodes_degree_in
+    cdef double[:] nodes_degree_out_weighted #weighted vers. of nodes_degree_out
     cdef dict degree_per_attr # degree information per attr val
     cdef dict degree_per_attr_weighted # degree information per attr val
     cdef list attr_nodes # list of attr(dict) values for each node
@@ -97,13 +97,13 @@ cdef class Graph:
 
     # DEGREE FUNCTION
     #################
-    cpdef int degree(self,str n_id, bint weight=*)
-    cpdef int in_degree(self,str n_id, bint weight=*)
-    cpdef int out_degree(self,str n_id, bint weight=*)
+    cpdef double degree(self,str n_id, bint weight=*)
+    cpdef double in_degree(self,str n_id, bint weight=*)
+    cpdef double out_degree(self,str n_id, bint weight=*)
 
-    cpdef int in_degree_attr(self,str n_id,str attr_val, bint weight=*)
-    cpdef int out_degree_attr(self,str n_id,str attr_val, bint weight=*)
-    cpdef int degree_attr(self,str n_id,str attr_val, bint weight=*)
+    cpdef double in_degree_attr(self,str n_id,str attr_val, bint weight=*)
+    cpdef double out_degree_attr(self,str n_id,str attr_val, bint weight=*)
+    cpdef double degree_attr(self,str n_id,str attr_val, bint weight=*)
 
     ## GETTER
     #########
diff --git a/gmatch4py/graph.pyx b/gmatch4py/graph.pyx
index f3f59c9..1ed87b5 100644
--- a/gmatch4py/graph.pyx
+++ b/gmatch4py/graph.pyx
@@ -108,14 +108,14 @@ cdef class Graph:
                             self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["in"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 )
                             self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["out"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 )
             
-            
             self.nodes_degree=np.array(degree_all)
             self.nodes_degree_in=np.array(degree_in)
             self.nodes_degree_out=np.array(degree_out)
 
-            self.nodes_degree_weighted=np.array(degree_all_weighted)
-            self.nodes_degree_in_weighted=np.array(degree_in_weighted)
-            self.nodes_degree_out_weighted=np.array(degree_out_weighted)
+            self.nodes_degree_weighted=np.array(degree_all_weighted).astype(np.double)
+            self.nodes_degree_in_weighted=np.array(degree_in_weighted).astype(np.double)
+            self.nodes_degree_out_weighted=np.array(degree_out_weighted).astype(np.double)
+
 
             # EDGE INFO INIT
             #################
@@ -282,36 +282,36 @@ cdef class Graph:
     cpdef int density_attr(self, str attr_val):
         return self.number_of_edges_per_attr[attr_val]
 
-    cpdef int degree(self,str n_id, bint weight=False):
+    cpdef double degree(self,str n_id, bint weight=False):
         if weight:
             return self.nodes_degree_weighted[self.nodes_idx[n_id]]
         return self.nodes_degree[self.nodes_idx[n_id]]
     
-    cpdef int in_degree(self,str n_id, bint weight=False):
+    cpdef double in_degree(self,str n_id, bint weight=False):
         if weight:
             return self.nodes_degree_in_weighted[self.nodes_idx[n_id]]
         return self.nodes_degree_in[self.nodes_idx[n_id]]
     
-    cpdef int out_degree(self,str n_id, bint weight=False):
+    cpdef double out_degree(self,str n_id, bint weight=False):
         if weight:
             return self.nodes_degree_out_weighted[self.nodes_idx[n_id]]
         return self.nodes_degree_out[self.nodes_idx[n_id]]
 
-    cpdef int in_degree_attr(self,str n_id,str attr_val, bint weight=False):
+    cpdef double in_degree_attr(self,str n_id,str attr_val, bint weight=False):
         if not self.is_edge_attr and not self.is_directed:
             raise AttributeError("No edge attribute have been defined")
         if weight:
             return self.degree_per_attr_weighted[attr_val][n_id]["in"]
         return self.degree_per_attr[attr_val][n_id]["in"]
 
-    cpdef int out_degree_attr(self,str n_id,str attr_val, bint weight=False):
+    cpdef double out_degree_attr(self,str n_id,str attr_val, bint weight=False):
         if not self.is_edge_attr and not self.is_directed:
             raise AttributeError("No edge attribute have been defined")
         if weight:
             return self.degree_per_attr_weighted[attr_val][n_id]["out"]
         return self.degree_per_attr[attr_val][n_id]["out"]
 
-    cpdef int degree_attr(self,str n_id,str attr_val, bint weight=False):
+    cpdef double degree_attr(self,str n_id,str attr_val, bint weight=False):
         if not self.is_edge_attr:
             raise AttributeError("No edge attribute have been defined")
         if not self.is_directed:
@@ -371,7 +371,7 @@ cdef class Graph:
 
     def __init_empty__(self):
         self.nodes_list,self.nodes_attr_list,self.nodes_hash,self.nodes_weight,self.attr_nodes=[],[],[],[],[]
-        self.nodes_degree,self.nodes_degree_in,self.nodes_degree_out,self.nodes_degree_weighted,self.nodes_degree_in_weighted,self.nodes_degree_out_weighted=np.array([],dtype=np.long),np.array([],dtype=np.long),np.array([],dtype=np.long),np.array([],dtype=np.long),np.array([],dtype=np.long),np.array([],dtype=np.long)
+        self.nodes_degree,self.nodes_degree_in,self.nodes_degree_out,self.nodes_degree_weighted,self.nodes_degree_in_weighted,self.nodes_degree_out_weighted=np.array([],dtype=np.long),np.array([],dtype=np.long),np.array([],dtype=np.long),np.array([],dtype=np.double),np.array([],dtype=np.double),np.array([],dtype=np.double)
         self.nodes_idx,self.degree_per_attr,self.degree_per_attr_weighted={},{},{}
         self.nodes_hash_set=set([])
         self.number_of_nodes = 0
diff --git a/gmatch4py/helpers/reader.pyx b/gmatch4py/helpers/reader.pyx
index ea9b8de..bfc3125 100644
--- a/gmatch4py/helpers/reader.pyx
+++ b/gmatch4py/helpers/reader.pyx
@@ -1,6 +1,7 @@
 # coding = utf-8
 import sys, os, glob, json, re
 import networkx as nx
+from tqdm import tqdm
 
 
 """
@@ -66,7 +67,7 @@ def import_dir(directory,format="gexf",numbered=True):
         graphs= [nx.Graph()]*(n+1)
 
     association_map, i = {}, 0
-    for fn in fns:
+    for fn in tqdm(fns,desc="Loading Graphs from {0}".format(directory)):
         if not numbered:
             graphs.append(methods_read_graph[format](fn))
             association_map[fn]=i
-- 
GitLab