+ Initialize Cython version of gmatch4py

+ Speed up border collision by using shapely and loading all the shapes right at the beginning of the process + New experiment + Add notebooks

+ Initialize Cython version of gmatch4py
+ Speed up border collision by using shapely and loading all the shapes right at the beginning of the process + New experiment + Add notebooks
8439cf8b · Fize Jacques · 87a9945f · 8439cf8b · 8439cf8b · 8439cf8b
Commit 8439cf8b authored 7 years ago by Fize Jacques
Hide whitespace changes
Inline Side-by-side

Showing

with 853 additions and 129 deletions
+853 -129
--- a/config/config.json
+++ b/config/config.json
@@ -5,5 +5,6 @@
  "core_nlp_URL":"http://localhost:9000",
  "es_server_old":"http://192.168.1.15:9200/",
  "es_server":"http://localhost:9200/",
-  "database_json":"resources/database_exp_12_mars.db"
+  "database_json":"resources/database_exp_12_mars.db",
+  "log_file":"extract_log"
 }
\ No newline at end of file
--- a/exp_17_avril.sh
+++ b/exp_17_avril.sh
+#!/usr/bin/env bash
+path_texts=/Users/jacquesfize/LOD_DATASETS/raw_bvlac
+output_dir=/Users/jacquesfize/LOD_DATASETS/exp_17_avr18
+if [ "$1" == "generate" ]; then
+    #python3 generate_data.py $path_texts  $output_dir/normal  asso.json normal;
+    python3 generate_transform.py $output_dir/normal  $output_dir/extension_1  extension -a 1;
+    python3 generate_transform.py $output_dir/normal  $output_dir/extension_2  extension -a 2;
+    python3 generate_transform.py $output_dir/normal  $output_dir/extension_3  extension -a 3;
+    python3 generate_transform.py $output_dir/normal  $output_dir/gen_all_1  generalisation -t all -n 1;
+    python3 generate_transform.py $output_dir/normal  $output_dir/gen_all_2  generalisation -t all -n 2;
+    python3 generate_transform.py $output_dir/normal  $output_dir/gen_region generalisation -t bounded -b region;
+    python3 generate_transform.py $output_dir/normal  $output_dir/gen_capital generalisation -t bounded -b capital;
+    python3 generate_transform.py $output_dir/normal  $output_dir/gen_country generalisation -t bounded -b country;
+fi
+if [ "$1" == "eval" ]; then
+    ## Normal STR eval
+    original=$output_dir/normal
+    dir=normal;
+    mesure=("MCS" "VEO" "JACCARD" "HED" "GREEDY" "GED" "BOC" "BOWSE");
+    for me in ${mesure[@]}; do
+        echo $me" for STR "$dir;
+        python3 eval.py $me $path_texts  $output_dir/$dir $output_dir/$dir/asso.json $original -a  -o $output_dir/result_eval/$dir/;
+    done;
+    ## Generalised STR eval
+    dir=gen_all_1
+    mesure=( "MCS" "VEO"  "JACCARD" "HED" "GREEDY"  "GED" "BOC" "BOWSE");
+    for me in ${mesure[@]}; do
+        echo $me" for STR "$dir;
+        python3 eval.py $me $path_texts  $output_dir/$dir $output_dir/$dir/asso.json $original  -a -o $output_dir/result_eval/$dir/;
+    done;
+    dir=gen_all_2
+    mesure=( "MCS" "VEO"  "JACCARD" "HED" "GREEDY" "BOC" "BOWSE");
+    for me in ${mesure[@]}; do
+        echo $me" for STR "$dir;
+        python3 eval.py $me $path_texts  $output_dir/$dir $output_dir/$dir/asso.json  $original -a  -o $output_dir/result_eval/$dir/;
+    done;
+    dir=gen_region
+    for me in ${mesure[@]}; do
+        echo $me" for STR "$dir;
+        python3 eval.py $me $path_texts  $output_dir/$dir $output_dir/$dir/asso.json $original -a -o $output_dir/result_eval/$dir/;
+    done;
+    dir=gen_country
+    for me in ${mesure[@]}; do
+        echo $me" for STR "$dir;
+        python3 eval.py $me $path_texts  $output_dir/$dir $output_dir/$dir/asso.json $original -a  -o $output_dir/result_eval/$dir/;
+    done;
+    ## Extended STR eval
+    dir=extension_1
+    mesure=( "MCS" "VEO" "JACCARD" "BOC" "WLSUBTREE" "BOWSE");
+    for me in ${mesure[@]}; do
+        echo $me" for STR "$dir;
+        python3 eval.py $me $path_texts  $output_dir/$dir $output_dir/$dir/asso.json $original -a  -o $output_dir/result_eval/$dir/;
+    done;
+    dir=extension_2
+    for me in ${mesure[@]}; do
+        echo $me" for STR "$dir;
+        python3 eval.py $me $path_texts  $output_dir/$dir $output_dir/$dir/asso.json $original -a  -o $output_dir/result_eval/$dir/;
+    done;
+fi
\ No newline at end of file
--- a/exp_30mars.sh
+++ b/exp_30mars.sh
 #!/usr/bin/env bash
-#!/usr/bin/env bash
 path_texts=data/MADA_LIGHT_raw
 output_dir=data/graph_exp_mar_30

--- a/generate_data_csv.py
+++ b/generate_data_csv.py
+# coding = utf-8
+# coding = utf-8
+import argparse,glob,logging,string,time,re
+from concurrent.futures import ThreadPoolExecutor
+from langdetect import detect
+from progressbar import ProgressBar, Timer, Bar, ETA, Counter
+from helpers.boundary import get_all_shapes
+from nlp.disambiguator.geodict_gaurav import *
+from pipeline import *
+import logging
+logging.basicConfig(filename=config.log_file)
+logging.basicConfig(format='%(asctime)s %(message)s')
+def filter_nonprintable(text):
+    # Get the difference of all ASCII characters from the set of printable characters
+    nonprintable = set([chr(i) for i in range(128)]).difference(string.printable)
+    # Use translate to remove all non-printable characters
+    return text.translate({ord(character):None for character in nonprintable})
+parser = argparse.ArgumentParser()
+parser.add_argument("csv_input_dir")
+parser.add_argument("graphs_output_dir")
+parser.add_argument("metadata_output_fn")
+subparsers = parser.add_subparsers(help='commands')
+normal = subparsers.add_parser(
+    'normal', help='Basic STR generation. No argument are necessary !')
+normal.set_defaults(which="norm")
+gen_parser = subparsers.add_parser(
+    'generalisation', help='Apply a generalisation transformation on the generated STRs')
+gen_parser.set_defaults(which="gene")
+gen_parser.add_argument(
+    '-t','--type_gen', help='Type of generalisation',default="all")
+gen_parser.add_argument(
+    '-n', help='Language',default=1)
+gen_parser.add_argument(
+    '-b','--bound', help='If Generalisation is bounded, this arg. correspond'
+                         'to the maximal ',default="country")
+ext_parser = subparsers.add_parser(
+    'extension', help='Apply a extension process on the STRs')
+ext_parser.set_defaults(which="ext")
+ext_parser.add_argument(
+    '-d','--distance', help='radius distance',default=150)
+ext_parser.add_argument(
+    '-u','--unit', help='unit used for the radius distance',default="km")
+ext_parser.add_argument(
+    '-a','--adjacent_count', help='number of adjacent SE add to the STR',default=1)
+args = parser.parse_args()
+if "which" in args:
+    if args.which =="gene":
+        args.type_trans="gen"
+    elif args.which =="ext":
+        args.type_trans="ext"
+print("Parameters entered : ",args)
+if os.path.exists(args.csv_input_dir):
+    files_glob= glob.glob(args.csv_input_dir+"/*.csv")
+else:
+    exit()
+start = time.time()
+associated_es={}
+count_per_doc={}
+i=0
+logging.info("Get associated spatial entities and ")
+with ProgressBar(max_value=len(files_glob),widgets=[' [', Timer(), '] ',Bar(),'(', Counter(),')','(', ETA(), ')']) as pg:
+    for fn in files_glob:
+        id_=int(re.findall("\d+", fn)[-1])
+        df=pd.read_csv(fn)
+        try:
+            df=df[(df["GID"]!='O') & (df.GID.notnull())]
+        except:
+            df = df[(df.GID.notnull())]
+        try:
+            count_per_doc[id_]=json.loads(df.groupby("GID").GID.count().to_json())
+            associated_es[id_] = df[["GID","text"]].groupby("GID",as_index=False).max().set_index('GID').to_dict()["text"]
+        except:
+            count_per_doc[id_]={}
+            associated_es[id_]={}
+        pg.update(i)
+        i+=1
+logging.info("Fetch list of spatial entities available !")
+all_es=set([])
+for k,v in associated_es.items():
+    for k2 in v:
+        all_es.add(k2)
+logging.info("Get All Shapes from Database for all ES")
+all_shapes=get_all_shapes(list(all_es))
+#print(all_shapes.keys())
+i=0
+with ProgressBar(max_value=len(files_glob),
+                 widgets=[' [', Timer(), '] ', Bar(), '(', Counter(), ')', '(', ETA(), ')']) as pg:
+    for fn in files_glob:
+        id_ = int(re.findall("\d+", fn)[-1])
+        df = pd.read_csv(fn)
+        try:
+            df = df[(df["GID"] != 'O') & (df.GID.notnull())]
+        except:
+            df = df[(df.GID.notnull())]
+        df["label"]=df.GID.apply(lambda x:get_data(x)["en"])
+        df = df.rename(columns={"GID": "id"})
+        str_=STR.from_pandas(df,[],all_shapes).build()
+        nx.write_gexf(str_, args.graphs_output_dir + "/{0}.gexf".format(id_))
+        i+=1
+        pg.update(i)
+# Save Metadata
+open(os.path.join(args.graphs_output_dir,args.metadata_output_fn),'w').write(json.dumps([associated_es,count_per_doc],indent=4))
+print("--- %s seconds ---" % (time.time() - start))
\ No newline at end of file
--- a/generate_transform.py
+++ b/generate_transform.py
@@ -8,8 +8,8 @@ from concurrent.futures import ThreadPoolExecutor
 from progressbar import ProgressBar, Timer, Bar, ETA, Counter
-from .nlp.disambiguator.geodict_gaurav import *
+from nlp.disambiguator.geodict_gaurav import *
-from .pipeline import *
+from pipeline import *
 parser = argparse.ArgumentParser()
 parser.add_argument("graphs_input_dir")

--- a/gmatch4py/ged/geo_bp2.py
+++ b/gmatch4py/ged/geo_bp2.py
-# coding = utf-8
-# coding = utf-8
-from gmatch4py.ged.bipartite_graph_matching_2 import BP_2
-from gmatch4py.utils import *
-class GeoBP2(BP_2):
-    """"""
-    def __init__(self, node_del=1, node_ins=2, edge_del=1, edge_ins=1):
-        """Constructor for GeoHED"""
-        BP_2.__init__(self, node_del, node_ins, edge_del, edge_ins)
-    def geo_distance(self, g1, g2, node1, node2):
-        g1_info = get_nodes_geolocalization(g1)
-        g2_info = get_nodes_geolocalization(g2)
-        return get_distance_two_entity(node1, node2, g1_info, g2_info)
-    def fuv(self, g1, g2, n1, n2):
-        if n1 and n2:
-            return super().fuv(g1, g2, n1, n2) + self.geo_distance(g1, g2, n1, n2)
-        else:
-            return super().fuv(g1, g2, n1, n2)
--- a/gmatch4py/ged/geo_ged.py
+++ b/gmatch4py/ged/geo_ged.py
-# coding = utf-8
-from gmatch4py.ged.algorithm.graph_edit_dist import GraphEditDistance
-from gmatch4py.utils import *
-_cache_g_info={}
-class GeoGED(GraphEditDistance):
-    """"""
-    def __init__(self,g1,g2,debug=False):
-        """Constructor for GeoGED"""
-        GraphEditDistance.__init__(self,g1,g2,debug)
-        if not ",".join(g1.nodes()) in _cache_g_info:
-            self.g1_info = get_nodes_geolocalization(g1)
-            _cache_g_info[",".join(g1.nodes())] = self.g1_info
-        else:
-            self.g1_info = _cache_g_info[",".join(g1.nodes())]
-        if not ",".join(g2.nodes()) in _cache_g_info:
-            self.g2_info = get_nodes_geolocalization(g2)
-            _cache_g_info[",".join(g2.nodes())] = self.g2_info
-        else:
-            self.g2_info=_cache_g_info[",".join(g2.nodes())]
-    @staticmethod
-    def compare(listgs, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1):
-        n = len(listgs)
-        comparison_matrix = np.zeros((n, n))
-        for i in range(n):
-            for j in range(i, n):
-                comparison_matrix[i, j] = GraphEditDistance(listgs[i], listgs[j], False, node_del=c_del_node,
-                                                            node_ins=c_ins_node, edge_del=c_del_edge,
-                                                            edge_ins=c_ins_edge).distance()
-                comparison_matrix[j, i] = comparison_matrix[
-                    i, j]  # Unethical ! Since AGED is not a symmetric similarity measure !
-        return comparison_matrix
-    def insert_geo_distance(self,node2):
-        # If one nodes given, compute average distance
-        avg_=[]
-        for node in self.g1:
-            avg_.append(get_distance_two_entity(node,node2,self.g1_info,self.g2_info))
-        if avg_:
-            return np.mean(avg_)
-        return 0
-    def del_geo_distance(self,node1):
-        # If one nodes given, compute average distance
-        avg_=[]
-        for node in self.g1:
-            if node == node1:continue
-            avg_.append(get_distance_two_entity(node,node1,self.g1_info,self.g1_info))
-        if avg_:
-            return np.mean(avg_)
-        return 0
-    def insert_cost(self, i, j, nodes2):
-        return super().insert_cost(i,j,nodes2)+self.insert_geo_distance(nodes2[j])
-    def delete_cost(self, i, j, nodes1):
-        return super().insert_cost(i,j,nodes1)+self.del_geo_distance(nodes1[i])
\ No newline at end of file
--- a/gmatch4py/ged/geo_hed.py
+++ b/gmatch4py/ged/geo_hed.py
-# coding = utf-8
-from gmatch4py.ged.hausdorff_edit_distance import  HED
-from gmatch4py.utils import *
-class GeoHED(HED):
-    """"""
-    def __init__(self,node_del=1, node_ins=2, edge_del=1, edge_ins=1):
-        """Constructor for GeoHED"""
-        HED.__init__(self,node_del, node_ins, edge_del, edge_ins)
-    @staticmethod
-    def compare(listgs, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1):
-        n = len(listgs)
-        comparator = GeoHED(c_del_node, c_ins_node, c_del_edge, c_ins_edge)
-        comparison_matrix = np.zeros((n, n))
-        for i in range(n):
-            for j in range(i, n):
-                comparison_matrix[i, j] = comparator.hed(listgs[i], listgs[j])
-                comparison_matrix[j, i] = comparison_matrix[i, j]
-        return comparison_matrix
-    def geo_distance(self,g1,g2,node1,node2):
-        g1_info = get_nodes_geolocalization(g1)
-        g2_info = get_nodes_geolocalization(g2)
-        return get_distance_two_entity(node1,node2,g1_info,g2_info)
-    def fuv(self, g1, g2, n1, n2):
-        if n1 and n2:
-            return super().fuv(g1,g2,n1,n2)+ self.geo_distance(g1,g2,n1,n2)
-        else:
-            return super().fuv(g1, g2, n1, n2)
--- a/gmatch4py_cython/README.md
+++ b/gmatch4py_cython/README.md
+# Gmatch4py: a graph matching library for Python
+Gmatch4py is a library dedicated to graph matching. Graph structures are stored in NetworkX graph objects.
+## List of algorithms
+ * DeltaCon and DeltaCon0 (*debug needed*) [1]
+ * Vertex Ranking (*debug needed*) [2]
+ * Vertex Edge Overlap [2]
+ * Graph kernels
+    * Random Walk Kernel (*debug needed*) [3]
+        * Geometrical 
+        * K-Step 
+    * Shortest Path Kernel [3]
+    * Weisfeiler-Lehman Kernel [4]
+        * Subtree Kernel 
+        * Edge Kernel
+        * Subtree Geo Kernel [new]
+        * Edge Geo Kernel [new]
+ * Graph Edit Distance [5]
+    * Approximated Graph Edit Distance 
+    * Hausdorff Graph Edit Distance 
+    * Bipartite Graph Edit Distance 
+    * Greedy Edit Distance
+ * MCS [6]
+## Associated publications
+  * [1] Koutra, D., Vogelstein, J. T., & Faloutsos, C. (2013, May). Deltacon: A principled massive-graph similarity function. In Proceedings of the 2013 SIAM International Conference on Data Mining (pp. 162-170). Society for Industrial and Applied Mathematics.
+  * [2] Papadimitriou, P., Dasdan, A., & Garcia-Molina, H. (2010). Web graph similarity for anomaly detection. Journal of Internet Services and Applications, 1(1), 19-30.
+  * [3] Vishwanathan, S. V. N., Schraudolph, N. N., Kondor, R., & Borgwardt, K. M. (2010). Graph kernels. Journal of Machine Learning Research, 11(Apr), 1201-1242.
+  * [4] Shervashidze, N., Schweitzer, P., Leeuwen, E. J. V., Mehlhorn, K., & Borgwardt, K. M. (2011). Weisfeiler-lehman graph kernels. Journal of Machine Learning Research, 12(Sep), 2539-2561.
+  * [5] Fischer, A., Riesen, K., & Bunke, H. (2017). Improved quadratic time approximation of graph edit distance by combining Hausdorff matching and greedy assignment. Pattern Recognition Letters, 87, 55-62.
+  * [6] Bunke, H., & Shearer, K. (1998). A graph distance metric based on the maximal common subgraph. Pattern Recognition Letters, 19(3-4), 255-259.
+## Authors
+Jacques Fize
+## TODO
+  * Debug the algorithms marked with (*debug needed*)
\ No newline at end of file
--- a/gmatch4py_cython/__init__.pyx
+++ b/gmatch4py_cython/__init__.pyx
+__version__ = "0.1"
--- a/gmatch4py_cython/bag_of_cliques.pyx
+++ b/gmatch4py_cython/bag_of_cliques.pyx
+# coding = utf-8
+import copy
+from typing import Sequence
+import networkx as nx
+import numpy as np
+class BagOfCliques():
+    @staticmethod
+    def compare(graphs):
+        b=BagOfCliques()
+        bog=b.getBagOfCliques(graphs)
+        #Compute cosine similarity
+        scores=np.dot(bog,bog.T)
+        for i in range(len(scores)):
+            for j in range(len(scores)):
+                scores[i,j]/=(np.sqrt(np.sum(bog[i]**2))*np.sqrt(np.sum(bog[j]**2))) # Can be computed in one line
+        return scores
+    def getUniqueCliques(self,graphs):
+        """
+        Return unique cliques from a population of graphs
+        :return:
+        """
+        tree = {}
+        c_ = 0
+        clique_vocab = []
+        for g in graphs:
+            cliques = list(nx.algorithms.clique.find_cliques(nx.Graph(g)))
+            for clique in cliques:
+                t = tree
+                cli_temp = copy.deepcopy(clique)
+                new_clique = False
+                for i in range(len(clique)):
+                    flag = False
+                    v = None  # vertex deleted
+                    for vertex in cli_temp:
+                        if vertex in t:
+                            v = vertex
+                            flag = True
+                    if not flag in t:
+                        v = cli_temp[0]
+                        t[v] = {}
+                        new_clique = True
+                    t = t[v]
+                    cli_temp.remove(v)
+                if new_clique:
+                    c_ += 1
+                    clique_vocab.append(clique)
+        return clique_vocab
+    def ifHaveMinor(self,G: nx.Graph, H: list):
+        """
+        If a clique (minor) H belong to a graph G
+        :param H:
+        :return:
+        """
+        if nx.Graph(G).subgraph(H).nodes() == H:
+            return 1
+        return 0
+    def getBagOfCliques(self,graphs : Sequence[nx.Graph]):
+        """
+        :param clique_vocab:
+        :return:
+        """
+        clique_vocab=self.getUniqueCliques(graphs)
+        l_v=len(clique_vocab)
+        boc = np.zeros((len(graphs), l_v))
+        for g in range(len(graphs)):
+            gr = graphs[g]
+            vector = np.zeros(l_v)
+            for m in range(l_v):
+                vector[m] = self.ifHaveMinor(gr, clique_vocab[m])
+            boc[g] = vector
+        return boc
\ No newline at end of file
--- a/gmatch4py_cython/deltacon.pyx
+++ b/gmatch4py_cython/deltacon.pyx
+# coding = utf-8
+import networkx as nx
+import numpy as np
+import scipy.sparse
+class DeltaCon0():
+    __type__ = "sim"
+    @staticmethod
+    def compare(list_gs):
+        n=len(list_gs)
+        comparison_matrix = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                g1,g2=list_gs[i],list_gs[j]
+                # S1
+                epsilon = 1/(1+DeltaCon0.maxDegree(g1))
+                D, A = DeltaCon0.degreeAndAdjacencyMatrix(g1)
+                S1 = np.linalg.inv(np.identity(len(g1))+(epsilon**2)*D -epsilon*A)
+                # S2
+                D, A = DeltaCon0.degreeAndAdjacencyMatrix(g2)
+                epsilon = 1 / (1 + DeltaCon0.maxDegree(g2))
+                S2 = np.linalg.inv(np.identity(len(g2))+(epsilon**2)*D -epsilon*A)
+                comparison_matrix[i,j] = 1/(1+DeltaCon0.rootED(S1,S2))
+                comparison_matrix[j,i] = comparison_matrix[i,j]
+        return comparison_matrix
+    @staticmethod
+    def rootED(S1,S2):
+        return np.sqrt(np.sum((S1-S2)**2)) # Long live numpy !
+    @staticmethod
+    def degreeAndAdjacencyMatrix(G):
+        """
+        Return the Degree(D) and Adjacency Matrix(A) from a graph G.
+        Inspired of nx.laplacian_matrix(G,nodelist,weight) code proposed by networkx
+        :param G:
+        :return:
+        """
+        A = nx.to_scipy_sparse_matrix(G, nodelist=G.nodes(), weight="weight",
+                                      format='csr')
+        n, m = A.shape
+        diags = A.sum(axis=1)
+        D = scipy.sparse.spdiags(diags.flatten(), [0], m, n, format='csr')
+        return D, A
+    @staticmethod
+    def maxDegree(G):
+        degree_sequence = sorted(nx.degree(G).values(), reverse=True)  # degree sequence
+        # print "Degree sequence", degree_sequence
+        dmax = max(degree_sequence)
+        return dmax
+class DeltaCon():
+    __type__ = "sim"
+    @staticmethod
+    def relabel_nodes(graph_list):
+        label_lookup = {}
+        label_counter = 0
+        n= len(graph_list)
+        # label_lookup is an associative array, which will contain the
+        # mapping from multiset labels (strings) to short labels
+        # (integers)
+        for i in range(n):
+            nodes = graph_list[i].nodes()
+            for j in range(len(nodes)):
+                if not (nodes[j] in label_lookup):
+                    label_lookup[nodes[j]] = label_counter
+                    label_counter += 1
+            graph_list[i] = nx.relabel_nodes(graph_list[i], label_lookup)
+        return graph_list
+    @staticmethod
+    def compare(list_gs, g=3):
+        n=len(list_gs)
+        list_gs=DeltaCon.relabel_nodes(list_gs)
+        comparison_matrix = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                g1,g2=list_gs[i],list_gs[j]
+                V = g1.nodes()
+                V.extend(g2.nodes())
+                V=np.unique(V)
+                partitions=V.copy()
+                np.random.shuffle(partitions)
+                if len(partitions)< g:
+                    partitions=np.array([partitions])
+                else:
+                    partitions=np.array_split(partitions,g)
+                partitions_e_1 = DeltaCon.partitions2e(partitions, g1.nodes())
+                partitions_e_2 = DeltaCon.partitions2e(partitions, g2.nodes())
+                S1,S2=[],[]
+                for k in range(len(partitions)):
+                    s0k1,s0k2=partitions_e_1[k],partitions_e_2[k]
+                    # S1
+                    epsilon = 1/(1+DeltaCon0.maxDegree(g1))
+                    D, A = DeltaCon0.degreeAndAdjacencyMatrix(g1)
+                    s1k = np.linalg.inv(np.identity(len(g1))+(epsilon**2)*D -epsilon*A)
+                    s1k=np.linalg.solve(s1k,s0k1).tolist()
+                    # S2
+                    D, A = DeltaCon0.degreeAndAdjacencyMatrix(g2)
+                    epsilon = 1 / (1 + DeltaCon0.maxDegree(g2))
+                    s2k= np.linalg.inv(np.identity(len(g2))+(epsilon**2)*D -epsilon*A)
+                    s2k = np.linalg.solve(s2k, s0k2).tolist()
+                    S1.append(s1k)
+                    S2.append(s2k)
+                comparison_matrix[i,j] = 1/(1+DeltaCon0.rootED(np.array(S1),np.array(S2)))
+                comparison_matrix[j,i] = comparison_matrix[i,j]
+        return comparison_matrix
+    @staticmethod
+    def partitions2e( partitions, V):
+        e = [ [] for i in range(len(partitions))]
+        for p in range(len(partitions)):
+            e[p] = []
+            for i in range(len(V)):
+                if i in partitions[p]:
+                    e[p].append(1.0)
+                else:
+                    e[p].append(0.0)
+        return e
\ No newline at end of file
--- a/gmatch4py_cython/ged/__init__.pyx
+++ b/gmatch4py_cython/ged/__init__.pyx
+# coding = utf-8
\ No newline at end of file
--- a/gmatch4py_cython/ged/algorithm/__init__.py
+++ b/gmatch4py_cython/ged/algorithm/__init__.py
--- a/gmatch4py_cython/ged/algorithm/abstract_graph_edit_dist.pyx
+++ b/gmatch4py_cython/ged/algorithm/abstract_graph_edit_dist.pyx
+# -*- coding: UTF-8 -*-
+from __future__ import print_function
+import sys
+import numpy as np
+from scipy.optimize import linear_sum_assignment
+class AbstractGraphEditDistance(object):
+    def __init__(self, g1, g2,debug=False,**kwargs):
+        self.g1 = g1
+        self.g2 = g2
+        self.debug=debug
+        self.node_del = kwargs.get("node_del",1)
+        self.node_ins = kwargs.get("node_ins",1)
+        self.edge_del = kwargs.get("edge_del",1)
+        self.edge_ins = kwargs.get("edge_ins",1)
+    def distance(self):
+        opt_path = self.edit_costs()
+        if self.debug:
+            print("Edit path for ",str(self.__class__.__name__),"\n",opt_path)
+        return sum(opt_path)
+    def print_operations(self,cost_matrix,row_ind,col_ind):
+        nodes1 = self.g1.nodes()
+        nodes2 = self.g2.nodes()
+        dn1 = self.g1.node
+        dn2 = self.g2.node
+        n,m=len(nodes1),len(nodes2)
+        for i in range(len(row_ind)):
+            y,x=row_ind[i],col_ind[i]
+            val=cost_matrix[row_ind[i]][col_ind[i]]
+            if x<m and y<n:
+                print("SUB {0} to {1} cost = {2}".format(dn1[nodes1[y]]["label"],dn2[nodes2[x]]["label"],val))
+            elif x <m and y>=n:
+                print("ADD {0} cost = {1}".format(dn2[nodes2[y-n]]["label"],val))
+            elif x>=m and y<n:
+                print("DEL {0} cost = {1}".format(dn1[nodes1[m-x]]["label"],val))
+    def edit_costs(self):
+        cost_matrix = self.create_cost_matrix()
+        if self.debug:
+            np.set_printoptions(precision=3)
+            print("Cost Matrix for ",str(self.__class__.__name__),"\n",cost_matrix)
+        row_ind,col_ind = linear_sum_assignment(cost_matrix)
+        if self.debug:
+            self.print_operations(cost_matrix,row_ind,col_ind)
+        return [cost_matrix[row_ind[i]][col_ind[i]] for i in range(len(row_ind))]
+    def create_cost_matrix(self):
+        """
+        Creates a |N+M| X |N+M| cost matrix between all nodes in
+        graphs g1 and g2
+        Each cost represents the cost of substituting,
+        deleting or inserting a node
+        The cost matrix consists of four regions:
+        substitute 	| insert costs
+        -------------------------------
+        delete 		| delete -> delete
+        The delete -> delete region is filled with zeros
+        """
+        n = len(self.g1)
+        m = len(self.g2)
+        cost_matrix = np.zeros((n+m,n+m))
+        #cost_matrix = [[0 for i in range(n + m)] for j in range(n + m)]
+        nodes1 = self.g1.nodes()
+        nodes2 = self.g2.nodes()
+        for i in range(n):
+            for j in range(m):
+                cost_matrix[i,j] = self.substitute_cost(nodes1[i], nodes2[j])
+        for i in range(m):
+            for j in range(m):
+                cost_matrix[i+n,j] = self.insert_cost(i, j, nodes2)
+        for i in range(n):
+            for j in range(n):
+                cost_matrix[j,i+m] = self.delete_cost(i, j, nodes1)
+        self.cost_matrix = cost_matrix
+        return cost_matrix
+    def insert_cost(self, i, j):
+        raise NotImplementedError
+    def delete_cost(self, i, j):
+        raise NotImplementedError
+    def substitute_cost(self, nodes1, nodes2):
+        raise NotImplementedError
+    def print_matrix(self):
+        print("cost matrix:")
+        print(self.g1.nodes())
+        print(self.g2.nodes())
+        print(np.array(self.create_cost_matrix()))
+        for column in self.create_cost_matrix():
+            for row in column:
+                if row == sys.maxsize:
+                    print ("inf\t")
+                else:
+                    print ("%.2f\t" % float(row))
+            print("")
--- a/gmatch4py_cython/ged/algorithm/edge_edit_dist.pyx
+++ b/gmatch4py_cython/ged/algorithm/edge_edit_dist.pyx
+import sys
+from gmatch4py.ged.algorithm.abstract_graph_edit_dist import AbstractGraphEditDistance
+class EdgeEditDistance(AbstractGraphEditDistance):
+    """
+    Calculates the graph edit distance between two edges.
+    A node in this context is interpreted as a graph,
+    and edges are interpreted as nodes.
+    """
+    def __init__(self, g1, g2,**kwargs):
+        AbstractGraphEditDistance.__init__(self, g1, g2,**kwargs)
+    def insert_cost(self, i, j, nodes2):
+        if i == j:
+            return self.edge_ins
+        return sys.maxsize
+    def delete_cost(self, i, j, nodes1):
+        if i == j:
+            return self.edge_del
+        return sys.maxsize
+    def substitute_cost(self, edge1, edge2):
+        if edge1 == edge2:
+            return 0.
+        return self.edge_del+self.edge_ins
--- a/gmatch4py_cython/ged/algorithm/graph_edit_dist.pyx
+++ b/gmatch4py_cython/ged/algorithm/graph_edit_dist.pyx
+# -*- coding: UTF-8 -*-
+import sys
+import networkx as nx
+from gmatch4py.ged.algorithm.abstract_graph_edit_dist import AbstractGraphEditDistance
+from gmatch4py.ged.algorithm.edge_edit_dist import EdgeEditDistance
+from gmatch4py.ged.graph.edge_graph import EdgeGraph
+def compare(g1, g2, print_details=False):
+    ged = GraphEditDistance(g1, g2,print_details)
+    return ged.distance()
+class GraphEditDistance(AbstractGraphEditDistance):
+    def __init__(self, g1, g2,debug=False,**kwargs):
+        AbstractGraphEditDistance.__init__(self, g1, g2,debug,**kwargs)
+    def substitute_cost(self, node1, node2):
+        return self.relabel_cost(node1, node2) + self.edge_diff(node1, node2)
+    def relabel_cost(self, node1, node2):
+        if node1 == node2:
+            edges1=set(self.get_edge_multigraph(self.g1,node1))
+            edges2=set(self.get_edge_multigraph(self.g2,node2))
+            return abs(len(edges2.difference(edges1))) # Take in account if there is a different number of edges
+        else:
+            return self.node_ins+self.node_del
+    def delete_cost(self, i, j, nodes1):
+        if i == j:
+            return self.node_del+self.g1.degree(nodes1[i]) # Deleting a node implicate to delete in and out edges
+        return sys.maxsize
+    def insert_cost(self, i, j, nodes2):
+        if i == j:
+            deg=self.g2.degree(nodes2[j])
+            if isinstance(deg,dict):deg=0
+            return self.node_ins+deg
+        else:
+            return sys.maxsize
+    def get_edge_multigraph(self,g,node):
+        edges=[]
+        for id_,val in g.edge[node].items():
+            if not 0 in val:
+                edges.append(str(id_) + val["color"])
+            else:
+                for _,edge in val.items():
+                    edges.append(str(id_)+edge["color"])
+        return edges
+    def edge_diff(self, node1, node2):
+        if isinstance(self.g1,nx.MultiDiGraph):
+            edges1 = self.get_edge_multigraph(self.g1,node1)
+            edges2 = self.get_edge_multigraph(self.g2,node2)
+        else:
+            edges1 = list(self.g1.edge[node1].keys())
+            edges2 = list(self.g2.edge[node2].keys())
+        if len(edges1) == 0 or len(edges2) == 0:
+            return max(len(edges1), len(edges2))
+        edit_edit_dist = EdgeEditDistance(
+            EdgeGraph(node1,edges1),
+            EdgeGraph(node2,edges2),
+            edge_del=self.edge_del,edge_ins=self.edge_ins,node_ins=self.node_ins,node_del=self.node_del
+        )
+        return edit_edit_dist.distance()
--- a/gmatch4py_cython/ged/approximate_ged.pyx
+++ b/gmatch4py_cython/ged/approximate_ged.pyx
+# coding = utf-8
+import numpy as np
+from .algorithm.graph_edit_dist import GraphEditDistance
+class ApproximateGraphEditDistance():
+    __type__ = "dist"
+    @staticmethod
+    def compare(listgs,c_del_node=1,c_del_edge=1,c_ins_node=1,c_ins_edge=1):
+        n= len(listgs)
+        comparison_matrix = np.zeros((n,n))
+        for i in range(n):
+            for j in range(i,n):
+                comparison_matrix[i,j]= GraphEditDistance(listgs[i],listgs[j],False,node_del=c_del_node,node_ins=c_ins_node,edge_del=c_del_edge,edge_ins=c_ins_edge).distance()
+                comparison_matrix[j,i]= comparison_matrix[i,j] # Unethical ! Since AGED is not a symmetric similarity measure !
+        return comparison_matrix
\ No newline at end of file
--- a/gmatch4py_cython/ged/bipartite_graph_matching_2.pyx
+++ b/gmatch4py_cython/ged/bipartite_graph_matching_2.pyx
+# coding = utf-8
+import numpy as np
+class BP_2():
+    """
+    """
+    __type__="dist"
+    @staticmethod
+    def compare(listgs, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1):
+        n = len(listgs)
+        comparator = BP_2(c_del_node, c_ins_node, c_del_edge, c_ins_edge)
+        comparison_matrix = np.zeros((n, n))
+        for i in range(n):
+            for j in range(i, n):
+                comparison_matrix[i, j] = comparator.bp2(listgs[i], listgs[j])
+                comparison_matrix[j, i] = comparison_matrix[i, j]
+        return comparison_matrix
+    def __init__(self, node_del=1, node_ins=1, edge_del=1, edge_ins=1):
+        """Constructor for HED"""
+        self.node_del = node_del
+        self.node_ins = node_ins
+        self.edge_del = edge_del
+        self.edge_ins = edge_ins
+    def bp2(self, g1, g2):
+        """
+        Compute de Hausdorff Edit Distance
+        :param g1: first graph
+        :param g2: second graph
+        :return:
+        """
+        return min(self.distance(self.psi(g1,g2)),self.distance(self.psi(g2,g1)))
+    def distance(self,e):
+        return np.sum(e)
+    def psi(self,g1,g2):
+        psi_=[]
+        nodes1 = g1.nodes()
+        nodes2 = g2.nodes()
+        for u in nodes1:
+            v=None
+            for w in nodes2:
+                if 2*self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,None) + self.fuv(g1,g2,None,w)\
+                     and self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,v):
+                    v=w
+                psi_.append(self.fuv(g1,g2,u,v))
+            if u:
+                nodes1= list(set(nodes1).difference(set([u])))
+            if v:
+                nodes2= list(set(nodes2).difference(set([v])))
+        for v in nodes2:
+            psi_.append(self.fuv(g1,g2,None,v))
+        return  psi_
+    def fuv(self, g1, g2, n1, n2):
+        """
+        Compute the Node Distance function
+        :param g1: first graph
+        :param g2: second graph
+        :param n1: node of the first graph
+        :param n2: node of the second graph
+        :return:
+        """
+        if n2 == None:  # Del
+            return self.node_del + ((self.edge_del / 2) * g1.degree(n1))
+        if n1 == None:  # Insert
+            return self.node_ins + ((self.edge_ins / 2) * g2.degree(n2))
+        else:
+            if n1 == n2:
+                return 0.
+            return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2
+    def hed_edge(self, g1, g2, n1, n2):
+        """
+        Compute HEDistance between edges of n1 and n2, respectively in g1 and g2
+        :param g1: first graph
+        :param g2: second graph
+        :param n1: node of the first graph
+        :param n2: node of the second graph
+        :return:
+        """
+        return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2)
+    def get_edge_multigraph(self, g, node):
+        """
+        Get list of edge around a node in a Multigraph
+        :param g: multigraph
+        :param node: node in the multigraph
+        :return:
+        """
+        edges = []
+        for edge in g.edges(data=True):
+            if node == edge[0] or node == edge[1]:
+                edges.append("{0}-{1}-{2}".format(edge[0],edge[1],edge[2]["color"]))
+        return edges
+    def sum_gpq(self, g1, n1, g2, n2):
+        """
+        Compute Nearest Neighbour Distance between edges around n1 in G1  and edges around n2 in G2
+        :param g1: first graph
+        :param n1: node in the first graph
+        :param g2: second graph
+        :param n2: node in the second graph
+        :return:
+        """
+        #if isinstance(g1, nx.MultiDiGraph):
+        edges1 = self.get_edge_multigraph(g1, n1)
+        edges2 = self.get_edge_multigraph(g2, n2)
+        #else:
+            #print(1)
+            #edges1 = [str(n1 + "-" + ef) for ef in list(g1.edge[n1].keys())]
+            #edges2 = [str(n2 + "-" + ef) for ef in list(g2.edge[n2].keys())]
+        edges2.extend([None])
+        min_sum = np.zeros(len(edges1))
+        for i in range(len(edges1)):
+            min_i = np.zeros(len(edges2))
+            for j in range(len(edges2)):
+                min_i[j] = self.gpq(edges1[i], edges2[j])
+            min_sum[i] = np.min(min_i)
+        return np.sum(min_sum)
+    def gpq(self, e1, e2):
+        """
+        Compute the edge distance function
+        :param e1: edge1
+        :param e2: edge2
+        :return:
+        """
+        if e2 == None:  # Del
+            return self.edge_del
+        if e1 == None:  # Insert
+            return self.edge_ins
+        else:
+            if e1 == e2:
+                return 0.
+            return (self.edge_del + self.edge_ins) / 2
--- a/gmatch4py_cython/ged/graph/__init__.pyx
+++ b/gmatch4py_cython/ged/graph/__init__.pyx