From 5da5fbd1c1699a58b6f0cfe6a367e342689425bf Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Tue, 12 Mar 2019 11:35:16 +0100
Subject: [PATCH] Debug

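Remove the unused node2vec, word2vec, tt4py and delozier modules, trim
dead code from the STR model, simplify matrix-filename parsing in
generate_annotation_file.py, make auto_fill_annotation.py tolerant of
missing graph pairs, widen the .gexf glob in
generate_selected_document.py and refresh the MADA result-analysis
notebook on final_test.csv.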
---
 .gitignore                                    |   2 +-
 README.md                                     |   2 +-
 auto_fill_annotation.py                       |  12 +-
 generate_annotation_file.py                   |   5 +-
 generate_selected_document.py                 |   2 +-
 .../Result_AnaysisV2_MADA.ipynb               | 410 ++++++------
 strpython/models/node2vec.py                  | 191 ------
 strpython/models/str.py                       | 601 ++++--------------
 strpython/models/word2vec.py                  | 178 ------
 .../nlp/disambiguator/delozier/__init__.py    |   1 -
 strpython/nlp/disambiguator/delozier/grid.py  |  78 ---
 strpython/nlp/disambiguator/delozier/utils.py |  11 -
 strpython/tt4py/__init__.py                   |   7 -
 strpython/tt4py/helpers.py                    |  30 -
 strpython/tt4py/tt4py.py                      | 239 -------
 15 files changed, 384 insertions(+), 1385 deletions(-)
 delete mode 100644 strpython/models/node2vec.py
 delete mode 100644 strpython/models/word2vec.py
 delete mode 100644 strpython/nlp/disambiguator/delozier/__init__.py
 delete mode 100644 strpython/nlp/disambiguator/delozier/grid.py
 delete mode 100644 strpython/nlp/disambiguator/delozier/utils.py
 delete mode 100644 strpython/tt4py/__init__.py
 delete mode 100644 strpython/tt4py/helpers.py
 delete mode 100644 strpython/tt4py/tt4py.py

diff --git a/.gitignore b/.gitignore
index 0e7aa1d..4a97748 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,7 +13,7 @@ __pycache__/
 !/tests/
 .DS_Store
 .Rproj.user
-.vscode/*
+.vscode/
 data/
 csv_results
 depreciated
diff --git a/README.md b/README.md
index 64fd0de..207a270 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-#STR
+# STR
 
 This repository contains all the work on STR or Spatial Textual Representation. The file
 hierarchy is divided in multiple modules such as :
diff --git a/auto_fill_annotation.py b/auto_fill_annotation.py
index 52b49c8..abcddfc 100644
--- a/auto_fill_annotation.py
+++ b/auto_fill_annotation.py
@@ -38,17 +38,17 @@ for file in glob.glob(os.path.join(str_graph_path, "*.gexf")):
 def foo(x):
     try:
         return annotater.all(strs[x.G1], strs[x.G2],x.G1, x.G2)
-    except Exception as e:
+    except KeyError as e:
         print(e)
         return [0, 0, 0, 0]
 
 
 df["res"] = df.progress_apply(lambda x: foo(x), axis=1)
-df.res=df.res.apply(lambda x :list(map(int,x)))
-df[["c1"]] = df.res.apply(lambda x: x[0])
-df[["c2"]] = df.res.apply(lambda x: x[1])
-df[["c3"]] = df.res.apply(lambda x: x[2])
-df[["c4"]] = df.res.apply(lambda x: x[3])
+df.res = df.res.apply(lambda x: list(map(int, x)) if x else [])
+df["c1"] = df.res.apply(lambda x: x[0] if len(x) > 0 else 0)
+df["c2"] = df.res.apply(lambda x: x[1] if len(x) > 1 else 0)
+df["c3"] = df.res.apply(lambda x: x[2] if len(x) > 2 else 0)
+df["c4"] = df.res.apply(lambda x: x[3] if len(x) > 3 else 0)
 
 del df["res"]
 save_cache()
diff --git a/generate_annotation_file.py b/generate_annotation_file.py
index ec00d2a..d18abc9 100644
--- a/generate_annotation_file.py
+++ b/generate_annotation_file.py
@@ -33,10 +33,7 @@ selected = json.load(open(args.selectedFile))
 for fn in matrix_fns:
     measure = os.path.basename(fn).split("_")[0]
     
-    if os.path.basename(fn).split("_")[-2] in ["extension","gen"] or os.path.basename(fn).split("_")[-1].replace(".npy.bz2", "") in ["window"]:
-        type_ = "_".join(os.path.basename(fn).split("_")[-2:]).replace(".npy.bz2", "")
-    else:
-        type_ = "_".join(os.path.basename(fn).split("_")[-1:]).replace(".npy.bz2", "")
+    type_= "_".join(fn.split("_")[1:]).replace(".npy.bz2","")
     print("Proceeding...",measure, type_)
     df = matrix_to_pandas_dataframe(np.nan_to_num(read_bz2_matrix(fn)),
                                     selected,
diff --git a/generate_selected_document.py b/generate_selected_document.py
index 2a57623..cfbc56b 100644
--- a/generate_selected_document.py
+++ b/generate_selected_document.py
@@ -8,7 +8,7 @@ parser.add_argument("graph_input_dir")
 args=parser.parse_args()
 
 graphs={}
-for file in glob.glob(args.graph_input_dir+"/normal/*.gexf"):
+for file in glob.glob(args.graph_input_dir+"/*.gexf"):
     id=int(re.findall("\d+",file)[-1])
     graphs[id]=nx.read_gexf(file)
 
diff --git a/notebooks/MatchingAnalysis/Result_AnaysisV2_MADA.ipynb b/notebooks/MatchingAnalysis/Result_AnaysisV2_MADA.ipynb
index 81d283c..836ce6f 100644
--- a/notebooks/MatchingAnalysis/Result_AnaysisV2_MADA.ipynb
+++ b/notebooks/MatchingAnalysis/Result_AnaysisV2_MADA.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2018-09-28T05:03:07.327486Z",
@@ -27,7 +27,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2018-09-28T05:03:09.093753Z",
@@ -36,7 +36,7 @@
    },
    "outputs": [],
    "source": [
-    "data=pd.read_csv(\"../../result_debug.csv\",index_col=0)\n",
+    "data=pd.read_csv(\"../../final_test.csv\",index_col=0)\n",
     "data=data[data.mesure != \"BP\"]\n",
     "data[\"mean\"]=np.mean(data[\"c1 c2 c3 c4\".split()].values,axis=1)\n",
     "data[\"sum\"]=np.sum(data[\"c1 c2 c3 c4\".split()].values,axis=1)\n",
@@ -47,7 +47,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -67,7 +67,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 4,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2018-09-26T12:55:10.491478Z",
@@ -122,7 +122,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2018-09-26T12:55:10.899176Z",
@@ -134,148 +134,121 @@
      "data": {
       "text/html": [
        "<style  type=\"text/css\" >\n",
-       "    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row0_col0 {\n",
+       "    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row0_col3 {\n",
        "            background-color:  yellow;\n",
        "            : ;\n",
-       "        }    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row0_col2 {\n",
-       "            background-color:  yellow;\n",
+       "        }    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col0 {\n",
        "            : ;\n",
-       "        }    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row0_col3 {\n",
-       "            background-color:  yellow;\n",
-       "            : ;\n",
-       "        }    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col0 {\n",
+       "            background-color:  #d64541;\n",
+       "            color: white;\n",
+       "        }    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col1 {\n",
        "            : ;\n",
        "            background-color:  #d64541;\n",
        "            color: white;\n",
-       "        }    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col1 {\n",
+       "        }    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col2 {\n",
        "            : ;\n",
        "            background-color:  #d64541;\n",
        "            color: white;\n",
-       "        }    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col2 {\n",
+       "        }    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col3 {\n",
        "            : ;\n",
        "            background-color:  #d64541;\n",
        "            color: white;\n",
-       "        }    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col5 {\n",
+       "        }    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col5 {\n",
        "            : ;\n",
        "            background-color:  #d64541;\n",
        "            color: white;\n",
-       "        }    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row7_col5 {\n",
+       "        }    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row5_col5 {\n",
        "            background-color:  yellow;\n",
        "            : ;\n",
-       "        }    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row10_col1 {\n",
+       "        }    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row6_col0 {\n",
        "            background-color:  yellow;\n",
        "            : ;\n",
-       "        }    #T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row10_col3 {\n",
+       "        }    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row7_col1 {\n",
+       "            background-color:  yellow;\n",
        "            : ;\n",
-       "            background-color:  #d64541;\n",
-       "            color: white;\n",
-       "        }</style><table id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820\" ><thead>    <tr>        <th class=\"blank level0\" ></th>        <th class=\"col_heading level0 col0\" >c1</th>        <th class=\"col_heading level0 col1\" >c2</th>        <th class=\"col_heading level0 col2\" >c3</th>        <th class=\"col_heading level0 col3\" >c4</th>        <th class=\"col_heading level0 col4\" >mean</th>        <th class=\"col_heading level0 col5\" >sum</th>    </tr>    <tr>        <th class=\"index_name level0\" >mesure</th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>    </tr></thead><tbody>\n",
+       "        }    #T_fa7389a2_43d2_11e9_991b_6a0002e84820row7_col2 {\n",
+       "            background-color:  yellow;\n",
+       "            : ;\n",
+       "        }</style><table id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820\" ><thead>    <tr>        <th class=\"blank level0\" ></th>        <th class=\"col_heading level0 col0\" >c1</th>        <th class=\"col_heading level0 col1\" >c2</th>        <th class=\"col_heading level0 col2\" >c3</th>        <th class=\"col_heading level0 col3\" >c4</th>        <th class=\"col_heading level0 col4\" >mean</th>        <th class=\"col_heading level0 col5\" >sum</th>    </tr>    <tr>        <th class=\"index_name level0\" >mesure</th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>        <th class=\"blank\" ></th>    </tr></thead><tbody>\n",
        "                <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >BOW</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row0_col0\" class=\"data row0 col0\" >0.953636</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row0_col1\" class=\"data row0 col1\" >0.26</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.926</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.473091</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.653182</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row0_col5\" class=\"data row0 col5\" >2.61273</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >BagOfCliques</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row1_col0\" class=\"data row1 col0\" >0.8932</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row1_col1\" class=\"data row1 col1\" >0.3072</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.8188</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.3532</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.5931</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row1_col5\" class=\"data row1 col5\" >2.3724</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row2\" class=\"row_heading level0 row2\" >GraphEditDistance</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row2_col0\" class=\"data row2 col0\" >0.918909</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row2_col1\" class=\"data row2 col1\" >0.227091</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row2_col2\" class=\"data row2 col2\" >0.891818</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row2_col3\" class=\"data row2 col3\" >0.458182</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row2_col4\" class=\"data row2 col4\" >0.624</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row2_col5\" class=\"data row2 col5\" >2.496</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row3\" class=\"row_heading level0 row3\" >GraphEditDistanceW</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row3_col0\" class=\"data row3 col0\" >0.926333</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row3_col1\" class=\"data row3 col1\" >0.2</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row3_col2\" class=\"data row3 col2\" >0.901</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row3_col3\" class=\"data row3 col3\" >0.469333</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row3_col4\" class=\"data row3 col4\" >0.624167</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row3_col5\" class=\"data row3 col5\" >2.49667</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row4\" class=\"row_heading level0 row4\" >GreedyEditDistance</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col0\" class=\"data row4 col0\" >0.0315385</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col1\" class=\"data row4 col1\" >0.0472308</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col2\" class=\"data row4 col2\" >0.0723077</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col3\" class=\"data row4 col3\" >0.0124615</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col4\" class=\"data row4 col4\" >0.0408846</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row4_col5\" class=\"data row4 col5\" >0.163538</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row5\" class=\"row_heading level0 row5\" >HED</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row5_col0\" class=\"data row5 col0\" >0.887636</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row5_col1\" class=\"data row5 col1\" >0.237091</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row5_col2\" class=\"data row5 col2\" >0.826</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row5_col3\" class=\"data row5 col3\" >0.363636</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row5_col4\" class=\"data row5 col4\" >0.578591</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row5_col5\" class=\"data row5 col5\" >2.31436</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row6\" class=\"row_heading level0 row6\" >Jaccard</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row6_col0\" class=\"data row6 col0\" >0.938</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row6_col1\" class=\"data row6 col1\" >0.4326</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row6_col2\" class=\"data row6 col2\" >0.9052</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row6_col3\" class=\"data row6 col3\" >0.2934</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row6_col4\" class=\"data row6 col4\" >0.6423</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row6_col5\" class=\"data row6 col5\" >2.5692</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row7\" class=\"row_heading level0 row7\" >MCS</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row7_col0\" class=\"data row7 col0\" >0.9432</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row7_col1\" class=\"data row7 col1\" >0.4278</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row7_col2\" class=\"data row7 col2\" >0.9068</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row7_col3\" class=\"data row7 col3\" >0.3686</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row7_col4\" class=\"data row7 col4\" >0.6616</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row7_col5\" class=\"data row7 col5\" >2.6464</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row8\" class=\"row_heading level0 row8\" >PolyIntersect</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row8_col0\" class=\"data row8 col0\" >0.584</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row8_col1\" class=\"data row8 col1\" >0.4744</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row8_col2\" class=\"data row8 col2\" >0.6972</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row8_col3\" class=\"data row8 col3\" >0.1276</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row8_col4\" class=\"data row8 col4\" >0.4708</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row8_col5\" class=\"data row8 col5\" >1.8832</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row9\" class=\"row_heading level0 row9\" >VertexEdgeOverlap</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row9_col0\" class=\"data row9 col0\" >0.9458</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row9_col1\" class=\"data row9 col1\" >0.3588</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row9_col2\" class=\"data row9 col2\" >0.8928</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row9_col3\" class=\"data row9 col3\" >0.3574</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row9_col4\" class=\"data row9 col4\" >0.6387</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row9_col5\" class=\"data row9 col5\" >2.5548</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820level0_row10\" class=\"row_heading level0 row10\" >WeisfeleirLehmanKernel</th>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row10_col0\" class=\"data row10 col0\" >0.594167</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row10_col1\" class=\"data row10 col1\" >0.762667</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row10_col2\" class=\"data row10 col2\" >0.831333</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row10_col3\" class=\"data row10 col3\" >0.004</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row10_col4\" class=\"data row10 col4\" >0.548042</td>\n",
-       "                        <td id=\"T_d79cca2c_2f61_11e9_a1d4_6a0002e84820row10_col5\" class=\"data row10 col5\" >2.19217</td>\n",
+       "                        <th id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >BOW</th>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row0_col0\" class=\"data row0 col0\" >0.158</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row0_col1\" class=\"data row0 col1\" >0.088</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.15925</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.064</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.117313</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row0_col5\" class=\"data row0 col5\" >0.46925</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >DeepWalk</th>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row1_col0\" class=\"data row1 col0\" >0.1425</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row1_col1\" class=\"data row1 col1\" >0.12375</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.15875</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.02</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.11125</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row1_col5\" class=\"data row1 col5\" >0.445</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820level0_row2\" class=\"row_heading level0 row2\" >Graph2Vec</th>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col0\" class=\"data row2 col0\" >0.00775</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col1\" class=\"data row2 col1\" >0.00775</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col2\" class=\"data row2 col2\" >0.00875</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col3\" class=\"data row2 col3\" >0.0005</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col4\" class=\"data row2 col4\" >0.0061875</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row2_col5\" class=\"data row2 col5\" >0.02475</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820level0_row3\" class=\"row_heading level0 row3\" >GraphEditDistance</th>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row3_col0\" class=\"data row3 col0\" >0.141333</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row3_col1\" class=\"data row3 col1\" >0.0846667</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row3_col2\" class=\"data row3 col2\" >0.158667</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row3_col3\" class=\"data row3 col3\" >0.06</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row3_col4\" class=\"data row3 col4\" >0.111167</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row3_col5\" class=\"data row3 col5\" >0.444667</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820level0_row4\" class=\"row_heading level0 row4\" >Jaccard</th>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row4_col0\" class=\"data row4 col0\" >0.156</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row4_col1\" class=\"data row4 col1\" >0.091</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row4_col2\" class=\"data row4 col2\" >0.156</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row4_col3\" class=\"data row4 col3\" >0.0575</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row4_col4\" class=\"data row4 col4\" >0.115125</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row4_col5\" class=\"data row4 col5\" >0.4605</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820level0_row5\" class=\"row_heading level0 row5\" >MCS</th>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row5_col0\" class=\"data row5 col0\" >0.156889</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row5_col1\" class=\"data row5 col1\" >0.0911111</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row5_col2\" class=\"data row5 col2\" >0.158889</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row5_col3\" class=\"data row5 col3\" >0.0626667</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row5_col4\" class=\"data row5 col4\" >0.117389</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row5_col5\" class=\"data row5 col5\" >0.469556</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820level0_row6\" class=\"row_heading level0 row6\" >VertexEdgeOverlap</th>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row6_col0\" class=\"data row6 col0\" >0.15925</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row6_col1\" class=\"data row6 col1\" >0.089</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row6_col2\" class=\"data row6 col2\" >0.15925</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row6_col3\" class=\"data row6 col3\" >0.0615</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row6_col4\" class=\"data row6 col4\" >0.11725</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row6_col5\" class=\"data row6 col5\" >0.469</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820level0_row7\" class=\"row_heading level0 row7\" >WeisfeleirLehmanKernel</th>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row7_col0\" class=\"data row7 col0\" >0.142</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row7_col1\" class=\"data row7 col1\" >0.1595</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row7_col2\" class=\"data row7 col2\" >0.16</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row7_col3\" class=\"data row7 col3\" >0.004</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row7_col4\" class=\"data row7 col4\" >0.116375</td>\n",
+       "                        <td id=\"T_fa7389a2_43d2_11e9_991b_6a0002e84820row7_col5\" class=\"data row7 col5\" >0.4655</td>\n",
        "            </tr>\n",
        "    </tbody></table>"
       ],
       "text/plain": [
-       "<pandas.io.formats.style.Styler at 0x12a4146d8>"
+       "<pandas.io.formats.style.Styler at 0x12c4d2518>"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -327,7 +300,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 6,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2018-09-26T12:55:10.937714Z",
@@ -339,93 +312,176 @@
      "data": {
       "text/html": [
        "<style  type=\"text/css\" >\n",
-       "    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col2 {\n",
+       "    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col2 {\n",
        "            background-color:  yellow;\n",
        "            : ;\n",
-       "        }    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col3 {\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col4 {\n",
        "            background-color:  yellow;\n",
        "            : ;\n",
-       "        }    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col4 {\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col3 {\n",
        "            background-color:  yellow;\n",
        "            : ;\n",
-       "        }    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col7 {\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col4 {\n",
        "            background-color:  yellow;\n",
        "            : ;\n",
-       "        }    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col2 {\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col5 {\n",
        "            : ;\n",
        "            background-color:  #d64541;\n",
        "            color: white;\n",
-       "        }    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col3 {\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col7 {\n",
+       "            background-color:  yellow;\n",
+       "            : ;\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row3_col7 {\n",
        "            : ;\n",
        "            background-color:  #d64541;\n",
        "            color: white;\n",
-       "        }    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col4 {\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col3 {\n",
+       "            background-color:  yellow;\n",
+       "            : ;\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col4 {\n",
+       "            background-color:  yellow;\n",
+       "            : ;\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col5 {\n",
+       "            : ;\n",
+       "            background-color:  #d64541;\n",
+       "            color: white;\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col7 {\n",
+       "            background-color:  yellow;\n",
+       "            : ;\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row5_col5 {\n",
+       "            background-color:  yellow;\n",
+       "            : ;\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col2 {\n",
+       "            background-color:  yellow;\n",
+       "            : ;\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col4 {\n",
+       "            background-color:  yellow;\n",
+       "            : ;\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col5 {\n",
+       "            background-color:  yellow;\n",
+       "            : ;\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row7_col4 {\n",
        "            : ;\n",
        "            background-color:  #d64541;\n",
        "            color: white;\n",
-       "        }    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col5 {\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col2 {\n",
        "            : ;\n",
        "            background-color:  #d64541;\n",
        "            color: white;\n",
-       "        }    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col7 {\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col3 {\n",
        "            : ;\n",
        "            background-color:  #d64541;\n",
        "            color: white;\n",
-       "        }    #T_d92e5b58_2f61_11e9_ba72_6a0002e84820row3_col5 {\n",
+       "        }    #T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col4 {\n",
        "            background-color:  yellow;\n",
        "            : ;\n",
-       "        }</style><table id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820\" ><thead>    <tr>        <th class=\"blank level0\" ></th>        <th class=\"col_heading level0 col0\" >mesure</th>        <th class=\"col_heading level0 col1\" >type</th>        <th class=\"col_heading level0 col2\" >c1</th>        <th class=\"col_heading level0 col3\" >c2</th>        <th class=\"col_heading level0 col4\" >c3</th>        <th class=\"col_heading level0 col5\" >c4</th>        <th class=\"col_heading level0 col6\" >mean</th>        <th class=\"col_heading level0 col7\" >sum</th>    </tr></thead><tbody>\n",
+       "        }</style><table id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820\" ><thead>    <tr>        <th class=\"blank level0\" ></th>        <th class=\"col_heading level0 col0\" >mesure</th>        <th class=\"col_heading level0 col1\" >type</th>        <th class=\"col_heading level0 col2\" >c1</th>        <th class=\"col_heading level0 col3\" >c2</th>        <th class=\"col_heading level0 col4\" >c3</th>        <th class=\"col_heading level0 col5\" >c4</th>        <th class=\"col_heading level0 col6\" >mean</th>        <th class=\"col_heading level0 col7\" >sum</th>    </tr></thead><tbody>\n",
        "                <tr>\n",
-       "                        <th id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >6</th>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col0\" class=\"data row0 col0\" >MCS</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col1\" class=\"data row0 col1\" >extension_2</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.962</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.426</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.912</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col5\" class=\"data row0 col5\" >0.38</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col6\" class=\"data row0 col6\" >0.67</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row0_col7\" class=\"data row0 col7\" >2.68</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >21</th>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col0\" class=\"data row1 col0\" >GreedyEditDistance</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col1\" class=\"data row1 col1\" >biotexlda_window</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.078</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.1</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.142</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col5\" class=\"data row1 col5\" >0.018</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col6\" class=\"data row1 col6\" >0.0845</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row1_col7\" class=\"data row1 col7\" >0.338</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820level0_row2\" class=\"row_heading level0 row2\" >22</th>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row2_col0\" class=\"data row2 col0\" >HED</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row2_col1\" class=\"data row2 col1\" >devdu_window</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row2_col2\" class=\"data row2 col2\" >0.914</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row2_col3\" class=\"data row2 col3\" >0.192</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row2_col4\" class=\"data row2 col4\" >0.826</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row2_col5\" class=\"data row2 col5\" >0.366</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row2_col6\" class=\"data row2 col6\" >0.5745</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row2_col7\" class=\"data row2 col7\" >2.298</td>\n",
-       "            </tr>\n",
-       "            <tr>\n",
-       "                        <th id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820level0_row3\" class=\"row_heading level0 row3\" >49</th>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row3_col0\" class=\"data row3 col0\" >HED</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row3_col1\" class=\"data row3 col1\" >all</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row3_col2\" class=\"data row3 col2\" >0.924</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row3_col3\" class=\"data row3 col3\" >0.2</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row3_col4\" class=\"data row3 col4\" >0.828</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row3_col5\" class=\"data row3 col5\" >0.384</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row3_col6\" class=\"data row3 col6\" >0.584</td>\n",
-       "                        <td id=\"T_d92e5b58_2f61_11e9_ba72_6a0002e84820row3_col7\" class=\"data row3 col7\" >2.336</td>\n",
+       "                        <th id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >1</th>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row0_col0\" class=\"data row0 col0\" >VertexEdgeOverlap</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row0_col1\" class=\"data row0 col1\" >gen_country</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.158</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.092</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.158</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row0_col5\" class=\"data row0 col5\" >0.06</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row0_col6\" class=\"data row0 col6\" >0.117</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row0_col7\" class=\"data row0 col7\" >0.468</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >2</th>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col0\" class=\"data row1 col0\" >MCS</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col1\" class=\"data row1 col1\" >object</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.16</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.088</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.16</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col5\" class=\"data row1 col5\" >0.056</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col6\" class=\"data row1 col6\" >0.116</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row1_col7\" class=\"data row1 col7\" >0.464</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820level0_row2\" class=\"row_heading level0 row2\" >3</th>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col0\" class=\"data row2 col0\" >WeisfeleirLehmanKernel</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col1\" class=\"data row2 col1\" >gen_country</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col2\" class=\"data row2 col2\" >0.154</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col3\" class=\"data row2 col3\" >0.16</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col4\" class=\"data row2 col4\" >0.16</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col5\" class=\"data row2 col5\" >0.006</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col6\" class=\"data row2 col6\" >0.12</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row2_col7\" class=\"data row2 col7\" >0.48</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820level0_row3\" class=\"row_heading level0 row3\" >7</th>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row3_col0\" class=\"data row3 col0\" >DeepWalk</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row3_col1\" class=\"data row3 col1\" >ext_1</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row3_col2\" class=\"data row3 col2\" >0.136</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row3_col3\" class=\"data row3 col3\" >0.11</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row3_col4\" class=\"data row3 col4\" >0.158</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row3_col5\" class=\"data row3 col5\" >0.018</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row3_col6\" class=\"data row3 col6\" >0.1055</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row3_col7\" class=\"data row3 col7\" >0.422</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820level0_row4\" class=\"row_heading level0 row4\" >8</th>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col0\" class=\"data row4 col0\" >WeisfeleirLehmanKernel</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col1\" class=\"data row4 col1\" >gen_region</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col2\" class=\"data row4 col2\" >0.154</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col3\" class=\"data row4 col3\" >0.16</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col4\" class=\"data row4 col4\" >0.16</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col5\" class=\"data row4 col5\" >0.006</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col6\" class=\"data row4 col6\" >0.12</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row4_col7\" class=\"data row4 col7\" >0.48</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820level0_row5\" class=\"row_heading level0 row5\" >10</th>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row5_col0\" class=\"data row5 col0\" >MCS</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row5_col1\" class=\"data row5 col1\" >bvlac</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row5_col2\" class=\"data row5 col2\" >0.154</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row5_col3\" class=\"data row5 col3\" >0.092</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row5_col4\" class=\"data row5 col4\" >0.158</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row5_col5\" class=\"data row5 col5\" >0.066</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row5_col6\" class=\"data row5 col6\" >0.1175</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row5_col7\" class=\"data row5 col7\" >0.47</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820level0_row6\" class=\"row_heading level0 row6\" >12</th>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col0\" class=\"data row6 col0\" >BOW</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col1\" class=\"data row6 col1\" >ext_1</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col2\" class=\"data row6 col2\" >0.16</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col3\" class=\"data row6 col3\" >0.084</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col4\" class=\"data row6 col4\" >0.16</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col5\" class=\"data row6 col5\" >0.066</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col6\" class=\"data row6 col6\" >0.1175</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row6_col7\" class=\"data row6 col7\" >0.47</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820level0_row7\" class=\"row_heading level0 row7\" >13</th>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row7_col0\" class=\"data row7 col0\" >Jaccard</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row7_col1\" class=\"data row7 col1\" >object</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row7_col2\" class=\"data row7 col2\" >0.152</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row7_col3\" class=\"data row7 col3\" >0.09</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row7_col4\" class=\"data row7 col4\" >0.152</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row7_col5\" class=\"data row7 col5\" >0.054</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row7_col6\" class=\"data row7 col6\" >0.112</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row7_col7\" class=\"data row7 col7\" >0.448</td>\n",
+       "            </tr>\n",
+       "            <tr>\n",
+       "                        <th id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820level0_row8\" class=\"row_heading level0 row8\" >26</th>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col0\" class=\"data row8 col0\" >GraphEditDistance</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col1\" class=\"data row8 col1\" >inra</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col2\" class=\"data row8 col2\" >0.132</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col3\" class=\"data row8 col3\" >0.078</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col4\" class=\"data row8 col4\" >0.16</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col5\" class=\"data row8 col5\" >0.06</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col6\" class=\"data row8 col6\" >0.1075</td>\n",
+       "                        <td id=\"T_06bfa4d4_43d3_11e9_a30b_6a0002e84820row8_col7\" class=\"data row8 col7\" >0.43</td>\n",
        "            </tr>\n",
        "    </tbody></table>"
       ],
       "text/plain": [
-       "<pandas.io.formats.style.Styler at 0x12a4142e8>"
+       "<pandas.io.formats.style.Styler at 0x102c12cf8>"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
diff --git a/strpython/models/node2vec.py b/strpython/models/node2vec.py
deleted file mode 100644
index b36e7e5..0000000
--- a/strpython/models/node2vec.py
+++ /dev/null
@@ -1,191 +0,0 @@
-import random
-
-import numpy as np
-from gensim.models import Word2Vec
-
-
-class Graph():
-    def __init__(self, nx_G, is_directed, p, q):
-        self.G = nx_G
-        self.is_directed = is_directed
-        self.p = p
-        self.q = q
-
-    def node2vec_walk(self, walk_length, start_node):
-        '''
-        Simulate a random walk starting from start node.
-        '''
-        G = self.G
-        alias_nodes = self.alias_nodes
-        alias_edges = self.alias_edges
-
-        walk = [start_node]
-
-        while len(walk) < walk_length:
-            cur = walk[-1]
-            cur_nbrs = sorted(G.neighbors(cur))
-            if len(cur_nbrs) > 0:
-                if len(walk) == 1:
-                    walk.append(
-                        cur_nbrs[alias_draw(alias_nodes[cur][0], alias_nodes[cur][1])])
-                else:
-                    prev = walk[-2]
-                    next = cur_nbrs[alias_draw(alias_edges[(prev, cur)][0],
-                                               alias_edges[(prev, cur)][1])]
-                    walk.append(next)
-            else:
-                break
-
-        return walk
-
-    def simulate_walks(self, num_walks, walk_length):
-        '''
-        Repeatedly simulate random walks from each node.
-        '''
-        # sys.stdout.write("\r")
-        G = self.G
-        walks = []
-        nodes = list(G.nodes)
-        for walk_iter in range(num_walks):
-            # sys.stdout.write(
-            #     '\rWalk iteration: {0}/{1}'.format(walk_iter + 1, num_walks))
-            random.shuffle(nodes)
-            for node in nodes:
-                walks.append(self.node2vec_walk(
-                    walk_length=walk_length, start_node=node))
-
-        return walks
-
-    def get_alias_edge(self, src, dst):
-        '''
-        Get the alias edge setup lists for a given edge.
-        '''
-        G = self.G
-        p = self.p
-        q = self.q
-
-        unnormalized_probs = []
-        for dst_nbr in sorted(G.neighbors(dst)):
-            if dst_nbr == src:
-                unnormalized_probs.append(G[dst][dst_nbr]['weight'] / p)
-            elif G.has_edge(dst_nbr, src):
-                unnormalized_probs.append(G[dst][dst_nbr]['weight'])
-            else:
-                unnormalized_probs.append(G[dst][dst_nbr]['weight'] / q)
-        norm_const = sum(unnormalized_probs)
-        normalized_probs = [
-            float(u_prob) / norm_const for u_prob in unnormalized_probs]
-
-        return alias_setup(normalized_probs)
-
-    def preprocess_transition_probs(self):
-        '''
-        Preprocessing of transition probabilities for guiding the random walks.
-        '''
-        G = self.G
-        is_directed = self.is_directed
-
-        alias_nodes = {}
-        for node in list(G.nodes):
-            unnormalized_probs = [G[node][nbr]['weight']
-                                  for nbr in sorted(G.neighbors(node))]
-            norm_const = sum(unnormalized_probs)
-            normalized_probs = [
-                float(u_prob) / norm_const for u_prob in unnormalized_probs]
-            alias_nodes[node] = alias_setup(normalized_probs)
-
-        alias_edges = {}
-        triads = {}
-
-        if is_directed:
-            for edge in list(G.edges()):
-                alias_edges[edge] = self.get_alias_edge(edge[0], edge[1])
-        else:
-            for edge in list(G.edges()):
-                alias_edges[edge] = self.get_alias_edge(edge[0], edge[1])
-                alias_edges[(edge[1], edge[0])] = self.get_alias_edge(
-                    edge[1], edge[0])
-
-        self.alias_nodes = alias_nodes
-        self.alias_edges = alias_edges
-
-        return
-
-
-def alias_setup(probs):
-    '''
-    Compute utility lists for non-uniform sampling from discrete distributions.
-    Refer to https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/
-    for details
-    '''
-    K = len(probs)
-    q = np.zeros(K)
-    J = np.zeros(K, dtype=np.int)
-
-    smaller = []
-    larger = []
-    for kk, prob in enumerate(probs):
-        q[kk] = K * prob
-        if q[kk] < 1.0:
-            smaller.append(kk)
-        else:
-            larger.append(kk)
-
-    while len(smaller) > 0 and len(larger) > 0:
-        small = smaller.pop()
-        large = larger.pop()
-
-        J[small] = large
-        q[large] = q[large] + q[small] - 1.0
-        if q[large] < 1.0:
-            smaller.append(large)
-        else:
-            larger.append(large)
-
-    return J, q
-
-
-def alias_draw(J, q):
-    '''
-    Draw sample from a non-uniform discrete distribution using alias sampling.
-    '''
-    K = len(J)
-
-    kk = int(np.floor(np.random.rand() * K))
-    if np.random.rand() < q[kk]:
-        return kk
-    else:
-        return J[kk]
-
-
-def learn_embeddings(walks, dimensions, window_size, nb_workers, nb_iter):
-    '''
-    Learn embeddings by optimizing the Skipgram objective using SGD.
-    '''
-    walks_ = [list(map(str, walk)) for walk in walks]
-    model = Word2Vec(walks_, size=dimensions, window=window_size,
-                     min_count=0, sg=1, workers=nb_workers, iter=nb_iter)
-    return model
-
-
-def compute_graph_model(nx_graph, **kwargs):
-    '''
-    Pipeline for representational learning for all nodes in a graph.
-        @param nx_graph
-        @kwarg p: int
-        @kwarg q: int
-    '''
-    p = kwargs.get("p", 1)
-    q = kwargs.get("q", 1)
-    dimensions = kwargs.get("dimensions", 128)
-    window_size = kwargs.get("window_size", 10)
-    nb_workers = kwargs.get("nb_workers", 8)
-    nb_iter = kwargs.get("nb_iter", 1)
-    num_walks = kwargs.get("num_walks", 10)
-    walk_length = kwargs.get("walk_length", 80)
-    directed = kwargs.get("directed", False)
-
-    G = Graph(nx_graph, directed, p, q)
-    G.preprocess_transition_probs()
-    walks = G.simulate_walks(num_walks, walk_length)
-    return learn_embeddings(walks, dimensions, window_size, nb_workers, nb_iter)
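
For reference, a minimal, self-contained sketch of Walker's alias method that the deleted module relied on. Names mirror the alias_setup/alias_draw helpers above; table construction is O(K), each draw is O(1):

import numpy as np

def alias_setup(probs):
    # Build the alias tables for O(1) sampling from a discrete distribution.
    K = len(probs)
    q = np.zeros(K)             # scaled (residual) probabilities
    J = np.zeros(K, dtype=int)  # alias of each column
    smaller, larger = [], []
    for k, prob in enumerate(probs):
        q[k] = K * prob
        (smaller if q[k] < 1.0 else larger).append(k)
    while smaller and larger:
        small, large = smaller.pop(), larger.pop()
        J[small] = large                      # column `small` borrows from `large`
        q[large] = q[large] + q[small] - 1.0
        (smaller if q[large] < 1.0 else larger).append(large)
    return J, q

def alias_draw(J, q):
    # Pick a column uniformly; keep it with probability q[k], else take its alias.
    k = np.random.randint(len(J))
    return k if np.random.rand() < q[k] else J[k]

J, q = alias_setup([0.5, 0.3, 0.2])
samples = [alias_draw(J, q) for _ in range(100000)]
print(np.bincount(samples) / len(samples))  # close to [0.5, 0.3, 0.2]
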
diff --git a/strpython/models/str.py b/strpython/models/str.py
index 9e14fba..6bfbed9 100644
--- a/strpython/models/str.py
+++ b/strpython/models/str.py
@@ -5,7 +5,6 @@ import os
 import time
 import warnings
 
-from tqdm import tqdm
 import folium
 import geopandas as gpd
 import networkx as nx
@@ -22,7 +21,6 @@ import numpy as np
 # logging.basicConfig(filename=config.log_file,level=logging.INFO)
 
 
-
 def get_inclusion_chain(id_, prop):
     """
     For an entity return it geographical inclusion tree using a property.
@@ -42,28 +40,10 @@ class STR(object):
     """
     Str basic structure
     """
-    __cache_inclusion = {} # Store inclusion relations found between spaital entities
-    __cache_adjacency = {} # Store adjacency relations found between spaital entities
-    __cache_entity_data = {} # Store data about entity requested
-
+    __cache_inclusion = {}
     def __init__(self, tagged_text, spatial_entities):
-        """
-        Constructir
-        
-        Parameters
-        ----------
-        tagged_text : list
-            Text in forms of token associated with tag (2D array 2*t where t == |tokens| )
-        spatial_entities : dict
-            spatial entities associated with a text. Follow this structure {"<id>: <label>"}
-        
-        """
-
         self.tagged_text = tagged_text
         self.spatial_entities = spatial_entities
-        for k in list(spatial_entities.keys()):
-            if not k[:2] == "GD":
-                del spatial_entities[k]
 
         self.adjacency_relationships = {}
         self.inclusion_relationships = {}
@@ -71,21 +51,11 @@ class STR(object):
     @staticmethod
     def from_networkx_graph(g: nx.Graph, tagged_: list = []):
         """
-        Build a STR based on networkx graph
-        
-        Parameters
-        ----------
-        g : nx.Graph
-            input graph
-        tagged_ : list, optional
-            tagged text (the default is []). A 2D array 2*t where t == |tokens|.
-        
-        Returns
-        -------
-        STR
-            resulting STR
+        Return a STR built from a NetworkX graph
+        :param g:
+        :param tagged_:
+        :return:
         """
-
         sp_en = {}
         for nod in g:
             try:
@@ -93,7 +63,7 @@ class STR(object):
             except KeyError:  # If no label found, grab one from the geo-database
                 data = gazetteer.get_by_id(nod)
                 if data:
-                    sp_en[nod] = data[0].name
+                    sp_en[nod] = data[0].label
 
         str_ = STR(tagged_, sp_en)
         str_.set_graph(g)
@@ -102,19 +72,10 @@ class STR(object):
     @staticmethod
     def from_dict(spat_ent: dict, tagged_: list = []):
         """
-        Build a STR based on networkx graph
-        
-        Parameters
-        ----------
-        spat_ent : dict
-            Dict of patial entities associated with a text. Follow this structure {"<id>: <label>"}
-        tagged_ : list, optional
-            tagged text (the default is []). A 2D array 2*t where t == |tokens|.
-        
-        Returns
-        -------
-        STR
-            resulting STR
+        Return a STR built from a dict of spatial entities ({"<id>": "<label>"})
+        :param spat_ent:
+        :param tagged_:
+        :return:
         """
         sp_en = {}
         for id_, label in spat_ent.items():
@@ -126,59 +87,16 @@ class STR(object):
 
     @staticmethod
     def from_pandas(dataf: pd.DataFrame, tagged: list = []):
-        """
-        Build a STR from a Pandas Dataframe with two column : id and label.
-        
-        Parameters
-        ----------
-        dataf : pd.DataFrame
-            dataframe containing the spatial entities
-        tagged : list, optional
-            tagged text (the default is []). A 2D array 2*t where t == |tokens|.
-        
-        Returns
-        -------
-        STR
-            resulting STR
-        """
-
         return STR.from_dict(pd.Series(dataf.label.values, index=dataf.id).to_dict(), tagged)
 
-    def set_graph(self, g):
-        """
-        Apply changes to the current STR based on Networkx Graph.
-        
-        Parameters
-        ----------
-        g : networkx.Graph
-            input graph
-        
-        """
-
-        self.graph = g
-        rel_ = self.graph.edges(data=True)
-        for edge in rel_:
-            id1, id2 = edge[0], edge[1]
-            if edge[2]["color"] == "green":
-                self.add_adjacency_rel(edge[0],edge[1])
-                self.add_cache__adjacency(id1, id2,True)
-            elif edge[2]["color"] == "red":
-                self.add_inclusion_rel(edge[0], edge[1])
-                self.add_cache_inclusion(id1,id2,True)
-
     def add_spatial_entity(self, id, label=None, v=True):
         """
-        Add a spatial entity to the current STR
-        
-        Parameters
-        ----------
-        id : str
-            identifier of the spatial entity in Geodict
-        label : str, optional
-            if not available in Geodict (the default is None)
-        
+        Add a spatial entity to the current STR
+        :param id:
+        :param label:
+        :return:
         """
-        data_ = self.get_data(id)
+        data_ = gazetteer.get_by_id(id)
         if not data_:
             warnings.warn("{0} wasn't found in Geo-Database".format(id))
             return False
@@ -192,14 +110,9 @@ class STR(object):
     def add_spatial_entities(self, ids: list, labels: list = []):
         """
         Add spatial entities to the current STR
-        
-        Parameters
-        ----------
-        ids : list
-            list of identifiers of each spatial entity
-        labels : list, optional
-            list of labels of each spatial entity
-        
+        :param ids:
+        :param labels:
+        :return:
         """
         if not labels:
             warnings.warn("Labels list is empty. @en labels from Geo-Database will be used by default")
@@ -212,121 +125,27 @@ class STR(object):
             self.add_spatial_entity(id, label, False)
         # print(self.graph.nodes(data=True))
 
-    def add_adjacency_rel(self, se1, se2):
-        """
-        Add a adjacency relationship to the current STR.
-        
-        Parameters
-        ----------
-        se1 : str
-            Identifier of the first spatial entity
-        se2 : str
-            Identifier of the second spatial entity
-        
-        """
-
-        if not se1 in self.adjacency_relationships: self.adjacency_relationships[se1] = {}
-        if not se2 in self.adjacency_relationships: self.adjacency_relationships[se2] = {}
-        self.adjacency_relationships[se1][se2],self.adjacency_relationships[se2][se1] = True, True
-        self.add_cache__adjacency(se1,se2,True)
+    def add_adjacency_rel(self, se1, se2,v=True):
+        if not se1 in self.adjacency_relationships:
+            self.adjacency_relationships[se1] = {}
+        self.adjacency_relationships[se1][se2]=v
 
-    def add_inclusion_rel(self, se1, se2):
-        """
-        Add a inclusion relationship to the current STR.
-        
-        Parameters
-        ----------
-        se1 : str
-            Identifier of the first spatial entity
-        se2 : str
-            Identifier of the second spatial entity
-        
-        """
+    def add_inclusion_rel(self, se1, se2,v=True):
         if not se1 in self.inclusion_relationships:
             self.inclusion_relationships[se1] = {}
-        self.inclusion_relationships[se1][se2]=True
-        self.add_cache_inclusion(se1,se2,True)
+        self.inclusion_relationships[se1][se2]=v
 
-    def add_cache_inclusion(self,id1, id2, v=True):
+    def transform_spatial_entities(self, transform_map):
         """
-        Add a relation of inclusion in a cache variable
-        
-        Parameters
-        ----------
-        id1 : str
-            id of the first spatial entity
-        id2 : str
-            id of the second spatial entity
-        v : bool, optional
-            if the relation exists between the two spatial entities. Default is True
-        
+        Replace spatial entities in the STR according to a transformation map
+        :param transform_map: dict in the form {"<old id>": "<new id>"}
+        :return:
         """
-
-        if not id1 in STR.__cache_inclusion:
-            STR.__cache_inclusion[id1] = {}
-        STR.__cache_inclusion[id1][id2] = v
-
-    def add_cache__adjacency(self,se1,se2,v=True):
-        """
-        Add a relation of adjacency in a cache variable
-        
-        Parameters
-        ----------
-        id1 : str
-            id of the first spatial entity
-        id2 : str
-            id of the second spatial entity
-        v : bool, optional
-            if the relation exists between the two spatial entities. Default is True
-        
-        """
-        if not se1 in STR.__cache_adjacency:
-            STR.__cache_adjacency[se1] = {}
-        if not se2 in STR.__cache_adjacency:
-            STR.__cache_adjacency[se2] = {}
-        STR.__cache_adjacency[se1][se2]=v
-        STR.__cache_adjacency[se2][se1]=v
-    
-    def get_data(self,id_se):
-        """
-        Return an gazpy.Element object containing information about a spatial entity.
-        
-        Parameters
-        ----------
-        id_se : str
-            Identifier of the spatial entity
-        
-        Returns
-        -------
-        gazpy.Element
-            data
-        """
-
-        if id_se in STR.__cache_entity_data:
-            return STR.__cache_entity_data[id_se]
-        data=gazetteer.get_by_id(id_se)
-        if len(data) > 0:
-            STR.__cache_entity_data[id_se]= data[0]
-
-
-    def transform_spatial_entities(self, transform_map : dict):
-        """
-        Replace or delete certain spatial entities based on a transformation map
-        
-        Parameters
-        ----------
-        transform_map : dict
-            New mapping for the spatial entities in the current STR. Format required : {"<id of the old spatial entity>":"<id of the new spatial entity>"}
-        
-        """
-
         final_transform_map = {}
         # Erase old spatial entities
         new_label = {}
-        to_del=set([])
         for old_se, new_se in transform_map.items():
-            data = self.get_data(new_se)
-            to_del.add(old_se)
+            data = gazetteer.get_by_id(new_se)
             if data:
                 data = data[0]
                 final_transform_map[old_se] = new_se
@@ -334,70 +153,59 @@ class STR(object):
                     self.add_spatial_entity(new_se, data.label.en)
 
                 del self.spatial_entities[old_se]
-                
                 new_label[new_se] = data.label.en
             else:
                 warnings.warn("{0} doesn't exists in the geo database!".format(new_se))
-
         self.graph = nx.relabel_nodes(self.graph, final_transform_map)
-
-        for es in to_del:
-            if es in self.graph._node:
-                self.graph.remove_node(es)
-        
         for se_ in new_label:
             self.graph.nodes[se_]["label"] = new_label[se_]
 
     def update(self):
         """
-        Update the relationship between spatial entities in the STR. Used when transforming the STR.
+        Update the relationships between the spatial entities in the STR.
+        :return:
         """
-
         nodes = copy.deepcopy(self.graph.nodes(data=True))
         self.graph.clear()
         self.graph.add_nodes_from(nodes)
 
+        print("inclusion")
         self.get_inclusion_relationships()
         for se1 in self.inclusion_relationships:
             for se2 in self.inclusion_relationships[se1]:
-                if not se1 in self.graph.nodes or not se2 in self.graph.nodes:
-                    continue
                 if self.inclusion_relationships[se1][se2]:
                     self.graph.add_edge(se1, se2, key=0, color="red")
 
-
+        print("adjacency")
         self.get_adjacency_relationships()
         for se1 in self.adjacency_relationships:
             for se2 in self.adjacency_relationships[se1]:
-                if not se1 in self.graph.nodes or not se2 in self.graph.nodes:
-                    continue
                 if self.adjacency_relationships[se1][se2]:
                     self.graph.add_edge(se1, se2, key=0, color="green")
+        print("fin adj")
 
 
-    
 
+
+    def add_cache_inclusion(self, id1, id2):
+        if not id1 in STR.__cache_inclusion:
+            STR.__cache_inclusion[id1] = set([])
+        STR.__cache_inclusion[id1].add(id2)
     def is_included_in(self, se1_id, se2_id):
         """
-        Return True if a spatial entity is included within another one.
-        
-        Parameters
-        ----------
-        se1_id : str
-            id of the contained entity
-        se2_id : str
-            id of the entity container 
-        
-        Returns
-        -------
-        bool
-            if se1 included in se2
+        Return True if the spatial entity @se1_id is included in @se2_id
+        :param se1_id:
+        :param se2_id:
+        :return:
         """
-
         if se1_id in self.inclusion_relationships:
             if se2_id in self.inclusion_relationships[se1_id]:
                 return self.inclusion_relationships[se1_id][se2_id]
 
+        if se1_id in STR.__cache_inclusion:
+            if se2_id in STR.__cache_inclusion[se1_id]:
+                return True
 
         inc_chain_P131 = get_inclusion_chain(se1_id, "P131")
         inc_chain_P706 = get_inclusion_chain(se1_id, "P706")
@@ -405,120 +213,18 @@ class STR(object):
         inc_chain.extend(inc_chain_P706)
         inc_chain = set(inc_chain)
         if se2_id in inc_chain:
-            self.add_cache_inclusion(se1_id,se2_id,True)
-            return True
-
-        return False
-
-    def is_adjacent_cache(self,se1,se2):
-        """
-        Return true if two spatial entities were found adjacent previously.
-        
-        Parameters
-        ----------
-        se1 : str
-            id of the first spatial entity
-        se2 : str
-            id of the second spatial entity
-        
-        Returns
-        -------
-        bool
-            if se1 adjacent to se2
-        """
-
-        if se1 in STR.__cache_adjacency:
-            if se2 in STR.__cache_adjacency[se1]:
-                return STR.__cache_adjacency[se1][se2]
-        if se2 in STR.__cache_adjacency:
-            if se1 in STR.__cache_adjacency[se2]:
-                return STR.__cache_adjacency[se2][se1]
-        return False
-
-    def is_included_cache(self,se1,se2):
-        """
-        Return true if a spatial entity were found included previously in an other one.
-        
-        Parameters
-        ----------
-        se1 : str
-            id of the first spatial entity
-        se2 : str
-            id of the second spatial entity
-        
-        Returns
-        -------
-        bool
-            if se1 included to se2
-        """
-        if se1 in STR.__cache_inclusion:
-            if se2 in STR.__cache_inclusion[se1]:
-                return STR.__cache_inclusion[se1][se2]
-        return False
-    
-    def is_adjacent(self,se1,se2,datase1=None,datase2=None):
-        """
-        Return true if se1 is adjacent to se2.
-        
-        Parameters
-        ----------
-        se1 : str
-            id of the first spatial entity
-        se2 : str
-            id of the second spatial entity
-        datase1 : gazpy.Element, optional
-            if given cached data concerning the spatial entity with id = se1 (the default is None)
-        datase2 : gazpy.Element, optional
-            if given cached data concerning the spatial entity with id = se2 (the default is None)
-        
-        Returns
-        -------
-        bool
-            true if adjacent
-        """
-
-        stop_class = set(["A-PCLI", "A-ADM1"])
-
-        def get_p47_adjacency_data(self, data):
-            p47se1 = []
-            for el in data.other.P47:
-                d = gazetteer.get_by_other_id(el,"wikidata")
-                if not d:continue
-                p47se1.append(d[0].id)
-            return p47se1
-        
-        if self.is_adjacent_cache(se1,se2):
-            return False
-
-        if self.is_included_in(se1, se2) or self.is_included_in(se2, se1):
-            return False
-
-        data_se1, data_se2 = self.get_data(se1), self.get_data(se2)
-
-        if "P47" in data_se2 and se1 in self.get_p47_adjacency_data(data_se2):
-            return True
-                # print("P47")
-        elif "P47" in data_se1 and se2 in self.get_p47_adjacency_data(data_se1):
-                return True
-                    # print("P47")
-        
-        if collisionTwoSEBoundaries(se1, se2):
+            self.add_cache_inclusion(se1_id,se2_id)
             return True
 
-        if "coord" in data_se1 and "coord" in data_se2:
-            if Point(data_se1.coord.lon, data_se1.coord.lat).distance(
-                    Point(data_se2.coord.lon, data_se2.coord.lat)) < 1 and len(
-                set(data_se1.class_) & stop_class) < 1 and len(set(data_se2.class_) & stop_class) < 1:
-                return True
         return False
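
For illustration, a minimal sketch of the inclusion test above, using the get_inclusion_chain helper defined at the top of str.py and hypothetical GD identifiers:

# Hypothetical identifiers; get_inclusion_chain(id_, prop) returns the
# geographical inclusion chain for a Wikidata property.
inc_chain = set(get_inclusion_chain("GD123", "P131"))   # administrative inclusion
inc_chain |= set(get_inclusion_chain("GD123", "P706"))  # located on physical feature
print("GD456" in inc_chain)  # True when GD456 contains GD123
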
 
     def get_inclusion_relationships(self):
         """
-        Find all the inclusion relationships between the spatial entities declared in the current STR.
-        
+        Find all the inclusion relationships between the spatial entities in the
+        STR and register them through add_inclusion_rel.
+        :return:
         """
-
-        for se_ in tqdm(self.spatial_entities,desc="Extract Inclusion"):
+        for se_ in self.spatial_entities:
             inc_chain_P131 = get_inclusion_chain(se_, "P131")
             inc_chain_P706 = get_inclusion_chain(se_, "P706")
 
@@ -529,19 +235,61 @@ class STR(object):
             for se2_ in self.spatial_entities:
                 if se2_ in inc_chain:
                     self.add_inclusion_rel(se_,se2_)
+
+    def getP47AdjacencyData(self, data):
+        """
+        Collect the identifiers of the entities linked through Wikidata P47 ("shares border with").
+        """
+        p47se1 = []
+        for el in data.other.P47:
+            d = gazetteer.get_by_other_id(el, "wikidata")
+            if not d:
+                continue
+            p47se1.append(d[0].id)
+        return p47se1
+
+    def is_adjacent(self, se1, se2, datase1=None, datase2=None):
+        stop_class = set(["A-PCLI", "A-ADM1"])
+        # Inclusion and adjacency are mutually exclusive
+        if self.is_included_in(se1, se2) or self.is_included_in(se2, se1):
+            return False
+
+        data_se1 = gazetteer.get_by_id(se1)[0] if not datase1 else datase1  # avoid reloading the data every time
+        data_se2 = gazetteer.get_by_id(se2)[0] if not datase2 else datase2
+
+        # Adjacency declared in Wikidata through P47 ("shares border with")
+        if "P47" in data_se2.other and se1 in self.getP47AdjacencyData(data_se2):
+            return True
+        if "P47" in data_se1.other and se2 in self.getP47AdjacencyData(data_se1):
+            return True
+
+        # Adjacency detected from intersecting boundaries
+        if collisionTwoSEBoundaries(se1, se2):
+            return True
+
+        # Fallback: centroids closer than 1 degree, country/region-level classes excluded
+        if "coord" in data_se1.other and "coord" in data_se2.other:
+            if Point(data_se1.coord.lon, data_se1.coord.lat).distance(
+                    Point(data_se2.coord.lon, data_se2.coord.lat)) < 1 \
+                    and not set(data_se1.class_) & stop_class \
+                    and not set(data_se2.class_) & stop_class:
+                return True
+        return False
 
-    
     def get_adjacency_relationships(self):
         """
-        Find all the adjacency relationships between the spatial entities declared in the current STR.
+        Find all the adjacency relationships between the spatial entities in the
+        STR and register them through add_adjacency_rel.
+        :return:
         """
-        
-        data={se:self.get_data(se) for se in self.spatial_entities}
-        
-        for se1 in tqdm(self.spatial_entities,desc="Extract Adjacency Relationship"):
+        data={se:gazetteer.get_by_id(se)[0] for se in self.spatial_entities}
+        for se1 in self.spatial_entities:
             data_se1 = data[se1]
             for se2 in self.spatial_entities:
                 if se1 == se2: continue
                 if se1 in self.adjacency_relationships:
                     if se2 in self.adjacency_relationships[se1]:
                         continue
@@ -556,22 +304,11 @@ class STR(object):
     def build(self, inc=True, adj=True, verbose=False):
         """
         Build the STR
-        
-        Parameters
-        ----------
-        inc : bool, optional
-            if inclusion relationship have to be included in the STR (the default is True)
-        adj : bool, optional
-            if adjacency relationship have to be included in the STR (the default is True)
-        verbose : bool, optional
-            Verbose mode activated (the default is False)
-        
-        Returns
-        -------
-        networkx.Graph
-            graph representing the STR
+        :param inc:
+        :param adj:
+        :param verbose:
+        :return:
         """
-
         nodes = []
         for k, v in self.spatial_entities.items():
             nodes.append((k, {"label": v}))
@@ -588,7 +325,7 @@ class STR(object):
                         graph.add_edge(se1,se2, key=0, color="green")
                         graph.add_edge(se2, se1, key=0, color="green")
 
-            
+            logging.info("Extract Adjacency Rel\t{0}".format(time.time()-debut))
         if inc:
             debut=time.time()
             self.get_inclusion_relationships()
@@ -596,20 +333,18 @@ class STR(object):
                 for se2 in self.inclusion_relationships[se1]:
                     if self.inclusion_relationships[se1][se2]:
                         graph.add_edge(se1,se2, key=0, color="red")
-            
+            logging.info("Extract Inclusion Rel\t{0}".format(time.time() - debut))
         self.graph = graph
         return graph
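
A small sketch of the colored-edge convention used by build(), where green marks adjacency and red marks inclusion (a networkx MultiDiGraph is assumed, as the key=0 argument suggests):

import networkx as nx

g = nx.MultiDiGraph()
g.add_edge("GD1", "GD2", key=0, color="green")   # adjacency, both directions
g.add_edge("GD2", "GD1", key=0, color="green")
g.add_edge("GD1", "GD3", key=0, color="red")     # GD1 included in GD3

adjacency = [(u, v) for u, v, d in g.edges(data=True) if d["color"] == "green"]
inclusion = [(u, v) for u, v, d in g.edges(data=True) if d["color"] == "red"]
print(adjacency, inclusion)
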
 
     def save_graph_fig(self, output_fn, format="svg"):
         """
-        Save the graphiz reprensentation of the STR graph.
+        Save the graphviz representation of the STR graph
 
         Parameters
         ----------
         output_fn : string
             Output filename
-        format : str
-            Output format (svg or pdf)
 
         """
         try:
@@ -622,33 +357,28 @@ class STR(object):
             print("Error while saving STR to {0}".format(format))
 
     def getUndirected(self):
-        """
-        Return the Undirected form of a STR graph.
-        
-        Returns
-        -------
-        networkx.Graph
-            unidirected graph
-        """
-
         return nx.Graph(self.graph)
 
-    def get_geo_data_of_se(self):
-        """
-        Return Geographical information for each spatial entities in the STR
-        
-        Returns
-        -------
-        geopandas.GeoDataFrame
-            dataframe containing geographical information of each entity in the STR
-        """
+    def set_graph(self, g):
+        """
+        Apply the relationships encoded in a Networkx graph to the current STR.
+        """
+        self.graph = g
+        rel_ = self.graph.edges(data=True)
+        for edge in rel_:
+            id1, id2 = edge[0], edge[1]
+            if edge[2]["color"] == "green":
+                self.add_adjacency_rel(id1, id2)
+            elif edge[2]["color"] == "red":
+                self.add_inclusion_rel(id1, id2)
+                self.add_cache_inclusion(id1, id2)
+
 
+    def get_geo_data_of_se(self):
         points,label,class_ = [], [], []
         for se in self.spatial_entities:
             data = gazetteer.get_by_id(se)[0]
             try:
                 points.append(Point(data.coord.lon, data.coord.lat))
-                label.append(data.name)
+                label.append(data.label)
                 # class_.append(most_common(data["class"]))
             except KeyError:
                 pass
@@ -659,21 +389,7 @@ class STR(object):
         return df
 
     def get_cluster(self,id_=None):
-        """
-        Return the cluster detected using spatial entities position.
-        
-        Parameters
-        ----------
-        id_ : temp_file_id, optional
-            if cached version of geoinfo (the default is None)
-        
-        Returns
-        -------
-        gpd.GeoDataFrame 
-            cluster geometry
-        """
-
-        if os.path.exists("./temp_cluster/{0}.geojson".format(id_)):
+        if id_ and os.path.exists("./temp_cluster/{0}.geojson".format(id_)):
             return gpd.read_file("./temp_cluster/{0}.geojson".format(id_))
 
         data=self.get_geo_data_of_se()
@@ -689,6 +405,22 @@ class STR(object):
             samples,labels=dbscan(X)
             data["cluster"] = labels
 
+        """
+
+        # second clustering pass on the largest cluster
+        c=data['cluster'].value_counts().idxmax()
+        X=data[data["cluster"] == c]
+        X=X[["x","y"]]
+        bandwidth = estimate_bandwidth(X.values)
+        ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
+        ms.fit(X.values)
+        X["cluster"]=ms.labels_+(data['cluster'].max()+1)
+        lab=ms.labels_
+        lab+=data['cluster'].max()+1
+        
+        data["cluster"][data["cluster"] == c]=X["cluster"]
+        """
+
         geo = data.groupby("cluster").apply(to_Polygon)
         cluster_polybuff = gpd.GeoDataFrame(geometry=geo)
         if id_:
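
A minimal sketch of the clustering step in get_cluster(), with toy coordinates standing in for the output of get_geo_data_of_se():

import pandas as pd
from sklearn.cluster import dbscan

data = pd.DataFrame({"x": [0.0, 0.1, 5.0, 5.1], "y": [0.0, 0.1, 5.0, 5.2]})
core_samples, labels = dbscan(data[["x", "y"]].values, eps=1.0, min_samples=2)
data["cluster"] = labels  # points closer than eps end up in the same cluster
print(data)
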
@@ -697,15 +429,6 @@ class STR(object):
 
 
     def to_folium(self):
-        """
-        Use the folium package to project the STR on a map
-        
-        Returns
-        -------
-        folium.Map
-            folium map instance
-        """
-
         points = []
         for se in self.spatial_entities:
             data = gazetteer.get_by_id(se)[0]
@@ -755,20 +478,6 @@ class STR(object):
 
 
     def map_projection(self,plt=False):
-        """
-        Return a matplotlib figure of the STR
-        
-        Parameters
-        ----------
-        plt : bool, optional
-            if the user wish to use the plt.show() (the default is False)
-        
-        Returns
-        -------
-        plt.Figure
-            Matplotlib figure instance
-        """
-
         import matplotlib.pyplot as plt
         world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
         base = world.plot(color='white', edgecolor='black', figsize=(16, 9))
@@ -811,39 +520,11 @@ class STR(object):
         plt.show()
 
 
-# def to_Multipoints(x):
-#     """
-#     Return a polygon buffered representation for a set of point
-    
-#     Parameters
-#     ----------
-#     x : pandas.Series
-#         coordinates columns
-    
-#     Returns
-#     -------
-#     shapely.geometry.Polygon
-#         polygon
-#     """
-
-#     #print(x[["x","y"]].values)
-#     return Polygon([Point(z) for z in x[["x","y"]].values]).buffer(1)
+def to_Multipoints(x):
+    # Return a buffered polygon built from the coordinate columns of x
+    return Polygon([Point(z) for z in x[["x","y"]].values]).buffer(1)
 
 def to_Polygon(x):
-    """
-    Return a polygon buffered representation for a set of points.
-    
-    Parameters
-    ----------
-    x : pandas.Series
-        coordinates columns
-    
-    Returns
-    -------
-    shapely.geometry.Polygon
-        polygon
-    """
-
     points = [Point(z) for z in x[["x","y"]].values]
     if len(points) > 2:
         coords = [p.coords[:][0] for p in points]
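
A sketch of the buffered-footprint idea behind to_Multipoints and to_Polygon, assuming shapely as imported in str.py (the tail of to_Polygon lies outside this hunk):

from shapely.geometry import Point, Polygon

coords = [(0.0, 0.0), (1.0, 0.5), (0.5, 1.0)]
# With more than two points, build a polygon; otherwise fall back to a point.
footprint = (Polygon(coords) if len(coords) > 2
             else Point(coords[0])).buffer(1)
print(footprint.area)
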
diff --git a/strpython/models/word2vec.py b/strpython/models/word2vec.py
deleted file mode 100644
index c0904f7..0000000
--- a/strpython/models/word2vec.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# coding = utf-8
-
-# -*- coding: utf-8 -*-
-
-from glob import glob
-from tqdm import tqdm
-import numpy as np
-from gensim.models.word2vec import Word2Vec
-from polyglot.text import Text
-from pycorenlp import StanfordCoreNLP
-
-
-def getDependant(output_core_nlp, deps_list=["compound", "amod", "advmod"]):
-
-    """
-    Filter dependencies from Stanford NLP output
-    :param output_core_nlp: output of stanford nlp request
-    :param deps_list: list of tags that correspond to wanted dependencies
-    :return:
-    """
-    dependencies = []
-    i = 0
-    for s in output_core_nlp["sentences"]:
-        for dep in s["basicDependencies"]:
-            if dep["dep"] in deps_list:
-                dependencies.append([dep["governor"], dep["dependent"], i])
-        i += 1
-    return dependencies
-
-
-def filter_dependenciesV1(dependencies):
-    """
-    Filter Dependencies to be sure to get compound words !
-    :param dependencies:  getDependant() output
-    :return:
-    """
-    d_temp = {}
-    for d in dependencies:
-        if not d[-1] in d_temp: d_temp[d[-1]] = {}
-        if not d[0] in d_temp[d[-1]]: d_temp[d[-1]][d[0]] = set([])
-        d_temp[d[-1]][d[0]].add(d[1])
-    to_del = []
-    for d_1 in d_temp:
-        for d_2 in d_temp[d_1]:
-            d_temp[d_1][d_2] = sorted(d_temp[d_1][d_2])
-            if len(d_temp[d_1][d_2]) < 2:
-                continue
-            sorted_ = d_temp[d_1][d_2]
-            s_ = 0
-            for i in range(len(sorted_)):
-                if not i + 1 == len(sorted_):
-                    s_ += abs(sorted_[i] - sorted_[i + 1])
-            if not s_ == len(sorted_) - 1:
-                to_del.append([d_1, d_2])
-
-    for d in to_del: del d_temp[d[0]][d[1]]
-    return d_temp
-
-
-def filter_dependencies(dependencies):
-    """
-    Same as filter_dependenciesV1(), except we fuse dependencies of compound word(two dependencies relation close to each other)
-    :param dependencies:
-    :return:
-    """
-    new_d = []
-    d_temp = {}
-    for d in dependencies:
-        if not d[-1] in d_temp: d_temp[d[-1]] = {}
-        if not d[0] in d_temp[d[-1]]: d_temp[d[-1]][d[0]] = set([])
-        d_temp[d[-1]][d[0]].add(d[1])
-    to_del = []
-    for d_1 in d_temp:
-        for d_2 in d_temp[d_1]:
-            d_temp[d_1][d_2] = sorted(d_temp[d_1][d_2])
-            if len(d_temp[d_1][d_2]) < 2:
-                continue
-            sorted_ = d_temp[d_1][d_2]
-            s_ = 0
-            for i in range(len(sorted_)):
-                if not i + 1 == len(sorted_):
-                    s_ += abs(sorted_[i] - sorted_[i + 1])
-            if not s_ == len(sorted_) - 1:
-                to_del.append([d_1, d_2])
-
-    for d in to_del: del d_temp[d[0]][d[1]]
-    to_del = []
-    for d_1 in d_temp:
-        for d_2 in d_temp[d_1]:
-            _depend = d_temp[d_1][d_2]
-            for k in d_temp[d_1]:
-                _depend2 = d_temp[d_1][k]
-                if k == _depend[0]:
-                    for d in _depend2: _depend.insert(0, d)
-                    d_temp[d_1][d_2] = _depend
-                    to_del.append([d_1, k])
-                elif k == _depend[-1]:
-                    _depend.extend(_depend2)
-                    d_temp[d_1][d_2] = _depend
-                    to_del.append([d_1, k])
-    for d in to_del: del d_temp[d[0]][d[1]]
-
-    return d_temp
-
-
-def transformed_sentences(output_core_nlp, dependencies):
-    """
-    Transform tokenized version to adapt word2vec input model
-    :param output_core_nlp:
-    :param dependencies:
-    :return:
-    """
-    sentences = []
-    j = 0
-    for s in output_core_nlp["sentences"]:
-        tokens = [t["originalText"].lower() for t in s["tokens"]]
-        # print(tokens)
-        if j in dependencies:
-            # print(dependencies[j])
-            to_tuple = []
-            to_del = []
-            for k, v in dependencies[j].items():
-                tuple = list(v)
-                tuple.append(k)
-                to_tuple.append(tuple)
-                if tuple[0] - 1 in to_del:
-                    set_ = set(to_del)
-                    set_.remove(tuple[0] - 1)
-                    to_del = list(set_)
-                to_del.extend((np.array(tuple[1:]) - 1).tolist())
-            for tup in to_tuple:
-                tokens[tup[0] - 1] = "_".join([tokens[t - 1] for t in tup])
-            k = 0
-            for d in to_del:
-                del tokens[d - k]
-                k += 1
-            sentences.append(tokens)
-        j += 1
-    return np.array(sentences)
-
-if __name__ == "__main__":
-
-
-
-    files = glob("data/EPI_ELENA/raw_text/*.txt")
-    nlp = StanfordCoreNLP("http://localhost:9000")
-    texts = [open(f).read() for f in files]
-    sentences = []
-    # Classic tokenization of sentences
-    for f in tqdm(texts):
-        text = f
-        if not text: continue
-        try:
-            text = Text(text)
-
-            for s in text.sentences:
-                tokens = []
-                for t in s.tokens: tokens.append(t.lower())
-                sentences.append(tokens)
-        except:
-            pass
-
-    # Add compound word version of sentences
-    for t in tqdm(texts):
-        if not t: continue
-        try:
-            nlp_o = nlp.annotate(t, properties={'annotators': 'tokenize,ssplit,depparse', 'outputFormat': 'json'})
-            dependenc = filter_dependencies(getDependant(nlp_o))
-            dependenc2 = filter_dependenciesV1(getDependant(nlp_o))
-            sentences.extend(transformed_sentences(nlp_o, dependenc))  # extend compound word
-            sentences.extend(transformed_sentences(nlp_o, dependenc2))  # classic compound word
-
-        except expression as identifier:
-            pass
-        
-    model = Word2Vec(sentences, min_count=10)
-    model.save("w2v_model_epi.w2v")
-
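
A minimal sketch of the training step performed by the deleted script, using the pre-4.0 gensim keyword names that the code above relied on (gensim >= 4 renames size/iter to vector_size/epochs); the sentences are toy data:

from gensim.models.word2vec import Word2Vec

sentences = [["dengue", "outbreak", "in", "madagascar"],
             ["rift", "valley", "fever", "in", "madagascar"]]
model = Word2Vec(sentences, size=100, window=5, min_count=1, iter=5)
model.save("w2v_model_epi.w2v")
print(model.wv.most_similar("madagascar", topn=2))
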
diff --git a/strpython/nlp/disambiguator/delozier/__init__.py b/strpython/nlp/disambiguator/delozier/__init__.py
deleted file mode 100644
index ec03d56..0000000
--- a/strpython/nlp/disambiguator/delozier/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# coding = utf-8
diff --git a/strpython/nlp/disambiguator/delozier/grid.py b/strpython/nlp/disambiguator/delozier/grid.py
deleted file mode 100644
index 1d17059..0000000
--- a/strpython/nlp/disambiguator/delozier/grid.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# coding = utf-8
-
-import geopandas as gpd
-import numpy as np
-from progressbar import ProgressBar
-from shapely.geometry import Point, Polygon
-
-
-class GeoGrid():
-    """"""
-
-    def __init__(self, upper_lat=70, bottom_lat=-70, step=2, square_size=0.25):
-        """Constructor for GoeGrid"""
-        self.h, self.w = (upper_lat * step) - (bottom_lat * step), 2 * 180 * step
-        self.grid_points = np.indices((self.h, self.w))[1]
-        # matrice avec indice de ligne
-
-        self.upper_lat = upper_lat
-        self.step = step
-
-        self.world_borders_data = gpd.read_file("./world_borders.shp")
-
-    def inside(self, point):
-        poly_from_point = Polygon(self.create_square(point))
-        for id_, row in self.world_borders_data.iterrows():
-            if poly_from_point.within(row["geometry"]):
-                return True
-        return False
-
-    def create_square(self, p1, size=0.25):
-        x, y = p1.x, p1.y
-        r = size / 2
-        return [[x - r, y + r], [x + r, y + r], [x + r, y - r], [x - r, y - r]]
-
-    def point_within(self, j, i):
-        p = Point(-180 + (j * (1 / self.step)), self.upper_lat - i * (1 / self.step))
-        if self.inside(p):
-            return np.int(0)
-        else:
-            return np.int(-1)
-
-    def createGeoGrid(self):
-        with ProgressBar(max_value=len(self.grid_points)) as bar:
-            for i in range(len(self.grid_points)):
-                self.grid_points[i] = np.apply_along_axis(self.point_within, 0, self.grid_points[i].reshape(1, -1), i)
-                bar.update(i)
-            bar.finish()
-        print("Geogrid Created")
-
-    def loadGeoGrid(self):
-        self.grid_points = np.load("./resources/grid_GEO.npy")
-        # avoid frontier problem
-        mask = np.arange(-1, 2)
-        for i in range(len(self.grid_points)):
-            for j in range(len(self.grid_points[i])):
-                if self.grid_points[i][j] == -1:
-                    if i - 1 > 0 and i + 1 < len(self.grid_points) and j - 1 > 0 and j + 1 < len(self.grid_points[i]):
-                        sub = np.abs(self.grid_points[np.ix_(mask + i, mask + j)])
-                        if np.sum(sub) < 5:
-                            self.grid_points[i][j] = 0
-
-    def get_points_coordinates(self, step=None):
-        if not step:
-            step = self.step
-        coordinates = []
-        for i in range(len(self.grid_points)):
-            for j in range(len(self.grid_points[i])):
-                if self.grid_points[i][j] == 0:
-                    p = [-180 + (j * (1 / step)), self.upper_lat - i * (1 / step)]
-                    coordinates.append(p)
-        coordinates = np.array(coordinates)
-        return coordinates
-
-    def draw_grid(self, step=None):
-        import matplotlib.pyplot as plt
-        coordinates = self.get_points_coordinates()
-        plt.scatter(coordinates[:, 0], coordinates[:, 1], s=0.5)
-        plt.show()
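
A sketch of the inside() test from the deleted GeoGrid, with a stand-in polygon replacing the world_borders.shp layer:

from shapely.geometry import Point, Polygon

def create_square(p, size=0.25):
    # Square of side `size` centered on the point, as in GeoGrid.create_square
    x, y, r = p.x, p.y, size / 2
    return Polygon([(x - r, y + r), (x + r, y + r), (x + r, y - r), (x - r, y - r)])

land = Polygon([(-5, -5), (5, -5), (5, 5), (-5, 5)])   # stand-in country polygon
print(create_square(Point(0, 0)).within(land))    # True: grid cell is on land
print(create_square(Point(10, 10)).within(land))  # False: grid cell is outside
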
diff --git a/strpython/nlp/disambiguator/delozier/utils.py b/strpython/nlp/disambiguator/delozier/utils.py
deleted file mode 100644
index 15c2f4f..0000000
--- a/strpython/nlp/disambiguator/delozier/utils.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# coding = utf-8
-
-def weight_i_j(i, j):
-    dist = i.distance(j)
-    if dist <= 1:
-        return .75 * (1 - (dist))
-    return False
-
-
-def Gi_star(x):
-    pass
diff --git a/strpython/tt4py/__init__.py b/strpython/tt4py/__init__.py
deleted file mode 100644
index fec16bd..0000000
--- a/strpython/tt4py/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# coding = utf-8
-
-"""
-tt4t is a Tagged Texy Manipulation module for Python.
-
-tt4t was conceived in order to search through tokenized/pos-tagged version of texts.
-"""
\ No newline at end of file
diff --git a/strpython/tt4py/helpers.py b/strpython/tt4py/helpers.py
deleted file mode 100644
index 0e739d4..0000000
--- a/strpython/tt4py/helpers.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# coding = utf-8
-
-import numpy as np
-
-
-def dict_to_array(var):
-    res = []
-    for i, j in var.items():
-        res.append([i, j])
-    return np.array(res)
-
-
-def list_to_array(var):
-    return np.array(var)
-
-
-def flattern(A):
-    rt = []
-    for i in A:
-        if isinstance(i, list):
-            rt.extend(flattern(i))
-        elif isinstance(i, np.ndarray):
-            rt.extend(flattern(i.tolist()))
-        else:
-            rt.append(i)
-    return rt
-
-
-def flatten(var):
-    return flattern(var)
diff --git a/strpython/tt4py/tt4py.py b/strpython/tt4py/tt4py.py
deleted file mode 100644
index 889a2bf..0000000
--- a/strpython/tt4py/tt4py.py
+++ /dev/null
@@ -1,239 +0,0 @@
-# coding = utf-8
-
-from enum import Enum
-
-from nltk.stem import WordNetLemmatizer, SnowballStemmer
-from termcolor import colored
-
-from .helpers import *
-
-_wn_lem = WordNetLemmatizer()
-_snowball_stemmer = SnowballStemmer("english")
-
-
-class TaggedType(Enum):
-    TOK = 2
-    POS = 2
-    POS_LEM = 3
-    POS_TAG = 3  # length of each token data
-    MIX_POS_TAG = 2
-
-
-class SearchFlag(Enum):
-    NO_CASE = lambda x: x.lower()
-    SP_WS = lambda x: x.split(" ")  # split using whitespaces
-    SP_P = lambda x: x.split(".")  # split using point
-    SP_D = lambda x: x.split("-")  # split using dash
-    WN_LEM = lambda x: _wn_lem.lemmatize(x)
-    SNW_STEM = lambda x: _snowball_stemmer.stem(x)
-
-
-class TaggedInputError(Exception):
-    def __init__(self):
-        super(Exception, self).__init__(
-            colored("Wrong input : check your input data type or the size for each token data ", "red"))
-
-
-class WrongThesaurusFormatError(Exception):
-    def __init__(self, var):
-        super(Exception, self).__init__(
-            colored(
-                "Wrong thesaurus format: use dict format instead of {0}. Ex. {'id_1':'label'}".format(str(type(var))),
-                "red"))
-
-
-class Text(object):
-    def __init__(self, tagged_text, type=TaggedType.MIX_POS_TAG):
-        # check if 'tagged_text' is an iterable object
-        try:
-            some_object_iterator = iter(tagged_text)
-        except TypeError:
-            raise TaggedInputError
-
-        # Convert input into numpy array
-        self.tagged_text = tagged_text
-
-        if isinstance(tagged_text, dict):
-            self.tagged_text = dict_to_array(tagged_text)
-        elif isinstance(tagged_text, list):
-            self.tagged_text = list_to_array(tagged_text)
-        else:
-            try:
-                self.tagged_text = np.array(list(tagged_text))
-            except:
-                print("Can't convert iterable given into a np array")
-
-        if not type.value == self.tagged_text.shape[1]:
-            raise TaggedInputError
-
-        self._original = self.tagged_text.copy()
-        self.flag_applied = []
-
-    def transform_tagged(self, flags=[SearchFlag.NO_CASE, SearchFlag.SP_WS, SearchFlag.SP_D]):
-        tagged = self._original.copy().tolist()
-        # Apply necessary for string search
-        for flag in flags:
-            tagged_t = []
-            for token in tagged:
-                res_ = flag(token[0])
-                if len(res_) > 1 and not isinstance(res_, str):
-                    res_ = []
-                    tagged_t.extend([[j, token[1]] for j in res_])
-                elif isinstance(res_, list):
-                    tagged_t.extend([[res_[-1], token[1]]])
-                else:
-                    tagged_t.extend([[res_, token[1]]])
-            tagged = tagged_t
-
-        self.tagged_text = np.array(tagged)
-        self.flag_applied = flags
-
-    def hasSameFlags(self, flags):
-        for f in flags:
-            if not f in self.flag_applied:
-                return False
-        return True
-
-    def get_occurrences(self, string, flags=[SearchFlag.NO_CASE, SearchFlag.SP_WS, SearchFlag.SP_D]):
-        if not self.hasSameFlags(flags):
-            self.transform_tagged(flags)
-
-        positions_list = []
-        t_1 = [string]
-        tagged = self.tagged_text[:, 0].copy()
-        # Apply necessary for string search
-        for flag in flags:
-            t_1 = flatten([flag(i) for i in t_1])
-        gram_1 = (True if len(t_1) == 1 else False)
-        t = 0
-        while t < (len(tagged)):
-            token = tagged[t]
-            if token == t_1[0] or token == t_1[0].rstrip("s") + "s":
-                if gram_1:
-                    positions_list.append([t, t])
-                    t += 1
-                else:
-                    j, f = 0, True
-                    while t + j < len(tagged) and j < len(t_1):
-                        if not tagged[t + j] == t_1[j]:
-                            f = False
-                            break
-                        j += 1
-                    if f:
-                        positions_list.append([t, t + j])
-                        t += j
-                    else:
-                        t += 1
-            else:
-                t += 1
-
-        return positions_list
-
-    def get_neighbor_words(self, window_size, pos1, pos2=None):
-        if not pos2:
-            pos2 = pos1
-        return self.tagged_text[pos1 - window_size:window_size + pos2]
-
-    def extract_token_by_tag(self, *tags):
-        res, posis_ = [], []
-        for tag in tags:
-            posis_.extend(np.argwhere(self.tagged_text[:, -1] == tag).flatten())
-        posis_ = sorted(posis_)
-
-        for pos in posis_:
-            pp = self.tagged_text[pos].tolist()
-            pp.append(pos)
-            res.append(pp)
-        return res
-
-    def tag_item_in_thesaurus(self, thesaurus, flags=[SearchFlag.NO_CASE, SearchFlag.SP_WS, SearchFlag.SP_D],
-                              prefix_="th_", stop_tag=["BEG-LOC", "LOC", "END-LOC"]):
-        if not self.hasSameFlags(flags):
-            self.transform_tagged(flags)
-
-        if not isinstance(thesaurus, dict):
-            raise WrongThesaurusFormatError(thesaurus)
-
-        t = " ".join(self.tagged_text[:, 0].tolist())
-        for id_, element in thesaurus.items():
-            if element.lower() in t:
-                positions_ = self.get_occurrences(element)
-                for d_ in positions_:
-                    f = True
-                    x, y = d_[0], d_[1]
-                    c = 0
-                    if not self.isWorthIt(x, y, prefix_):
-                        break
-                    for st in stop_tag:
-                        if x != y and st in self.tagged_text[x:y][:, 1]:
-                            f = False
-                        elif x == y and st in self.tagged_text[x][1]:
-                            f = False
-                    if f:
-                        if abs(x - y) > 0:
-                            self.tagged_text[x:y][:, 1] = prefix_  # prefix_ + id_
-                        else:
-                            self.tagged_text[x][1] = prefix_  # prefix_ + id_
-        new_tagged_ = []
-        j = 0
-        while j < len(self.tagged_text):
-            tag = self.tagged_text[j]
-            if prefix_ in tag[-1]:
-                curr = tag[-1]
-                t = 1
-                while j + t < len(self.tagged_text):
-                    if self.tagged_text[j + t][-1] != curr:
-                        break
-                    t += 1
-
-                new_tagged_.append([self.reconstruct_str(self.tagged_text[j:j + t][:, 0]), curr])
-                j += t
-            else:
-                new_tagged_.append(tag.tolist())
-                j += 1
-        self.tagged_text = np.array(new_tagged_)
-
-    def reconstruct_str(self, list_):
-        res = ""
-        no_sp_char = ["-"]
-        no_sp_bf = [","]
-        for ch in list_:
-            if not ch in no_sp_char and res:
-                if res[-1] in no_sp_char or ch in no_sp_bf:
-                    res += ch
-            if not res:
-                res += ch
-            else:
-                res += " " + ch
-
-        return res
-
-    def isWorthIt(self, x, y, prefix):
-        taille = abs(x - y)
-        count = 0
-        if x == y:
-            if prefix in self.tagged_text[x]:
-                count += 1
-            taille = 1
-        else:
-            # c=None
-            for item in self.tagged_text[x:y]:
-                if prefix in item[-1]:
-                    count += 1
-
-        decx, decy = 0, 0
-        fx, fy = True, True
-        while fx or fy:
-            fx, fy = False, False
-            if x - (decx + 1) > 0:
-                if prefix in self.tagged_text[x - (decx + 1)][-1]:
-                    fx = True
-                    decx += 1
-            if y + decy + 1 < len(self.tagged_text):
-                if prefix in self.tagged_text[y + decy + 1][-1]:
-                    fy = True
-                    decy += 1
-
-        if taille < count + decx + decy:
-            return False
-        return True
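
A sketch of the token-normalization flags chained by the deleted tt4py module, assuming NLTK with the wordnet data installed:

from nltk.stem import WordNetLemmatizer, SnowballStemmer

token = "Rivers"
token = token.lower()                          # SearchFlag.NO_CASE
print(WordNetLemmatizer().lemmatize(token))    # SearchFlag.WN_LEM  -> "river"
print(SnowballStemmer("english").stem(token))  # SearchFlag.SNW_STEM -> "river"
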
-- 
GitLab