diff --git a/.gitignore b/.gitignore index 572be86ffe526155cc18737764a5f524cc854654..46d7054aaed84961ad64817eacf37927ec6fe18f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ __pycache__/ /UNHCR_en.txt !/tests/ .DS_Store +.Rproj.user diff --git a/data/graph_exp_july_19/selected.json b/data/graph_exp_july_19/selected.json new file mode 100644 index 0000000000000000000000000000000000000000..dc133d29641c276240579a7d46b016cca3ec5aee --- /dev/null +++ b/data/graph_exp_july_19/selected.json @@ -0,0 +1 @@ +[254, 566, 594, 642, 877, 887, 988, 1072, 1210, 1308, 1315, 1335, 1356, 1416, 1548, 1571, 1587, 1683, 1685, 1958, 1960, 1961, 1968, 2034, 2047, 2182, 2194, 2345, 2422, 2528, 2560, 2734, 3306, 3606, 3682, 3718, 3864, 4092, 4119, 4392, 4432, 4789, 5020, 5244, 5704, 5847, 5967, 6031, 6265, 6815, 6922, 7261, 7285, 7303, 7394, 7441, 7498, 7546, 7564, 7570, 7573, 7772, 7776, 9138, 12078, 12216, 12270, 12462, 12813, 12871, 12918, 13009, 13068, 13223, 13408, 13695, 13708, 13727, 13771, 13937, 14179, 14218, 14250, 14295, 14346, 14417, 14507, 14615, 14731, 14748, 14899, 14938, 15008, 15154, 15224, 15236, 15435, 15534, 15628, 15633] \ No newline at end of file diff --git a/exp_22_may.sh b/exp_22_may.sh index 1ef7e4447cf659950fa98bcfce3a103ec2d3f4ab..e583b5ac38ce6a28bd2c062fa9e652a4b2391538 100755 --- a/exp_22_may.sh +++ b/exp_22_may.sh @@ -2,22 +2,22 @@ path_csv=/Users/jacquesfize/LOD_DATASETS/disambiguation path_texts=/Users/jacquesfize/LOD_DATASETS/raw_bvlac/ -output_dir=data/graph_exp_may_25 +output_dir=data/graph_exp_july_19 if [ "$1" == "generate" ]; then - mkdir -p $output_dir/normal - #python3 generate_data_csv.py $path_csv $output_dir/normal asso.json normal; + #mkdir -p $output_dir/normal + #python3 -W ignore generate_data_csv.py $path_csv $output_dir/normal asso.json normal; python3 generate_transform.py $output_dir/normal $output_dir/extension_1 extension -a 1; python3 generate_transform.py $output_dir/normal $output_dir/extension_2 extension -a 2; #python3 generate_transform.py $output_dir/normal $output_dir/extension_3 extension -a 3; - #python3 generate_transform.py $output_dir/normal $output_dir/gen_all_1 generalisation -t all -n 1; + python3 generate_transform.py $output_dir/normal $output_dir/gen_all_1 generalisation -t all -n 1; #python3 generate_transform.py $output_dir/normal $output_dir/gen_all_2 generalisation -t all -n 2; python3 generate_transform.py $output_dir/normal $output_dir/gen_region generalisation -t bounded -b region; - #python3 generate_transform.py $output_dir/normal $output_dir/gen_capital generalisation -t bounded -b capital; + python3 generate_transform.py $output_dir/normal $output_dir/gen_capital generalisation -t bounded -b capital; python3 generate_transform.py $output_dir/normal $output_dir/gen_country generalisation -t bounded -b country; fi diff --git a/generate_data_csv.py b/generate_data_csv.py index 3a1ea4f61b3b2c8e3df639bf82f4537879bb7f62..28127de14def73f900ccfbff5f42d1c33900a199 100644 --- a/generate_data_csv.py +++ b/generate_data_csv.py @@ -6,6 +6,7 @@ import argparse,glob, string,time,re from progressbar import ProgressBar, Timer, Bar, ETA, Counter from strpython.helpers.boundary import get_all_shapes +from strpython.models.str import STR from strpython.nlp.disambiguator.geodict_gaurav import * from strpython.pipeline import * import pandas as pd @@ -70,7 +71,7 @@ start = time.time() associated_es={} count_per_doc={} i=0 -logging.info("Get associated spatial entities and ") +#logging.info("Get associated spatial entities and ") with 
ProgressBar(max_value=len(files_glob),widgets=[' [', Timer(), '] ',Bar(),'(', Counter(),')','(', ETA(), ')']) as pg: for fn in files_glob: @@ -85,14 +86,14 @@ with ProgressBar(max_value=len(files_glob),widgets=[' [', Timer(), '] ',Bar(),'( associated_es[id_]={} pg.update(i) i+=1 -logging.info("Fetch list of spatial entities available !") +#logging.info("Fetch list of spatial entities available !") all_es=set([]) for k,v in associated_es.items(): for k2 in v: all_es.add(k2) -logging.info("Get All Shapes from Database for all ES") -all_shapes=get_all_shapes(list(all_es)) +#logging.info("Get All Shapes from Database for all ES") +#all_shapes=get_all_shapes(list(all_es)) i=0 def foo_(x): diff --git a/generate_similarity_matrix.py b/generate_similarity_matrix.py index a54738062a25f24d8e0fff9d2898543f9e11e4b7..4c35fdfc6ae2564c00fb4dc366cf9e0f05872afe 100644 --- a/generate_similarity_matrix.py +++ b/generate_similarity_matrix.py @@ -1,7 +1,7 @@ # coding = utf-8 import glob - -from gmatch4py.bag_of_cliques import BagOfCliques +# from gmatch4py.bag_of_cliques import BagOfCliques +from gmatch4py.helpers.reader import import_dir from gmatch4py.base import Base from gmatch4py.ged.graph_edit_dist import GraphEditDistance from gmatch4py.ged.bipartite_graph_matching_2 import BP_2 @@ -11,13 +11,20 @@ from gmatch4py.jaccard import Jaccard from gmatch4py.kernels.weisfeiler_lehman import * from gmatch4py.mcs import MCS from gmatch4py.vertex_edge_overlap import VertexEdgeOverlap +import argparse, os, sys, re, json, logging +import datetime -import argparse, os, sys, re, json +logging.basicConfig( + filename="{0}.csv".format(datetime.datetime.now().strftime("%Y_%m_%d__%H_%M_%S")), + format="%(message)s,%(asctime)s", + level=logging.DEBUG +) parser = argparse.ArgumentParser() parser.add_argument("graphs_input_dir") parser.add_argument("matrix_output_dir") parser.add_argument("-d", action="store_true", help="Return distance matrix") +parser.add_argument("-s", action="store_true", help="Selected graph ?") args = parser.parse_args() if not os.path.exists(args.graphs_input_dir): @@ -30,24 +37,17 @@ if not os.path.exists(args.matrix_output_dir): os.makedirs(args.matrix_output_dir) print("Directory created") -graphs = [] -mapping_files_to_graphs = {} - -# Loading graphs -fns = glob.glob(args.graphs_input_dir.rstrip("/") + "/*.gexf") -if not fns: - print("Input dir empty! 
Not .gexf file found!") - -i = 0 -for fn in fns: - graphs.append(nx.read_gexf(fn)) - mapping_files_to_graphs[i] = fn - -#print(graphs) - +logging.info(msg="L_G,BEGIN,\"\"") +graphs = import_dir(args.graphs_input_dir) +logging.info(msg="L_G,DONE,\"\"") +# print(graphs) +selected = None +if args.s: + selected = json.load(open("selected.json")) # Compute matrices -for class_ in [BagOfCliques, GraphEditDistance, BP_2, GreedyEditDistance, HED, Jaccard, WeisfeleirLehmanKernel, MCS, +for class_ in [GraphEditDistance, BP_2, GreedyEditDistance, HED, Jaccard, MCS, VertexEdgeOverlap]: + logging.info(msg="C_S,BEG,\"{0}\"".format(class_.__name__)) print("Computing the Similarity Matrix for {0}".format(class_.__name__)) if class_ in (GraphEditDistance, BP_2, GreedyEditDistance, HED): @@ -55,20 +55,22 @@ for class_ in [BagOfCliques, GraphEditDistance, BP_2, GreedyEditDistance, HED, J elif class_ == WeisfeleirLehmanKernel: comparator = class_(h=2) else: - comparator=class_() - matrix = comparator.compare(graphs, None) + comparator = class_() + matrix = comparator.compare(graphs, selected) if not args.d: matrix = comparator.similarity(matrix) else: - matrix= comparator.distance(matrix) - print("Matrix ready. Saving ...") - output_fn="{0}/{1}_{2}.npy".format( + matrix = comparator.distance(matrix) + logging.info(msg="C_S,DONE,\"{0}\"".format(class_.__name__)) + output_fn = "{0}/{1}_{2}.npy".format( args.matrix_output_dir.rstrip("/"), class_.__name__, - os.path.dirname(args.graphs_input_dir).replace("/","_") + os.path.dirname(args.graphs_input_dir).replace("/", "_") ) - np.save(output_fn,matrix) + logging.info(msg="M_S,BEG,\"{0}\"".format(class_.__name__)) + np.save(output_fn, matrix) + logging.info(msg="M_S,DONE,\"{0}\"".format(class_.__name__)) print("Matrix Saved") -json.dump(mapping_files_to_graphs,open("{0}/{1}".format(args.matrix_output_dir.rstrip("/"),"metadata.json"))) -print("Done") \ No newline at end of file +# json.dump(mapping_files_to_graphs,open("{0}/{1}".format(args.matrix_output_dir.rstrip("/"),"metadata.json"))) +print("Done") diff --git a/generate_transform.py b/generate_transform.py index bcf949b5cb521416701ed13a4c89ee8eac0d2998..0e8cd2d799dbff51f00f2e52cef6885823e04593 100644 --- a/generate_transform.py +++ b/generate_transform.py @@ -7,9 +7,11 @@ import logging import time from concurrent.futures import ThreadPoolExecutor +import networkx as nx from progressbar import ProgressBar, Timer, Bar, ETA, Counter from strpython.helpers.boundary import get_all_shapes +from strpython.models.str import STR from strpython.nlp.disambiguator.geodict_gaurav import * from strpython.pipeline import * @@ -90,11 +92,7 @@ for k,v in associated_es.items(): for k2 in v: all_es.add(k2) -logging.info("Get All Shapes from Database for all ES") -all_shapes=get_all_shapes(list(all_es)) -for id_ in graphs_: - graphs_[id].set_all_shapes(all_shapes) def workSTR(id_doc,g,list_gs,pg,argu): global i @@ -103,6 +101,7 @@ def workSTR(id_doc,g,list_gs,pg,argu): # Save Metadata # Save Graph structure + print("savegraph") nx.write_gexf(list_gs[-1], argu.graphs_output_dir + "/{0}.gexf".format(id_doc)) i+=1 pg.update(i) diff --git a/notebooks/EvalDesambiguisationMada.ipynb b/notebooks/EvalDesambiguisationMada.ipynb index 9045fca33ba116b5dddf9bf768cabee270bcc841..3d58d2ac2442e408c4fb916f4cae1f1451ddd073 100644 --- a/notebooks/EvalDesambiguisationMada.ipynb +++ b/notebooks/EvalDesambiguisationMada.ipynb @@ -5,13 +5,14 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2018-06-19T13:09:12.991345Z", - 
"start_time": "2018-06-19T13:09:12.578369Z" + "end_time": "2018-08-24T14:18:40.551515Z", + "start_time": "2018-08-24T14:18:40.137529Z" } }, "outputs": [], "source": [ - "import pandas as pd" + "import pandas as pd\n", + "%load_ext autoreload" ] }, { @@ -19,8 +20,8 @@ "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2018-06-19T13:09:13.002216Z", - "start_time": "2018-06-19T13:09:12.998336Z" + "end_time": "2018-08-24T14:18:40.558929Z", + "start_time": "2018-08-24T14:18:40.553463Z" } }, "outputs": [ @@ -41,8 +42,8 @@ "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2018-06-19T13:09:14.674713Z", - "start_time": "2018-06-19T13:09:14.668234Z" + "end_time": "2018-08-24T14:18:40.565725Z", + "start_time": "2018-08-24T14:18:40.560729Z" } }, "outputs": [], @@ -57,8 +58,8 @@ "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2018-06-19T13:09:14.912185Z", - "start_time": "2018-06-19T13:09:14.895298Z" + "end_time": "2018-08-24T14:18:40.582053Z", + "start_time": "2018-08-24T14:18:40.567425Z" } }, "outputs": [], @@ -73,24 +74,16 @@ "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2018-06-19T13:09:20.638699Z", - "start_time": "2018-06-19T13:09:17.343687Z" + "end_time": "2018-08-24T14:18:43.957963Z", + "start_time": "2018-08-24T14:18:40.585425Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:Line magic function `%autoreload` not found.\n" - ] - } - ], + "outputs": [], "source": [ "%autoreload\n", - "from nlp.disambiguator.geodict_gaurav import GauravGeodict\n", - "from nlp.disambiguator.most_common import MostCommonDisambiguator\n", - "from nlp.disambiguator.wikipedia_cooc import WikipediaDisambiguator\n", + "from strpython.nlp.disambiguator.geodict_gaurav import GauravGeodict\n", + "from strpython.nlp.disambiguator.most_common import MostCommonDisambiguator\n", + "from strpython.nlp.disambiguator.wikipedia_cooc import WikipediaDisambiguator\n", "disMost_common=MostCommonDisambiguator()\n", "disGaurav=GauravGeodict()\n", "disWiki=WikipediaDisambiguator()" @@ -98,11 +91,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2018-06-19T13:10:29.965681Z", - "start_time": "2018-06-19T13:10:29.952223Z" + "end_time": "2018-08-24T14:18:44.015575Z", + "start_time": "2018-08-24T14:18:43.960053Z" } }, "outputs": [], @@ -117,25 +110,34 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2018-06-03T19:13:08.776780Z", - "start_time": "2018-06-03T19:13:08.752046Z" + "end_time": "2018-08-24T14:18:44.023135Z", + "start_time": "2018-08-24T14:18:44.017778Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2018-06-03T19:13:13.030925Z", - "start_time": "2018-06-03T19:13:13.028591Z" + "end_time": "2018-08-24T14:18:44.027539Z", + "start_time": "2018-08-24T14:18:44.024973Z" } }, "outputs": [], @@ -145,11 +147,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2018-06-03T19:13:13.238647Z", - "start_time": "2018-06-03T19:13:13.212601Z" + "end_time": "2018-08-24T14:18:44.061164Z", + "start_time": "2018-08-24T14:18:44.029278Z" } }, "outputs": [], @@ -171,11 +173,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2018-06-03T19:43:28.769834Z", - "start_time": "2018-06-03T19:15:06.598715Z" + "end_time": "2018-08-24T14:42:35.179291Z", + "start_time": "2018-08-24T14:18:44.063336Z" } }, "outputs": [ @@ -186,6 +188,20 @@ "/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:12: RuntimeWarning: invalid value encountered in long_scalars\n", " if sys.path[0] == '':\n" ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-10-f81592812190>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0macc_wiki\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracyWiki\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata_lang\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mid_\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;31m#acc_MC.append(accuracyMostCommon(df,data_lang[id_]))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;31m#acc_GEO.append(accuracyGeodict(df,data_lang[id_]))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m<ipython-input-9-7d392d282df9>\u001b[0m in \u001b[0;36maccuracyWiki\u001b[0;34m(df, lang)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0maccuracyWiki\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlang\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m 
\u001b[0mdf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"GID\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"O\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"NR\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"o\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"text\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"GID\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mres_dis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdisWiki\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisambiguate_wiki\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"text\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlang\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"disambiguation\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mres_dis\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres_dis\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m\"0\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGID\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisambiguation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/nas_cloud/Code/str-python/strpython/nlp/disambiguator/wikipedia_cooc.py\u001b[0m in \u001b[0;36mdisambiguate_wiki\u001b[0;34m(self, entities, lang)\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcand\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mcand2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0;31m# take the lowest co-occurrency between two candidates\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 81\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcand2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcand\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 82\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcand2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcand\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"weight\"\u001b[0m\u001b[0;34m]\u001b[0m 
\u001b[0;34m<\u001b[0m \u001b[0mprob\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/networkx/classes/reportviews.py\u001b[0m in \u001b[0;36m__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1025\u001b[0m \u001b[0mseen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1026\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbrs\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nodes_nbrs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1027\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mnbr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mnbrs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1028\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnbr\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mseen\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1029\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] } ], "source": [ @@ -195,32 +211,21 @@ " \n", " df=pd.read_csv(fn)\n", " acc_wiki.append(accuracyWiki(df,data_lang[id_]))\n", - " #acc_MC.append(accuracyMostCommon(df,data_lang[id_]))\n", - " #acc_GEO.append(accuracyGeodict(df,data_lang[id_]))\n", + " acc_MC.append(accuracyMostCommon(df,data_lang[id_]))\n", + " acc_GEO.append(accuracyGeodict(df,data_lang[id_]))\n", " " ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2018-05-17T01:22:15.574548Z", - "start_time": "2018-05-17T01:22:15.567387Z" + "end_time": "2018-08-24T14:42:35.180200Z", + "start_time": "2018-08-24T14:18:40.127Z" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.6118508350166977" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "np.mean(np.nan_to_num(acc_GEO))" @@ -228,50 +233,28 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2018-05-17T01:22:15.618633Z", - "start_time": "2018-05-17T01:22:15.612431Z" + "end_time": "2018-08-24T14:42:35.181124Z", + "start_time": "2018-08-24T14:18:40.128Z" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7694373020389706" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "np.mean(np.nan_to_num(acc_MC))" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2018-06-03T19:44:42.307528Z", - "start_time": "2018-06-03T19:44:42.295687Z" + "end_time": "2018-08-24T14:42:35.182157Z", + "start_time": "2018-08-24T14:18:40.130Z" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.740705700091002" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "np.mean(np.nan_to_num(acc_wiki))" @@ -279,25 +262,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { 
"ExecuteTime": { - "end_time": "2018-06-19T13:12:33.632268Z", - "start_time": "2018-06-19T13:12:26.349957Z" + "end_time": "2018-08-24T14:42:35.182992Z", + "start_time": "2018-08-24T14:18:40.131Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "57451 9248\n", - "6.212262110726644\n" - ] - } - ], + "outputs": [], "source": [ - "from helpers.gazeteer_helpers import count_of_se\n", + "from strpython.helpers.gazeteer_helpers import count_of_se\n", "sum_,count=0,0\n", "for fn in fns:\n", " try:\n", @@ -315,650 +289,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2018-06-19T13:10:32.794585Z", - "start_time": "2018-06-19T13:10:32.759937Z" + "end_time": "2018-08-24T14:42:35.184004Z", + "start_time": "2018-08-24T14:18:40.133Z" } }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>Unnamed: 0</th>\n", - " <th>Unnamed: 0.1</th>\n", - " <th>Unnamed: 0.1.1</th>\n", - " <th>Unnamed: 0.1.1.1</th>\n", - " <th>diff2</th>\n", - " <th>text</th>\n", - " <th>pos_</th>\n", - " <th>ent_type_</th>\n", - " <th>GID</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0.0</td>\n", - " <td>Réunion</td>\n", - " <td>NOUN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1.0</td>\n", - " <td>Lac 2</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>2.0</td>\n", - " <td>Sud</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " <td>3.0</td>\n", - " <td>Lac 2</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>4.0</td>\n", - " <td>BV Lac 2</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>5</td>\n", - " <td>5</td>\n", - " <td>5</td>\n", - " <td>5</td>\n", - " <td>5.0</td>\n", - " <td>BV Lac 2</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>6</td>\n", - " <td>6</td>\n", - " <td>6</td>\n", - " <td>6</td>\n", - " <td>6.0</td>\n", - " <td>Madagascar</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD3404996</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>7</td>\n", - " <td>7</td>\n", - " <td>7</td>\n", - " <td>7</td>\n", - " <td>7.0</td>\n", - " <td>Madagascar</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD3404996</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>8</td>\n", - " <td>8</td>\n", - " <td>8</td>\n", 
- " <td>8</td>\n", - " <td>8.0</td>\n", - " <td>–</td>\n", - " <td>PUNCT</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>9</td>\n", - " <td>9</td>\n", - " <td>9</td>\n", - " <td>9</td>\n", - " <td>9.0</td>\n", - " <td>Etat</td>\n", - " <td>NOUN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>10</td>\n", - " <td>10</td>\n", - " <td>10</td>\n", - " <td>10</td>\n", - " <td>10.0</td>\n", - " <td>Madagascar</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD3404996</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>11</td>\n", - " <td>11</td>\n", - " <td>11</td>\n", - " <td>11</td>\n", - " <td>11.0</td>\n", - " <td>Lac 2</td>\n", - " <td>SPACE</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>12</th>\n", - " <td>12</td>\n", - " <td>12</td>\n", - " <td>12</td>\n", - " <td>12</td>\n", - " <td>12.0</td>\n", - " <td>Madagascar</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD3404996</td>\n", - " </tr>\n", - " <tr>\n", - " <th>13</th>\n", - " <td>13</td>\n", - " <td>13</td>\n", - " <td>13</td>\n", - " <td>13</td>\n", - " <td>13.0</td>\n", - " <td>Madagascar</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD3404996</td>\n", - " </tr>\n", - " <tr>\n", - " <th>14</th>\n", - " <td>14</td>\n", - " <td>14</td>\n", - " <td>14</td>\n", - " <td>14</td>\n", - " <td>14.0</td>\n", - " <td>Directeur</td>\n", - " <td>NOUN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>15</td>\n", - " <td>15</td>\n", - " <td>15</td>\n", - " <td>15</td>\n", - " <td>15.0</td>\n", - " <td>Lac</td>\n", - " <td>SPACE</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>16</th>\n", - " <td>16</td>\n", - " <td>16</td>\n", - " <td>16</td>\n", - " <td>16</td>\n", - " <td>16.0</td>\n", - " <td>Lac 2</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17</th>\n", - " <td>17</td>\n", - " <td>17</td>\n", - " <td>17</td>\n", - " <td>17</td>\n", - " <td>17.0</td>\n", - " <td>Paris</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD5400765</td>\n", - " </tr>\n", - " <tr>\n", - " <th>18</th>\n", - " <td>18</td>\n", - " <td>18</td>\n", - " <td>18</td>\n", - " <td>18</td>\n", - " <td>18.0</td>\n", - " <td>Antananarivo</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD3682867</td>\n", - " </tr>\n", - " <tr>\n", - " <th>19</th>\n", - " <td>19</td>\n", - " <td>19</td>\n", - " <td>19</td>\n", - " <td>19</td>\n", - " <td>19.0</td>\n", - " <td>Directions Régionales</td>\n", - " <td>SPACE</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>20</th>\n", - " <td>20</td>\n", - " <td>20</td>\n", - " <td>20</td>\n", - " <td>20</td>\n", - " <td>20.0</td>\n", - " <td>Centres</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>21</th>\n", - " <td>21</td>\n", - " <td>21</td>\n", - " <td>21</td>\n", - " <td>21</td>\n", - " <td>21.0</td>\n", - " <td>Services Agricoles</td>\n", - " <td>SPACE</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>22</th>\n", - " <td>22</td>\n", - " <td>22</td>\n", - " <td>22</td>\n", - " <td>22</td>\n", - " <td>22.0</td>\n", - " <td>BV Lac</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23</th>\n", - " 
<td>23</td>\n", - " <td>23</td>\n", - " <td>23</td>\n", - " <td>23</td>\n", - " <td>23.0</td>\n", - " <td>jusqu’</td>\n", - " <td>VERB</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>24</th>\n", - " <td>24</td>\n", - " <td>24</td>\n", - " <td>24</td>\n", - " <td>24</td>\n", - " <td>24.0</td>\n", - " <td>Antananarivo</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD3682867</td>\n", - " </tr>\n", - " <tr>\n", - " <th>25</th>\n", - " <td>25</td>\n", - " <td>25</td>\n", - " <td>25</td>\n", - " <td>25</td>\n", - " <td>25.0</td>\n", - " <td>Suivi</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>26</th>\n", - " <td>26</td>\n", - " <td>26</td>\n", - " <td>26</td>\n", - " <td>26</td>\n", - " <td>26.0</td>\n", - " <td>Ambositra</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD6124882</td>\n", - " </tr>\n", - " <tr>\n", - " <th>27</th>\n", - " <td>27</td>\n", - " <td>27</td>\n", - " <td>27</td>\n", - " <td>27</td>\n", - " <td>27.0</td>\n", - " <td>Farafangana</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD2452325</td>\n", - " </tr>\n", - " <tr>\n", - " <th>28</th>\n", - " <td>28</td>\n", - " <td>28</td>\n", - " <td>28</td>\n", - " <td>28</td>\n", - " <td>28.0</td>\n", - " <td>du Sud</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>29</th>\n", - " <td>29</td>\n", - " <td>29</td>\n", - " <td>29</td>\n", - " <td>29</td>\n", - " <td>29.0</td>\n", - " <td>Est</td>\n", - " <td>NOUN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>30</th>\n", - " <td>30</td>\n", - " <td>30</td>\n", - " <td>30</td>\n", - " <td>30</td>\n", - " <td>30.0</td>\n", - " <td>seuil</td>\n", - " <td>NOUN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>31</th>\n", - " <td>31</td>\n", - " <td>31</td>\n", - " <td>31</td>\n", - " <td>31</td>\n", - " <td>31.0</td>\n", - " <td>BV Lac 2</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>32</th>\n", - " <td>32</td>\n", - " <td>32</td>\n", - " <td>32</td>\n", - " <td>32</td>\n", - " <td>32.0</td>\n", - " <td>jusqu’</td>\n", - " <td>VERB</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>33</th>\n", - " <td>33</td>\n", - " <td>33</td>\n", - " <td>33</td>\n", - " <td>33</td>\n", - " <td>33.0</td>\n", - " <td>BV Lac 2</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>34</th>\n", - " <td>34</td>\n", - " <td>34</td>\n", - " <td>34</td>\n", - " <td>34</td>\n", - " <td>34.0</td>\n", - " <td>Secrétaire</td>\n", - " <td>NOUN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>35</th>\n", - " <td>35</td>\n", - " <td>35</td>\n", - " <td>35</td>\n", - " <td>35</td>\n", - " <td>35.0</td>\n", - " <td>Alaotra</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>36</th>\n", - " <td>36</td>\n", - " <td>36</td>\n", - " <td>36</td>\n", - " <td>36</td>\n", - " <td>36.0</td>\n", - " <td>Mangoro</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD3574285</td>\n", - " </tr>\n", - " <tr>\n", - " <th>37</th>\n", - " <td>37</td>\n", - " <td>37</td>\n", - " <td>37</td>\n", - " <td>37</td>\n", - " <td>37.0</td>\n", - " <td>Directeur</td>\n", - " <td>NOUN</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " 
<tr>\n", - " <th>38</th>\n", - " <td>38</td>\n", - " <td>38</td>\n", - " <td>38</td>\n", - " <td>38</td>\n", - " <td>38.0</td>\n", - " <td>Lac 2 et</td>\n", - " <td>SPACE</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>39</th>\n", - " <td>39</td>\n", - " <td>39</td>\n", - " <td>39</td>\n", - " <td>39</td>\n", - " <td>39.0</td>\n", - " <td>Sous réserve</td>\n", - " <td>VERB</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40</th>\n", - " <td>40</td>\n", - " <td>40</td>\n", - " <td>40</td>\n", - " <td>40</td>\n", - " <td>40.0</td>\n", - " <td>Grandjean</td>\n", - " <td>PROPN</td>\n", - " <td>LOC</td>\n", - " <td>GD3254594</td>\n", - " </tr>\n", - " <tr>\n", - " <th>41</th>\n", - " <td>41</td>\n", - " <td>41</td>\n", - " <td>41</td>\n", - " <td>41</td>\n", - " <td>41.0</td>\n", - " <td>jusqu’</td>\n", - " <td>VERB</td>\n", - " <td>LOC</td>\n", - " <td>O</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " Unnamed: 0 Unnamed: 0.1 Unnamed: 0.1.1 Unnamed: 0.1.1.1 diff2 \\\n", - "0 0 0 0 0 0.0 \n", - "1 1 1 1 1 1.0 \n", - "2 2 2 2 2 2.0 \n", - "3 3 3 3 3 3.0 \n", - "4 4 4 4 4 4.0 \n", - "5 5 5 5 5 5.0 \n", - "6 6 6 6 6 6.0 \n", - "7 7 7 7 7 7.0 \n", - "8 8 8 8 8 8.0 \n", - "9 9 9 9 9 9.0 \n", - "10 10 10 10 10 10.0 \n", - "11 11 11 11 11 11.0 \n", - "12 12 12 12 12 12.0 \n", - "13 13 13 13 13 13.0 \n", - "14 14 14 14 14 14.0 \n", - "15 15 15 15 15 15.0 \n", - "16 16 16 16 16 16.0 \n", - "17 17 17 17 17 17.0 \n", - "18 18 18 18 18 18.0 \n", - "19 19 19 19 19 19.0 \n", - "20 20 20 20 20 20.0 \n", - "21 21 21 21 21 21.0 \n", - "22 22 22 22 22 22.0 \n", - "23 23 23 23 23 23.0 \n", - "24 24 24 24 24 24.0 \n", - "25 25 25 25 25 25.0 \n", - "26 26 26 26 26 26.0 \n", - "27 27 27 27 27 27.0 \n", - "28 28 28 28 28 28.0 \n", - "29 29 29 29 29 29.0 \n", - "30 30 30 30 30 30.0 \n", - "31 31 31 31 31 31.0 \n", - "32 32 32 32 32 32.0 \n", - "33 33 33 33 33 33.0 \n", - "34 34 34 34 34 34.0 \n", - "35 35 35 35 35 35.0 \n", - "36 36 36 36 36 36.0 \n", - "37 37 37 37 37 37.0 \n", - "38 38 38 38 38 38.0 \n", - "39 39 39 39 39 39.0 \n", - "40 40 40 40 40 40.0 \n", - "41 41 41 41 41 41.0 \n", - "\n", - " text pos_ ent_type_ GID \n", - "0 Réunion NOUN LOC O \n", - "1 Lac 2 PROPN LOC O \n", - "2 Sud PROPN LOC O \n", - "3 Lac 2 PROPN LOC O \n", - "4 BV Lac 2 PROPN LOC O \n", - "5 BV Lac 2 PROPN LOC O \n", - "6 Madagascar PROPN LOC GD3404996 \n", - "7 Madagascar PROPN LOC GD3404996 \n", - "8 – PUNCT LOC O \n", - "9 Etat NOUN LOC O \n", - "10 Madagascar PROPN LOC GD3404996 \n", - "11 Lac 2 SPACE LOC O \n", - "12 Madagascar PROPN LOC GD3404996 \n", - "13 Madagascar PROPN LOC GD3404996 \n", - "14 Directeur NOUN LOC O \n", - "15 Lac SPACE LOC O \n", - "16 Lac 2 PROPN LOC O \n", - "17 Paris PROPN LOC GD5400765 \n", - "18 Antananarivo PROPN LOC GD3682867 \n", - "19 Directions Régionales SPACE LOC O \n", - "20 Centres PROPN LOC O \n", - "21 Services Agricoles SPACE LOC O \n", - "22 BV Lac PROPN LOC O \n", - "23 jusqu’ VERB LOC O \n", - "24 Antananarivo PROPN LOC GD3682867 \n", - "25 Suivi PROPN LOC O \n", - "26 Ambositra PROPN LOC GD6124882 \n", - "27 Farafangana PROPN LOC GD2452325 \n", - "28 du Sud PROPN LOC O \n", - "29 Est NOUN LOC O \n", - "30 seuil NOUN LOC O \n", - "31 BV Lac 2 PROPN LOC O \n", - "32 jusqu’ VERB LOC O \n", - "33 BV Lac 2 PROPN LOC O \n", - "34 Secrétaire NOUN LOC O \n", - "35 Alaotra PROPN LOC O \n", - "36 Mangoro PROPN LOC GD3574285 \n", - "37 Directeur NOUN LOC O \n", - "38 Lac 2 et SPACE LOC 
O  \n",
-       "39 Sous réserve VERB LOC O \n",
-       "40 Grandjean PROPN LOC GD3254594 \n",
-       "41 jusqu’ VERB LOC O "
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "df"
    ]
diff --git a/setup.py b/setup.py
index 0ab6becda299b213e3cf59d92d09c893b36896db..d1fb08b79346d9e3388d01743b55eab4439e2649 100644
--- a/setup.py
+++ b/setup.py
@@ -18,6 +18,6 @@ setup(
 )
 # Put default config file if not exists
 home = str(Path.home())
-if not os.path.exists(os.path.join(home,".strpython")) or not os.path.exists(os.path.join(home,".strpython/config.json")):
+if not os.path.exists(os.path.join(home,".strpython")): #or not os.path.exists(os.path.join(home,".strpython/config.json")):
     os.makedirs(os.path.dirname(os.path.join(home,".strpython/config.json")), exist_ok=True)
     shutil.copy2("strpython/config/config.json",os.path.join(home,".strpython/config.json"))
\ No newline at end of file
diff --git a/strpython/config/config.json b/strpython/config/config.json
index be883496a87684dab8bd417288950d2bb21dc98b..50c7d66c903142cb984b995e572b6e7986051f09 100644
--- a/strpython/config/config.json
+++ b/strpython/config/config.json
@@ -8,8 +8,8 @@
   "database_json":"resources/database_exp_25_may.db",
   "log_file":"extract_log",
   "wiki_cooc_dis":{
-    "cooc_freq":"/Users/jacquesfize/nas_cloud/Code/str-python/resources/coocurrence_wiki.pkl",
-    "count":"/Users/jacquesfize/nas_cloud/Code/str-python/resources/count_wiki.pkl"
+    "cooc_freq":"/Users/jacquesfize/nas_cloud/Code/str-python/strpython/resources/coocurrence_wiki.pkl",
+    "count":"/Users/jacquesfize/nas_cloud/Code/str-python/strpython/resources/count_wiki.pkl"
   },
-  "language_resources_path":"/Users/jacquesfize/nas_cloud/Code/str-python/resources/language_resources"
+  "language_resources_path":"/Users/jacquesfize/nas_cloud/Code/str-python/strpython/resources/language_resources"
 }
\ No newline at end of file
diff --git a/strpython/config/configuration.py b/strpython/config/configuration.py
index 3a018e7831a45640f783967174dc006ec978736d..5e33da1cf7b8402fd5b32cb4342cf0a0d7677a3a 100644
--- a/strpython/config/configuration.py
+++ b/strpython/config/configuration.py
@@ -1,18 +1,32 @@
 # coding = utf-8
-import json,os
+import json
+import os
 from pathlib import Path
 
 
 class Configuration(object):
+    """
+    Define the `Configuration` class. A `Configuration` object contains all the
+    shared variables of strpython, such as the georeferential server address, the Stanford NER address, etc.
+    """
    def __init__(self, data):
-        self.__dict__=data
+        """
+        Constructor. :param data: dict that contains all the configuration variables. In the module, these variables
+        are stored in a file at `~/.strpython/config.json`.
+        """
+        self.__dict__ = data
         for d in self.__dict__:
-            if isinstance(self.__dict__[d],dict):
-                self.__dict__[d]=Configuration(self.__dict__[d])
+            if isinstance(self.__dict__[d], dict):
+                self.__dict__[d] = Configuration(self.__dict__[d])
+
     def __getitem__(self, item):
         return self.__dict__[item]
 
-home = str(Path.home())
-config = Configuration(json.load(open(os.path.join(home,".strpython/config.json")))))
+"""
+Initialise the config variable.
+Access this variable using `from strpython.config.configuration import config`
+"""
+home = str(Path.home())
+config = Configuration(json.load(open(os.path.join(home, ".strpython/config.json"))))
diff --git a/strpython/config/stopwords_en.txt b/strpython/config/stopwords_en.txt
deleted file mode 100644
index 6e190b70f008f497b37cdc652e89f0f19b9d90df..0000000000000000000000000000000000000000
--- a/strpython/config/stopwords_en.txt
+++ /dev/null
@@ -1,173 +0,0 @@
-a
-about
-above
-after
-again
-against
-all
-am
-an
-and
-any
-are
-aren't
-as
-at
-be
-because
-been
-before
-being
-below
-between
-both
-but
-by
-can't
-cannot
-could
-couldn't
-did
-didn't
-do
-does
-doesn't
-doing
-don't
-down
-during
-each
-few
-for
-from
-further
-had
-hadn't
-has
-hasn't
-have
-haven't
-having
-he
-he'd
-he'll
-he's
-her
-here
-here's
-hers
-herself
-him
-himself
-his
-how
-how's
-i
-i'd
-i'll
-i'm
-i've
-if
-in
-into
-is
-isn't
-it
-it's
-its
-itself
-let's
-me
-more
-most
-mustn't
-my
-myself
-no
-nor
-not
-of
-off
-on
-once
-only
-or
-other
-ought
-our
-ours ourselves
-out
-over
-own
-same
-shan't
-she
-she'd
-she'll
-she's
-should
-shouldn't
-so
-some
-such
-than
-that
-that's
-the
-their
-theirs
-them
-themselves
-then
-there
-there's
-these
-they
-they'd
-they'll
-they're
-they've
-this
-those
-through
-to
-too
-under
-until
-up
-very
-was
-wasn't
-we
-we'd
-we'll
-we're
-we've
-were
-weren't
-what
-what's
-when
-when's
-where
-where's
-which
-while
-who
-who's
-whom
-why
-why's
-with
-won't
-would
-wouldn't
-you
-you'd
-you'll
-you're
-you've
-your
-yours
-yourself
-yourselves
\ No newline at end of file
diff --git a/strpython/config/world_borders.shp b/strpython/config/world_borders.shp
deleted file mode 100755
index 7cd47e9ad5edeadbce0edecd3a488a133fa7dfa8..0000000000000000000000000000000000000000
Binary files a/strpython/config/world_borders.shp and /dev/null differ
diff --git a/strpython/eval/automatic_annotation.py b/strpython/eval/automatic_annotation.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c681bcde9a1ebd7f9b3734ccff49db34f6b202c
--- /dev/null
+++ b/strpython/eval/automatic_annotation.py
@@ -0,0 +1,72 @@
+# coding = utf-8
+
+from strpython.models.str import STR
+import networkx as nx
+import numpy as np
+import geopandas as gpd
+from shapely.geometry import MultiPoint,Polygon,Point,LineString
+
+class AnnotationAutomatic(object):
+    """
+
+    To facilitate the annotation, this class proposes an automatic annotation.
+    Author : Jacques Fize
+    """
+    def __init__(self):
+        pass
+
+    def all(self,str1,str2):
+        return [self.criterion1(str1,str2),self.criterion2(str1,str2),self.criterion3(str1,str2),self.criterion4(str1,str2)]
+
+    def criterion1(self,str1,str2):
+        """
+        Return True if both STRs contain common spatial entities.
+        :param str1: STR
+        :param str2: STR
+        :return:
+        """
+        return len(set(str1.graph.nodes.keys()) & set(str2.graph.nodes.keys())) > 0
+
+    def criterion2(self,str1 : STR,str2 : STR):
+        """
+        Return True if the two STRs contain non-shared spatial entities that are adjacent or included in one another.
+        :param str1: STR
+        :param str2: STR
+        :return:
+        """
+        stop_en=set(str1.graph.nodes.keys()) & set(str2.graph.nodes.keys())
+        for es in str1.spatial_entities:
+            for es2 in str2.spatial_entities:
+                if not es in stop_en and not es2 in stop_en:
+                    if str1.is_included_in(es,es2):
+                        return True
+                    if str1.is_adjacent(es,es2):
+                        return True
+        return False
+
+    def criterion3(self, str1 :STR , str2: STR):
+        """
+        Return True if one or multiple clusters of spatial entities are found in both STRs. Clusters
+        are constructed based on low distances between spatial entities. The clustering method used is Mean-Shift, as
+        implemented in the scikit-learn module.
+        :param str1:
+        :param str2:
+        :return:
+        """
+        try:
+            return str1.get_cluster().intersects(str2.get_cluster()).any()
+        except:
+            return False
+
+    def criterion4(self, str1, str2):
+        """
+        Return True if both STRs share the same clusters, using the same clustering method as in criterion3().
+        :param str1:
+        :param str2:
+        :return:
+        """
+        try:
+            return str1.get_cluster().intersects(str2.get_cluster()).all()
+        except:
+            return False
+
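A minimal usage sketch (not part of the patch) for the `AnnotationAutomatic` class added above. It assumes two `STR` instances built elsewhere by the strpython pipeline, and relies only on what the new file itself defines: `all()` returning the four criteria as booleans.

```python
# Hypothetical illustration only: str1 and str2 are assumed to be
# strpython.models.str.STR objects produced beforehand by the pipeline.
from strpython.eval.automatic_annotation import AnnotationAutomatic
from strpython.models.str import STR


def annotate_pair(str1: STR, str2: STR) -> dict:
    """Label a pair of STRs with the four automatic criteria."""
    annotator = AnnotationAutomatic()
    c1, c2, c3, c4 = annotator.all(str1, str2)
    return {
        "common_entities": c1,      # criterion1: shared spatial entities
        "adjacency_inclusion": c2,  # criterion2: non-shared entities adjacent/nested
        "clusters_intersect": c3,   # criterion3: Mean-Shift clusters overlap
        "clusters_identical": c4,   # criterion4: same clusters on both sides
    }
```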
diff --git a/strpython/eval/stats.py b/strpython/eval/stats.py
index 915c554d3e7c648ffd787f7041071fd5c4dec8c2..86e92f19f734827f1d60eb81dec9f17bd17f1be3 100644
--- a/strpython/eval/stats.py
+++ b/strpython/eval/stats.py
@@ -1,8 +1,9 @@
 # coding = utf-8
 
-from ..helpers.gazeteer_helpers import get_data
+from ..helpers.geodict_helpers import get_data
 import numpy as np
 
+
 def flattern(A):
     rt = []
     for i in A:
@@ -14,18 +15,20 @@
             rt.append(i)
     return rt
 
+
 def most_common(lst):
-    if len(list(set(lst))) >1 and "P-PPL" in set(lst):
-        lst=[x for x in lst if x != "PPL"]
+    if len(list(set(lst))) > 1 and "P-PPL" in set(lst):
+        lst = [x for x in lst if x != "PPL"]
     return max(set(lst), key=lst.count)
 
+
 def granularity(graph):
     """
     Return the granularity of a STR
     :param graph:
     :return:
     """
-    class_list=flattern([get_data(n)["class"] for n in list(graph.nodes())])
+    class_list = flattern([get_data(n)["class"] for n in list(graph.nodes())])
     if not class_list:
         return []
-    return most_common(class_list)
\ No newline at end of file
+    return most_common(class_list)
diff --git a/strpython/helpers/boundary.py b/strpython/helpers/boundary.py
deleted file mode 100644
index 975e87fab5a3bfb5f4cfe03d0be17bef95aa22d1..0000000000000000000000000000000000000000
--- a/strpython/helpers/boundary.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# coding = utf-8
-
-import json
-import os
-from warnings import warn
-
-from .gazeteer_helpers import get_data
-from scipy.spatial import ConvexHull
-from shapely.geometry import Polygon, Point, shape
-from ..config.configuration import config
-from .collision import collide
-
-__collisions={}
-
-def parseHull(hull_object,points):
-    hull=[]
-    for simplex in hull_object.simplices:
-        hull.append(points[simplex[0]])
-        hull.append(points[simplex[1]])
-    return hull
-
-def getConvexHull(path):
-    data=json.load(open(os.path.join(config.osm_boundaries_directory,path)))
-    boundaries=data["geometry"]["coordinates"]
-    if data["geometry"]["type"]== "Polygon":
-        hull = parseHull(ConvexHull(boundaries[-1]),boundaries[-1])
-        return [hull]
-    else:
-        hull=[]
-        for bound in boundaries[-1]:
-            hull.append(parseHull(ConvexHull(bound),bound))
-        return hull
-
-def get_all_shapes(ids_list):
-    shapes = {}
-    for p in ids_list:
-        d = get_data(p)
-        #print(d["path"])
-        if "path" in d:
-            shapes[p] = getConvexHull(config.osm_boundaries_directory + "/" + d["path"])[0]
-        elif "coord" in d:
-            shapes[p] = [[d["coord"]["lat"], d["coord"]["lon"]]]
-    return shapes
-
-def get_adjacency_relationships(shapes):
-    collisions={}
-    for s in shapes:
-        for s2 in shapes:
-            if s != s2:
-                if not s in collisions and s2 in collisions:
-                    if not s in collisions[s2]:
-                        collisions[s2][s]=collide(shapes[s],shapes[s2])
-                elif not s2 in collisions and s in collisions:
-                    if not s2 in collisions[s]:
-                        collisions[s][s2]=collide(shapes[s],shapes[s2])
-    return collisions
-
-def is_intersect(id1,id2,shapes):
-    global __collisions
-    if id1 in __collisions:
-        if id2 in __collisions[id1]:
-            return __collisions[id1][id2]
-    elif id2 in __collisions:
-        if id1 in __collisions[id2]:
-            return __collisions[id2][id1]
-
-
-    if id1 in shapes and id2 in shapes:
-        if not id1 in __collisions:__collisions[id1]={}
-        if not id2 in __collisions: __collisions[id2] = {}
-        __collisions[id1][id2]=collide(shapes[id1],shapes[id2])
-        __collisions[id2][id1]=__collisions[id1][id2]
-        return __collisions[id1][id2]
-    else:
-        warn("{0} or {1} wasn't found in given shapes !".format(id1,id2))
-        return False
diff --git a/strpython/helpers/bow_polyglot.py b/strpython/helpers/bow_polyglot.py
index 94c60e8510b0852bd9f722f2bd2ba3cad20ee06e..af54e2262b9081322b67d6d3dffa67e037542112 100644
--- a/strpython/helpers/bow_polyglot.py
+++ b/strpython/helpers/bow_polyglot.py
@@ -7,6 +7,12 @@ from scipy.sparse import csc_matrix
 
 
 def get_vocabulary(corpus):
+    """
+    Return the vocabulary of a corpus, a list of documents. Each document is represented
+    using a list of tokens.
+    :param corpus: list or array-like
+    :return:
+    """
     vocabulary=set([])
     for text_tagged in corpus:
         for token in text_tagged:
@@ -14,6 +20,13 @@
     return list(vocabulary)
 
 def lemmatize(corpus,stopwords):
+    """
+    Lemmatize a corpus, a list of documents. Each document is represented
+    using a list of tokens.
+    :param corpus: list or array-like
+    :param stopwords: list or array-like
+    :return:
+    """
     pos_tag_corp=[]
     lemmatizer = WordNetLemmatizer()
     for text in corpus:
@@ -36,6 +49,13 @@
     return pos_tag_corp
 
 def populate_bow(bow,voc_asso,corpus_tagged):
+    """
+    Populate the Bag of words representation for a vocabulary and a corpus.
+    :param bow:
+    :param voc_asso:
+    :param corpus_tagged:
+    :return:
+    """
     for t in range(len(corpus_tagged)):
         text=corpus_tagged[t]
         for token in text:
@@ -47,6 +67,12 @@
     return bow
 
 def create_bow(corpus,stopwords):
+    """
+    Return a Bag of words representation of a corpus, a list of documents. Each document is a list of tokens.
+    :param corpus:
+    :param stopwords:
+    :return:
+    """
     stopwords=set(stopwords)
     post_tag_corp = lemmatize(corpus,stopwords)
     voc = get_vocabulary(post_tag_corp)
diff --git a/strpython/helpers/collision.py b/strpython/helpers/collision.py
index 60240f83bc92ba434410ba370da006825a204748..b46158e6d893e3cbd864dbf6b8e35330387e9067 100644
--- a/strpython/helpers/collision.py
+++ b/strpython/helpers/collision.py
@@ -1,103 +1,142 @@
-import numpy as np
+import json
+import os
+import warnings
 
-"""
-Source : https://hackmd.io/s/ryFmIZrsl#
-"""
-def is_separating_axis(o, p1, p2):
-    """
-    Return True and the push vector if o is a separating axis of p1 and p2.
-    Otherwise, return False and None.
-    """
-    min1, max1 = float('+inf'), float('-inf')
-    min2, max2 = float('+inf'), float('-inf')
-
-    for v in p1:
-        projection = np.dot(v, o)
-
-        min1 = min(min1, projection)
-        max1 = max(max1, projection)
-
-    for v in p2:
-        projection = np.dot(v, o)
+from shapely.geometry import Point
 
-        min2 = min(min2, projection)
-        max2 = max(max2, projection)
+from ..config.configuration import config
+from .geodict_helpers import get_data
+import geopandas as gpd
 
-    if max1 >= min2 and max2 >= min1:
-        d = min(max2 - min1, max1 - min2)
-        # push a bit more than needed so the shapes do not overlap in future
-        # tests due to float precision
-        d_over_o_squared = d/np.dot(o, o) + 1e-10
-        pv = d_over_o_squared*o
-        return False, pv
-    else:
-        return True, None
+__cache = {}
+__cache_adjacency = {}
+__cache_frequency = {}
+__limit_cache = 2000
 
 
-def edges_of(vertices):
+def add_cache(id_, hull):
     """
-    Return the vectors for the edges of the polygon p.
-
-    p is a polygon.
+    Add the extracted data to a cache instance. This process manages the cache according to the limit defined in `__limit_cache`.
+    If the limit is reached, the least frequently used entry is removed from the cache.
+    :param id_: id of the spatial entity
+    :param hull: geometry data to cache
+    :return:
     """
-    edges = []
-    N = len(vertices)
-
-    for i in range(N):
-        edge = vertices[(i + 1)%N] - vertices[i]
-        edges.append(edge)
+    global __cache, __limit_cache, __cache_frequency
+    if len(__cache) > __limit_cache:
+        warnings.warn("Limit broken")
+        del __cache[min(__cache_frequency, key=__cache_frequency.get)]
+    __cache[id_] = hull
+    if not id_ in __cache_frequency: __cache_frequency[id_] = 0
+    __cache_frequency[id_] += 1
 
-    return edges
 
-def orthogonal(v):
+def add_cache_adjacency(id_se1, id_se2):
     """
-    Return a 90 degree clockwise rotation of the vector v.
+    Record the adjacency between two spatial entities in a cache variable.
+    :param id_se1: id of the first spatial entity
+    :param id_se2: id of the second spatial entity
+    :return:
     """
-    return np.array([-v[1], v[0]])
-
-
-def collide(p1, p2):
-    '''
-    Return True and the MPV if the shapes collide. Otherwise, return False and
-    None.
+    global __cache_adjacency
+    if not id_se1 in __cache_adjacency:
+        __cache_adjacency[id_se1] = {}
+    __cache_adjacency[id_se1][id_se2] = True
 
-    p1 and p2 are lists of ordered pairs, the vertices of the polygons in the
-    counterclockwise direction.
-    '''
-    p1 = [np.array(v, 'float64') for v in p1]
-    p2 = [np.array(v, 'float64') for v in p2]
+
+def explode(gdf):
+    """
+    Explodes a geodataframe
 
-    edges = edges_of(p1)
-    edges += edges_of(p2)
-    orthogonals = [orthogonal(e) for e in edges]
+    Will explode multi-part geometries into single geometries. Original index is
+    stored in column level_0 and zero-based count of geometries per multi-
+    geometry is stored in level_1
 
-    push_vectors = []
-    for o in orthogonals:
-        separates, pv = is_separating_axis(o, p1, p2)
+    Args:
+        gdf (gpd.GeoDataFrame) : input geodataframe with multi-geometries
 
-        if separates:
-            # they do not collide and there is no push vector
-            return False, None
-        else:
-            push_vectors.append(pv)
+    Returns:
+        gdf (gpd.GeoDataFrame) : exploded geodataframe with a new index
+        and two new columns: level_0 and level_1
 
-    # they do collide and the push_vector with the smallest length is the MPV
-    mpv = min(push_vectors, key=(lambda v: np.dot(v, v)))
+    """
+    gs = gdf.explode()
+    gdf2 = gs.reset_index().rename(columns={0: 'geometry'})
+    gdf_out = gdf2.merge(gdf.drop('geometry', axis=1), left_on='level_0', right_index=True)
+    gdf_out = gdf_out.set_index(['level_0', 'level_1']).set_geometry('geometry')
+    gdf_out.crs = gdf.crs
+    return gdf_out.reset_index(level=[0, 1])
 
-    # assert mpv pushes p1 away from p2
-    d = centers_displacement(p1, p2) # direction from p1 to p2
-    if np.dot(d, mpv) > 0: # if it's the same direction, then invert
-        mpv = -mpv
-    return True, mpv
 
+def getGEO(id_se):
+    """
+    Get the geofootprint of a spatial entity. If found, this geofootprint is a shape extracted from OSM. If not,
+    coordinates are used.
+    :param id_se: id of the spatial entity
+    :return: geopandas.GeoSeries or geopandas.GeoDataFrame
+    """
+    data = get_data(id_se)
+    if "path" in data:
+        return explode(gpd.read_file(os.path.join(config.osm_boundaries_directory, data["path"]))).convex_hull
+    elif "coord" in data:
+        return gpd.GeoDataFrame(gpd.GeoSeries([Point(data["coord"]["lon"], data["coord"]["lat"]).buffer(1.0)])).rename(
+            columns={0: 'geometry'})
+    return None
 
-def centers_displacement(p1, p2):
+
+def collide(se1, se2):
+    """
+    Return True if the convex hulls of the two spatial entities intersect.
+    :param se1: id of the first spatial entity
+    :param se2: id of the second spatial entity
+    :return:
+    """
+    try:
+        if se1 in __cache:
+            data_se1 = __cache[se1]
+            __cache_frequency[se1] += 1
+        else:
+            data_se1 = getGEO(se1)
+            add_cache(se1, data_se1)
+        if se2 in __cache:
+            data_se2 = __cache[se2]
+            __cache_frequency[se2] += 1
+        else:
+            data_se2 = getGEO(se2)
+            add_cache(se2, data_se2)
+    except:
+        return False
+    if not type(data_se1) == gpd.GeoDataFrame or not type(data_se2) == gpd.GeoDataFrame:
+        return False
+    try:
+        if data_se1.intersects(data_se2):
+            return True
+    except:
+        if data_se1.intersects(data_se2).any():
+            return True
+    return False
+
+
+def collisionTwoSEBoundaries(id_se1, id_se2):
     """
-    Return the displacement between the geometric center of p1 and p2.
+    Return True if two spatial entities are adjacent.
+ :param id_se1: id of the first spatial entity + :param id_se2: id of the second spatial entity + :return: """ - # geometric center - c1 = np.mean(np.array(p1), axis=0) - c2 = np.mean(np.array(p2), axis=0) - return c2 - c1 \ No newline at end of file + global __cache, __cache_adjacency + if id_se1 in __cache_adjacency: + if id_se2 in __cache_adjacency[id_se1]: + return __cache_adjacency[id_se1][id_se2] + elif id_se2 in __cache_adjacency: + if id_se1 in __cache_adjacency[id_se2]: + return __cache_adjacency[id_se2][id_se1] + + if not id_se1 in __cache_adjacency: + __cache_adjacency[id_se1] = {} + + if collide(id_se1, id_se2): # and not include_in(h1,h2): + __cache_adjacency[id_se1][id_se2] = True + return True + __cache_adjacency[id_se1][id_se2] = False + return False diff --git a/strpython/helpers/collision_c.pyx b/strpython/helpers/collision_c.pyx deleted file mode 100644 index 8df40d56ccd97f1475f59addfef03c1aec0d66d7..0000000000000000000000000000000000000000 --- a/strpython/helpers/collision_c.pyx +++ /dev/null @@ -1,99 +0,0 @@ -import numpy as np -cimport numpy as np - -ctypedef np.ndarray numpy_array # for return np array -""" -Source : https://hackmd.io/s/ryFmIZrsl# -""" -cdef is_separating_axis(o, list p1, list p2): - """ - Return True and the push vector if o is a separating axis of p1 and p2. - Otherwise, return False and None. - """ - cdef float min1,max1,min2,max2 - cdef np.float_t projection - min1, max1 = float('+inf'), float('-inf') - min2, max2 = float('+inf'), float('-inf') - - for v in p1: - projection = np.dot(v, o) - - min1 = min(min1, projection) - max1 = max(max1, projection) - - for v in p2: - projection = np.dot(v, o) - - min2 = min(min2, projection) - max2 = max(max2, projection) - - if max1 >= min2 and max2 >= min1: - d = min(max2 - min1, max1 - min2) - # push a bit more than needed so the shapes do not overlap in future - # tests due to float precision - d_over_o_squared = d/np.dot(o, o) + 1e-10 - pv = d_over_o_squared*o - return False, pv - else: - return True, None - - -cdef list edges_of(list vertices): - """ - Return the vectors for the edges of the polygon p. - - p is a polygon. - """ - cdef list edges = [] - cdef int N = len(vertices) - - for i in range(N): - edge = vertices[(i + 1)%N] - vertices[i] - edges.append(edge) - - return edges - -cdef numpy_array orthogonal(v): - """ - Return a 90 degree clockwise rotation of the vector v. - """ - return np.array([-v[1], v[0]]) - - -def collide(p1, p2): - ''' - Return True and the MPV if the shapes collide. Otherwise, return False and - None. - - p1 and p2 are lists of ordered pairs, the vertices of the polygons in the - counterclockwise direction. - ''' - - p1 = [np.array(v, 'float64') for v in p1] - p2 = [np.array(v, 'float64') for v in p2] - cdef list edges - edges = edges_of(p1) - edges += edges_of(p2) - orthogonals = [orthogonal(e) for e in edges] - - cdef push_vectors = [] - for o in orthogonals: - separates, pv = is_separating_axis(o, p1, p2) - - if separates: - # they do not collide and there is no push vector - return False - else: - push_vectors.append(pv) - - return True - - -cdef float centers_displacement(p1, p2): - """ - Return the displacement between the geometric center of p1 and p2. 
- """ - # geometric center - c1 = np.mean(np.array(p1), axis=0) - c2 = np.mean(np.array(p2), axis=0) - return c2 - c1 \ No newline at end of file diff --git a/strpython/helpers/collision_with_gazetteer_data.py b/strpython/helpers/collision_with_gazetteer_data.py deleted file mode 100644 index 4e0665d489596f7220e1ecd694e4cc9c5ef30955..0000000000000000000000000000000000000000 --- a/strpython/helpers/collision_with_gazetteer_data.py +++ /dev/null @@ -1,79 +0,0 @@ -import json -import os - -import shapely -from scipy.spatial import ConvexHull -from shapely.geometry import Polygon, Point, shape - - -from ..config.configuration import config -from .gazeteer_helpers import get_data -#from .collision import collide -import geopandas as gpd - -__cache={} -__cache_adjacency={} -__limit_cache=400 - -def add_cache(id_,hull): - global __cache,__limit_cache - if len(__cache) > __limit_cache: - __cache={} - __cache[id_]=hull - -def getGEO(id_se): - data=get_data(id_se) - if "path" in data: - return gpd.read_file(os.path.join(config.osm_boundaries_directory, data["path"])).geometry - elif "coord" in data: - return Point(data["coord"]["lon"],data["coord"]["lat"]) - return None -def collide(se1,se2): - try: - if se1 in __cache: - data_se1=__cache[se1] - else: - data_se1 = gpd.GeoSeries(list(getGEO(se1).values[0])) - add_cache(se1,data_se1) - if se2 in __cache: - data_se2=__cache[se2] - else: - data_se2 = gpd.GeoSeries(list(getGEO(se2).values[0])) - add_cache(se2, data_se2) - except: - return False - - if type(data_se1) != type(data_se2): - if type(data_se1) == gpd.geoseries.GeoSeries: - return data_se1.intersects(data_se2).any() - else: - return data_se2.intersects(data_se1).any() - try: - if data_se1.intersects(data_se2): - return True - except: - if data_se1.intersects(data_se2).any(): - return True - return False - - - - - -def collisionTwoSEBoundaries(id_SE1,id_SE2): - global __cache,__cache_adjacency - if id_SE1 in __cache_adjacency: - if id_SE2 in __cache_adjacency[id_SE1]: - return __cache_adjacency[id_SE1][id_SE2] - elif id_SE2 in __cache_adjacency: - if id_SE1 in __cache_adjacency[id_SE2]: - return __cache_adjacency[id_SE2][id_SE1] - - if not id_SE1 in __cache_adjacency: - __cache_adjacency[id_SE1]={} - - if collide(id_SE1,id_SE2): #and not include_in(h1,h2): - __cache_adjacency[id_SE1][id_SE2] = True - return True - __cache_adjacency[id_SE1][id_SE2]=False - return False diff --git a/strpython/helpers/gazeteer_helpers.py b/strpython/helpers/gazeteer_helpers.py deleted file mode 100644 index cd806ad9dc17380095a19c20187c42f476020f9b..0000000000000000000000000000000000000000 --- a/strpython/helpers/gazeteer_helpers.py +++ /dev/null @@ -1,141 +0,0 @@ -# coding=utf-8 - -from elasticsearch import Elasticsearch -from ..config.configuration import config - -es = Elasticsearch(config.es_server) - - -def get_most_common_id_v2(label, lang="fr"): - res = es.search("gazetteer", "place", - body={"query": {"bool": {"must": [{"term": {lang: label}}], "must_not": [], "should": []}}, - "from": 0, - "size": 50, "sort": [{'score': "desc"}], "aggs": {}}) - if res["hits"]["total"] > 0: - if res["hits"]["total"] > 1: - max_id, max_sc = 0, 0 - i = 0 - for hit in res["hits"]["hits"]: - if 'score' in hit['_source']: - if float(hit['_source']["score"]) > max_sc: max_id, max_sc = i, float(hit['_source']["score"]) - i += 1 - res = [res["hits"]["hits"][max_id]] - else: - res = res["hits"]["hits"] - if not "score" in res[0]["_source"]: - return res[0]["_source"]["id"], -1 - return res[0]["_source"]["id"], 
float(res[0]["_source"]["score"]) - return None, 0 - -def get_most_common_id_v3(label, lang='fr'): - id_, score = get_most_common_id_v2(label, lang) - if id_: - return id_, score - if not id_ and lang != 'en': - id_, score = get_most_common_id_v2(label, 'en') - if id_: - return id_, score - id_, score = get_most_common_id_alias_v2(label, lang) - if not id_ and lang != 'en': - id_, score = get_most_common_id_v2(label, 'en') - if id_: - return id_, score - return None, -1 - -def get_most_common_id_alias_v2(alias, lang="fr"): - res = es.search("gazetteer", "place", - body={"query": {"nested": {"path": "aliases", - "query": {"bool": {"must": [{"match": {"aliases." + lang: alias}}]}}}}}) - if res["hits"]["total"] > 0: - if res["hits"]["total"] > 1: - max_id, max_sc = 0, 0 - i = 0 - for hit in res["hits"]["hits"]: - if 'score' in hit['_source']: - if float(hit['_source']["score"]) > max_sc: max_id, max_sc = i, float(hit['_source']["score"]) - i += 1 - res = [res["hits"]["hits"][max_id]] - else: - res = res["hits"]["hits"] - if not "score" in res[0]["_source"]: - return res[0]["_source"]["id"], -1 - return res[0]["_source"]["id"], float(res[0]["_source"]["score"]) - return None, -1 - - -def get_data(id): - res = es.search("gazetteer", "place", - body={"query": {"bool": {"must": [{"term": {"id": id}}], "must_not": [], "should": []}}, "from": 0, - "size": 10, "sort": [], "aggs": {}}) - if res["hits"]["total"] > 0: - res = res["hits"]["hits"][0]["_source"] - return res - - -def get_data_by_wikidata_id(id): - res = es.search("gazetteer", "place", - body={"query": {"bool": {"must": [{"term": {"wikidataID": id}}], "must_not": [], "should": []}}, - "from": 0, - "size": 10, "sort": [], "aggs": {}}) - if res["hits"]["total"] > 0: - res = res["hits"]["hits"][0]["_source"] - - return res - - - -def get_by_label(label, lang): - query = {"query": {"bool": {"must": [{"term": {lang: label}}], "must_not": [], "should": []}}, "size": 50} - response = es.search('gazetteer', 'place', body=query) - if 'hits' in response['hits']: - return response['hits']['hits'] - return None - - -def get_by_alias(alias, lang): - query = { - "query": {"nested": {"path": "aliases", "query": {"bool": {"must": [{"match": {"aliases." + lang: alias}}]}}}}} - response = es.search('gazetteer', 'place', body=query) - if 'hits' in response['hits']: - return response['hits']['hits'] - return None - -def label_exists(label, lang): - query = {"query": {"bool": {"must": [{"term": {lang: label}}], "must_not": [], "should": []}}} - response = es.count('gazetteer', 'place', body=query) - if response["count"] > 0: - return True - return False - -def alias_exists(alias, lang): - query = { - "query": {"nested": {"path": "aliases", "query": {"bool": {"must": [{"match": {"aliases." 
+ lang: alias}}]}}}}}
-    response = es.count('gazetteer', 'place', body=query)
-    if response["count"] > 0:
-        return True
-    return False
-
-
-
-
-
-
-
-def count_of_se(label, lang):
-    query = {"query": {"bool": {"must": [{"term": {lang: label}}], "must_not": [], "should": []}}}
-    response = es.count('gazetteer', 'place', body=query)
-    return response["count"]
-
-
-def get_top_candidate(label, lang):
-    query = {"query": {"bool": {"must": [{"term": {lang: label}}], "must_not": [], "should": []}}, "sort": [
-        {
-            "score": {
-                "order": "desc"
-            }
-        }
-    ], "size": 5}
-    response = es.search('gazetteer', 'place', body=query)
-    if 'hits' in response['hits']:
-        return [x["_source"]["id"] for x in response['hits']['hits']]
-    return None
diff --git a/strpython/helpers/geodict_helpers.py b/strpython/helpers/geodict_helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..91d04087865e7f90c41a09de1e6cd850fe742877 --- /dev/null +++ b/strpython/helpers/geodict_helpers.py @@ -0,0 +1,437 @@
+# coding=utf-8
+import math
+import re
+
+from elasticsearch import Elasticsearch
+from ..config.configuration import config
+import pandas as pd
+from ..helpers.objectify import objectify
+
+es = Elasticsearch(config.es_server)
+
+geo_term={
+    "fr":open(config.language_resources_path.rstrip("/")+"/geo_term_fr").read().lower().strip().split("\n"),
+    "en":open(config.language_resources_path.rstrip("/")+"/geo_term_en").read().strip().split("\n")
+}
+
+def convert_es_to_pandas(es_query_results):
+    """
+    Return a `pandas.DataFrame` object built from the elasticsearch query results
+
+    Parameters
+    ----------
+    es_query_results : dict
+        elasticsearch.search() result
+
+    Returns
+    -------
+    pandas.DataFrame
+        Dataframe of the elasticsearch query results
+    """
+    if es_query_results["hits"]["total"] == 0:
+        return None
+    df = pd.DataFrame([g["_source"] for g in es_query_results["hits"]["hits"]])
+    if "score" in df:
+        df["score"] = df["score"].apply(lambda x: float(x))
+    else:
+        df["score"] = -1  # no score field returned by the index
+    df["score"].fillna(-1, inplace=True)
+    return df
+
+
+def parse_score(score):
+    if math.isnan(score):
+        return -1
+    else:
+        return score
+
+def parse_label2(label : str, lang):
+    # Like parse_label(), except that generic geographic terms (geo_term) must
+    # match exactly while the remaining words stay fuzzy.
+    if not lang in geo_term:
+        return parse_label(label)
+
+    label = re.sub("[ ]+", " ", re.sub('[(?)]+', "", label.strip()))
+    label = label.strip("'").strip("’")
+
+    parts=label.split(" ")
+    # f=False
+    # for part in parts:
+    #     if part.lower() in geo_term[lang]:
+    #         f=True
+    # if not f:
+    #     return parse_label(label)
+    new_labels=[]
+    for part in parts:
+        if not part.lower() in geo_term[lang]:
+            new_labels.append(parse_label(part).strip("/?")+"+")
+        else:
+            new_labels.append(parse_label(part).strip("/"))
+    return "/"+"[ ]?".join(new_labels)+"/"
+
+
+
+
+def parse_label(label: str):
+    """
+    Turn a label/toponym into a regular expression that tolerates small variations of the
+    official toponyms/aliases (case, elision, missing word endings).
+
+    Parameters
+    ----------
+    label : str
+        toponym
+    Returns
+    -------
+    str
+        regular expression built from the toponym
+    """
+    label = re.sub("[ ]+", " ", re.sub('[(?)]+', "", label.strip()))
+    label = label.strip("'").strip("’")
+    new_label = ""
+    for c in label:
+        if c.isupper():
+            close_par = ")" if not (new_label.endswith(")") or new_label.endswith("?")) and new_label != "" else ""
+            # if new_label.endswith("]"):
+            #     new_label = new_label[:-1] + "({0}{1}]".format(c.lower(), c)
+            # else:
+            new_label += close_par + "([{0}{1}]".format(c.lower(), c)
+            # print("upper", new_label)
+        elif c == " ":
+            new_label += ")?[ ]?"
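+            # [Editor's note] Worked example of the pattern this loop builds:
+            #     parse_label("Le Havre") -> "/([lL]e)?[ ]?([hH]avre)?/"
+            # i.e. each word becomes an optional, case-tolerant group joined by
+            # an optional space, so lower-cased or truncated variants still match.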
+ # print("espace", new_label) + elif c == "'" or c == "’": + new_label += c + ")?" + # print("apostrophe", new_label) + else: + + new_label += ("(" if new_label == "" else "") + ("(" if new_label.endswith("?") else "") + c + # print("else", new_label) + new_label = "/" + new_label + ")?/" + return new_label + + +def most_common_label(toponym: str, lang: str): + """ + + + Parameters + ---------- + toponym : str + toponym + lang : str + toponym language + Returns + ------- + + """ + res = es.search("gazetteer", "place", + body={"query": + {"bool": + {"must": [{"term": {lang: toponym}}], "must_not": [], "should": []} + }, + "from": 0, + "size": 50, "sort": [{'score': "desc"}], "aggs": {}}) + res = convert_es_to_pandas(res) + if not isinstance(res, pd.DataFrame): + return None, 0 + return res.iloc[0].id, res.iloc[0].score + + +def most_common_alias(toponym: str, lang: str): + """ + Return most common spatial entity by itsje + + Parameters + ---------- + toponym : str + toponym + lang : str + toponym language + Returns + ------- + + """ + res = es.search("gazetteer", "place", + body={"query": {"nested": {"path": "aliases", + "query": + {"bool": + {"must": [{"term": {"aliases.{0}".format(lang): toponym}}], "must_not": [], "should": []} + } + }}, + "sort": [{"score": "desc"}]}) + + res = convert_es_to_pandas(res) + if not isinstance(res, pd.DataFrame): + return None, 0 + return res.iloc[0].id, res.iloc[0].score + + +def n_label_similar(toponym, lang, n=5, score=True): + body = { + "query": { + "query_string": { + "default_field": lang, + "query": parse_label2(toponym,lang) + } + }, + "from": 0, + "size": n + } + if score: + body["sort"] = [ + { + 'score': "desc" + } + ] + + res = es.search("gazetteer", "place", + body=body) + res = convert_es_to_pandas(res) + if not isinstance(res, pd.DataFrame): + return None + return res + + +def n_alias_similar(toponym, lang, n=5, score=True): + body = {"query": {"nested": {"path": "aliases", + "query": + { + "query_string": { + "default_field": "aliases.{0}".format(lang), + "query": parse_label2(toponym,lang) + } + } + }}, + "from": 0, + "size": n} + if score: + body["sort"] = [ + { + 'score': "desc" + } + ] + res = es.search("gazetteer", "place", + body=body) + + res = convert_es_to_pandas(res) + if not isinstance(res, pd.DataFrame): + return None, 0 + return res.iloc[0].id, res.iloc[0].score + + +def get_most_common_id_v2(label, lang="fr"): + """ + Return the spatial entity and its score, based on a specific label and language that obtains the highest score. + :param label: str + :param lang: str + :return: str, float + """ + query_2 = {"query_string": { + "default_field": lang, + "query": parse_label(label), + + }} + res = es.search("gazetteer", "place", + body={"query": + {"bool": + {"must": [{"term": {lang: label}}], "must_not": [], "should": []} + }, + "from": 0, + "size": 50, "sort": [{'score': "desc"}], "aggs": {}}) + res = convert_es_to_pandas(res) + + if not isinstance(res, pd.DataFrame): + if not res: + res = convert_es_to_pandas(es.search("gazetteer", "place", + body={"query": query_2})) + if not isinstance(res, pd.DataFrame): + return None, 0 + return res.iloc[0].id, res.iloc[0].score + + +def get_most_common_id_v3(label, lang='fr'): + """ + Return the spatial entity and its score, based on a specific label and language that obtains the highest score. 
+
+
+def get_most_common_id_v3(label, lang='fr'):
+    """
+    Return the id and score of the highest-scored spatial entity for a given label and language.
+    The difference with v2 is that it handles special cases:
+     * English placenames in a French text
+     * aliases, such as China, that also designate a spatial entity
+    :param label:
+    :param lang:
+    :return:
+    """
+    id_, score = most_common_label(label, lang)
+    if id_:
+        # China case
+        id_2, score2 = most_common_alias(label, lang)
+        if id_2 and score2 > score:
+            return id_2, score2
+        return id_, score
+
+    # if nothing was found by label, search the aliases
+    id_, score = most_common_alias(label, lang)
+    if id_:
+        return id_, score
+
+    similar_label = n_label_similar(label, lang)
+    if isinstance(similar_label, pd.DataFrame):
+        return similar_label.iloc[0].id, similar_label.iloc[0].score
+
+    similar_alias = n_alias_similar(label, lang)
+    if isinstance(similar_alias, pd.DataFrame):
+        return similar_alias.iloc[0].id, similar_alias.iloc[0].score
+
+    return None, -1
+
+
+def get_most_common_id_alias_v2(alias, lang="fr"):
+    res = es.search("gazetteer", "place",
+                    body={"query": {"nested": {"path": "aliases",
+                                               "query":
+                                                   {
+                                                       "query_string": {
+                                                           "default_field": "aliases.{0}".format(lang),
+                                                           "query": parse_label(alias)
+                                                       }
+                                                   }
+                                               }},
+                          "sort": [{"score": "desc"}]})
+
+    res = convert_es_to_pandas(res)
+    if not isinstance(res, pd.DataFrame):
+        return None, 0
+    return res.iloc[0].id, res.iloc[0].score
+
+
+def get_data(id):
+    """
+    Return the data associated to an id in Geodict
+    :param id:
+    :return:
+    """
+    res = es.search("gazetteer", "place",
+                    body={"query": {"bool": {"must": [{"term": {"id": id}}], "must_not": [], "should": []}}, "from": 0,
+                          "size": 10, "sort": [], "aggs": {}})
+    if res["hits"]["total"] > 0:
+        res = res["hits"]["hits"][0]["_source"]
+        return objectify(res)
+    return None
+
+
+def get_data_by_wikidata_id(id):
+    """
+    Return the data associated to a wikidata id in Geodict
+    :param id:
+    :return:
+    """
+    res = es.search("gazetteer", "place",
+                    body={"query": {"bool": {"must": [{"term": {"wikidataID": id}}], "must_not": [], "should": []}},
+                          "from": 0,
+                          "size": 10, "sort": [], "aggs": {}})
+    if res["hits"]["total"] > 0:
+        res = res["hits"]["hits"][0]["_source"]
+        return objectify(res)
+    return None
+
+
+def get_data_by_geonames_id(id):
+    """
+    Return the data associated to a geonames id in Geodict
+    :param id:
+    :return:
+    """
+    res = es.search("gazetteer", "place",
+                    body={"query": {"bool": {"must": [{"term": {"geonameID": id}}], "must_not": [], "should": []}},
+                          "from": 0,
+                          "size": 10, "sort": [], "aggs": {}})
+    if res["hits"]["total"] > 0:
+        res = res["hits"]["hits"][0]["_source"]
+        return objectify(res)
+    return None
+
+
+def get_by_label(label, lang):
+    """
+    To be removed.
+    :param label:
+    :param lang:
+    :return:
+    """
+    query = {"query": {"bool": {"must": [{"term": {lang: label}}], "must_not": [], "should": []}}, "size": 50}
+    response = es.search('gazetteer', 'place', body=query)
+    if 'hits' in response['hits']:
+        return objectify(response['hits']['hits'])
+    return None
+
+
+def get_by_alias(alias, lang):
+    """
+    To be removed.
+    :param alias:
+    :param lang:
+    :return:
+    """
+    query = {
+        "query": {"nested": {"path": "aliases", "query": {"bool": {"must": [{"match": {"aliases." + lang: alias}}]}}}}}
+    response = es.search('gazetteer', 'place', body=query)
+    if 'hits' in response['hits']:
+        return objectify(response['hits']['hits'])
+    return None
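[Editor's sketch] The lookup cascade in get_most_common_id_v3 above (exact label, then exact alias, then fuzzy label, then fuzzy alias) follows a simple "first strategy that answers wins" pattern. A self-contained illustration of that pattern with stub lookups (all names and values here are invented):

    def resolve(toponym, lang, strategies):
        # Try each (name, lookup) pair in order; keep the first real hit.
        for name, lookup in strategies:
            id_, score = lookup(toponym, lang)
            if id_ is not None:
                return id_, score, name
        return None, -1, "unresolved"

    exact = lambda t, l: ("GD42", 12.0) if t == "Montpellier" else (None, 0)
    fuzzy = lambda t, l: ("GD42", 3.0)  # would use parse_label()'s tolerant regex
    print(resolve("Montpelier", "fr", [("exact", exact), ("fuzzy", fuzzy)]))
    # -> ('GD42', 3.0, 'fuzzy')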
+
+
+def label_exists(label, lang):
+    """
+    Return True if a spatial entity exists with a specific label in a specific language.
+    :param label: str
+    :param lang: str
+    :return: bool
+    """
+    query = {"query": {"bool": {"must": [{"term": {lang: label}}], "must_not": [], "should": []}}}
+    response = es.count('gazetteer', 'place', body=query)
+    if response["count"] > 0:
+        return True
+    return False
+
+
+def alias_exists(alias, lang):
+    """
+    Return True if a spatial entity exists with a specific alias in a specific language.
+    :param alias: str
+    :param lang: str
+    :return: bool
+    """
+    query = {
+        "query": {"nested": {"path": "aliases", "query": {"bool": {"must": [{"match": {"aliases." + lang: alias}}]}}}}}
+    response = es.count('gazetteer', 'place', body=query)
+    if response["count"] > 0:
+        return True
+    return False
+
+
+def count_of_se(label, lang):
+    """
+    Return the number of spatial entities associated with a specific label in a specific language.
+    :param label: str
+    :param lang: str
+    :return: int
+    """
+    query = {"query": {"bool": {"must": [{"term": {lang: label}}], "must_not": [], "should": []}}}
+    response = es.count('gazetteer', 'place', body=query)
+    return response["count"]
+
+
+def get_top_candidate(label, lang, n=5):
+    """
+    Return the top-n candidates for a designated label in a specific language.
+    :param label: str
+    :param lang: str
+    :return: list
+    """
+    query = {"query": {"bool": {"must": [{"term": {lang: label}}], "must_not": [], "should": []}}, "sort": [
+        {
+            "score": {
+                "order": "desc"
+            }
+        }
+    ], "size": n}
+    response = es.search('gazetteer', 'place', body=query)
+    if 'hits' in response['hits']:
+        return [x["_source"]["id"] for x in response['hits']['hits']]
+    return []
diff --git a/strpython/helpers/objectify.py b/strpython/helpers/objectify.py new file mode 100644 index 0000000000000000000000000000000000000000..1bf4780e1a05caba16ed5336f2be2e34098aa1cc --- /dev/null +++ b/strpython/helpers/objectify.py @@ -0,0 +1,87 @@
+#!/usr/bin/env python
+
+
+"""Scrap module.
+
+Just tiny bits & bolts.
+
+.. author: Adrian Castravete
+.. modified by : Jacques Fize (implemented for Python 3 and recursive objectification)
+"""
+
+from functools import wraps
+
+
+def objectify(func):
+    """Mimic an object given a dictionary.
+
+    Given a dictionary, create an object and make sure that each of its
+    keys is accessible via attributes.
+    If func is a function, act as a decorator; otherwise just convert the
+    dictionary and return it.
+    :param func: A function or another kind of object.
+    :returns: Either the wrapper for the decorator, or the changed value.
+
+    Example::
+
+        >>> obj = {'old_key': 'old_value'}
+        >>> oobj = objectify(obj)
+        >>> oobj['new_key'] = 'new_value'
+        >>> print(oobj['old_key'], oobj['new_key'], oobj.old_key, oobj.new_key)
+
+        >>> @objectify
+        ... def func():
+        ...     return {'old_key': 'old_value'}
+        >>> obj = func()
+        >>> obj['new_key'] = 'new_value'
+        >>> print(obj['old_key'], obj['new_key'], obj.old_key, obj.new_key)
+
+    """
+
+    def create_object(value):
+        """Create the object.
+
+        Given a dictionary, create an object and make sure that each of its
+        keys is accessible via attributes.
+        Ignore everything if the given value is not a dictionary.
+        :param value: A dictionary or another kind of object.
+        :returns: Either the created object or the given value.
+
+        """
+        if isinstance(value, dict):
+            # Build a simple generic object.
+            class Object(dict):
+                def __setitem__(self, key, val):
+                    setattr(self, key, val)
+                    return super(Object, self).__setitem__(key, val)
+
+            # Create that simple generic object.
+            ret_obj = Object()
+            # Assign the attributes given the dictionary keys.
+            for key, val in value.items():
+                if isinstance(val, dict):
+                    ret_obj[key] = objectify(val)
+                else:
+                    ret_obj[key] = val
+                # keep the attribute in sync with the (possibly objectified) item
+                setattr(ret_obj, key, ret_obj[key])
+            return ret_obj
+        else:
+            return value
+
+    # If func is a function, wrap around and act like a decorator.
+    if hasattr(func, '__call__'):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            """Wrapper function for the decorator.
+
+            :returns: The return value of the decorated function.
+
+            """
+            value = func(*args, **kwargs)
+            return create_object(value)
+
+        return wrapper
+
+    # Else just try to objectify the value given.
+    else:
+        return create_object(func)
diff --git a/strpython/helpers/sim_matrix.py b/strpython/helpers/sim_matrix.py new file mode 100644 index 0000000000000000000000000000000000000000..5ad26695ca6fc0cdfde3128a06fc44965913e7fe --- /dev/null +++ b/strpython/helpers/sim_matrix.py @@ -0,0 +1,42 @@
+# coding = utf-8
+
+import argparse, bz2, os
+import json
+
+import pandas as pd
+import numpy as np
+
+
+def read_bz2_matrix(file_path):
+    f = bz2.BZ2File(file_path, 'r')
+    matrix_ = np.load(f)
+    return matrix_
+
+
+def filter_selected(matrix, selected):
+    return matrix[selected]  # keep only the rows of the selected graphs
+
+
+def read_and_load(file_path, selected=None, bz2=True):
+    matrix = None
+    if bz2:
+        matrix = read_bz2_matrix(file_path)
+    else:
+        matrix = np.load(file_path)
+    if selected:
+        return filter_selected(matrix, selected)
+    else:
+        return matrix
+
+
+def matrix_to_pandas_dataframe(matrix, selected, sim_measure, type_str, n=5):
+    sim, type_ = sim_measure, type_str
+    tab_array = []
+    for line in range(len(matrix)):
+        top_n = np.argsort(matrix[line])[::-1][1:n + 1]  # best n matches, skipping the graph itself at rank 0
+        index = selected[line]
+        rank = 1
+        for val in top_n:
+            tab_array.append([index, val, sim, type_, rank, 0, 0, 0, 0])
+            rank += 1
+    return pd.DataFrame(tab_array, columns="G1 G2 sim_measure type_str rank c1 c2 c3 c4".split())
diff --git a/strpython/models/str.py b/strpython/models/str.py index 3c255b020a05cfd122b52aff4522a091f5e966da..be98e0019b7dd184dcc4f337db41abc5e90307dd 100644 --- a/strpython/models/str.py +++ b/strpython/models/str.py @@ -1,24 +1,22 @@
 # coding = utf-8
+import copy
+import logging
 import time
 import warnings
+import geopandas as gpd
 import networkx as nx
 import pandas as pd
-import logging
-from shapely.geometry import Point, MultiPoint, MultiLineString, LineString
+from shapely.geometry import MultiPoint,Polygon,Point,LineString
+from ..helpers.collision import collisionTwoSEBoundaries, add_cache_adjacency
+from ..helpers.geodict_helpers import get_data, get_data_by_wikidata_id
+from ..eval.stats import most_common
-from ..config.configuration import config
-#logging.basicConfig(filename=config.log_file,level=logging.INFO)
+from sklearn.cluster import MeanShift, estimate_bandwidth
+# logging.basicConfig(filename=config.log_file,level=logging.INFO)
-from ..helpers.boundary import is_intersect
-from ..helpers.collision_with_gazetteer_data import collisionTwoSEBoundaries
-from ..helpers.deprecated import deprecated
-from ..helpers.gazeteer_helpers import get_data, get_data_by_wikidata_id
-from ..nlp.ner.ner import NER
-import geopandas as gpd
 def get_inclusion_chain(id_, prop):
     """
     For an entity, return its geographical inclusion chain using a given property.
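[Editor's sketch] objectify() above wraps gazetteer query results so fields read as attributes while the value stays a plain dict. A hedged usage example with invented values (assumes strpython is importable):

    from strpython.helpers.objectify import objectify

    place = objectify({"id": "GD123", "en": "Montpellier",
                       "coord": {"lon": 3.87, "lat": 43.61}})
    print(place.en)               # attribute-style access -> Montpellier
    print(place["coord"]["lat"])  # regular dict access still works -> 43.61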
@@ -38,14 +36,10 @@ class STR(object):
     """
     Str basic structure
     """
-
-    def __init__(self, tagged_text, spatial_entities, shapes=None):
+    __cache_inclusion = {}
+    def __init__(self, tagged_text, spatial_entities):
         self.tagged_text = tagged_text
-        self.shapes = shapes
-        if self.shapes:
-            self.spatial_entities = {k: v for k, v in spatial_entities.items() if k in self.shapes}
-        else:
-            self.spatial_entities = spatial_entities
+        self.spatial_entities = spatial_entities
         self.adjacency_relationships = {}
         self.inclusion_relationships = {}
@@ -124,6 +118,7 @@ class STR(object):
         except:
             label = None
         self.add_spatial_entity(id, label, False)
+        # print(self.graph.nodes(data=True))
     def add_adjacency_rel(self, se1, se2,v=True):
         if not se1 in self.adjacency_relationships:
@@ -164,23 +159,34 @@ class STR(object):
         Method for updating links between spatial entities
         :return:
         """
-        nodes = self.graph.nodes(data=True)
+        nodes = copy.deepcopy(self.graph.nodes(data=True))
         self.graph.clear()
         self.graph.add_nodes_from(nodes)
+        print("inclusion")
+        self.get_inclusion_relationships()
+        for se1 in self.inclusion_relationships:
+            for se2 in self.inclusion_relationships[se1]:
+                if self.inclusion_relationships[se1][se2]:
+                    self.graph.add_edge(se1, se2, key=0, color="red")
+
+        print("adjacency")
         self.get_adjacency_relationships()
         for se1 in self.adjacency_relationships:
             for se2 in self.adjacency_relationships[se1]:
                 if self.adjacency_relationships[se1][se2]:
                     self.graph.add_edge(se1, se2, key=0, color="green")
+        print("adjacency done")
+
+
-        self.get_inclusion_relationships()
-        for se1 in self.inclusion_relationships:
-            for se2 in self.inclusion_relationships[se1]:
-                if self.inclusion_relationships[se1][se2]:
-                    self.graph.add_edge(se1, se2, key=0, color="red")
+    def add_cache_inclusion(self, id1, id2):
+        if not id1 in STR.__cache_inclusion:
+            STR.__cache_inclusion[id1] = set([])
+        STR.__cache_inclusion[id1].add(id2)
     def is_included_in(self, se1_id, se2_id):
         """
         Return True if the two spatial entities identified by @se1_id and @se2_id share an inclusion relationship
         :param se1_id:
@@ -191,13 +197,19 @@ class STR(object):
             if se2_id in self.inclusion_relationships[se1_id]:
                 return self.inclusion_relationships[se1_id][se2_id]
+        if se1_id in STR.__cache_inclusion:
+            if se2_id in STR.__cache_inclusion[se1_id]:
+                return True
+
         inc_chain_P131 = get_inclusion_chain(se1_id, "P131")
         inc_chain_P706 = get_inclusion_chain(se1_id, "P706")
         inc_chain = inc_chain_P131
         inc_chain.extend(inc_chain_P706)
         inc_chain = set(inc_chain)
         if se2_id in inc_chain:
+            self.add_cache_inclusion(se1_id, se2_id)
             return True
+
         return False
     def get_inclusion_relationships(self):
@@ -223,49 +235,65 @@ class STR(object):
         p47se1 = []
         for el in data["P47"]:
             d = get_data_by_wikidata_id(el)
+            if not d:
+                continue
             if "id" in d:
                 p47se1.append(d["id"])
         return p47se1
+    def is_adjacent(self, se1, se2, datase1=None, datase2=None):
+        f = False
+        stop_class = set(["A-PCLI", "A-ADM1"])
+        if self.is_included_in(se1, se2):
+            return f
+
+        elif self.is_included_in(se2, se1):
+            return f
+
+        data_se1 = get_data(se1) if not datase1 else datase1  # avoids reloading the data each time
+        data_se2 = get_data(se2) if not datase2 else datase2
+
+        # print("testP47")
+        if "P47" in data_se2:
+            if se1 in self.getP47AdjacencyData(data_se2):
+                return True
+        # print("P47")
+        if not f:
+            if "P47" in data_se1:
+                if se2 in self.getP47AdjacencyData(data_se1):
+                    return True
+        # print("P47")
+        if not f:
+            # print("test collision")
+            if collisionTwoSEBoundaries(se1, se2):
+                return True
+        if not f:
+            if "coord" in data_se1 and "coord" in data_se2:
+                if Point(data_se1["coord"]["lon"], data_se1["coord"]["lat"]).distance(
+                        Point(data_se2["coord"]["lon"], data_se2["coord"]["lat"])) < 1 and len(
+                    set(data_se1["class"]) & stop_class) < 1 and len(set(data_se2["class"]) & stop_class) < 1:
+                    return True
+        return f
+
     def get_adjacency_relationships(self):
         """
         Return all the adjacency relationships between all the spatial entities in the STR.
         :return:
         """
-        stop_class=set(["A-PCLI","A-ADM1"])
-
+        data = {se: get_data(se) for se in self.spatial_entities}
         for se1 in self.spatial_entities:
-            data_se1 = get_data(se1)
+            data_se1 = data[se1]
             for se2 in self.spatial_entities:
                 if se1 == se2:
                     continue
-
-                if self.is_included_in(se1,se2) or self.is_included_in(se2,se1):
-                    continue
+                # print("test adjacency")
                 if se1 in self.adjacency_relationships:
                     if se2 in self.adjacency_relationships[se1]:
                         continue
                 if se2 in self.adjacency_relationships:
                     if se1 in self.adjacency_relationships[se2]:
                         continue
-                data_se2 = get_data(se2)
-                f = False
-                if "P47" in data_se2:
-                    if se1 in self.getP47AdjacencyData(data_se2):
-                        f = True
-                        #print(data_se1["en"], data_se2["en"], "P47")
-                if not f:
-                    if "P47" in data_se2:
-                        if se2 in self.getP47AdjacencyData(data_se2):
-                            f = True
-                            #print(data_se1["en"], data_se2["en"], "P47")
-                if not f:
-                    f = collisionTwoSEBoundaries(se1, se2)
-                if not f:
-                    if Point(data_se1["coord"]["lon"], data_se1["coord"]["lat"]).distance(
-                            Point(data_se2["coord"]["lon"], data_se2["coord"]["lat"])) < 1 and len(
-                        set(data_se1["class"]) & stop_class) < 1 and len(set(data_se2["class"]) & stop_class) < 1:
-                        f = True
-                self.add_adjacency_rel(se1, se2, f)
+                data_se2 = data[se2]
+                self.add_adjacency_rel(se1, se2, self.is_adjacent(se1, se2, data_se1, data_se2))
@@ -334,12 +362,55 @@ class STR(object):
             id1, id2 = edge[0], edge[1]
             if edge[2]["color"] == "green":
                 self.add_adjacency_rel(edge[0],edge[1])
+                add_cache_adjacency(id1, id2)
             elif edge[2]["color"] == "red":
                 self.add_inclusion_rel(edge[0], edge[1])
-    def set_all_shapes(self,shapes):
-        self.shapes=shapes
+                self.add_cache_inclusion(id1, id2)
+
-    def map_projection(self):
+    def get_geo_data_of_se(self):
+        points, label, class_ = [], [], []
+        for se in self.spatial_entities:
+            data = get_data(se)
+            try:
+                points.append(Point(data["coord"]["lon"], data["coord"]["lat"]))
+                label.append(data["en"])
+                class_.append(most_common(data["class"]))
+            except:
+                pass
+        df = gpd.GeoDataFrame({"geometry": points, "label": label, "classe": class_})
+        df["x"] = df.geometry.apply(lambda p: p.x)
+        df["y"] = df.geometry.apply(lambda p: p.y)
+        return df
+
+    def get_cluster(self):
+        # cluster the entities' coordinates with MeanShift and return buffered cluster hulls
+        data = self.get_geo_data_of_se()
+        bandwidth = estimate_bandwidth(data[["x", "y"]].values)
+        ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
+        ms.fit(data[["x", "y"]].values)
+        data["cluster"] = ms.labels_
+        """
+        # second clustering pass
+        c=data['cluster'].value_counts().idxmax()
+        X=data[data["cluster"] == c]
+        X=X[["x","y"]]
+        bandwidth = estimate_bandwidth(X.values)
+        ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
+        ms.fit(X.values)
+        X["cluster"]=ms.labels_+(data['cluster'].max()+1)
+        lab=ms.labels_
+        lab+=data['cluster'].max()+1
+
+        data["cluster"][data["cluster"] == c]=X["cluster"]
+        """
+
+        geo = data.groupby("cluster").apply(to_Polygon)
+        cluster_polybuff = gpd.GeoDataFrame(geometry=geo)
+        return cluster_polybuff
+
+
+    def map_projection(self, show=False):  # param renamed from `plt`, which the matplotlib import below immediately shadowed
+        import matplotlib.pyplot as plt
+        world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
+        base = world.plot(color='white', edgecolor='black', figsize=(16, 9))
@@ -376,7 +447,24 @@ class STR(object):
         gpd.GeoSeries(points).plot(ax=base, marker='o', markersize=5, color="blue")
         gpd.GeoSeries(lines_adj).plot(ax=base, color="green")
         gpd.GeoSeries(lines_inc).plot(ax=base, color="red")
-        print("adj",gpd.GeoSeries(lines_adj))
-        print("inc",gpd.GeoSeries(lines_inc))
+
+        if not show:
+            return base
         plt.show()
+
+def to_Multipoints(x):
+    # note: despite its name, this returns a buffered Polygon
+    #print(x[["x","y"]].values)
+    return Polygon([Point(z) for z in x[["x","y"]].values]).buffer(1)
+
+def to_Polygon(x):
+    points = [Point(z) for z in x[["x","y"]].values]
+    if len(points) > 2:
+        coords = [p.coords[:][0] for p in points]
+        poly = Polygon(coords).buffer(1)
+        return poly
+    elif len(points) == 1:
+        return points[0].buffer(1)
+    else:
+        coords = [p.coords[:][0] for p in points]
+        return LineString(coords).buffer(1)
\ No newline at end of file
diff --git a/strpython/models/transformation/transform.py b/strpython/models/transformation/transform.py index adbc968399ff40c386d38ed50ab519e88a9961fc..f97e123873503ea82df1398ca11d0e0f73c8429a 100644 --- a/strpython/models/transformation/transform.py +++ b/strpython/models/transformation/transform.py @@ -7,7 +7,7 @@ import numpy as np
 from elasticsearch import Elasticsearch
 from ...config.configuration import config
-from ...helpers.gazeteer_helpers import get_data
+from ...helpers.geodict_helpers import get_data
 from ..str import STR, get_inclusion_chain
 client = Elasticsearch(config.es_server)
@@ -15,12 +15,28 @@ client = Elasticsearch(config.es_server)
 class Transformation():
     """
-    Transform structure
+    Transformation base class
     """
+    def transform(self, str_: STR, **kwargs) -> STR:
+        """
+        Transform a STR
+        :param str_: STR input
+        :param kwargs: dict --> args needed for the transformation
+        :return: STR
+        """
         pass
+
class Generalisation(Transformation):
+    """
+    Generalisation transformation class declaration. A generalisation replaces each spatial entity
+    in a STR by one of its "upper" entities -- an upper entity e1 of e2 includes e2 -- following one
+    of two hypotheses. The first, "transform_all", generalises every spatial entity by climbing the
+    inclusion chain, while the second, "transform_bounded", only generalises entities up to a given
+    administrative level (town, region, country, ...).
+    """
+
+    # Levels accepted by transform_bounded()
     bounded_class_references = {
         "country": ["A-PCLI"],
         "region": ["A-ADM1"],
@@ -29,6 +45,12 @@ class Generalisation(Transformation):
     }
     def transform(self, str_: STR, **kwargs) -> STR:
+        """
+        Apply the generalisation.
+        :param str_: STR
+        :param kwargs: n (number of inclusion levels to climb), type_gen ("all" or "bounded"), bound (target level for "bounded")
+        :return: STR
+        """
         h = kwargs.get("n", 1)
         type_ = kwargs.get("type_gen", "all")
         bound = kwargs.get("bound", "country")
         """
         Store Inclusion Informations
         """
         if type_ == "all":
-            return self.transform_all(str_, h,cp=cp)
+            return self.transform_all(str_, h, cp=cp)
         if type_ == "bounded":
-            return self.transform_bounded(str_, bound,cp=cp)
+            return self.transform_bounded(str_, bound, cp=cp)
         else:
Using \"all\" generalisation by default") return self.transform_all(str_, h, cp=cp) @@ -50,14 +72,14 @@ class Generalisation(Transformation): for node in graph.nodes(): if not node in inclusion_dictionnary: inc_list = [] - data=get_data(node) + data = get_data(node) try: inc_list = data["inc_P131"] except: pass if not inc_list: if "inc_geoname" in data: - inc_list=data["inc_geoname"] + inc_list = data["inc_geoname"] if inc_list: inc_list = inc_list if isinstance(inc_list, list) else [inc_list] @@ -80,8 +102,7 @@ class Generalisation(Transformation): associated_classes[it] = classes_list return associated_classes - - def transform_bounded(self, str_: STR, bound: str,cp=True) -> STR: + def transform_bounded(self, str_: STR, bound: str, cp=True) -> STR: if not bound in Generalisation.bounded_class_references: print("'bound' must be a value from {0}".format(str(Generalisation.bounded_class_references))) exit() @@ -100,7 +121,7 @@ class Generalisation(Transformation): if t_: transform_map[es] = t_ if cp: - copy_= copy.deepcopy(str_) + copy_ = copy.deepcopy(str_) copy_.transform_spatial_entities(transform_map) copy_.update() return copy_ @@ -109,13 +130,13 @@ class Generalisation(Transformation): str_.update() return str_ - def transform_all(self, str_: STR, h: int,cp=True) -> STR: - h=int(h) - graph=str_.graph + def transform_all(self, str_: STR, h: int, cp=True) -> STR: + h = int(h) + graph = str_.graph inclusion_dict = Generalisation.get_inclusion_map(graph) transform_map = {} new_label = {} - i=0 + i = 0 for node in graph.nodes(): if node in inclusion_dict: inc_chain = inclusion_dict[node] @@ -126,7 +147,7 @@ class Generalisation(Transformation): transform_map[node] = inc_chain[h - 1] new_label[inc_chain[h - 1]] = get_data(inc_chain[h - 1])["en"] if cp: - copy_= copy.deepcopy(str_) + copy_ = copy.deepcopy(str_) copy_.transform_spatial_entities(transform_map) copy_.update() return copy_ @@ -136,7 +157,7 @@ class Generalisation(Transformation): class Expansion(Transformation): - def getAroundEntities(self, data, score, distance=150,unit="km",n=1): + def getAroundEntities(self, data, score, distance=150, unit="km", n=1): if not "coord" in data: return [] hits = client.search("gazetteer", "place", { @@ -155,7 +176,7 @@ class Expansion(Transformation): ], "filter": { "geo_distance": { - "distance": "{0}{1}".format(distance,unit), + "distance": "{0}{1}".format(distance, unit), "coord": data["coord"] } } @@ -165,69 +186,65 @@ class Expansion(Transformation): {"score": "desc"} ], "size": n}) if hits["hits"]["total"] > 0: - ids_=[] + ids_ = [] for h in hits["hits"]["hits"]: ids_.append(h["_source"]["id"]) return ids_ return [] - def select_es(self,graph): + def select_es(self, graph): es = np.array(list(graph.nodes)) - score = [np.inf for i in range(len(es))] + score = [-1 for i in range(len(es))] for e in range(len(es)): data = get_data(es[e]) if "score" in data: score[e] = float(data["score"]) - return es[score < np.median(score)] + return np.median(score), es[score < np.median(score)] def transform(self, str_: STR, **kwargs): type_ = "adjacency" distance = kwargs.get("distance", 150) unit = kwargs.get("unit", 150) n = kwargs.get("adjacent_count", 1) - cp=kwargs.get("cp", True) + cp = kwargs.get("cp", True) if type_ == "adjacency": - return self.transform_adj(str_, distance,unit,n,cp) + return self.transform_adj(str_, distance, unit, n, cp) - - def transform_adj(self, str_: STR, distance: int,unit : str,n :int,cp=True) -> STR: - graph=str_.graph - selected_se = self.select_es(graph) + def 
transform_adj(self, str_: STR, distance: int, unit: str, n: int, cp=True) -> STR: + graph = str_.graph + median, selected_se = self.select_es(graph) data_se, scores_ = {}, [] for node in selected_se: data_se[node] = get_data(node) if "score" in data_se[node]: scores_.append(float(data_se[node]["score"])) else: - scores_.append(np.inf) - median = np.median(scores_) - - new_nodes=[] + scores_.append(-1) + new_nodes = [] + labels = [] for node in selected_se: data_ = data_se[node] if (not "P-PPL" in data_["class"]) and (not "A-ADM4" in data_["class"]): continue if not "country" in data_: continue - neighbor = self.getAroundEntities(data_, median, distance,unit,n) + neighbor = self.getAroundEntities(data_, median, distance, unit, n) if not neighbor: try: - neighbor=[get_inclusion_chain(node,"P131")[0]] + neighbor = [get_inclusion_chain(node, "P131")[0]] except: - neighbor=[] + neighbor = [] + labels.extend([get_data(n)["en"] for n in neighbor]) new_nodes.extend(neighbor) - new_nodes=list(set(new_nodes)) - labels=[] - for no in new_nodes: - #print(no,get_data(no)) - labels.append(get_data(no)["en"]) + new_nodes = list(set(new_nodes)) if cp: - copy_= copy.deepcopy(str_) - copy_.add_spatial_entities(new_nodes,labels) + copy_ = copy.deepcopy(str_) + copy_.add_spatial_entities(new_nodes, labels) + copy_.update() return copy_ - str_.add_spatial_entities(new_nodes,labels) + str_.add_spatial_entities(new_nodes, labels) str_.update() return str_ diff --git a/strpython/nlp/disambiguator/geodict_gaurav.py b/strpython/nlp/disambiguator/geodict_gaurav.py index 14eb26050e8b8648bac5465fdc46ee0151a9a83a..f6ae42277bb7e9bd5e6c553799e76d8db7a999de 100644 --- a/strpython/nlp/disambiguator/geodict_gaurav.py +++ b/strpython/nlp/disambiguator/geodict_gaurav.py @@ -1,11 +1,13 @@ # coding = utf-8 import math -from ...helpers.collision_with_gazetteer_data import * -from ...helpers.gazeteer_helpers import * +from ...helpers.collision import * +from ...helpers.geodict_helpers import * from .disambiguator import Disambiguator from ...models.str import get_inclusion_chain + + class GauravGeodict(Disambiguator): def __init__(self): @@ -67,7 +69,7 @@ class GauravGeodict(Disambiguator): id_fixed = fixed_entities[fixed]["id"] if self.Adjacency_P47(id_cand, id_fixed): score_dc[id_cand] += 3 - if self.Adjacency_Hull(id_cand, id_fixed): + elif self.Adjacency_Hull(id_cand, id_fixed): score_dc[id_cand] += 2 score_dc[id_cand] += self.get_inclusion_score(id_cand, id_fixed) m = max(score_dc, key=score_dc.get) @@ -126,7 +128,7 @@ class GauravGeodict(Disambiguator): for amb_ent in ambiguous_entities: d = self.disambiguateOne(ambiguous_entities[amb_ent], fixed_entities) if not d: - d_amb_results[amb_ent] = get_most_common_id_v2(amb_ent, lang)[0] + d_amb_results[amb_ent] = get_most_common_id_v3(amb_ent, lang)[0] else: d_amb_results[amb_ent] = d for k, v in fixed_entities.items(): diff --git a/strpython/nlp/disambiguator/models/bigram.py b/strpython/nlp/disambiguator/models/bigram.py index d9ce129f14032e25035c7f69d597623e38529cbc..f45ba97b13382fa7cc7c7bccc421732284fba791 100644 --- a/strpython/nlp/disambiguator/models/bigram.py +++ b/strpython/nlp/disambiguator/models/bigram.py @@ -31,7 +31,7 @@ class BigramModel: def get_bigram_probability(self,uri1,uri2,pr1=1): - nna=0.00000000000000001 + nna=0.00000001 if uri1 in self.cooc_freq: if uri2 in self.cooc_freq[uri1]: return (self.cooc_freq[uri1][uri2] / self.count_associated[uri1])+pr1 diff --git a/strpython/nlp/disambiguator/most_common.py b/strpython/nlp/disambiguator/most_common.py index 
60de2a6c50a38365dbd0f7a64bf5b5ed5cfe6dcf..12e448912f613c3b12b44c5ee08df0da5b4532ae 100644 --- a/strpython/nlp/disambiguator/most_common.py +++ b/strpython/nlp/disambiguator/most_common.py @@ -1,11 +1,18 @@
 # coding = utf-8
-from ...helpers.gazeteer_helpers import label_exists, alias_exists, get_most_common_id_v2,get_most_common_id_v3, get_most_common_id_alias_v2
+from ...helpers.geodict_helpers import *
 from .disambiguator import Disambiguator
 import re, json, os
 from ...config.configuration import config
+from inflector import Inflector, English, Spanish, French
+
+inflectors = {
+    "en": Inflector(English()),
+    "fr": Inflector(French()),
+    "es": Inflector(Spanish())
+}
 stop_words = {
     "fr": set(open(os.path.join(config.language_resources_path,"stop_words_fr.txt")).read().split("\n")),
     "en": set(open(os.path.join(config.language_resources_path,"stop_words_en.txt")).read().split("\n"))
@@ -37,22 +44,26 @@ class MostCommonDisambiguator(Disambiguator):
     def disambiguate_(self, label, lang='fr'):
         if re.match("^\d+$", label):
             return 'O', -1
-        if label.lower().rstrip("s") in stop_words[lang] or label.lower().rstrip("s") in common_words[lang]:
-            return 'O', -1
+        if lang in stop_words:  #and lang in common_words:
+            if label.lower().rstrip("s") in stop_words[lang]:  #or label.lower().rstrip("s") in common_words[lang]:
+                return 'O', -1
-        plural = label.rstrip("s") + "s"
-        if plural.lower() in stop_words[lang] or plural.lower() in common_words[lang]:
-            return 'O', -1
+        if lang in inflectors:
+            plural = inflectors[lang].singularize(label)  # the singular form, despite the variable name
+        else:
+            plural = label.rstrip("s") + "s"
+        if plural.lower() in stop_words[lang]:  # or plural.lower() in common_words[lang]:
+            return 'O', -1
-        id_, score = get_most_common_id_v3(label, lang)
+        id_, score = most_common_label(label, lang)
         if id_:
             id_en, score_en = get_most_common_id_v3(label, "en")
             if id_en and score_en:
                 if score_en > score:
                     id_, score = id_en, score_en
-            id_alias, score_alias = get_most_common_id_alias_v2(label, lang)
+            id_alias, score_alias = most_common_alias(label, lang)
             if id_alias and score_alias:
                 if score_alias > score:
                     id_, score = id_alias, score_alias
-        print(label,id_,score)
+        #print(label,id_,score)
         return id_, score
diff --git a/strpython/nlp/disambiguator/pagerank.py b/strpython/nlp/disambiguator/pagerank.py index 7c95ee46197626051ccb976d4e02d0578582a841..25eb02eb7edb9cdc37cf918a70f8690339f7f19f 100644 --- a/strpython/nlp/disambiguator/pagerank.py +++ b/strpython/nlp/disambiguator/pagerank.py @@ -1,6 +1,6 @@
 # coding = utf-8
-from ...helpers.gazeteer_helpers import label_exists, alias_exists, get_most_common_id_v2, get_most_common_id_alias_v2
+from ...helpers.geodict_helpers import *
 from .disambiguator import Disambiguator
@@ -13,11 +13,11 @@ class PageRankDisambiguator(Disambiguator):
         new_count = {}
         selected_en = {}
         for en in se_:
-            en_most_common, score_en = get_most_common_id_v2(en, "en")
+            en_most_common, score_en = get_most_common_id_v3(en, "en")
             if label_exists(en, lang):
-                id_, score = get_most_common_id_v2(en, lang)
+                id_, score = get_most_common_id_v3(en, lang)
             elif alias_exists(en, lang):
-                id_, score = get_most_common_id_alias_v2(en, lang)
+                id_, score = get_most_common_id_alias_v2(en, lang)  # was `(en, lang)`, which dropped the lookup call
             if en_most_common and score_en > score:
                 selected_en[en_most_common] = en
diff --git a/strpython/nlp/disambiguator/wikipedia_cooc.py b/strpython/nlp/disambiguator/wikipedia_cooc.py index 485002244978a4c4e13b31cdb3781b3977dac47f..56ec7cd83f97971a9d392f74151eb4dc37c0f047 100644 --- a/strpython/nlp/disambiguator/wikipedia_cooc.py +++ b/strpython/nlp/disambiguator/wikipedia_cooc.py
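[Editor's sketch] The wikipedia_cooc.py changes below pick, for each toponym, the candidate with the strongest weighted ties in a co-occurrence graph. A toy version of that selection rule (ids and weights invented):

    import networkx as nx

    # Candidate graph for one toponym: edge weights play the role of the
    # co-occurrence probabilities computed below.
    g = nx.Graph()
    g.add_edge("paris_fr", "france", weight=0.9)
    g.add_edge("paris_tx", "france", weight=0.1)

    candidates = ["paris_fr", "paris_tx"]
    best = max(candidates, key=lambda c: g.degree(c, weight="weight"))
    print(best)  # -> paris_fr (strongest total co-occurrence)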
@@ -5,7 +5,7 @@ from .disambiguator import Disambiguator
 from .models.bigram import BigramModel
 import pickle
 from ...config.configuration import config
-from ...helpers.gazeteer_helpers import get_data,get_most_common_id_v3,get_top_candidate
+from ...helpers.geodict_helpers import get_data,get_most_common_id_v3,get_top_candidate
 from .most_common import stop_words,common_words
 import networkx as nx
@@ -14,11 +14,11 @@ def read_pickle(fn):
 class WikipediaDisambiguator(Disambiguator):
-    def __init__(self):
+    def __init__(self, measure="centrality"):
         Disambiguator.__init__(self)
         # Load model
         self.model=BigramModel(read_pickle(config.wiki_cooc_dis.cooc_freq),read_pickle(config.wiki_cooc_dis.count))
-
+        self.measure = measure
     def disambiguate(self, ner_result, lang="en"):
         count, se_ = self.extract_se_entities(ner_result)
         new_count = {}
@@ -38,21 +38,22 @@ class WikipediaDisambiguator(Disambiguator):
         for e in entities:
             if re.match("^\d+$", e):
                 continue
-            if e.lower().rstrip("s") in stop_words[lang] or e.lower().rstrip("s") in common_words[lang]:
+            if e.lower().rstrip("s") in stop_words[lang]:  # or e.lower().rstrip("s") in common_words[lang]:
                 continue
             plural = e.rstrip("s") + "s"
-            if plural.lower() in stop_words[lang] or plural.lower() in common_words[lang]:
+            if plural.lower() in stop_words[lang]:  # or plural.lower() in common_words[lang]:
                 continue
             spat_en.append(e)
-
+        spat_en = list(set(spat_en))
         g = nx.Graph()
         possible_candidates = []
         betw_cand = {}  # indicates which toponym group a candidate belongs to (possibly redundant)
         group_candidate = {}  # candidates per toponym
+
         for e in spat_en:
-            cand = get_top_candidate(e, lang)
+            cand = get_top_candidate(e, lang, 4)
             group_candidate[e] = cand
             betw_cand[e] = cand
             for n in cand:
@@ -62,35 +63,42 @@ class WikipediaDisambiguator(Disambiguator):
         for cand in possible_candidates:
             g.add_node(cand, label=get_data(cand)[lang])
+        data_candidate = {ca: get_data(ca) for ca in possible_candidates}
         for cand in possible_candidates:
             for cand2 in possible_candidates:
                 # Get PageRank score
-                d = get_data(cand)
+                d = data_candidate[cand]
+                sc = 1
                 if "score" in d:
                     sc = float(d["score"])
-                # Compute probability
                 prob = self.model.get_coocurence_probability(sc, cand, cand2)
+
                 if cand2 in betw_cand[cand] or cand in betw_cand[cand2]:
                     prob = 0.0
                 if prob < 0.0000001:
                     prob = 0.0
                 if not cand == cand2:
                     # take the lowest co-occurrence between two candidates
-                    if (cand2, cand) in list(g.edges):
-                        if g.edge[cand2][cand]["weight"] < prob:
+                    if g.has_edge(cand2, cand):
+                        if g.edges[cand2, cand]["weight"] < prob:
                             continue
                     g.add_edge(cand, cand2, weight=prob)
         selected = {}
         # Take the candidate with the highest weighted degree (or closeness centrality)
         for gr in group_candidate:
             try:
-                selected[gr] = max(group_candidate[gr], key=lambda x: g.degree(x, weight='weight'))
+                if self.measure == "degree":
+                    selected[gr] = max(group_candidate[gr], key=lambda x: g.degree(x, weight='weight'))
+                elif self.measure == "centrality":
+                    selected[gr] = max(group_candidate[gr], key=lambda x: nx.closeness_centrality(g, x, distance="weight"))
+                else:  # degree by default
+                    selected[gr] = max(group_candidate[gr], key=lambda x: g.degree(x, weight='weight'))
+
             except:
-                #print(group_candidate[gr]) empty group
-                selected[gr]=get_most_common_id_v3(gr,lang)
+                # empty candidate group: fall back to the most common id
+                selected[gr] = get_most_common_id_v3(gr, lang)[0]
         return selected
diff --git a/strpython/nlp/ner/by_dict.py b/strpython/nlp/ner/by_dict.py index af4a3619401de2a576929bd3ca5476d0f9a74fdf..ec7ed54fc0c51d685f69f1b92ebe5027584ec0b0 100644 --- a/strpython/nlp/ner/by_dict.py +++
b/strpython/nlp/ner/by_dict.py @@ -3,7 +3,7 @@ import numpy as np from polyglot.text import Text as PolyText from .ner import NER -from ...helpers import gazeteer_helpers +from ...helpers import geodict_helpers class ByDict(NER): @@ -37,9 +37,9 @@ class ByDict(NER): cur = f.tolist() for t in terms: - GID = gazeteer_helpers.get_most_common_id_v3(" ".join(pos_t[:, 0][t]), lang=self._lang)[0] + GID = geodict_helpers.get_most_common_id_v3(" ".join(pos_t[:, 0][t]), lang=self._lang)[0] if GID: - data = gazeteer_helpers.get_data(GID) + data = geodict_helpers.get_data(GID) if "score" in data: if not float(data["score"]) > self.threshold: continue diff --git a/strpython/pipeline.py b/strpython/pipeline.py index 013963d797b3ba090354cdec0633ea796a7f965c..c2d5feb71956894c83d87f2f5cce666e62e6d836 100644 --- a/strpython/pipeline.py +++ b/strpython/pipeline.py @@ -1,5 +1,6 @@ # coding =utf-8 -from .helpers.deprecated import deprecated +from strpython.models.str import STR + from .models.transformation.transform import Generalisation, Expansion from .nlp.disambiguator.disambiguator import Disambiguator from .nlp.disambiguator.most_common import MostCommonDisambiguator @@ -117,42 +118,6 @@ class Pipeline(object): str_=Expansion().transform(str_,**kwargs) return str_ - @deprecated("Have been proved to be not useable for now ...") - def buildSemSTR(self,text,win_size=5): - """ - Return the corresponding STR for a text. - :param text: - :return: STR - """ - _,output, se_identified = self.parse(text) - - str_=STR_SEM(output,se_identified) - str_.build(win_size=win_size) - return str_ - @deprecated("Have been proved to be not useable") - def build_class_variation_str(self,text): - """ - Return the corresponding STR for a text. - :param text: - :return: STR - """ - _,output, se_identified = self.parse(text) - str_=STRClassVariation(output,se_identified) - str_.build() - return str_ - - @deprecated("Have been proved to be not useable") - def build_population_variations_str(self,text): - """ - Return the corresponding STR for a text. 
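[Editor's sketch] How the two transformations wired into Pipeline above are invoked; the keyword arguments match the Generalisation/Expansion signatures earlier in this diff, and an already-built STR `str_` plus a running gazetteer are assumed:

    from strpython.models.transformation.transform import Generalisation, Expansion

    # generalise every entity one inclusion level up ...
    str_gen = Generalisation().transform(str_, type_gen="all", n=1)
    # ... or only up to country level
    str_country = Generalisation().transform(str_, type_gen="bounded", bound="country")

    # extend the STR with entities within 150 km of its below-median-score members
    str_ext = Expansion().transform(str_, distance=150, unit="km", adjacent_count=1)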
- :param text: - :return: STR - """ - _,output, se_identified = self.parse(text) - str_=STRPopulationVariation(output,se_identified) - str_.build() - return str_ - if __name__ == '__main__': pass \ No newline at end of file diff --git a/strpython/resources/language_resources/dic_fr.json b/strpython/resources/language_resources/dic_fr.json index a558ed10c05b6c5883911fb5da24829c8eb052ce..c36482ecbde2c0ebacdecf2849ca0beb45c743d5 100644 --- a/strpython/resources/language_resources/dic_fr.json +++ b/strpython/resources/language_resources/dic_fr.json @@ -229191,7 +229191,6 @@ "iatrogénique", "affronteuse", "emplacement", - "belgique", "reparlementer", "péquin", "surmiser", diff --git a/strpython/resources/language_resources/geo_term_en b/strpython/resources/language_resources/geo_term_en new file mode 100644 index 0000000000000000000000000000000000000000..3911cd2fcf4220bf27800b6113f94c819bd7405d --- /dev/null +++ b/strpython/resources/language_resources/geo_term_en @@ -0,0 +1,1030 @@ +absolute humidity +absolute location +accessibility resource +accessibility +acid rain +active volcano +agricultural geography +air mass +alluvia +alluvial soils +altitude +antarctic +antarctic circle +anthracite +anthropization +anticline +antimeridian +antipodes +aquifer +archipelago +arête +arroyo +arctic +arctic circle +ash +atlantic seaboard fall line +atlas +atmosphere +atoll +ayre +azimuth +badlands +barrier ridge +base level +basin +batholith +bay +beach +bearing +bedrock +bight +biological diversity +biogeography +biosphere +biota +bituminous +blowout +bocage +body of water +bootheel +border +break-in-bulk point +built environment +butte +calanque +caldera +canal +canyon +cape +caprock +cardinal directions +carrying capacity +cartography +cartogram +cave +cay +cenote +central business district +census-designated place (cdp) +channel +chaparral +chimney +chinook +chorography +cinder cone +circle of latitude +cirque +city +city-state +cliff +climax vegetation +coast +col +colony +commonwealth +compass +compass rose +confluence +coniferous +contiguity +continent +continental climate +continental divide +continental shelf +continentality +contour lines +conurbation +corrasion +core area +coulee +couloir +country +county +course +crater +crater lake +crop-lien system +crust +cryosphere +cryoturbation +cuesta +cultural geography +culture +culture hearth +cut bank +cwm +cyclopean stairs +dale +dam +de facto'' segregation +de jure'' segregation +deciduous forest +degree +degree day +dell +delta +demography +depression +desert +digital elevation model (dem) +dike +distributary +district +dome +dormant volcano +drainage basin +drainage divide +draw +drumlin +dry farming +dry point +dune +eastern hemisphere +economic geography +economies of agglomeration +edgelands +elevation +emergent coastline +enclave +endorheic basin +equator +erratic +escarpment +esker +estuary +evapotranspiration +exclave +exotic stream +extinct volcano +exurban +fall line +fallow +fault +fault-block mountain +fault zone +federation +fen +field +firth +fish ladder +fjord +floodplain +focality +forest +functional diversity +gazetteer +geodesy +geoid +geoinformatics +geographic information science (gis) +geographic information system (gis) +geographic names information system (gnis) +geography +geolocation +geomatics +geomorphology +geosphere +geostatistics +ghetto +glacial till +glaciation +glacier +global positioning system (gps) +globe +graben +great circle +great-circle distance +grid +groundwater +growing season +groyne +gulch +gulf +gully 
+guyot +hamlet +hanging valley +harmonic tremor +heading +headland +hearth +heartland +heath +hedgerow +hemisphere +highland +highway +hill +hillock +hinterland +historical geography +hogback +horizon +hotspot +human geography +hummock +humus +hydrography +hydrosphere +ice age +ice cap +iceberg +igneous rock +impact crater +impoundment +inertia costs of location +inlet +inselberg +insular +integrated geography +intercardinal directions +interfluve +intermediate directions +international date line +international waters +intervening opportunity +intracoastal waterway system +inverted river delta +island +islet +isohyet +isthmus +jurisdiction +jhum cultivation +jungle +kame +karst +kettle +key col +knoll +lacustrine plain +lagoon +lahar +lake +land bridge +landform +landmark +landmass +lateral blast +latitude +lava +leaching +leeward +legend +ledc +levee +life-cycle stage +lignite +lithosphere +lithospheric plates +location +loess +longitude +lowland +magma +main stem +mainland +makhtesh +mantle +map +map projection +maritime climate +marsh +mason–dixon line +massif +meander +meander scar +medc +mediterranean climate +megalopolis +megaregion +meridian +mesa +metamorphic rock +metes and bounds +metropolis +metropolitan area +metropolitan coalescence +mogote +monadnock +moor +moraine +mound +mountain +mountain range +mouth +mudflow +multilingual +municipality +nadir +nation +national mapping agency +national park +natural landscape +neighborhood +nodal region +north geographic pole +north magnetic pole +northern hemisphere +oasis +ocean +open range +ordinal directions +orographic rainfall +outwash +overburden +oxbow +palisade +panhandle +parish +permafrost +peninsula +photogrammetry +physical geography +physiographic region +physiography +piedmont +pit crater +place identity +plain +plate tectonics +plateau +platted land +plural society +polar circle +polar ice cap +polar region +pole of inaccessibility +political geography +polynodal +pond +populated place +population +population geography +post-industrial +pothole +precambrian rock +prevailing winds +primary sector +prime meridian +promontory +protected area +province +psychogeography +pueblo +quadrangle +quaternary sector +quarry +rail gauge +rainforest +rainshadow +ravine +region +regiopolis +relief +relief map +remote sensing +reservoir +resource +retroreflector +ribbon lake +ridge +rift valley +riparian rights +ria +river +riverine +rural +saddle +salient +salt pan +scale +scarp +sea +sea level +seamount +second home +secondary-intercardinal directions +secondary sector +sedimentary rock +seismograph +settlement +sheepback +shield +shield volcano +shoal +shore +sinkhole +site +situation +slough +smog +standard metropolitan statistical area (smsa) +snowline +soil horizon +solubility +sound +south geographic pole +south magnetic pole +southern hemisphere +space economy +spatial citizenship +spatial complementarity +spatial interaction +spatial reference system (srs) +spreading ridges +spring +spur +stack +state +steppe +strait +stratovolcano +stream +subduction zone +suburban +suburbanization +summit +surface water +surveying +swale +swamp +syrt +taiga +tarn +temperature inversion +tephra +terrain +territorial waters +territory +tertiary sector +thalweg +tide +till +time distance +time geography +time zone +topographic map +topographical relief +topographic isolation +topographic prominence +topography +toponymy +tor +town +township and range +transferability +transhumance +tree line +tributary +tropic of cancer +tropic of capricorn 
+tropics +tundra +underpopulation +uniform region +urban +urban geography +urban sprawl +urbanization +vale +valley +vertical exaggeration +vent +viewshed +village +volcanic avalanche +volcanic crater +volcano +wadi +water mapping +water pollution +water table +watershed +waterway +weathering +western hemisphere +wetland +wilderness +windward +world map +zoning +zenith +north +west +south +east \ No newline at end of file diff --git a/strpython/resources/language_resources/geo_term_fr b/strpython/resources/language_resources/geo_term_fr new file mode 100644 index
0000000000000000000000000000000000000000..3d038768f7ea5c7f8f273eec490dd6c2fefb854f --- /dev/null +++ b/strpython/resources/language_resources/geo_term_fr @@ -0,0 +1,416 @@ +About +Abrupt +Aérodrome +Aéroport +Affluent +Agglomération +Agglomération de recensement +Allée +Anse +Antre +Arboretum +Archipel +Arrêt ferroviaire +Arrière-arrière-fief +Arrière-fief +Arrondissement +Arrondissement historique +Arrondissement naturel +Autoroute +Avenue +Baie +Baignage +Baissière +Banc +Banc de pêche +Banc de sable +Barachois +Barrage +Barre +Barrière +Base +Base de plein air +Base militaire +Bas-fond +Bassin +Bassin hydrographique +Bassin portuaire +Batture +Belvédère +Berge +Bleuetière +Bogan +Bois +Bonnet +Bosquet +Boule +Boulevard +Branche +Bras +Bras mort +Brisant +Bureau de douane +Bureau de poste +Bureau de poste militaire +Butte +Buttereau +Button +Cabouron +Cairn +Calvette +Camp +Camp de plein air +Camp de vacances +Camp forestier +Camp militaire +Camp saisonnier +Camp sportif +Canal +Canton +Canyon +Cap +Carré +Carrefour +Carrière +Cascade +Cascatelle +Cataracte +Caverne +Caye +Cayon +Cédrière +Cénacle +Centrale +Centrale hydroélectrique +Centrale thermique +Centrale nucléaire +Centre de ski +Centre de villégiature +Centre d'interprétation de la nature +Centre écologique +Centre éducatif forestier +Cercle +Chaîne +Chaîne de montagnes +Chalet +Champ +Charge +Chaussée +Chemin +Chemin de front +Chemin de ligne +Chenail +Chenal +Chute +Chuton +Cimetière +Circonscription +Circonscription électorale +Circonscription électorale fédérale +Circonscription électorale provinciale +Cité +Club de chasse et de pêche +Col +Colline +Communauté régionale +Communauté urbaine +Commune +Concession +Confluent +Conseil régional de la santé et des services sociaux +Cordon +Cordon littoral +Corniche +Côte +Coteau +Coude +Coulée +Coupe +Courant +Courbe +Cours +Cours d'eau +Cours d'eau agricole +Cran +Cratère +Cratère météorique +Crête +Crevasse +Crique +Croissant +Cul-de-sac +Débarcadère +Décharge +Défilé +Dépôt +Dépôt forestier +Descente +Desserte +Détroit +Déversant +Digue +District électoral +District judiciaire +Division d'enregistrement +Division de recensement +Domaine +Dôme +Dune +Échangeur +Écluse +Écueil +Embranchement +Ensemble résidentiel +Entrée +Éperon +Escarpement +Esker +Esplanade +Est +Estran +Estuaire +Établissement amérindien +Établissement piscicole +Étang +Évacuateur +Falaise +Ferme +Fief +Fjord +Flèche +Fleuve +Fleuve côtier +Fond +Fondrière +Fontaine +Forêt +Forêt d'enseignement et de recherche +Forêt d'expérimentation +Fort +Fosse +Fossé +Fosse à saumon +Fourche +Fronteau +Gaine +Gare +Gare de triage +Golfe +Gorge +Gouffre +Goulet +Grève +Grotte +Halte +Halte routière +Hameau +Haut-fond +Havre +Héliport +Hydrobase +Île +Îlet +Îlot +Impasse +Jardin +Jardin zoologique +Jetée +Jonction +Kettle +Lac +Lac artificiel +Lacon +Lacotte +Lac salé +Lagune +Langue de terre +Lieu-dit +Ligne +Littoral +Localité +Marais +Marche +Mare +Marécage +Marina +Marmite +Massif +Méandre +Mer +Mine +Mont +Montagne +Montée +Morne +Mouillage +Municipalité +Municipalité de canton +Municipalité de cantons unis +Municipalité de comté +Municipalité de paroisse +Municipalité de village +Municipalité de village cri +Municipalité de village naskapi +Municipalité de village nordique +Municipalité régionale de comté +Niche +Nord +Ouest +Parc +Parc de conservation +Parc de maisons mobiles +Parc de récréation +Parc historique national +Parc industriel +Parc national +Parc national fédéral +Parc public +Paroi +Paroisse +Passage 
+Passe +Passerelle +Pâturage +Pavillon +Péninsule +Pépinière +Phare +Pic +Piémont +Piste +Piton +Place +Plage +Plaine +Plateau +Plate-forme insulaire +Platier +Platin +Platon +Plé +Plée +Pointe +Ponceau +Pont +Pont naturel +Pont-tunnel +Port +Portage +Port de plaisance +Poste +Poste d'accueil +Poste de douane +Poste de traite +Poste de transformation hydroélectrique +Poulier +Prairie +Pré +Prée +Presqu'île +Promenade +Promontoire +Puits +Puits artésien +Puits naturel +Quai +Quartier +Quartier résidentiel +Quartier scolaire +Rade +Rang +Rapide +Rapidon +Ravin +Ravine +Récif +Région +Région administrative +Remous +Réserve +Réserve de chasse +Réserve de chasse et de pêche +Réserve de la biosphère +Réserve de pêche +Réserve écologique +Réserve faunique +Réserve faunique de saumon +Réserve indienne +Réserve militaire +Réservoir +Rigole +Rigolet +Rivage +Rive +Rivière +Roche +Rocher +Route +Rue +Ruelle +Ruisseau +Ruisselet +Sanctuaire +Sanctuaire de pêche +Sault +Saut +Savane +Secteur +Secteur résidentiel +Seigneurie +Sente +Sentier +Sentier de migration +Sentier écologique +Site +Site historique +Sommet +Source +Square +Station +Station de métro +Station de pompage +Station de relais +Station de ski +Station forestière +Station halieutique +Station météorologique +Station militaire +Station radar +Sud +Terrain de camping +Terrain de jeu +Terrasse +Terrier +Territoire +Territoire non organisé +Tour +Tour à feu +Tourbière +Trait-carré +Traverse +Traverse d'animaux +Trou +Tunnel +Vallée +Vallon +Verger +Versant +Village +Village cri +Village forestier +Village historique +Village minier +Village naskapi +Ville +Ville minière +Voie +Voie de communication +Voie de desserte +Voie d'évitement +Zac +Zec +Zec-saumon +Zone +Zone d'aménagement et de conservation +Zone d'exploitation contrôlée +Zoo diff --git a/tools.py b/tools.py index d978b5a21a9c2e33ee1c1594bdf95e9d24dbf5a3..d417d4539b13172d65d20bdb2d8338a289e9dfef 100644 --- a/tools.py +++ b/tools.py @@ -4,7 +4,7 @@ import argparse from termcolor import colored -from strpython.helpers.gazeteer_helpers import get_most_common_id_v3, get_data, get_by_label +from strpython.helpers.geodict_helpers import get_most_common_id_v3, get_data, get_by_label parser = argparse.ArgumentParser()