diff --git a/.gitignore b/.gitignore
index 308a8d4ece879df5bb3c154d77db471faba7ea9e..9166bcb9475f6e2fa67a0cab977517784f3d686a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,9 @@ latest-all.json.gz
 temp/*
 custom_process/__pycache__/*
 out_final.json
+__pycache__
+temp
+geodict*
+out*
+.idea
+.DS_Store
\ No newline at end of file
diff --git a/Readme.md b/Readme.md
index 2132e088aa5e33e346ae6468a2a490ed3e6b0542..a48c1df75e444a0ae73b732fe7938ae0680bce33 100644
--- a/Readme.md
+++ b/Readme.md
@@ -25,4 +25,9 @@ Simply run the command line
     $ python3 gazeteer2es.py [ES host if not localhost]
     
 
+## TODO
+
+* Add missing Geonames entries (look into save.py)
+* Add a step that creates a unique ID for each Geodict entry
+
 **Gaurav Shrivastava, Jacques Fize @ 2017**
\ No newline at end of file
diff --git a/config/configuration.json b/config/configuration.json
index e14e1e9a3a461639887b9c8e6fd82684c9fdd965..076015f3e65056a9ea1eb72c3ed5524d97481c31 100644
--- a/config/configuration.json
+++ b/config/configuration.json
@@ -1,7 +1,8 @@
 {
   "osm_boundaries_dir":"osm-boundaries",
+  "pre_dl_osm": "/Users/jacquesfize/install/osm-boundaries",
   "temp_dir":"temp",
-  "wikidata_dump":"latest-all.json.gz",
+  "wikidata_dump":"/Volumes/Sauvegarde/latest-all.json.gz",
   "lang_list":["en","fr","de","es"],
   "properties_to_extract":[
     {"id":"P47","isMultiple":true,"type":"EntityID","mappings":"keyword","mappings_details":{}},
diff --git a/config/mappings.json b/config/mappings.json
index 027778a38f5fa53f27caf3a6fcc20903a94df5a7..9d493847220e620efe127d139c78255cc839050b 100644
--- a/config/mappings.json
+++ b/config/mappings.json
@@ -58,6 +58,9 @@
                 "osmID": {
                     "type": "keyword"
                 },
+                "wikidataID": {
+                    "type": "keyword"
+                },
                 "path": {
                     "type": "keyword"
                 },
@@ -70,6 +73,15 @@
                 "P706": {
                     "type": "keyword"
                 },
+                "inc_P131": {
+                    "type": "keyword"
+                },
+                "inc_P706": {
+                    "type": "keyword"
+                },
+                "inc_geoname": {
+                    "type": "keyword"
+                },
                  "geometry": {
                     "type": "geo_shape",
                     "tree": "quadtree",
diff --git a/config/mappingsv2.json b/config/mappingsv2.json
new file mode 100644
index 0000000000000000000000000000000000000000..36e6f952a7a52c37e97dc438421d273ce92574cf
--- /dev/null
+++ b/config/mappingsv2.json
@@ -0,0 +1,97 @@
+{
+    "mappings": {
+        "_default_": {
+            "properties": {
+                "de": {
+                    "type": "keyword"
+                },
+                "en": {
+                    "type": "keyword"
+                },
+                "es": {
+                    "type": "keyword"
+                },
+                "fr": {
+                    "type": "keyword"
+                },
+                "aliases": {
+                    "type": "nested",
+                    "properties": {
+                        "de": {
+                            "type": "keyword"
+                        },
+                        "en": {
+                            "type": "keyword"
+                        },
+                        "es": {
+                            "type": "keyword"
+                        },
+                        "fr": {
+                            "type": "keyword"
+                        }
+                    }
+                },
+                "instance_of": {
+                    "type": "keyword"
+                },
+                "coord": {
+                    "type": "geo_point"
+                },
+                "geonameID": {
+                    "type": "keyword"
+                },
+                "class": {
+                    "type": "keyword"
+                },
+                "id": {
+                    "type": "keyword"
+                },
+                "country": {
+                    "type": "keyword"
+                },
+                "continent": {
+                    "type": "keyword"
+                },
+                "score": {
+                    "type": "float"
+                },
+                "osmID": {
+                    "type": "keyword"
+                },
+                "wikidataID": {
+                    "type": "keyword"
+                },
+                "path": {
+                    "type": "keyword"
+                },
+                "P47": {
+                    "type": "keyword"
+                },
+                "share_border_with": {
+                    "type": "keyword"
+                },
+                "P131": {
+                    "type": "keyword"
+                },
+                "P706": {
+                    "type": "keyword"
+                },
+                "located_in_adm_terr_ent": {
+                    "type": "keyword"
+                },
+                "located_in_terr_feature": {
+                    "type": "keyword"
+                },
+                "inc_geoname": {
+                    "type": "keyword"
+                },
+                "geometry": {
+                    "type": "geo_shape",
+                    "tree": "quadtree",
+                    "precision": "100m"
+                }
+
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/custom_process/__pycache__/__init__.cpython-36.pyc b/custom_process/__pycache__/__init__.cpython-36.pyc
index 32b2ceac0dd176fee582e3e5d3deace746143615..c31ddfe621f9a7f7589d980786e78ce493b880b0 100644
Binary files a/custom_process/__pycache__/__init__.cpython-36.pyc and b/custom_process/__pycache__/__init__.cpython-36.pyc differ
diff --git a/custom_process/__pycache__/basic_extraction.cpython-36.pyc b/custom_process/__pycache__/basic_extraction.cpython-36.pyc
index c1b722257898fa6ddc3fd047271d81092b6ac3d8..696cff8ebe4c37a5a81ddf6d79d1281e710accb3 100644
Binary files a/custom_process/__pycache__/basic_extraction.cpython-36.pyc and b/custom_process/__pycache__/basic_extraction.cpython-36.pyc differ
diff --git a/custom_process/__pycache__/class_extraction.cpython-36.pyc b/custom_process/__pycache__/class_extraction.cpython-36.pyc
index 80b54e87a17d6d781a3714190e7c09dddb421502..7b0e8984cc218fb24a251281dfabdbffed609a78 100644
Binary files a/custom_process/__pycache__/class_extraction.cpython-36.pyc and b/custom_process/__pycache__/class_extraction.cpython-36.pyc differ
diff --git a/custom_process/__pycache__/property_extract.cpython-36.pyc b/custom_process/__pycache__/property_extract.cpython-36.pyc
index de7db971e619abc91937ff3e89f474013442c533..3a1cbbcf4f4c92d01d8e3736d6f759b25ac0e81b 100644
Binary files a/custom_process/__pycache__/property_extract.cpython-36.pyc and b/custom_process/__pycache__/property_extract.cpython-36.pyc differ
diff --git a/custom_process/__pycache__/wiki_links.cpython-36.pyc b/custom_process/__pycache__/wiki_links.cpython-36.pyc
index 6763cf3c0d9ad9053ea0cc91d166545483c417f6..4c1647e55e5c3f7e32394fa4ed56cbf842addf52 100644
Binary files a/custom_process/__pycache__/wiki_links.cpython-36.pyc and b/custom_process/__pycache__/wiki_links.cpython-36.pyc differ
diff --git a/custom_process/basic_extraction.py b/custom_process/basic_extraction.py
index 68f00cea6e8809ae7ef174bfe4ad0e19dbf36076..7969e3b36840acdc9acd07b9f8f66e9ee0c5b109 100644
--- a/custom_process/basic_extraction.py
+++ b/custom_process/basic_extraction.py
@@ -13,7 +13,9 @@ from wikidata.reader import Reader
 from wikidata.process_wd import *
 
 config=Configuration("config/configuration.json")
+
 class BasicExtraction(Process):
+
     def __init__(self, id, labels_fn,page_rank):
         super(BasicExtraction, Process.__init__(self, id))
         self.dataframe = {}
@@ -32,6 +34,7 @@ class BasicExtraction(Process):
         self.labels_list = json.load(f)
         f.close()
 
+        print("Loading the PageRank data ...")
         f = open(page_rank,encoding = 'utf-8')
         self.scores = json.load(f)
         f.close()
diff --git a/gazetteer.py b/gazetteer.py
index e16a7bec3b45501c0258a2565e588381687732c8..bf3bcf08b1741ea68cfa7f78c949e31ef2f26cef 100644
--- a/gazetteer.py
+++ b/gazetteer.py
@@ -8,6 +8,8 @@ from custom_process.wiki_links import *
 from custom_process.class_extraction import *
 from custom_process.property_extract import *
 from gis.convex_hull import get_convex_hull
+from tqdm import tqdm
+from utils import wc_l
 
 __config=Configuration("config/configuration.json")
 
@@ -15,7 +17,7 @@ __config=Configuration("config/configuration.json")
 
 
 def temp(filename):
-    return os.path.join(__config.temp_dir,filename)
+    return os.path.join(__config.temp_dir, filename)
 
 def import_data():
 
@@ -30,25 +32,36 @@ def import_data():
 
     print("Downloading Geonames ...")
     filename=temp("allCountries.zip")
-    urllib.request.urlretrieve(
-        "http://download.geonames.org/export/dump/allCountries.zip",filename)
-    print("Geonames data retrieved !!")
-    print("Extracting the geonames data!")
-    zip_ref = zipfile.ZipFile(filename, 'r')
-    zip_ref.extractall("./{0}".format(__config.temp_dir))
-    print("Extracted !")
+    if not os.path.exists(temp("allCountries.txt")):
+        urllib.request.urlretrieve(
+            "http://download.geonames.org/export/dump/allCountries.zip",filename)
+        print("Geonames data retrieved !!")
+
+        print("Extracting the geonames data!")
+        zip_ref = zipfile.ZipFile(filename, 'r')
+        zip_ref.extractall("./{0}".format(__config.temp_dir))
+        print("Extracted !")
+
     print("Extracting labels")
     os.system('cut -f 1,2 {0} > {1}'.format(temp("allCountries.txt"),temp("labels.txt")))
+
     print("Extracting the class")
     os.system('cut -f 1,7,8 {0} > {1}'.format(temp("allCountries.txt"),temp("class_codes.txt")))
+
+    size_label_txt=wc_l(temp("labels.txt"))
     f = open(temp("labels.txt"), encoding = 'utf-8')
     labels = {}
-    for line in f:
+    for line in tqdm(f,total=size_label_txt,desc="Building the label JSON for every GeonameID"):
         line = line.strip().split("\t")
         labels[line[0]] = line[1]
     f.close()
+
     open(temp("labels.json"), "w").write(json.dumps(labels))#, ensure_ascii=False))
-    os.system('git clone https://github.com/missinglink/osm-boundaries.git')
+
+    # Reuse a pre-downloaded copy of osm-boundaries when one is declared in the
+    # configuration, otherwise clone the repository.
+    if not "pre_dl_osm" in __config:
+        os.system('git clone https://github.com/missinglink/osm-boundaries.git')
+    else:
+        __config["osm_boundaries_dir"] = __config["pre_dl_osm"]
 
 
 def basic_gazetteer(outfile):
@@ -62,17 +75,16 @@ def basic_gazetteer(outfile):
     """
 
     if not os.path.isfile(os.path.join(__config.temp_dir,"labels.json")):
-        print("Give correct labels file name!!")
-        return False
+        raise FileNotFoundError("labels.json not found in the temp directory. Run import_data() first.")
+
     if not os.path.isfile(__config.wikidata_dump):
-        print('Give correct path to wikidata json dump ')
-        return False
+        raise FileNotFoundError("Wikidata JSON dump not found at '{0}'. Check 'wikidata_dump' in the configuration.".format(__config.wikidata_dump))
 
-    proc1 = BasicExtraction(1,os.path.join(__config.temp_dir,"labels.json"),"resources/wd_page_rank.json")
-    dump = Reader(__config.wikidata_dump,'utf-8')
+    proc1 = BasicExtraction(1, os.path.join(__config.temp_dir, "labels.json"), "resources/wd_page_rank.json")
+    dump = Reader(__config.wikidata_dump, 'utf-8')
     controller = WDController(dump,proc1)
     controller.process_all()
-    open(outfile, 'w').write(json.dumps(proc1.dataframe))#,ensure_ascii=False))
+    open(outfile, 'w').write(json.dumps(proc1.dataframe))
     return True
 
 
@@ -92,7 +104,7 @@ def add_properties(input_gazetteer,output_gazetteer,configuration_file):
     dump = Reader(__config.wikidata_dump,'utf-8')
     controller = WDController(dump,proc1)
     controller.process_all()
-    open(output_gazetteer, 'w').write(json.dumps(proc1.dataframe))#,ensure_ascii=False))
+    open(output_gazetteer, 'w').write(json.dumps(proc1.dataframe))
     return True
 
 
@@ -106,8 +118,8 @@ def extract_classes(gazeteer):
     :return:
     """
     if not os.path.isfile(__config.wikidata_dump):
-        print('Give correct path to wikidata json dump')
-        return None
+        raise FileNotFoundError("Wikidata JSON dump not found at '{0}'. Check 'wikidata_dump' in the configuration.".format(__config.wikidata_dump))
+
     proc3 = ClassExtraction(1, os.path.join(__config.temp_dir,"class_codes.txt"), gazeteer)
     dump = Reader(__config.wikidata_dump, 'utf-8')
     controller = WDController(dump, proc3)
@@ -130,7 +142,7 @@ def add_classes(gazeteer,outfile):
     iterations = 0
     places = 0
     keys = set(data.keys())
-    for key in keys:
+    for key in tqdm(keys,desc="Add Classes"):
         iterations = iterations + 1
         temp_ = []
         if 'instance_of' in data[key].keys():
@@ -163,7 +175,7 @@ def extract_missing_WikiIDS(interm_outfile,outfile):
     iterations = 0
     output=open(interm_outfile,"w")
     total=len(paths)
-    output.write(json.dumps(finding_links(paths)))#,ensure_ascii=False))
+    output.write(json.dumps(finding_links(paths)))
     proc2 = WikipediaURI(2, outfile, interm_outfile)
     dump = Reader(__config.wikidata_dump, 'utf-8')
     controller = WDController(dump, proc2)
@@ -189,8 +201,8 @@ def missing_wikidata_IDS(missing_ids):
     df = read_tsv(os.path.join(__config.osm_boundaries_dir,'meta.tsv'),encoding = 'utf-8',columns = True)#'./osm-boundaries/meta.tsv'
     wikidata_IDs = []
     paths = [os.path.join(__config.osm_boundaries_dir,'data',path) for path in df['path']]
-    iterations = 0
-    for path in paths:
+    # iterations = 0
+    for path in tqdm(paths,desc="Browsing OSM data"):
         f = open(path,encoding = 'utf-8')
         dataframe = json.load(f)
         f.close()
@@ -207,9 +219,9 @@ def missing_wikidata_IDS(missing_ids):
                 wikidata_IDs.append(None)
         else:
             wikidata_IDs.append(None)
-        if iterations%1000 == 0:
-            sys.stdout.write("\r iterations: "+'{:,}'.format(iterations))
-        iterations = iterations + 1
+        # if iterations%1000 == 0:
+        #     sys.stdout.write("\r iterations: "+'{:,}'.format(iterations))
+        # iterations = iterations + 1
     df['Wiki_IDs'] = wikidata_IDs
     df.to_csv(temp('meta_all.csv'),index = False)#'temp/meta_all.csv'
 
@@ -230,11 +242,11 @@ def adding_geometry(infile,out_file,output_final_fn):
     Wiki_IDs = set(list(path_association.keys()))
     data = json.loads(open(out_file).read())
     outfile = open(output_final_fn, 'w')
-    iterations = 0
+    # iterations = 0
     places = 0
     keys = set(data.keys())
-    for key in keys:
-        iterations = iterations + 1
+    for key in tqdm(keys,desc="Browsing Geodict"):
+        # iterations = iterations + 1
         temp= data[key]
         temp["id"]=key
         if key in Wiki_IDs:
@@ -247,43 +259,85 @@ def adding_geometry(infile,out_file,output_final_fn):
         outfile.write(json.dumps(temp)+"\n")#,ensure_ascii=False
         del data[key]
 
-        if iterations % 100 == 0:
-            sys.stdout.write("\rEntity Parsed: " + '{:,}'.format(iterations) + " Places with boundaries parsed: " + '{:,}'.format(places))
-
+        # if iterations % 100 == 0:
+        #     sys.stdout.write("\rEntity Parsed: " + '{:,}'.format(iterations) + " Places with boundaries parsed: " + '{:,}'.format(places))
 
+def add_final_spatial_entities(input,output):
+    """
+    Add the Geonames entries missing from Geodict and assign a unique Geodict ID
+    ("GD"-prefixed) to every entry (work in progress).
+    :param input: gazetteer produced by adding_geometry(), one JSON entry per line
+    :param output: path to write the extended gazetteer to
+    :return:
+    """
+    d_geo = {}
+    geonames_i = open(temp("allCountries.txt"), encoding="utf-8")
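+    # Geonames dump columns used below: 0 = geonameid, 1 = name, 3 = alternate names,
+    # 4 = latitude, 5 = longitude, 6 = feature class, 7 = feature code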
+    for entry in geonames_i:
+        row = entry.split("\t")
+
+        d_geo[row[0]] = {lang: row[1] for lang in ["en", "fr", "es", "de"]}
+        d_geo[row[0]]["aliases"] = {lang: row[3].split(",") for lang in ["en", "fr", "es", "de"]}
+        d_geo[row[0]]["coord"] = {"lat": float(row[4]), "lon": float(row[5])}
+        d_geo[row[0]]["class"] = ["{0}-{1}".format(row[6], row[7])]
+
+    geoname_id_index = set(d_geo.keys())
+
+    already_in_geodict = set([])
+    for line in open(input):
+        data = json.loads(line.strip())
+        if "geonameID" in data:
+            already_in_geodict.add(data["geonameID"])
+
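+    # Geonames IDs that are not referenced by any existing Geodict entry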
+    diff = geoname_id_index.difference(already_in_geodict)
+    prefix = "GD"
+    i = 1
+    output = open(output, 'w')
+    size_input = wc_l(input)
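+    # First pass: keep every existing entry, move its Wikidata ID to "wikidataID"
+    # and assign it a sequential Geodict ID of the form GD<i>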
+    for line in tqdm(open(input),total=size_input,desc="Browsing Geodict"):
+        data = json.loads(line.strip())
+        data["wikidataID"] = data["id"]
+        data["id"] = prefix + str(i)
+        output.write(json.dumps(data) + "\n")
+        i += 1
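+    # Second pass: append the entries that exist only in Geonames, with their own Geodict IDs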
+    for geo_id in tqdm(diff):
+        data = d_geo[geo_id]
+        data["id"] = prefix + str(i)
+        data["geonameID"] = geo_id
+        output.write(json.dumps(data) + "\n")
+        i += 1
 
 def main():
     start=time.time()
     if not os.path.exists(__config.temp_dir):
         os.makedirs(__config.temp_dir)
     # Import the data sources required to be harvested for creation of gazetteer
-    print("[1/6] Download required datasets...")
+    print("[1/7] Download required datasets...")
     import_data()
 
     # Create a first basic gazeteer
-    print("[2/6] Building the core gazetteer...")
+    print("[2/7] Building the core gazetteer...")
     basic_gazetteer(temp("1stoutput.json"))
 
     # Associate geonames classe to the instance_of(P31) values
-    print("[3/6] Associate a class to each entry...")
+    print("[3/7] Associate a class to each entry...")
     extract_classes(temp("1stoutput.json"))
-
     # Add class to each entity
     add_classes(temp("1stoutput.json"),temp("2ndoutput.json"))
 
     # Extract missing wikidata IDs in the boundary data
-    print("[4/6] Find missing WD ids within boundary data...")
+    print("[4/7] Find missing WD ids within boundary data...")
     extract_missing_WikiIDS(temp('found_missing_links.json'),temp('missing_Wikidata_IDS.txt'))
     missing_wikidata_IDS(temp('missing_Wikidata_IDS.txt'))
 
     # Adding properties from configuration_file
-    print("[5/6] Add user properties...")
+    print("[5/7] Add user properties...")
     add_properties(temp("2ndoutput.json"),temp("3rdoutput.json"),'config/configuration.json')
 
     # Add boundaries in the final data
-    print("[6/6] Adding adminstrative boundary/ies...")
+    print("[6/7] Adding administrative boundaries...")
     adding_geometry(temp("meta_all.csv"),temp("3rdoutput.json"),'out_final.json')
 
+    print("[7/7] Adding missing Geonames entries and building Geodict IDs...")
+    add_final_spatial_entities("out_final.json","out_final_extended.json")
     print("The gazeteer was created in {0} hours".format(((time.time()-start)/60)/60))
 
 
diff --git a/gazetteer2es.py b/gazetteer2es.py
index f393a2546759a4885d1ea2f68704fcef7bcdafae..fd4fc1127bea7d914ac31d50a6c8f2f30e8a28e6 100644
--- a/gazetteer2es.py
+++ b/gazetteer2es.py
@@ -2,74 +2,75 @@ import argparse, json, sys
 from elasticsearch import Elasticsearch,helpers
 from elasticsearch import helpers
 import copy
+import os
+from tqdm import tqdm
+from utils import wc_l
 
-def polygon_transformation4ES(temp,simple=True):
-    final = []
-    if simple:
-        final=copy.copy(temp)
-        final.append(temp[0])
-        final=final
-    else:
-        for i in temp:
-            t=copy.copy(i)
-            t.append(i[0])
-            final.append(t)
-    return final
 
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("input", help="give input json file")
     parser.add_argument("-e", "--es_host", help="Elasticsearch Host address", default="127.0.0.1")
+    parser.add_argument("-p", "--es_port", help="Elasticsearch Host port", default="9200")
     args = parser.parse_args()
+
+    if not os.path.exists(args.input):
+        raise FileNotFoundError("Input file '{0}' not found!".format(args.input))
+
     file_name = args.input
-    es_client = Elasticsearch(args.es_host)
+    es_client = Elasticsearch([{"host": args.es_host, "port": int(args.es_port)}])
+
     if not es_client.ping():
-        print("Can't connect to ES ! ")
-        sys.exit(1)
+        raise ConnectionError("Could not connect to Elasticsearch at {0}:{1}".format(args.es_host, args.es_port))
+
+    # If the index already exists in the database, delete it
     if es_client.indices.exists(index="gazetteer"):
         es_client.indices.delete(index="gazetteer")
+
+    # Open input file
     gazetteer = open(file_name, encoding='utf-8')
-    i = 1
-    mappings = json.load(open("config/mappings.json"))
+
+    mappings = json.load(open("config/mappings.json"))
+    # Complete the mapping with the custom properties declared in the configuration file
     property_to_be_mapped = json.load(open('config/configuration.json'))
     for prop in property_to_be_mapped["properties_to_extract"]:
         mappings['mappings']['_default_']['properties'][prop['id']] = {'type':prop["mappings"]}
         if prop["mappings_details"]:
             for k,v in prop["mappings_details"].items():
                 mappings['mappings']['_default_']['properties'][prop['id']][k]=v
-    print(mappings)
+    print("Mapping of Geodict index: ", mappings)
+
+    # Create the "gazetteer" index in Elasticsearch
     es_client.indices.create(index="gazetteer", body=mappings)
     action_list=[]
-    for line in gazetteer:
+
+    number_of_entries = wc_l(file_name)
+
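+    # Buffer entries and send them to Elasticsearch in bulk batches of 1,000 documents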
+    for line in tqdm(gazetteer,desc="Importing ...",total=number_of_entries):
         data = json.loads(line.strip())
         if '_score' in data.keys():
             data['score'] = data['_score']
             del data['_score']
         if "geometry" in data:
-            del data["geometry"]
+            del data["geometry"] # geo_shape geometries are troublesome to index in ES, so drop them
         if "coord" in data:
-            if data["coord"]["lat"] >90 or data["coord"]["lon"] >180:
-                i+=1
-                continue
-        if not data["fr"]:
-            i+=1
-            continue
-                #print("AFTER",data["geometry"])
-                #return
-        #es_client.index("gazetteer", "place", data)
+            data["coord"]["lat"]=float(data["coord"]["lat"])
+            data["coord"]["lon"]= float(data["coord"]["lon"])
+
+            if abs(data["coord"]["lat"]) > 90 or abs(data["coord"]["lon"]) > 180:
+                continue
+
+        if not data["fr"]:
+            continue
+
         actions = {
         "_index": "gazetteer",
         "_type": "place",
         "_source": data
         }
-        #print(data["fr"])
         action_list.append(actions)
-        if i % 1000 == 0:
-            #print(action_list)
+        if len(action_list) % 1000 == 0:
             helpers.bulk(es_client,action_list,request_timeout=30)
-            sys.stdout.write("\rEntity transferred: " + '{:,}'.format(i))
             action_list = []
-        i += 1
+
+    # Send the last, partially filled batch
+    if action_list:
+        helpers.bulk(es_client, action_list, request_timeout=30)
 
 
 if __name__ == '__main__':
diff --git a/hierarchy.py b/hierarchy.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e33428a1823850d37612c9efb615c4db77da478
--- /dev/null
+++ b/hierarchy.py
@@ -0,0 +1,60 @@
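+# coding = utf-8
+"""
+Post-processing script for Geodict: for every entry, computes the chain of Geonames
+parents (from hierarchy.txt) and rewrites the Wikidata-based relations P131, P706 and
+P47 as Geodict IDs stored in human-readable fields.
+"""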
+import json
+
+import pandas as pd
+from tqdm import tqdm
+
+from utils import wc_l
+
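+# hierarchy.txt holds (parentId, childId, type) triples, i.e. the Geonames hierarchy export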
+df = pd.read_csv("hierarchy.txt",sep="\t",header=None,names="parentId childId type".split())
+
+ids = df.parentId.values.tolist()
+ids.extend(df.childId.values.tolist())
+ids = list(set(ids))
+
+inclusion_relations_ = dict(df["childId parentId".split()].values)
+
+
+inc_dict_geonames = {} 
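+# For each child, walk up the parent chain to collect all of its ancestors,
+# reusing (and filling) already-resolved parent chains along the way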
+for childId,parentId in tqdm(inclusion_relations_.items()):
+    if not childId in inc_dict_geonames:
+        inc_dict_geonames[childId] = [parentId]
+        if parentId in inc_dict_geonames:
+            inc_dict_geonames[childId].extend(inc_dict_geonames[parentId])
+        else:
+            B = parentId
+            while 1:
+                if B in inclusion_relations_:
+                    inc_dict_geonames[childId].append(inclusion_relations_[B])
+                    B = inclusion_relations_[B]
+                else:
+                    break
+            inc_dict_geonames[parentId] = inc_dict_geonames[childId][1:]
+
+path = "out_final_extended.json"
+geonames2GD, wikidata2GD = {}, {}
+
+size_data = wc_l(path)
+
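+# Map Geonames and Wikidata identifiers to their new Geodict IDs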
+for line in tqdm(open(path),total=size_data):
+    data = json.loads(line.strip("\n,"))
+    if "geonameID" in data:
+        geonames2GD[data["geonameID"]]=data["id"]
+    if "wikidataID" in data:
+        wikidata2GD[data["wikidataID"]]=data["id"]
+
+output_path = "geodict_final_29_04_19.json"
+
+output = open(output_path,'w')
+
+name_col = {"P131":"located_in_adm_terr_ent",
+            "P706":"located_in_terr_feature",
+            "P47":"share_border_with"}
+
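+# Rewrite the Wikidata-based relations (P131, P706, P47) as Geodict IDs and attach
+# each entry's Geonames hierarchy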
+for line in tqdm(open(path),total=size_data):
+    data = json.loads(line.strip("\n,"))
+    for property_ in ["P131","P706","P47"]:
+        if not property_ in data:
+            continue
+        data[name_col[property_]] = [wikidata2GD[id_] for id_ in data[property_] if id_ in wikidata2GD]
+    if "geonameID" in data and data["geonameID"] in inc_dict_geonames:
+        data["geoname_hierarchy"] = inc_dict_geonames[data["geonameID"]]
+    output.write(json.dumps(data) + "\n")
\ No newline at end of file
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ef956215d06dfb9c1c7b04d06df1ce170ab3454
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,25 @@
+# coding = utf-8
+import os
+
+def blocks(files, size=65536):
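+    """Yield fixed-size chunks of an open file so it is never loaded into memory at once."""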
+    while True:
+        b = files.read(size)
+        if not b: break
+        yield b
+
+def wc_l(text_input_fn):
+    """
+    Count the number of lines in a file
+
+    Parameters
+    ----------
+    text_input_fn : str
+        filepath
+
+    Returns
+    -------
+    int
+        number of lines in the file
+    """
+    if not os.path.exists(text_input_fn):
+        raise FileNotFoundError("{0} does not exist!".format(text_input_fn))
+
+    with open(text_input_fn, "r", encoding="utf-8", errors='ignore') as f:
+        return sum(bl.count("\n") for bl in blocks(f))
+
diff --git a/wikidata/helpers.py b/wikidata/helpers.py
index 05040f1512d31f8dbb321e150f0e437476a55c03..623a06292a5cbafa3e5cfb548f4d26acf56c37b7 100644
--- a/wikidata/helpers.py
+++ b/wikidata/helpers.py
@@ -62,6 +62,7 @@ def read_Tsv(filename,encoding='ascii'):
     column = text[0]
     del text[0]
     return pd.DataFrame(text,columns = column)
+
 #finding the missing link for wikipedia pages for which wikidata_IDs are not available
 def finding_links(files):
     missing_uri=[]
diff --git a/wikidata/property_wd.py b/wikidata/property_wd.py
index d75243e9afa670e10ff1f08cd0c274d3e055b18e..6181cf85007388a25c6adcc234d881d83e66658d 100644
--- a/wikidata/property_wd.py
+++ b/wikidata/property_wd.py
@@ -2,9 +2,9 @@
 
 class Property(object):
     """docstring for property."""
-    def __init__(self, id,isMultiple,type_):
+    def __init__(self, id, isMultiple, type_):
         self.id=id
-        self.isMultiple=isMultiple
+        self.isMultiple = isMultiple
         self.type=type_
 
     def exists(self,data):
@@ -14,4 +14,4 @@ class Property(object):
         return False
 
     def extractData(self,data):
-        return self.type.extractData(self.id,self.isMultiple,data)
+        return self.type.extractData(self.id, self.isMultiple, data)
diff --git a/wikidata/reader.py b/wikidata/reader.py
index cc6516d2303c88f179ebaaf91758cbfe1ad4b39a..56ea1817e461df5b66a073e86036a05091708703 100644
--- a/wikidata/reader.py
+++ b/wikidata/reader.py
@@ -1,6 +1,10 @@
 # coding=utf-8
 from gzip import GzipFile
 import json
+
+from utils import wc_l
+
+
 class Reader(object):
     """docstring for Reader."""
     def __init__(self, name, decoding):
@@ -9,6 +13,7 @@ class Reader(object):
         self.decoding = decoding
         self.dump = GzipFile(name,'r')
         self.line = self.dump.readline()
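+        # NOTE: wc_l counts newline bytes in the compressed .gz file, so this is only a rough
+        # figure (presumably meant as a progress-bar total), not an exact number of entities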
+        self.size_file = wc_l(name)
 
     def has_next(self):
         self.line = self.dump.readline().decode(self.decoding)
diff --git a/wikidata/types_wd.py b/wikidata/types_wd.py
index 42332e44bba00f8fc49903cf0b7dd59189c3f788..4564266ab77f09e5a2c46464454d8d9e5afdb0ed 100644
--- a/wikidata/types_wd.py
+++ b/wikidata/types_wd.py
@@ -125,15 +125,15 @@ class Time(Type):
     def extractMultiple(self, propID, data):
         result = []
         for i in range(len(data['claims'][propID])):
-            result.append(parsedate(data['claims'][propID][i]['mainsnak']['datavalue']['value']['time']))
+            result.append(parseDate(data['claims'][propID][i]['mainsnak']['datavalue']['value']['time']))
         return result
 
     def extractSingle(self, propID, data):
-        return parsedate(data['claims'][propID][0]['mainsnak']['datavalue']['value']["time"])
+        return parseDate(data['claims'][propID][0]['mainsnak']['datavalue']['value']["time"])
 
     def check_conformity(self, propID, data):
         try:
-            parsedate(data['claims'][propID][0]['mainsnak']['datavalue']['value']["time"])
+            parseDate(data['claims'][propID][0]['mainsnak']['datavalue']['value']["time"])
             return True
         except Exception as e:
             return False