An error occurred while loading the file. Please try again.
-
Fize Jacques authored
Debug disambiguator delete old disambiguator classes Add Parallelization for STR generation and Transform
e9d151de
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# coding: utf-8
import copy
import string
import numpy as np
from ..ner.ner import NER
class Disambiguator(object):
    """Base class for toponym disambiguators.

    Subclasses implement ``disambiguate_one_by_one`` and/or
    ``disambiguate_context_based``; ``disambiguate`` picks one of them
    according to the ``context_based`` flag given to the constructor.
    """

    def __init__(self, one_by_one=False, context_based=False):
        """Constructor for Disambiguator

        Parameters
        ----------
        one_by_one : bool
            stored on the instance as ``self.one_by_one``
        context_based : bool
            when True, ``disambiguate`` delegates to
            ``disambiguate_context_based``; otherwise it delegates to
            ``disambiguate_one_by_one``
        """
        self.one_by_one = one_by_one
        self.context_based = context_based

    def disambiguate(self, lang, ner_output=None, toponyms=None):
        """
        Run the disambiguation on the NER output or on a list of toponyms

        Parameters
        ----------
        lang : str
            language
        ner_output : 2D numpy array, optional
            NER output with two columns; when given in that shape, the
            toponyms are extracted from it with ``parse_ner_output``
        toponyms : list, optional
            toponyms to disambiguate, used when ``ner_output`` does not
            provide them

        Returns
        -------
        dict
            {toponym : geodictID}; an empty dict when ``ner_output`` is a
            numpy array that is not two-dimensional

        Raises
        ------
        ValueError
            if no usable ``ner_output`` and no ``toponyms`` are given
        """
        parsed_from_ner = False
        # Only inspect the shape of real arrays: ``ner_output`` defaults to
        # None, in which case the caller is expected to pass ``toponyms``.
        if isinstance(ner_output, np.ndarray):
            if ner_output.ndim != 2:
                # e.g. an empty NER result: nothing to disambiguate
                return {}
            if ner_output.shape[1] == 2:
                toponyms = self.parse_ner_output(ner_output)
                parsed_from_ner = True

        # An empty list parsed from a valid NER output is still forwarded;
        # only a missing input is an error.
        if not parsed_from_ner and not toponyms:
            raise ValueError("Either enter a list of toponyms or give ner_output")

        if self.context_based:
            return self.disambiguate_context_based(toponyms, lang)
        return self.disambiguate_one_by_one(toponyms, lang)

    def disambiguate_one_by_one(self, toponyms, lang):
        """
        Disambiguation process when toponyms are geocoded one by one.

        Must be overridden by subclasses.

        Parameters
        ----------
        toponyms : list
            toponyms
        lang : str
            language

        Returns
        -------
        dict
            {toponym : geodictID}
        """
        raise NotImplementedError

    def disambiguate_context_based(self, toponyms, lang):
        """
        Disambiguation process when toponyms are geocoded using each one of them

        Must be overridden by subclasses.

        Parameters
        ----------
        toponyms : list
            toponyms
        lang : str
            language

        Returns
        -------
        dict
            {toponym : geodictID}
        """
        raise NotImplementedError

    def parse_ner_output(self, ner_output):
        """
        Extract the toponyms from a NER output

        Parameters
        ----------
        ner_output : 2D numpy array
            NER output; column 0 holds the entity (a string or a sequence
            of tokens), column 1 holds its tag

        Returns
        -------
        list
            one string per row tagged ``NER._unified_tag["place"]``;
            token sequences are joined with a single space
        """
        place_rows = ner_output[ner_output[:, 1] == NER._unified_tag["place"]]
        toponyms = []
        for row in place_rows:
            entity = row[0]
            toponyms.append(entity if isinstance(entity, str) else " ".join(entity))
        return toponyms