Commit 8815dccb authored by Fize Jacques
Browse files

debug

parent daa40cdd
No related merge requests found
Showing with 5 additions and 3 deletions
+5 -3
......@@ -41,7 +41,7 @@ class Pipeline(object):
"""
self.lang=lang[:2]
self.ner = kwargs["ner"] if "ner" in kwargs else Spacy(lang=lang[:2])
self.disambiguator=kwargs["disambiguator"] if "disambiguator" in kwargs else MostCommonDisambiguator()
self.disambiguator=kwargs["disambiguator"] if "disambiguator" in kwargs else WikipediaDisambiguator()
self.corpus_name = kwargs["corpus_name"] if "corpus_name" in kwargs else "no_name"
self.no_name = False
......@@ -52,7 +52,7 @@ class Pipeline(object):
self.verbose = kwargs.get("verbose",False)
def parse(self,text,debug=False):
def parse(self,text,debug=False,stop_words=[]):
"""
:param text:
......@@ -65,7 +65,7 @@ class Pipeline(object):
# Disambiguation
se_identified = self.disambiguator.disambiguate(self.lang,ner_output=output)
for top_, id in list(se_identified.items()):
if not id.startswith("GD"):
if not id.startswith("GD") or top_.lower() in stop_words:
del se_identified[top_]
if debug:
print(se_identified)
......@@ -124,6 +124,8 @@ class Pipeline(object):
def pipe_build(self,texts, cpu_count=cpu_count(), **kwargs):
# Extract Spatial entities
stop_words = kwargs.get("stop_words",[])
text_and_spatial_entities = [self.parse(text) for text in tqdm(texts,desc="Extract spatial entities from the texts", disable=(not self.verbose))]
# Filter Output
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment