Commit 8815dccb authored by Fize Jacques
Browse files

debug

parent daa40cdd
No related merge requests found
Showing with 5 additions and 3 deletions
+5 -3
...@@ -41,7 +41,7 @@ class Pipeline(object): ...@@ -41,7 +41,7 @@ class Pipeline(object):
""" """
self.lang=lang[:2] self.lang=lang[:2]
self.ner = kwargs["ner"] if "ner" in kwargs else Spacy(lang=lang[:2]) self.ner = kwargs["ner"] if "ner" in kwargs else Spacy(lang=lang[:2])
self.disambiguator=kwargs["disambiguator"] if "disambiguator" in kwargs else MostCommonDisambiguator() self.disambiguator=kwargs["disambiguator"] if "disambiguator" in kwargs else WikipediaDisambiguator()
self.corpus_name = kwargs["corpus_name"] if "corpus_name" in kwargs else "no_name" self.corpus_name = kwargs["corpus_name"] if "corpus_name" in kwargs else "no_name"
self.no_name = False self.no_name = False
...@@ -52,7 +52,7 @@ class Pipeline(object): ...@@ -52,7 +52,7 @@ class Pipeline(object):
self.verbose = kwargs.get("verbose",False) self.verbose = kwargs.get("verbose",False)
def parse(self,text,debug=False): def parse(self,text,debug=False,stop_words=[]):
""" """
:param text: :param text:
...@@ -65,7 +65,7 @@ class Pipeline(object): ...@@ -65,7 +65,7 @@ class Pipeline(object):
# Disambiguation # Disambiguation
se_identified = self.disambiguator.disambiguate(self.lang,ner_output=output) se_identified = self.disambiguator.disambiguate(self.lang,ner_output=output)
for top_, id in list(se_identified.items()): for top_, id in list(se_identified.items()):
if not id.startswith("GD"): if not id.startswith("GD") or top_.lower() in stop_words:
del se_identified[top_] del se_identified[top_]
if debug: if debug:
print(se_identified) print(se_identified)
...@@ -124,6 +124,8 @@ class Pipeline(object): ...@@ -124,6 +124,8 @@ class Pipeline(object):
def pipe_build(self,texts, cpu_count=cpu_count(), **kwargs): def pipe_build(self,texts, cpu_count=cpu_count(), **kwargs):
# Extract Spatial entities # Extract Spatial entities
stop_words = kwargs.get("stop_words",[])
text_and_spatial_entities = [self.parse(text) for text in tqdm(texts,desc="Extract spatial entities from the texts", disable=(not self.verbose))] text_and_spatial_entities = [self.parse(text) for text in tqdm(texts,desc="Extract spatial entities from the texts", disable=(not self.verbose))]
# Filter Output # Filter Output
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment