From d1d21e53ac9383bcd343fa34c792733fce44a200 Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Wed, 3 Jul 2019 23:35:02 +0200 Subject: [PATCH] debug n_alias_similar and n_label_similar (checking string similarity) --- gazpy/gazetteer/geodict.py | 18 +++++++++++++++--- gazpy/helpers.py | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/gazpy/gazetteer/geodict.py b/gazpy/gazetteer/geodict.py index 9b9061c..aac2c4d 100644 --- a/gazpy/gazetteer/geodict.py +++ b/gazpy/gazetteer/geodict.py @@ -2,7 +2,8 @@ from .base import Base, parse_label2, return_on_failure - +import jellyfish +from ..helpers import is_date class Geodict(Base): """""" @@ -35,7 +36,12 @@ class Geodict(Base): sorted_by=self.score_field, sized=True, size=n) res = self.to_element(self.es_client.search("gazetteer", "place", query)) - return res + + res_filtered = [] + for el in res: + if not jellyfish.jaro_winkler(el.label.lang,label) < 0.5: + res_filtered.append(el) + return res_filtered except Exception as e: return [] @@ -51,7 +57,13 @@ class Geodict(Base): regexp_value=".* ({0}) .*".format(alias), field="aliases", value=alias, sorted=score, sorted_by=self.score_field, sized=True, size=n) - return res + res_filtered = [] + for el in res: + for al in el.alialiases.lang: + if not jellyfish.jaro_winkler(al, alias) < 0.5: + res_filtered.append(el) + break + return res_filtered except : return [] diff --git a/gazpy/helpers.py b/gazpy/helpers.py index d3f692b..d0c943d 100644 --- a/gazpy/helpers.py +++ b/gazpy/helpers.py @@ -1,3 +1,3 @@ # coding = utf-8 -import numpy as np \ No newline at end of file +import numpy as np -- GitLab