Commit d1d21e53 authored by Fize Jacques
Browse files

debug n_alias_similar and n_label_similar (checking string similarity)

parent fab75f2b
No related merge requests found
Showing with 16 additions and 4 deletions
+16 -4
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
from .base import Base, parse_label2, return_on_failure from .base import Base, parse_label2, return_on_failure
import jellyfish
from ..helpers import is_date
class Geodict(Base): class Geodict(Base):
"""""" """"""
...@@ -35,7 +36,12 @@ class Geodict(Base): ...@@ -35,7 +36,12 @@ class Geodict(Base):
sorted_by=self.score_field, sized=True, sorted_by=self.score_field, sized=True,
size=n) size=n)
res = self.to_element(self.es_client.search("gazetteer", "place", query)) res = self.to_element(self.es_client.search("gazetteer", "place", query))
return res
res_filtered = []
for el in res:
if not jellyfish.jaro_winkler(el.label.lang,label) < 0.5:
res_filtered.append(el)
return res_filtered
except Exception as e: except Exception as e:
return [] return []
...@@ -51,7 +57,13 @@ class Geodict(Base): ...@@ -51,7 +57,13 @@ class Geodict(Base):
regexp_value=".* ({0}) .*".format(alias), field="aliases", value=alias, sorted=score, regexp_value=".* ({0}) .*".format(alias), field="aliases", value=alias, sorted=score,
sorted_by=self.score_field, sized=True, sorted_by=self.score_field, sized=True,
size=n) size=n)
return res res_filtered = []
for el in res:
for al in el.alialiases.lang:
if not jellyfish.jaro_winkler(al, alias) < 0.5:
res_filtered.append(el)
break
return res_filtered
except : except :
return [] return []
......
# coding = utf-8 # coding = utf-8
import numpy as np import numpy as np
\ No newline at end of file
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment