Commit fb8bfb1b authored by Fize Jacques
Browse files

Create Element, an object for elegantly retrieving data

DEBUG
parent c59ec3c5
# Gazpy
# Install
```{bash}
$ pip3 install numpy elasticsearch pandas geopandas
$ pip3 install .
```
\ No newline at end of file
# coding = utf-8
from functools import wraps
class _AttributeDict(dict):
    """Dictionary whose items assigned via ``d[key] = val`` are also attributes.

    Only item assignment is mirrored; other ``dict`` mutators (``update``,
    ``setdefault``, ...) are left untouched. A key that collides with a
    ``dict`` method name (e.g. ``"items"``) shadows that method on the
    instance, so prefer subscript access for such keys.
    """

    def __setitem__(self, key, val):
        # setattr() only accepts string names; other hashable keys stay
        # reachable through subscript access only.
        if isinstance(key, str):
            setattr(self, key, val)
        return super(_AttributeDict, self).__setitem__(key, val)


def objectify(func):
    """Mimic an object given a dictionary.

    Given a dictionary, create an object and make sure that each of its
    keys is accessible via attributes. Nested dictionaries are converted
    recursively, so ``obj.a.b`` works as well as ``obj['a']['b']``.

    If ``func`` is callable, act as a decorator and objectify the value
    returned by the decorated function; otherwise objectify ``func`` itself.

    :param func: A function or another kind of object.
    :returns: Either the wrapper for the decorator, or the changed value.

    Example::

        >>> oobj = objectify({'old_key': 'old_value'})
        >>> oobj['new_key'] = 'new_value'
        >>> print(oobj['old_key'], oobj['new_key'], oobj.old_key, oobj.new_key)
        old_value new_value old_value new_value

        >>> @objectify
        ... def func():
        ...     return {'old_key': 'old_value'}
        >>> obj = func()
        >>> obj['new_key'] = 'new_value'
        >>> print(obj['old_key'], obj['new_key'], obj.old_key, obj.new_key)
        old_value new_value old_value new_value
    """

    def create_object(value):
        """Create the object.

        Ignore everything if the given value is not a dictionary.

        :param value: A dictionary or another kind of object.
        :returns: Either the created object or the given value.
        """
        if not isinstance(value, dict):
            return value
        ret_obj = _AttributeDict()
        for key, val in value.items():
            # __setitem__ already mirrors the (possibly converted) value as an
            # attribute; setting the attribute again with the raw value would
            # replace a converted nested dictionary by the plain one.
            ret_obj[key] = create_object(val)
        return ret_obj

    # If func is callable, wrap around it and act like a decorator.
    if callable(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            """Wrapper function for the decorator.

            :returns: The objectified return value of the decorated function.
            """
            return create_object(func(*args, **kwargs))
        return wrapper

    # Else just try to objectify the value given.
    return create_object(func)
class Element():
    """
    Read-only view over one gazetteer record.

    The record is accessed by subscript (``series[field]``) and the field
    names are taken from the gazetteer object that produced it, so the same
    properties work whatever the underlying field naming is.
    """

    def __init__(self, series, gazetteer):
        """
        :param series: The record; must support ``series[field_name]``.
        :param gazetteer: Object exposing ``id_field``, ``label_fields``,
            ``alias_fields``, ``coordinates_field``, ``class_field`` and
            ``score_field``.
        """
        self.data = series
        self.p_holder = gazetteer

    @property
    def id(self):
        """Value stored under the gazetteer's ``id_field``."""
        return self.data[self.p_holder.id_field]

    @property
    def label(self):
        """
        Label(s) of the record.

        When ``label_fields`` is a single field name, return the value of
        that field; when it is a tuple/list of field names, return an
        objectified mapping ``{field: value}``.
        """
        fields = self.p_holder.label_fields
        if not isinstance(fields, (tuple, list)):
            return self.data[fields]
        return objectify({lang: self.data[lang] for lang in fields})

    @property
    def alias(self):
        """
        Alias(es) of the record.

        When ``alias_fields`` is a single field name, return the value of
        that field; when it is a tuple/list of ``(field, lang)`` pairs,
        return an objectified mapping ``{lang: data[field][lang]}``.
        """
        fields = self.p_holder.alias_fields
        # The decision must depend on alias_fields only (not label_fields),
        # otherwise a gazetteer mixing a single alias field with several
        # label fields (or the reverse) takes the wrong branch.
        if not isinstance(fields, (tuple, list)):
            return self.data[fields]
        return objectify({lang: self.data[field][lang] for field, lang in fields})

    @property
    def coord(self):
        """Objectified value stored under the gazetteer's ``coordinates_field``."""
        return objectify(self.data[self.p_holder.coordinates_field])

    @property
    def class_(self):
        """Value stored under the gazetteer's ``class_field``."""
        return self.data[self.p_holder.class_field]

    @property
    def score(self):
        """Value stored under the gazetteer's ``score_field``."""
        return self.data[self.p_holder.score_field]

    @property
    def other(self):
        """The whole underlying record, unchanged."""
        return self.data
# coding = utf-8
from ..element import Element
from ..query.query_builder import QueryBuilder
import pandas as pd
import re, os ,inspect
......@@ -78,12 +79,21 @@ class Base():
Base class for getter
"""
def __init__(self,es_client,field_score="score"):
def __init__(self,es_client,**kwargs):
"""Constructor for Base"""
self.score_field=field_score
self.qb=QueryBuilder()
self.es_client=es_client
self.id_field = kwargs.get("id_field","id")
self.label_fields = kwargs.get("label_fields",('fr','en','es','de'))
self.alias_fields = kwargs.get("alias_fields",(("aliases","fr"),("aliases","es"),("aliases","en"),("aliases","de")))
self.coordinates_field = kwargs.get("coordinates_field","coord")
self.class_field = kwargs.get("class_field","class")
self.score_field=kwargs.get("score_field","score")
def get_by_label(self,label,lang,score=True,size=1):
raise NotImplementedError()
......@@ -102,6 +112,12 @@ class Base():
def get_by_id(self,id):
raise NotImplementedError()
def to_element(self, es_query_results):
    """
    Convert elasticsearch query results into a list of `Element`.

    Parameters
    ----------
    es_query_results : dict
        Query response, passed unchanged to `convert_es_to_pandas`.

    Returns
    -------
    list
        One `Element` per row of the converted dataframe, each bound to this
        getter; an empty list when the conversion yields no rows.
    """
    df = self.convert_es_to_pandas(es_query_results)
    # Treat a missing frame like an empty one: a `convert_es_to_pandas`
    # that returns None on zero hits must not make this method raise.
    if df is None or df.empty:
        return []
    return [Element(item, self) for _, item in df.iterrows()]
def convert_es_to_pandas(self,es_query_results):
"""
Return a `pandas.Dataframe` object built from the elasticsearch query results
......@@ -117,7 +133,7 @@ class Base():
Dataframe of the elasticsearch query results
"""
if es_query_results["hits"]["total"] == 0:
return None
return pd.DataFrame()
df = pd.DataFrame([g["_source"] for g in es_query_results["hits"]["hits"]])
if self.score_field in df:
df[self.score_field] = df[self.score_field].apply(lambda x: float(x))
......
......@@ -12,35 +12,35 @@ class Geodict(Base):
def get_by_label(self, label, lang, score=True, size=1):
query=self.qb.query(term=True,field=lang,value=label,sorted=score,sorted_by=self.score_field,sized=True,size=size)
return self.convert_es_to_pandas(self.es_client.search("gazetteer","place",query))
return self.to_element(self.es_client.search("gazetteer","place",query))
def get_by_alias(self, alias, lang, score=True, size=1):
query = self.qb.query(term=True,nested=True,nested_field=lang,field="aliases", value=alias, sorted=score, sorted_by=self.score_field, sized=True,
size=size)
return self.convert_es_to_pandas(self.es_client.search("gazetteer", "place", query))
return self.to_element(self.es_client.search("gazetteer", "place", query))
def get_n_label_similar(self, label, lang, n, score=True):
query = self.qb.query(query_string=True, regexp=True,regexp_value=parse_label2(label,lang), field=lang, value=label, sorted=score,
sorted_by=self.score_field, sized=True,
size=n)
return self.convert_es_to_pandas(self.es_client.search("gazetteer", "place", query))
return self.to_element(self.es_client.search("gazetteer", "place", query))
def get_n_alias_similar(self, alias, lang, n, score=True):
query = self.qb.query(query_string=True, nested=True, nested_field=lang, regexp=True,regexp_value=parse_label2(alias,lang),field="aliases", value=alias, sorted=score,
sorted_by=self.score_field, sized=True,
size=n)
return self.convert_es_to_pandas(self.es_client.search("gazetteer", "place", query))
return self.to_element(self.es_client.search("gazetteer", "place", query))
def get_in_radius(self, lon, lat, unit="km",distance=10, score=True, size=1):
query = self.qb.query(match_all=True,in_radius=True,radius_size=distance,radius_unit=unit,radius_centroid=(lon,lat), sorted=score,
sorted_by=self.score_field, sized=True,geo_field="coord",
size=size)
return self.convert_es_to_pandas(self.es_client.search("gazetteer", "place", query))
return self.to_element(self.es_client.search("gazetteer", "place", query))
def get_by_id(self, id):
query = self.qb.query(term=True, field="id", value=id, sized=True,
size=1)
return self.convert_es_to_pandas(self.es_client.search("gazetteer", "place", query))
return self.to_element(self.es_client.search("gazetteer", "place", query))
def get_by_other_id(self,id,identifier="wikidata"):
if not identifier in ['wikidata','geonames']:
......@@ -51,4 +51,4 @@ class Geodict(Base):
id_field="geonameID"
query = self.qb.query(term=True, field=id_field, value=id, sized=True,
size=1)
return self.convert_es_to_pandas(self.es_client.search("gazetteer", "place", query))
\ No newline at end of file
return self.to_element(self.es_client.search("gazetteer", "place", query))
\ No newline at end of file
......@@ -9,37 +9,37 @@ class Geonames(Base):
def __init__(self,es_client,score="dem"):
"""Constructor for Geodict"""
Base.__init__(self,es_client,field_score=score)
Base.__init__(self,es_client,score_field=score,id_field="geonameid",label_fields="name",alias_fields="alternativenames",coordinates_field="coordinates",class_field="feature_code")
def get_by_label(self, label, lang, score=True, size=1):
query=self.qb.query(term=True,field="name",value=label,sorted=score,sorted_by=self.score_field,sized=True,size=size)
return self.convert_es_to_pandas(self.es_client.search("geonames","geoname",query))
return self.to_element(self.es_client.search("geonames","geoname",query))
def get_by_alias(self, alias, lang, score=True, size=1):
query = self.qb.query(term=True,field="alternativenames", value=alias, sorted=score, sorted_by=self.score_field, sized=True,
size=size)
return self.convert_es_to_pandas(self.es_client.search("geonames", "geoname", query))
return self.to_element(self.es_client.search("geonames", "geoname", query))
def get_n_label_similar(self, label, lang, n, score=True):
query = self.qb.query(query_string=True, regexp=True,regexp_value=parse_label2(label,lang), field="name", value=label, sorted=score,
sorted_by=self.score_field, sized=True,
size=n)
return self.convert_es_to_pandas(self.es_client.search("geonames", "geoname", query))
return self.to_element(self.es_client.search("geonames", "geoname", query))
def get_n_alias_similar(self, alias, lang, n, score=True):
query = self.qb.query(query_string=True, regexp=True,regexp_value=parse_label2(alias,lang),field="alternativenames", value=alias, sorted=score,
sorted_by=self.score_field, sized=True,
size=n)
return self.convert_es_to_pandas(self.es_client.search("geonames", "geoname", query))
return self.to_element(self.es_client.search("geonames", "geoname", query))
def get_in_radius(self, lon, lat, unit="km",distance=10, score=True, size=1):
query = self.qb.query(match_all=True,in_radius=True,radius_size=distance,radius_unit=unit,radius_centroid=(lon,lat), sorted=score,
sorted_by=self.score_field, sized=True,geo_field="coordinates",
size=size)
return self.convert_es_to_pandas(self.es_client.search("geonames", "geoname", query))
return self.to_element(self.es_client.search("geonames", "geoname", query))
def get_by_id(self, id):
query = self.qb.query(term=True, field="geonameid", value=id, sized=True,
size=1)
return self.convert_es_to_pandas(self.es_client.search("geonames", "geoname", query))
return self.to_element(self.es_client.search("geonames", "geoname", query))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment