Commit 05200669 authored by Decoupes Remy's avatar Decoupes Remy
Browse files

refactoring the elasticquery section

parent e39198ed
......@@ -48,6 +48,35 @@ def elastic_pagination_scrolling(result, headers):
pbar.close()
return results
def elasticquery(es_url, index_es, query):
    """
    Query Elasticsearch for selected fields and return the hits as a flat dataframe.

    The search is opened with a 1-minute scroll context and a page size of 1000.
    The JSON of that first response is passed to ``elastic_pagination_scrolling``,
    whose return value is loaded into a dataframe.

    :param es_url: base URL of the Elasticsearch server. ``index_es`` is appended
        directly to it, so it presumably has to end with ``/`` (confirm with callers)
    :param index_es: name of the index to search
    :param query: request body sent as-is to the ``_search`` endpoint
    :return: a dataframe with one column per key of the hits' ``fields`` object.
        Each cell holds the first value of the list returned for that field, or
        ``''`` when the value is missing. An empty dataframe is returned when the
        query produced no result.
    :raises requests.exceptions.RequestException: when the HTTP request fails
    """
    headers = {'content-type': 'application/json'}
    # scroll=1m: context for scrolling up to 1 minute
    search_url = es_url + index_es + "/_search?scroll=1m&size=1000"
    try:
        r = requests.get(search_url, data=query, headers=headers)
    except requests.exceptions.RequestException:
        print("Can not query: " + str(query))
        # Without a response nothing below can run: propagate the real error
        # instead of failing later on an unbound ``r``.
        raise
    results = elastic_pagination_scrolling(r.json(), headers)
    df_results = pd.DataFrame(results)
    if df_results.empty:
        # No hit at all: there is no 'fields' column to normalize.
        return pd.DataFrame()
    # Formatting the elasticsearch response:
    # 1. We extract nested fields from elasticsearch: json_normalize the nested column.
    # 2. From each cell value, extract the first element. Indeed, when we select specific
    #    fields in an elastic query, the response always sends values as lists
    #    (even if there is only one value).
    df_results = pd.json_normalize(df_results['fields'])
    # For every cell we retrieve the first value of the list. Cells that are not a list
    # (e.g. NaN when a hit has no value for that field) or that hold an empty list become ''.
    df_results = df_results.applymap(lambda x: x[0] if isinstance(x, list) and x else '')
    return df_results
if __name__ == '__main__':
""" Why not using eland ?
......@@ -68,21 +97,5 @@ if __name__ == '__main__':
jinja_env = Environment(loader=FileSystemLoader(template_dir))
template = jinja_env.get_template("filter_by_state_and_date.j2")
query = template.render(state="Auvergne")
headers = {'content-type': 'application/json'}
try:
r = requests.get(es_url + index_es + "/_search?scroll=1m&size=1000", # context for scrolling up to 1 minute
data=query,
headers=headers,
)
except Exception as e:
print("Can not filter by date and space")
results = elastic_pagination_scrolling(r.json(), headers)
df_results = pd.DataFrame(results)
# We extract nested fieds from elasticsearch
df_results = pd.json_normalize(df_results['fields'])
# When we select specific fields in a elastic query, the response always send value as list
# (even if there is one value)
df_results.to_pickle("/home/rdecoupe/Téléchargements/yip3.pkl")
# for every cell we retrieve the first value from list. Sometimes there is NaN value when there is empty value.
df_results = df_results.applymap(lambda x: x[0] if isinstance(x, list) else '')
df_results = elasticquery(es_url, index_es, query)
df_results.to_csv("/home/rdecoupe/Téléchargements/yip2.csv")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment