Commit e784662e authored by Decoupes Remy

elastic: add scrolling mechanism

parent 649df252
@@ -11,6 +11,42 @@ import os
import eland as ed
import requests
import pandas as pd
from tqdm import tqdm
import json
def elastic_pagination_scrolling(result, headers):
    """
    Elasticsearch caps query results at 10,000 hits. To get past this limit,
    we paginate the results with the scroll API.
    This method appends all pages of a scroll search.
    :param result: the JSON response of an initial Elasticsearch query sent with ?scroll=
    :param headers: HTTP headers to send with each scroll request
    :return: the list of all hits retrieved by scrolling
    """
    scroll_size = result['hits']['total']["value"]
    # Seed with the first page so its hits are not lost
    results = list(result['hits']['hits'])
    # Progress bar
    pbar = tqdm(total=scroll_size)
    pbar.update(len(results))
    while scroll_size > 0:
        try:
            # Elasticsearch may return a fresh _scroll_id with each page,
            # so always read it from the latest response
            scroll_id = result['_scroll_id']
            # res = client.scroll(scroll_id=scroll_id, scroll='60s')
            query = {
                "scroll": "1m",
                "scroll_id": scroll_id
            }
            query_json = json.dumps(query)
            # es_url is a module-level constant defined earlier in the file
            res = requests.get(es_url + "_search/scroll",
                               data=query_json,
                               headers=headers,
                               ).json()
            results += res['hits']['hits']
            scroll_size = len(res['hits']['hits'])
            result = res  # keep the latest scroll id for the next round
            pbar.update(scroll_size)
        except (KeyError, requests.RequestException):
            # Stop on a malformed response or a network error
            pbar.close()
            break
    pbar.close()
    return results
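# Illustrative usage (a sketch, not from the original commit): the initial
# search must be opened with ?scroll=<keep-alive> so that the response carries
# a _scroll_id for elastic_pagination_scrolling to follow. es_url, index_es,
# query and headers are assumed to be defined as in the __main__ block below.
#
# first_page = requests.get(es_url + index_es + "/_search?scroll=1m&size=1000",
#                           data=query, headers=headers).json()
# all_hits = elastic_pagination_scrolling(first_page, headers)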
if __name__ == '__main__':
""" Why not using eland ?
@@ -31,11 +67,16 @@ if __name__ == '__main__':
    jinja_env = Environment(loader=FileSystemLoader(template_dir))
    template = jinja_env.get_template("filter_by_state_and_date.j2")
    query = template.render(state="Auvergne")
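    # filter_by_state_and_date.j2 is not shown in this diff; it presumably
    # renders an Elasticsearch bool query along these lines (the field names
    # are assumptions, not taken from the repository):
    # {
    #   "query": {
    #     "bool": {
    #       "filter": [
    #         {"match": {"state": "{{ state }}"}},
    #         {"range": {"created_at": {"gte": "now-7d/d"}}}
    #       ]
    #     }
    #   }
    # }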
    headers = {'content-type': 'application/json'}
    try:
        # Open a scroll context kept alive for up to 1 minute,
        # fetching up to 1000 hits per page
        r = requests.get(es_url + index_es + "/_search?scroll=1m&size=1000",
                         data=query,
                         headers=headers,
                         )
    except requests.RequestException as e:
        print("Cannot filter by date and state: " + str(e))
    results = elastic_pagination_scrolling(r.json(), headers)
    print(results)
    print(type(results))
    # df_results = pd.json_normalize(r.json()["hits"]["hits"])
    # df_results.to_csv("/home/rdecoupe/Téléchargements/yip.csv")
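    # Possible follow-up (a sketch, not part of this commit): flatten the
    # scrolled hits into a DataFrame as the commented-out lines above suggest,
    # then free the scroll context on the server. The output filename is
    # hypothetical.
    # df_results = pd.json_normalize(results)
    # df_results.to_csv("tweets_auvergne.csv")
    # requests.delete(es_url + "_search/scroll",
    #                 data=json.dumps({"scroll_id": r.json()["_scroll_id"]}),
    #                 headers=headers)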