Commit e39198ed authored by Decoupes Remy's avatar Decoupes Remy
Browse files

format/render elasticsearch response... though work

parent e784662e
{
"fields": [
{
"field": "*",
"include_unmapped": "true"
},
{
"field": "@timestamp",
"format": "strict_date_optional_time"
}
},
"text",
"id",
"extended_tweet.full_text",
"retweeted_status.id",
"rest.*"
],
"_source": false,
"query": {
"bool": {
"must": [],
......
......@@ -13,6 +13,7 @@ import requests
import pandas as pd
from tqdm import tqdm
import json
import math
def elastic_pagination_scrolling(result, headers):
"""
......@@ -76,7 +77,12 @@ if __name__ == '__main__':
except Exception as e:
print("Can not filter by date and space")
results = elastic_pagination_scrolling(r.json(), headers)
print(results)
print(type(results))
# df_results = pd.json_normalize(r.json()["hits"]["hits"])
#df_results.to_csv("/home/rdecoupe/Téléchargements/yip.csv")
\ No newline at end of file
df_results = pd.DataFrame(results)
# We extract the nested fields from the Elasticsearch response
df_results = pd.json_normalize(df_results['fields'])
# When we select specific fields in an Elasticsearch query, the response always sends each value as a list
# (even if there is only one value)
df_results.to_pickle("/home/rdecoupe/Téléchargements/yip3.pkl")
# For every cell we retrieve the first value from the list. Sometimes a cell is NaN because the field has no value; such non-list cells become an empty string.
df_results = df_results.applymap(lambda x: x[0] if isinstance(x, list) else '')
df_results.to_csv("/home/rdecoupe/Téléchargements/yip2.csv")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment