Commit 2532e56a authored by Decoupes Remy
Browse files

tweet content: merge extended_tweet.full_text with text

parent ab95d7ca
......@@ -169,9 +169,17 @@ def get_tweet_content_by_disease(jinja_env, es_url, index_es, list_of_keywords,
return -1
list_of_tweets = []
for hit in r.json()["hits"]["hits"]:
list_of_tweets.append(hit["fields"])
try:# test is we have more than 140 characters, ie, extended_tweet
text = hit["fields"]["extended_tweet.full_text"][0]
except:# else we take only the 140 characters
text = hit["fields"]["text"][0]
tweet = {
"timestamp": hit["fields"]["@timestamp"][0],
"text": text
}
list_of_tweets.append(tweet)
df_results = pd.DataFrame(list_of_tweets)
# df_results.to_pickle("/home/rdecoupe/Téléchargements/test/get_tweet_content_by_disease.pkl")
df_results.to_pickle("/home/rdecoupe/Téléchargements/test/get_tweet_content_by_disease.pkl")
return df_results
if __name__ == '__main__':
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment