From 851fcdbd90ae11f05061a27cea2abde466a2958d Mon Sep 17 00:00:00 2001 From: Decoupes Remy <remy.decoupes@irstea.fr> Date: Wed, 30 Mar 2022 18:18:07 +0200 Subject: [PATCH] building output to graph visualization with Gephi --- .../eda_templates/filter_by_state_and_date.j2 | 6 +++++ elasticsearch/src/tf-idf-es.py | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/elasticsearch/src/eda_templates/filter_by_state_and_date.j2 b/elasticsearch/src/eda_templates/filter_by_state_and_date.j2 index e5abf50..7cbf771 100644 --- a/elasticsearch/src/eda_templates/filter_by_state_and_date.j2 +++ b/elasticsearch/src/eda_templates/filter_by_state_and_date.j2 @@ -6,8 +6,14 @@ }, "text", "id", + "in_reply_to_status_id", "extended_tweet.full_text", "retweeted_status.id", + "user.location", + "entities.media.expanded_url", + "entities.urls.expanded_url", + "user.name", + "retweeted_status.user.name", "rest.*" ], "_source": false, diff --git a/elasticsearch/src/tf-idf-es.py b/elasticsearch/src/tf-idf-es.py index bdf1dc2..4b077bd 100644 --- a/elasticsearch/src/tf-idf-es.py +++ b/elasticsearch/src/tf-idf-es.py @@ -137,3 +137,26 @@ if __name__ == '__main__': df_tfidf["tf_idf_terms"] = df_tfidf.index df_tfidf = df_tfidf.merge(df_results, on="index_of_tweet") df_tfidf.to_csv("/home/rdecoupe/Téléchargements/acquitaine3.csv") + # prepare data for Gephi graph visualization + # gephi = df_results + # gephi["Source"] = gephi["id"] + # gephi["Target"] = gephi["retweeted_status.id"] + # gephi["ID"] = gephi.index + # gephi["Timestamp"] = gephi["@timestamp"] + # gephi["label"] = gephi["retweeted_status.user.name"] + # gephi = gephi[gephi["Target"].str.len() !=0] # filter out tweets that are not retweets + # gephi[["ID", "label", "Source", "Target", "Timestamp"]].to_csv( + # "/home/rdecoupe/Téléchargements/acquitaine_script_gephi.csv", + # index = False + # ) + gephi = df_results + gephi["Source"] = gephi["user.name"] + gephi["Target"] = gephi["retweeted_status.user.name"] + gephi["ID"] = 
gephi.index + gephi["Timestamp"] = gephi["@timestamp"] + gephi["label"] = gephi["retweeted_status.user.name"] + gephi = gephi[gephi["Target"].str.len() !=0] # filter out tweets that are not retweets (empty retweeted_status.user.name) + gephi[["ID", "label", "Source", "Target", "Timestamp"]].to_csv( + "/home/rdecoupe/Téléchargements/acquitaine_script_gephi.csv", + index=False + ) -- GitLab