From 851fcdbd90ae11f05061a27cea2abde466a2958d Mon Sep 17 00:00:00 2001
From: Decoupes Remy <remy.decoupes@irstea.fr>
Date: Wed, 30 Mar 2022 18:18:07 +0200
Subject: [PATCH] Build output for graph visualization with Gephi

---
 .../eda_templates/filter_by_state_and_date.j2 |  6 +++++
 elasticsearch/src/tf-idf-es.py                | 23 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/elasticsearch/src/eda_templates/filter_by_state_and_date.j2 b/elasticsearch/src/eda_templates/filter_by_state_and_date.j2
index e5abf50..7cbf771 100644
--- a/elasticsearch/src/eda_templates/filter_by_state_and_date.j2
+++ b/elasticsearch/src/eda_templates/filter_by_state_and_date.j2
@@ -6,8 +6,14 @@
     },
     "text",
     "id",
+    "in_reply_to_status_id",
     "extended_tweet.full_text",
     "retweeted_status.id",
+    "user.location",
+    "entities.media.expanded_url",
+    "entities.urls.expanded_url",
+    "user.name",
+    "retweeted_status.user.name",
     "rest.*"
   ],
   "_source": false,
diff --git a/elasticsearch/src/tf-idf-es.py b/elasticsearch/src/tf-idf-es.py
index bdf1dc2..4b077bd 100644
--- a/elasticsearch/src/tf-idf-es.py
+++ b/elasticsearch/src/tf-idf-es.py
@@ -137,3 +137,26 @@ if __name__ == '__main__':
     df_tfidf["tf_idf_terms"] = df_tfidf.index
     df_tfidf = df_tfidf.merge(df_results, on="index_of_tweet")
     df_tfidf.to_csv("/home/rdecoupe/Téléchargements/acquitaine3.csv")
+    # Prepare a Source/Target CSV export for graph visualization in Gephi
+    # gephi = df_results
+    # gephi["Source"] = gephi["id"]
+    # gephi["Target"] = gephi["retweeted_status.id"]
+    # gephi["ID"] = gephi.index
+    # gephi["Timestamp"] = gephi["@timestamp"]
+    # gephi["label"] = gephi["retweeted_status.user.name"]
+    # gephi = gephi[gephi["Target"].str.len() !=0] # filter out tweet that are not retweeted
+    # gephi[["ID", "label", "Source", "Target", "Timestamp"]].to_csv(
+    #     "/home/rdecoupe/Téléchargements/acquitaine_script_gephi.csv",
+    #     index = False
+    # )
+    gephi = df_results
+    gephi["Source"] = gephi["user.name"]
+    gephi["Target"] = gephi["retweeted_status.user.name"]
+    gephi["ID"] = gephi.index
+    gephi["Timestamp"] = gephi["@timestamp"]
+    gephi["label"] = gephi["retweeted_status.user.name"]
+    gephi = gephi[gephi["Target"].str.len() !=0] # keep only rows with a non-empty Target, i.e. tweets that are retweets
+    gephi[["ID", "label", "Source", "Target", "Timestamp"]].to_csv(
+        "/home/rdecoupe/Téléchargements/acquitaine_script_gephi.csv",
+        index=False
+    )
-- 
GitLab