Commit e6f6d979 authored by Decoupes Remy
Browse files

Use Plotly Express to plot the EDA figures: keywords by disease

parent 8acf0dbb
......@@ -18,3 +18,4 @@ elasticsearch/data/
elasticsearch/log/
elasticsearch/logstash/sincedb.log
elasticsearch/logfix_bad_quote_json.log
elasticsearch/src/figs/
......@@ -14,6 +14,10 @@ from jinja2 import FileSystemLoader, Environment
import os
import requests
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
def logsetup():
"""
......@@ -87,6 +91,7 @@ def count_tweets_by_disease_keywords(jinja_env, es_url, index_es, list_of_keywor
if __name__ == '__main__':
logger = logsetup()
logger.info("EDA start")
path_figs_dir = os.path.join(os.path.dirname(__file__), "figs")
# Init Elasticsearch configurations
es_url = "http://mo-mood-tetis-tweets-collect.montpellier.irstea.priv:9200/"
client_es = Elasticsearch(es_url)
......@@ -116,7 +121,18 @@ if __name__ == '__main__':
df_kw_by_disease = df
else:
df_kw_by_disease = df_kw_by_disease.append(df)
print(df_kw_by_disease)
# Export the "tweets per keyword, grouped by disease" figures as PNG files.
# Each chart type (sunburst, treemap) is rendered twice: once on the full
# frame and once on a frame with the SARS-CoV-2 rows filtered out.
# NOTE(review): the label compared against carries a trailing space
# ("SARS-CoV-2 "); confirm it matches the values stored in the "disease"
# column, otherwise this filter removes nothing.
df_kw_by_disease_without_covid = df_kw_by_disease[df_kw_by_disease["disease"] != "SARS-CoV-2 "]

# The figs directory is listed in the ignore file, so it is missing on a fresh
# checkout; create it up front so the image export does not fail on the
# first write.
os.makedirs(path_figs_dir, exist_ok=True)

# (chart factory, data frame, output file name), in the order the files are written.
charts_to_export = (
    (px.sunburst, df_kw_by_disease, "count_tweets_by_disease_keywords_sunburst_all.png"),
    (px.sunburst, df_kw_by_disease_without_covid, "count_tweets_by_disease_keywords_sunburst_without_covid.png"),
    (px.treemap, df_kw_by_disease, "count_tweets_by_disease_keywords_treemap_all.png"),
    (px.treemap, df_kw_by_disease_without_covid, "count_tweets_by_disease_keywords_treemap_without_covid.png"),
)
for make_chart, frame, file_name in charts_to_export:
    # Hierarchy: disease at the first level, then the frame's index labels;
    # sector/tile size comes from the 'doc_count' column.
    fig = make_chart(frame, path=['disease', frame.index], values='doc_count', color='disease')
    pio.write_image(fig, os.path.join(path_figs_dir, file_name), format='png')
logger.info("EDA stop")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment