Commit c76f0abf authored by Decoupes Remy's avatar Decoupes Remy
Browse files

add hierarchical indexing on column name: for filter by disease

parent 13183a90
......@@ -93,11 +93,11 @@ def time_series_by_disease_keywords(jinja_env, es_url, index_es, list_of_keyword
"""
Return dataframe :
- index : timestamp : date + time
- Columns : each pair of disease:keyword :
Respiratory:lungdisease ... SARS-CoV-2 :SARS-CoV-2
2021-05-03T00:00:00.000+02:00 54.0 ... 3225
2021-06-02T00:00:00.000+02:00 127.0 ... 1979
2021-07-02T00:00:00.000+02:00 3.0 ... 2988
- Columns : Multi-indexing (or hierarchical indexing): 2 levels: [1] disease, [2] keyword :
disease Respiratory Unknown ... SARS-CoV-2
keyword lungdisease NewVirus ... SARS-CoV-2 SARS-CoV-2
2021-05-03T00:00:00.000+02:00 54.0 NaN ... 3225 3225
2021-06-02T00:00:00.000+02:00 127.0 NaN ... 1979 1979
To do so, we have to work on the formatting of the Elasticsearch result
......@@ -143,6 +143,8 @@ def time_series_by_disease_keywords(jinja_env, es_url, index_es, list_of_keyword
kw_time_serie.append([timestamp, value])
df_kw_timeserie = pd.DataFrame(kw_time_serie, columns=['timestamp', disease+':'+keyword])
df_kw_timeserie.set_index('timestamp', inplace=True)
# hierarchical indexing for column
df_kw_timeserie.columns = pd.MultiIndex.from_product([[disease], [keyword]], names=["disease", "keyword"])
if df_all_kw_timeserie_empty == True:
df_all_kw_timeserie = df_kw_timeserie
df_all_kw_timeserie_empty = False
......@@ -233,7 +235,7 @@ if __name__ == '__main__':
df_kw_by_disease = pd.concat([df_kw_by_disease, df], axis=1)
# df_kw_by_disease_without_covid = df_kw_by_disease[df_kw_by_disease["disease"] != "SARS-CoV-2 "]
logger.debug(df_kw_by_disease.keys())
timeseries_fig = px.bar(df_kw_by_disease)
timeseries_fig = px.bar(df_kw_by_disease["Influenza"], facet_col_wrap=2)
timeseries_fig.show()
logger.info("EDA stop")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment