Commit 649df252 authored by Decoupes Remy's avatar Decoupes Remy
Browse files

start retrieving filter by state

parent be40a51c
{#
  Elasticsearch search body: returns every field (including unmapped ones) of
  the documents whose "rest.features.properties.state" matches the `state`
  template variable and whose "@timestamp" lies inside the fixed range below.

  `state` is rendered through `tojson`, which emits the surrounding double
  quotes itself and escapes quotes/backslashes, so any value produces valid JSON.
#}
{
  "fields": [
    {
      "field": "*",
      "include_unmapped": "true"
    },
    {
      "field": "@timestamp",
      "format": "strict_date_optional_time"
    }
  ],
  "query": {
    "bool": {
      "must": [],
      "filter": [
        {
          "bool": {
            "should": [
              {
                "match_phrase": {
                  "rest.features.properties.state": {{ state | tojson }}
                }
              }
            ],
            "minimum_should_match": 1
          }
        },
        {
          "range": {
            "@timestamp": {
              "format": "strict_date_optional_time",
              "gte": "2019-03-25T15:51:46.172Z",
              "lte": "2022-03-25T15:51:46.172Z"
            }
          }
        }
      ],
      "should": [],
      "must_not": []
    }
  }
}
\ No newline at end of file
......@@ -8,11 +8,34 @@ Download metric from Elasticsearch to easily compute TF-IDF matrix
from elasticsearch import Elasticsearch
from jinja2 import FileSystemLoader, Environment
import os
import eland as ed
import requests
import pandas as pd
if __name__ == '__main__':
    # Why not use eland?
    # We could not filter an eland DataFrame on rest.features.properties.state,
    # probably because the index has too many fields. Two solutions:
    #   - use eland with ed_tweets["rest_user_osm.extent"] and intersect with polygons
    #   - use the elasticsearch package with a Jinja2 template and normalization
    # We prefer the second one.
    # ed_tweets = ed.DataFrame("http://mo-mood-tetis-tweets-collect.montpellier.irstea.priv:9200",
    #                          es_index_pattern="mood-tetis-tweets-collect")
    es_url = "http://mo-mood-tetis-tweets-collect.montpellier.irstea.priv:9200/"
    # NOTE: the client is created here but the search below goes through `requests`.
    client_es = Elasticsearch(es_url)
    index_es = "mood-tetis-tweets-collect"

    # Init jinja2 configuration: templates live next to this script.
    template_dir = os.path.join(os.path.dirname(__file__), "eda_templates")
    jinja_env = Environment(loader=FileSystemLoader(template_dir))
    template = jinja_env.get_template("filter_by_state_and_date.j2")
    query = template.render(state="Auvergne")
    print(query)

    headers = {'content-type': 'application/json'}
    try:
        r = requests.get(
            es_url + index_es + "/_search",
            # Encode explicitly: a str body would be sent as ISO-8859-1, which
            # corrupts non-ASCII characters in a JSON payload.
            data=query.encode("utf-8"),
            headers=headers,
            timeout=60,  # never hang forever on an unreachable cluster
        )
        # Turn HTTP error statuses into exceptions instead of failing later
        # with a KeyError on a response body that has no "hits".
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Without a response there is nothing to normalize: report and stop
        # rather than continuing with an undefined `r`.
        print(f"Can not filter by date and space: {e}")
        raise SystemExit(1) from e

    # Flatten the nested hit documents into one column per dotted field path.
    df_results = pd.json_normalize(r.json()["hits"]["hits"])
    # df_results.to_csv("/home/rdecoupe/Téléchargements/yip.csv")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment