From c9838f7b48c96b4fcae2624572c2034e43875d03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9my=20Decoupes?= <remy.decoupes@inrae.fr>
Date: Mon, 17 Jan 2022 22:25:31 +0100
Subject: [PATCH] fix when fix_bad_quote is stopped and restarted

---
 elasticsearch/src/fix_bad_quote_json.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/elasticsearch/src/fix_bad_quote_json.py b/elasticsearch/src/fix_bad_quote_json.py
index 864a333..f3b640c 100755
--- a/elasticsearch/src/fix_bad_quote_json.py
+++ b/elasticsearch/src/fix_bad_quote_json.py
@@ -43,14 +43,16 @@ logger.info("Transform jsonl single quotes into double quotes")
 for root, dirs, files in os.walk(path_dir_in):
     for name in files:
         fr = open(path_dir_in + "/" + name)
-        fw = open(path_dir_out + "/" + name, "w")
+        fw = open(path_dir_out + "/" + name)
         nb_lines_in = sum(1 for line in fr)
         try:
             nb_lines_out = sum(1 for line in fw)
         except: #file is empty
             nb_lines_out = 0
+        logger.info("file: " + name + " in: "+ str(nb_lines_in) + " and out:" + str(nb_lines_out))
         if nb_lines_in != nb_lines_out:
             fr.seek(0) # go to the start of the file
+            fw = open(path_dir_out + "/" + name, "w")
             for line in fr:
                 json_dat = json.dumps(ast.literal_eval(line))
                 dict_dat = json.loads(json_dat)
-- 
GitLab