From c9838f7b48c96b4fcae2624572c2034e43875d03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9my=20Decoupes?= <remy.decoupes@inrae.fr>
Date: Mon, 17 Jan 2022 22:25:31 +0100
Subject: [PATCH] Fix resuming fix_bad_quote after it is stopped and restarted

---
 elasticsearch/src/fix_bad_quote_json.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/elasticsearch/src/fix_bad_quote_json.py b/elasticsearch/src/fix_bad_quote_json.py
index 864a333..f3b640c 100755
--- a/elasticsearch/src/fix_bad_quote_json.py
+++ b/elasticsearch/src/fix_bad_quote_json.py
@@ -43,14 +43,16 @@ logger.info("Transform jsonl single quotes into double quotes")
 for root, dirs, files in os.walk(path_dir_in):
 	for name in files:
 		fr = open(path_dir_in + "/" + name)
-		fw = open(path_dir_out + "/" + name, "w")
+		fw = open(path_dir_out + "/" + name)
 		nb_lines_in = sum(1 for line in fr)
 		try:
 			nb_lines_out = sum(1 for line in fw)
 		except: #file is empty
 			nb_lines_out = 0
+		logger.info("file: " + name + " in: "+ str(nb_lines_in) + " and out:" + str(nb_lines_out))
 		if nb_lines_in != nb_lines_out:
 			fr.seek(0) # go to the start of the file
+			fw = open(path_dir_out + "/" + name, "w")
 			for line in fr:
 				json_dat = json.dumps(ast.literal_eval(line))
 				dict_dat = json.loads(json_dat)
-- 
GitLab