diff --git a/elasticsearch/src/fix_bad_quote_json.py b/elasticsearch/src/fix_bad_quote_json.py
index 864a33350ab1f1007ba07049014b56f15a89be8a..f3b640cde61f5abed7f3913806317327e1c6c58a 100755
--- a/elasticsearch/src/fix_bad_quote_json.py
+++ b/elasticsearch/src/fix_bad_quote_json.py
@@ -43,14 +43,16 @@ logger.info("Transform jsonl single quotes into double quotes")
 for root, dirs, files in os.walk(path_dir_in):
     for name in files:
         fr = open(path_dir_in + "/" + name)
-        fw = open(path_dir_out + "/" + name, "w")
+        fw = open(path_dir_out + "/" + name)
         nb_lines_in = sum(1 for line in fr)
         try:
             nb_lines_out = sum(1 for line in fw)
         except: #file is empty
             nb_lines_out = 0
+        logger.info("file: " + name + " in: "+ str(nb_lines_in) + " and out:" + str(nb_lines_out))
         if nb_lines_in != nb_lines_out:
             fr.seek(0) # go to the start of the file
+            fw = open(path_dir_out + "/" + name, "w")
             for line in fr:
                 json_dat = json.dumps(ast.literal_eval(line))
                 dict_dat = json.loads(json_dat)