# -*- coding: utf-8 -*-
"""
Search the THEIA catalog for Sentinel-2 LEVEL3A products and fetch them.

This text gathers the two files introduced by the original patch:

* ``scenes/download.py``: :func:`curl_url`, :class:`TheiaDownloader`,
  :func:`select_closest_dates` and :func:`download`;
* ``test/download_test.py``: the command-line driver, kept at the bottom of
  the file behind a ``__main__`` guard.
"""
import datetime
import hashlib
import io
import json
import os
from urllib.parse import urlencode

# Header sent when the caller does not provide one.
_DEFAULT_HEADER = ('Accept:application/json',)


# ----------------------------------------------------------------------------
# scenes/download.py
# ----------------------------------------------------------------------------
def curl_url(url, postdata, verbose=False, fp=None, header=None):
    """
    Use PyCurl to make some requests
    :param url: url
    :param postdata: POST data (mapping, url-encoded before sending), or None
                     to perform a plain GET
    :param verbose: verbose (True or False)
    :param fp: file pointer. When given, the response body is written to it
               and the returned string is empty
    :param header: list of HTTP header lines. None selects
                   ['Accept:application/json']
    :return: decoded contents (UTF-8)
    """
    # Imported here rather than at module level so that the catalog parsing
    # and ranking helpers of this module stay importable without pycurl.
    import pycurl

    if header is None:
        # A fresh list per call: no mutable default shared between calls.
        header = list(_DEFAULT_HEADER)

    storage = io.BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.HTTPHEADER, header)
        # NOTE(review): certificate verification is switched off although
        # credentials and the bearer token go through this function. Left
        # unchanged to preserve behaviour; confirm whether it can be enabled.
        c.setopt(pycurl.SSL_VERIFYPEER, False)
        c.setopt(pycurl.SSL_VERIFYHOST, False)
        if postdata is not None:
            c.setopt(pycurl.POST, 1)
            c.setopt(pycurl.POSTFIELDS, urlencode(postdata))
        if verbose:
            c.setopt(pycurl.VERBOSE, 1)
        if fp is not None:
            c.setopt(pycurl.WRITEDATA, fp)
        else:
            c.setopt(pycurl.WRITEFUNCTION, storage.write)
        c.perform()
    finally:
        # Release the handle even when the transfer raises.
        c.close()
    return storage.getvalue().decode(encoding="utf-8", errors="strict")


class TheiaDownloader:
    """
    THEIA downloader
    """

    # Keys that must always be present in the config file.
    REQUIRED_KEYS = ("serveur", "resto", "login_theia", "password_theia", "token_type")
    # Extra keys that must be present as soon as a "proxy" key is defined.
    PROXY_KEYS = ("login_proxy", "password_proxy")

    def __init__(self, config_file):
        """
        Constructor

        Reads ``key = value`` lines from the Theia config file into
        ``self.config``. Lines without a ``=`` are ignored; only the first
        ``=`` of a line separates the key from the value.

        Problems (unreadable file, missing keys) are reported on stdout and
        do not raise: ``self.config`` then simply lacks the affected keys.

        :param config_file: path of the Theia config file
        """
        self.config = {}

        # Read the Theia config file
        try:
            with open(config_file, 'r') as f:
                for line in f:
                    key, separator, value = line.partition('=')
                    if separator:
                        self.config[key.strip()] = value.strip()
        except (OSError, UnicodeDecodeError) as err:
            print("ERROR: parsing {}: {}".format(config_file, err))

        # Check that every expected key has been found
        checking_keys = list(self.REQUIRED_KEYS)
        if "proxy" in self.config:
            checking_keys.extend(self.PROXY_KEYS)

        missing_keys = [key_name for key_name in checking_keys if key_name not in self.config]
        for key_name in missing_keys:
            print("error with config file, missing key : {}".format(key_name))
        if missing_keys:
            print("error with config file opening or parsing")

    def _get_token(self):
        """
        Get the THEIA token

        :return: the body returned by the authentication service (an empty
                 string triggers a warning on stdout)
        """
        postdata_token = {"ident": self.config["login_theia"], "pass": self.config["password_theia"]}
        url = "{}/services/authenticate/".format(self.config["serveur"])
        token = curl_url(url, postdata_token)
        if not token:
            print("Empty token. Please check your credentials in config file.")
        return token

    @staticmethod
    def _features_to_tiles(features):
        """
        Convert the "features" list of a catalog answer into a dict

        :param features: list of records, each one providing "id" and
                         "properties" ("completionDate", "location" and
                         "services"/"download"/{"checksum", "url"})
        :return: dict {location: {date: {"id", "url", "checksum"}}} where
                 date is a datetime.datetime built from the first 10
                 characters (YYYY-MM-DD) of "completionDate"
        """
        tile_dict = {}
        for record in features:
            properties = record["properties"]
            service = properties["services"]["download"]
            rdate = datetime.datetime.strptime(properties["completionDate"][0:10], "%Y-%m-%d")
            # A later record with the same location and date replaces the
            # earlier one.
            tile_dict.setdefault(properties["location"], {})[rdate] = {
                "id": record["id"],
                "url": service["url"],
                "checksum": service["checksum"],
            }
        return tile_dict

    def search(self, dict_query):
        """
        Search products
        Return a dict with the following structure

        TILENAME
          +----DATE
                +------id
                +------url
                +------checksum

        :param dict_query: query parameters, url-encoded into the request
        :return: the dict described above
        """
        # 1. Get the JSON
        url = "{}/{}/api/collections/SENTINEL2/search.json?{}".format(self.config["serveur"],
                                                                      self.config["resto"],
                                                                      urlencode(dict_query))
        print("Ask Theia catalog...")
        search = json.loads(curl_url(url, None))
        print("Ok")

        # 2. JSON-->Dict
        return self._features_to_tiles(search["features"])

    def _md5(self, fname):
        """
        Compute md5sum of a file

        :param fname: path of the file
        :return: hexadecimal digest
        """
        hash_md5 = hashlib.md5()
        with open(fname, "rb") as f:
            # Read by chunks so the whole file is never held in memory.
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def _file_complete(self, filename, md5sum):
        """
        Tell if a file is complete

        :param filename: path of the file
        :param md5sum: expected md5 hexadecimal digest
        :return: True when the file exists and its md5 equals md5sum
        """
        # Does the file exist?
        if not os.path.isfile(filename):
            return False

        # Is the file complete?
        return md5sum == self._md5(filename)

    def download(self, tiles_dict, outfn_func):
        """
        Download products

        Files that already exist with the expected checksum are skipped.

        :param tiles_dict: dict with the structure returned by search()
        :param outfn_func: function returning the output file name for a
                           product id
        """
        print("Get token...")
        token = self._get_token()
        # The token is a credential: do not echo it on stdout.
        print("OK")
        header = ['Authorization: Bearer {}'.format(token), 'Content-Type: application/json']

        for _rloc, products in tiles_dict.items():
            print("Fetching products for tile {}...".format(_rloc))
            for _rdate, product in products.items():
                url = "{}/?issuerId=theia".format(product["url"])
                filename = outfn_func(product["id"])

                # Check if the destination file exists and is correct
                if self._file_complete(filename, product["checksum"]):
                    print("\t{} already in cache. Skipping.".format(_rdate))
                    continue

                print("\tdownloading {}".format(_rdate))
                # "with" closes the file even when the transfer raises.
                with open(filename, "wb") as file_handle:
                    curl_url(url, postdata=None, fp=file_handle, header=header)


def select_closest_dates(tile, acq_date):
    """
    For each tile, keep the product whose date is the closest to acq_date

    Every entry of ``tile`` receives a "delta" key: the number of days
    (``timedelta.days``) between acq_date and the product date. The input
    dict is modified in place for that purpose.

    :param tile: dict with the structure returned by TheiaDownloader.search()
    :param acq_date: datetime.datetime of the acquisition
    :return: dict {location: {date: entry}} holding a single date per
             location (none when the location has no product). When several
             dates are equally close, the first one listed is kept.
    """
    selected_tile = {}
    for _rloc, products in tile.items():
        # Add the "Delta" key/value
        for _rdate, entry in products.items():
            entry["delta"] = (acq_date - _rdate).days

        # Rank delta
        selected_tile[_rloc] = {}
        if products:
            best_date = min(products, key=lambda _rdate: abs(products[_rdate]["delta"]))
            selected_tile[_rloc][best_date] = products[best_date]
    return selected_tile


def download(config_file, acq_envelope, acq_date):
    """
    search theia catalog, select for each tile the product closest to acq_date

    :param config_file: path of the Theia config file
    :param acq_envelope: 4 values; sent to the catalog as the "box" parameter
                         in the order [0], [2], [1], [3]
    :param acq_date: datetime.datetime of the acquisition (a string is not
                     supported: date arithmetic is performed on it)
    :return: dict {location: {date: {"id", "url", "checksum", "delta"}}}
             with one date per location
    """

    # Important parameters
    ndays_seek = datetime.timedelta(days=17)  # temporal range to check for monthly synthesis

    # Query products
    box = '{},{},{},{}'.format(acq_envelope[0], acq_envelope[2], acq_envelope[1], acq_envelope[3])
    dict_query = {
        'box': box,
        'startDate': (acq_date - ndays_seek).strftime("%Y-%m-%d"),
        'completionDate': (acq_date + ndays_seek).strftime("%Y-%m-%d"),
        'maxRecords': 500,
        'processingLevel': "LEVEL3A",
    }

    # Theia downloader
    downloader = TheiaDownloader(config_file)

    # Search products
    tile = downloader.search(dict_query)

    # Deltas ranking
    selected_tile = select_closest_dates(tile, acq_date)

    # Print summary
    print("Best tiles/dates:")
    for _rloc, products in selected_tile.items():
        print("Tile {}".format(_rloc))
        print("\tDate (delta)")
        for _rdate, entry in products.items():
            print("\t{} ({})".format(_rdate, entry["delta"]))

    # TODO: the products are not downloaded yet. TheiaDownloader.download()
    # needs a function mapping a product id to a local file name, and none
    # is defined in this module.

    return selected_tile


# ----------------------------------------------------------------------------
# test/download_test.py
# ----------------------------------------------------------------------------
def _main():
    """
    Command-line driver: query the catalog over the extent of a reference
    image.
    """
    import argparse
    from scenes import utils

    # Arguments
    parser = argparse.ArgumentParser(description="Download test",)
    parser.add_argument("--refimage", required=True)
    parser.add_argument("--theia_cfg", required=True)
    params = parser.parse_args()

    # Get the bounding box of the reference image
    _, _, bbox = utils.get_epsg_extent_bbox(params.refimage)

    # download() performs date arithmetic on acq_date: pass a datetime, not
    # the string '01-01-2021'.
    download(config_file=params.theia_cfg, acq_envelope=bbox,
             acq_date=datetime.datetime(2021, 1, 1))


if __name__ == "__main__":
    _main()