Commit 5205510b authored by Cresson Remi
Browse files

WIP: S2 download

1 merge request: !12 ENH: S2A and S3A support
Pipeline #33440 passed with stages
in 1 minute and 18 seconds
Showing with 245 additions and 0 deletions
+245 -0
# -*- coding: utf-8 -*-
import os
import hashlib
import datetime
import pycurl
import io
import json
from urllib.parse import urlencode
def curl_url(url, postdata, verbose=False, fp=None, header=None):
    """
    Use PyCurl to make some requests
    :param url: url
    :param postdata: POST data (url-encoded before being sent), or None to send no POST body
    :param verbose: verbose (True or False)
    :param fp: file pointer; when given, the response body is written to it instead of
               being kept in memory
    :param header: list of HTTP header lines (default: ['Accept:application/json'])
    :return: decoded contents (an empty string when the body was written to ``fp``)
    """
    # Build the default per call instead of sharing one mutable list between calls
    if header is None:
        header = ['Accept:application/json']

    storage = io.BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.HTTPHEADER, header)
        # NOTE(review): TLS peer/host verification is switched off, so the server identity
        # is never checked. Kept as in the original; confirm whether this is still required.
        c.setopt(pycurl.SSL_VERIFYPEER, False)
        c.setopt(pycurl.SSL_VERIFYHOST, False)
        if postdata is not None:
            c.setopt(pycurl.POST, 1)
            c.setopt(pycurl.POSTFIELDS, urlencode(postdata))
        if verbose:
            c.setopt(pycurl.VERBOSE, 1)
        if fp is not None:
            c.setopt(pycurl.WRITEDATA, fp)
        else:
            c.setopt(pycurl.WRITEFUNCTION, storage.write)
        c.perform()
    finally:
        # Always release the curl handle, even when the transfer raises
        c.close()
    return storage.getvalue().decode(encoding="utf-8", errors="strict")
class TheiaDownloader:
    """
    THEIA downloader

    Reads a ``key = value`` configuration file, then searches the SENTINEL2 collection
    of the configured server and downloads the selected products.
    """

    def __init__(self, config_file):
        """
        Constructor
        :param config_file: path to the Theia config file (one ``key = value`` per line)

        Problems with the file (unreadable, missing keys) are reported on stdout; no
        exception is raised here. ``self.config`` holds whatever could be parsed.
        """
        # Read the Theia config file
        self.config = {}
        try:
            # The context manager closes the file on every path, including errors
            with open(config_file, 'r') as f:
                for line in f:
                    # Split on the first '=' only, so values may themselves contain '='
                    splits = line.split('=', 1)
                    if len(splits) == 2:
                        self.config[splits[0].strip()] = splits[1].strip()
        except FileNotFoundError:
            print("File {} does not exist".format(config_file))
        except (OSError, UnicodeDecodeError):
            print("ERROR: parsing {}".format(config_file))

        # Check that the mandatory keys are there
        checking_keys = ["serveur", "resto", "login_theia", "password_theia", "token_type"]
        if "proxy" in self.config:
            checking_keys.extend(["login_proxy", "password_proxy"])
        missing_keys = [key_name for key_name in checking_keys if key_name not in self.config]
        for key_name in missing_keys:
            print("error with config file, missing key : {}".format(key_name))
        if missing_keys:
            print("error with config file opening or parsing")

    def _get_token(self):
        """
        Get the THEIA token
        :return: the body returned by the authentication service (empty string on failure)
        """
        postdata_token = {"ident": self.config["login_theia"], "pass": self.config["password_theia"]}
        url = "{}/services/authenticate/".format(self.config["serveur"])
        token = curl_url(url, postdata_token)
        if not token:
            print("Empty token. Please check your credentials in config file.")
        return token

    def search(self, dict_query):
        """
        Search products
        :param dict_query: query parameters, url-encoded into the search request
        :return: a dict with the following structure
            TILENAME
              +----DATE
                     +------id
                     +------url
                     +------checksum
        DATE is a datetime built from the first 10 characters of "completionDate".
        """
        # 1. Get the JSON
        url = "{}/{}/api/collections/SENTINEL2/search.json?{}".format(self.config["serveur"],
                                                                      self.config["resto"],
                                                                      urlencode(dict_query))
        print("Ask Theia catalog...")
        search = json.loads(curl_url(url, None))
        print("Ok")

        # 2. JSON-->Dict
        tile_dict = dict()
        for record in search["features"]:
            properties = record["properties"]
            service = properties["services"]["download"]
            rdate = datetime.datetime.strptime(properties["completionDate"][0:10], "%Y-%m-%d")
            # A later record with the same tile and date replaces the earlier one
            tile_dict.setdefault(properties["location"], dict())[rdate] = {
                "id": record["id"],
                "url": service["url"],
                "checksum": service["checksum"],
            }
        return tile_dict

    def _md5(self, fname):
        """
        Compute md5sum of a file
        :param fname: file name
        :return: hexadecimal md5 digest of the file contents
        """
        hash_md5 = hashlib.md5()
        with open(fname, "rb") as f:
            # Read in small chunks so the file never has to fit in memory
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def _file_complete(self, filename, md5sum):
        """
        Tell if a file is complete
        :param filename: file name
        :param md5sum: expected md5 digest (hexadecimal string)
        :return: True if the file exists and its md5 digest equals ``md5sum``
        """
        # Does the file exist?
        if not os.path.isfile(filename):
            return False
        # Is the file complete?
        return md5sum == self._md5(filename)

    def download(self, tiles_dict, outfn_func):
        """
        Download products
        :param tiles_dict: dict of products, structured like the output of search()
        :param outfn_func: function returning the destination file name for a product id
        """
        print("Get token...")
        token = self._get_token()
        # NOTE(review): this writes the bearer token to stdout; consider dropping it from the log
        print("OK ({})".format(token))
        header = ['Authorization: Bearer {}'.format(token), 'Content-Type: application/json']
        for _rloc, products in tiles_dict.items():
            print("Fetching products for tile {}...".format(_rloc))
            for _rdate, product in products.items():
                url = "{}/?issuerId=theia".format(product["url"])
                filename = outfn_func(product["id"])
                # Check if the destination file exist and is correct
                if not self._file_complete(filename, product["checksum"]):
                    print("\tdownloading {}".format(_rdate))
                    # The file is closed even if the transfer raises
                    with open(filename, "wb") as file_handle:
                        curl_url(url, postdata=None, fp=file_handle, header=header)
                else:
                    print("\t{} already in cache. Skipping.".format(_rdate))
def download(config_file, acq_envelope, acq_date):
    """
    Search the Theia catalog and select, for each tile, the product closest in time
    to the acquisition date.
    :param config_file: Theia config file, handed to TheiaDownloader
    :param acq_envelope: indexable of 4 values; items 0, 2, 1, 3 (in that order) form the
                         'box' query parameter
    :param acq_date: acquisition date; must support subtraction with timedelta and with
                     the product dates returned by the search
    :return: dict TILENAME --> DATE --> product entry (id, url, checksum, delta), holding
             at most one date per tile
    """
    # Temporal range to check for monthly synthesis, on each side of the acquisition date
    search_margin = datetime.timedelta(days=17)

    # Build the catalog query (key order is kept, since it is url-encoded as is)
    query = {
        'box': '{e[0]},{e[2]},{e[1]},{e[3]}'.format(e=acq_envelope),
        'startDate': (acq_date - search_margin).strftime("%Y-%m-%d"),
        'completionDate': (acq_date + search_margin).strftime("%Y-%m-%d"),
        'maxRecords': 500,
        'processingLevel': "LEVEL3A",
    }

    # Ask the catalog
    theia = TheiaDownloader(config_file)
    catalog = theia.search(query)

    # Store, in every product entry, the number of days between acquisition and product
    for tile_name, products in catalog.items():
        print(tile_name)
        for product_date, product in products.items():
            product["delta"] = (acq_date - product_date).days

    # For each tile, keep only the product with the smallest absolute delta
    # (the sort is stable, so ties keep the catalog order)
    closest = dict()
    for tile_name, products in catalog.items():
        ranked = sorted(products.items(), key=lambda item: abs(item[1]["delta"]))
        closest[tile_name] = dict(ranked[:1])

    # Report what has been selected
    print("Best tiles/dates:")
    for tile_name, products in closest.items():
        print("Tile {}".format(tile_name))
        print("\tDate (delta)")
        for product_date, product in products.items():
            print("\t{} ({})".format(product_date, product["delta"]))

    # TODO: the products are not downloaded yet (WIP); the call to the downloader's
    # download() method still has to be wired with an output file name function.
    return closest
import argparse
import datetime

from scenes import download, utils

# Arguments
parser = argparse.ArgumentParser(description="Download test",)
parser.add_argument("--refimage", required=True)
parser.add_argument("--theia_cfg", required=True)
params = parser.parse_args()

# Get the bounding box of the reference image
_, _, bbox = utils.get_epsg_extent_bbox(params.refimage)

# The acquisition date must be a datetime, not a string: download() (presumably the one
# from this same commit) subtracts a timedelta and the product dates from it.
download.download(config_file=params.theia_cfg, acq_envelope=bbox,
                  acq_date=datetime.datetime(2021, 1, 1))
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment