diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b4f77320025bc87bbd8fe0afaff096ca39f9b214..5f907b110ee36ed80c3453b2ceb3033724df4c17 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,12 +17,12 @@ stages: flake8: extends: .static_analysis_base script: - - sudo apt update && sudo apt install -y flake8 && python -m flake8 --max-line-length=120 $PWD/scenes + - sudo apt update && sudo apt install -y flake8 && python3 -m flake8 --max-line-length=120 $PWD/scenes pylint: extends: .static_analysis_base script: - - sudo apt update && sudo apt install -y pylint && pylint --disable=too-many-locals,too-many-statements,too-few-public-methods,too-many-instance-attributes,invalid-name,too-many-branches --max-line-length=120 --ignored-modules=tqdm,rtree,pyotb $PWD/scenes + - sudo apt update && sudo apt install -y pylint && pylint --disable=too-many-locals,too-many-statements,too-few-public-methods,too-many-instance-attributes,invalid-name,too-many-branches --max-line-length=120 --ignored-modules=tqdm,rtree,pyotb,pycurl $PWD/scenes codespell: extends: .static_analysis_base diff --git a/README.md b/README.md index e4ea73b09b823c7d9b4e854f696a31d9d7043557..00e906841ab8a34a6000f9991f993f5d37806f5e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,9 @@ # Scenes -Spot 6/7 scenes +Ease the use of remote sensing products provided by Dinamis/Theia. + +Supported products: +- Spot 6/7 +- Sentinel-2 Level 2 (THEIA) +- Sentinel-2 Level 3 (THEIA) diff --git a/doc/arch.md b/doc/arch.md index f4e038371d9413c488547f49096810f18a695b4f..5840f4ec93d34044e2d09a139f24742c82d0a80b 100644 --- a/doc/arch.md +++ b/doc/arch.md @@ -4,7 +4,7 @@ List of main features ## Spatial and temporal indexation -Perform a search in space (WGS84 bounding box) and time (optional) with an RTree indexation structure. +Perform a query in space (WGS84 bounding box) and time (optional) with an RTree indexation structure. 
``` scenes = drs.get_spot67_scenes(root_dir) # drs contains helpers to deal with Spot 6/7 geosud collections idx = indexation.Index(scenes) # spatial/temporal index @@ -50,15 +50,27 @@ superimposed.write("superimposed.tif") classDiagram Scene <|-- Spot67Scene - Imagery <|-- Spot67Imagery + Scene <|-- Sentinel2SceneBase + Sentinel2SceneBase <|-- Sentinel22AScene + Sentinel2SceneBase <|-- Sentinel23AScene + Scene --* Imagery: root_scene + Imagery <|-- Spot67Imagery + Imagery <|-- Sentinel2ImageryBase + Sentinel2ImageryBase <|-- Sentinel22AImagery + Sentinel2ImageryBase <|-- Sentinel23AImagery + Imagery --* Source: root_imagery + Source <|-- Spot67Source + Source <|-- Sentinel2Source + Sentinel2Source <|-- Sentinel22ASource + Sentinel2Source <|-- Sentinel23ASource class Scene{ +datetime acquisition_date +int epsg +bbox_wgs84 - +Imagery get_imagery() + +get_metadata() +__repr__() } @@ -70,31 +82,89 @@ classDiagram +float sun_azimuth_angle +float sun_elev_angle +Spot67Imagery get_imagery() - +__repr__() + +get_metadata() + } + + class Sentinel2SceneBase{ + +__init__(archive, tag) + +get_file() + +get_band() + +get_metadata() } + class Sentinel22AScene{ + +__init__(archive) + +str clm_r1_msk_file + +str clm_r2_msk_file + +str edg_r1_msk_file + +str edg_r2_msk_file + +Sentinel2AImagery get_imagery() + +get_metadata() + } + + class Sentinel23AScene{ + +__init__(archive) + +str flg_r1_msk_file + +str flg_r2_msk_file + +Sentinel3AImagery get_imagery() + +get_metadata() + } + class Source{ +__init__(root_imagery, out, parent=None) +Imagery root_imagery +Source parent - +Source drilled(self, msk_vec_file, nodata=0) - +Source cld_msk_drilled(self, nodata=0) - +Source resample_over(self, ref_img, interpolator="nn", nodata=0) - +Source clip_over_img(self, ref_img) - +Source clip_over_vec(self, ref_vec) - - new_source(*args) + +Source drilled(msk_vec_file, nodata=0) + +Source cld_msk_drilled(nodata=0) + +Source resample_over(ref_img, interpolator="nn", nodata=0) + +Source 
clip_over_img(ref_img) + +Source clip_over_vec(ref_vec) + +Source new_source(*args) } - + + class Spot67Source{ + +Source cld_msk_drilled(nodata=0) + } + + class Sentinel2Source{ + +R1_SIZE + +R2_SIZE + +Source msk_drilled(msk_dict, exp, nodata=0) + } + + class Sentinel22ASource{ + +Source cld_msk_drilled(nodata=0) + } + + class Sentinel23ASource{ + +Source flg_msk_drilled(keep_flags_values=(3, 4), nodata=0) + } + class Imagery{ +__init__(root_scene) +Scene root_scene } class Spot67Imagery{ - +__init__(root_scene, epsg=None, reflectance=None) + +__init__(root_scene) +Source get_pxs() +Source get_pan() +Source get_xs() } + + class Sentinel2ImageryBase{ + +_concatenate_10m_bands() + +_concatenate_20m_bands() + } + + class Sentinel22AImagery{ + +get_10m_bands() + +get_20m_bands() + } + + class Sentinel23AImagery{ + +get_10m_bands() + +get_20m_bands() + } ``` diff --git a/scenes/core.py b/scenes/core.py index 502468bec3f7d4cd35c2826590dc3bc661273820..e1474242997e9d297347748db4115d595587aa58 100644 --- a/scenes/core.py +++ b/scenes/core.py @@ -19,7 +19,7 @@ class Source(pyotb.Output): :param out: image to deliver (can be an image filename (str), a pyotb.App, etc.) :param parent: parent Source instance """ - assert isinstance(root_imagery, Imagery) + assert isinstance(root_imagery, Imagery), "root_imagery type is {}".format(type(root_imagery)) self.root_imagery = root_imagery # root imagery # Here we call the pyotb.Output() constructor. 
# Since it can only be called with pyotb apps, we do the following: @@ -51,20 +51,30 @@ class Source(pyotb.Output): :return: drilled source """ if utils.open_vector_layer(msk_vec_file): - # Cloud mask is not empty + # Vector data not empty rasterization = pyotb.Rasterization({"in": msk_vec_file, "im": self, "mode": "binary", "mode.binary.foreground": 0 if inside else 255, - "background": 255 if inside else 255}) - manage_nodata = pyotb.ManageNoData({"in": self, - "mode": "apply", - "mode.apply.mask": rasterization, - "mode.apply.ndval": nodata}) - return self.new_source(rasterization, manage_nodata) + "background": 255 if inside else 0}) + return self.masked(binary_mask=rasterization, nodata=nodata) return self # Nothing but a soft copy of the source - def resample_over(self, ref_img, interpolator="nn", nodata=0): + def masked(self, binary_mask, nodata=0): + """ + Return the source masked from an uint8 binary raster (0 or 1..255). + Pixels are set to "nodata" where the mask values are 0. 
+ :param binary_mask: input mono-band binary raster filename + :param nodata: nodata value for rejected values + :return: masked source + """ + manage_nodata = pyotb.ManageNoData({"in": self, + "mode": "apply", + "mode.apply.mask": binary_mask, + "mode.apply.ndval": nodata}) + return self.new_source(binary_mask, manage_nodata) + + def resample_over(self, ref_img, interpolator="bco", nodata=0): """ Return the source superimposed over the input image :param ref_img: reference image @@ -72,10 +82,11 @@ class Source(pyotb.Output): :param nodata: no data value :return: resampled image source """ - return self.new_source(pyotb.Superimpose({"inm": self, - "inr": ref_img, - "interpolator": interpolator, - "fv": nodata})) + superimpose = pyotb.Superimpose({"inm": self, + "inr": ref_img, + "interpolator": interpolator, + "fv": nodata}) + return self.new_source(ref_img, superimpose) def clip_over_img(self, ref_img): """ @@ -83,9 +94,10 @@ class Source(pyotb.Output): :param ref_img: reference image :return: ROI clipped source """ - return self.new_source(pyotb.ExtractROI({"in": self, - "mode": "fit", - "mode.fit.im": ref_img})) + extract_roi = pyotb.ExtractROI({"in": self, + "mode": "fit", + "mode.fit.im": ref_img}) + return self.new_source(ref_img, extract_roi) def clip_over_vec(self, ref_vec): """ @@ -97,7 +109,7 @@ class Source(pyotb.Output): "mode": "fit", "mode.fit.vec": ref_vec})) - def reproject(self, epsg, interpolator="nn"): + def reproject(self, epsg, interpolator="bco"): """ Reproject the source into the specified EPSG :param epsg: EPSG (int) @@ -112,7 +124,7 @@ class Source(pyotb.Output): return self # Nothing but a soft copy of the source -class Imagery: +class Imagery(ABC): """ Imagery class. This class carry the base image source, and additional generic stuff common to all sensors imagery. 
diff --git a/scenes/download.py b/scenes/download.py new file mode 100644 index 0000000000000000000000000000000000000000..6fb3a047145446d608310377e1f0a4f5cb8f4b7c --- /dev/null +++ b/scenes/download.py @@ -0,0 +1,307 @@ +# -*- coding: utf-8 -*- +""" +Download Sentinel-2 images from Theia +""" +import os +import hashlib +import datetime +import io +import json +from urllib.parse import urlencode +import pycurl +import tqdm + + +def compute_md5(fname): + """ + Compute md5sum of a file + :param fname: file name + """ + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + + +def is_file_complete(filename, md5sum): + """ + Tell if a file is complete + """ + # Does the file exist? + if not os.path.isfile(filename): + return False + + # Does the file completed? + return md5sum == compute_md5(filename) + + +def curl_url(url, postdata, verbose=False, fp=None, header=None): + """ + Use PyCurl to make some requests + :param url: url + :param postdata: POST data + :param verbose: verbose (True or False) + :param fp: file handle + :param header: header. 
If None is kept, ['Accept:application/json'] is used + :return: decoded contents + """ + if not header: + header = ['Accept:application/json'] + + c = pycurl.Curl() + c.setopt(pycurl.URL, url) + c.setopt(pycurl.HTTPHEADER, header) + c.setopt(pycurl.SSL_VERIFYPEER, False) + c.setopt(pycurl.SSL_VERIFYHOST, False) + if postdata is not None: + c.setopt(pycurl.POST, 1) + postfields = urlencode(postdata) + c.setopt(pycurl.POSTFIELDS, postfields) + storage = io.BytesIO() + if verbose: + c.setopt(pycurl.VERBOSE, 1) + if fp is not None: + progress_bar = None + last_download_d = 0 + print("Downloading", flush=True) + + def _status(download_t, download_d, *_): + """ + callback function for c.XFERINFOFUNCTION + https://stackoverflow.com/questions/19724222/pycurl-attachments-and-progress-functions + :param download_t: total + :param download_d: already downloaded + :return: + """ + if download_d > 0: + nonlocal progress_bar, last_download_d + if not progress_bar: + progress_bar = tqdm.tqdm(total=download_t, unit='iB', unit_scale=True) + progress_bar.update(download_d - last_download_d) + last_download_d = download_d + + c.setopt(c.NOPROGRESS, False) + c.setopt(c.XFERINFOFUNCTION, _status) + c.setopt(pycurl.WRITEDATA, fp) + else: + c.setopt(pycurl.WRITEFUNCTION, storage.write) + c.perform() + c.close() + content = storage.getvalue() + return content.decode(encoding="utf-8", errors="strict") + + +class TheiaDownloader: + """ + THEIA downloader + """ + def __init__(self, config_file, max_records=500): + """ + :param config_file: Theia configuration file + :param max_records: Maximum number of records + """ + + # Read the Theia config file + self.config = {} + f = open(config_file, 'r') + if f is None: + raise FileNotFoundError("File {} does not exist".format(config_file)) + for line in f.readlines(): + splits = line.split('=', 1) + if len(splits) == 2: + self.config[splits[0].strip()] = splits[1].strip() + f.close() + + # Check keys + checking_keys = ["serveur", "resto", 
"login_theia", "password_theia", "token_type"] + + # Proxy + if "proxy" in self.config.keys(): + checking_keys.extend(["login_proxy", "password_proxy"]) + + # Check keys + for key_name in checking_keys: + if key_name not in self.config.keys(): + raise ValueError("error with config file, missing key : {}".format(key_name)) + + # Maximum number of records + self.max_records = max_records + + def _get_token(self): + """ + Get the THEIA token + """ + postdata_token = {"ident": self.config["login_theia"], "pass": self.config["password_theia"]} + url = "{}/services/authenticate/".format(self.config["serveur"]) + token = curl_url(url, postdata_token) + if not token: + print("Empty token. Please check your credentials in config file.") + return token + + def _query(self, dict_query): + """ + Search products + Return a dict with the following structure + TILE_NAME + +---- DATE + +------ id + +------ url + +------ checksum + +------ product_name + :param dict_query: query + :return tile dictionary + """ + url = "{}/{}/api/collections/SENTINEL2/search.json?{}".format(self.config["serveur"], + self.config["resto"], + urlencode(dict_query)) + print("Ask Theia catalog...") + search = json.loads(curl_url(url, None)) + + tiles_dict = {} + for record in search["features"]: + tile_name = record["properties"]["location"] # T31TCJ + acq_date = record["properties"]["completionDate"][0:10] # YYYY-MM-DD + new_acquisition = { + "id": record["id"], + "product_name": record["properties"]["productIdentifier"], # SENTINEL2X_20191215-000000-000_L3A_T31... + "checksum": record["properties"]["services"]["download"]["checksum"], + "url": record["properties"]["services"]["download"]["url"] # https://theia.cnes.fr/atdistr.../download + } + if tile_name not in tiles_dict: + tiles_dict[tile_name] = {acq_date: new_acquisition} + tiles_dict[tile_name].update({acq_date: new_acquisition}) + + return tiles_dict + + def _download_tiles_and_dates(self, tiles_dict, download_dir): + """ + Download a product. 
+ Updates the "tiles_dict" with the filename of the downloaded files + :param tiles_dict: tiles dictionary. Must have the following structure: + TILE_NAME + +---- DATE + +------ id + +------ url + +------ checksum + +------ product_name + :return: tiles_dict updated with local_file: + +------ local_file + """ + print("Get token...") + token = self._get_token() + print("OK ({})".format(token)) + for tile_name in tiles_dict: + print("Fetching products for tile {}...".format(tile_name)) + for acq_date in tiles_dict[tile_name]: + description = tiles_dict[tile_name][acq_date] + url = "{}/?issuerId=theia".format(description["url"]) + header = ['Authorization: Bearer {}'.format(token), 'Content-Type: application/json'] + filename = "{}.zip".format(os.path.join(download_dir, description["product_name"])) + + # Check if the destination file exist and is correct + if not is_file_complete(filename, description["checksum"]): + print("Downloading {}".format(acq_date)) + file_handle = open(filename, "wb") + curl_url(url, postdata=None, fp=file_handle, header=header) + file_handle.close() + else: + print("{} already downloaded. Skipping.".format(acq_date)) + description["local_file"] = filename + + return tiles_dict + + def download_in_range(self, bbox_wgs84, dates_range, download_dir=None, level="LEVEL3A"): + """ + Download all images within spatial and temporal ranges + :param bbox_wgs84: bounding box (WGS84) + :param dates_range: a tuple of datetime.datetime instances (start_date, end_date) + :param download_dir: download directory + :param level: LEVEL2A, LEVEL3A, ... 
+ """ + start_date, end_date = dates_range + # lonmin, latmin, lonmax, latmax + box = '{},{},{},{}'.format(bbox_wgs84[2], bbox_wgs84[0], bbox_wgs84[3], bbox_wgs84[1]) + dict_query = { + "box": box, + "startDate": start_date.strftime("%Y-%m-%d"), + "completionDate": end_date.strftime("%Y-%m-%d"), + "maxRecords": self.max_records, + "processingLevel": level + } + + # Search products + search_results = self._query(dict_query) + + # Download products + if download_dir: + return self._download_tiles_and_dates(search_results, download_dir) + + return search_results + + def download_closest(self, bbox_wgs84, acq_date, download_dir=None, level="LEVEL3A"): + """ + query theia catalog, download_closest the files + :param bbox_wgs84: bounding box (WGS84) + :param acq_date: acquisition date to look around + :param download_dir: download directory + :param level: LEVEL2A, LEVEL3A, ... + :return: downloaded files + """ + + # Important parameters + ndays_seek = datetime.timedelta(days=17) # temporal range to check for monthly synthesis + + # Query products + # box is [lonmin, latmin, lonmax, latmax] + box = '{},{},{},{}'.format(bbox_wgs84[2], bbox_wgs84[0], bbox_wgs84[3], bbox_wgs84[1]) + dict_query = {'box': box} + start_date = acq_date - ndays_seek + end_date = acq_date + ndays_seek + + dict_query['startDate'] = start_date.strftime("%Y-%m-%d") + dict_query['completionDate'] = end_date.strftime("%Y-%m-%d") + dict_query['maxRecords'] = self.max_records + dict_query['processingLevel'] = level + + # Search products + search_results = self._query(dict_query) + + # Sort by closest date (add the "Delta" key/value) + for tile_name in search_results: + print(tile_name) + for description_date in search_results[tile_name]: + print("\t" + description_date) + dt = datetime.datetime.strptime(description_date, "%Y-%m-%d") + delta = acq_date - dt + delta = delta.days + search_results[tile_name][description_date]["delta"] = delta + + # Rank delta + selected_tile = dict() + for tile_name in 
search_results: + n_dates = 0 + x = search_results[tile_name] + sorted_x = sorted(x.items(), key=lambda kv: abs(kv[1]["delta"])) + selected_tile[tile_name] = dict() + for i in sorted_x: + description_date = i[0] + entry = i[1] + selected_tile[tile_name][description_date] = entry + n_dates += 1 + if n_dates == 1: + break + + # Print summary + print("Best tiles/dates:") + for tile_name in selected_tile: + print("Tile {}".format(tile_name)) + print("\tDate (delta)") + for description_date in selected_tile[tile_name]: + print("\t{} ({})".format(description_date, selected_tile[tile_name][description_date]["delta"])) + + # Download products + if download_dir: + return self._download_tiles_and_dates(selected_tile, download_dir) + + return selected_tile diff --git a/scenes/drs.py b/scenes/drs.py index fdd5b516fed1dc01a72246f6a512ebbacfba4684..11b72ec5e45fc762c52202b1968abca45b142b94 100644 --- a/scenes/drs.py +++ b/scenes/drs.py @@ -12,7 +12,7 @@ def find_all_dimaps(pth): :param pth: root directory :return: list of DIMAPS """ - return utils.find_file_in_all_subdirs(pth=pth, pattern="DIM_*.XML") + return utils.find_files_in_all_subdirs(pth=pth, pattern="DIM_*.XML") def get_spot67_scenes(root_dir): diff --git a/scenes/sentinel.py b/scenes/sentinel.py new file mode 100644 index 0000000000000000000000000000000000000000..ae909378955c787a97a0d828fbaa0c0d43e35b16 --- /dev/null +++ b/scenes/sentinel.py @@ -0,0 +1,299 @@ +""" +Sentinel-2 root_scene class +""" +import datetime +from abc import abstractmethod +import pyotb +from scenes import utils +from scenes.core import Source, Imagery, Scene + + +class Sentinel2Source(Source): + """ + Class for generic Sentinel-2 sources + """ + R1_SIZE = 10980 + R2_SIZE = 5490 + + def msk_drilled(self, msk_dict, exp, nodata=0): + """ + :param msk_dict: dict of masks + :param exp: bandmath expression to form the 0-255 binary mask + :param nodata: no-data value in masked output + :return: new masked source + """ + img_size = 
pyotb.ReadImageInfo(self).GetParameterInt('sizex') + bm = pyotb.BandMath({"il": msk_dict[img_size], "exp": exp}) + return self.masked(binary_mask=bm, nodata=nodata) + + +class Sentinel22ASource(Sentinel2Source): + """ + Sentinel-2 level 2A source class + """ + + def cld_msk_drilled(self, nodata=-10000): + """ + Return the source drilled from the cloud mask + :param nodata: nodata value inside holes + :return: drilled source + """ + return self.msk_drilled(msk_dict={self.R1_SIZE: self.root_imagery.root_scene.cld_r1_msk_file, + self.R2_SIZE: self.root_imagery.root_scene.cld_r2_msk_file}, + exp="im1b1==0?255:0", + nodata=nodata) + + +class Sentinel23ASource(Sentinel2Source): + """ + Sentinel-2 level 3A source class + """ + + def flg_msk_drilled(self, keep_flags_values=(3, 4), nodata=-10000): + """ + Return the source drilled from the FLG mask + :param keep_flags_values: flags values to keep. Can be: + 0 = No data + 1 = Cloud + 2 = Snow + 3 = Water + 4 = Land + (source: https://labo.obs-mip.fr/multitemp/theias-l3a-product-format/) + :param nodata: nodata value inside holes + :return: drilled source + """ + exp = "||".join(["im1b1=={}".format(val) for val in keep_flags_values]) + "?255:0" + return self.msk_drilled(msk_dict={self.R1_SIZE: self.root_imagery.root_scene.flg_r1_msk_file, + self.R2_SIZE: self.root_imagery.root_scene.flg_r2_msk_file}, + exp=exp, + nodata=nodata) + + +class Sentinel2ImageryBase(Imagery): + """ + Base class for Sentinel-2 level 2A imagery classes. 
+    """
+
+    def _concatenate_10m_bands(self):
+        """
+        :return: 10m spacing bands stacking pipeline
+        """
+        return pyotb.ConcatenateImages([self.root_scene.band4_file,
+                                        self.root_scene.band3_file,
+                                        self.root_scene.band2_file,
+                                        self.root_scene.band8_file])
+
+    def _concatenate_20m_bands(self):
+        """
+        :return: 20m spacing bands stacking pipeline
+        """
+        return pyotb.ConcatenateImages([self.root_scene.band5_file,
+                                        self.root_scene.band6_file,
+                                        self.root_scene.band7_file,
+                                        self.root_scene.band8a_file,
+                                        self.root_scene.band11_file,
+                                        self.root_scene.band12_file])
+
+
+class Sentinel22AImagery(Sentinel2ImageryBase):
+    """
+    Sentinel-2 level 2A class.
+    """
+
+    def get_10m_bands(self):
+        """
+        :return: 10m spacing bands
+        """
+        return Sentinel22ASource(self, self._concatenate_10m_bands())
+
+    def get_20m_bands(self):
+        """
+        :return: 20m spacing bands
+        """
+        return Sentinel22ASource(self, self._concatenate_20m_bands())
+
+
+class Sentinel23AImagery(Sentinel2ImageryBase):
+    """
+    Sentinel-2 level 3A class.
+ """ + + def get_10m_bands(self): + """ + :return: 10m spacing bands + """ + return Sentinel23ASource(self, self._concatenate_10m_bands()) + + def get_20m_bands(self): + """ + :return: 20m spacing bands + """ + return Sentinel23ASource(self, self._concatenate_20m_bands()) + + +class Sentinel2SceneBase(Scene): + """ + Base class for Sentinel-2 images + """ + + @abstractmethod + def __init__(self, archive, tag): + """ + :param archive: product .zip or directory + """ + self.archive = archive + + # Retrieve the list of .tif files + is_zip = self.archive.lower().endswith(".zip") + if is_zip: + print("Input type is a .zip archive") + files = utils.list_files_in_zip(self.archive) + self.files = [utils.to_vsizip(self.archive, f) for f in files] + else: + print("Input type is a directory") + self.files = utils.find_files_in_all_subdirs(self.archive, "*.tif", case_sensitive=False) + + # Assign bands files + self.band2_file = self.get_band(tag, "B2") + self.band3_file = self.get_band(tag, "B3") + self.band4_file = self.get_band(tag, "B4") + self.band8_file = self.get_band(tag, "B8") + self.band5_file = self.get_band(tag, "B5") + self.band6_file = self.get_band(tag, "B6") + self.band7_file = self.get_band(tag, "B7") + self.band8a_file = self.get_band(tag, "B8A") + self.band11_file = self.get_band(tag, "B11") + self.band12_file = self.get_band(tag, "B12") + + # EPSG, bbox + epsg, _, bbox_wgs84 = utils.get_epsg_extent_bbox(self.band2_file) + + # Date + onefile = utils.basename(self.band2_file) # SENTINEL2A_20180630-105440-000_L2A_T31TEJ_D_V1-8 + datestr = onefile.split("_")[1] # 20180630-105440 + acquisition_date = datetime.datetime.strptime(datestr, '%Y%m%d-%H%M%S-%f') + + # Call parent constructor + super().__init__(acquisition_date=acquisition_date, bbox_wgs84=bbox_wgs84, epsg=epsg) + + def get_file(self, endswith): + """ + Return the specified file. + Throw an exception if none or multiple candidates are found. 
+ :param endswith: filtered extension + :return: the file + """ + filtered_files_list = [f for f in self.files if f.endswith(endswith)] + nb_matches = len(filtered_files_list) + if nb_matches != 1: + raise ValueError("Cannot instantiate Sentinel-2 Theia product from {}:" + "{} occurrence(s) of file with suffix \"{}\"".format(self.archive, nb_matches, endswith)) + return filtered_files_list[0] + + def get_band(self, suffix1, suffix2): + """ + Return the file path for the specified band. + Throw an exception if none or multiple candidates are found. + :param suffix1: suffix 1, e.g. "FRE" + :param suffix2: suffix 2, e.g. "B3" + :return: file path for the band + """ + return self.get_file(endswith="_{}_{}.tif".format(suffix1, suffix2)) + + def get_metadata(self): + """ + Return the metadata + :return: metadata (dict) + """ + metadata = super().get_metadata() + metadata.update({ + "Archive": self.archive, + "Band 2": self.band2_file, + "Band 3": self.band3_file, + "Band 4": self.band4_file, + "Band 8": self.band8_file, + "Band 5": self.band5_file, + "Band 6": self.band6_file, + "Band 7": self.band7_file, + "Band 8a": self.band8a_file, + "Band 11": self.band11_file, + "Band 12": self.band12_file, + }) + return metadata + + +class Sentinel22AScene(Sentinel2SceneBase): + """ + Sentinel-2 level 2A scene class. + The class carries all the metadata from the root_scene, and can be used to retrieve its imagery. + """ + + def __init__(self, archive): + """ + :param archive: .zip file or folder. Must be a product from MAJA. 
+ """ + # Call parent constructor + super().__init__(archive=archive, tag="FRE") + + # Additional rasters + self.clm_r1_msk_file = self.get_file("_CLM_R1.tif") + self.edg_r1_msk_file = self.get_file("_EDG_R1.tif") + self.clm_r2_msk_file = self.get_file("_CLM_R2.tif") + self.edg_r2_msk_file = self.get_file("_EDG_R2.tif") + + def get_imagery(self): # pylint: disable=arguments-differ + """ + Return the Sentinel-2 level 2A imagery + :return: Imagery instance + """ + return Sentinel22AImagery(self) + + def get_metadata(self): + """ + Return the metadata + :return: metadata (dict) + """ + metadata = super().get_metadata() + metadata.update({ + "CLD R1": self.clm_r1_msk_file, + "EDG R1": self.edg_r1_msk_file, + "CLD R2": self.clm_r2_msk_file, + "EDG R2": self.edg_r2_msk_file, + }) + return metadata + + +class Sentinel23AScene(Sentinel2SceneBase): + """ + Sentinel-2 level 3A scene class. + The class carries all the metadata from the root_scene, and can be used to retrieve its imagery. + """ + + def __init__(self, archive): + """ + :param archive: .zip file or folder. Must be a product from WASP. 
+ """ + super().__init__(archive=archive, tag="FRC") + + # Additional rasters + self.flg_r1_msk_file = self.get_file("_FLG_R1.tif") + self.flg_r2_msk_file = self.get_file("_FLG_R2.tif") + + def get_imagery(self): # pylint: disable=arguments-differ + """ + Return the Sentinel-2 level 3A imagery + :return: Imagery instance + """ + return Sentinel23AImagery(self) + + def get_metadata(self): + """ + Return the metadata + :return: metadata (dict) + """ + metadata = super().get_metadata() + metadata.update({ + "FLG R1": self.flg_r1_msk_file, + "FLG R2": self.flg_r2_msk_file, + }) + return metadata diff --git a/scenes/spot.py b/scenes/spot.py index 1ddaf76e8ebde4fa62d6f183de40df7864411d3e..602582e0c620497e9dba6aef8113725ffe60f81a 100644 --- a/scenes/spot.py +++ b/scenes/spot.py @@ -51,13 +51,13 @@ class Spot67Imagery(Imagery): """ :return: XS source """ - return Source(self, self.xs) + return Spot67Source(self, self.xs) def get_pan(self): """ :return: PAN source """ - return Source(self, self.pan) + return Spot67Source(self, self.pan) def get_pxs(self, method="bayes"): """ @@ -66,8 +66,8 @@ class Spot67Imagery(Imagery): xs = self.get_xs() new_app = pyotb.BundleToPerfectSensor({"inp": self.pan, "inxs": xs, "method": method}) pansharp = xs.new_source(new_app) # new source with pansharpening - return pansharp.drilled(msk_vec_file=self.root_scene.roi_msk_file, - inside=False) # new source masked outside the original ROI + # new source masked outside the original ROI + return Spot67Source(self, pansharp.drilled(msk_vec_file=self.root_scene.roi_msk_file, inside=False)) class Spot67Scene(Scene): diff --git a/scenes/utils.py b/scenes/utils.py index 1a88dab266eeb039440d01dbbe827004a6b5193e..198ddc45e18cf980d8e58afa6bf6d73658b2ce98 100644 --- a/scenes/utils.py +++ b/scenes/utils.py @@ -5,6 +5,9 @@ import os import glob import pathlib import math +import zipfile +import re +import fnmatch from osgeo import osr, ogr, gdal @@ -204,14 +207,19 @@ def 
get_bbox_wgs84_from_vector(vector_file): return xmin, xmax, ymin, ymax -def find_file_in_all_subdirs(pth, pattern): +def find_files_in_all_subdirs(pth, pattern, case_sensitive=True): """ Returns the list of files matching the pattern in all subdirectories of pth :param pth: path :param pattern: pattern + :param case_sensitive: case sensitive True or False :return: list of str """ - return [y for x in os.walk(pth) for y in glob.glob(os.path.join(x[0], pattern))] + result = [] + reg_expr = re.compile(fnmatch.translate(pattern), 0 if case_sensitive else re.IGNORECASE) + for root, _, files in os.walk(pth, topdown=True): + result += [os.path.join(root, j) for j in files if re.match(reg_expr, j)] + return result def find_file_in_dir(pth, pattern): @@ -234,3 +242,37 @@ def get_parent_directory(pth): if not path: raise Exception("Cannot define path: {}".format(path)) return str(path.parent) + + +def list_files_in_zip(filename, endswith=None): + """ + List files in zip archive + :param filename: path of the zip + :param endswith: optional, end of filename to be matched + :return: list of the filepaths + """ + with zipfile.ZipFile(filename) as zip_file: + filelist = zip_file.namelist() + if endswith: + filelist = [f for f in filelist if f.endswith(endswith)] + + return filelist + + +def to_vsizip(zipfn, relpth): + """ + Create path from zip file + :param zipfn: zip archive + :param relpth: relative path (inside archive) + :return: vsizip path + """ + return "/vsizip/{}/{}".format(zipfn, relpth) + + +def basename(pth): + """ + Returns the basename. 
Works with files and paths + :param pth: path + :return: basename of the path + """ + return str(pathlib.Path(pth).name) diff --git a/test/download_test.py b/test/download_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ef89ef37020884819b533d19429267e0dfbac747 --- /dev/null +++ b/test/download_test.py @@ -0,0 +1,19 @@ +import argparse +from scenes import download, utils +import datetime + +# Arguments +parser = argparse.ArgumentParser(description="Download test",) +parser.add_argument("--refimage", required=True) +parser.add_argument("--theia_cfg", required=True) +parser.add_argument("--download_dir") +parser.add_argument("--year", type=int, default=2020) +parser.add_argument("--month", type=int, default=1) +parser.add_argument("--day", type=int, default=1) +params = parser.parse_args() + +# Get all scenes in the root_dir +_, _, bbox = utils.get_epsg_extent_bbox(params.refimage) +acq_date = datetime.datetime(year=params.year, month=params.month, day=params.day) +downloader = download.TheiaDownloader(config_file=params.theia_cfg) +downloader.download_closest(bbox_wgs84=bbox, acq_date=acq_date, download_dir=params.download_dir) diff --git a/test/drs_import.py b/test/drs_import.py index 2be81e5111b24df20dbcaf2ccf358a4ef58c1e22..a0c561ff7c7aad92bcf4cbbbc2819eac6e5eeabf 100644 --- a/test/drs_import.py +++ b/test/drs_import.py @@ -3,13 +3,13 @@ from scenes import drs # Arguments parser = argparse.ArgumentParser(description="Test",) -parser.add_argument("--root_dir", nargs='+', help="Root directories containing MS and PAN folders", required=True) +parser.add_argument("--root_dirs", nargs='+', help="Root directories containing MS and PAN folders", required=True) parser.add_argument("--out_pickle", help="Output pickle file", required=True) params = parser.parse_args() # Get all scenes in the root_dir scenes = [] -for root_dir in params.root_dir: +for root_dir in params.root_dirs: scenes += drs.get_spot67_scenes(root_dir) # Save scenes in a 
pickle file
diff --git a/test/imagery_test.py b/test/imagery_test.py
index b1d496b140d680bb25ea419bdd095c4c5d64b2b2..2284e1465a56ebd8bc8335bd3b89754537482af3 100644
--- a/test/imagery_test.py
+++ b/test/imagery_test.py
@@ -20,15 +20,24 @@ class ImageryTest(ScenesTestBase):
         pan = imagery.get_pan()
         self.compare_images(pan, tests_data.DIMAP1_P)
 
-    def test_pxs_imagery1(self):
-        imagery = self.get_scene1_imagery()
-        pxs = imagery.get_pxs()
+    def pxs_compare(self, pxs):
         pxs_baseline = pyotb.BundleToPerfectSensor({"inxs": tests_data.DIMAP1_XS, "inp": tests_data.DIMAP1_P,
                                                     "method": "bayes"})
         # We test the central area only, because bayes method messes with nodata outside image
         self.compare_images(pxs, pxs_baseline, roi=[2000, 2000, 1000, 1000])
 
+    def test_pxs_imagery1(self):
+        imagery = self.get_scene1_imagery()
+        pxs = imagery.get_pxs()
+        self.pxs_compare(pxs)
+
+    def test_pxs_imagery1_cld_msk_drilled(self):
+        imagery = self.get_scene1_imagery()
+        pxs = imagery.get_pxs()
+        pxs_drilled = pxs.cld_msk_drilled()
+        self.pxs_compare(pxs_drilled)
+
 
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/s2_import.py b/test/s2_import.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca30e5e1306b1dd7b58541f5de9e542f5b7475e0
--- /dev/null
+++ b/test/s2_import.py
@@ -0,0 +1,19 @@
+import argparse
+from scenes import utils, sentinel, drs
+
+# Arguments
+parser = argparse.ArgumentParser(description="Test",)
+parser.add_argument("--root_dirs", nargs='+', help="Root directories containing S2 archives (.zip)", required=True)
+parser.add_argument("--out_pickle", help="Output pickle file", required=True)
+parser.add_argument('--level', default='2A', const='2A', nargs='?', choices=['2A', '3A'], help='Product level')
+params = parser.parse_args()
+
+# Get all scenes in the root_dir
+product_type = sentinel.Sentinel22AScene if params.level == '2A' else sentinel.Sentinel23AScene
+archives = []
+for root_dir in params.root_dirs:
+    archives += 
utils.find_files_in_all_subdirs(pth=root_dir, pattern="*.zip", case_sensitive=False) +scenes = [product_type(archive=archive) for archive in archives] + +# Save scenes in a pickle file +drs.save_scenes(scenes, params.out_pickle)