From 0fbdf2b60eaa9473bb6752debb4383ba891c3927 Mon Sep 17 00:00:00 2001 From: Raffaele Gaetano <raffaele.gaetano@cirad.fr> Date: Fri, 15 Dec 2023 09:05:15 +0100 Subject: [PATCH] ENH: Can mine folders containing tiles non overlapping with ROI. --- Common/geotools.py | 21 ++++++++++++++++- TimeSeries/s2theia.py | 51 ++++++++++++++++++++++++++--------------- Workflows/operations.py | 6 ++--- 3 files changed, 55 insertions(+), 23 deletions(-) diff --git a/Common/geotools.py b/Common/geotools.py index 646c2d2..eb313f2 100644 --- a/Common/geotools.py +++ b/Common/geotools.py @@ -1,6 +1,10 @@ from osgeo import gdal from osgeo import ogr from pyproj import Transformer as T +from shapely.geometry import box +from shapely.ops import transform +import rasterio +import geopandas def get_query_bbox(shp, query_srs=4326, return_all=False): ds = ogr.Open(shp) @@ -20,4 +24,19 @@ def get_query_bbox(shp, query_srs=4326, return_all=False): if return_all: return bbox, i_bbox, shp_srs else: - return bbox \ No newline at end of file + return bbox + +def check_extent_overlap(im,vect): + im_ds = rasterio.open(im) + im_srs = im_ds.crs + im_extent = box(*im_ds.bounds) + + vect_ds = geopandas.read_file(vect) + vect_srs = vect_ds.crs + vect_extent = box(*vect_ds.geometry.total_bounds) + + if (im_srs != vect_srs): + tr = T.from_crs(im_srs, vect_srs, always_xy=True) + im_extent = transform(tr.transform, im_extent) + + return im_extent.intersects(vect_extent) diff --git a/TimeSeries/s2theia.py b/TimeSeries/s2theia.py index 8c3f1fd..7830850 100644 --- a/TimeSeries/s2theia.py +++ b/TimeSeries/s2theia.py @@ -14,7 +14,7 @@ from osgeo import osr import datetime import uuid import shutil -from Common.geotools import get_query_bbox +from Common.geotools import get_query_bbox, check_extent_overlap from Common.geometry import get_displacements_to_ref, get_clearest_central_image def fetch(shp, dt, output_fld, credentials): @@ -145,9 +145,8 @@ class S2TheiaTilePipeline: @classmethod def _check_roi(cls, x, roi, min_surf=0.0, temp_fld='/tmp'): - if cls._check(x): + if cls._check(x): if os.path.isdir(x): - er = otb.Registry.CreateApplication('ExtractROI') bnd = glob.glob(os.path.join(x, cls.PTRN_20m[0]))[0] elif os.path.splitext(x)[-1] == '.zip': idf = cls.PTRN_20m[0].replace('*', '') @@ -158,6 +157,9 @@ class S2TheiaTilePipeline: tgt = open(bnd, 'wb') with fle,tgt: shutil.copyfileobj(fle, tgt) + if not check_extent_overlap(bnd, roi): + return False + er = otb.Registry.CreateApplication('ExtractROI') er.SetParameterString('in', bnd) er.SetParameterString('mode', 'fit') er.SetParameterString('mode.fit.vect', roi) @@ -219,24 +221,30 @@ class S2TheiaTilePipeline: self.input_dates = [self.input_dates[i] for i in idx] self.tile_cloud_percentage = [self.tile_cloud_percentage[i] for i in idx] - self.set_input_epsg() - self.output_epsg = self.input_epsg - self.output_dates = self.input_dates - - for img in self.image_list: - for p in self.PTRN_ful: - ifn, ofn = self.get_file(img, p) - self.append(to_otb_pipeline(ifn), ofn, self.REF_TYPE, 'out', is_output=True) - for p in self.PTRN_msk: - ifn, ofn = self.get_file(img, p) - self.append(to_otb_pipeline(ifn), ofn, self.MSK_TYPE, 'out', is_output=True) + if len(self.image_list) > 0: + self.set_input_epsg() + self.output_epsg = self.input_epsg + self.output_dates = self.input_dates + + for img in self.image_list: + for p in self.PTRN_ful: + ifn, ofn = self.get_file(img, p) + self.append(to_otb_pipeline(ifn), ofn, self.REF_TYPE, 'out', is_output=True) + for p in self.PTRN_msk: + ifn, ofn = self.get_file(img, p) + self.append(to_otb_pipeline(ifn), ofn, self.MSK_TYPE, 'out', is_output=True) + else: + warnings.warn('Empty pipeline after filtering for tile {}.'.format(self.tile_id)) else: - warnings.warn('Empty pipeline. Need to set preprocessed inputs?') + warnings.warn('Empty pipeline at init. Need to set preprocessed inputs?') def __del__(self): if os.path.exists(self.temp_fld): shutil.rmtree(self.temp_fld) + + def is_empty(self): + return len(self.image_list) == 0 def reset(self): self.pipe = [] @@ -775,20 +783,25 @@ class S2TheiaPipeline: _tile_id = S2TilePipeline._tile_id tiles = [] - def __init__(self, fld, temp_fld='/tmp', input_date_interval=None, max_clouds_percentage=None): + def __init__(self, fld, temp_fld='/tmp', input_date_interval=None, max_clouds_percentage=None, roi=None): self.folder = fld self.temp_fld = temp_fld self.input_date_interval = input_date_interval self.max_clouds_percentage = max_clouds_percentage self.tile_list = set() + self.roi = roi img_list = [os.path.abspath(x) for x in glob.glob(os.path.join(self.folder, self.S2TilePipeline.PTRN_dir)) if os.path.isdir(x) or os.path.splitext(x)[-1] == '.zip'] [self.tile_list.add(self._tile_id(x)) for x in img_list if self._check(x)] assert all([self.S2TilePipeline(fld, t, dummy_read=True) for t in self.tile_list]) - self.tiles = [self.S2TilePipeline(fld, t, self.temp_fld, self.input_date_interval, self.max_clouds_percentage) for t in self.tile_list] + self.tiles = [self.S2TilePipeline(fld, t, self.temp_fld, self.input_date_interval, self.max_clouds_percentage, filter_by_roi=self.roi) for t in self.tile_list] + self.tiles = [x for x in self.tiles if not x.is_empty()] self.output_dates = None - self.roi = None - self.output_epsg = self.tiles[0].input_epsg + + if len(self.tiles) > 0: + self.output_epsg = self.tiles[0].input_epsg + else: + warnings.warn('Empty pipeline.') def __del__(self): for x in self.tiles: diff --git a/Workflows/operations.py b/Workflows/operations.py index 43c8f96..db70174 100644 --- a/Workflows/operations.py +++ b/Workflows/operations.py @@ -40,9 +40,9 @@ def preprocess_s2(in_fld, out_fld, output_dates_file=None, roi=None, else: raise ValueError("Unsupported/non-valid provider") - s2 = S2Processor(in_fld, temp_fld=out_fld) - if roi is not None: - s2.set_roi(roi) + s2 = S2Processor(in_fld, temp_fld=out_fld, roi=roi) + #if roi is not None: + # s2.set_roi(roi) if output_dates_file is not None: s2.set_output_dates_by_file(output_dates_file) else: -- GitLab