From 0fbdf2b60eaa9473bb6752debb4383ba891c3927 Mon Sep 17 00:00:00 2001
From: Raffaele Gaetano <raffaele.gaetano@cirad.fr>
Date: Fri, 15 Dec 2023 09:05:15 +0100
Subject: [PATCH] ENH: Can mine folders containing tiles non overlapping with
 ROI.

---
 Common/geotools.py      | 21 ++++++++++++++++-
 TimeSeries/s2theia.py   | 51 ++++++++++++++++++++++++++---------------
 Workflows/operations.py |  6 ++---
 3 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/Common/geotools.py b/Common/geotools.py
index 646c2d2..eb313f2 100644
--- a/Common/geotools.py
+++ b/Common/geotools.py
@@ -1,6 +1,10 @@
 from osgeo import gdal
 from osgeo import ogr
 from pyproj import Transformer as T
+from shapely.geometry import box
+from shapely.ops import transform
+import rasterio
+import geopandas
 
 def get_query_bbox(shp, query_srs=4326, return_all=False):
     ds = ogr.Open(shp)
@@ -20,4 +24,19 @@ def get_query_bbox(shp, query_srs=4326, return_all=False):
     if return_all:
         return bbox, i_bbox, shp_srs
     else:
-        return bbox
\ No newline at end of file
+        return bbox
+    
+def check_extent_overlap(im,vect):
+    im_ds = rasterio.open(im)
+    im_srs = im_ds.crs
+    im_extent = box(*im_ds.bounds)
+
+    vect_ds = geopandas.read_file(vect)
+    vect_srs = vect_ds.crs
+    vect_extent = box(*vect_ds.geometry.total_bounds)
+
+    if (im_srs != vect_srs):
+        tr = T.from_crs(im_srs, vect_srs, always_xy=True)
+        im_extent = transform(tr.transform, im_extent)
+        
+    return im_extent.intersects(vect_extent)
diff --git a/TimeSeries/s2theia.py b/TimeSeries/s2theia.py
index 8c3f1fd..7830850 100644
--- a/TimeSeries/s2theia.py
+++ b/TimeSeries/s2theia.py
@@ -14,7 +14,7 @@ from osgeo import osr
 import datetime
 import uuid
 import shutil
-from Common.geotools import get_query_bbox
+from Common.geotools import get_query_bbox, check_extent_overlap
 from Common.geometry import get_displacements_to_ref, get_clearest_central_image
 
 def fetch(shp, dt, output_fld, credentials):
@@ -145,9 +145,8 @@ class S2TheiaTilePipeline:
 
     @classmethod
     def _check_roi(cls, x, roi, min_surf=0.0, temp_fld='/tmp'):
-        if cls._check(x):
+        if cls._check(x):            
             if os.path.isdir(x):
-                er = otb.Registry.CreateApplication('ExtractROI')
                 bnd = glob.glob(os.path.join(x, cls.PTRN_20m[0]))[0]
             elif os.path.splitext(x)[-1] == '.zip':
                 idf = cls.PTRN_20m[0].replace('*', '')
@@ -158,6 +157,9 @@ class S2TheiaTilePipeline:
                 tgt = open(bnd, 'wb')
                 with fle,tgt:
                     shutil.copyfileobj(fle, tgt)
+            if not check_extent_overlap(bnd, roi):
+                return False
+            er = otb.Registry.CreateApplication('ExtractROI')
             er.SetParameterString('in', bnd)
             er.SetParameterString('mode', 'fit')
             er.SetParameterString('mode.fit.vect', roi)
@@ -219,24 +221,30 @@ class S2TheiaTilePipeline:
                     self.input_dates = [self.input_dates[i] for i in idx]
                     self.tile_cloud_percentage = [self.tile_cloud_percentage[i] for i in idx]
 
-                self.set_input_epsg()
-                self.output_epsg = self.input_epsg
-                self.output_dates = self.input_dates
-
-                for img in self.image_list:
-                    for p in self.PTRN_ful:
-                        ifn, ofn = self.get_file(img, p)
-                        self.append(to_otb_pipeline(ifn), ofn, self.REF_TYPE, 'out', is_output=True)
-                    for p in self.PTRN_msk:
-                        ifn, ofn = self.get_file(img, p)
-                        self.append(to_otb_pipeline(ifn), ofn, self.MSK_TYPE, 'out', is_output=True)
+                if len(self.image_list) > 0:
+                    self.set_input_epsg()
+                    self.output_epsg = self.input_epsg
+                    self.output_dates = self.input_dates
+
+                    for img in self.image_list:
+                        for p in self.PTRN_ful:
+                            ifn, ofn = self.get_file(img, p)
+                            self.append(to_otb_pipeline(ifn), ofn, self.REF_TYPE, 'out', is_output=True)
+                        for p in self.PTRN_msk:
+                            ifn, ofn = self.get_file(img, p)
+                            self.append(to_otb_pipeline(ifn), ofn, self.MSK_TYPE, 'out', is_output=True)
+                else:
+                    warnings.warn('Empty pipeline after filtering for tile {}.'.format(self.tile_id))
 
         else:
-            warnings.warn('Empty pipeline. Need to set preprocessed inputs?')
+            warnings.warn('Empty pipeline at init. Need to set preprocessed inputs?')
 
     def __del__(self):
         if os.path.exists(self.temp_fld):
             shutil.rmtree(self.temp_fld)
+    
+    def is_empty(self):
+        return len(self.image_list) == 0
 
     def reset(self):
         self.pipe = []
@@ -775,20 +783,25 @@ class S2TheiaPipeline:
     _tile_id = S2TilePipeline._tile_id
     tiles = []
 
-    def __init__(self, fld, temp_fld='/tmp', input_date_interval=None, max_clouds_percentage=None):
+    def __init__(self, fld, temp_fld='/tmp', input_date_interval=None, max_clouds_percentage=None, roi=None):
         self.folder = fld
         self.temp_fld = temp_fld
         self.input_date_interval = input_date_interval
         self.max_clouds_percentage = max_clouds_percentage
         self.tile_list = set()
+        self.roi = roi
         img_list = [os.path.abspath(x) for x in glob.glob(os.path.join(self.folder, self.S2TilePipeline.PTRN_dir))
                     if os.path.isdir(x) or os.path.splitext(x)[-1] == '.zip']
         [self.tile_list.add(self._tile_id(x)) for x in img_list if self._check(x)]
         assert all([self.S2TilePipeline(fld, t, dummy_read=True) for t in self.tile_list])
-        self.tiles = [self.S2TilePipeline(fld, t, self.temp_fld, self.input_date_interval, self.max_clouds_percentage) for t in self.tile_list]
+        self.tiles = [self.S2TilePipeline(fld, t, self.temp_fld, self.input_date_interval, self.max_clouds_percentage, filter_by_roi=self.roi) for t in self.tile_list]
+        self.tiles = [x for x in self.tiles if not x.is_empty()]
         self.output_dates = None
-        self.roi = None
-        self.output_epsg = self.tiles[0].input_epsg
+
+        if len(self.tiles) > 0:
+            self.output_epsg = self.tiles[0].input_epsg
+        else:
+            warnings.warn('Empty pipeline.')
 
     def __del__(self):
         for x in self.tiles:
diff --git a/Workflows/operations.py b/Workflows/operations.py
index 43c8f96..db70174 100644
--- a/Workflows/operations.py
+++ b/Workflows/operations.py
@@ -40,9 +40,9 @@ def preprocess_s2(in_fld, out_fld, output_dates_file=None, roi=None,
     else:
         raise ValueError("Unsupported/non-valid provider")
 
-    s2 = S2Processor(in_fld, temp_fld=out_fld)
-    if roi is not None:
-        s2.set_roi(roi)
+    s2 = S2Processor(in_fld, temp_fld=out_fld, roi=roi)
+    #if roi is not None:
+    #    s2.set_roi(roi)
     if output_dates_file is not None:
         s2.set_output_dates_by_file(output_dates_file)
     else:
-- 
GitLab