Commit 55eff17f authored by Cresson Remi
Browse files

Merge branch '29-pipeline_cache' into 'develop'

Resolve "Pipelines cache"

Closes #29

See merge request !47
1 merge request: !47 Resolve "Pipelines cache"
Pipeline #39576 failed with stages
in 3 minutes and 58 seconds
Showing with 79 additions and 6 deletions
+79 -6
...@@ -76,7 +76,8 @@ disable=too-few-public-methods, ...@@ -76,7 +76,8 @@ disable=too-few-public-methods,
too-many-instance-attributes, too-many-instance-attributes,
too-many-locals, too-many-locals,
too-many-branches, too-many-branches,
too-many-statements too-many-statements,
too-many-arguments
# Enable the message, report, category or checker with the given id(s). You can # Enable the message, report, category or checker with the given id(s). You can
......
...@@ -2,9 +2,16 @@ ...@@ -2,9 +2,16 @@
""" """
This module helps to process local remote sensing products This module helps to process local remote sensing products
""" """
__version__ = "1.0.1" __version__ = "1.1.0"
from os.path import dirname, basename, isfile, join
import glob
import importlib
from .core import load_scenes, save_scenes # noqa: 401 from .core import load_scenes, save_scenes # noqa: 401
from .indexation import Index # noqa: 401 from .indexation import Index # noqa: 401
from .download import TheiaDownloader # noqa: 401 from .download import TheiaDownloader # noqa: 401
from .spatial import BoundingBox # noqa: 401 from .spatial import BoundingBox # noqa: 401
# Import every sibling ``*.py`` file of this package (except ``__init__.py`` itself) as a submodule,
# so that all of them are loaded as soon as the package is imported.
modules = glob.glob(join(dirname(__file__), "*.py"))
for f in modules:
    # Skip anything that is not a regular python source file, and the package initializer
    if not isfile(f) or not f.endswith('.py') or f.endswith('__init__.py'):
        continue
    # Relative import: ".<file name without the .py suffix>" resolved against this package
    importlib.import_module("." + basename(f)[:-3], __name__)
"""
This module provides mechanisms to enable pyotb raster caching on the local filesystem.
"""
from __future__ import annotations
import json
import hashlib
import tempfile
import os
import pyotb
class Cache(pyotb.Input):
    """
    Cached version of a pyotb pipeline output.

    At construction, the pipeline output is written to a raster file in a cache directory, together with a
    JSON sidecar holding the pipeline summary, unless both files are already there. The instance then
    behaves as a `pyotb.Input` reading the cached raster.
    """

    def __init__(self, pyotb_output, temporary_directory: str = None, output_parameter_key: str = None,
                 extension: str = None, pixel_type: str = None):
        """
        Initialize the cache.

        Args:
            pyotb_output: a pyotb.Output instance.
            temporary_directory: a temporary directory for the cached files. Default is system temp directory.
            output_parameter_key: output parameter key (default is the application `output_param`)
            extension: file extension, with or without the leading dot. Anything after a "?" is passed to
                the writer but is not part of the file path on disk
                (default: ".tif?&gdal:co:COMPRESS=DEFLATE&gdal:co:BIGTIFF=YES")
            pixel_type: pixel type (default: "float")

        """
        # Get app
        pyotb_app = pyotb_output.pyotb_app

        # Get summary
        summary = pyotb_app.summarize()  # need pyotb >= 1.5.1

        # Summary --> md5sum. The digest identifies the pipeline in the cache file names.
        desc = json.dumps(summary)
        md5sum = hashlib.md5(desc.encode('utf-8')).hexdigest()

        # App name
        app_name = summary["name"]

        # Cache filenames
        out_key = output_parameter_key if output_parameter_key else pyotb_app.output_param
        extension = extension if extension else ".tif?&gdal:co:COMPRESS=DEFLATE&gdal:co:BIGTIFF=YES"
        if not extension.startswith("."):
            extension = f".{extension}"
        pixel_type = pixel_type if pixel_type else "float"
        tmpdir = temporary_directory if temporary_directory else tempfile.gettempdir()
        prefix = os.path.join(tmpdir, f"{app_name}_{out_key}_{md5sum}")
        cache_file = f"{prefix}{extension}"
        json_file = f"{prefix}.json"

        # Actual path of the raster on disk: everything before the first "?" of the file name
        # (presumably the rest is an OTB extended filename -- TODO confirm)
        cache_path = cache_file.split("?")[0]

        # The cache is valid only when BOTH the json sidecar and the raster exist. Checking the sidecar
        # alone would hand a missing file to pyotb.Input when the raster has been removed, or when the
        # same pipeline was previously cached with another extension.
        if not (os.path.exists(json_file) and os.path.exists(cache_path)):
            # pyotb write
            pyotb_output.write(cache_file, pixel_type=pixel_type)
            # json, dumped after the raster so that it is only present once the write has returned
            with open(json_file, 'w', encoding='utf-8') as f:
                json.dump(summary, f, ensure_ascii=False, indent=4)

        super().__init__(filepath=cache_path)
...@@ -49,7 +49,7 @@ class Source(pyotb.Output): ...@@ -49,7 +49,7 @@ class Source(pyotb.Output):
""" """
def __init__(self, root_scene: Scene, out: str | pyotb.core.otbObject, parent: Source = None, def __init__(self, root_scene: Scene, out: str | pyotb.core.otbObject | Source, parent: Source = None,
output_parameter_key: str = 'out'): output_parameter_key: str = 'out'):
""" """
Args: Args:
...@@ -65,8 +65,12 @@ class Source(pyotb.Output): ...@@ -65,8 +65,12 @@ class Source(pyotb.Output):
# Since it can only be called with pyotb apps, we do the following: # Since it can only be called with pyotb apps, we do the following:
# - if the output is a str, (e.g. the original dimap filename), we instantiate a pyotb.Input(), # - if the output is a str, (e.g. the original dimap filename), we instantiate a pyotb.Input(),
# - else we use the original output (should be pyotb application) # - else we use the original output (should be pyotb application)
super().__init__(app=pyotb.Input(out).pyotb_app if isinstance(out, str) else out, app = out # Fine for all otbApplication, pyotb.App based classes
output_parameter_key=output_parameter_key) if isinstance(out, str):
app = pyotb.Input(out)
elif isinstance(out, pyotb.Output):
app = out.pyotb_app
super().__init__(app=app, output_parameter_key=output_parameter_key)
assert parent is not self, "You cannot assign a new source to its parent instance" assert parent is not self, "You cannot assign a new source to its parent instance"
self.parent = parent # parent source (is another Source instance) self.parent = parent # parent source (is another Source instance)
self._app_stack = [] # list of otb applications or output to keep trace self._app_stack = [] # list of otb applications or output to keep trace
......
...@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh: ...@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
setuptools.setup( setuptools.setup(
name="scenes", name="scenes",
version="1.0.0", version="1.1.0",
author="Rémi Cresson", author="Rémi Cresson",
author_email="remi.cresson@inrae.fr", author_email="remi.cresson@inrae.fr",
description="Library to ease the processing of local remote sensing products", description="Library to ease the processing of local remote sensing products",
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment