From e23c7f08f293dd7f2e9b004a0750168a01709073 Mon Sep 17 00:00:00 2001
From: jmmonnet <jean-matthieu.monnet@inrae.fr>
Date: Fri, 15 Jul 2022 17:28:38 +0200
Subject: [PATCH] gap detection benefits from catalog engine

---
 R/coregistration.Rmd           |  9 +---
 R/forest.structure.metrics.Rmd |  2 +-
 R/gaps.edges.detection.Rmd     | 93 +++++++++++++++++++++++++++++++---
 3 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/R/coregistration.Rmd b/R/coregistration.Rmd
index 6299cfc..3754c68 100755
--- a/R/coregistration.Rmd
+++ b/R/coregistration.Rmd
@@ -156,13 +156,8 @@ r <- lidaRtRee::circle2Raster(centre$XGPS, centre$YGPS, p_radius, resolution = r
 # keep only trees with diameter information
 trees <- trees[!is.na(trees[, "dia"]), ]
 # create plot mask
-r_mask <- lidaRtRee::raster_xy_mask(rbind(
-  c(centre$XGPS, centre$YGPS),
-  c(centre$XGPS, centre$YGPS)
-),
-c(p_radius, p_radius), r,
-binary = T
-)
+r_mask <- lidaRtRee::raster_xy_mask(c(centre$XGPS, centre$YGPS),
+                                    p_radius, r, binary = T)
 # replace 0 by NA
 r_mask[r_mask == 0] <- NA
 # specify projection
diff --git a/R/forest.structure.metrics.Rmd b/R/forest.structure.metrics.Rmd
index e3214fe..156908a 100755
--- a/R/forest.structure.metrics.Rmd
+++ b/R/forest.structure.metrics.Rmd
@@ -322,7 +322,7 @@ Tree tops are detected with the function `treeSegmentation` and then extracted w
 # tree top detection (default parameters)
 segms <- lidaRtRee::tree_segmentation(chm, hmin = 5)
 # extraction to data.frame
-trees <- lidaRtRee::tree_extraction(segms$filled_dem, segms$local_maxima, segms$segments_id)
+trees <- lidaRtRee::tree_extraction(segms)
 # display maps
 par(mfrow = c(1, 2))
 terra::plot(chm, main = "CHM and tree tops")
diff --git a/R/gaps.edges.detection.Rmd b/R/gaps.edges.detection.Rmd
index e4d84dc..340d58f 100755
--- a/R/gaps.edges.detection.Rmd
+++ b/R/gaps.edges.detection.Rmd
@@ -29,15 +29,20 @@ Many thanks to Pascal ObstÃ©tar for checking code and improvement suggestions.
 
 ## Study area and ALS data
 
-The study area  is a 200m x 200m forest located in the Jura mountain (France). The following code shows how to extract the ALS data from a folder containing normalized LAS files, and compute the Canopy Height Model (CHM) at 1 m resolution.
+The study area  is a 500 m x 500 m forest located in the Jura mountain (France). The following code shows how to extract the ALS data from a folder containing normalized LAS files, and compute the Canopy Height Model (CHM) at 1 m resolution. For processing a set of several LAS/LAZ files, please see the "Batch processing section".
 ```{r extractals, eval=FALSE}
 # build catalog from folder with normalized LAS/LAZ files
-cata <- lidR::readALSLAScatalog("/las.folder/")
+# option to read only xyzc attributes (coordinates, intensity, echo order and classification) from height files
+# option to drop points with height above 50
+cata <- lidR::readALSLAScatalog("/las.folder/",
+                                progress = FALSE,
+                                select = "xyzirnc",
+                                filter = "-drop_z_above 50"
+                                )
 # "/media/reseau/lessem/ProjetsCommuns/Lidar/data/Franche-Comte/norm.laz/"
 # set coordinate system
 lidR::projection(cata) <- 2154
-# option to read only xyzc attributes (coordinates, intensity, echo order and classification) from height files
-lidR::opt_select(cata) <- "xyzirnc"
+#
 # define region of interest where points should be extracted
 centre <- c(944750, 6638750)
 size <- 250
@@ -69,8 +74,6 @@ Missing, low and high values are replaced for better visualization and processin
 chm[is.na(chm)] <- 0
 # replace negative values by 0
 chm[chm < 0] <- 0
-# set maximum threshold at 50m
-chm[chm > 50] <- 50
 # display CHM and areas where vegetation height is lower than 1m
 par(mfrow = c(1, 2))
 terra::plot(chm, main = "CHM (m)")
@@ -189,4 +192,82 @@ terra::plot(edge_outside, main = "Edges (detection by dilation)", legend = FALSE
 ```
 Percentage of edges can be calculated as the ratio between edge pixels surface and total surface, multiplied by 100. The result is dependent on the raster resolution. Obtained percentage for the "erosion" method is `r round(sum(terra::values(edge_inside))/(nrow(edge_inside)*ncol(edge_inside))*100, 1)`, whereas it is `r round(sum(terra::values(edge_outside))/(nrow(edge_outside)*ncol(edge_outside))*100, 1)` for method "dilation".
 
+## Batch processing
+
+In case of a LAS-catalog object referencing LAS/LAZ files (which should be normalized; if not, please refer to the [preprocessing tutorial](https://gitlab.irstea.fr/jean-matthieu.monnet/lidartree_tutorials/-/blob/master/R/ALS_data_preprocessing.Rmd)), the `lidR` catalog engine provides the data as chunks to the detection algorithm, and results are aggregated afterwards. In order to avoid border effects, a buffer zone is added to chunks. 
+
+- `chunk_size` is a trade-off between the number of chunks to process and the amount of RAM required to process a single chunk. It is advisable to set its value to 0 (processing by file) in case files are non-overlapping tiles and they can be loaded in RAM.
+- `chunk_buffer` size is a trade-off between redundant processing of the overlap area, and making sure that a gap at the border of a chunk is fully contained in the buffer and hence total surface is correctly estimated.
+
+In case a gap spans over two files:
+
+* if it is included in the buffer, then the gap portion in each tile has a different id, but the total surface of the gap is correct.
+- if it is partially included in the buffer, then the total surface is under-estimated.
+
+Chunks can be processed with parallel computing, within limits of the cluster's RAM and number of cores. I
+
+The steps for processing a batch of las/laz files are :
+
+- build catalog of files and set catalog processing options
+- provide gap detection parameters
+- set cluster options for parallel processing
+- apply `lidaRtRee::gap_detection` function to catalog.
+
+```{r gap_detection_LAScatalog, include=TRUE, fig.width = 5.5, fig.height = 4.5}
+rm(list = ls())
+# BUILD CATALOG OF FILES AND SET CATALOG PROCESSING OPTIONS
+# folder containing the files
+lazdir <- "../data/forest.structure.metrics"
+# build catalog and set options
+# - progress: disable or not display of catalog processing
+# - select: read only xyzc attributes (coordinates, intensity,
+# echo order and classification) from height files
+# - chunk_size: tile size to split area into chunks to process,
+# trade-off between RAM capacity VS total number of tiles to process
+# here 70 for example purpose with small area
+# - chunk_buffer: around each tile to avoid border effects on segmentation results
+# trade-off between making sure a whole tree crown is processed in case its top
+# is on the border VS duplicate processing
+# 5 m is minimum, 10 is probably better depending on tree size
+cata <- lidR::readALSLAScatalog(lazdir,
+                                progress = FALSE, 
+                                select = "xyzc",
+                                chunk_size = 0,
+                                chunk_buffer = 20)
+# set coordinate system
+lidR::projection(cata) <- 2154
+#
+# CLUSTER PARAMETERS
+# specify to use two parallel sessions
+future::plan("multisession", workers = 2L)
+# remove warning when using random numbers in parallel sessions
+options(future.rng.onMisuse = "ignore")
+#
+# DETECTION PARAMETERS
+resolution <- 1
+#
+# PROCESSING
+# perform tree detection with crown polygon output
+gaps <- lidaRtRee::gap_detection(cata, res = resolution)
+```
+
+The figures below show that the large gap at the top-left had by chance the same id in both chunks, but its surface is underestimated when computed from the right tile.
+
+
+```{r gap_detection_LAScatalog_plot, include=TRUE, fig.width = 12, fig.height = 3}
+# display CHM and areas where vegetation height is lower than 1m
+par(mfrow = c(1, 2))
+# plot gap surface
+terra::plot(gaps$gap_id,
+  main = "Gap id",
+  col = rainbow(255, end = 5 / 6)
+)
+terra::plot(log(gaps$gap_surface),
+  main = "log(Gap surface)",
+  col = rainbow(255, end = 5 / 6)
+)
+```
+
+In case the spatial extent is large, it might not be possible to hold the result for all files in memory, so that results for each chunk should be save on disk (see [corresponding documentation of lidR package](https://cran.r-project.org/web/packages/lidR/vignettes/lidR-LAScatalog-engine.html#write-independent-chunks-on-disk)).
+
 ## References
\ No newline at end of file
-- 
GitLab