Commit d05ee5d6 authored by Jean-Baptiste Feret's avatar Jean-Baptiste Feret
Browse files

- changed name of some variables in internal functiosn for improved coherence

- completed the error message and error report when unexpected data arrive in kmeans
parent ea1296f7
......@@ -23,7 +23,7 @@
#' @param Input_Mask_File character. Path of the mask corresponding to the image
#' @param Pix_Per_Partition numeric. number of pixels for each partition
#' @param nb_partitions numeric. number of partition
#' @param CR boolean. Set to TRUE if continuum removal should be applied
#' @param Continuum_Removal boolean. Set to TRUE if continuum removal should be applied
#' @param nbCPU numeric. Number of CPUs to use in parallel.
#' @param MaxRAM numeric. MaxRAM maximum size of chunk in GB to limit RAM allocation when reading image file.
#' @param nbclusters numeric. number of clusters defined in k-Means
......@@ -31,7 +31,7 @@
#' @return None
#' @importFrom utils read.table
#' @export
map_spectral_species <- function(Input_Image_File,Output_Dir,PCA_Files,PCA_model,SpectralFilter,Input_Mask_File,Pix_Per_Partition,nb_partitions,CR= TRUE,TypePCA = "SPCA", nbclusters = 50, nbCPU = 1, MaxRAM = FALSE) {
map_spectral_species <- function(Input_Image_File,Output_Dir,PCA_Files,PCA_model,SpectralFilter,Input_Mask_File,Pix_Per_Partition,nb_partitions,Continuum_Removal= TRUE,TypePCA = "SPCA", nbclusters = 50, nbCPU = 1, MaxRAM = FALSE) {
# if no prior diversity map has been produced --> need PCA file
if (!file.exists(PCA_Files)) {
......@@ -58,8 +58,9 @@ map_spectral_species <- function(Input_Image_File,Output_Dir,PCA_Files,PCA_model
ImPathHDR <- get_HDR_name(Input_Image_File)
HDR <- read_ENVI_header(ImPathHDR)
Subset <- get_random_subset_from_image(Input_Image_File, HDR, Input_Mask_File, nb_partitions, Pix_Per_Partition)
SubsetInit <- Subset
# if needed, apply continuum removal
if (CR == TRUE) {
if (Continuum_Removal == TRUE) {
Subset$DataSubset <- apply_continuum_removal(Subset$DataSubset, SpectralFilter, nbCPU = nbCPU)
}
print("Apply PCA to subset prior to k-Means")
......@@ -88,9 +89,35 @@ map_spectral_species <- function(Input_Image_File,Output_Dir,PCA_Files,PCA_model
print("perform k-means clustering for each subset and define centroids")
# scaling factor subPCA between 0 and 1
Kmeans_info <- init_kmeans(dataPCA, Pix_Per_Partition, nb_partitions, nbclusters, nbCPU)
## 3- ASSIGN SPECTRAL SPECIES TO EACH PIXEL
print("apply Kmeans to the whole image and determine spectral species")
apply_kmeans(PCA_Files, PC_Select, Input_Mask_File, Kmeans_info, Spectral_Species_Path, nbCPU, MaxRAM)
if (Kmeans_info$Error==FALSE){
## 3- ASSIGN SPECTRAL SPECIES TO EACH PIXEL
print("apply Kmeans to the whole image and determine spectral species")
apply_kmeans(PCA_Files, PC_Select, Input_Mask_File, Kmeans_info, Spectral_Species_Path, nbCPU, MaxRAM)
} else {
## produce error report
# create directory where error should be stored
Output_Dir_Error <- define_output_subdir(Output_Dir, Input_Image_File, TypePCA, "ErrorReport")
# identify which samples cause problems
LocError <- unique(c(which(!is.finite(Kmeans_info$MinVal)),which(!is.finite(Kmeans_info$MaxVal))))
ValError <- which(!is.finite(dataPCA[,LocError[1]]))
# Get the original data corresponding to the first sample
DataError <- SubsetInit$DataSubset[ValError,]
DataErrorCR <- Subset$DataSubset[ValError,]
CoordinatesError <- SubsetInit$coordPix[ValError,]
# save these in a RData file
FileError <- file.path(Output_Dir_Error,'ErrorPixels.RData')
ErrorReport <- list('CoordinatesError' = CoordinatesError,'DataError' = DataError,'DataError_afterCR' = DataErrorCR)
save(ErrorReport, file = FileError)
message("")
message("*********************************************************")
message(" An error report directory has been produced. ")
message("Please check information about data causing errors here: ")
print(FileError)
message(" The process will now stop ")
message("*********************************************************")
message("")
stop()
}
}
return(invisible())
}
......@@ -122,27 +149,29 @@ init_kmeans <- function(dataPCA, Pix_Per_Partition, nb_partitions, nbclusters, n
message(" ")
message(" if nothing wrong identified with input data, please ")
message(" submit a bug report, reproduce the bug with reduced ")
message(" dataset and contact teh authors of the package ")
message(" dataset and contact the authors of the package ")
message(" process aborted ")
message("*********************************************************")
message("")
stop()
}
dataPCA <- center_reduce(dataPCA, m0, d0)
# get the dimensions of the images, and the number of subimages to process
dataPCA <- split(as.data.frame(dataPCA), rep(1:nb_partitions, each = Pix_Per_Partition))
# multiprocess kmeans clustering
plan(multiprocess, workers = nbCPU) ## Parallelize using four cores
Schedule_Per_Thread <- ceiling(length(dataPCA) / nbCPU)
res <- future_lapply(dataPCA, FUN = kmeans, centers = nbclusters, iter.max = 50, nstart = 10,
algorithm = c("Hartigan-Wong"), future.scheduling = Schedule_Per_Thread)
plan(sequential)
Centroids <- list(nb_partitions)
for (i in (1:nb_partitions)) {
Centroids[[i]] <- res[[i]]$centers
my_list <- list("Centroids" = NULL, "MinVal" = m0, "MaxVal" = M0, "Range" = d0, "Error" = TRUE)
return(my_list)
} else {
dataPCA <- center_reduce(dataPCA, m0, d0)
# get the dimensions of the images, and the number of subimages to process
dataPCA <- split(as.data.frame(dataPCA), rep(1:nb_partitions, each = Pix_Per_Partition))
# multiprocess kmeans clustering
plan(multiprocess, workers = nbCPU) ## Parallelize using four cores
Schedule_Per_Thread <- ceiling(length(dataPCA) / nbCPU)
res <- future_lapply(dataPCA, FUN = kmeans, centers = nbclusters, iter.max = 50, nstart = 10,
algorithm = c("Hartigan-Wong"), future.scheduling = Schedule_Per_Thread)
plan(sequential)
Centroids <- list(nb_partitions)
for (i in (1:nb_partitions)) {
Centroids[[i]] <- res[[i]]$centers
}
my_list <- list("Centroids" = Centroids, "MinVal" = m0, "MaxVal" = M0, "Range" = d0)
return(my_list)
}
my_list <- list("Centroids" = Centroids, "MinVal" = m0, "MaxVal" = M0, "Range" = d0)
return(my_list)
}
# Applies Kmeans clustering to PCA image
......
......@@ -11,8 +11,8 @@
#' Performs PCA for all images and create PCA file with either all or a selection of PCs
#'
#' @param ImPath character. Path of the image to be processed
#' @param MaskPath character. Path of the mask corresponding to the image
#' @param Input_Image_File character. Path of the image to be processed
#' @param Input_Mask_File character. Path of the mask corresponding to the image
#' @param Output_Dir character. Path for output directory
#' @param Continuum_Removal boolean. Set to TRUE if continuum removal should be applied
#' @param TypePCA character. Type of PCA: choose either "PCA" or "SPCA"
......@@ -25,29 +25,29 @@
#'
#' @return list of paths corresponding to resulting PCA files
#' @export
perform_PCA <- function(ImPath, MaskPath, Output_Dir, Continuum_Removal=TRUE, TypePCA="SPCA", NbPCs_To_Keep=FALSE,
perform_PCA <- function(Input_Image_File, Input_Mask_File, Output_Dir, Continuum_Removal=TRUE, TypePCA="SPCA", NbPCs_To_Keep=FALSE,
FilterPCA = FALSE, Excluded_WL = FALSE, nb_partitions = 20, nbCPU = 1, MaxRAM = 0.25) {
# check if format of raster data is as expected
check_data(ImPath)
if (!MaskPath==FALSE){
check_data(MaskPath,Mask = TRUE)
check_data(Input_Image_File)
if (!Input_Mask_File==FALSE){
check_data(Input_Mask_File,Mask = TRUE)
}
# define the path corresponding to image, mask and output directory
ImNames <- list()
ImNames$Input.Image <- ImPath
ImNames$Mask_list <- MaskPath
Output_Dir_Full <- define_output_directory(Output_Dir, ImPath, TypePCA)
ImNames$Input.Image <- Input_Image_File
ImNames$Mask_list <- Input_Mask_File
Output_Dir_Full <- define_output_directory(Output_Dir, Input_Image_File, TypePCA)
# Identify water vapor absorption bands in image and possibly other spectral domains to discard
SpectralFilter <- exclude_spectral_domains(ImPath, Excluded_WL = Excluded_WL)
SpectralFilter <- exclude_spectral_domains(Input_Image_File, Excluded_WL = Excluded_WL)
# Extract valid data subset and check validity
print("Extract pixels from the images to perform PCA on a subset")
# define number of pixels to be extracted from the image for each iteration
Pix_Per_Partition <- define_pixels_per_iter(ImNames, nb_partitions = nb_partitions)
nb_Pix_To_Sample <- nb_partitions * Pix_Per_Partition
ImPathHDR <- get_HDR_name(ImPath)
ImPathHDR <- get_HDR_name(Input_Image_File)
HDR <- read_ENVI_header(ImPathHDR)
# extract a random selection of pixels from image
Subset <- get_random_subset_from_image(ImPath, HDR, MaskPath, nb_partitions, Pix_Per_Partition)
Subset <- get_random_subset_from_image(Input_Image_File, HDR, Input_Mask_File, nb_partitions, Pix_Per_Partition)
# if needed, apply continuum removal
if (Continuum_Removal == TRUE) {
Subset$DataSubset <- apply_continuum_removal(Subset$DataSubset, SpectralFilter, nbCPU = nbCPU)
......@@ -93,10 +93,10 @@ perform_PCA <- function(ImPath, MaskPath, Output_Dir, Continuum_Removal=TRUE, T
PCsel <- 1:dim(PCA_model$dataPCA)[2]
}
Shade_Update <- paste(Output_Dir_Full, "ShadeMask_Update_PCA", sep = "")
MaskPath <- filter_PCA(ImPath, HDR, MaskPath, Shade_Update, SpectralFilter, Continuum_Removal, PCA_model, PCsel, TypePCA, nbCPU = nbCPU, MaxRAM = MaxRAM)
Input_Mask_File <- filter_PCA(Input_Image_File, HDR, Input_Mask_File, Shade_Update, SpectralFilter, Continuum_Removal, PCA_model, PCsel, TypePCA, nbCPU = nbCPU, MaxRAM = MaxRAM)
## Compute PCA 2 based on the updated shade mask ##
# extract a random selection of pixels from image
Subset <- get_random_subset_from_image(ImPath, HDR, MaskPath, nb_partitions, Pix_Per_Partition)
Subset <- get_random_subset_from_image(Input_Image_File, HDR, Input_Mask_File, nb_partitions, Pix_Per_Partition)
# if needed, apply continuum removal
if (Continuum_Removal == TRUE) {
Subset$DataSubset <- apply_continuum_removal(Subset$DataSubset, SpectralFilter, nbCPU = nbCPU)
......@@ -138,21 +138,21 @@ perform_PCA <- function(ImPath, MaskPath, Output_Dir, Continuum_Removal=TRUE, T
PCA_model$dataPCA <- NULL
# CREATE PCA FILE CONTAINING ONLY SELECTED PCs
print("Apply PCA model to the whole image")
Output_Dir_PCA <- define_output_subdir(Output_Dir, ImPath, TypePCA, "PCA")
Output_Dir_PCA <- define_output_subdir(Output_Dir, Input_Image_File, TypePCA, "PCA")
PCA_Files <- paste(Output_Dir_PCA, "OutputPCA_", Nb_PCs, "_PCs", sep = "")
write_PCA_raster(ImPath, MaskPath, PCA_Files, PCA_model, SpectralFilter, Nb_PCs, Continuum_Removal, TypePCA, nbCPU, MaxRAM = MaxRAM)
write_PCA_raster(Input_Image_File, Input_Mask_File, PCA_Files, PCA_model, SpectralFilter, Nb_PCs, Continuum_Removal, TypePCA, nbCPU, MaxRAM = MaxRAM)
# save workspace for this stage
WS_Save <- paste(Output_Dir_PCA, "PCA_Info.RData", sep = "")
save(PCA_model, file = WS_Save)
my_list <- list("PCA_Files" = PCA_Files,"Pix_Per_Partition" =Pix_Per_Partition, "nb_partitions" = nb_partitions, "MaskPath" = MaskPath, "PCA_model" =PCA_model,"SpectralFilter"= SpectralFilter)
my_list <- list("PCA_Files" = PCA_Files,"Pix_Per_Partition" =Pix_Per_Partition, "nb_partitions" = nb_partitions, "MaskPath" = Input_Mask_File, "PCA_model" =PCA_model,"SpectralFilter"= SpectralFilter)
return(my_list)
}
# perform filtering based on extreme values PCA identified through PCA
#
# @param ImPath image path
# @param HDR hdr file file correspmonding to ImPath
# @param MaskPath shade file path
# @param Input_Image_File image path
# @param HDR hdr file file correspmonding to Input_Image_File
# @param Input_Mask_File shade file path
# @param Shade_Update updated shade mask
# @param Spectral spectral information from data
# @param CR logical: does continuum removal need to be performed?
......@@ -165,7 +165,7 @@ perform_PCA <- function(ImPath, MaskPath, Output_Dir, Continuum_Removal=TRUE, T
# @return Shade_Update = updated shade mask
# ' @importFrom stats sd
# ' @importFrom matlab ones
filter_PCA <- function(ImPath, HDR, MaskPath, Shade_Update, Spectral, CR, PCA_model, PCsel, TypePCA, nbCPU = 1, MaxRAM = 0.25) {
filter_PCA <- function(Input_Image_File, HDR, Input_Mask_File, Shade_Update, Spectral, CR, PCA_model, PCsel, TypePCA, nbCPU = 1, MaxRAM = 0.25) {
# 1- get extreme values falling outside of mean +- 3SD for PCsel first components
# compute mean and sd of the 5 first components of the sampled data
......@@ -227,14 +227,14 @@ filter_PCA <- function(ImPath, HDR, MaskPath, Shade_Update, Spectral, CR, PCA_mo
nbLines <- SeqRead_Image$Lines_Per_Chunk[i]
lenBin <- SeqRead_Image$ReadByte_End[i] - SeqRead_Image$ReadByte_Start[i] + 1
ImgFormat <- "2D"
Image_Chunk <- read_image_subset(ImPath, HDR, Byte_Start, lenBin, nbLines, Image_Format, ImgFormat)
Image_Chunk <- read_image_subset(Input_Image_File, HDR, Byte_Start, lenBin, nbLines, Image_Format, ImgFormat)
Byte_Start <- SeqRead_Shade$ReadByte_Start[i]
nbLines <- SeqRead_Shade$Lines_Per_Chunk[i]
lenBin <- SeqRead_Shade$ReadByte_End[i] - SeqRead_Shade$ReadByte_Start[i] + 1
ImgFormat <- "Shade"
if ((!MaskPath == FALSE) & (!MaskPath == "")) {
Shade_Chunk <- read_image_subset(MaskPath, HDR_Shade, Byte_Start, lenBin, nbLines, Shade_Format, ImgFormat)
if ((!Input_Mask_File == FALSE) & (!Input_Mask_File == "")) {
Shade_Chunk <- read_image_subset(Input_Mask_File, HDR_Shade, Byte_Start, lenBin, nbLines, Shade_Format, ImgFormat)
} else {
Shade_Chunk <- ones(nbLines * HDR$samples, 1)
}
......@@ -291,8 +291,8 @@ filter_PCA <- function(ImPath, HDR, MaskPath, Shade_Update, Spectral, CR, PCA_mo
# writes an ENVI image corresponding to PCA
#
# @param ImPath path for the raster on which PCA is applied
# @param MaskPath path for the corresponding mask
# @param Input_Image_File path for the raster on which PCA is applied
# @param Input_Mask_File path for the corresponding mask
# @param PCA_Path path for resulting PCA
# @param PCA_model PCA model description
# @param Spectral spectral information to be used in the image
......@@ -303,11 +303,11 @@ filter_PCA <- function(ImPath, HDR, MaskPath, Shade_Update, Spectral, CR, PCA_mo
# @param MaxRAM max RAM when initial image is read (in Gb)
#
# @return None
write_PCA_raster <- function(ImPath, MaskPath, PCA_Path, PCA_model, Spectral, Nb_PCs, CR, TypePCA, nbCPU = 1, MaxRAM = 0.25) {
ImPathHDR <- get_HDR_name(ImPath)
write_PCA_raster <- function(Input_Image_File, Input_Mask_File, PCA_Path, PCA_model, Spectral, Nb_PCs, CR, TypePCA, nbCPU = 1, MaxRAM = 0.25) {
ImPathHDR <- get_HDR_name(Input_Image_File)
HDR <- read_ENVI_header(ImPathHDR)
if ((!MaskPath == FALSE) & (!MaskPath == "")) {
ShadeHDR <- get_HDR_name(MaskPath)
if ((!Input_Mask_File == FALSE) & (!Input_Mask_File == "")) {
ShadeHDR <- get_HDR_name(Input_Mask_File)
HDR_Shade <- read_ENVI_header(ShadeHDR)
} else {
HDR_Shade <- FALSE
......@@ -366,7 +366,7 @@ write_PCA_raster <- function(ImPath, MaskPath, PCA_Path, PCA_model, Spectral, Nb
nbLines <- SeqRead_Image$Lines_Per_Chunk[i]
lenBin <- SeqRead_Image$ReadByte_End[i] - SeqRead_Image$ReadByte_Start[i] + 1
ImgFormat <- "2D"
Image_Chunk <- read_image_subset(ImPath, HDR, Byte_Start, lenBin, nbLines, Image_Format, ImgFormat)
Image_Chunk <- read_image_subset(Input_Image_File, HDR, Byte_Start, lenBin, nbLines, Image_Format, ImgFormat)
Byte_Start <- SeqRead_Shade$ReadByte_Start[i]
nbLines <- SeqRead_Shade$Lines_Per_Chunk[i]
......@@ -374,7 +374,7 @@ write_PCA_raster <- function(ImPath, MaskPath, PCA_Path, PCA_model, Spectral, Nb
if (typeof(HDR_Shade) == 'list') {
ImgFormat <- "Shade"
Shade_Chunk <- read_image_subset(MaskPath, HDR_Shade, Byte_Start, lenBin, nbLines, Shade_Format, ImgFormat)
Shade_Chunk <- read_image_subset(Input_Mask_File, HDR_Shade, Byte_Start, lenBin, nbLines, Shade_Format, ImgFormat)
elimShade <- which(Shade_Chunk == 0)
keepShade <- which(Shade_Chunk == 1)
Image_Chunk <- Image_Chunk[keepShade, ]
......@@ -457,10 +457,10 @@ pca <- function(X, type) {
#
# @return Pix_Per_Partition number of pixels per iteration
define_pixels_per_iter <- function(ImNames, nb_partitions = nb_partitions) {
ImPath <- ImNames$Input.Image
MaskPath <- ImNames$Mask_list
Input_Image_File <- ImNames$Input.Image
Input_Mask_File <- ImNames$Mask_list
# define dimensions of the image
ImPathHDR <- get_HDR_name(ImPath)
ImPathHDR <- get_HDR_name(Input_Image_File)
HDR <- read_ENVI_header(ImPathHDR)
Image_Format <- ENVI_type2bytes(HDR)
ipix <- as.double(HDR$lines)
......@@ -469,10 +469,10 @@ define_pixels_per_iter <- function(ImNames, nb_partitions = nb_partitions) {
lenTot <- nbPixels * as.double(HDR$bands)
ImSizeGb <- (lenTot * Image_Format$Bytes) / (1024^3)
# if shade mask, update number of pixels
if ((!MaskPath == FALSE) & (!MaskPath == "")) {
if ((!Input_Mask_File == FALSE) & (!Input_Mask_File == "")) {
# read shade mask
fid <- file(
description = MaskPath, open = "rb", blocking = TRUE,
description = Input_Mask_File, open = "rb", blocking = TRUE,
encoding = getOption("encoding"), raw = FALSE
)
ShadeMask <- readBin(fid, integer(), n = nbPixels, size = 1)
......
......@@ -5,8 +5,9 @@
\title{maps spectral species based on PCA file computed previously}
\usage{
map_spectral_species(Input_Image_File, Output_Dir, PCA_Files, PCA_model,
SpectralFilter, MaskPath, Pix_Per_Partition, nb_partitions, CR = TRUE,
TypePCA = "SPCA", nbclusters = 50, nbCPU = 1, MaxRAM = FALSE)
SpectralFilter, Input_Mask_File, Pix_Per_Partition, nb_partitions,
Continuum_Removal = TRUE, TypePCA = "SPCA", nbclusters = 50,
nbCPU = 1, MaxRAM = FALSE)
}
\arguments{
\item{Input_Image_File}{character. Path of the image to be processed}
......@@ -20,13 +21,13 @@ map_spectral_species(Input_Image_File, Output_Dir, PCA_Files, PCA_model,
\item{SpectralFilter}{list. information about spectral band location
(central wavelength), bands to keep...}
\item{MaskPath}{character. Path of the mask corresponding to the image}
\item{Input_Mask_File}{character. Path of the mask corresponding to the image}
\item{Pix_Per_Partition}{numeric. number of pixels for each partition}
\item{nb_partitions}{numeric. number of partition}
\item{CR}{boolean. Set to TRUE if continuum removal should be applied}
\item{Continuum_Removal}{boolean. Set to TRUE if continuum removal should be applied}
\item{TypePCA}{character. Type of PCA: choose either "PCA" or "SPCA"}
......
......@@ -4,15 +4,15 @@
\alias{perform_PCA}
\title{Performs PCA for all images and create PCA file with either all or a selection of PCs}
\usage{
perform_PCA(ImPath, MaskPath, Output_Dir, Continuum_Removal = TRUE,
TypePCA = "SPCA", NbPCs_To_Keep = FALSE, FilterPCA = FALSE,
Excluded_WL = FALSE, nb_partitions = 20, nbCPU = 1,
MaxRAM = 0.25)
perform_PCA(Input_Image_File, Input_Mask_File, Output_Dir,
Continuum_Removal = TRUE, TypePCA = "SPCA", NbPCs_To_Keep = FALSE,
FilterPCA = FALSE, Excluded_WL = FALSE, nb_partitions = 20,
nbCPU = 1, MaxRAM = 0.25)
}
\arguments{
\item{ImPath}{character. Path of the image to be processed}
\item{Input_Image_File}{character. Path of the image to be processed}
\item{MaskPath}{character. Path of the mask corresponding to the image}
\item{Input_Mask_File}{character. Path of the mask corresponding to the image}
\item{Output_Dir}{character. Path for output directory}
......
......@@ -150,7 +150,7 @@ Here is the code to perform PCA and select PCA bands:
```{r PCA}
print("PERFORM PCA ON RASTER")
PCA_Output = perform_PCA(Input_Image_File, Input_Mask_File, Output_Dir,
TypePCA = TypePCA, FilterPCA = FilterPCA, nbCPU = nbCPU,MaxRAM = MaxRAM)
TypePCA = TypePCA, FilterPCA = FilterPCA, nbCPU = nbCPU,MaxRAM = MaxRAM, Continuum_Removal = TRUE)
# path for the PCA raster
PCA_Files = PCA_Output$PCA_Files
# number of pixels used for each partition used for k-means clustering
......@@ -178,7 +178,7 @@ The first step towards \alpha and \beta diversity mapping corresponds to the co
print("MAP SPECTRAL SPECIES")
map_spectral_species(Input_Image_File, Output_Dir, PCA_Files, PCA_model, SpectralFilter, Input_Mask_File,
Pix_Per_Partition, nb_partitions, nbCPU=nbCPU, MaxRAM=MaxRAM,
nbclusters = nbclusters, TypePCA = TypePCA, CR = TRUE)
nbclusters = nbclusters, TypePCA = TypePCA, Continuum_Removal = TRUE)
```
SpectralSpecies is then stored in a raster file located here:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment