Lib_MapSpectralSpecies.R 11.7 KB
Newer Older
jbferet's avatar
jbferet committed
1
# ==============================================================================
Florian de Boissieu's avatar
Florian de Boissieu committed
2
# biodivMapR
jbferet's avatar
jbferet committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Lib_MapSpectralSpecies.R
# ==============================================================================
# PROGRAMMERS:
# Jean-Baptiste FERET <jb.feret@irstea.fr>
# Copyright 2019/06 Jean-Baptiste FERET
# ==============================================================================
# This Library applies clustering on a selection of components stored in a PCA
# file previously created ("Perform_PCA.R") and produces corresponding spectral
# species
# ==============================================================================

#' maps spectral species based on PCA file computed previously
#'
#' @param Input.Image.File Path and name of the image to be processed
#' @param Output.Dir output directory
De Boissieu Florian's avatar
De Boissieu Florian committed
18
#' @param TypePCA Type of PCA: "PCA" or "SPCA"
jbferet's avatar
jbferet committed
19
#' @param PCA.Files path for PCA file
20
21
#' @param nbCPU numeric. Number of CPUs to use in parallel.
#' @param MaxRAM numeric. MaxRAM maximum size of chunk in GB to limit RAM allocation when reading image file.
jbferet's avatar
jbferet committed
22
23
24
#' @param nbclusters number of clusters defined in k-Means
#'
#' @export
25
Map.Spectral.Species <- function(Input.Image.File, Output.Dir, PCA.Files, TypePCA = "SPCA", nbclusters = 50, nbCPU = 1, MaxRAM = FALSE) {
jbferet's avatar
jbferet committed
26
27

  # for each image
28
29
30
31
  Output.Dir2 <- Define.Output.Dir(Output.Dir, Input.Image.File, TypePCA)
  Output.Dir.SS <- Define.Output.SubDir(Output.Dir, Input.Image.File, TypePCA, "SpectralSpecies")
  Output.Dir.PCA <- Define.Output.SubDir(Output.Dir, Input.Image.File, TypePCA, "PCA")
  Spectral.Species.Path <- paste(Output.Dir.SS, "SpectralSpecies", sep = "")
jbferet's avatar
jbferet committed
32
  # if no prior diversity map has been produced --> need PCA file
33
34
35
36
  if (!file.exists(PCA.Files)) {
    message("")
    message("*********************************************************")
    message("WARNING: PCA file expected but not found")
jbferet's avatar
jbferet committed
37
    print(PCA.Files)
38
39
40
    message("process aborted")
    message("*********************************************************")
    message("")
jbferet's avatar
jbferet committed
41
42
    stop()
  } else {
43
    WS_Save <- paste(Output.Dir2, "PCA_Info.RData", sep = "")
jbferet's avatar
jbferet committed
44
45
    load(file = WS_Save)
    ##            1- Select components used to perform clustering           ##
46
47
48
49
50
51
    PC.Select.Path <- paste(Output.Dir.PCA, "Selected_Components.txt", sep = "")
    if (file.exists(PC.Select.Path)) {
      PC.Select <- read.table(PC.Select.Path)[[1]]
      dataPCA <- PCA.model$dataPCA[, PC.Select]
      if (length(PC.Select) == 1) {
        dataPCA <- matrix(dataPCA, ncol = 1)
jbferet's avatar
jbferet committed
52
      }
53
      message("Selected components:")
jbferet's avatar
jbferet committed
54
      print(PC.Select)
55
56
      message("Please add carriage return after last selected component if not part of the list")
      message("If these do not match with your selection, please correct file following file:")
jbferet's avatar
jbferet committed
57
58
      print(PC.Select.Path)
    } else {
59
60
      print(paste("File named ", PC.Select.Path, "needs to be created first"))
      print("Image processing aborted")
jbferet's avatar
jbferet committed
61
62
63
      exit()
    }
    ##    2- PERFORM KMEANS FOR EACH ITERATION & DEFINE SPECTRAL SPECIES    ##
64
    print("perform k-means clustering for each subset and define centroids")
jbferet's avatar
jbferet committed
65
    # scaling factor subPCA between 0 and 1
66
    Kmeans.info <- Init.Kmeans(dataPCA, Pix.Per.Iter, NbIter, nbclusters, nbCPU)
jbferet's avatar
jbferet committed
67
    ##              3- ASSIGN SPECTRAL SPECIES TO EACH PIXEL                ##
68
69
    print("apply Kmeans to the whole image and determine spectral species")
    Apply.Kmeans(PCA.Files, PC.Select, ImPathShade, Kmeans.info, Spectral.Species.Path, nbCPU, MaxRAM)
jbferet's avatar
jbferet committed
70
71
72
73
  }
  return()
}

74
75
76
77
78
79
80
81
82
# computes k-means from NbIter subsets taken from dataPCA
#
# @param dataPCA initial dataset sampled from PCA image
# @param Pix.Per.Iter number of pixels per iteration
# @param NbIter number of iterations
# @param nbCPU
# @param nbclusters number of clusters used in kmeans
#
# @return list of centroids and parameters needed to center/reduce data
De Boissieu Florian's avatar
De Boissieu Florian committed
83
#' @importFrom future plan multiprocess sequential
84
#' @importFrom future.apply future_lapply
De Boissieu Florian's avatar
De Boissieu Florian committed
85
#' @importFrom stats kmeans
86
87
88
89
90
Init.Kmeans <- function(dataPCA, Pix.Per.Iter, NbIter, nbclusters, nbCPU = 1) {
  m0 <- apply(dataPCA, 2, function(x) min(x))
  M0 <- apply(dataPCA, 2, function(x) max(x))
  d0 <- M0 - m0
  dataPCA <- Center.Reduce(dataPCA, m0, d0)
jbferet's avatar
jbferet committed
91
  # get the dimensions of the images, and the number of subimages to process
92
  dataPCA <- split(as.data.frame(dataPCA), rep(1:NbIter, each = Pix.Per.Iter))
jbferet's avatar
jbferet committed
93
94
  # multiprocess kmeans clustering
  plan(multiprocess, workers = nbCPU) ## Parallelize using four cores
95
  Schedule.Per.Thread <- ceiling(length(dataPCA) / nbCPU)
De Boissieu Florian's avatar
De Boissieu Florian committed
96
97
  res <- future_lapply(dataPCA, FUN = kmeans, centers = nbclusters, iter.max = 50, nstart = 10,
                       algorithm = c("Hartigan-Wong"), future.scheduling = Schedule.Per.Thread)
jbferet's avatar
jbferet committed
98
  plan(sequential)
99
100
101
  Centroids <- list(NbIter)
  for (i in (1:NbIter)) {
    Centroids[[i]] <- res[[i]]$centers
jbferet's avatar
jbferet committed
102
  }
103
  my_list <- list("Centroids" = Centroids, "MinVal" = m0, "MaxVal" = M0, "Range" = d0)
jbferet's avatar
jbferet committed
104
105
106
  return(my_list)
}

107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Applies Kmeans clustering to PCA image
#
# @param PCA.Path path for the PCA image
# @param PC.Select PCs selected from PCA
# @param ImPathShade shade mask
# @param Kmeans.info information about kmeans computed in previous step
# @param nbCPU
# @param MaxRAM
# @param Spectral.Species.Path path for spectral species file to be written
#
# @return
Apply.Kmeans <- function(PCA.Path, PC.Select, ImPathShade, Kmeans.info, Spectral.Species.Path, nbCPU = 1, MaxRAM = FALSE) {
  ImPathHDR <- Get.HDR.Name(PCA.Path)
  HDR.PCA <- read.ENVI.header(ImPathHDR)
  PCA.Format <- ENVI.Type2Bytes(HDR.PCA)
  HDR.Shade <- Get.HDR.Name(ImPathShade)
  HDR.Shade <- read.ENVI.header(HDR.Shade)
jbferet's avatar
jbferet committed
124
  # prepare for sequential processing: SeqRead.Image informs about byte location to read
125
126
  if (MaxRAM == FALSE) {
    MaxRAM <- 0.25
jbferet's avatar
jbferet committed
127
  }
128
129
130
  nbPieces <- Split.Image(HDR.PCA, MaxRAM)
  SeqRead.PCA <- Where.To.Read(HDR.PCA, nbPieces)
  SeqRead.Shade <- Where.To.Read(HDR.Shade, nbPieces)
jbferet's avatar
jbferet committed
131
  # create output file for spectral species assignment
132
133
134
135
136
137
138
139
140
141
142
143
144
  HDR.SS <- HDR.PCA
  nbIter <- length(Kmeans.info$Centroids)
  HDR.SS$bands <- nbIter
  HDR.SS$`data type` <- 1
  Iter <- paste('Iter', 1:nbIter, collapse = ", ")
  HDR.SS$`band names` <- Iter
  HDR.SS$wavelength <- NULL
  HDR.SS$fwhm <- NULL
  HDR.SS$resolution <- NULL
  HDR.SS$bandwidth <- NULL
  HDR.SS$purpose <- NULL
  HDR.SS$`byte order` <- Get.Byte.Order()
  headerFpath <- paste(Spectral.Species.Path, ".hdr", sep = "")
jbferet's avatar
jbferet committed
145
146
  write.ENVI.header(HDR.SS, headerFpath)
  # create Spectral species file
147
148
149
150
  fidSS <- file(
    description = Spectral.Species.Path, open = "wb", blocking = TRUE,
    encoding = getOption("encoding"), raw = FALSE
  )
jbferet's avatar
jbferet committed
151
152
  close(fidSS)

153
154
155
156
157
158
159
160
161
162
163
  for (i in 1:nbPieces) {
    print(paste("Spectral Species Piece #", i, "/", nbPieces))
    Location.RW <- list()
    Location.RW$nbLines <- SeqRead.PCA$Lines.Per.Chunk[i]
    Location.RW$Byte.Start.PCA <- SeqRead.PCA$ReadByte.Start[i]
    Location.RW$lenBin.PCA <- SeqRead.PCA$ReadByte.End[i] - SeqRead.PCA$ReadByte.Start[i] + 1
    Location.RW$Byte.Start.Shade <- SeqRead.Shade$ReadByte.Start[i]
    Location.RW$lenBin.Shade <- SeqRead.Shade$ReadByte.End[i] - SeqRead.Shade$ReadByte.Start[i] + 1
    Location.RW$Byte.Start.SS <- 1 + (SeqRead.Shade$ReadByte.Start[i] - 1) * nbIter
    Location.RW$lenBin.SS <- nbIter * (SeqRead.Shade$ReadByte.End[i] - SeqRead.Shade$ReadByte.Start[i]) + 1
    Compute.Spectral.Species(PCA.Path, ImPathShade, Spectral.Species.Path, Location.RW, PC.Select, Kmeans.info, nbCPU)
jbferet's avatar
jbferet committed
164
165
166
167
  }
  return()
}

168
169
170
171
172
173
174
175
176
177
178
179
180
181
# this function reads PCA file and defines the spectral species for each pixel
# based on the set of cluster centroids defined for each iteration
# applies kmeans --> closest cluster corresponds to the "spectral species"
#
# @param PCA.Path path for the PCA image
# @param ImPathShade shade mask
# @param Spectral.Species.Path path for spectral species file to be written
# @param Location.RW where to read/write information in binary file
# @param PC.Select PCs selected from PCA
# @param nbCPU
# @param Kmeans.info information about kmeans computed in previous step
#
# @return
#' @importFrom snow splitRows
De Boissieu Florian's avatar
De Boissieu Florian committed
182
#' @importFrom future plan multiprocess sequential
183
184
#' @importFrom future.apply future_lapply
Compute.Spectral.Species <- function(PCA.Path, ImPathShade, Spectral.Species.Path, Location.RW, PC.Select, Kmeans.info, nbCPU = 1) {
jbferet's avatar
jbferet committed
185
186

  # characteristics of the centroids computed during preprocessing
187
188
189
  nbIter <- length(Kmeans.info$Centroids)
  nbCentroids <- nrow(Kmeans.info$Centroids[[1]])
  CentroidsArray <- do.call("rbind", Kmeans.info$Centroids)
jbferet's avatar
jbferet committed
190
191

  # read shade file and PCA file
192
193
194
195
196
  ShadeHDR <- Get.HDR.Name(ImPathShade)
  HDR.Shade <- read.ENVI.header(ShadeHDR)
  Shade.Format <- ENVI.Type2Bytes(HDR.Shade)
  ImgFormat <- "Shade"
  Shade.Chunk <- Read.Image.Subset(ImPathShade, HDR.Shade, Location.RW$Byte.Start.Shade, Location.RW$lenBin.Shade, Location.RW$nbLines, Shade.Format, ImgFormat)
jbferet's avatar
jbferet committed
197

198
199
200
  PCA.HDR <- Get.HDR.Name(PCA.Path)
  HDR.PCA <- read.ENVI.header(PCA.HDR)
  PCA.Format <- ENVI.Type2Bytes(HDR.PCA)
jbferet's avatar
jbferet committed
201
  # read "unfolded" (2D) PCA image
202
203
204
205
206
  ImgFormat <- "2D"
  PCA.Chunk <- Read.Image.Subset(PCA.Path, HDR.PCA, Location.RW$Byte.Start.PCA, Location.RW$lenBin.PCA, Location.RW$nbLines, PCA.Format, ImgFormat)
  PCA.Chunk <- PCA.Chunk[, PC.Select]
  if (length(PC.Select) == 1) {
    PCA.Chunk <- matrix(PCA.Chunk, ncol = 1)
jbferet's avatar
jbferet committed
207
  }
208
  PCA.Chunk <- Center.Reduce(PCA.Chunk, Kmeans.info$MinVal, Kmeans.info$Range)
jbferet's avatar
jbferet committed
209
  # eliminate shaded pixels
210
211
  keepShade <- which(Shade.Chunk == 1)
  PCA.Chunk <- matrix(PCA.Chunk[keepShade, ], ncol = length(PC.Select))
jbferet's avatar
jbferet committed
212
213

  # Prepare writing of spectral species distribution file
214
215
216
  SS.HDR <- Get.HDR.Name(Spectral.Species.Path)
  HDR.SS <- read.ENVI.header(SS.HDR)
  SS.Format <- ENVI.Type2Bytes(HDR.SS)
jbferet's avatar
jbferet committed
217
218

  # for each pixel in the subset, compute the nearest cluster for each iteration
219
  Nearest.Cluster <- matrix(0, nrow = Location.RW$nbLines * HDR.PCA$samples, ncol = nbIter)
jbferet's avatar
jbferet committed
220
  # rdist consumes RAM  --> divide data into pieces if too big and multiprocess
221
222
223
224
  nbSamples.Per.Rdist <- 25000
  if (length(keepShade) > 0) {
    nbSubsets <- ceiling(length(keepShade) / nbSamples.Per.Rdist)
    PCA.Chunk <- splitRows(PCA.Chunk, nbSubsets)
jbferet's avatar
jbferet committed
225
226

    plan(multiprocess, workers = nbCPU) ## Parallelize using four cores
227
228
    Schedule.Per.Thread <- ceiling(nbSubsets / nbCPU)
    res <- future_lapply(PCA.Chunk, FUN = RdistList, CentroidsArray = CentroidsArray, nbClusters = nrow(Kmeans.info$Centroids[[1]]), nbIter = nbIter, future.scheduling = Schedule.Per.Thread)
jbferet's avatar
jbferet committed
229
    plan(sequential)
230
231
232
233
    res <- do.call("rbind", res)
    Nearest.Cluster[keepShade, ] <- res
    rm(res)
    gc()
jbferet's avatar
jbferet committed
234
  }
235
  Nearest.Cluster <- array(Nearest.Cluster, c(Location.RW$nbLines, HDR.PCA$samples, nbIter))
jbferet's avatar
jbferet committed
236
  # Write spectral species distribution in the output file
237
238
239
240
241
242
243
  fidSS <- file(
    description = Spectral.Species.Path, open = "r+b", blocking = TRUE,
    encoding = getOption("encoding"), raw = FALSE
  )
  Nearest.Cluster <- aperm(Nearest.Cluster, c(2, 3, 1))
  if (!Location.RW$Byte.Start.SS == 1) {
    seek(fidSS, where = SS.Format$Bytes * (Location.RW$Byte.Start.SS - 1), origin = "start", rw = "write")
jbferet's avatar
jbferet committed
244
  }
245
  writeBin(c(as.integer(Nearest.Cluster)), fidSS, size = SS.Format$Bytes, endian = .Platform$endian, useBytes = FALSE)
jbferet's avatar
jbferet committed
246
  close(fidSS)
247
  rm(list = ls())
jbferet's avatar
jbferet committed
248
249
250
251
  gc()
  return()
}

252
253
254
255
256
257
258
259
260
261
# Compute distance between each pixel of input data and each of the nbClusters x nbIter centroids
#
# @param InputData
# @param CentroidsArray
# @param nbClusters
# @param nbIter
#
# @return ResDist
#' @importFrom fields rdist
RdistList <- function(InputData, CentroidsArray, nbClusters, nbIter) {
jbferet's avatar
jbferet committed
262
  # number of pixels in input data
263
  nbPixels <- nrow(InputData)
jbferet's avatar
jbferet committed
264
  # compute distance between each pixel and each centroid
265
  cluster.dist <- rdist(InputData, CentroidsArray)
jbferet's avatar
jbferet committed
266
  # reshape distance into a matrix: all pixels from iteration 1, then all pixels from it2...
267
268
  cluster.dist <- matrix(aperm(array(cluster.dist, c(nbPixels, nbClusters, nbIter)), c(1, 3, 2)), nrow = nbPixels * nbIter)
  ResDist <- matrix(max.col(-cluster.dist), nrow = nbPixels)
jbferet's avatar
jbferet committed
269
270
  return(ResDist)
}