Newer
Older
###################################################
###################################################
###################################################
##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP ANALYSIS
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
############################
## FUNCTION remove trailing white space
trim.trailing <- function (x) sub("\\s+$", "", x)
#############################################
## FUN to clean species name
fun.clean.species.tab <- function(species.tab){
species.tab2 <- species.tab[!is.na(species.tab$Latin_name),]
### species IFN reformat names
## clean species names and synonyme names
species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name))
species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn))
## remove trailing white space
species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn)
species.clean <- species.tab2[!duplicated(species.tab2$Latin_name),
c("code","Latin_name","Exotic_Native_cultivated")]
return(species.clean)}
#####################################################
### compute quantile 99% and sd with a bootstrap
f.quantile <- function (x,ind,probs){
require(boot)
quantile(x[ind],probs=probs,na.rm=TRUE)}
f.quantile.boot <- function(i,x,fac,R,probs=0.99){
require(boot)
if(length(na.exclude(x[fac==i]))>0){
quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs)
return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1))
}else{
return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1))
}
}
#######################
### function to compute number of dead per plot
function.perc.dead <- function(dead){
sum(dead)/length(dead)}
function.perc.dead2 <- function(dead) { out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]); if(!is.finite(out)) out <- NA; return(out) }
##' .. Function to plot map of tree with circle function of their dbh..
##'
##' .. content for \details{} ..
##' @title
##' @param plot.select the plot for which draw the map
##' @param x
##' @param y
##' @param plot vectore of plot id for each tree
##' @param D diameter in cm
##' @param inches controling the circle size
##' @param ...
##' @return
##' @author Kunstler
fun.circles.plot <- function(plot.select,x,y,plot,D,inches,...){
x.t <- x[plot==plot.select]
y.t <- y[plot==plot.select]
D.t <- D[plot==plot.select]
D.t[is.na(D.t)] <- 0
symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches,...)
##' .. Compute the basal area of competitor in a plot..
##'
##' .. content for \details{} ..
##' @title
##' @param diam diameters of each individuals in cm
##' @param weights weight to compute the basal area in cm^2/m^2
##' @return basal area in cm^2/m^2
##' @author Kunstler
####
##' .. function to compute the basal area of neighborood tree in plots ..
##'
##' .. content for \details{} ..
##' @title
##' @param obs.id tree obs identifier
##' @param diam diam of tree in cm
##' @param sp species name or code
##' @param id.plot identifier of the plot
##' @param weights weights to compute basal area in cm^2/m^2 SO THE 1/AREA of teh plot (or subplot) with area in m^2
##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots)
##' @return data frame with obs.id and one column per species with basal area of the species (without the target tree)
BA.SP.FUN <- function(obs.id,diam,sp,id.plot,weights,weight.full.plot){
print(length(obs.id))
obs.id <- as.character(obs.id)
if(!(length(obs.id)==length(diam) & length(obs.id)==length(sp) & length(obs.id)==length(id.plot) & length(obs.id)==length(weights)))
stop("length of obs.id diam,sp id.plot & weights need to be the same")
## check sp is not numeric
if(is.numeric(sp)) stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')")
# compute BA tot per species per plot
BASP <- tapply(BA.fun(diam,weights),INDEX=list(id.plot,sp),FUN=sum,na.rm=T)
print(dim(BASP))
DATA.BASP <- data.table(id.plot= rownames(BASP),BASP)
setnames( DATA.BASP,old=1:ncol(DATA.BASP), c("id.plot",colnames(BASP)))
setkeyv(DATA.BASP,c("id.plot"))
sp.name <- colnames(BASP)
rm(BASP)
print("first table created")
#### MERGE with indivudal tree
## use library(data.table)
if(!is.na(weight.full.plot)){
data.indiv <- data.table(obs.id=obs.id,sp=sp,
id.plot=id.plot,diam=diam,
BA.indiv=BA.fun(diam,rep(weight.full.plot,length(diam))))
setkeyv(data.indiv,"id.plot")
print("second table created")
data.merge <- merge(data.indiv,DATA.BASP)
print("merge done")
# substract target BA
for (i in (sp.name)){
eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep="")))
}
}else{
data.indiv <- data.table(obs.id=obs.id,sp=sp,id.plot=id.plot,
diam=diam,weights=weights,
BA.indiv=BA.fun(diam,weights))
setkeyv(data.indiv,"id.plot")
print("second table created")
data.merge <- merge(data.indiv,DATA.BASP)
print("merge done")
for (i in (sp.name)){
eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep="")))
}
}
print("replacment done")
data.merge[,BA.indiv:=NULL]
print("first column removed")
#### delete column not used
data.merge[,sp:=NULL]
data.merge[,diam:=NULL]
data.merge[,id.plot:=NULL]
data.merge[,weights:=NULL]
print("columns removed")
return( (data.merge))
######################
#### apply BA.SP.FUN per census
# function to run on a subset to only one census
BA.SP.FUN.l<- function(census.id,census,obs.id,diam,sp,id.plot,weights,weight.full.plot){
return(BA.SP.FUN(obs.id=obs.id[census==census.id],diam=diam[census==census.id],sp=sp[census==census.id],id.plot=id.plot[census==census.id],weights=weights[census==census.id],weight.full.plot=weight.full.plot))
## function to apply over all census and merge back together
BA.SP.FUN.census<- function(census,obs.id,diam,sp,id.plot,weights,weight.full.plot){
require(data.table)
res.list <- lapply(unique.census,FUN=BA.SP.FUN.l,census,obs.id,diam,sp,id.plot,weights,weight.full.plot)
res.mat <- data.table(res.mat[match(obs.id,res.mat[["obs.id"]]),])
####
##' .. function compute competition index with X Y coordinates based on a neighborhood of radius R ..
##'
##' .. content for \details{} ..
##' @title
##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment
##' @param xy.table table with x.y of teh individual
##' @param diam diam in cm
##' @param sp species
##' @param Rlim radius of neighborhood search
##' @param parallel run in paralle or not ?
##' @param rpuDist run with GPU distance computation
##' @return a data frame with nrow = length of obs.id and ncol =unique(sp)
BA.SP.FUN.XY <- function(obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){
print(paste("n observation = ",nrow(xy.table)))
print("start computing distance matrix")
if(rpuDist){
require(rpud)
dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE)
}else{
dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE))
print('distance matrix computed')
dist.mat[dist.mat <Rlim] <- 1
dist.mat[dist.mat >Rlim] <- 0
diag(dist.mat) <- 0
print('distance matrix set to 0 1')
BA <- BA.fun(diam,weights=1/(pi*Rlim^2))
BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE)
print('starting tapply over species')
fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE)
if(parallel){
## parallel version
require(doParallel)
registerDoParallel(cores=4)
mat <- dist.mat*BA.mat
res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% {
fun.sum.sp(mat[i,],sp)
}
res.temp <- data.frame(obs.id=obs.id,res.temp)
res.temp <- data.frame(obs.id=obs.id,t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp)))
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
####
##' .. function to compute competition index with X Y coordinates based on a neighborhood of radius R WITHOUT THE DISTANCE MATRIX..
##'
##' .. content for \details{} ..
##' @title
##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment
##' @param xy.table table with x.y of teh individual
##' @param diam diam in cm
##' @param sp species
##' @param Rlim radius of neighborhood search
##' @param parallel run in paralle or not ?
##' @param rpuDist run with GPU distance computation
##' @return a data frame with nrow = length of obs.id and ncol =unique(sp)
##' @author Kunstler
BA.SP.FUN.XY2 <- function(obs.id,xy.table,diam,sp,Rlim,parallel){
rownames(xy.table) <- obs.id
print(paste("n observation = ",nrow(xy.table)))
print("start computing distance matrix")
## create local function
fun.compute.local <- function(obs.id.t,obs.id,xy.table,diam,sp,Rlim){
dist.t <- as.vector(sqrt((outer(xy.table[obs.id==obs.id.t,"x"],xy.table[,"x"],FUN="-"))^2 +
(outer(xy.table[obs.id==obs.id.t,"y"],xy.table[,"y"],FUN="-"))^2))
dist.t[dist.t>Rlim] <- 0
dist.t[dist.t<=Rlim] <- 1
res.BA.t <- tapply(BA.fun(diam,weights=dist.t/(pi*Rlim^2)),INDEX=sp,FUN=sum,na.rm=TRUE)
res.BA.t[is.na(res.BA.t)] <- 0
return(data.frame(obs.id=obs.id.t,t(res.BA.t)))
}
#lapply function
if(parallel){
library(doParallel)
print("start mclapply")
list.BA.SP.data <- mclapply(obs.id,FUN=fun.compute.local,obs.id,xy.table,
diam,sp,Rlim,mc.cores=4)
data.BA.SP <- rbind.fill(list.BA.SP.data)
}else{
list.BA.SP.data <- lapply(obs.id,FUN=fun.compute.local,obs.id,xy.table,
diam,sp,Rlim)
data.BA.SP <- rbind.fill(list.BA.SP.data)
}
return(data.BA.SP)
}
## function for lapply per census
BA.SP.FUN.XY.l <- function(census.id,census,obs.id,xy.table,diam,sp,Rlim,parallel,rpuDist){
print(dim(xy.table))
if(length(obs.id[census==census.id]) <10000){
data.temp<-BA.SP.FUN.XY(obs.id=obs.id[census==census.id],
xy.table=xy.table[census==census.id,],
diam=diam[census==census.id],sp=sp[census==census.id],
Rlim=Rlim,parallel,rpuDist)
}else{
data.temp<-BA.SP.FUN.XY2(obs.id=obs.id[census==census.id],
xy.table=xy.table[census==census.id,],
diam=diam[census==census.id],sp=sp[census==census.id],
Rlim=Rlim,parallel)
}
return(data.temp)
### wrapping function to run BA.SP.FUN.XY per census
BA.SP.FUN.XY.census <- function(census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){
unique.census <- unique(census)
print(paste("Vector of census to lapply over" ,paste(unique.census,collapse=","),sep=" "))
res.list <- lapply(unique.census,FUN=BA.SP.FUN.XY.l,census=census,obs.id=obs.id,xy.table=xy.table,diam=diam,sp=sp,Rlim=Rlim,parallel,rpuDist)
res.DF <- res.mat[match(obs.id,res.mat[["obs.id"]]),]
return(res.DF)
#####################################
#####################################
### FUNCTION TO COMPUTE BA.SP.XY PER PLOT AND MERGE TOGETHER
#### function to be apply per site
fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){
data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i)
print(paste("size of the data for plot",i,"is",nrow(data.tree.s)))
BA.SP.temp <- BA.SP.FUN.XY.census(census=data.tree[["census"]],
obs.id=data.tree.s[['obs.id']],
xy.table=data.tree.s[,xy.name],
diam=data.tree.s[['D']],
sp=(data.tree.s[['sp']]),
Rlim=Rlim,
parallel,
rpuDist)
## replace NA per zero
print('replacing NA per zero')
BA.SP.temp[is.na(BA.SP.temp)] <- 0
print('done')
### TEST GOOD ORDER
if(sum(! (BA.SP.temp[["obs.id"]])==data.tree.s[['obs.id']]) >0) stop('rows not in the good order')
if(sum(!colnames(BA.SP.temp[,-1])==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name')
### compute sum per row
BATOT <- apply(BA.SP.temp[,-1],MARGIN=1,FUN=sum)
data.res <- data.frame(BA.SP.temp,BATOT=BATOT, stringsAsFactors =FALSE)
return(data.res)
### function to replace missing value per zero in competition matrix
function.replace.NA.negative <- function(data.BA.SP){
for (i in 2:ncol(data.BA.SP)){
eval(parse(text=paste("data.BA.SP[is.na(",names(data.BA.SP)[i],"),",names(data.BA.SP)[i],":=0]",sep="")))
eval(parse(text=paste("data.BA.SP[",names(data.BA.SP)[i],"<0,",names(data.BA.SP)[i],":=0]",sep="")))
Georges Kunstler
committed
}
print('NA and negative replaced')
return(data.BA.SP)
}
##############################################################
##function to generate data in good format per ecoregion
fun.data.per.ecoregion <- function(ecoregion,data.tot,plot.name,weight.full.plot,name.country,data.TRY=NA,species.lookup=NA){
if(is.null(data.tot[['weights']])) stop("Please create a weights vector, even if it is completely NA")
require(data.table)
data.tot <- data.table(data.tot)
data <- data.tot[ecocode==ecoregion,]
print(dim(data))
data.BA.SP <- BA.SP.FUN.census(census=data[['census']],
obs.id=as.vector(data[['obs.id']]),
diam=as.vector(data[['D']]),
sp=as.vector(data[['sp']]),
weights=data[['weights']],
weight.full.plot=weight.full.plot)
print('competition index computed')
## change NA and <0 data for 0
data.BA.SP <- function.replace.NA.negative(data.BA.SP)
### CHECK IF sp and sp name for column are the same
if(sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) >0) stop("competition index sp name not the same as in data")
#### compute BA tot for all competitors
## data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## slower than apply why??
BATOT.s <- apply(data.frame(data.BA.SP)[,-1],MARGIN=1,FUN=sum)
data.BA.SP[,BATOT:=BATOT.s]
print('BATOT COMPUTED')
### create data frame
DT.temp <- data.table(obs.id=data[["obs.id"]],ecocode=data[["ecocode"]])
setkeyv(DT.temp,"obs.id")
setkeyv(data.BA.SP,"obs.id")
print('starting last merge')
data.BA.sp <- merge(DT.temp,data.BA.SP)
## reorder data
data <- data.table(data)
setkeyv(data,"obs.id")
if(sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) >0) stop("competition index not in the same order than data")
if(is.data.frame(data.TRY)){
sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]),]
data.traits <- fun.extract.format.sp.traits.TRY(sp=sp.extract[["sp"]],sp.syno.table=sp.extract,data.TRY)
### TODO ADD OPTION TO INCLUE OTHER DATA on MAX HEIGHT
## save everything as a list
print(dim(data.traits))
list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=data.traits)
save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep="."))
list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA)
saveRDS(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep="."))
}
#######
#### NEED TO WRITE FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT
fun.data.per.bigplot <- function(data,name.site,data.TRAITS=NA,Rlim=15,xy.name,parallel=FALSE,rpuDist=FALSE){
require(data.table)
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
## compute competition matrix
list.BA.SP.data <- lapply(unique(data[["plot"]]),FUN=fun.compute.BA.SP.XY.per.plot,data=data,Rlim=Rlim,xy.name=xy.name,parallel=TRUE,rpuDist=FALSE)
print('competition index computed in list')
data.BA.SP <- rbind.fill(list.BA.SP.data)
print('competition index computed')
### test
if(sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]),"BATOT"))) >0) stop("competition index sp name not the same as in data")
### TODO REMOVE TREE IN BUFFER ZONE
obs.id.no.buffer <- subset(data,subset=((data[["x"]]-15)>0 & (data[["x"]]+15)<max(data[["x"]])) &
((data[["y"]]-15)>0 & (data[["y"]]+15)<max(data[["y"]])),
select="obs.id")
data <- subset(data,subset=data[["obs.id"]] %in% as.character(obs.id.no.buffer[,1]))
data.BA.SP <- subset(data.BA.SP,subset=data.BA.SP[["obs.id"]] %in% as.character(obs.id.no.buffer[,1]))
data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id)
## reorder data
data <- data.table(data)
setkeyv(data,"obs.id")
data.BA.SP <- data.table(data.BA.SP)
setkeyv(data.BA.SP,"obs.id")
## test if same order
if(sum(!as.character(data.BA.SP[["obs.id"]]) == data[["obs.id"]]) >0) stop("competition index not in the same order than data")
#####
## ADD TRY DATA OR TRAITS IF NEEDED
if(is.data.frame(data.TRAITS)){
data.traits <- fun.extract.format.sp.traits.NOT.TRY(sp=unique(data[["sp"]]),
Latin_name=unique(data[["sp.name"]]),
data=data.TRAITS,name.match.traits="Latin_name")
list.temp <- list(data.tree=data,data.BA.SP=data.BA.SP,data.traits=data.traits)
save(list.temp,file=paste("./data/process/list",name.site,"Rdata",sep="."))
}else{
list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA)
saveRDS(list.temp,file=paste("./data/process/list",name.site,"Rdata",sep="."))
}
}