format.function.R 19.12 KiB
###################################################
###################################################
###################################################
##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP 
#### G. Kunstler 11/09/2013
############################
## FUNCTION remove trailing white space
trim.trailing <- function (x) sub("\\s+$", "", x)
#####################################################
### compute quantile 99% and sd with a bootstrap
##' ..  compute quantile 99% and sd with a bootstra..
##'
##' .. content for \details{} ..
##' @title f.quantile.boot
##' @param i subset (species) for which to compute quantile
##' @param x vector of height
##' @param fac vector of subset
##' @param R number of resampling
##' @param probs probability of quantile to compute
##' @return a matrix with # col and 1 row with mean sd adn nobs
##' @author Kunstler
f.quantile.boot <-  function(i,x,fac,R,probs=0.99){
require(boot)
 if(length(na.exclude(x[fac==i]))>0){
     quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs)
     return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1))
 }else{
     return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1))
f.quantile <- function (x,ind,probs){
    require(boot)
    quantile(x[ind],probs=probs,na.rm=TRUE)
######
######
## FUNCTION TO PLOT MAP OF TREE with function of DBH
##' .. Function to plot map of tree with circle function of their dbh..
##'
##' .. content for \details{} ..
##' @title 
##' @param plot.select the plot for which draw the map
##' @param x 
##' @param y 
##' @param plot vectore of plot id for each tree
##' @param D diameter in cm
##' @param inches controling the circle size
##' @param ... 
##' @return 
##' @author Kunstler
fun.circles.plot <- function(plot.select,x,y,plot,D,inches,...){
x.t <- x[plot==plot.select]
y.t <- y[plot==plot.select]
D.t <- D[plot==plot.select]
D.t[is.na(D.t)] <-  0
symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches,...)
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
######################### ## ##' .. Compute the basal area of competitor in a plot.. ##' ##' .. content for \details{} .. ##' @title ##' @param diam diameters of each individuals in cm ##' @param weights weight in 1/m^2 to compute the basal area in cm^2/m^2 ##' @return basal area in cm^2/m^2 ##' @author Kunstler BA.fun <- function(diam,weights){ ((diam/2)^2)*pi*weights } #### ##' .. function to compute the basal area of neighborood tree in plots .. ##' ##' .. content for \details{} .. ##' @title ##' @param obs.id tree obs identifier ##' @param diam diam of tree in cm ##' @param sp species name or code ##' @param id.plot identifier of the plot ##' @param weights weights to compute basal area in cm^2/m^2 SO THE 1/AREA of teh plot (or subplot) with area in m^2 ##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots) ##' @return data frame with obs.id and one column per species with basal area of the species (without the target tree) ##' @author Kunstler BA.SP.FUN <- function(obs.id,diam,sp,id.plot,weights,weight.full.plot){ print(paste("Number of observation :",length(obs.id))) require(data.table) id.plot <- as.character(id.plot) obs.id <- as.character(obs.id) ## check equal length if(!(length(obs.id)==length(diam) & length(obs.id)==length(sp) & length(obs.id)==length(id.plot) & length(obs.id)==length(weights))) stop("length of obs.id diam,sp id.plot & weights need to be the same") ## check sp is not numeric if(is.numeric(sp)) stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')") # compute BA tot per species per plot BASP <- tapply(BA.fun(diam,weights),INDEX=list(id.plot,sp),FUN=sum,na.rm=T) DATA.BASP <- data.table(id.plot= rownames(BASP),BASP) setnames( DATA.BASP,old=1:ncol(DATA.BASP), c("id.plot",colnames(BASP))) setkeyv(DATA.BASP,c("id.plot")) sp.name <- colnames(BASP) rm(BASP) print("plot level table created") #### MERGE with indivudal tree ## use library(data.table) if(!is.na(weight.full.plot)){ data.indiv <- data.table(obs.id=obs.id,sp=sp, id.plot=id.plot,diam=diam, BA.indiv=BA.fun(diam,rep(weight.full.plot,length(diam)))) setkeyv(data.indiv,"id.plot") print("tree level table created") data.merge <- merge(data.indiv,DATA.BASP) print("merge done") # substract target BA for (i in (sp.name)){ eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep=""))) } }else{ data.indiv <- data.table(obs.id=obs.id,sp=sp,id.plot=id.plot, diam=diam,weights=weights, BA.indiv=BA.fun(diam,weights))
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
setkeyv(data.indiv,"id.plot") print("tree level table created") data.merge <- merge(data.indiv,DATA.BASP) print("merge done") for (i in (sp.name)){ eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep=""))) } } print("substraction of target tree BA done") #### delete column not used data.merge[,BA.indiv:=NULL] data.merge[,sp:=NULL] data.merge[,diam:=NULL] data.merge[,id.plot:=NULL] data.merge[,weights:=NULL] print("columns removed") return( (data.merge)) } ###################### #### apply BA.SP.FUN per census # function to run on a subset to only one census BA.SP.FUN.l<- function(census.id,census,obs.id,diam,sp,id.plot,weights,weight.full.plot){ return(BA.SP.FUN(obs.id=obs.id[census==census.id],diam=diam[census==census.id], sp=sp[census==census.id],id.plot=id.plot[census==census.id], weights=weights[census==census.id],weight.full.plot=weight.full.plot)) } ## function to apply over all census and merge back together ##' .. function to apply competition index computation over all census and merge back together .. ##' ##' .. content for \details{} .. ##' @title BA.SP.FUN.census ##' @param census vector of census of length N ##' @param obs.id vector of obs.id of length N ##' @param diam vector of DBH in cm of length N ##' @param sp vector of species of length N ##' @param id.plot vector of id.plot of length N ##' @param weights vector of weight in 1/m^2 of length N ##' @param weight.full.plot optional if want to use a different weight to remove the BA of target tree (if NA weights is use instead) ##' @return a dta.table of dim N & length(unique(sp))+1 (obs.id and basal area of all species ##' @author Kunstler BA.SP.FUN.census<- function(census,obs.id,diam,sp,id.plot,weights,weight.full.plot){ require(data.table) unique.census <- unique(census) res.list <- lapply(unique.census,FUN=BA.SP.FUN.l,census,obs.id,diam,sp,id.plot,weights,weight.full.plot) res.mat <- rbind.fill(res.list ) res.mat <- data.table(res.mat[match(obs.id,res.mat[["obs.id"]]),]) return(res.mat) } #### ##' .. function compute competition index with X Y coordinates based on a neighborhood of radius Rlim using a global distance matrix .. ##' ##' .. content for \details{} .. ##' @title ##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment ##' @param xy.table table with x.y of teh individual ##' @param diam diam in cm ##' @param sp species ##' @param Rlim radius of neighborhood search ##' @param parallel TRUE/FALSE run in paralle or not ##' @param rpuDist TRUE/FALSE run with GPU distance computation (need top install additional software only on linux) ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler BA.SP.FUN.XY <- function(obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ print(paste("n observation = ",nrow(xy.table))) print("start computing distance matrix") if(rpuDist){ require(rpud)
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE) }else{ dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE)) } print('distance matrix computed') dist.mat[dist.mat <Rlim] <- 1 dist.mat[dist.mat >Rlim] <- 0 diag(dist.mat) <- 0 print('distance matrix set to 0 1') BA <- BA.fun(diam,weights=1/(pi*Rlim^2)) print(c(length(BA),length(diam))) BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE) print('starting tapply over species') fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE) if(parallel){ ## parallel version require(doParallel) registerDoParallel(cores=4) mat <- dist.mat*BA.mat res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% { fun.sum.sp(mat[i,],sp) } res.temp <- data.frame(obs.id=obs.id,res.temp) return((res.temp)) }else{ res.temp <- data.frame(obs.id=obs.id,t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp))) return(res.temp) } } #### ##' .. function to compute competition index with X Y coordinates based on a neighborhood of radius R without the global distance matrix.. ##' ##' .. content for \details{} .. ##' @title ##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment ##' @param xy.table table with x.y of teh individual ##' @param diam diam in cm ##' @param sp species ##' @param Rlim radius of neighborhood search ##' @param parallel run in paralle or not ? ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler BA.SP.FUN.XY2 <- function(obs.id,xy.table,diam,sp,Rlim,parallel){ rownames(xy.table) <- obs.id print(paste("n observation = ",nrow(xy.table))) print("start computing distance matrix") ## create local function fun.compute.local <- function(obs.id.t,obs.id,xy.table,diam,sp,Rlim){ dist.t <- as.vector(sqrt((outer(xy.table[obs.id==obs.id.t,"x"],xy.table[,"x"],FUN="-"))^2 + (outer(xy.table[obs.id==obs.id.t,"y"],xy.table[,"y"],FUN="-"))^2)) dist.t[dist.t>Rlim] <- 0 dist.t[dist.t<=Rlim] <- 1 res.BA.t <- tapply(BA.fun(diam,weights=dist.t/(pi*Rlim^2)),INDEX=sp,FUN=sum,na.rm=TRUE) res.BA.t[is.na(res.BA.t)] <- 0 return(data.frame(obs.id=obs.id.t,t(res.BA.t))) } #lapply function if(parallel){ library(doParallel) print("start mclapply") list.BA.SP.data <- mclapply(obs.id,FUN=fun.compute.local,obs.id,xy.table, diam,sp,Rlim,mc.cores=4) print("done") data.BA.SP <- rbind.fill(list.BA.SP.data) }else{ list.BA.SP.data <- lapply(obs.id,FUN=fun.compute.local,obs.id,xy.table, diam,sp,Rlim) data.BA.SP <- rbind.fill(list.BA.SP.data)
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
} return(data.BA.SP) } ## function for lapply per census BA.SP.FUN.XY.l <- function(census.id,census,obs.id,xy.table,diam,sp,Rlim,parallel,rpuDist){ print(dim(xy.table)) if(length(obs.id[census==census.id]) <10000){ data.temp<-BA.SP.FUN.XY(obs.id=obs.id[census==census.id], xy.table=xy.table[census==census.id,], diam=diam[census==census.id],sp=sp[census==census.id], Rlim=Rlim,parallel,rpuDist) }else{ data.temp<-BA.SP.FUN.XY2(obs.id=obs.id[census==census.id], xy.table=xy.table[census==census.id,], diam=diam[census==census.id],sp=sp[census==census.id], Rlim=Rlim,parallel) } return(data.temp) } ### wrapping function to run BA.SP.FUN.XY per census and merge together all census BA.SP.FUN.XY.census <- function(census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ unique.census <- unique(census) print(paste("Vector of census to lapply over" ,paste(unique.census,collapse=","),sep=" ")) res.list <- lapply(unique.census,FUN=BA.SP.FUN.XY.l,census=census,obs.id=obs.id,xy.table=xy.table,diam=diam,sp=sp,Rlim=Rlim,parallel,rpuDist) res.mat <- rbind.fill(res.list ) res.DF <- res.mat[match(obs.id,res.mat[["obs.id"]]),] return(res.DF) } ##################################### ##################################### ### FUNCTION TO COMPUTE BA.SP.XY PER PLOT #### function to be lapply per site ##' .. FUNCTION TO COMPUTE BA.SP.XY PER PLOT to be lapply over all plot or only one plot .. ##' ##' .. content for \details{} .. ##' @title fun.compute.BA.SP.XY.per.plot ##' @param i selected plot to compute BA ##' @param data.tree data.frame with column plot, census, D, sp and xy.name ##' @param Rlim maximum radius to compute local competition index ##' @param xy.name name of variable for x and y (vector of length 2) ##' @param parallel TRUE/FALSE run in parallel ##' @param rpuDist TRUE/FALSE use rpuDist to compute distance matrix only available on linux with additional software ##' @return a data.frame with nrow = length of obs.id[plot==i] and ncol =unique(sp)+ 2 (BATOT added) ##' @author Kunstler fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){ data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i) print(paste("size of the data for plot",i,"is",nrow(data.tree.s))) BA.SP.temp <- BA.SP.FUN.XY.census(census=data.tree[["census"]], obs.id=data.tree.s[['obs.id']], xy.table=data.tree.s[,xy.name], diam=data.tree.s[['D']], sp=(data.tree.s[['sp']]), Rlim=Rlim, parallel, rpuDist) ## replace NA per zero print('replacing NA per zero') BA.SP.temp[is.na(BA.SP.temp)] <- 0 print('done') ### TEST GOOD ORDER if(sum(! (BA.SP.temp[["obs.id"]])==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') if(sum(!colnames(BA.SP.temp[,-1])==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') ### compute sum per row BATOT <- apply(BA.SP.temp[,-1],MARGIN=1,FUN=sum)
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
data.res <- data.frame(BA.SP.temp,BATOT=BATOT, stringsAsFactors =FALSE) return(data.res) } ########################## ### function to replace missing value per zero in competition matrix function.replace.NA.negative <- function(data.BA.SP){ for (i in 2:ncol(data.BA.SP)){ eval(parse(text=paste("data.BA.SP[is.na(",names(data.BA.SP)[i],"),",names(data.BA.SP)[i],":=0]",sep=""))) eval(parse(text=paste("data.BA.SP[",names(data.BA.SP)[i],"<0,",names(data.BA.SP)[i],":=0]",sep=""))) } print('NA and negative replaced') return(data.BA.SP) } ############################################################## ##function to generate data in good format per ecoregion fun.data.per.ecoregion <- function(ecoregion,data.tot,plot.name,weight.full.plot,name.country,data.TRY=NA,data.TRAITS=NA,data.max.height=NA,species.lookup=NA,name.match.traits="Latin_name"){ if(is.null(data.tot[['weights']])) stop("Please create a weights vector(1/m^2), even if it is completely NA") require(data.table) data.tot <- data.table(data.tot) data <- data.tot[ecocode==ecoregion,] print(paste("number of obs in ecoregion",ecoregion," = ",nrow(data))) rm(data.tot) print('start computing competition index') data.BA.SP <- BA.SP.FUN.census(census=data[['census']], obs.id=as.vector(data[['obs.id']]), diam=as.vector(data[['D']]), sp=as.vector(data[['sp']]), id.plot=as.vector(data[[plot.name]]), weights=data[['weights']], weight.full.plot=weight.full.plot) print('competition index computed') ## change NA and <0 data for 0 data.BA.SP <- function.replace.NA.negative(data.BA.SP) ### CHECK IF sp and sp name for column are the same if(sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) >0) stop("competition index sp name not the same as in data") #### compute BA tot for all competitors ## data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## slower than apply why?? BATOT.s <- apply(data.frame(data.BA.SP)[,-1],MARGIN=1,FUN=sum) data.BA.SP[,BATOT:=BATOT.s] print('BATOT COMPUTED') ### create data frame DT.temp <- data.table(obs.id=data[["obs.id"]],ecocode=data[["ecocode"]]) setkeyv(DT.temp,"obs.id") setkeyv(data.BA.SP,"obs.id") data.BA.sp <- merge(DT.temp,data.BA.SP) ## reorder data data <- data.table(data) setkeyv(data,"obs.id") ## test if same order if(sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) >0) stop("competition index not in the same order than data") ##### ## ADD TRY DATA IF NEEDED if(is.data.frame(data.TRY)){ sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]),] data.traits <- fun.extract.format.sp.traits.TRY(sp=sp.extract[["sp"]],sp.syno.table=sp.extract,data.TRY) ### OPTION TO INCLUDE MAX HEIGHT if(is.data.frame(data.max.height)){ data.traits <- merge(data.traits,subset(data.max.height,select=c("Max.height.mean","Max.height.sd")),by="sp",all.x=TRUE,all.y=FALSE) } ## save everything as a list print(paste("nrow of traits data",nrow(data.traits))) list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=data.traits) save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) }else{ if(is.data.frame(data.TRAITS)){ data.traits <- fun.extract.format.sp.traits.NOT.TRY(sp=unique(data[["sp"]]),
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
Latin_name=unique(data[["sp.name"]]), data=data.TRAITS,name.match.traits=name.match.traits) list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=data.traits) save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) }else{ list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA) saveRDS(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) } } } ####### #### NEED TO WRITE FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT fun.data.per.bigplot <- function(data,name.site,data.TRAITS=NA,Rlim=15,xy.name,parallel=FALSE,rpuDist=FALSE){ require(data.table) ## compute competition matrix list.BA.SP.data <- lapply(unique(data[["plot"]]),FUN=fun.compute.BA.SP.XY.per.plot,data=data,Rlim=Rlim,xy.name=xy.name,parallel=TRUE,rpuDist=FALSE) print('competition index computed in list') data.BA.SP <- rbind.fill(list.BA.SP.data) print('competition index computed') ### test if(sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]),"BATOT"))) >0) stop("competition index sp name not the same as in data") ### REMOVE TREE IN BUFFER ZONE obs.id.no.buffer <- subset(data,subset=((data[["x"]]-15)>0 & (data[["x"]]+15)<max(data[["x"]])) & ((data[["y"]]-15)>0 & (data[["y"]]+15)<max(data[["y"]])), select="obs.id") data <- subset(data,subset=data[["obs.id"]] %in% as.character(obs.id.no.buffer[,1])) data.BA.SP <- subset(data.BA.SP,subset=data.BA.SP[["obs.id"]] %in% as.character(obs.id.no.buffer[,1])) data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id) ## reorder data data <- data.table(data) setkeyv(data,"obs.id") data.BA.SP <- data.table(data.BA.SP) setkeyv(data.BA.SP,"obs.id") ## test if same order if(sum(!as.character(data.BA.SP[["obs.id"]]) == data[["obs.id"]]) >0) stop("competition index not in the same order than data") ##### ## ADD TRY DATA OR TRAITS IF NEEDED if(is.data.frame(data.TRAITS)){ data.traits <- fun.extract.format.sp.traits.NOT.TRY(sp=unique(data[["sp"]]), Latin_name=unique(data[["sp.name"]]), data=data.TRAITS,name.match.traits="Latin_name") list.temp <- list(data.tree=data,data.BA.SP=data.BA.SP,data.traits=data.traits) save(list.temp,file=paste("./data/process/list",name.site,"Rdata",sep=".")) }else{ list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA) saveRDS(list.temp,file=paste("./data/process/list",name.site,"Rdata",sep=".")) } }