################################################### ################################################### ################################################### ##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP ANALYSIS ############################ ## FUNCTION remove trailing white space trim.trailing <- function (x) sub("\\s+$", "", x) ############################################# ## FUN to clean species name fun.clean.species.tab <- function(species.tab){ species.tab2 <- species.tab[!is.na(species.tab$Latin_name),] ### species IFN reformat names ## clean species names and synonyme names species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn)) ## remove trailing white space species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), c("code","Latin_name","Exotic_Native_cultivated")] return(species.clean)} ##################################################### ### compute quantile 99% and sd with a bootstrap f.quantile <- function (x,ind,probs){ require(boot) quantile(x[ind],probs=probs,na.rm=TRUE)} f.quantile.boot <- function(i,x,fac,R,probs=0.99){ require(boot) if(length(na.exclude(x[fac==i]))>0){ quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs) return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1)) }else{ return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1)) } } ####################### ### function to compute number of dead per plot function.perc.dead <- function(dead){ sum(dead)/length(dead)} function.perc.dead2 <- function(dead) { out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]); if(!is.finite(out)) out <- NA; return(out) } ###### ###### ## FUNCTION TO PLOT MAP OF TREE ##' .. Function to plot map of tree with circle function of their dbh.. ##' ##' .. content for \details{} .. ##' @title ##' @param plot.select the plot for which draw the map ##' @param x ##' @param y ##' @param plot vectore of plot id for each tree ##' @param D diameter in cm ##' @param inches controling the circle size ##' @param ... ##' @return ##' @author Kunstler fun.circles.plot <- function(plot.select,x,y,plot,D,inches,...){ x.t <- x[plot==plot.select] y.t <- y[plot==plot.select] D.t <- D[plot==plot.select] D.t[is.na(D.t)] <- 0 symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches,...) } ######################### ## ##' .. Compute the basal area of competitor in a plot.. ##' ##' .. content for \details{} .. ##' @title ##' @param diam diameters of each individuals in cm ##' @param weights weight to compute the basal area in cm^2/m^2 ##' @return basal area in cm^2/m^2 ##' @author Kunstler BA.fun <- function(diam,weights){ ((diam/2)^2)*pi*weights } #### ##' .. function to compute the basal area of neighborood tree in plots .. ##' ##' .. content for \details{} .. ##' @title ##' @param obs.id tree obs identifier ##' @param diam diam of tree in cm ##' @param sp species name or code ##' @param id.plot identifier of the plot ##' @param weights weights to compute basal area in cm^2/m^2 SO THE 1/AREA of teh plot (or subplot) with area in m^2 ##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots) ##' @return data frame with obs.id and one column per species with basal area of the species (without the target tree) ##' @author Kunstler BA.SP.FUN <- function(obs.id,diam,sp,id.plot,weights,weight.full.plot){ print(length(obs.id)) require(data.table) id.plot <- as.character(id.plot) obs.id <- as.character(obs.id) ## check equal length if(!(length(obs.id)==length(diam) & length(obs.id)==length(sp) & length(obs.id)==length(id.plot) & length(obs.id)==length(weights))) stop("length of obs.id diam,sp id.plot & weights need to be the same") ## check sp is not numeric if(is.numeric(sp)) stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')") # compute BA tot per species per plot BASP <- tapply(BA.fun(diam,weights),INDEX=list(id.plot,sp),FUN=sum,na.rm=T) print(dim(BASP)) DATA.BASP <- data.table(id.plot= rownames(BASP),BASP) setnames( DATA.BASP,old=1:ncol(DATA.BASP), c("id.plot",colnames(BASP))) setkeyv(DATA.BASP,c("id.plot")) sp.name <- colnames(BASP) rm(BASP) print("first table created") #### MERGE with indivudal tree ## use library(data.table) if(!is.na(weight.full.plot)){ data.indiv <- data.table(obs.id=obs.id,sp=sp, id.plot=id.plot,diam=diam, BA.indiv=BA.fun(diam,rep(weight.full.plot,length(diam)))) setkeyv(data.indiv,"id.plot") print("second table created") data.merge <- merge(data.indiv,DATA.BASP) print("merge done") # substract target BA for (i in (sp.name)){ eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep=""))) } }else{ data.indiv <- data.table(obs.id=obs.id,sp=sp,id.plot=id.plot, diam=diam,weights=weights, BA.indiv=BA.fun(diam,weights)) setkeyv(data.indiv,"id.plot") print("second table created") data.merge <- merge(data.indiv,DATA.BASP) print("merge done") for (i in (sp.name)){ eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep=""))) } } print("replacment done") data.merge[,BA.indiv:=NULL] print("first column removed") #### delete column not used data.merge[,sp:=NULL] data.merge[,diam:=NULL] data.merge[,id.plot:=NULL] data.merge[,weights:=NULL] print("columns removed") return( (data.merge)) } ###################### #### apply BA.SP.FUN per census # function to run on a subset to only one census BA.SP.FUN.l<- function(census.id,census,obs.id,diam,sp,id.plot,weights,weight.full.plot){ return(BA.SP.FUN(obs.id=obs.id[census==census.id],diam=diam[census==census.id],sp=sp[census==census.id],id.plot=id.plot[census==census.id],weights=weights[census==census.id],weight.full.plot=weight.full.plot)) } ## function to apply over all census and merge back together BA.SP.FUN.census<- function(census,obs.id,diam,sp,id.plot,weights,weight.full.plot){ require(data.table) unique.census <- unique(census) res.list <- lapply(unique.census,FUN=BA.SP.FUN.l,census,obs.id,diam,sp,id.plot,weights,weight.full.plot) res.mat <- rbind.fill(res.list ) res.mat <- data.table(res.mat[match(obs.id,res.mat[["obs.id"]]),]) return(res.mat) } #### ##' .. function compute competition index with X Y coordinates based on a neighborhood of radius R .. ##' ##' .. content for \details{} .. ##' @title ##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment ##' @param xy.table table with x.y of teh individual ##' @param diam diam in cm ##' @param sp species ##' @param Rlim radius of neighborhood search ##' @param parallel run in paralle or not ? ##' @param rpuDist run with GPU distance computation ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler BA.SP.FUN.XY <- function(obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ print(paste("n observation = ",nrow(xy.table))) print("start computing distance matrix") if(rpuDist){ require(rpud) dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE) }else{ dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE)) } print('distance matrix computed') dist.mat[dist.mat <Rlim] <- 1 dist.mat[dist.mat >Rlim] <- 0 diag(dist.mat) <- 0 print('distance matrix set to 0 1') BA <- BA.fun(diam,weights=1/(pi*Rlim^2)) print(c(length(BA),length(diam))) BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE) print('starting tapply over species') fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE) if(parallel){ ## parallel version require(doParallel) registerDoParallel(cores=4) mat <- dist.mat*BA.mat res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% { fun.sum.sp(mat[i,],sp) } res.temp <- data.frame(obs.id=obs.id,res.temp) return((res.temp)) }else{ res.temp <- data.frame(obs.id=obs.id,t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp))) return(res.temp) } } #### ##' .. function to compute competition index with X Y coordinates based on a neighborhood of radius R WITHOUT THE DISTANCE MATRIX.. ##' ##' .. content for \details{} .. ##' @title ##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment ##' @param xy.table table with x.y of teh individual ##' @param diam diam in cm ##' @param sp species ##' @param Rlim radius of neighborhood search ##' @param parallel run in paralle or not ? ##' @param rpuDist run with GPU distance computation ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler BA.SP.FUN.XY2 <- function(obs.id,xy.table,diam,sp,Rlim,parallel){ rownames(xy.table) <- obs.id print(paste("n observation = ",nrow(xy.table))) print("start computing distance matrix") ## create local function fun.compute.local <- function(obs.id.t,obs.id,xy.table,diam,sp,Rlim){ dist.t <- as.vector(sqrt((outer(xy.table[obs.id==obs.id.t,"x"],xy.table[,"x"],FUN="-"))^2 + (outer(xy.table[obs.id==obs.id.t,"y"],xy.table[,"y"],FUN="-"))^2)) dist.t[dist.t>Rlim] <- 0 dist.t[dist.t<=Rlim] <- 1 res.BA.t <- tapply(BA.fun(diam,weights=dist.t/(pi*Rlim^2)),INDEX=sp,FUN=sum,na.rm=TRUE) res.BA.t[is.na(res.BA.t)] <- 0 return(data.frame(obs.id=obs.id.t,t(res.BA.t))) } #lapply function if(parallel){ library(doParallel) print("start mclapply") list.BA.SP.data <- mclapply(obs.id,FUN=fun.compute.local,obs.id,xy.table, diam,sp,Rlim,mc.cores=4) print("done") data.BA.SP <- rbind.fill(list.BA.SP.data) }else{ list.BA.SP.data <- lapply(obs.id,FUN=fun.compute.local,obs.id,xy.table, diam,sp,Rlim) data.BA.SP <- rbind.fill(list.BA.SP.data) } return(data.BA.SP) } ## function for lapply per census BA.SP.FUN.XY.l <- function(census.id,census,obs.id,xy.table,diam,sp,Rlim,parallel,rpuDist){ print(dim(xy.table)) if(length(obs.id[census==census.id]) <10000){ data.temp<-BA.SP.FUN.XY(obs.id=obs.id[census==census.id], xy.table=xy.table[census==census.id,], diam=diam[census==census.id],sp=sp[census==census.id], Rlim=Rlim,parallel,rpuDist) }else{ data.temp<-BA.SP.FUN.XY2(obs.id=obs.id[census==census.id], xy.table=xy.table[census==census.id,], diam=diam[census==census.id],sp=sp[census==census.id], Rlim=Rlim,parallel) } return(data.temp) } ### wrapping function to run BA.SP.FUN.XY per census BA.SP.FUN.XY.census <- function(census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ unique.census <- unique(census) print(paste("Vector of census to lapply over" ,paste(unique.census,collapse=","),sep=" ")) res.list <- lapply(unique.census,FUN=BA.SP.FUN.XY.l,census=census,obs.id=obs.id,xy.table=xy.table,diam=diam,sp=sp,Rlim=Rlim,parallel,rpuDist) res.mat <- rbind.fill(res.list ) res.DF <- res.mat[match(obs.id,res.mat[["obs.id"]]),] return(res.DF) } ##################################### ##################################### ### FUNCTION TO COMPUTE BA.SP.XY PER PLOT AND MERGE TOGETHER #### function to be apply per site fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){ data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i) print(paste("size of the data for plot",i,"is",nrow(data.tree.s))) BA.SP.temp <- BA.SP.FUN.XY.census(census=data.tree[["census"]], obs.id=data.tree.s[['obs.id']], xy.table=data.tree.s[,xy.name], diam=data.tree.s[['D']], sp=(data.tree.s[['sp']]), Rlim=Rlim, parallel, rpuDist) ## replace NA per zero print('replacing NA per zero') BA.SP.temp[is.na(BA.SP.temp)] <- 0 print('done') ### TEST GOOD ORDER if(sum(! (BA.SP.temp[["obs.id"]])==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') if(sum(!colnames(BA.SP.temp[,-1])==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') ### compute sum per row BATOT <- apply(BA.SP.temp[,-1],MARGIN=1,FUN=sum) data.res <- data.frame(BA.SP.temp,BATOT=BATOT, stringsAsFactors =FALSE) return(data.res) } ########################## ### function to replace missing value per zero in competition matrix function.replace.NA.negative <- function(data.BA.SP){ for (i in 2:ncol(data.BA.SP)){ eval(parse(text=paste("data.BA.SP[is.na(",names(data.BA.SP)[i],"),",names(data.BA.SP)[i],":=0]",sep=""))) eval(parse(text=paste("data.BA.SP[",names(data.BA.SP)[i],"<0,",names(data.BA.SP)[i],":=0]",sep=""))) } print('NA and negative replaced') return(data.BA.SP) } ############################################################## ##function to generate data in good format per ecoregion fun.data.per.ecoregion <- function(ecoregion,data.tot,plot.name,weight.full.plot,name.country,data.TRY=NA,species.lookup=NA){ if(is.null(data.tot[['weights']])) stop("Please create a weights vector, even if it is completely NA") require(data.table) data.tot <- data.table(data.tot) data <- data.tot[ecocode==ecoregion,] print(dim(data)) rm(data.tot) data.BA.SP <- BA.SP.FUN.census(census=data[['census']], obs.id=as.vector(data[['obs.id']]), diam=as.vector(data[['D']]), sp=as.vector(data[['sp']]), id.plot=as.vector(data[[plot.name]]), weights=data[['weights']], weight.full.plot=weight.full.plot) print('competition index computed') ## change NA and <0 data for 0 data.BA.SP <- function.replace.NA.negative(data.BA.SP) ### CHECK IF sp and sp name for column are the same if(sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) >0) stop("competition index sp name not the same as in data") #### compute BA tot for all competitors ## data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## slower than apply why?? BATOT.s <- apply(data.frame(data.BA.SP)[,-1],MARGIN=1,FUN=sum) data.BA.SP[,BATOT:=BATOT.s] print('BATOT COMPUTED') ### create data frame DT.temp <- data.table(obs.id=data[["obs.id"]],ecocode=data[["ecocode"]]) setkeyv(DT.temp,"obs.id") setkeyv(data.BA.SP,"obs.id") print('starting last merge') data.BA.sp <- merge(DT.temp,data.BA.SP) ## reorder data data <- data.table(data) setkeyv(data,"obs.id") ## test if same order if(sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) >0) stop("competition index not in the same order than data") ##### ## ADD TRY DATA OR TRAITS IF NEEDED if(is.data.frame(data.TRY)){ sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]),] data.traits <- fun.extract.format.sp.traits.TRY(sp=sp.extract[["sp"]],sp.syno.table=sp.extract,data.TRY) ### TODO ADD OPTION TO INCLUE OTHER DATA on MAX HEIGHT ## save everything as a list print(dim(data.traits)) list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=data.traits) save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) }else{ list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA) saveRDS(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) } } ####### #### NEED TO WRITE FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT fun.data.per.bigplot <- function(data,name.site,data.TRAITS=NA,Rlim=15,xy.name,parallel=FALSE,rpuDist=FALSE){ require(data.table) ## compute competition matrix list.BA.SP.data <- lapply(unique(data[["plot"]]),FUN=fun.compute.BA.SP.XY.per.plot,data=data,Rlim=Rlim,xy.name=xy.name,parallel=TRUE,rpuDist=FALSE) print('competition index computed in list') data.BA.SP <- rbind.fill(list.BA.SP.data) print('competition index computed') ### test if(sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]),"BATOT"))) >0) stop("competition index sp name not the same as in data") ### TODO REMOVE TREE IN BUFFER ZONE obs.id.no.buffer <- subset(data,subset=((data[["x"]]-15)>0 & (data[["x"]]+15)<max(data[["x"]])) & ((data[["y"]]-15)>0 & (data[["y"]]+15)<max(data[["y"]])), select="obs.id") data <- subset(data,subset=data[["obs.id"]] %in% as.character(obs.id.no.buffer[,1])) data.BA.SP <- subset(data.BA.SP,subset=data.BA.SP[["obs.id"]] %in% as.character(obs.id.no.buffer[,1])) data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id) ## reorder data data <- data.table(data) setkeyv(data,"obs.id") data.BA.SP <- data.table(data.BA.SP) setkeyv(data.BA.SP,"obs.id") ## test if same order if(sum(!as.character(data.BA.SP[["obs.id"]]) == data[["obs.id"]]) >0) stop("competition index not in the same order than data") ##### ## ADD TRY DATA OR TRAITS IF NEEDED if(is.data.frame(data.TRAITS)){ data.traits <- fun.extract.format.sp.traits.NOT.TRY(sp=unique(data[["sp"]]), Latin_name=unique(data[["sp.name"]]), data=data.TRAITS,name.match.traits="Latin_name") list.temp <- list(data.tree=data,data.BA.SP=data.BA.SP,data.traits=data.traits) save(list.temp,file=paste("./data/process/list",name.site,"Rdata",sep=".")) }else{ list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA) saveRDS(list.temp,file=paste("./data/process/list",name.site,"Rdata",sep=".")) } }