diff --git a/R/process.data/process.fun.R b/R/process.data/process.fun.R index 6a8475993bb6bab0e7a7b399c9c4ea9f77bc45cd..cc0d222b6cfb75627390b849de47effe564eefe4 100644 --- a/R/process.data/process.fun.R +++ b/R/process.data/process.fun.R @@ -1,13 +1,8 @@ -#################################################### -#################################################### -#################################################### -#### function to process data -### install all unstallled packages +#################################################### function to process data install all unstallled packages source("R/packages.R") -check_packages(c("reshape", "data.table","doParallel", "data.table")) +check_packages(c("reshape", "data.table", "doParallel", "data.table")) -######################### -## +######################### ##' .. Compute the basal area of competitor in a plot.. ##' ##' .. content for \details{} .. @@ -16,8 +11,8 @@ check_packages(c("reshape", "data.table","doParallel", "data.table")) ##' @param weights weight in 1/m^2 to compute the basal area in cm^2/m^2 ##' @return basal area in cm^2/m^2 ##' @author Kunstler -BA.fun <- function(diam,weights){ -((diam/2)^2)*pi*weights +BA.fun <- function(diam, weights) { + ((diam/2)^2) * pi * weights } @@ -36,71 +31,70 @@ BA.fun <- function(diam,weights){ ##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots) ##' @return data frame with obs.id and one column per species with basal area of the species (without the target tree) ##' @author Kunstler -BA.SP.FUN <- function(obs.id,diam,sp,id.plot,weights,weight.full.plot){ -print(paste("Number of observation :",length(obs.id))) -require(data.table) -id.plot <- as.character(id.plot) -obs.id <- as.character(obs.id) -## check equal length -if(!(length(obs.id)==length(diam) & length(obs.id)==length(sp) & length(obs.id)==length(id.plot) & length(obs.id)==length(weights))) - stop("length of obs.id diam,sp id.plot & weights need to be the same") - -## check sp is not numeric -if(is.numeric(sp)) stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')") - -# compute BA tot per species per plot -BASP <- tapply(BA.fun(diam,weights),INDEX=list(id.plot,sp),FUN=sum,na.rm=T) -DATA.BASP <- data.table(id.plot= rownames(BASP),BASP) -setnames( DATA.BASP,old=1:ncol(DATA.BASP), c("id.plot",colnames(BASP))) -setkeyv(DATA.BASP,c("id.plot")) -sp.name <- colnames(BASP) -rm(BASP) -print("plot level table created") -#### MERGE with indivudal tree -## use library(data.table) -if(!is.na(weight.full.plot)){ - data.indiv <- data.table(obs.id=obs.id,sp=sp, - id.plot=id.plot,diam=diam, - BA.indiv=BA.fun(diam,rep(weight.full.plot,length(diam)))) - setkeyv(data.indiv,"id.plot") - print("tree level table created") - data.merge <- merge(data.indiv,DATA.BASP) - print("merge done") - # substract target BA - for (i in (sp.name)){ - eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep=""))) - } - -}else{ - data.indiv <- data.table(obs.id=obs.id,sp=sp,id.plot=id.plot, - diam=diam,weights=weights, - BA.indiv=BA.fun(diam,weights)) - setkeyv(data.indiv,"id.plot") - print("tree level table created") - data.merge <- merge(data.indiv,DATA.BASP) - print("merge done") - for (i in (sp.name)){ - eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep=""))) - } -} -print("substraction of target tree BA done") -#### delete column not used -data.merge[,BA.indiv:=NULL] -data.merge[,sp:=NULL] -data.merge[,diam:=NULL] -data.merge[,id.plot:=NULL] -data.merge[,weights:=NULL] -print("columns removed") -return( (data.merge)) +BA.SP.FUN <- function(obs.id, diam, sp, id.plot, weights, weight.full.plot) { + print(paste("Number of observation :", length(obs.id))) + require(data.table) + id.plot <- as.character(id.plot) + obs.id <- as.character(obs.id) + ## check equal length + if (!(length(obs.id) == length(diam) & length(obs.id) == length(sp) & length(obs.id) == + length(id.plot) & length(obs.id) == length(weights))) + stop("length of obs.id diam,sp id.plot & weights need to be the same") + + ## check sp is not numeric + if (is.numeric(sp)) + stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')") + + # compute BA tot per species per plot + BASP <- tapply(BA.fun(diam, weights), INDEX = list(id.plot, sp), FUN = sum, na.rm = T) + DATA.BASP <- data.table(id.plot = rownames(BASP), BASP) + setnames(DATA.BASP, old = 1:ncol(DATA.BASP), c("id.plot", colnames(BASP))) + setkeyv(DATA.BASP, c("id.plot")) + sp.name <- colnames(BASP) + rm(BASP) + print("plot level table created") + #### MERGE with indivudal tree use library(data.table) + if (!is.na(weight.full.plot)) { + data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam, + BA.indiv = BA.fun(diam, rep(weight.full.plot, length(diam)))) + setkeyv(data.indiv, "id.plot") + print("tree level table created") + data.merge <- merge(data.indiv, DATA.BASP) + print("merge done") + # substract target BA + for (i in (sp.name)) { + eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]", + sep = ""))) + } + + } else { + data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam, + weights = weights, BA.indiv = BA.fun(diam, weights)) + setkeyv(data.indiv, "id.plot") + print("tree level table created") + data.merge <- merge(data.indiv, DATA.BASP) + print("merge done") + for (i in (sp.name)) { + eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]", + sep = ""))) + } + } + print("substraction of target tree BA done") + #### delete column not used + data.merge[, `:=`(BA.indiv, NULL)] + data.merge[, `:=`(sp, NULL)] + data.merge[, `:=`(diam, NULL)] + data.merge[, `:=`(id.plot, NULL)] + data.merge[, `:=`(weights, NULL)] + print("columns removed") + return((data.merge)) } -###################### -#### apply BA.SP.FUN per census -# function to run on a subset to only one census -BA.SP.FUN.l<- function(census.id,census,obs.id,diam,sp,id.plot,weights,weight.full.plot){ -return(BA.SP.FUN(obs.id=obs.id[census==census.id],diam=diam[census==census.id], - sp=sp[census==census.id],id.plot=id.plot[census==census.id], - weights=weights[census==census.id],weight.full.plot=weight.full.plot)) +###################### apply BA.SP.FUN per census function to run on a subset to only one census +BA.SP.FUN.l <- function(census.id, census, obs.id, diam, sp, id.plot, weights, weight.full.plot) { + return(BA.SP.FUN(obs.id = obs.id[census == census.id], diam = diam[census == + census.id], sp = sp[census == census.id], id.plot = id.plot[census == census.id], + weights = weights[census == census.id], weight.full.plot = weight.full.plot)) } ## function to apply over all census and merge back together ##' .. function to apply competition index computation over all census and merge back together .. @@ -116,13 +110,14 @@ return(BA.SP.FUN(obs.id=obs.id[census==census.id],diam=diam[census==census.id], ##' @param weight.full.plot optional if want to use a different weight to remove the BA of target tree (if NA weights is use instead) ##' @return a dta.table of dim N & length(unique(sp))+1 (obs.id and basal area of all species ##' @author Kunstler -BA.SP.FUN.census<- function(census,obs.id,diam,sp,id.plot,weights,weight.full.plot){ -require(data.table) -unique.census <- unique(census) -res.list <- lapply(unique.census,FUN=BA.SP.FUN.l,census,obs.id,diam,sp,id.plot,weights,weight.full.plot) -res.mat <- rbind.fill(res.list ) -res.mat <- data.table(res.mat[match(obs.id,res.mat[["obs.id"]]),]) -return(res.mat) +BA.SP.FUN.census <- function(census, obs.id, diam, sp, id.plot, weights, weight.full.plot) { + require(data.table) + unique.census <- unique(census) + res.list <- lapply(unique.census, FUN = BA.SP.FUN.l, census, obs.id, diam, sp, + id.plot, weights, weight.full.plot) + res.mat <- rbind.fill(res.list) + res.mat <- data.table(res.mat[match(obs.id, res.mat[["obs.id"]]), ]) + return(res.mat) } @@ -140,39 +135,40 @@ return(res.mat) ##' @param rpuDist TRUE/FALSE run with GPU distance computation (need top install additional software only on linux) ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler -BA.SP.FUN.XY <- function(obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ -print(paste("n observation = ",nrow(xy.table))) -print("start computing distance matrix") -if(rpuDist){ - require(rpud) - dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE) -}else{ - dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE)) -} -print('distance matrix computed') -dist.mat[dist.mat <Rlim] <- 1 -dist.mat[dist.mat >Rlim] <- 0 -diag(dist.mat) <- 0 -print('distance matrix set to 0 1') -BA <- BA.fun(diam,weights=1/(pi*Rlim^2)) -print(c(length(BA),length(diam))) -BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE) -print('starting tapply over species') -fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE) - if(parallel){ - ## parallel version - require(doParallel) - registerDoParallel(cores=4) - mat <- dist.mat*BA.mat - res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% { - fun.sum.sp(mat[i,],sp) - } - res.temp <- data.frame(obs.id=obs.id,res.temp) - return((res.temp)) - }else{ - res.temp <- data.frame(obs.id=obs.id,t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp))) - return(res.temp) - } +BA.SP.FUN.XY <- function(obs.id, xy.table, diam, sp, Rlim, parallel = FALSE, rpuDist = FALSE) { + print(paste("n observation = ", nrow(xy.table))) + print("start computing distance matrix") + if (rpuDist) { + require(rpud) + dist.mat <- rpuDist(xy.table, upper = TRUE, diag = TRUE) + } else { + dist.mat <- as.matrix(dist(xy.table, upper = TRUE, diag = TRUE)) + } + print("distance matrix computed") + dist.mat[dist.mat < Rlim] <- 1 + dist.mat[dist.mat > Rlim] <- 0 + diag(dist.mat) <- 0 + print("distance matrix set to 0 1") + BA <- BA.fun(diam, weights = 1/(pi * Rlim^2)) + print(c(length(BA), length(diam))) + BA.mat <- matrix(rep(BA, length(BA)), nrow = length(BA), byrow = TRUE) + print("starting tapply over species") + fun.sum.sp <- function(x, sp) tapply(x, INDEX = sp, FUN = sum, na.rm = TRUE) + if (parallel) { + ## parallel version + require(doParallel) + registerDoParallel(cores = 4) + mat <- dist.mat * BA.mat + res.temp <- foreach(i = 1:nrow(mat), .combine = rbind) %dopar% { + fun.sum.sp(mat[i, ], sp) + } + res.temp <- data.frame(obs.id = obs.id, res.temp) + return((res.temp)) + } else { + res.temp <- data.frame(obs.id = obs.id, t(apply(dist.mat * BA.mat, MARGIN = 1, + FUN = fun.sum.sp, sp))) + return(res.temp) + } } @@ -189,70 +185,71 @@ fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE) ##' @param parallel run in paralle or not ? ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler -BA.SP.FUN.XY2 <- function(obs.id,xy.table,diam,sp,Rlim,parallel){ -rownames(xy.table) <- obs.id -print(paste("n observation = ",nrow(xy.table))) -print("start computing distance matrix") -## create local function -fun.compute.local <- function(obs.id.t,obs.id,xy.table,diam,sp,Rlim){ -dist.t <- as.vector(sqrt((outer(xy.table[obs.id==obs.id.t,"x"],xy.table[,"x"],FUN="-"))^2 + - (outer(xy.table[obs.id==obs.id.t,"y"],xy.table[,"y"],FUN="-"))^2)) -dist.t[dist.t>Rlim] <- 0 -dist.t[dist.t<=Rlim] <- 1 -res.BA.t <- tapply(BA.fun(diam,weights=dist.t/(pi*Rlim^2)),INDEX=sp,FUN=sum,na.rm=TRUE) -res.BA.t[is.na(res.BA.t)] <- 0 -return(data.frame(obs.id=obs.id.t,t(res.BA.t))) -} -#lapply function -if(parallel){ - library(doParallel) - print("start mclapply") - list.BA.SP.data <- mclapply(obs.id,FUN=fun.compute.local,obs.id,xy.table, - diam,sp,Rlim,mc.cores=4) - print("done") - data.BA.SP <- rbind.fill(list.BA.SP.data) - }else{ - list.BA.SP.data <- lapply(obs.id,FUN=fun.compute.local,obs.id,xy.table, - diam,sp,Rlim) - data.BA.SP <- rbind.fill(list.BA.SP.data) - } -return(data.BA.SP) +BA.SP.FUN.XY2 <- function(obs.id, xy.table, diam, sp, Rlim, parallel) { + rownames(xy.table) <- obs.id + print(paste("n observation = ", nrow(xy.table))) + print("start computing distance matrix") + ## create local function + fun.compute.local <- function(obs.id.t, obs.id, xy.table, diam, sp, Rlim) { + dist.t <- as.vector(sqrt((outer(xy.table[obs.id == obs.id.t, "x"], xy.table[, + "x"], FUN = "-"))^2 + (outer(xy.table[obs.id == obs.id.t, "y"], xy.table[, + "y"], FUN = "-"))^2)) + dist.t[dist.t > Rlim] <- 0 + dist.t[dist.t <= Rlim] <- 1 + res.BA.t <- tapply(BA.fun(diam, weights = dist.t/(pi * Rlim^2)), INDEX = sp, + FUN = sum, na.rm = TRUE) + res.BA.t[is.na(res.BA.t)] <- 0 + return(data.frame(obs.id = obs.id.t, t(res.BA.t))) + } + # lapply function + if (parallel) { + library(doParallel) + print("start mclapply") + list.BA.SP.data <- mclapply(obs.id, FUN = fun.compute.local, obs.id, xy.table, + diam, sp, Rlim, mc.cores = 4) + print("done") + data.BA.SP <- rbind.fill(list.BA.SP.data) + } else { + list.BA.SP.data <- lapply(obs.id, FUN = fun.compute.local, obs.id, xy.table, + diam, sp, Rlim) + data.BA.SP <- rbind.fill(list.BA.SP.data) + } + return(data.BA.SP) } ## function for lapply per census -BA.SP.FUN.XY.l <- function(census.id,census,obs.id,xy.table,diam,sp,Rlim,parallel,rpuDist){ -print(dim(xy.table)) -if(length(obs.id[census==census.id]) <10000){ -data.temp<-BA.SP.FUN.XY(obs.id=obs.id[census==census.id], - xy.table=xy.table[census==census.id,], - diam=diam[census==census.id],sp=sp[census==census.id], - Rlim=Rlim,parallel,rpuDist) - }else{ -data.temp<-BA.SP.FUN.XY2(obs.id=obs.id[census==census.id], - xy.table=xy.table[census==census.id,], - diam=diam[census==census.id],sp=sp[census==census.id], - Rlim=Rlim,parallel) - } -return(data.temp) +BA.SP.FUN.XY.l <- function(census.id, census, obs.id, xy.table, diam, sp, Rlim, parallel, + rpuDist) { + print(dim(xy.table)) + if (length(obs.id[census == census.id]) < 10000) { + data.temp <- BA.SP.FUN.XY(obs.id = obs.id[census == census.id], xy.table = xy.table[census == + census.id, ], diam = diam[census == census.id], sp = sp[census == census.id], + Rlim = Rlim, parallel, rpuDist) + } else { + data.temp <- BA.SP.FUN.XY2(obs.id = obs.id[census == census.id], xy.table = xy.table[census == + census.id, ], diam = diam[census == census.id], sp = sp[census == census.id], + Rlim = Rlim, parallel) + } + return(data.temp) } ### wrapping function to run BA.SP.FUN.XY per census and merge together all census -BA.SP.FUN.XY.census <- function(census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ -unique.census <- unique(census) -print(paste("Vector of census to lapply over" ,paste(unique.census,collapse=","),sep=" ")) -res.list <- lapply(unique.census,FUN=BA.SP.FUN.XY.l,census=census,obs.id=obs.id,xy.table=xy.table,diam=diam,sp=sp,Rlim=Rlim,parallel,rpuDist) -res.mat <- rbind.fill(res.list ) -res.DF <- res.mat[match(obs.id,res.mat[["obs.id"]]),] -return(res.DF) +BA.SP.FUN.XY.census <- function(census, obs.id, xy.table, diam, sp, Rlim, parallel = FALSE, + rpuDist = FALSE) { + unique.census <- unique(census) + print(paste("Vector of census to lapply over", paste(unique.census, collapse = ","), + sep = " ")) + res.list <- lapply(unique.census, FUN = BA.SP.FUN.XY.l, census = census, obs.id = obs.id, + xy.table = xy.table, diam = diam, sp = sp, Rlim = Rlim, parallel, rpuDist) + res.mat <- rbind.fill(res.list) + res.DF <- res.mat[match(obs.id, res.mat[["obs.id"]]), ] + return(res.DF) } -##################################### -##################################### -### FUNCTION TO COMPUTE BA.SP.XY PER PLOT -#### function to be lapply per site +##################################### FUNCTION TO COMPUTE BA.SP.XY PER PLOT function to be lapply per site ##' .. FUNCTION TO COMPUTE BA.SP.XY PER PLOT to be lapply over all plot or only one plot .. ##' ##' .. content for \details{} .. @@ -265,162 +262,171 @@ return(res.DF) ##' @param rpuDist TRUE/FALSE use rpuDist to compute distance matrix only available on linux with additional software ##' @return a data.frame with nrow = length of obs.id[plot==i] and ncol =unique(sp)+ 2 (BATOT added) ##' @author Kunstler -fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){ -data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i) -print(paste("size of the data for plot",i,"is",nrow(data.tree.s))) -BA.SP.temp <- BA.SP.FUN.XY.census(census=data.tree[["census"]], - obs.id=data.tree.s[['obs.id']], - xy.table=data.tree.s[,xy.name], - diam=data.tree.s[['D']], - sp=(data.tree.s[['sp']]), - Rlim=Rlim, - parallel, - rpuDist) -## replace NA per zero -print('replacing NA per zero') -BA.SP.temp[is.na(BA.SP.temp)] <- 0 -print('done') -### TEST GOOD ORDER -if(sum(! (BA.SP.temp[["obs.id"]])==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') -if(sum(!colnames(BA.SP.temp[,-1])==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') -### compute sum per row -BATOT <- apply(BA.SP.temp[,-1],MARGIN=1,FUN=sum) -data.res <- data.frame(BA.SP.temp,BATOT=BATOT, stringsAsFactors =FALSE) -return(data.res) +fun.compute.BA.SP.XY.per.plot <- function(i, data.tree, Rlim, xy.name = c("x", "y"), + parallel = FALSE, rpuDist = FALSE) { + data.tree.s <- subset(data.tree, subset = data.tree[["plot"]] == i) + print(paste("size of the data for plot", i, "is", nrow(data.tree.s))) + BA.SP.temp <- BA.SP.FUN.XY.census(census = data.tree[["census"]], obs.id = data.tree.s[["obs.id"]], + xy.table = data.tree.s[, xy.name], diam = data.tree.s[["D"]], sp = (data.tree.s[["sp"]]), + Rlim = Rlim, parallel, rpuDist) + ## replace NA per zero + print("replacing NA per zero") + BA.SP.temp[is.na(BA.SP.temp)] <- 0 + print("done") + ### TEST GOOD ORDER + if (sum(!(BA.SP.temp[["obs.id"]]) == data.tree.s[["obs.id"]]) > 0) + stop("rows not in the good order") + if (sum(!colnames(BA.SP.temp[, -1]) == as.character((levels(data.tree.s[["sp"]])))) > + 0) + stop("colnames does mot match species name") + ### compute sum per row + BATOT <- apply(BA.SP.temp[, -1], MARGIN = 1, FUN = sum) + data.res <- data.frame(BA.SP.temp, BATOT = BATOT, stringsAsFactors = FALSE) + return(data.res) } -########################## -### function to replace missing value per zero in competition matrix -function.replace.NA.negative <- function(data.BA.SP){ - for (i in 2:ncol(data.BA.SP)){ -eval(parse(text=paste("data.BA.SP[is.na(",names(data.BA.SP)[i],"),",names(data.BA.SP)[i],":=0]",sep=""))) -eval(parse(text=paste("data.BA.SP[",names(data.BA.SP)[i],"<0,",names(data.BA.SP)[i],":=0]",sep=""))) +########################## function to replace missing value per zero in competition matrix +function.replace.NA.negative <- function(data.BA.SP) { + for (i in 2:ncol(data.BA.SP)) { + eval(parse(text = paste("data.BA.SP[is.na(", names(data.BA.SP)[i], "),", + names(data.BA.SP)[i], ":=0]", sep = ""))) + eval(parse(text = paste("data.BA.SP[", names(data.BA.SP)[i], "<0,", names(data.BA.SP)[i], + ":=0]", sep = ""))) + } + print("NA and negative replaced") + return(data.BA.SP) } -print('NA and negative replaced') -return(data.BA.SP) -} - -############################################################## -##function to generate data in good format per ecoregion -fun.data.per.ecoregion <- function(ecoregion,data.tot,weight.full.plot,site.name,data.TRAITS=NA,sp.code="sp",sp.code2="sp", - out.dir = "output/processed/"){ -if(is.null(data.tot[['weights']])) stop("Please create a weights vector(1/m^2), even if it is completely NA") -data.tot <- data.table(data.tot) -data <- data.tot[ecocode==ecoregion,] -rm(data.tot) - -print(paste("number of obs in ecoregion",ecoregion," = ",nrow(data))) - -path <- file.path(out.dir,site.name,ecoregion) -dir.create(path, recursive=TRUE,showWarnings=FALSE) - -print('start computing competition index') -data.BA.SP <- BA.SP.FUN.census(census=data[['census']], - obs.id=as.vector(data[['obs.id']]), - diam=as.vector(data[['D']]), - sp=as.vector(data[['sp']]), - id.plot=as.vector(data[['plot']]), - weights=data[['weights']], - weight.full.plot=weight.full.plot) -data$obs.id <- as.character(data$obs.id) -data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id) - -print('competition index computed') -## change NA and <0 data for 0 -data.BA.SP <- function.replace.NA.negative(data.BA.SP) -### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) >0) stop("competition index sp name not the same as in data") -#### compute BA tot for all competitors -## data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## slower than apply why?? -BATOT.s <- apply(data.frame(data.BA.SP)[,-1],MARGIN=1,FUN=sum) -data.BA.SP[,BATOT:=BATOT.s] -print('BATOT COMPUTED') -### create data frame -DT.temp <- data.table(obs.id=data[["obs.id"]],ecocode=data[["ecocode"]]) -setkeyv(DT.temp,"obs.id") -setkeyv(data.BA.SP,"obs.id") -data.BA.sp <- merge(DT.temp,data.BA.SP) -## reorder data -data <- data.table(data) -setkeyv(data,"obs.id") -## test if same order -if(sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) >0) stop("competition index not in the same order than data") - -##### -data.traits=NA - -## ADD TRY DATA IF NEEDED -if(is.data.frame(data.TRAITS)) - data.TRAITS.s <- subset(data.TRAITS,subset=data.TRAITS[[sp.code]] %in% data[[sp.code2]]) -else - data.TRAITS.s <- NA - - -write.csv(data, file=file.path(path, "data.tree.csv"), quote=FALSE, row.names=FALSE) -write.csv(data.BA.sp, file=file.path(path, "data.BA.SP.csv"), quote=FALSE, row.names=FALSE) -write.csv(data.TRAITS.s, file=file.path(path, "data.traits.csv"), quote=FALSE, row.names=FALSE) -} - -####### -#### FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT - -fun.data.per.bigplot <- function(data,site.name,data.TRAITS=NA,Rlim=15,xy.name,parallel=FALSE,rpuDist=FALSE,sp.code="sp",sp.code2="sp"){ -require(data.table) -dir.create(paste("output/processed/",site.name,sep=""), recursive=TRUE,showWarnings=FALSE) -data$sp <- factor(data$sp) -data.TRAITS$sp <- factor(data.TRAITS$sp) -print(paste("plots :",unique(data[["plot"]]))) -## compute competition matrix -list.BA.SP.data <- lapply(unique(data[["plot"]]),FUN=fun.compute.BA.SP.XY.per.plot,data=data,Rlim=Rlim,xy.name=xy.name,parallel=TRUE,rpuDist=FALSE) -print('competition index computed in list') -data.BA.SP <- rbind.fill(list.BA.SP.data) -print('competition index computed') -### test -if(sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]),"BATOT"))) >0) stop("competition index sp name not the same as in data") - -### REMOVE TREE IN BUFFER ZONE -obs.id.no.buffer <- subset(data,subset=((data[["x"]]-15)>0 & (data[["x"]]+15)<max(data[["x"]])) & - ((data[["y"]]-15)>0 & (data[["y"]]+15)<max(data[["y"]])),select="obs.id") -data <- subset(data,subset=data[["obs.id"]] %in% as.character(obs.id.no.buffer[,1])) -data.BA.SP <- subset(data.BA.SP,subset=data.BA.SP[["obs.id"]] %in% as.character(obs.id.no.buffer[,1])) -## reorder data -data$obs.id <- as.character(data$obs.id) -data <- data.table(data) -setkeyv(data,"obs.id") -data.BA.SP$obs.id <- as.character(data.BA.SP[["obs.id"]]) -data.BA.SP <- data.table(data.BA.SP) -setkeyv(data.BA.SP,"obs.id") -## test if same order -if(sum(!as.character(data.BA.SP[["obs.id"]]) == as.character(data[["obs.id"]])) >0) stop("competition index not in the same order than data") -##### -## ADD TRY DATA OR TRAITS IF NEEDED -if(is.data.frame(data.TRAITS)){ - data.TRAITS.s <- subset(data.TRAITS,subset=data.TRAITS[[sp.code]] %in% data[[sp.code2]]) - list.temp <- list(data.tree=data,data.BA.SP=data.BA.SP,data.traits=data.TRAITS.s) - saveRDS(list.temp,file=paste("output/processed/",site.name,"/list.rds",sep="")) - }else{ - list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA) - saveRDS(list.temp,file=paste("output/processed/",site.name,"/list.rds",sep="")) - } -} - -process_inventory_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed"){ - #load data - data.tree <- read.csv(file.path(path.formatted , set, "tree.csv"), stringsAsFactors = FALSE) - data.traits <- read.csv(file.path(path.formatted , set, "traits.csv"), stringsAsFactors = FALSE) - - #remove nas - data.tree <- subset(data.tree,subset=!is.na(data.tree[["D"]])) - - ## change sp - data.traits$sp <- paste("sp",data.traits$sp,sep=".") - data.tree$sp <- paste("sp",data.tree$sp,sep=".") +############################################################## function to generate data in good format per ecoregion +fun.data.per.ecoregion <- function(ecoregion, data.tot, weight.full.plot, site.name, + data.TRAITS = NA, sp.code = "sp", sp.code2 = "sp", out.dir = "output/processed/") { + if (is.null(data.tot[["weights"]])) + stop("Please create a weights vector(1/m^2), even if it is completely NA") + data.tot <- data.table(data.tot) + data <- data.tot[ecocode == ecoregion, ] + rm(data.tot) + + print(paste("number of obs in ecoregion", ecoregion, " = ", nrow(data))) + + path <- file.path(out.dir, site.name, ecoregion) + dir.create(path, recursive = TRUE, showWarnings = FALSE) + + print("start computing competition index") + data.BA.SP <- BA.SP.FUN.census(census = data[["census"]], obs.id = as.vector(data[["obs.id"]]), + diam = as.vector(data[["D"]]), sp = as.vector(data[["sp"]]), id.plot = as.vector(data[["plot"]]), + weights = data[["weights"]], weight.full.plot = weight.full.plot) + + data$obs.id <- as.character(data$obs.id) + data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id) + + print("competition index computed") + ## change NA and <0 data for 0 + data.BA.SP <- function.replace.NA.negative(data.BA.SP) + ### CHECK IF sp and sp name for column are the same + if (sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) > 0) + stop("competition index sp name not the same as in data") + #### compute BA tot for all competitors data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## + #### slower than apply why?? + BATOT.s <- apply(data.frame(data.BA.SP)[, -1], MARGIN = 1, FUN = sum) + data.BA.SP[, `:=`(BATOT, BATOT.s)] + print("BATOT COMPUTED") + ### create data frame + DT.temp <- data.table(obs.id = data[["obs.id"]], ecocode = data[["ecocode"]]) + setkeyv(DT.temp, "obs.id") + setkeyv(data.BA.SP, "obs.id") + data.BA.sp <- merge(DT.temp, data.BA.SP) + ## reorder data + data <- data.table(data) + setkeyv(data, "obs.id") + ## test if same order + if (sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) > 0) + stop("competition index not in the same order than data") + + ##### + data.traits = NA + + ## ADD TRY DATA IF NEEDED + if (is.data.frame(data.TRAITS)) + data.TRAITS.s <- subset(data.TRAITS, subset = data.TRAITS[[sp.code]] %in% + data[[sp.code2]]) else data.TRAITS.s <- NA + + + write.csv(data, file = file.path(path, "data.tree.csv"), quote = FALSE, row.names = FALSE) + write.csv(data.BA.sp, file = file.path(path, "data.BA.SP.csv"), quote = FALSE, + row.names = FALSE) + write.csv(data.TRAITS.s, file = file.path(path, "data.traits.csv"), quote = FALSE, + row.names = FALSE) +} - # split into ecoregions - tmp <- lapply( unique(data.tree[["ecocode"]]), FUN = fun.data.per.ecoregion, - data.tot = data.tree, weight.full.plot = NA, site.name = set, data.TRAITS = data.traits, out.dir = path.processed) - cat("finished",file=file.path(path.processed, set, "Done.txt")) +####### FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT + +fun.data.per.bigplot <- function(data, site.name, data.TRAITS = NA, Rlim = 15, xy.name, + parallel = FALSE, rpuDist = FALSE, sp.code = "sp", sp.code2 = "sp") { + require(data.table) + dir.create(paste("output/processed/", site.name, sep = ""), recursive = TRUE, + showWarnings = FALSE) + data$sp <- factor(data$sp) + data.TRAITS$sp <- factor(data.TRAITS$sp) + print(paste("plots :", unique(data[["plot"]]))) + ## compute competition matrix + list.BA.SP.data <- lapply(unique(data[["plot"]]), FUN = fun.compute.BA.SP.XY.per.plot, + data = data, Rlim = Rlim, xy.name = xy.name, parallel = TRUE, rpuDist = FALSE) + print("competition index computed in list") + data.BA.SP <- rbind.fill(list.BA.SP.data) + print("competition index computed") + ### test + if (sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]), "BATOT"))) > 0) + stop("competition index sp name not the same as in data") + + ### REMOVE TREE IN BUFFER ZONE + obs.id.no.buffer <- subset(data, subset = ((data[["x"]] - 15) > 0 & (data[["x"]] + + 15) < max(data[["x"]])) & ((data[["y"]] - 15) > 0 & (data[["y"]] + 15) < + max(data[["y"]])), select = "obs.id") + data <- subset(data, subset = data[["obs.id"]] %in% as.character(obs.id.no.buffer[, + 1])) + data.BA.SP <- subset(data.BA.SP, subset = data.BA.SP[["obs.id"]] %in% as.character(obs.id.no.buffer[, + 1])) + ## reorder data + data$obs.id <- as.character(data$obs.id) + data <- data.table(data) + setkeyv(data, "obs.id") + data.BA.SP$obs.id <- as.character(data.BA.SP[["obs.id"]]) + data.BA.SP <- data.table(data.BA.SP) + setkeyv(data.BA.SP, "obs.id") + ## test if same order + if (sum(!as.character(data.BA.SP[["obs.id"]]) == as.character(data[["obs.id"]])) > + 0) + stop("competition index not in the same order than data") + ##### ADD TRY DATA OR TRAITS IF NEEDED + if (is.data.frame(data.TRAITS)) { + data.TRAITS.s <- subset(data.TRAITS, subset = data.TRAITS[[sp.code]] %in% + data[[sp.code2]]) + list.temp <- list(data.tree = data, data.BA.SP = data.BA.SP, data.traits = data.TRAITS.s) + saveRDS(list.temp, file = paste("output/processed/", site.name, "/list.rds", + sep = "")) + } else { + list.temp <- list(data.tree = data, data.BA.SP = data.BA.sp, data.traits = NA) + saveRDS(list.temp, file = paste("output/processed/", site.name, "/list.rds", + sep = "")) + } } + +process_inventory_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed") { + # load data + data.tree <- read.csv(file.path(path.formatted, set, "tree.csv"), stringsAsFactors = FALSE) + data.traits <- read.csv(file.path(path.formatted, set, "traits.csv"), stringsAsFactors = FALSE) + + # remove nas + data.tree <- subset(data.tree, subset = !is.na(data.tree[["D"]])) + + ## change sp + data.traits$sp <- paste("sp", data.traits$sp, sep = ".") + data.tree$sp <- paste("sp", data.tree$sp, sep = ".") + + # split into ecoregions + tmp <- lapply(unique(data.tree[["ecocode"]]), FUN = fun.data.per.ecoregion, data.tot = data.tree, + weight.full.plot = NA, site.name = set, data.TRAITS = data.traits, out.dir = path.processed) + cat("finished", file = file.path(path.processed, set, "Done.txt")) +}