Commit 3f9998d2 authored by Daniel Falster's avatar Daniel Falster
Browse files

run formatR to tidy source

No related merge requests found
Showing with 323 additions and 317 deletions
+323 -317
####################################################
####################################################
####################################################
#### function to process data
### install all unstallled packages
#################################################### function to process data install all unstallled packages
source("R/packages.R")
check_packages(c("reshape", "data.table","doParallel", "data.table"))
check_packages(c("reshape", "data.table", "doParallel", "data.table"))
#########################
##
#########################
##' .. Compute the basal area of competitor in a plot..
##'
##' .. content for \details{} ..
......@@ -16,8 +11,8 @@ check_packages(c("reshape", "data.table","doParallel", "data.table"))
##' @param weights weight in 1/m^2 to compute the basal area in cm^2/m^2
##' @return basal area in cm^2/m^2
##' @author Kunstler
BA.fun <- function(diam,weights){
((diam/2)^2)*pi*weights
BA.fun <- function(diam, weights) {
((diam/2)^2) * pi * weights
}
......@@ -36,71 +31,70 @@ BA.fun <- function(diam,weights){
##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots)
##' @return data frame with obs.id and one column per species with basal area of the species (without the target tree)
##' @author Kunstler
BA.SP.FUN <- function(obs.id,diam,sp,id.plot,weights,weight.full.plot){
print(paste("Number of observation :",length(obs.id)))
require(data.table)
id.plot <- as.character(id.plot)
obs.id <- as.character(obs.id)
## check equal length
if(!(length(obs.id)==length(diam) & length(obs.id)==length(sp) & length(obs.id)==length(id.plot) & length(obs.id)==length(weights)))
stop("length of obs.id diam,sp id.plot & weights need to be the same")
## check sp is not numeric
if(is.numeric(sp)) stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')")
# compute BA tot per species per plot
BASP <- tapply(BA.fun(diam,weights),INDEX=list(id.plot,sp),FUN=sum,na.rm=T)
DATA.BASP <- data.table(id.plot= rownames(BASP),BASP)
setnames( DATA.BASP,old=1:ncol(DATA.BASP), c("id.plot",colnames(BASP)))
setkeyv(DATA.BASP,c("id.plot"))
sp.name <- colnames(BASP)
rm(BASP)
print("plot level table created")
#### MERGE with indivudal tree
## use library(data.table)
if(!is.na(weight.full.plot)){
data.indiv <- data.table(obs.id=obs.id,sp=sp,
id.plot=id.plot,diam=diam,
BA.indiv=BA.fun(diam,rep(weight.full.plot,length(diam))))
setkeyv(data.indiv,"id.plot")
print("tree level table created")
data.merge <- merge(data.indiv,DATA.BASP)
print("merge done")
# substract target BA
for (i in (sp.name)){
eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep="")))
}
}else{
data.indiv <- data.table(obs.id=obs.id,sp=sp,id.plot=id.plot,
diam=diam,weights=weights,
BA.indiv=BA.fun(diam,weights))
setkeyv(data.indiv,"id.plot")
print("tree level table created")
data.merge <- merge(data.indiv,DATA.BASP)
print("merge done")
for (i in (sp.name)){
eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep="")))
}
}
print("substraction of target tree BA done")
#### delete column not used
data.merge[,BA.indiv:=NULL]
data.merge[,sp:=NULL]
data.merge[,diam:=NULL]
data.merge[,id.plot:=NULL]
data.merge[,weights:=NULL]
print("columns removed")
return( (data.merge))
BA.SP.FUN <- function(obs.id, diam, sp, id.plot, weights, weight.full.plot) {
print(paste("Number of observation :", length(obs.id)))
require(data.table)
id.plot <- as.character(id.plot)
obs.id <- as.character(obs.id)
## check equal length
if (!(length(obs.id) == length(diam) & length(obs.id) == length(sp) & length(obs.id) ==
length(id.plot) & length(obs.id) == length(weights)))
stop("length of obs.id diam,sp id.plot & weights need to be the same")
## check sp is not numeric
if (is.numeric(sp))
stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')")
# compute BA tot per species per plot
BASP <- tapply(BA.fun(diam, weights), INDEX = list(id.plot, sp), FUN = sum, na.rm = T)
DATA.BASP <- data.table(id.plot = rownames(BASP), BASP)
setnames(DATA.BASP, old = 1:ncol(DATA.BASP), c("id.plot", colnames(BASP)))
setkeyv(DATA.BASP, c("id.plot"))
sp.name <- colnames(BASP)
rm(BASP)
print("plot level table created")
#### MERGE with indivudal tree use library(data.table)
if (!is.na(weight.full.plot)) {
data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam,
BA.indiv = BA.fun(diam, rep(weight.full.plot, length(diam))))
setkeyv(data.indiv, "id.plot")
print("tree level table created")
data.merge <- merge(data.indiv, DATA.BASP)
print("merge done")
# substract target BA
for (i in (sp.name)) {
eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]",
sep = "")))
}
} else {
data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam,
weights = weights, BA.indiv = BA.fun(diam, weights))
setkeyv(data.indiv, "id.plot")
print("tree level table created")
data.merge <- merge(data.indiv, DATA.BASP)
print("merge done")
for (i in (sp.name)) {
eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]",
sep = "")))
}
}
print("substraction of target tree BA done")
#### delete column not used
data.merge[, `:=`(BA.indiv, NULL)]
data.merge[, `:=`(sp, NULL)]
data.merge[, `:=`(diam, NULL)]
data.merge[, `:=`(id.plot, NULL)]
data.merge[, `:=`(weights, NULL)]
print("columns removed")
return((data.merge))
}
######################
#### apply BA.SP.FUN per census
# function to run on a subset to only one census
BA.SP.FUN.l<- function(census.id,census,obs.id,diam,sp,id.plot,weights,weight.full.plot){
return(BA.SP.FUN(obs.id=obs.id[census==census.id],diam=diam[census==census.id],
sp=sp[census==census.id],id.plot=id.plot[census==census.id],
weights=weights[census==census.id],weight.full.plot=weight.full.plot))
###################### apply BA.SP.FUN per census function to run on a subset to only one census
BA.SP.FUN.l <- function(census.id, census, obs.id, diam, sp, id.plot, weights, weight.full.plot) {
return(BA.SP.FUN(obs.id = obs.id[census == census.id], diam = diam[census ==
census.id], sp = sp[census == census.id], id.plot = id.plot[census == census.id],
weights = weights[census == census.id], weight.full.plot = weight.full.plot))
}
## function to apply over all census and merge back together
##' .. function to apply competition index computation over all census and merge back together ..
......@@ -116,13 +110,14 @@ return(BA.SP.FUN(obs.id=obs.id[census==census.id],diam=diam[census==census.id],
##' @param weight.full.plot optional if want to use a different weight to remove the BA of target tree (if NA weights is use instead)
##' @return a dta.table of dim N & length(unique(sp))+1 (obs.id and basal area of all species
##' @author Kunstler
BA.SP.FUN.census<- function(census,obs.id,diam,sp,id.plot,weights,weight.full.plot){
require(data.table)
unique.census <- unique(census)
res.list <- lapply(unique.census,FUN=BA.SP.FUN.l,census,obs.id,diam,sp,id.plot,weights,weight.full.plot)
res.mat <- rbind.fill(res.list )
res.mat <- data.table(res.mat[match(obs.id,res.mat[["obs.id"]]),])
return(res.mat)
BA.SP.FUN.census <- function(census, obs.id, diam, sp, id.plot, weights, weight.full.plot) {
require(data.table)
unique.census <- unique(census)
res.list <- lapply(unique.census, FUN = BA.SP.FUN.l, census, obs.id, diam, sp,
id.plot, weights, weight.full.plot)
res.mat <- rbind.fill(res.list)
res.mat <- data.table(res.mat[match(obs.id, res.mat[["obs.id"]]), ])
return(res.mat)
}
......@@ -140,39 +135,40 @@ return(res.mat)
##' @param rpuDist TRUE/FALSE run with GPU distance computation (need top install additional software only on linux)
##' @return a data frame with nrow = length of obs.id and ncol =unique(sp)
##' @author Kunstler
BA.SP.FUN.XY <- function(obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){
print(paste("n observation = ",nrow(xy.table)))
print("start computing distance matrix")
if(rpuDist){
require(rpud)
dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE)
}else{
dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE))
}
print('distance matrix computed')
dist.mat[dist.mat <Rlim] <- 1
dist.mat[dist.mat >Rlim] <- 0
diag(dist.mat) <- 0
print('distance matrix set to 0 1')
BA <- BA.fun(diam,weights=1/(pi*Rlim^2))
print(c(length(BA),length(diam)))
BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE)
print('starting tapply over species')
fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE)
if(parallel){
## parallel version
require(doParallel)
registerDoParallel(cores=4)
mat <- dist.mat*BA.mat
res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% {
fun.sum.sp(mat[i,],sp)
}
res.temp <- data.frame(obs.id=obs.id,res.temp)
return((res.temp))
}else{
res.temp <- data.frame(obs.id=obs.id,t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp)))
return(res.temp)
}
BA.SP.FUN.XY <- function(obs.id, xy.table, diam, sp, Rlim, parallel = FALSE, rpuDist = FALSE) {
print(paste("n observation = ", nrow(xy.table)))
print("start computing distance matrix")
if (rpuDist) {
require(rpud)
dist.mat <- rpuDist(xy.table, upper = TRUE, diag = TRUE)
} else {
dist.mat <- as.matrix(dist(xy.table, upper = TRUE, diag = TRUE))
}
print("distance matrix computed")
dist.mat[dist.mat < Rlim] <- 1
dist.mat[dist.mat > Rlim] <- 0
diag(dist.mat) <- 0
print("distance matrix set to 0 1")
BA <- BA.fun(diam, weights = 1/(pi * Rlim^2))
print(c(length(BA), length(diam)))
BA.mat <- matrix(rep(BA, length(BA)), nrow = length(BA), byrow = TRUE)
print("starting tapply over species")
fun.sum.sp <- function(x, sp) tapply(x, INDEX = sp, FUN = sum, na.rm = TRUE)
if (parallel) {
## parallel version
require(doParallel)
registerDoParallel(cores = 4)
mat <- dist.mat * BA.mat
res.temp <- foreach(i = 1:nrow(mat), .combine = rbind) %dopar% {
fun.sum.sp(mat[i, ], sp)
}
res.temp <- data.frame(obs.id = obs.id, res.temp)
return((res.temp))
} else {
res.temp <- data.frame(obs.id = obs.id, t(apply(dist.mat * BA.mat, MARGIN = 1,
FUN = fun.sum.sp, sp)))
return(res.temp)
}
}
......@@ -189,70 +185,71 @@ fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE)
##' @param parallel run in paralle or not ?
##' @return a data frame with nrow = length of obs.id and ncol =unique(sp)
##' @author Kunstler
BA.SP.FUN.XY2 <- function(obs.id,xy.table,diam,sp,Rlim,parallel){
rownames(xy.table) <- obs.id
print(paste("n observation = ",nrow(xy.table)))
print("start computing distance matrix")
## create local function
fun.compute.local <- function(obs.id.t,obs.id,xy.table,diam,sp,Rlim){
dist.t <- as.vector(sqrt((outer(xy.table[obs.id==obs.id.t,"x"],xy.table[,"x"],FUN="-"))^2 +
(outer(xy.table[obs.id==obs.id.t,"y"],xy.table[,"y"],FUN="-"))^2))
dist.t[dist.t>Rlim] <- 0
dist.t[dist.t<=Rlim] <- 1
res.BA.t <- tapply(BA.fun(diam,weights=dist.t/(pi*Rlim^2)),INDEX=sp,FUN=sum,na.rm=TRUE)
res.BA.t[is.na(res.BA.t)] <- 0
return(data.frame(obs.id=obs.id.t,t(res.BA.t)))
}
#lapply function
if(parallel){
library(doParallel)
print("start mclapply")
list.BA.SP.data <- mclapply(obs.id,FUN=fun.compute.local,obs.id,xy.table,
diam,sp,Rlim,mc.cores=4)
print("done")
data.BA.SP <- rbind.fill(list.BA.SP.data)
}else{
list.BA.SP.data <- lapply(obs.id,FUN=fun.compute.local,obs.id,xy.table,
diam,sp,Rlim)
data.BA.SP <- rbind.fill(list.BA.SP.data)
}
return(data.BA.SP)
BA.SP.FUN.XY2 <- function(obs.id, xy.table, diam, sp, Rlim, parallel) {
rownames(xy.table) <- obs.id
print(paste("n observation = ", nrow(xy.table)))
print("start computing distance matrix")
## create local function
fun.compute.local <- function(obs.id.t, obs.id, xy.table, diam, sp, Rlim) {
dist.t <- as.vector(sqrt((outer(xy.table[obs.id == obs.id.t, "x"], xy.table[,
"x"], FUN = "-"))^2 + (outer(xy.table[obs.id == obs.id.t, "y"], xy.table[,
"y"], FUN = "-"))^2))
dist.t[dist.t > Rlim] <- 0
dist.t[dist.t <= Rlim] <- 1
res.BA.t <- tapply(BA.fun(diam, weights = dist.t/(pi * Rlim^2)), INDEX = sp,
FUN = sum, na.rm = TRUE)
res.BA.t[is.na(res.BA.t)] <- 0
return(data.frame(obs.id = obs.id.t, t(res.BA.t)))
}
# lapply function
if (parallel) {
library(doParallel)
print("start mclapply")
list.BA.SP.data <- mclapply(obs.id, FUN = fun.compute.local, obs.id, xy.table,
diam, sp, Rlim, mc.cores = 4)
print("done")
data.BA.SP <- rbind.fill(list.BA.SP.data)
} else {
list.BA.SP.data <- lapply(obs.id, FUN = fun.compute.local, obs.id, xy.table,
diam, sp, Rlim)
data.BA.SP <- rbind.fill(list.BA.SP.data)
}
return(data.BA.SP)
}
## function for lapply per census
BA.SP.FUN.XY.l <- function(census.id,census,obs.id,xy.table,diam,sp,Rlim,parallel,rpuDist){
print(dim(xy.table))
if(length(obs.id[census==census.id]) <10000){
data.temp<-BA.SP.FUN.XY(obs.id=obs.id[census==census.id],
xy.table=xy.table[census==census.id,],
diam=diam[census==census.id],sp=sp[census==census.id],
Rlim=Rlim,parallel,rpuDist)
}else{
data.temp<-BA.SP.FUN.XY2(obs.id=obs.id[census==census.id],
xy.table=xy.table[census==census.id,],
diam=diam[census==census.id],sp=sp[census==census.id],
Rlim=Rlim,parallel)
}
return(data.temp)
BA.SP.FUN.XY.l <- function(census.id, census, obs.id, xy.table, diam, sp, Rlim, parallel,
rpuDist) {
print(dim(xy.table))
if (length(obs.id[census == census.id]) < 10000) {
data.temp <- BA.SP.FUN.XY(obs.id = obs.id[census == census.id], xy.table = xy.table[census ==
census.id, ], diam = diam[census == census.id], sp = sp[census == census.id],
Rlim = Rlim, parallel, rpuDist)
} else {
data.temp <- BA.SP.FUN.XY2(obs.id = obs.id[census == census.id], xy.table = xy.table[census ==
census.id, ], diam = diam[census == census.id], sp = sp[census == census.id],
Rlim = Rlim, parallel)
}
return(data.temp)
}
### wrapping function to run BA.SP.FUN.XY per census and merge together all census
BA.SP.FUN.XY.census <- function(census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){
unique.census <- unique(census)
print(paste("Vector of census to lapply over" ,paste(unique.census,collapse=","),sep=" "))
res.list <- lapply(unique.census,FUN=BA.SP.FUN.XY.l,census=census,obs.id=obs.id,xy.table=xy.table,diam=diam,sp=sp,Rlim=Rlim,parallel,rpuDist)
res.mat <- rbind.fill(res.list )
res.DF <- res.mat[match(obs.id,res.mat[["obs.id"]]),]
return(res.DF)
BA.SP.FUN.XY.census <- function(census, obs.id, xy.table, diam, sp, Rlim, parallel = FALSE,
rpuDist = FALSE) {
unique.census <- unique(census)
print(paste("Vector of census to lapply over", paste(unique.census, collapse = ","),
sep = " "))
res.list <- lapply(unique.census, FUN = BA.SP.FUN.XY.l, census = census, obs.id = obs.id,
xy.table = xy.table, diam = diam, sp = sp, Rlim = Rlim, parallel, rpuDist)
res.mat <- rbind.fill(res.list)
res.DF <- res.mat[match(obs.id, res.mat[["obs.id"]]), ]
return(res.DF)
}
#####################################
#####################################
### FUNCTION TO COMPUTE BA.SP.XY PER PLOT
#### function to be lapply per site
##################################### FUNCTION TO COMPUTE BA.SP.XY PER PLOT function to be lapply per site
##' .. FUNCTION TO COMPUTE BA.SP.XY PER PLOT to be lapply over all plot or only one plot ..
##'
##' .. content for \details{} ..
......@@ -265,162 +262,171 @@ return(res.DF)
##' @param rpuDist TRUE/FALSE use rpuDist to compute distance matrix only available on linux with additional software
##' @return a data.frame with nrow = length of obs.id[plot==i] and ncol =unique(sp)+ 2 (BATOT added)
##' @author Kunstler
fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){
data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i)
print(paste("size of the data for plot",i,"is",nrow(data.tree.s)))
BA.SP.temp <- BA.SP.FUN.XY.census(census=data.tree[["census"]],
obs.id=data.tree.s[['obs.id']],
xy.table=data.tree.s[,xy.name],
diam=data.tree.s[['D']],
sp=(data.tree.s[['sp']]),
Rlim=Rlim,
parallel,
rpuDist)
## replace NA per zero
print('replacing NA per zero')
BA.SP.temp[is.na(BA.SP.temp)] <- 0
print('done')
### TEST GOOD ORDER
if(sum(! (BA.SP.temp[["obs.id"]])==data.tree.s[['obs.id']]) >0) stop('rows not in the good order')
if(sum(!colnames(BA.SP.temp[,-1])==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name')
### compute sum per row
BATOT <- apply(BA.SP.temp[,-1],MARGIN=1,FUN=sum)
data.res <- data.frame(BA.SP.temp,BATOT=BATOT, stringsAsFactors =FALSE)
return(data.res)
fun.compute.BA.SP.XY.per.plot <- function(i, data.tree, Rlim, xy.name = c("x", "y"),
parallel = FALSE, rpuDist = FALSE) {
data.tree.s <- subset(data.tree, subset = data.tree[["plot"]] == i)
print(paste("size of the data for plot", i, "is", nrow(data.tree.s)))
BA.SP.temp <- BA.SP.FUN.XY.census(census = data.tree[["census"]], obs.id = data.tree.s[["obs.id"]],
xy.table = data.tree.s[, xy.name], diam = data.tree.s[["D"]], sp = (data.tree.s[["sp"]]),
Rlim = Rlim, parallel, rpuDist)
## replace NA per zero
print("replacing NA per zero")
BA.SP.temp[is.na(BA.SP.temp)] <- 0
print("done")
### TEST GOOD ORDER
if (sum(!(BA.SP.temp[["obs.id"]]) == data.tree.s[["obs.id"]]) > 0)
stop("rows not in the good order")
if (sum(!colnames(BA.SP.temp[, -1]) == as.character((levels(data.tree.s[["sp"]])))) >
0)
stop("colnames does mot match species name")
### compute sum per row
BATOT <- apply(BA.SP.temp[, -1], MARGIN = 1, FUN = sum)
data.res <- data.frame(BA.SP.temp, BATOT = BATOT, stringsAsFactors = FALSE)
return(data.res)
}
##########################
### function to replace missing value per zero in competition matrix
function.replace.NA.negative <- function(data.BA.SP){
for (i in 2:ncol(data.BA.SP)){
eval(parse(text=paste("data.BA.SP[is.na(",names(data.BA.SP)[i],"),",names(data.BA.SP)[i],":=0]",sep="")))
eval(parse(text=paste("data.BA.SP[",names(data.BA.SP)[i],"<0,",names(data.BA.SP)[i],":=0]",sep="")))
########################## function to replace missing value per zero in competition matrix
function.replace.NA.negative <- function(data.BA.SP) {
for (i in 2:ncol(data.BA.SP)) {
eval(parse(text = paste("data.BA.SP[is.na(", names(data.BA.SP)[i], "),",
names(data.BA.SP)[i], ":=0]", sep = "")))
eval(parse(text = paste("data.BA.SP[", names(data.BA.SP)[i], "<0,", names(data.BA.SP)[i],
":=0]", sep = "")))
}
print("NA and negative replaced")
return(data.BA.SP)
}
print('NA and negative replaced')
return(data.BA.SP)
}
##############################################################
##function to generate data in good format per ecoregion
fun.data.per.ecoregion <- function(ecoregion,data.tot,weight.full.plot,site.name,data.TRAITS=NA,sp.code="sp",sp.code2="sp",
out.dir = "output/processed/"){
if(is.null(data.tot[['weights']])) stop("Please create a weights vector(1/m^2), even if it is completely NA")
data.tot <- data.table(data.tot)
data <- data.tot[ecocode==ecoregion,]
rm(data.tot)
print(paste("number of obs in ecoregion",ecoregion," = ",nrow(data)))
path <- file.path(out.dir,site.name,ecoregion)
dir.create(path, recursive=TRUE,showWarnings=FALSE)
print('start computing competition index')
data.BA.SP <- BA.SP.FUN.census(census=data[['census']],
obs.id=as.vector(data[['obs.id']]),
diam=as.vector(data[['D']]),
sp=as.vector(data[['sp']]),
id.plot=as.vector(data[['plot']]),
weights=data[['weights']],
weight.full.plot=weight.full.plot)
data$obs.id <- as.character(data$obs.id)
data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id)
print('competition index computed')
## change NA and <0 data for 0
data.BA.SP <- function.replace.NA.negative(data.BA.SP)
### CHECK IF sp and sp name for column are the same
if(sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) >0) stop("competition index sp name not the same as in data")
#### compute BA tot for all competitors
## data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## slower than apply why??
BATOT.s <- apply(data.frame(data.BA.SP)[,-1],MARGIN=1,FUN=sum)
data.BA.SP[,BATOT:=BATOT.s]
print('BATOT COMPUTED')
### create data frame
DT.temp <- data.table(obs.id=data[["obs.id"]],ecocode=data[["ecocode"]])
setkeyv(DT.temp,"obs.id")
setkeyv(data.BA.SP,"obs.id")
data.BA.sp <- merge(DT.temp,data.BA.SP)
## reorder data
data <- data.table(data)
setkeyv(data,"obs.id")
## test if same order
if(sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) >0) stop("competition index not in the same order than data")
#####
data.traits=NA
## ADD TRY DATA IF NEEDED
if(is.data.frame(data.TRAITS))
data.TRAITS.s <- subset(data.TRAITS,subset=data.TRAITS[[sp.code]] %in% data[[sp.code2]])
else
data.TRAITS.s <- NA
write.csv(data, file=file.path(path, "data.tree.csv"), quote=FALSE, row.names=FALSE)
write.csv(data.BA.sp, file=file.path(path, "data.BA.SP.csv"), quote=FALSE, row.names=FALSE)
write.csv(data.TRAITS.s, file=file.path(path, "data.traits.csv"), quote=FALSE, row.names=FALSE)
}
#######
#### FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT
fun.data.per.bigplot <- function(data,site.name,data.TRAITS=NA,Rlim=15,xy.name,parallel=FALSE,rpuDist=FALSE,sp.code="sp",sp.code2="sp"){
require(data.table)
dir.create(paste("output/processed/",site.name,sep=""), recursive=TRUE,showWarnings=FALSE)
data$sp <- factor(data$sp)
data.TRAITS$sp <- factor(data.TRAITS$sp)
print(paste("plots :",unique(data[["plot"]])))
## compute competition matrix
list.BA.SP.data <- lapply(unique(data[["plot"]]),FUN=fun.compute.BA.SP.XY.per.plot,data=data,Rlim=Rlim,xy.name=xy.name,parallel=TRUE,rpuDist=FALSE)
print('competition index computed in list')
data.BA.SP <- rbind.fill(list.BA.SP.data)
print('competition index computed')
### test
if(sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]),"BATOT"))) >0) stop("competition index sp name not the same as in data")
### REMOVE TREE IN BUFFER ZONE
obs.id.no.buffer <- subset(data,subset=((data[["x"]]-15)>0 & (data[["x"]]+15)<max(data[["x"]])) &
((data[["y"]]-15)>0 & (data[["y"]]+15)<max(data[["y"]])),select="obs.id")
data <- subset(data,subset=data[["obs.id"]] %in% as.character(obs.id.no.buffer[,1]))
data.BA.SP <- subset(data.BA.SP,subset=data.BA.SP[["obs.id"]] %in% as.character(obs.id.no.buffer[,1]))
## reorder data
data$obs.id <- as.character(data$obs.id)
data <- data.table(data)
setkeyv(data,"obs.id")
data.BA.SP$obs.id <- as.character(data.BA.SP[["obs.id"]])
data.BA.SP <- data.table(data.BA.SP)
setkeyv(data.BA.SP,"obs.id")
## test if same order
if(sum(!as.character(data.BA.SP[["obs.id"]]) == as.character(data[["obs.id"]])) >0) stop("competition index not in the same order than data")
#####
## ADD TRY DATA OR TRAITS IF NEEDED
if(is.data.frame(data.TRAITS)){
data.TRAITS.s <- subset(data.TRAITS,subset=data.TRAITS[[sp.code]] %in% data[[sp.code2]])
list.temp <- list(data.tree=data,data.BA.SP=data.BA.SP,data.traits=data.TRAITS.s)
saveRDS(list.temp,file=paste("output/processed/",site.name,"/list.rds",sep=""))
}else{
list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA)
saveRDS(list.temp,file=paste("output/processed/",site.name,"/list.rds",sep=""))
}
}
process_inventory_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed"){
#load data
data.tree <- read.csv(file.path(path.formatted , set, "tree.csv"), stringsAsFactors = FALSE)
data.traits <- read.csv(file.path(path.formatted , set, "traits.csv"), stringsAsFactors = FALSE)
#remove nas
data.tree <- subset(data.tree,subset=!is.na(data.tree[["D"]]))
## change sp
data.traits$sp <- paste("sp",data.traits$sp,sep=".")
data.tree$sp <- paste("sp",data.tree$sp,sep=".")
############################################################## function to generate data in good format per ecoregion
fun.data.per.ecoregion <- function(ecoregion, data.tot, weight.full.plot, site.name,
data.TRAITS = NA, sp.code = "sp", sp.code2 = "sp", out.dir = "output/processed/") {
if (is.null(data.tot[["weights"]]))
stop("Please create a weights vector(1/m^2), even if it is completely NA")
data.tot <- data.table(data.tot)
data <- data.tot[ecocode == ecoregion, ]
rm(data.tot)
print(paste("number of obs in ecoregion", ecoregion, " = ", nrow(data)))
path <- file.path(out.dir, site.name, ecoregion)
dir.create(path, recursive = TRUE, showWarnings = FALSE)
print("start computing competition index")
data.BA.SP <- BA.SP.FUN.census(census = data[["census"]], obs.id = as.vector(data[["obs.id"]]),
diam = as.vector(data[["D"]]), sp = as.vector(data[["sp"]]), id.plot = as.vector(data[["plot"]]),
weights = data[["weights"]], weight.full.plot = weight.full.plot)
data$obs.id <- as.character(data$obs.id)
data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id)
print("competition index computed")
## change NA and <0 data for 0
data.BA.SP <- function.replace.NA.negative(data.BA.SP)
### CHECK IF sp and sp name for column are the same
if (sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) > 0)
stop("competition index sp name not the same as in data")
#### compute BA tot for all competitors data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ##
#### slower than apply why??
BATOT.s <- apply(data.frame(data.BA.SP)[, -1], MARGIN = 1, FUN = sum)
data.BA.SP[, `:=`(BATOT, BATOT.s)]
print("BATOT COMPUTED")
### create data frame
DT.temp <- data.table(obs.id = data[["obs.id"]], ecocode = data[["ecocode"]])
setkeyv(DT.temp, "obs.id")
setkeyv(data.BA.SP, "obs.id")
data.BA.sp <- merge(DT.temp, data.BA.SP)
## reorder data
data <- data.table(data)
setkeyv(data, "obs.id")
## test if same order
if (sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) > 0)
stop("competition index not in the same order than data")
#####
data.traits = NA
## ADD TRY DATA IF NEEDED
if (is.data.frame(data.TRAITS))
data.TRAITS.s <- subset(data.TRAITS, subset = data.TRAITS[[sp.code]] %in%
data[[sp.code2]]) else data.TRAITS.s <- NA
write.csv(data, file = file.path(path, "data.tree.csv"), quote = FALSE, row.names = FALSE)
write.csv(data.BA.sp, file = file.path(path, "data.BA.SP.csv"), quote = FALSE,
row.names = FALSE)
write.csv(data.TRAITS.s, file = file.path(path, "data.traits.csv"), quote = FALSE,
row.names = FALSE)
}
# split into ecoregions
tmp <- lapply( unique(data.tree[["ecocode"]]), FUN = fun.data.per.ecoregion,
data.tot = data.tree, weight.full.plot = NA, site.name = set, data.TRAITS = data.traits, out.dir = path.processed)
cat("finished",file=file.path(path.processed, set, "Done.txt"))
####### FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT
fun.data.per.bigplot <- function(data, site.name, data.TRAITS = NA, Rlim = 15, xy.name,
parallel = FALSE, rpuDist = FALSE, sp.code = "sp", sp.code2 = "sp") {
require(data.table)
dir.create(paste("output/processed/", site.name, sep = ""), recursive = TRUE,
showWarnings = FALSE)
data$sp <- factor(data$sp)
data.TRAITS$sp <- factor(data.TRAITS$sp)
print(paste("plots :", unique(data[["plot"]])))
## compute competition matrix
list.BA.SP.data <- lapply(unique(data[["plot"]]), FUN = fun.compute.BA.SP.XY.per.plot,
data = data, Rlim = Rlim, xy.name = xy.name, parallel = TRUE, rpuDist = FALSE)
print("competition index computed in list")
data.BA.SP <- rbind.fill(list.BA.SP.data)
print("competition index computed")
### test
if (sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]), "BATOT"))) > 0)
stop("competition index sp name not the same as in data")
### REMOVE TREE IN BUFFER ZONE
obs.id.no.buffer <- subset(data, subset = ((data[["x"]] - 15) > 0 & (data[["x"]] +
15) < max(data[["x"]])) & ((data[["y"]] - 15) > 0 & (data[["y"]] + 15) <
max(data[["y"]])), select = "obs.id")
data <- subset(data, subset = data[["obs.id"]] %in% as.character(obs.id.no.buffer[,
1]))
data.BA.SP <- subset(data.BA.SP, subset = data.BA.SP[["obs.id"]] %in% as.character(obs.id.no.buffer[,
1]))
## reorder data
data$obs.id <- as.character(data$obs.id)
data <- data.table(data)
setkeyv(data, "obs.id")
data.BA.SP$obs.id <- as.character(data.BA.SP[["obs.id"]])
data.BA.SP <- data.table(data.BA.SP)
setkeyv(data.BA.SP, "obs.id")
## test if same order
if (sum(!as.character(data.BA.SP[["obs.id"]]) == as.character(data[["obs.id"]])) >
0)
stop("competition index not in the same order than data")
##### ADD TRY DATA OR TRAITS IF NEEDED
if (is.data.frame(data.TRAITS)) {
data.TRAITS.s <- subset(data.TRAITS, subset = data.TRAITS[[sp.code]] %in%
data[[sp.code2]])
list.temp <- list(data.tree = data, data.BA.SP = data.BA.SP, data.traits = data.TRAITS.s)
saveRDS(list.temp, file = paste("output/processed/", site.name, "/list.rds",
sep = ""))
} else {
list.temp <- list(data.tree = data, data.BA.SP = data.BA.sp, data.traits = NA)
saveRDS(list.temp, file = paste("output/processed/", site.name, "/list.rds",
sep = ""))
}
}
process_inventory_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed") {
# load data
data.tree <- read.csv(file.path(path.formatted, set, "tree.csv"), stringsAsFactors = FALSE)
data.traits <- read.csv(file.path(path.formatted, set, "traits.csv"), stringsAsFactors = FALSE)
# remove nas
data.tree <- subset(data.tree, subset = !is.na(data.tree[["D"]]))
## change sp
data.traits$sp <- paste("sp", data.traits$sp, sep = ".")
data.tree$sp <- paste("sp", data.tree$sp, sep = ".")
# split into ecoregions
tmp <- lapply(unique(data.tree[["ecocode"]]), FUN = fun.data.per.ecoregion, data.tot = data.tree,
weight.full.plot = NA, site.name = set, data.TRAITS = data.traits, out.dir = path.processed)
cat("finished", file = file.path(path.processed, set, "Done.txt"))
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment