format.function.R


###################################################
###################################################
###################################################
##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP ANALYSIS


######
######
## FUNCTION TO PLOT MAP OF TREE
##' .. Function to plot map of tree with circle function of their dbh..
##'
##' .. content for \details{} ..
##' @title 
##' @param plot.select the plot for which draw the map
##' @param x 
##' @param y 
##' @param plot vectore of plot id for each tree
##' @param D diameter in cm
##' @param inches controling the circle size
##' @param ... 
##' @return 
##' @author Kunstler
fun.circles.plot <- function(plot.select,x,y,plot,D,inches,...){
x.t <- x[plot==plot.select]
y.t <- y[plot==plot.select]
D.t <- D[plot==plot.select]
D.t[is.na(D.t)] <-  0
symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches,...)
}


#########################
## 
##' .. Compute the basal area of competitor in a plot..
##'
##' .. content for \details{} ..
##' @title 
##' @param diam diameters of each individuals in cm
##' @param weights weight to compute the basal area in cm^2/m^2
##' @return basal area in cm^2/m^2 
##' @author Kunstler
BA.fun <- function(diam,weights){
((diam/2)^2)*pi*weights
}


#### 
##' .. function to compute the basal area of neighborood tree in plots ..
##'
##' .. content for \details{} ..
##' @title 
##' @param tree.id tree obs identifier
##' @param diam diam of tree in cm
##' @param sp species name or code
##' @param id.plot identifier of the plot
##' @param weights weights to compute basal area in cm^2/m^2 SO THE 1/AREA of teh plot (or subplot) with area in m^2
##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots)
##' @return data frame with tree.id and one column per species with basal area of the species (without the target tree)
##' @author Kunstler
BA.SP.FUN <-  function(tree.id,diam,sp,id.plot,weights,weight.full.plot){
require(data.table)
id.plot <- as.character(id.plot)
tree.id <-  as.character(tree.id)

## check equal length
if(!(length(tree.id)==length(diam) & length(tree.id)==length(sp) & length(tree.id)==length(id.plot) & length(tree.id)==length(weights)))
    stop("length of tree.id diam,sp id.plot & weights need to be the same")

## check sp is not numeric
if(is.numeric(sp)) stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')")

# compute BA tot per species per plot
BASP <- tapply(BA.fun(diam,weights),INDEX=list(id.plot,sp),FUN=sum,na.rm=T)
print(dim(BASP))
DATA.BASP <-  data.table(id.plot= rownames(BASP),BASP)
setnames( DATA.BASP,old=1:ncol(DATA.BASP), c("id.plot",colnames(BASP)))
setkeyv(DATA.BASP,c("id.plot"))
sp.name <- colnames(BASP)
rm(BASP)
print("first table created")
#### MERGE with indivudal tree
## use library(data.table)
if(!is.na(weight.full.plot)){
     data.indiv <- data.table(tree.id=tree.id,sp=sp,
                              id.plot=id.plot,diam=diam,
                              BA.indiv=BA.fun(diam,rep(weight.full.plot,length(diam))))
     setkeyv(data.indiv,"id.plot")
     print("second table created")
     data.merge <-  merge(data.indiv,DATA.BASP)
     print("merge done")
     # substract target BA
     for (i in (sp.name)){
     eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep="")))
     }

}else{
     data.indiv <- data.table(tree.id=tree.id,sp=sp,id.plot=id.plot,
                         diam=diam,weights=weights,
                         BA.indiv=BA.fun(diam,weights))
     setkeyv(data.indiv,"id.plot")
     print("second table created")
     data.merge <-  merge(data.indiv,DATA.BASP)
     print("merge done")
     for (i in (sp.name)){
     eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep="")))
     }
}
print("replacment done")
data.merge[,BA.indiv:=NULL]
print("first column removed")
#### delete column not used
data.merge[,sp:=NULL]
data.merge[,diam:=NULL]
data.merge[,id.plot:=NULL]
data.merge[,weights:=NULL]
print("columns removed")
return( (data.merge))
}

######################
#### apply BA.SP.FUN per census
 
BA.SP.FUN.l<-  function(census.id,census,obs.id,diam,sp,id.plot,weights,weight.full.plot){

return(BA.SP.FUN(obs.id[census==census.id],diam[census==census.id],sp[census==census.id],id.plot[census==census.id],weights[census==census.id],weight.full.plot))
}

BA.SP.FUN.census<-  function(census,obs.id,diam,sp,id.plot,weights,weight.full.plot){
unique.census <- unique(census)
res.list <- lapply(unique.census,FUN=BA.SP.FUN.l,obs.id,diam,sp,weights,weights.full.plot)
res.mat <- rbind.fill(res.list )
res.mat <- res.mat[match(rownames(res.mat),obs.id),]
return(res.mat)
}


#### 
##' .. function compute competition index with X Y coordinates based on a neighborhood of radius R ..
##'
##' .. content for \details{} ..
##' @title 
##' @param tree.id id of the observation (if one tree as multiple growth measurement one tree.id per measurment
##' @param xy.table table with x.y of teh individual
##' @param diam diam in cm
##' @param sp species
##' @param Rlim radius of neighborhood search
##' @param parallel run in paralle or not ?
##' @param rpuDist run with GPU distance computation
##' @return a data frame with nrow = length of tree.id and ncol =unique(sp)
##' @author Kunstler
BA.SP.FUN.XY <-  function(tree.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){
rownames(xy.table) <- tree.id
if(rpuDist){
  require(rpud)
  dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE)
}else{
  dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE))
}
print('distance matrix computed')
dist.mat[dist.mat <Rlim] <- 1
dist.mat[dist.mat >Rlim] <- 0
diag(dist.mat) <- 0
print('distance matrix set to 0 1')
BA <- BA.fun(diam,weights=1/(pi*Rlim^2))
BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE)
print('starting tapply over species')
fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE)
  if(parallel){
    ## parallel version
    require(doParallel)
    registerDoParallel(cores=4)
    mat <- dist.mat*BA.mat
    res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% {
       fun.sum.sp(mat[i,],sp)
       }
    rownames(res.temp) <- tree.id
    return((res.temp))
  }else{
    res.temp <-  t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp))
    return(res.temp)
  }
}

## function for lapply per census
BA.SP.FUN.XY.l <-  function(census.id,census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){

return(BA.SP.FUN.XY(obs.id[census==census.id],xy.table[census==census.id,],diam[census==census.id],sp[census==census.id],Rlim,parallel,rpuDist))

}

### wrapping function to run BA.SP.FUN.XY per census
BA.SP.FUN.XY.census <-  function(census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){
unique.census <- unique(census)
res.list <- lapply(unique.census,FUN=BA.SP.FUN.XY.l,obs.id,xy.table,diam,sp,Rlim,parallel,rpuDist)
res.mat <- rbind.fill(res.list )
res.mat <- res.mat[match(rownames(res.mat),obs.id),]
return(res.mat)
}

############################
## FUNCTION remove trailing white space
trim.trailing <- function (x) sub("\\s+$", "", x)

## clean species.tab
fun.clean.species.tab <- function(species.tab){
species.tab2 <- species.tab[!is.na(species.tab$Latin_name),]
 
### species IFN reformat names
## clean species names and synonyme names
species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name))
species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn))
## remove trailing white space
species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn)

species.clean <- species.tab2[!duplicated(species.tab2$Latin_name),
                              c("code","Latin_name","Exotic_Native_cultivated")]
return(species.clean)}


### compute quantile 99% and sd with a bootstrap

library(boot)

f.quantile <- function (x,ind,probs){quantile(x[ind],probs=probs,na.rm=TRUE)}

f.quantile.boot <-  function(i,x,fac,R,probs=0.99){
    require(boot)
    if(length(na.exclude(x[fac==i]))>0){
quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs)
return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1))
}else{
return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1))
}
}

#######################
### function to compute number of dead per plot 
function.perc.dead <-  function(dead){
 sum(dead)/length(dead)}


function.perc.dead2 <- function(dead) { out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]); if(!is.finite(out)) out <- NA; return(out) }


##########################
### GENERATE A R.object per ecoregion

function.replace.NA.negative <- function(data.BA.SP){
    for (i in 2:ncol(data.BA.SP)){
eval(parse(text=paste("data.BA.SP[is.na(",names(data.BA.SP)[i],"),",names(data.BA.SP)[i],":=0]",sep="")))
eval(parse(text=paste("data.BA.SP[",names(data.BA.SP)[i],"<0,",names(data.BA.SP)[i],":=0]",sep="")))
}
print('NA and negative replaced')
return(data.BA.SP)
}    

##############################################################
##function to generate data in good format per ecoregion
fun.data.per.ecoregion <- function(ecoregion,data.tot,plot.name,weight.full.plot,name.country,data.TRY=NA,species.lookup=NA){
if(is.null(data.tot[['weights']])) stop("Please create a weights vector, even if it is completely NA")
require(data.table)
data.tot <-  data.table(data.tot)
data <- data.tot[ecocode==ecoregion,]
rm(data.tot)
data.BA.SP <- BA.SP.FUN(tree.id=as.vector(data[['tree.id']]),
				diam=as.vector(data[['D']]),
				sp=as.vector(data[['sp']]),
				id.plot=as.vector(data[['plot']]),
				weights=data[['weights']],
				weight.full.plot=weight.full.plot)

print('competition index computed')
## change NA and <0 data for 0
data.BA.SP <- function.replace.NA.negative(data.BA.SP)
### CHECK IF sp and sp name for column are the same
if(sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) >0) stop("competition index sp name not the same as in data")
#### compute BA tot for all competitors
## data.BA.SP[,BATOT:=sum(.SD),by=tree.id] ## slower than apply why?? 
BATOT.s <-  apply(data.frame(data.BA.SP)[,-1],MARGIN=1,FUN=sum)
data.BA.SP[,BATOT:=BATOT.s]
print('BATOT COMPUTED')
### create data frame
DT.temp <- data.table(tree.id=data[["tree.id"]],ecocode=data[["ecocode"]])
setkeyv(DT.temp,"tree.id")
setkeyv(data.BA.SP,"tree.id")
print('starting last merge')
data.BA.sp <- merge(DT.temp,data.BA.SP)
## reorder data
data <-  data.table(data)
setkeyv(data,"tree.id")
## test if same order
if(sum(!data.BA.sp[["tree.id"]] == data[["tree.id"]]) >0) stop("competition index not in the same order than data")
#####
## ADD TRY DATA OR TRAITS IF NEEDED
if(!is.na(data.TRY)){
sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]),]
data.traits <- fun.extract.format.sp.traits.TRY(sp=sp.extract[["sp"]],sp.syno.table=sp.extract,data.TRY)
## save everything as a list
list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=data.traits)
save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep="."))
}else{
list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA)
save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep="."))
}
}    


#####################################
#####################################
### FUNCTION TO COMPUTE BA.SP.XY PER PLOT AND MERGE TOGETHER
#### function to be apply per site
fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){
data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i)
BA.SP.temp <- BA.SP.FUN.XY(tree.id=data.tree.s[['tree.id']],
             xy.table=data.tree.s[,xy.name],
             diam=data.tree.s[['D']],
             sp=(data.tree.s[['sp']]),
             Rlim=15,
             parallel=FALSE,
             rpuDist=FALSE)

## replace NA per zero
print('replacing NA per zero')
BA.SP.temp[is.na(BA.SP.temp)] <- 0
print('done')
### rpud installation very cumbersome not needed ?
### longer in parallel why ?
if(sum(! rownames(BA.SP.temp)==data.tree.s[['tree.id']]) >0) stop('rows not in the good order')
if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name')

### compute sum per row
BATOT <- apply(BA.SP.temp,MARGIN=1,FUN=sum)
data.res <- data.frame(tree.id=data.tree.s[['tree.id']],BA.SP.temp,BATOT=BATOT)
return(data.res)
}