format.function.R


###################################################
###################################################
###################################################
##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP ANALYSIS


######
######
## FUNCTION TO PLOT MAP OF TREE
fun.circles.plot <- function(plot.select,x,y,plot,D,inches){
x.t <- x[plot==plot.select]
y.t <- y[plot==plot.select]
D.t <- D[plot==plot.select]
D.t[is.na(D.t)] <-  0
symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches)
}


#########################
## 
##' .. Compute the basal area of competitor in a plot..
##'
##' .. content for \details{} ..
##' @title 
##' @param diam diameters of each individuals in cm
##' @param weights weight to compute the basal area in cm^2/m^2
##' @return basal area in cm^2/m^2 
##' @author Kunstler
BA.fun <- function(diam,weights){
((diam/2)^2)*pi*weights
}


#### 
##' .. function to compute the basal area of neighborood tree in plots ..
##'
##' .. content for \details{} ..
##' @title 
##' @param obs.id tree obs identifier
##' @param diam diam of tree in cm
##' @param sp species name or code
##' @param id.plot identifier of the plot
##' @param weights weights to compute basal area in cm^2/m^2 SO THE 1/AREA of teh plot (or subplot) with area in m^2
##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots)
##' @return data frame with tree.id and one column per species with basal area of the species (without the target tree)
##' @author Kunstler
BA.SP.FUN <-  function(obs.id,diam,sp,id.plot,weights,weight.full.plot){
require(data.table)
id.plot <- as.character(id.plot)
obs.id <-  as.character(obs.id)

## check equal length
if(!(length(obs.id)==length(diam) & length(obs.id)==length(sp) & length(obs.id)==length(id.plot) & length(obs.id)==length(weights)))
    stop("length of obs.id diam,sp id.plot & weights need to be the same")

## check sp is not numeric
if(is.numeric(sp)) stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')")

# compute BA tot per species per plot
BASP <- tapply(BA.fun(diam,weights),INDEX=list(id.plot,sp),FUN=sum,na.rm=T)
print(dim(BASP))
DATA.BASP <-  data.table(id.plot= rownames(BASP),BASP)
setnames( DATA.BASP,old=1:ncol(DATA.BASP), c("id.plot",colnames(BASP)))
setkeyv(DATA.BASP,c("id.plot"))
sp.name <- colnames(BASP)
rm(BASP)
print("first table created")
#### MERGE with indivudal tree
## use library(data.table)
if(!is.na(weight.full.plot)){
     data.indiv <- data.table(obs.id=obs.id,sp=sp,
                              id.plot=id.plot,diam=diam,
                              BA.indiv=BA.fun(diam,rep(weight.full.plot,length(diam))))
     setkeyv(data.indiv,"id.plot")
     print("second table created")
     data.merge <-  merge(data.indiv,DATA.BASP)
     print("merge done")
     # substract target BA
     for (i in (sp.name)){
     eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep="")))
     }

}else{
     data.indiv <- data.table(obs.id=obs.id,sp=sp,id.plot=id.plot,
                         diam=diam,weights=weights,
                         BA.indiv=BA.fun(diam,weights))
     setkeyv(data.indiv,"id.plot")
     print("second table created")
     data.merge <-  merge(data.indiv,DATA.BASP)
     print("merge done")
     for (i in (sp.name)){
     eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep="")))
     }
}
print("replacment done")
data.merge[,BA.indiv:=NULL]
print("first column removed")
#### delete column not used
data.merge[,sp:=NULL]
data.merge[,diam:=NULL]
data.merge[,id.plot:=NULL]
data.merge[,weights:=NULL]
print("columns removed")
return( (data.merge))
}

#### function with X Y coordinates based on a neighborhood of radius R

BA.SP.FUN.XY <-  function(obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){
rownames(xy.table) <- obs.id
if(rpuDist){
  require(rpud)
  dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE)
}else{
  dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE))
}
print('distance matrix computed')
dist.mat[dist.mat <Rlim] <- 1
dist.mat[dist.mat >Rlim] <- 0
diag(dist.mat) <- 0
print('distance matrix set to 0 1')
BA <- BA.fun(diam,weights=1/(pi*Rlim^2))
BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE)
print('starting tapply over species')
fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE)
  if(parallel){
    ## parallel version
    require(doParallel)
    registerDoParallel(cores=4)
    mat <- dist.mat*BA.mat
    res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% {
       fun.sum.sp(mat[i,],sp)
       }
    rownames(res.temp) <- obs.id
    return((res.temp))
  }else{
    res.temp <-  t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp))
    return(res.temp)
  }
}
 

############################
## FUNCTION remove trailing white space
trim.trailing <- function (x) sub("\\s+$", "", x)

## clean species.tab
fun.clean.species.tab <- function(species.tab){
species.tab2 <- species.tab[!is.na(species.tab$Latin_name),]
 
### species IFN reformat names
## clean species names and synonyme names
species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name))
species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn))
## remove trailing white space
species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn)

species.clean <- species.tab2[!duplicated(species.tab2$Latin_name),
                              c("code","Latin_name","Exotic_Native_cultivated")]
return(species.clean)}


### compute quantile 99% and sd with a bootstrap

library(boot)

f.quantile <- function (x,ind,probs){quantile(x[ind],probs=probs,na.rm=TRUE)}

f.quantile.boot <-  function(i,x,fac,R,probs=0.99){
    require(boot)
    if(length(na.exclude(x[fac==i]))>0){
quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs)
return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1))
}else{
return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1))
}
}

#######################
### function to compute number of dead per plot 
function.perc.dead <-  function(dead){
 sum(dead)/length(dead)}


function.perc.dead2 <- function(dead) { out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]); if(!is.finite(out)) out <- NA; return(out) }


##########################
### GENERATE A R.object per ecoregion

function.replace.NA.negative <- function(data.BA.SP){
    for (i in 2:ncol(data.BA.SP)){
eval(parse(text=paste("data.BA.SP[is.na(",names(data.BA.SP)[i],"),",names(data.BA.SP)[i],":=0]",sep="")))
eval(parse(text=paste("data.BA.SP[",names(data.BA.SP)[i],"<0,",names(data.BA.SP)[i],":=0]",sep="")))
}
print('NA and negative replaced')
return(data.BA.SP)
}    

##############################################################
##function to generate data in good format per ecoregion
fun.data.per.ecoregion <- function(ecoregion,data.tot,plot.name,weight.full.plot,name.country,data.TRY,species.lookup){
require(data.table)
data.tot <-  data.table(data.tot)
data <- data.tot[ecocode==ecoregion,]
rm(data.tot)
data.BA.SP <- BA.SP.FUN(obs.id=as.vector(data[['obs.id']]),
                        diam=as.vector(data[['D']]),
     	                sp=as.vector(data[['sp']]),
                        id.plot=as.vector(data[[plot.name]]),
	                weights=data[['weights']],
                        weight.full.plot=weight.full.plot)

print('competition index computed')
## change NA and <0 data for 0
data.BA.SP <- function.replace.NA.negative(data.BA.SP)
### CHECK IF sp and sp name for column are the same
if(sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) >0) stop("competition index sp name not the same as in data")
#### compute BA tot for all competitors
## data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## slower than apply why?? 
BATOT.s <-  apply(data.frame(data.BA.SP)[,-1],MARGIN=1,FUN=sum)
data.BA.SP[,BATOT:=BATOT.s]
print('BATOT COMPUTED')
### create data frame
DT.temp <- data.table(obs.id=data[["obs.id"]],ecocode=data[["ecocode"]])
setkeyv(DT.temp,"obs.id")
setkeyv(data.BA.SP,"obs.id")
print('starting last merge')
data.BA.sp <- merge(DT.temp,data.BA.SP)
## reorder data
data <-  data.table(data)
setkeyv(data,"obs.id")
## test if same order
if(sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) >0) stop("competition index not in the same order than data")
#####
## ADD TRY DATA OR TRAITS IF NEEDED
sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]),]
data.traits <- fun.extract.format.sp.traits.TRY(sp=sp.extract[["sp"]],sp.syno.table=sp.extract,data.TRY)
## save everything as a list
list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=data.traits)
save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep="."))
}