format.function.R


################################################### FUNCTION TO FORMAT DATA FOR THE WORKSHOP ANALYSIS


###### FUNCTION TO PLOT MAP OF TREE
##' .. Function to plot map of tree with circle function of their dbh..
##'
##' .. content for \details{} ..
##' @title 
##' @param plot.select the plot for which draw the map
##' @param x 
##' @param y 
##' @param plot vectore of plot id for each tree
##' @param D diameter in cm
##' @param inches controling the circle size
##' @param ... 
##' @return 
##' @author Kunstler
fun.circles.plot <- function(plot.select, x, y, plot, D, inches, ...) {
    x.t <- x[plot == plot.select]
    y.t <- y[plot == plot.select]
    D.t <- D[plot == plot.select]
    D.t[is.na(D.t)] <- 0
    symbols(x.t, y.t, circles = D.t, main = plot.select, inches = inches, ...)
}


######################### 
##' .. Compute the basal area of competitor in a plot..
##'
##' .. content for \details{} ..
##' @title 
##' @param diam diameters of each individuals in cm
##' @param weights weight to compute the basal area in cm^2/m^2
##' @return basal area in cm^2/m^2 
##' @author Kunstler
BA.fun <- function(diam, weights) {
    ((diam/2)^2) * pi * weights
}


#### 
##' .. function to compute the basal area of neighborood tree in plots ..
##'
##' .. content for \details{} ..
##' @title 
##' @param obs.id tree obs identifier
##' @param diam diam of tree in cm
##' @param sp species name or code
##' @param id.plot identifier of the plot
##' @param weights weights to compute basal area in cm^2/m^2 SO THE 1/AREA of teh plot (or subplot) with area in m^2
##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots)
##' @return data frame with tree.id and one column per species with basal area of the species (without the target tree)
##' @author Kunstler
BA.SP.FUN <- function(obs.id, diam, sp, id.plot, weights, weight.full.plot) {
    require(data.table, quietly=TRUE)
    id.plot <- as.character(id.plot)
    obs.id <- as.character(obs.id)
    
    ## check equal length
    if (!(length(obs.id) == length(diam) & length(obs.id) == length(sp) & length(obs.id) == 
        length(id.plot) & length(obs.id) == length(weights))) 
        stop("length of obs.id diam,sp id.plot & weights need to be the same")
    
    ## check sp is not numeric
    if (is.numeric(sp)) 
        stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')")
    
    # compute BA tot per species per plot
    BASP <- tapply(BA.fun(diam, weights), INDEX = list(id.plot, sp), FUN = sum, na.rm = T)
    print(dim(BASP))
    DATA.BASP <- data.table(id.plot = rownames(BASP), BASP)
    setnames(DATA.BASP, old = 1:ncol(DATA.BASP), c("id.plot", colnames(BASP)))
    setkeyv(DATA.BASP, c("id.plot"))
    sp.name <- colnames(BASP)
    rm(BASP)
    print("first table created")
    #### MERGE with indivudal tree use library(data.table)
    if (!is.na(weight.full.plot)) {
        data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam, 
            BA.indiv = BA.fun(diam, rep(weight.full.plot, length(diam))))
        setkeyv(data.indiv, "id.plot")
        print("second table created")
        data.merge <- merge(data.indiv, DATA.BASP)
        print("merge done")
        # substract target BA
        for (i in (sp.name)) {
            eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]", 
                sep = "")))
        }
        
    } else {
        data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam, 
            weights = weights, BA.indiv = BA.fun(diam, weights))
        setkeyv(data.indiv, "id.plot")
        print("second table created")
        data.merge <- merge(data.indiv, DATA.BASP)
        print("merge done")
        for (i in (sp.name)) {
            eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]", 
                sep = "")))
        }
    }
    print("replacment done")
    data.merge[, `:=`(BA.indiv, NULL)]
    print("first column removed")
    #### delete column not used
    data.merge[, `:=`(sp, NULL)]
    data.merge[, `:=`(diam, NULL)]
    data.merge[, `:=`(id.plot, NULL)]
    data.merge[, `:=`(weights, NULL)]
    print("columns removed")
    return((data.merge))
}

#### 
##' .. function compute competition index with X Y coordinates based on a neighborhood of radius R ..
##'
##' .. content for \details{} ..
##' @title 
##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment
##' @param xy.table table with x.y of teh individual
##' @param diam diam in cm
##' @param sp species
##' @param Rlim radius of neighborhood search
##' @param parallel run in paralle or not ?
##' @param rpuDist run with GPU distance computation
##' @return a data frame with nrow = length of obs.id and ncol =unique(sp)
##' @author Kunstler
BA.SP.FUN.XY <- function(obs.id, xy.table, diam, sp, Rlim, parallel = FALSE, rpuDist = FALSE) {
    rownames(xy.table) <- obs.id
    if (rpuDist) {
        require(rpud, quietly=TRUE)
        dist.mat <- rpuDist(xy.table, upper = TRUE, diag = TRUE)
    } else {
        dist.mat <- as.matrix(dist(xy.table, upper = TRUE, diag = TRUE))
    }
    print("distance matrix computed")
    dist.mat[dist.mat < Rlim] <- 1
    dist.mat[dist.mat > Rlim] <- 0
    diag(dist.mat) <- 0
    print("distance matrix set to 0 1")
    BA <- BA.fun(diam, weights = 1/(pi * Rlim^2))
    BA.mat <- matrix(rep(BA, length(BA)), nrow = length(BA), byrow = TRUE)
    print("starting tapply over species")
    fun.sum.sp <- function(x, sp) tapply(x, INDEX = sp, FUN = sum, na.rm = TRUE)
    if (parallel) {
        ## parallel version
        require(doParallel, quietly=TRUE)
        registerDoParallel(cores = 4)
        mat <- dist.mat * BA.mat
        res.temp <- foreach(i = 1:nrow(mat), .combine = rbind) %dopar% {
            fun.sum.sp(mat[i, ], sp)
        }
        rownames(res.temp) <- obs.id
        return((res.temp))
    } else {
        res.temp <- t(apply(dist.mat * BA.mat, MARGIN = 1, FUN = fun.sum.sp, sp))
        return(res.temp)
    }
}


############################ FUNCTION remove trailing white space
trim.trailing <- function(x) sub("\\s+$", "", x)

## clean species.tab
fun.clean.species.tab <- function(species.tab) {
    species.tab2 <- species.tab[!is.na(species.tab$Latin_name), ]
    
    ### species IFN reformat names clean species names and synonyme names
    species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name))
    species.tab2$Latin_name_syn <- (gsub("_", " ", species.tab2$Latin_name_syn))
    ## remove trailing white space
    species.tab2$Latin_name_syn <- trim.trailing(species.tab2$Latin_name_syn)
    
    species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), c("code", 
        "Latin_name", "Exotic_Native_cultivated")]
    return(species.clean)
}


### compute quantile 99% and sd with a bootstrap

library(boot, quietly=TRUE)

f.quantile <- function(x, ind, probs) {
    quantile(x[ind], probs = probs, na.rm = TRUE)
}

f.quantile.boot <- function(i, x, fac, R, probs = 0.99) {
    require(boot, quietly=TRUE)
    if (length(na.exclude(x[fac == i])) > 0) {
        quant.boot <- boot(x[fac == i], f.quantile, R = R, probs = probs)
        return(as.matrix(c(mean = mean(quant.boot$t), sd = sd(quant.boot$t), nobs = length(na.exclude(x[fac == 
            i]))), ncol = 3, nrow = 1))
    } else {
        return(as.matrix(c(mean = NA, sd = NA, nobs = NA), ncol = 3, nrow = 1))
    }
}

####################### function to compute number of dead per plot
function.perc.dead <- function(dead) {
    sum(dead)/length(dead)
}


function.perc.dead2 <- function(dead) {
    out <- sum(dead, na.rm = T)/length(dead[!is.na(dead)])
    if (!is.finite(out)) 
        out <- NA
    return(out)
}


########################## GENERATE A R.object per ecoregion

function.replace.NA.negative <- function(data.BA.SP) {
    for (i in 2:ncol(data.BA.SP)) {
        eval(parse(text = paste("data.BA.SP[is.na(", names(data.BA.SP)[i], "),", 
            names(data.BA.SP)[i], ":=0]", sep = "")))
        eval(parse(text = paste("data.BA.SP[", names(data.BA.SP)[i], "<0,", names(data.BA.SP)[i], 
            ":=0]", sep = "")))
    }
    print("NA and negative replaced")
    return(data.BA.SP)
}

############################################################## function to generate data in good format per ecoregion
fun.data.per.ecoregion <- function(ecoregion, data.tot, plot.name, weight.full.plot, 
    name.country, data.TRY = NA, species.lookup = NA, output.dir = "./data/process") {
    require(data.table, quietly=TRUE)
    
    print(paste("Working on Ecoregion %s", ecoregion))
    data <- data.table(data.tot)[ecocode == ecoregion, ]
    data.BA.SP <- BA.SP.FUN(obs.id = as.vector(data[["obs.id"]]), diam = as.vector(data[["D"]]), 
        sp = as.vector(data[["sp"]]), id.plot = as.vector(data[[plot.name]]), weights = data[["weights"]], 
        weight.full.plot = weight.full.plot)
    
    print("competition index computed")
    ## change NA and <0 data for 0
    data.BA.SP <- function.replace.NA.negative(data.BA.SP)
    ### CHECK IF sp and sp name for column are the same
    if (sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) > 0) 
        stop("competition index sp name not the same as in data")
    #### compute BA tot for all competitors data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ##
    #### slower than apply why??
    BATOT.s <- apply(data.frame(data.BA.SP)[, -1], MARGIN = 1, FUN = sum)
    data.BA.SP[, `:=`(BATOT, BATOT.s)]
    print("BATOT COMPUTED")
    ### create data frame
    DT.temp <- data.table(obs.id = data[["obs.id"]], ecocode = data[["ecocode"]])
    setkeyv(DT.temp, "obs.id")
    setkeyv(data.BA.SP, "obs.id")
    print("starting last merge")
    data.BA.sp <- merge(DT.temp, data.BA.SP)
    ## reorder data
    data <- data.table(data)
    setkeyv(data, "obs.id")
    ## test if same order
    if (sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) > 0) 
        stop("competition index not in the same order than data")
    ##### ADD TRY DATA OR TRAITS IF NEEDED
    data.traits = NA
    if (!is.na(data.TRY)) {
        sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]), ]
        data.traits <- fun.extract.format.sp.traits.TRY(sp = sp.extract[["sp"]], 
            sp.syno.table = sp.extract, data.TRY)
    } 
    
    ## save everything as a list
    list.temp <- list(data.tree = data, data.BA.SP = data.BA.sp, data.traits = data.traits)
    save(list.temp, file = file.path(output.dir,paste("list", name.country, ecoregion, 
        "Rdata", sep = ".")))
}


##################################### FUNCTION TO COMPUTE BA.SP.XY PER PLOT AND MERGE TOGETHER function to be apply
##################################### per site
fun.compute.BA.SP.XY.per.plot <- function(i, data.tree, Rlim, xy.name = c("x", "y"), 
    parallel = FALSE, rpuDist = FALSE) {
    data.tree.s <- subset(data.tree, subset = data.tree[["plot"]] == i)
    BA.SP.temp <- BA.SP.FUN.XY(obs.id = data.tree.s[["obs.id"]], xy.table = data.tree.s[, 
        xy.name], diam = data.tree.s[["D"]], sp = (data.tree.s[["sp"]]), Rlim = 15, 
        parallel = FALSE, rpuDist = FALSE)
    
    ## replace NA per zero
    print("replacing NA per zero")
    BA.SP.temp[is.na(BA.SP.temp)] <- 0
    print("done")
    ### rpud installation very cumbersome not needed ?  longer in parallel why ?
    if (sum(!rownames(BA.SP.temp) == data.tree.s[["obs.id"]]) > 0) 
        stop("rows not in the good order")
    if (sum(!colnames(BA.SP.temp) == as.character((levels(data.tree.s[["sp"]])))) > 
        0) 
        stop("colnames does mot match species name")
    
    ### compute sum per row
    BATOT <- apply(BA.SP.temp, MARGIN = 1, FUN = sum)
    data.res <- data.frame(obs.id = data.tree.s[["obs.id"]], BA.SP.temp, BATOT = BATOT)
    return(data.res)
}