## trait.fun.R

################# FUNCTIONS TO EXTRACT DETECTED OUTLIERS AND FORMAT TRY DATA - Georges Kunstler
############################################ 14/06/2013
### just testing this out! ##############

### install all uninstalled packages
source("R/packages.R")
check_packages(c("MASS", "doParallel","mvoutlier","plyr"))

## outlier detection based on Kattge et al. 2011
##' Detection of univariate outliers based on the method of Kattge et al. 2011
##'
##' A normal distribution is fitted (with \code{MASS::fitdistr}) to the
##' non-missing values, optionally after a log10 transformation. A value is
##' flagged as an outlier when it lies outside
##' \code{mean +/- 2 * (sd + se.sd)}, where \code{sd} is the fitted standard
##' deviation and \code{se.sd} is the standard error of that estimate as
##' reported by \code{fitdistr}.
##'
##' @title Flag univariate outliers in a numeric vector
##' @param x.na numeric vector, possibly containing \code{NA}. When
##'   \code{log = TRUE} the non-missing values must be strictly positive:
##'   \code{fitdistr} rejects the non-finite values that \code{log10} would
##'   otherwise produce.
##' @param log logical; if \code{TRUE} (default) the fit and the comparison
##'   are done on \code{log10(x.na)}, otherwise on the raw values.
##' @return TRUE FALSE vector of the same length as \code{x.na} to identify
##'   outlier; TRUE : outlier. Missing values are never flagged.
##' @author Kunstler
fun.out.TF2 <- function(x.na, log = TRUE) {
    is.obs <- !is.na(x.na)
    TF.vec <- rep(FALSE, length(x.na))
    ## work on a single (possibly transformed) copy of the observed values so
    ## that both scales share exactly the same code path
    obs <- x.na[is.obs]
    if (log) {
        obs <- log10(obs)
    }
    fit.dist <- MASS::fitdistr(obs, "normal")
    centre <- fit.dist$estimate[["mean"]]
    ## the band is widened by the standard error of the sd estimate
    half.width <- 2 * (fit.dist$estimate[["sd"]] + fit.dist$sd[["sd"]])
    is.out <- obs > centre + half.width | obs < centre - half.width
    ## map positions within the observed values back to positions in x.na
    TF.vec[which(is.obs)[is.out]] <- TRUE
    return(TF.vec)
}

######################## FUNCTION TO COMPUTE QUANTILE FOR HEIGHT
##' Quantile of a resampled subset, in the \code{statistic(data, indices, ...)}
##' form used when it is passed to \code{boot} in \code{f.quantile.boot2}.
##'
##' @param x vector of values.
##' @param ind index vector selecting the elements of \code{x} to use.
##' @param probs probabilities passed on to \code{quantile}.
##' @return the requested quantile(s) of \code{x[ind]}, missing values removed.
f.quantile <- function(x, ind, probs) {
    resampled <- x[ind]
    quantile(resampled, probs = probs, na.rm = TRUE)
}

##' Bootstrap estimate of a quantile of \code{x}.
##'
##' Resamples \code{x} \code{R} times with \code{boot::boot}, computing the
##' \code{probs} quantile of each resample with \code{f.quantile}.
##'
##' @param x vector of values, possibly containing \code{NA}.
##' @param R number of bootstrap replicates.
##' @param probs probability of the quantile to estimate (default 0.99).
##' @return named vector \code{c(mean, sd, nobs)}: mean and standard deviation
##'   of the bootstrapped quantile and the number of non-missing values in
##'   \code{x}; all three are \code{NA} when \code{x} has no non-missing value.
f.quantile.boot2 <- function(x, R, probs = 0.99) {
    n.obs <- length(na.exclude(x))
    if (!(n.obs > 0)) {
        ## nothing to resample
        return(c(mean = NA, sd = NA, nobs = NA))
    }
    ## boot::boot fails loudly if the package is missing and, unlike
    ## require(), does not attach boot to the search path on every call
    quant.boot <- boot::boot(x, f.quantile, R = R, probs = probs)
    c(mean = mean(quant.boot$t), sd = sd(quant.boot$t), nobs = n.obs)
}

##################### FUNCTION TO COMPUTE MEAN SD AND NOBS WITH OR WITHOUT OUTLIER
fun.mean.sd.nobs.out <- function(x, i) {
    if (length(x) > 50) {
        ## if more than 50 obs remove outlier
        outlier <- fun.out.TF2(x.na = x, log = TRUE)
        if (i == "StdValue.Plant.height.vegetative") {
            res.temp <- f.quantile.boot2(log10(x[!outlier]), R = 1000, probs = 0.99)
        } else {
            res.temp <- c(mean(log10(x[!outlier])), sd(log10(x[!outlier])), length(x[!outlier]))
        }
    } else {
        if (i == "StdValue.Plant.height.vegetative") {
            res.temp <- f.quantile.boot2(log10(x), R = 1000, probs = 0.99)
        } else {
            res.temp <- c(mean(log10(x)), sd(log10(x)), length(x))
        }
    }
    return(res.temp)