An error occurred while loading the file. Please try again.
-
Daniel Falster authoredef3d6e2f
############################################ FUNCTION TO EXTRACT DECTED OUTLIER AND FORMAT TRY DATA Georges Kunstler
############################################ 14/06/2013
library(MASS, quietly=TRUE)
library(doParallel, quietly=TRUE)
library(mvoutlier, quietly=TRUE)
######################################################## Build a function that extract the variables
##'Description of the function to extract data from original TRY data
##'
##' based on the data structure of extraction from TRY data base
##' @title fun.extract.try
##' @param ObservationID.t list of data identifier that we want to extract
##' @param data try data object
##' @param Non.Trait.Data list of names of non traits data that we want to extract
##' @param Trait.Data list of names of traits data that we want to extract
##' @return data.frame with one line per observation id with clumns with ObservationID Species Nontrait data for Traits: OrigValue OrigUnit StdValue
##' @author Kunstler
fun.extract.try <- function(ObservationID.t, data, Non.Trait.Data, Trait.Data) {
data.temp <- data[data$ObservationID == ObservationID.t, ]
## Non trait data
Vec.Non.Trait.Data <- rep(NA, length(Non.Trait.Data))
names(Vec.Non.Trait.Data) <- Non.Trait.Data
for (i in 1:length(Non.Trait.Data)) {
if (sum(data.temp$DataName == Non.Trait.Data[i]) == 1) {
Vec.Non.Trait.Data[i] <- data.temp[data.temp$DataName == Non.Trait.Data[i],
"OrigValueStr"]
}
if (sum(data.temp$DataName == Non.Trait.Data[i]) > 1) {
## if(sum(data.temp$DataName==Non.Trait.Data[i] &
## grepl('Mean',data.temp$ValueKindName, fixed=TRUE))!=1){ print('error in
## ValueKindName')}
Vec.Non.Trait.Data[i] <- data.temp[data.temp$DataName == Non.Trait.Data[i],
"OrigValueStr"][1]
}
}
## Trait data
Vec.Trait.Data.OrigValue <- Vec.Trait.Data.OrigUnit <- Vec.Trait.Data.StdValue <- rep(NA,
length(Trait.Data))
names(Vec.Trait.Data.OrigValue) <- paste("OrigValue", Trait.Data)
names(Vec.Trait.Data.OrigUnit) <- paste("OrigUnitName", Trait.Data)
names(Vec.Trait.Data.StdValue) <- paste("StdValue", Trait.Data)
for (i in 1:length(Trait.Data)) {
if (sum(grepl(Trait.Data[i], data.temp$TraitName, fixed = TRUE)) == 1) {
Vec.Trait.Data.OrigValue[i] <- data.temp[grepl(Trait.Data[i], data.temp$TraitName,
fixed = TRUE), "OrigValue"]
Vec.Trait.Data.OrigUnit[i] <- data.temp[grepl(Trait.Data[i], data.temp$TraitName,
fixed = TRUE), "OrigUnitStr"]
Vec.Trait.Data.StdValue[i] <- data.temp[grepl(Trait.Data[i], data.temp$TraitName,
fixed = TRUE), "StdValue"]
}
if (sum(grepl(Trait.Data[i], data.temp$TraitName, fixed = TRUE)) > 1) {
if (sum((data.temp$ValueKindName %in% c("Best estimate", "Mean", "Site specific mean") &
!is.na(data.temp$ValueKindName))) == 1) {
Vec.Trait.Data.OrigValue[i] <- mean(data.temp[grepl(Trait.Data[i],
data.temp$TraitName, fixed = TRUE) & (data.temp$ValueKindName %in%
c("Best estimate", "Mean", "Site specific mean") & !is.na(data.temp$ValueKindName)),
"OrigValue"])
Vec.Trait.Data.OrigUnit[i] <- (data.temp[grepl(Trait.Data[i], data.temp$TraitName,
fixed = TRUE) & (data.temp$ValueKindName %in% c("Best estimate",
"Mean", "Site specific mean") & !is.na(data.temp$ValueKindName)),
"OrigUnitStr"])[1]
Vec.Trait.Data.StdValue[i] <- mean(data.temp[grepl(Trait.Data[i],
data.temp$TraitName, fixed = TRUE) & (data.temp$ValueKindName %in%
c("Best estimate", "Mean", "Site specific mean") & !is.na(data.temp$ValueKindName)),
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
"StdValue"])
}
if (sum(data.temp$ValueKindName %in% c("Best estimate", "Mean", "Site specific mean")) <
1) {
Vec.Trait.Data.OrigValue[i] <- mean(data.temp[grepl(Trait.Data[i],
data.temp$TraitName, fixed = TRUE), "OrigValue"], na.rm = T)
Vec.Trait.Data.OrigUnit[i] <- (data.temp[grepl(Trait.Data[i], data.temp$TraitName,
fixed = TRUE), "OrigUnitStr"])[1]
Vec.Trait.Data.StdValue[i] <- mean(data.temp[grepl(Trait.Data[i],
data.temp$TraitName, fixed = TRUE), "StdValue"], na.rm = T)
}
}
}
### EXPERIMENTAL DATA TYPE
TF.exp.data <- sum(grepl("Growth & measurement conditions - experimental tre",
data.temp$NonTraitCategories, fixed = TRUE)) > 0
names(TF.exp.data) <- "TF.exp.data"
res.temp <- data.frame(ObservationID = ObservationID.t, AccSpeciesName = unique(data.temp$AccSpeciesName),
t(Vec.Non.Trait.Data), TF.exp.data, t(Vec.Trait.Data.OrigValue), t(Vec.Trait.Data.OrigUnit),
t(Vec.Trait.Data.StdValue))
return(res.temp)
}
## outlier detection based on Kattage et al 2011
##' Detection of univar outlier based on method of Kattge et al. 2011
##'
##'
##' @title
##' @param x.na
##' @param log
##' @return TRUE FALSE vector to identify outlier TRUE : outlier
##' @author Kunstler
fun.out.TF2 <- function(x.na, log = TRUE) {
x <- x.na[!is.na(x.na)]
x.num <- (1:length(x.na))[!is.na(x.na)]
TF.vec <- rep(FALSE, length(x.na))
if (log) {
fit.dist <- fitdistr(log10(na.omit(x)), "normal")
high.bound <- fit.dist$estimate["mean"] + 2 * (fit.dist$estimate["sd"] +
fit.dist$sd["sd"])
low.bound <- fit.dist$estimate["mean"] - 2 * (fit.dist$estimate["sd"] + fit.dist$sd["sd"])
TF.vec[x.num[log10(x) > high.bound | log10(x) < low.bound]] <- TRUE
} else {
fit.dist <- fitdistr((na.omit(x)), "normal")
high.bound <- fit.dist$estimate["mean"] + 2 * (fit.dist$estimate["sd"] +
fit.dist$sd["sd"])
low.bound <- fit.dist$estimate["mean"] - 2 * (fit.dist$estimate["sd"] + fit.dist$sd["sd"])
TF.vec[x.num[(x) > high.bound | (x) < low.bound]] <- TRUE
}
return((TF.vec))
}
######################## FUNCTION TO COMPUTE QUANTILE FOR HEIGHT
f.quantile <- function(x, ind, probs) {
quantile(x[ind], probs = probs, na.rm = TRUE)
}
f.quantile.boot2 <- function(x, R, probs = 0.99) {
require(boot, quietly=TRUE)
if (length(na.exclude(x)) > 0) {
quant.boot <- boot(x, f.quantile, R = R, probs = probs)
return(c(mean = mean(quant.boot$t), sd = sd(quant.boot$t), nobs = length(na.exclude(x))))
} else {