An error occurred while loading the file. Please try again.
-
Daniel Falster authored7dbee2ef
######################################################## READ TRY AND FORMAT DATA CHECK ERROR
################ use AccSpeciesName because not author name
source("./R/FUN.TRY.R")
library(MASS)
library(doParallel)
library(mvoutlier)
## read TRY data
TRY.DATA <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_04_01.txt",
sep = "\t", header = TRUE, na.strings = "", stringsAsFactors = FALSE)
TRY.DATA2 <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_07_23.txt",
sep = "\t", header = TRUE, na.strings = "", stringsAsFactors = FALSE)
### combine both data set
TRY.DATA <- rbind(TRY.DATA, TRY.DATA2)
rm(TRY.DATA2)
################################## ERROR FOUND IN THE DATA BASE 1 problem with the seed mass of this obs seed mass
################################## = 0 DELETE
TRY.DATA <- TRY.DATA[!(TRY.DATA$ObservationID == 1034196 & TRY.DATA$DataName == "Seed dry mass"),
]
#### IS 'Quercuscrispla sp' an error standing for Quercus crispula synonym of
#### Quercus mongolica subsp. crispula (Blume) Menitsky ? ask Jens
#### TRY.DATA[TRY.DATA$AccSpeciesName=='Quercuscrispla sp' ,]
######################## first create a table with one row per Observation.id and column for each traits
######################## and variable
Non.Trait.Data <- c("Latitude", "Longitude", "Reference", "Date of harvest / measurement",
"Altitude", "Mean annual temperature (MAT)", "Mean sum of annual precipitation (PPT)",
"Plant developmental status / plant age", "Maximum height reference", "Source in Glopnet",
"Number of replicates", "Sun vers. shade leaf qualifier")
Trait.Data <- sort(names(((table(TRY.DATA$TraitName)))))
########################## REFORMAT DATA from TRY
registerDoParallel(cores = 5) ## affect automaticaly half of the core detected to the foreach here I decide to affect 4 cores
getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6)
TRY.DATA.FORMATED <- foreach(ObservationID.t = unique(TRY.DATA$ObservationID), .combine = rbind) %dopar%
{
fun.extract.try(ObservationID.t, data = TRY.DATA, Non.Trait.Data, Trait.Data)
}
## head(TRY.DATA.FORMATED) dim(TRY.DATA.FORMATED)
saveRDS(TRY.DATA.FORMATED, file = "./data/process/TRY.DATA.FORMATED.rds")
######################## READ RDS
TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds")
#################### COMPUTE MEAN AND SD FOR SPECIES from FRENCH NFI for 6 key traits
key.main.traits2 <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", "StdValue.Seed.mass",
"StdValue.Leaf.specific.area..SLA.", "StdValue.Stem.specific.density..SSD.",
"StdValue.Stem.conduit.area..vessel.and.tracheid.", "StdValue.Leaf.lifespan")
############################### READ CSV TABLE WITH LATIN NAME and CODE FOR FRENCH NFI DATA
species.tab <- read.csv("./data/species.list/species.csv", sep = "\t")
species.tab2 <- species.tab[!is.na(species.tab$Latin_name), ]
rm(species.tab)
gc()
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
### species IFN reformat names clean species names and synonyme names
species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name))
species.tab2$Latin_name_syn <- (gsub("_", " ", species.tab2$Latin_name_syn)) ## THIS TABLE HAS ALREADY THE SYNONYME FOR THE FRENCH SPECIES
## remove trailing white space
species.tab2$Latin_name_syn <- trim.trailing(species.tab2$Latin_name_syn)
## create vector of species name
species.IFN <- unique(pecies.tab2$Latin_name)
########################################################################### EXTRACT SPECIES MEAN AND SD change format try species names
TRY.DATA.FORMATED$AccSpeciesName <- as.character(TRY.DATA.FORMATED$AccSpeciesName)
key.main.traits2 <-
##################################################################### COMPUTE mean SD species:genus for each traits
######## The table 5 in Kattge et al. 2011 GCB provides estimation of mean species sd
######## SLA species sd log 0.09 Nmass species sd log 0.08 Seed Mass sd log 0.13
## # SEE sd.log.SLA <- 0.09 ### based on Kattge et al. 2011 sd.log.Nmass <- 0.08
## ### based on Kattge et al. 2011 sd.log.Seed.Mass <- 0.13 ### based on Kattge et
## al. 2011 sd.log.LL <- 0.03 ### based on Kattge et al. 2011
###################### Computed sd over the data in log10 we have under the assumption sd constant
###################### over species with lm per species
traits <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", "StdValue.Seed.mass",
"StdValue.Leaf.specific.area..SLA.", "StdValue.Stem.specific.density..SSD.",
"StdValue.Plant.height.vegetative")
## minimum number of observation per species to be incldue
N.min <- 3
########################### SPECIES MEAN SD
sd.vec.sp <- rep(NA, 5)
for (i in 1:(length(traits) - 1)) {
table.sp.tmp <- table(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[i]]]),
"AccSpeciesName"])
data.t <- TRY.DATA.FORMATED[TRY.DATA.FORMATED[["AccSpeciesName"]] %in% names(table.sp.tmp)[table.sp.tmp >
N.min], c("AccSpeciesName", traits[i])]
names(data.t) <- c("sp", "trait")
lm.obj <- lm(log10(trait) ~ sp, data = data.t)
print(i)
sd.vec.sp[i] <- sd(residuals(lm.obj))
}
### compute 99% quantile of height and its sd TODO COMPUTE WITH ONLY SPECIES WITH
### AT LEAST TWO OBSERVATIONS
library(quantreg)
table.sp.tmp <- table(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[5]]]), "AccSpeciesName"])
data.t <- TRY.DATA.FORMATED[TRY.DATA.FORMATED[["AccSpeciesName"]] %in% names(table.sp.tmp)[table.sp.tmp >
N.min], ]
res.rq <- rq(log10(StdValue.Plant.height.vegetative) ~ AccSpeciesName - 1, tau = 0.99,
data = data.t)
summary.res.rq <- summary(res.rq, se = "boot")
sd.vec.sp[5] <- mean(summary.res.rq$coefficients[, "Std. Error"])
## higher than the one reported in Kattge et al. 2011
####################### Computed sd over the data we have under the assumption sd constant over genus
sd.vec.genus <- rep(NA, 5)
for (i in 1:(length(traits) - 1)) {
table.sp.tmp <- table(sapply(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[i]]),
"AccSpeciesName"], FUN = fun.get.genus))
data.t <- TRY.DATA.FORMATED[sapply(TRY.DATA.FORMATED[["AccSpeciesName"]], fun.get.genus) %in%
names(table.sp.tmp)[table.sp.tmp > N.min], c("AccSpeciesName", traits[i])]
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
names(data.t) <- c("sp", "trait")
data.t$gs <- sapply(data.t[["sp"]], fun.get.genus)
lm.obj <- lm(log10(trait) ~ gs, data = data.t)
sd.vec.genus[i] <- sd(residuals(lm.obj))
}
## quantile for Height with quantreg
table.sp.tmp <- table(sapply(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[5]]]),
"AccSpeciesName"], FUN = fun.get.genus))
data.t <- TRY.DATA.FORMATED[sapply(TRY.DATA.FORMATED[["AccSpeciesName"]], fun.get.genus) %in%
names(table.sp.tmp)[table.sp.tmp > N.min], ]
res.rq <- rq(log10(TRY.DATA.FORMATED$StdValue.Plant.height.vegetative) ~ sapply(TRY.DATA.FORMATED$AccSpeciesName,
FUN = fun.get.genus) - 1, tau = 0.99)
summary.res.rq <- summary(res.rq, se = "boot")
sd.vec.genus[5] <- mean(summary.res.rq$coefficients[, "Std. Error"])
##### SET NAME VECTORS
names(sd.vec.sp) <- c("sdlog10.sp.Nmass", "sdlog10.sp.Seed.Mass", "sdlog10.sp.SLA",
"sdlog10.sp.WD", "sdlog10.sp.Height")
names(sd.vec.genus) <- c("sdlog10.gs.Nmass", "sdlog10.gs.Seed.Mass", "sdlog10.gs.SLA",
"sdlog10.gs.WD", "sdlog10.gs.Height")
## save mean species and genus sd
saveRDS(sd.vec.sp, file = "./data/process/sd.vec.sp.rds")
saveRDS(sd.vec.genus, file = "./data/process/sd.vec.genus.rds")
sd.vec.sp <- readRDS(file = "./data/process/sd.vec.sp.rds")
sd.vec.genus <- readRDS(file = "./data/process/sd.vec.genus.rds")
###################################################################################################### add columns with mean sd per species or per genus depending on whether species
###################################################################################################### or genus data
#### add column with the mean sd species or genus
data.TRY.sd.update <- data.frame(data.ifn.species.try.noout, data.ifn.species.try.noout[,
sd.names])
sd.names.1 <- paste(sd.names, 1, sep = ".")
for (i in 1:length(sd.names.1)) {
data.TRY.sd.update[[sd.names.1[i]]][!data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.sp[i]
data.TRY.sd.update[[sd.names.1[i]]][data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.genus[i]
}
head(data.TRY.sd.update, 10)
saveRDS(data.TRY.sd.update, file = "./data/process/data.TRY.sd.update.rds")
### plot sd to show mark
pdf("./figs/sd.traits.pdf")
r <- barplot(sd.vec.sp, names.arg = c("Leaf.N", "SM", "SLA", "WD", "Vessel", "LL"),
las = 2, ylim = c(0, 0.9), ylab = "sd log10")
points(r[, 1], sd.vec.genus, col = "red", pch = 16, cex = 2)
211212213214215216217
## for (i in 1:length(nobs.names)){ ## sd.obs <-
## data.TRY.sd.update[[sd.names[i]]][!data.TRY.sd.update[[genus.names[i]]]] ##
## points(rep(r[i,1],length(sd.obs)),sd.obs) ## sd.obs <-
## data.TRY.sd.update[[sd.names[i]]][data.TRY.sd.update[[genus.names[i]]]] ##
## points(rep(r[i,1],length(sd.obs)),sd.obs,col='red',pch=4) print(sd.obs) }
dev.off()