TRY.R 9.24 KiB
########################################################
########################################################
###### READ TRY AND FORMAT DATA CHECK ERROR
################
#### use AccSpeciesName because not author name
source("./R/FUN.TRY.R")
library(MASS)
library(doParallel)
library(mvoutlier)
## read TRY data
TRY.DATA <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_04_01.txt",
                       sep = "\t",header=TRUE,na.strings="", stringsAsFactors=FALSE)
TRY.DATA2 <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_07_23.txt",
                       sep = "\t",header=TRUE,na.strings="", stringsAsFactors=FALSE)
### combine both data set
TRY.DATA <- rbind(TRY.DATA,TRY.DATA2)
rm(TRY.DATA2)
##################################
### ERROR FOUND IN THE DATA BASE
########################
### problem with the seed mass of this obs seed mass = 0 DELETE
TRY.DATA <- TRY.DATA[!(TRY.DATA$ObservationID==1034196 & TRY.DATA$DataName=="Seed dry mass"),]
#### IS "Quercuscrispla sp" an error standing for Quercus crispula synonym of Quercus mongolica subsp. crispula (Blume) Menitsky ? ask Jens
## TRY.DATA[TRY.DATA$AccSpeciesName=="Quercuscrispla sp" ,]
########################
########################
### first create a table with one row per Observation.id and column for each traits and variable
Non.Trait.Data <- c("Latitude", "Longitude", "Reference", "Date of harvest / measurement",
"Altitude", "Mean annual temperature (MAT)","Mean sum of annual precipitation (PPT)",
  "Plant developmental status / plant age","Maximum height reference",
  "Source in Glopnet",  "Number of replicates", "Sun vers. shade leaf qualifier" )
Trait.Data <- sort(names(((table(TRY.DATA$TraitName)))))
##########################
#### REFORMAT DATA from TRY
registerDoParallel(cores=5) ## affect automaticaly half of the core detected to the foreach here I decide to affect 4 cores
getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6)
 TRY.DATA.FORMATED <- foreach(ObservationID.t=unique(TRY.DATA$ObservationID), .combine=rbind) %dopar%
            fun.extract.try(ObservationID.t,data=TRY.DATA,Non.Trait.Data,Trait.Data)
## head(TRY.DATA.FORMATED) 
## dim(TRY.DATA.FORMATED) 
saveRDS(TRY.DATA.FORMATED,file="./data/process/TRY.DATA.FORMATED.rds")
########################
########## READ RDS
TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds")
####################
####################
## COMPUTE MEAN AND SD FOR SPECIES from FRENCH NFI for 6 key traits
key.main.traits2 <-   c("StdValue.Leaf.nitrogen..N..content.per.dry.mass",
"StdValue.Seed.mass",
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
"StdValue.Leaf.specific.area..SLA.", "StdValue.Stem.specific.density..SSD.", "StdValue.Stem.conduit.area..vessel.and.tracheid.", "StdValue.Leaf.lifespan") ############################### ############################## ## READ CSV TABLE WITH LATIN NAME and CODE FOR FRENCH NFI DATA species.tab <- read.csv("./data/species.list/species.csv",sep="\t") species.tab2 <- species.tab[!is.na(species.tab$Latin_name),] rm(species.tab) gc() ### species IFN reformat names ## clean species names and synonyme names species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn)) ## THIS TABLE HAS ALREADY THE SYNONYME FOR THE FRENCH SPECIES ## remove trailing white space species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) ## create vector of species name species.IFN <- unique(pecies.tab2$Latin_name ) ########################################################################### ########################################################################### ##### EXTRACT SPECIES MEAN AND SD ### change format try species names TRY.DATA.FORMATED$AccSpeciesName <- as.character(TRY.DATA.FORMATED$AccSpeciesName) key.main.traits2 <- ##################################################################### #### COMPUTE mean SD species:genus for each traits ######## ## The table 5 in Kattge et al. 2011 GCB provides estimation of mean species sd ### SLA species sd log 0.09 ### Nmass species sd log 0.08 ### Seed Mass sd log 0.13 ## # SEE ## sd.log.SLA <- 0.09 ### based on Kattge et al. 2011 ## sd.log.Nmass <- 0.08 ### based on Kattge et al. 2011 ## sd.log.Seed.Mass <- 0.13 ### based on Kattge et al. 2011 ## sd.log.LL <- 0.03 ### based on Kattge et al. 2011 ###################### ### Computed sd over the data in log10 we have under the assumption sd constant over species with lm per species traits <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", "StdValue.Seed.mass", "StdValue.Leaf.specific.area..SLA.", "StdValue.Stem.specific.density..SSD.", "StdValue.Plant.height.vegetative") ## minimum number of observation per species to be incldue N.min <- 3 ########################### ### SPECIES MEAN SD sd.vec.sp <- rep(NA,5)
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
for(i in 1:(length(traits)-1)){ table.sp.tmp <- table(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[i]]]),"AccSpeciesName"]) data.t <- TRY.DATA.FORMATED[TRY.DATA.FORMATED[["AccSpeciesName"]] %in% names( table.sp.tmp)[table.sp.tmp>N.min], c("AccSpeciesName",traits[i])] names(data.t) <-c("sp","trait") lm.obj <-lm(log10(trait)~sp,data=data.t) print(i) sd.vec.sp[i] <- sd(residuals(lm.obj)) } ### compute 99% quantile of height and its sd ## TODO COMPUTE WITH ONLY SPECIES WITH AT LEAST TWO OBSERVATIONS library(quantreg) table.sp.tmp <- table(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[5]]]), "AccSpeciesName"]) data.t <- TRY.DATA.FORMATED[TRY.DATA.FORMATED[["AccSpeciesName"]] %in% names( table.sp.tmp)[table.sp.tmp>N.min],] res.rq <- rq(log10(StdValue.Plant.height.vegetative)~ AccSpeciesName-1, tau=0.99,data=data.t) summary.res.rq <- summary(res.rq,se='boot') sd.vec.sp[5] <- mean(summary.res.rq$coefficients[,"Std. Error"]) ## higher than the one reported in Kattge et al. 2011 ####################### ### Computed sd over the data we have under the assumption sd constant over genus sd.vec.genus <- rep(NA,5) for(i in 1:(length(traits)-1)){ table.sp.tmp <- table(sapply(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[i]]), "AccSpeciesName"],FUN=fun.get.genus)) data.t <- TRY.DATA.FORMATED[sapply(TRY.DATA.FORMATED[["AccSpeciesName"]], fun.get.genus) %in% names( table.sp.tmp)[table.sp.tmp>N.min], c("AccSpeciesName",traits[i])] names(data.t) <-c("sp","trait") data.t$gs <- sapply(data.t[["sp"]],fun.get.genus) lm.obj <-lm(log10(trait)~gs,data=data.t) sd.vec.genus[i] <- sd(residuals(lm.obj)) } ## quantile for Height with quantreg table.sp.tmp <- table(sapply(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[5]]]),"AccSpeciesName"],FUN=fun.get.genus)) data.t <- TRY.DATA.FORMATED[sapply(TRY.DATA.FORMATED[["AccSpeciesName"]],fun.get.genus) %in% names(table.sp.tmp)[table.sp.tmp>N.min],] res.rq <- rq(log10(TRY.DATA.FORMATED$StdValue.Plant.height.vegetative)~ sapply(TRY.DATA.FORMATED$AccSpeciesName,FUN=fun.get.genus)-1, tau=0.99) summary.res.rq <- summary(res.rq,se='boot') sd.vec.genus[5] <- mean(summary.res.rq$coefficients[,"Std. Error"]) ##### ### SET NAME VECTORS names(sd.vec.sp) <- c("sdlog10.sp.Nmass","sdlog10.sp.Seed.Mass","sdlog10.sp.SLA", "sdlog10.sp.WD","sdlog10.sp.Height") names(sd.vec.genus) <- c("sdlog10.gs.Nmass","sdlog10.gs.Seed.Mass","sdlog10.gs.SLA", "sdlog10.gs.WD","sdlog10.gs.Height") ## save mean species and genus sd saveRDS(sd.vec.sp,file="./data/process/sd.vec.sp.rds") saveRDS(sd.vec.genus,file="./data/process/sd.vec.genus.rds") sd.vec.sp <- readRDS(file="./data/process/sd.vec.sp.rds") sd.vec.genus <- readRDS(file="./data/process/sd.vec.genus.rds") ###################################################################################################### ### add columns with mean sd per species or per genus depending on whether species or genus data
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
#### add column with the mean sd species or genus data.TRY.sd.update <- data.frame(data.ifn.species.try.noout, data.ifn.species.try.noout[,sd.names]) sd.names.1 <- paste(sd.names,1,sep=".") for (i in 1:length(sd.names.1)){ data.TRY.sd.update[[sd.names.1[i]]][!data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.sp[i] data.TRY.sd.update[[sd.names.1[i]]][data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.genus[i] } head(data.TRY.sd.update,10) saveRDS(data.TRY.sd.update,file="./data/process/data.TRY.sd.update.rds") ### # plot sd to show mark pdf("./figs/sd.traits.pdf") r <- barplot(sd.vec.sp ,names.arg=c("Leaf.N","SM","SLA","WD","Vessel","LL"),las=2,ylim=c(0,0.9),ylab="sd log10") points(r[,1],sd.vec.genus,col="red",pch=16,cex=2) ## for (i in 1:length(nobs.names)){ ## ## sd.obs <- data.TRY.sd.update[[sd.names[i]]][!data.TRY.sd.update[[genus.names[i]]]] ## ## points(rep(r[i,1],length(sd.obs)),sd.obs) ## ## sd.obs <- data.TRY.sd.update[[sd.names[i]]][data.TRY.sd.update[[genus.names[i]]]] ## ## points(rep(r[i,1],length(sd.obs)),sd.obs,col="red",pch=4) ## print(sd.obs) ## } dev.off()