An error occurred while loading the file. Please try again.
-
Georges Kunstler authorede3bac863
########################################################
########################################################
###### READ TRY AND FORMAT DATA CHECK ERROR
################
#### use AccSpeciesName because not author name
source("./R/FUN.TRY.R")
library(MASS)
library(doParallel)
library(mvoutlier)
## read TRY data
TRY.DATA <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_04_01.txt",
sep = "\t",header=TRUE,na.strings="", stringsAsFactors=FALSE)
TRY.DATA2 <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_07_23.txt",
sep = "\t",header=TRUE,na.strings="", stringsAsFactors=FALSE)
### combine both data set
TRY.DATA <- rbind(TRY.DATA,TRY.DATA2)
rm(TRY.DATA2)
##################################
### ERROR FOUND IN THE DATA BASE
########################
### problem with the seed mass of this obs seed mass = 0 DELETE
TRY.DATA <- TRY.DATA[!(TRY.DATA$ObservationID==1034196 & TRY.DATA$DataName=="Seed dry mass"),]
#### IS "Quercuscrispla sp" an error standing for Quercus crispula synonym of Quercus mongolica subsp. crispula (Blume) Menitsky ? ask Jens
## TRY.DATA[TRY.DATA$AccSpeciesName=="Quercuscrispla sp" ,]
########################
########################
### first create a table with one row per Observation.id and column for each traits and variable
Non.Trait.Data <- c("Latitude", "Longitude", "Reference", "Date of harvest / measurement",
"Altitude", "Mean annual temperature (MAT)","Mean sum of annual precipitation (PPT)",
"Plant developmental status / plant age","Maximum height reference",
"Source in Glopnet", "Number of replicates", "Sun vers. shade leaf qualifier" )
Trait.Data <- sort(names(((table(TRY.DATA$TraitName)))))
##########################
#### REFORMAT DATA from TRY
registerDoParallel(cores=5) ## affect automaticaly half of the core detected to the foreach here I decide to affect 4 cores
getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6)
TRY.DATA.FORMATED <- foreach(ObservationID.t=unique(TRY.DATA$ObservationID), .combine=rbind) %dopar%
{
fun.extract.try(ObservationID.t,data=TRY.DATA,Non.Trait.Data,Trait.Data)
}
## head(TRY.DATA.FORMATED)
## dim(TRY.DATA.FORMATED)
saveRDS(TRY.DATA.FORMATED,file="./data/TRY.DATA.FORMATED.rds")
########################
########## READ RDS
TRY.DATA.FORMATED <- readRDS("./data/TRY.DATA.FORMATED.rds")
## TRY.DATA.FORMATED[TRY.DATA.FORMATED$ObservationID==1034196,"StdValue.Seed.mass"] <- NA
## head(TRY.DATA.FORMATED)
####################
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
####################
## COMPUTE MEAN AND SD FOR SPECIES from FRENCH NFI
key.main.traits2 <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass",
"StdValue.Seed.mass",
"StdValue.Leaf.specific.area..SLA.",
"StdValue.Stem.specific.density..SSD.",
"StdValue.Stem.conduit.area..vessel.and.tracheid.",
"StdValue.Leaf.lifespan")
###############################
##############################
## READ CSV TABLE WITH LATIN NAME and CODE FOR FRENCH NFI DATA
### NEED TO UPDATE WITH ALL SPECIES LATER
species.tab <- read.csv("./data/species.list/species.csv",sep="\t")
species.tab2 <- species.tab[!is.na(species.tab$Latin_name),]
rm(species.tab)
gc()
### species IFN reformat names
species.IFN <- unique(gsub("_", " ", species.tab2$Latin_name))
## clean species names and synonyme names
species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name))
species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn))
## remove trailing white space
species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn)
## ##############
## ## ### TRY TO CHECK SPECIES NAME WITH taxize
## library(taxize)
## tpl_get(dir_ = "~/foo2", family = "Scrophulariaceae")
## dat <- read.csv("~/foo2/Scrophulariaceae.csv")
## library(plyr)
## species <- as.character(ddply(dat[, c("Genus", "Species")], .(), transform,
## gen_sp = as.factor(paste(Genus, Species, sep = " ")))[, 4])
## slice <- function(input, by = 2) {
## starts <- seq(1, length(input), by)
## tt <- lapply(starts, function(y) input[y:(y + (by - 1))])
## llply(tt, function(x) x[!is.na(x)])
## }
## species_split <- slice(species, by = 100)
## tnrs_safe <- failwith(NULL, tnrs) # in case some calls fail, will continue
## out <- llply(species_split, function(x) tnrs_safe(x, getpost = "POST", sleep = 3))
## check.species.ifn <- tnrs(unique(species.tab2$Latin_name_syn)[-151],getpost="POST")
## check.species.ifn <- tnrs(unique(species.tab2$Latin_name_syn)[],getpost="POST")
## ####### problem with phylotastic tnrs question asked to Scott
## tnrs(c("Fagus sylvatica"), getpost="POST")
## tnrs(c("Fagus sylvatica"))
## tnrs(c("Fagus sylvatica"), getpost="POST", source_ = "iPlant_TNRS")
## tnrs(c("Fagus sylvatica"), getpost="POST", source_ = "NCBI")
## tnrs(c("Fagus sylvatica"), getpost="POST", source_ = "MSW3")
## tnrs(c("Quercus robur"))
## tnrs(c("Quercus robur"), getpost="POST", source_ = "iPlant_TNRS")
## tnrs(c("Quercus robur"), getpost="POST", source_ = "NCBI")
## tnrs(c("Quercus robur"), getpost="POST", source_ = "MSW3")
## http://taxosaurus.org/submit?query=Fagus+sylvatica
## {"status":"OK","names":[{"matchCount":2,"matches":[{"acceptedName":"","sourceId":"iPlant_TNRS","score":"1","matchedName":"Fagus sylvatica","annotations":{"Authority":""},"uri":""},{"acceptedName":"Fagus sylvatica","sourceId":"NCBI","score":"1","matchedName":"Fagus sylvatica","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/taxonomy/28930"}],"submittedName":"Fagus sylvatica"}],"metadata":{"spellcheckers":[{"name":"NCBI","description":"NCBI Spell Checker","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/","sourceId":1,"publication":"http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2578899/","call":"python2.6 tnrs_spellchecker/ncbi_spell.py","rank":1}],"sources":[{"status":"200: OK","name":"NCBI","description":"NCBI Taxonomy","uri":"http://www.ncbi.nlm.nih.gov/taxonomy","annotations":{},"sourceId":"NCBI","publication":"Federhen S. The Taxonomy Project.2002 Oct 9 [Updated 2003 Aug 13]. In: McEntyre J., Ostell J., editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US);2002.","rank":3,"code":"ICZN,ICN,ICNB"},{"status":"200: OK","name":"iPlant Collaborative TNRS v3.1","description":"The iPlant Collaborative TNRS provides parsing and fuzzy matching for plant taxa.","uri":"http://tnrs.iplantcollaborative.org/","annotations":{"Authority":"Author attributed to the accepted name (where applicable)."},"sourceId":"iPlant_TNRS","publication":"Boyle, B. et.al. The taxonomic name resolution service: an online tool for automated standardization of plant names. BMC Bioinformatics. 2013, 14:16. doi:10.1186/1471-2105-14-16. If you use TNRS results in a publication, please also cite The Taxonomic Name Resolution Service; http://tnrs.iplantcollaborative.org; version 3.1.","rank":2,"code":"ICN"},{"status":"200: OK","name":"Mammal Species of the World v3.0","description":"Mammal Species of the World, 3rd edition (MSW3) is a database of mammalian taxonomy. Our adaptor searches the indexed database for both exact and loose mathces","uri":"http://www.bucknell.edu/msw3/","annotations":{"Authority":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)"},"sourceId":"MSW3","publication":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)","rank":4,"code":"ICZN"}],"sub_date":"Thu Jul 18 16:36:23 2013","resolver_version":"1.2.0","jobId":"f7e6e1feba55b8f5d41a208630385630"}}
## http://taxosaurus.org/submit?query=Quercus+robur
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
## {"status":"OK","names":[{"matchCount":2,"matches":[{"acceptedName":"Quercus robur","sourceId":"iPlant_TNRS","score":"1","matchedName":"Quercus robur","annotations":{"Authority":"(Ten.) A. DC."},"uri":"http://www.tropicos.org/Name/50280607"},{"acceptedName":"Quercus robur","sourceId":"NCBI","score":"1","matchedName":"Quercus robur","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/taxonomy/38942"}],"submittedName":"Quercus robur"}],"metadata":{"spellcheckers":[{"name":"NCBI","description":"NCBI Spell Checker","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/","sourceId":1,"publication":"http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2578899/","call":"python2.6 tnrs_spellchecker/ncbi_spell.py","rank":1}],"sources":[{"status":"200: OK","name":"NCBI","description":"NCBI Taxonomy","uri":"http://www.ncbi.nlm.nih.gov/taxonomy","annotations":{},"sourceId":"NCBI","publication":"Federhen S. The Taxonomy Project.2002 Oct 9 [Updated 2003 Aug 13]. In: McEntyre J., Ostell J., editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US);2002.","rank":3,"code":"ICZN,ICN,ICNB"},{"status":"200: OK","name":"iPlant Collaborative TNRS v3.1","description":"The iPlant Collaborative TNRS provides parsing and fuzzy matching for plant taxa.","uri":"http://tnrs.iplantcollaborative.org/","annotations":{"Authority":"Author attributed to the accepted name (where applicable)."},"sourceId":"iPlant_TNRS","publication":"Boyle, B. et.al. The taxonomic name resolution service: an online tool for automated standardization of plant names. BMC Bioinformatics. 2013, 14:16. doi:10.1186/1471-2105-14-16. If you use TNRS results in a publication, please also cite The Taxonomic Name Resolution Service; http://tnrs.iplantcollaborative.org; version 3.1.","rank":2,"code":"ICN"},{"status":"200: OK","name":"Mammal Species of the World v3.0","description":"Mammal Species of the World, 3rd edition (MSW3) is a database of mammalian taxonomy. Our adaptor searches the indexed database for both exact and loose mathces","uri":"http://www.bucknell.edu/msw3/","annotations":{"Authority":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)"},"sourceId":"MSW3","publication":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)","rank":4,"code":"ICZN"}],"sub_date":"Thu Jul 18 16:44:45 2013","resolver_version":"1.2.0","jobId":"a00b4210bdfeb8c7e6d2f6bf7f10df04"}}
## lapply(unique(species.tab2$Latin_name_syn)[1:10],FUN=tnrs)
### find synonyme
getsynonymnamesfromtsn(tsn = 502590)
### find synonyme
tp_synonyms(id =502590 )
# Example R script which calls the TNRS in the context of adding names to a phylogeny
## FROM Boyle et al. 2013 BMC Bioinformatics
library(ape)
library(rjson)
library(RCurl)
tnrs.api<-'http://tnrs.iplantc.org/tnrsm-svc'
#Tree topology from Ackerly, D. 2009. Conservatism and diversification of plant functional traits: Evolutionary rates versus phylogenetic signal. PNAS 106:19699--19706.
lobelioids.string<-'((((((Lobelia_kauaensis,Lobelia_villosa),Lobelia_gloria-montis),(Trematolobelia_kauaiensis,Trematolobelia_macrostachys)),((Lobelia_hypoleuca,Lobelia_yuccoides),Lobelia_niihauensis)),((Brighamia_insignis,Brighamia_rockii),(Delissea_rhytidosperma,Delissea_subcordata))),((((Cyanea_pilosa,Cyanea_acuminata),Cyanea_hirtella),(Cyanea_coriacea,Cyanea_leptostegia)),(((Clermontia_kakeana,Clermontia_parviflora),Clermontia_arborescens),Clermontia_fauriei)));'
#Transform the newick sting into an ape phylo object
tree<-read.tree(text=lobelioids.string)
#Obtain the taxa names
old.names<-tree$tip.label
#Change the underscore characters into blank spaces
old.names<-gsub('_',' ',old.names)
old.names <- species.tab2$Latin_name_syn
#Transporms the vector into a string
old.names<-paste(old.names,collapse=',')
#The string needs to be URL-encoded
old.names<-curlEscape(old.names)
#Send a request to the TNRS service
url<-paste(tnrs.api,'/matchNames?retrieve=best&names=',old.names,sep='')
tnrs.json<-getURL(url)
#The response needs to be converted from JSON
tnrs.results<-fromJSON(tnrs.json)
#The corrected names are extracted from the response
names<-sapply(tnrs.results[[1]], function(x) c(x$nameSubmitted,x$acceptedName))
names<-as.data.frame(t(names),stringsAsFactors=FALSE)
#If TNRS did not return any accepted name (no match, or name is already accepted), the submitted name is retained
names[names[,2]=="",2]<-names[names[,2]=="",1]
### SAME ERROR FOR FAGUS SYLVATICA TEH WEB SITE GIVE A GOOD RESULTS BUT NOT THE CALL FROM R ?
### change format try species names
TRY.DATA.FORMATED$AccSpeciesName <- as.character(TRY.DATA.FORMATED$AccSpeciesName)
#### extract mean and sd per species without experimental data and detection of outlier when enough data or if not enough data compute mean of genus
### The detection of outlier is based on teh method in Kattge et al. 2011 only for univariate outlier. I have try other univariate and multivariate method of detection of outlier but didn't work well
res.list <- lapply(species.IFN,FUN=fun.species.traits,species.table=species.tab2,traits=key.main.traits2,data=TRY.DATA.FORMATED)
names(res.list) <- species.IFN
##### TRANSFORM LIST IN A TABLE
data.mean <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$mean
,res.list=res.list))
data.sd <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$sd,res.list=res.list))
data.exp <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$exp
,res.list=res.list))
data.genus <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$genus
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
,res.list=res.list))
data.nobs <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$nobs
,res.list=res.list))
#### data frame with all
data.ifn.species.try.noout <- data.frame(data.mean,data.sd,data.exp,data.genus,data.nobs)
names(data.ifn.species.try.noout) <- c(paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"mean",sep=".")
,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"sd",sep=".")
,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"exp",sep=".")
,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"genus",sep=".")
,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"nobs",sep="."))
#### check species with genus mean
## data.ifn.species.try.noout[data.ifn.species.try.noout$SLA.genus,]
## data.ifn.species.try.noout[data.ifn.species.try.noout$SLA.genus |data.ifn.species.try.noout$Wood.Density.genus |data.ifn.species.try.noout$Seed.mass.genus ,]
saveRDS(data.ifn.species.try.noout ,file="./data/data.ifn.species.try.noout.rds")
####
data.ifn.species.try.noout <- readRDS("./data/data.ifn.species.try.noout.rds")
#####################################################################
#### assume that the SD is equal mean species if less than 10 obs same for genus
########
## USE TABLE 5 in Kattge et al. 2011
### LMA species sd log 0.09
### Nmass species sd log 0.08
### Seed Mass sd log 0.13
#### SPECIES mean sd
######
# for wood density no value reportedin Kattge et al so need to compute mean sd for species withe more than 5 obs
data.sd.WD.log <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Stem.specific.density..SSD.),TRY.DATA.FORMATED$AccSpeciesName)
sd.log.WD <- mean(data.sd.WD.log[data.sd.WD.log[,2]>4 & !is.na(data.sd.WD.log[,2]),1])
sd.log.SLA <- 0.09 ### based on Kattge et al. 2011
sd.log.Nmass <- 0.08 ### based on Kattge et al. 2011
sd.log.Seed.Mass <- 0.13 ### based on Kattge et al. 2011
sd.log.LL <- 0.03 ### based on Kattge et al. 2011
#######
##### COMPUTE GENUS MEAN SD IN THIS TRY DATA EXTRACTION AS NOT REPORTED IN Kattge et al. 2011
data.sd.WD.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Stem.specific.density..SSD.),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.WD.genus <- mean(data.sd.WD.log.genus[data.sd.WD.log.genus[,"nobs"]>10 & !is.na(data.sd.WD.log.genus[,"nobs"]),"sd"])
data.sd.SLA.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.specific.area..SLA.),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.SLA.genus <- mean(data.sd.SLA.log.genus[data.sd.SLA.log.genus[,"nobs"]>10 & !is.na(data.sd.SLA.log.genus[,"nobs"]),"sd"])
data.sd.LL.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.lifespan),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.LL.genus <- mean(data.sd.LL.log.genus[data.sd.LL.log.genus[,"nobs"]>10 & !is.na(data.sd.LL.log.genus[,"nobs"]),"sd"])
data.sd.Seed.Mass.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Seed.mass),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.Seed.Mass.genus <- mean(data.sd.Seed.Mass.log.genus[data.sd.Seed.Mass.log.genus[,"nobs"]>10 &
!is.na(data.sd.Seed.Mass.log.genus[,"nobs"]),"sd"])
data.sd.Nmass.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.nitrogen..N..content.per.dry.mass),
TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.Nmass.genus <- mean(data.sd.Nmass.log.genus[data.sd.Nmass.log.genus[,"nobs"]>10 & !is.na(data.sd.Nmass.log.genus[,"nobs"]),"sd"])
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
#############
### change value of sd if less than 10 obs assume sd mean
nobs.names <- paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Leaf.Lifespan")
,"nobs",sep=".")
sd.names <- paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Leaf.Lifespan")
,"sd",sep=".")
genus.names <- paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Leaf.Lifespan")
,"genus",sep=".")
sd.vec.sp <- c(sd.log.Nmass,sd.log.Seed.Mass,sd.log.SLA,sd.log.WD,sd.log.LL)
sd.vec.genus <- c(sd.log.Nmass.genus,sd.log.Seed.Mass.genus,sd.log.SLA.genus,sd.log.WD.genus,sd.log.LL.genus)
### function to select obs with less than Nthresh obs
fun.select.sd.with.to.few.obs.sp <- function(data,sd.names,nobs.names,genus.names,Nthreshold=2){
(data[[nobs.names[i]]]<Nthreshold & !is.na(data[[nobs.names[i]]])
& !data[[genus.names[i]]]) }
fun.select.sd.with.to.few.obs.genus <- function(data,sd.names,nobs.names,genus.names,Nthreshold=2){
(data[[nobs.names[i]]]<Nthreshold & !is.na(data[[nobs.names[i]]])
& data[[genus.names[i]]]) }
####
data.TRY.sd.update <- data.ifn.species.try.noout
for (i in 1:length(nobs.names)){
print( sd.names[i])
print("species")
print(fun.select.sd.with.to.few.obs.sp(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10))
data.TRY.sd.update[[sd.names[i]]][fun.select.sd.with.to.few.obs.sp(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10)] <-
sd.vec.sp[i]
print("genus")
print(fun.select.sd.with.to.few.obs.genus(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10))
data.TRY.sd.update[[sd.names[i]]][fun.select.sd.with.to.few.obs.genus(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10)] <-
sd.vec.genus[i]
}
saveRDS(data.TRY.sd.update,file="./data/data.TRY.sd.update.rds")
###
# plot sd to show mark
pdf("./figs/sd.traits.pdf")
r <- barplot(sd.vec.sp ,names.arg=c("Leaf.N","SM","SLA","WD","LL"),las=2,ylim=c(0,0.6),ylab="sd log10")
for (i in 1:length(nobs.names)){
sd.obs <- data.TRY.sd.update[[sd.names[i]]][!data.TRY.sd.update[[genus.names[i]]]]
points(rep(r[i,1],length(sd.obs)),sd.obs)
points(r[i,1],sd.vec.genus[i],col="red",pch=16,cex=2)
sd.obs <- data.TRY.sd.update[[sd.names[i]]][data.TRY.sd.update[[genus.names[i]]]]
points(rep(r[i,1],length(sd.obs)),sd.obs,col="red",pch=4)
print(sd.obs)
}
dev.off()