diff --git a/R/FUN.TRY.R b/R/FUN.TRY.R index 5abfc9f563944d929b743fa166155cda97a040cf..6f6305e2c6cb76ca562f36d9fd4faf2fb985c6ab 100644 --- a/R/FUN.TRY.R +++ b/R/FUN.TRY.R @@ -351,3 +351,11 @@ if(genus){ return(data.t) } } + +## function to compute mean sd over species or genus with more than Nobs.lim observation +fun.sd.mean.sp.or.genus <- function(traits,species,genus=FALSE,Nobs.lim) +{ +data.temp <- fun.sd.sp.or.genus((traits),species,genus=genus) +return(mean(data.temp[data.temp[,"nobs"]>Nobs.lim & !is.na(data.temp[,"sd"]),"sd"])) +} + diff --git a/TRY.R b/TRY.R index 57e2fc41e0b60db1ddebb74ab12c7924b2c81702..6014a3be06c45992bf4260ce7b6478e5eb5c5feb 100644 --- a/TRY.R +++ b/TRY.R @@ -133,12 +133,6 @@ species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) ## tnrs(c("Quercus robur"), getpost="POST", source_ = "NCBI") ## tnrs(c("Quercus robur"), getpost="POST", source_ = "MSW3") -## http://taxosaurus.org/submit?query=Fagus+sylvatica - -## {"status":"OK","names":[{"matchCount":2,"matches":[{"acceptedName":"","sourceId":"iPlant_TNRS","score":"1","matchedName":"Fagus sylvatica","annotations":{"Authority":""},"uri":""},{"acceptedName":"Fagus sylvatica","sourceId":"NCBI","score":"1","matchedName":"Fagus sylvatica","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/taxonomy/28930"}],"submittedName":"Fagus sylvatica"}],"metadata":{"spellcheckers":[{"name":"NCBI","description":"NCBI Spell Checker","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/","sourceId":1,"publication":"http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2578899/","call":"python2.6 tnrs_spellchecker/ncbi_spell.py","rank":1}],"sources":[{"status":"200: OK","name":"NCBI","description":"NCBI Taxonomy","uri":"http://www.ncbi.nlm.nih.gov/taxonomy","annotations":{},"sourceId":"NCBI","publication":"Federhen S. The Taxonomy Project.2002 Oct 9 [Updated 2003 Aug 13]. In: McEntyre J., Ostell J., editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US);2002.","rank":3,"code":"ICZN,ICN,ICNB"},{"status":"200: OK","name":"iPlant Collaborative TNRS v3.1","description":"The iPlant Collaborative TNRS provides parsing and fuzzy matching for plant taxa.","uri":"http://tnrs.iplantcollaborative.org/","annotations":{"Authority":"Author attributed to the accepted name (where applicable)."},"sourceId":"iPlant_TNRS","publication":"Boyle, B. et.al. The taxonomic name resolution service: an online tool for automated standardization of plant names. BMC Bioinformatics. 2013, 14:16. doi:10.1186/1471-2105-14-16. If you use TNRS results in a publication, please also cite The Taxonomic Name Resolution Service; http://tnrs.iplantcollaborative.org; version 3.1.","rank":2,"code":"ICN"},{"status":"200: OK","name":"Mammal Species of the World v3.0","description":"Mammal Species of the World, 3rd edition (MSW3) is a database of mammalian taxonomy. Our adaptor searches the indexed database for both exact and loose mathces","uri":"http://www.bucknell.edu/msw3/","annotations":{"Authority":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)"},"sourceId":"MSW3","publication":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)","rank":4,"code":"ICZN"}],"sub_date":"Thu Jul 18 16:36:23 2013","resolver_version":"1.2.0","jobId":"f7e6e1feba55b8f5d41a208630385630"}} - -## http://taxosaurus.org/submit?query=Quercus+robur -## {"status":"OK","names":[{"matchCount":2,"matches":[{"acceptedName":"Quercus robur","sourceId":"iPlant_TNRS","score":"1","matchedName":"Quercus robur","annotations":{"Authority":"(Ten.) A. DC."},"uri":"http://www.tropicos.org/Name/50280607"},{"acceptedName":"Quercus robur","sourceId":"NCBI","score":"1","matchedName":"Quercus robur","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/taxonomy/38942"}],"submittedName":"Quercus robur"}],"metadata":{"spellcheckers":[{"name":"NCBI","description":"NCBI Spell Checker","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/","sourceId":1,"publication":"http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2578899/","call":"python2.6 tnrs_spellchecker/ncbi_spell.py","rank":1}],"sources":[{"status":"200: OK","name":"NCBI","description":"NCBI Taxonomy","uri":"http://www.ncbi.nlm.nih.gov/taxonomy","annotations":{},"sourceId":"NCBI","publication":"Federhen S. The Taxonomy Project.2002 Oct 9 [Updated 2003 Aug 13]. In: McEntyre J., Ostell J., editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US);2002.","rank":3,"code":"ICZN,ICN,ICNB"},{"status":"200: OK","name":"iPlant Collaborative TNRS v3.1","description":"The iPlant Collaborative TNRS provides parsing and fuzzy matching for plant taxa.","uri":"http://tnrs.iplantcollaborative.org/","annotations":{"Authority":"Author attributed to the accepted name (where applicable)."},"sourceId":"iPlant_TNRS","publication":"Boyle, B. et.al. The taxonomic name resolution service: an online tool for automated standardization of plant names. BMC Bioinformatics. 2013, 14:16. doi:10.1186/1471-2105-14-16. If you use TNRS results in a publication, please also cite The Taxonomic Name Resolution Service; http://tnrs.iplantcollaborative.org; version 3.1.","rank":2,"code":"ICN"},{"status":"200: OK","name":"Mammal Species of the World v3.0","description":"Mammal Species of the World, 3rd edition (MSW3) is a database of mammalian taxonomy. Our adaptor searches the indexed database for both exact and loose mathces","uri":"http://www.bucknell.edu/msw3/","annotations":{"Authority":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)"},"sourceId":"MSW3","publication":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)","rank":4,"code":"ICZN"}],"sub_date":"Thu Jul 18 16:44:45 2013","resolver_version":"1.2.0","jobId":"a00b4210bdfeb8c7e6d2f6bf7f10df04"}} ## lapply(unique(species.tab2$Latin_name_syn)[1:10],FUN=tnrs) @@ -236,46 +230,97 @@ saveRDS(data.ifn.species.try.noout ,file="./data/process/data.ifn.species.try.no data.ifn.species.try.noout <- readRDS("./data/process/data.ifn.species.try.noout.rds") ##################################################################### -#### assume that the SD is equal mean species if less than 10 obs same for genus +#### assume that the SD is equal mean species ######## -## USE TABLE 5 in Kattge et al. 2011 -### LMA species sd log 0.09 +## The table 5 in Kattge et al. 2011 GCB provides estimation of mean species sd +### SLA species sd log 0.09 ### Nmass species sd log 0.08 ### Seed Mass sd log 0.13 -#### SPECIES mean sd +# SEE +## sd.log.SLA <- 0.09 ### based on Kattge et al. 2011 +## sd.log.Nmass <- 0.08 ### based on Kattge et al. 2011 +## sd.log.Seed.Mass <- 0.13 ### based on Kattge et al. 2011 +## sd.log.LL <- 0.03 ### based on Kattge et al. 2011 + + ###### -# for wood density no value reportedin Kattge et al so need to compute mean sd for species withe more than 5 obs -data.sd.WD.log <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Stem.specific.density..SSD.),TRY.DATA.FORMATED$AccSpeciesName) -sd.log.WD <- mean(data.sd.WD.log[data.sd.WD.log[,2]>4 & !is.na(data.sd.WD.log[,2]),1]) +### HOWEVER INCLUDING SPECIES WITH FEW observation ( less than 5 or 10) seems to lead to an underestimation +## see code below -sd.log.SLA <- 0.09 ### based on Kattge et al. 2011 -sd.log.Nmass <- 0.08 ### based on Kattge et al. 2011 -sd.log.Seed.Mass <- 0.13 ### based on Kattge et al. 2011 -sd.log.LL <- 0.03 ### based on Kattge et al. 2011 +### so I rpopose to use rather sd of species with nore than 5 obs +lm.obj <- (lm(log10(TRY.DATA.FORMATED$StdValue.Stem.specific.density..SSD.)~ TRY.DATA.FORMATED$AccSpeciesName)) +sd.log.WD <-sd(residuals(lm.obj)) +lm.obj <- (lm(log10(TRY.DATA.FORMATED$StdValue.Leaf.specific.area..SLA.)~ TRY.DATA.FORMATED$AccSpeciesName)) +sd.log.SLA <-sd(residuals(lm.obj)) +lm.obj <- (lm(log10(TRY.DATA.FORMATED$StdValue.Leaf.lifespan)~ TRY.DATA.FORMATED$AccSpeciesName)) +sd.log.LL <-sd(residuals(lm.obj)) -####### -##### COMPUTE GENUS MEAN SD IN THIS TRY DATA EXTRACTION AS NOT REPORTED IN Kattge et al. 2011 +lm.obj <- (lm(log10(TRY.DATA.FORMATED$StdValue.Seed.mass)~ TRY.DATA.FORMATED$AccSpeciesName)) +sd.log.Seed.Mass <-sd(residuals(lm.obj)) -data.sd.WD.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Stem.specific.density..SSD.),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE) -sd.log.WD.genus <- mean(data.sd.WD.log.genus[data.sd.WD.log.genus[,"nobs"]>10 & !is.na(data.sd.WD.log.genus[,"nobs"]),"sd"]) +lm.obj <- (lm(log10(TRY.DATA.FORMATED$StdValue.Leaf.nitrogen..N..content.per.dry.mass)~ TRY.DATA.FORMATED$AccSpeciesName)) +sd.log.Nmass <-sd(residuals(lm.obj)) -data.sd.SLA.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.specific.area..SLA.),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE) -sd.log.SLA.genus <- mean(data.sd.SLA.log.genus[data.sd.SLA.log.genus[,"nobs"]>10 & !is.na(data.sd.SLA.log.genus[,"nobs"]),"sd"]) +## +Nobs.lim <- 5 +sd.log.WD <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Stem.specific.density..SSD.), + TRY.DATA.FORMATED$AccSpeciesName,genus=FALSE,Nobs.lim=Nobs.lim) -data.sd.LL.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.lifespan),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE) -sd.log.LL.genus <- mean(data.sd.LL.log.genus[data.sd.LL.log.genus[,"nobs"]>10 & !is.na(data.sd.LL.log.genus[,"nobs"]),"sd"]) +sd.log.SLA <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.specific.area..SLA.), + TRY.DATA.FORMATED$AccSpeciesName,genus=FALSE,Nobs.lim=Nobs.lim) + +sd.log.LL <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.lifespan), + TRY.DATA.FORMATED$AccSpeciesName,genus=FALSE,Nobs.lim=Nobs.lim) -data.sd.Seed.Mass.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Seed.mass),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE) -sd.log.Seed.Mass.genus <- mean(data.sd.Seed.Mass.log.genus[data.sd.Seed.Mass.log.genus[,"nobs"]>10 & - !is.na(data.sd.Seed.Mass.log.genus[,"nobs"]),"sd"]) +sd.log.Seed.Mass <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Seed.mass), + TRY.DATA.FORMATED$AccSpeciesName,genus=FALSE,Nobs.lim=Nobs.lim) -data.sd.Nmass.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.nitrogen..N..content.per.dry.mass), - TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE) -sd.log.Nmass.genus <- mean(data.sd.Nmass.log.genus[data.sd.Nmass.log.genus[,"nobs"]>10 & !is.na(data.sd.Nmass.log.genus[,"nobs"]),"sd"]) +sd.log.Nmass <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.nitrogen..N..content.per.dry.mass) + ,TRY.DATA.FORMATED$AccSpeciesName,genus=FALSE,Nobs.lim=Nobs.lim) + + + +## check how sd is related to minimum number of observation per species against simul +vec.sd.theo <- c() +vec.sd.theo2 <- c() +vec.sd +nsp <- table(data.sd.SLA.log[,"nobs"])[-(1:2)] +nobs <- as.numeric(names(nsp)) +i <- 1 +for ( nlow in 1:(length(nsp)-15)) + { +vec.sd.theo[i] <- mean(unlist(sapply(nlow:length(nsp),FUN=function(i,nsp,nobs) replicate(nsp[i],expr=sd(rnorm(nobs[i],mean=0,sd=1))),nsp,nobs))) +vec.sd.theo2[i] <-sd(unlist(sapply(nlow:length(nsp),FUN=function(i,nsp,nobs) replicate(nsp[i],expr=(rnorm(nobs[i],mean=0,sd=1))),nsp,nobs))) + +i <- i+1 +} + +plot(nobs[1:(length(nsp)-15)],vec.sd.theo,type="b",xlim=c(0,50)) +lines(nobs[1:(length(nsp)-15)],vec.sd.theo2,type="b",xlim=c(0,50)) + +abline(h=1,col="red") + +####### +##### COMPUTE GENUS MEAN SD IN THIS TRY DATA EXTRACTION AS NOT REPORTED IN Kattge et al. 2011 for all species with more than 10 OBSERVARION +Nobs.lim <- 5 +sd.log.WD.genus <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Stem.specific.density..SSD.), + TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE,Nobs.lim=Nobs.lim) + +sd.log.SLA.genus <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.specific.area..SLA.), + TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE,Nobs.lim=Nobs.lim) + +sd.log.LL.genus <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.lifespan), + TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE,Nobs.lim=Nobs.lim) + +sd.log.Seed.Mass.genus <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Seed.mass), + TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE,Nobs.lim=Nobs.lim) + +sd.log.Nmass.genus <- fun.sd.mean.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.nitrogen..N..content.per.dry.mass) + ,TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE,Nobs.lim=Nobs.lim) ############# @@ -288,14 +333,21 @@ genus.names <- paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Leaf.Lifespan" ,"genus",sep=".") sd.vec.sp <- c(sd.log.Nmass,sd.log.Seed.Mass,sd.log.SLA,sd.log.WD,sd.log.LL) +names(sd.vec.sp) <- c("sdlog10.sp.Nmass","sdlog10.sp.Seed.Mass","sdlog10.sp.SLA","sdlog10.sp.WD","sdlog10.sp.LL") + sd.vec.genus <- c(sd.log.Nmass.genus,sd.log.Seed.Mass.genus,sd.log.SLA.genus,sd.log.WD.genus,sd.log.LL.genus) +names(sd.vec.genus) <- c("sdlog10.gs.Nmass","sdlog10.gs.Seed.Mass","sdlog10.gs.SLA","sdlog10.gs.WD","sdlog10.gs.LL") + +## save mean species and genus sd +saveRDS(sd.vec.sp,file="./data/process/sd.vec.sp.rds") +saveRDS(sd.vec.genus,file="./data/process/sd.vec.genus.rds") ### function to select obs with less than Nthresh obs -fun.select.sd.with.to.few.obs.sp <- function(data,sd.names,nobs.names,genus.names,Nthreshold=2){ +fun.select.sd.with.to.few.obs.sp <- function(data,sd.names,nobs.names,genus.names,Nthreshold=10){ (data[[nobs.names[i]]]<Nthreshold & !is.na(data[[nobs.names[i]]]) & !data[[genus.names[i]]]) } -fun.select.sd.with.to.few.obs.genus <- function(data,sd.names,nobs.names,genus.names,Nthreshold=2){ +fun.select.sd.with.to.few.obs.genus <- function(data,sd.names,nobs.names,genus.names,Nthreshold=10){ (data[[nobs.names[i]]]<Nthreshold & !is.na(data[[nobs.names[i]]]) & data[[genus.names[i]]]) } #### @@ -303,13 +355,13 @@ data.TRY.sd.update <- data.ifn.species.try.noout for (i in 1:length(nobs.names)){ print( sd.names[i]) print("species") -print(fun.select.sd.with.to.few.obs.sp(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10)) - data.TRY.sd.update[[sd.names[i]]][fun.select.sd.with.to.few.obs.sp(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10)] <- +print(fun.select.sd.with.to.few.obs.sp(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=Nobs.lim)) + data.TRY.sd.update[[sd.names[i]]][fun.select.sd.with.to.few.obs.sp(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=Nobs.lim)] <- sd.vec.sp[i] print("genus") -print(fun.select.sd.with.to.few.obs.genus(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10)) - data.TRY.sd.update[[sd.names[i]]][fun.select.sd.with.to.few.obs.genus(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10)] <- +print(fun.select.sd.with.to.few.obs.genus(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=Nobs.lim)) + data.TRY.sd.update[[sd.names[i]]][fun.select.sd.with.to.few.obs.genus(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=Nobs.lim)] <- sd.vec.genus[i] } @@ -318,13 +370,13 @@ saveRDS(data.TRY.sd.update,file="./data/process/data.TRY.sd.update.rds") ### # plot sd to show mark pdf("./figs/sd.traits.pdf") -r <- barplot(sd.vec.sp ,names.arg=c("Leaf.N","SM","SLA","WD","LL"),las=2,ylim=c(0,0.6),ylab="sd log10") +r <- barplot(sd.vec.sp ,names.arg=c("Leaf.N","SM","SLA","WD","LL"),las=2,ylim=c(0,0.9),ylab="sd log10") +points(r[,1],sd.vec.genus,col="red",pch=16,cex=2) for (i in 1:length(nobs.names)){ -sd.obs <- data.TRY.sd.update[[sd.names[i]]][!data.TRY.sd.update[[genus.names[i]]]] -points(rep(r[i,1],length(sd.obs)),sd.obs) -points(r[i,1],sd.vec.genus[i],col="red",pch=16,cex=2) -sd.obs <- data.TRY.sd.update[[sd.names[i]]][data.TRY.sd.update[[genus.names[i]]]] -points(rep(r[i,1],length(sd.obs)),sd.obs,col="red",pch=4) +## sd.obs <- data.TRY.sd.update[[sd.names[i]]][!data.TRY.sd.update[[genus.names[i]]]] +## points(rep(r[i,1],length(sd.obs)),sd.obs) +## sd.obs <- data.TRY.sd.update[[sd.names[i]]][data.TRY.sd.update[[genus.names[i]]]] +## points(rep(r[i,1],length(sd.obs)),sd.obs,col="red",pch=4) print(sd.obs) } dev.off()