## [Residue captured from the web repository viewer, not part of the script:
##  "An error occurred while loading the file. Please try again."
##  Delaigue Olivier authored 73d49238]
########################################################
########################################################
###### READ TRY AND FORMAT DATA CHECK ERROR
################
#### use AccSpeciesName because not author name
source("./R/FUN.TRY.R")
library(MASS)
library(doParallel)
library(mvoutlier)
## Read the two TRY data releases and stack them into a single table.
## Both files are tab-separated with a header row; empty fields are read as
## NA and strings are kept as character (not converted to factors).
TRY.DATA <- do.call(
  rbind,
  lapply(
    c("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_04_01.txt",
      "./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_07_23.txt"),
    read.table,
    sep = "\t", header = TRUE, na.strings = "", stringsAsFactors = FALSE
  )
)
##################################
### ERROR FOUND IN THE DATA BASE
#1
########################
### Problem with the seed mass of observation 1034196 (seed mass = 0):
### delete that "Seed dry mass" record.
### %in% is used instead of == because it never returns NA: with ==, a
### missing ObservationID or DataName makes the condition NA, and an NA in
### a logical row index yields an all-NA row instead of keeping the record.
TRY.DATA <- TRY.DATA[!(TRY.DATA$ObservationID %in% 1034196 &
                         TRY.DATA$DataName %in% "Seed dry mass"), ]
#### IS "Quercuscrispla sp" an error standing for Quercus crispula synonym of Quercus mongolica subsp. crispula (Blume) Menitsky ? ask Jens
## TRY.DATA[TRY.DATA$AccSpeciesName=="Quercuscrispla sp" ,]
########################
########################
### Column definitions for the reformatted table (one row per ObservationID,
### one column per trait / variable).
### Names of the non-trait variables handed to fun.extract.try below.
Non.Trait.Data <- c(
  "Latitude",
  "Longitude",
  "Reference",
  "Date of harvest / measurement",
  "Altitude",
  "Mean annual temperature (MAT)",
  "Mean sum of annual precipitation (PPT)",
  "Plant developmental status / plant age",
  "Maximum height reference",
  "Source in Glopnet",
  "Number of replicates",
  "Sun vers. shade leaf qualifier"
)
### Sorted, distinct trait names found in the data (sort() drops the NA).
Trait.Data <- sort(unique(TRY.DATA$TraitName))
##########################
#### REFORMAT DATA from TRY
## Register the parallel backend used by foreach with 5 workers.
## (Original note: without an explicit `cores` value half of the detected
## cores are assigned; pass a larger value, e.g. registerDoParallel(cores=6),
## to use more.)
registerDoParallel(cores = 5)
## Report how many workers are registered.
getDoParWorkers()
## Reformat the TRY records one ObservationID at a time, in parallel, and
## row-bind the per-observation results into a single table.
TRY.DATA.FORMATED <- foreach(
  ObservationID.t = unique(TRY.DATA$ObservationID),
  .combine = rbind
) %dopar% {
  fun.extract.try(ObservationID.t, data = TRY.DATA, Non.Trait.Data, Trait.Data)
}
## head(TRY.DATA.FORMATED)
## dim(TRY.DATA.FORMATED)
## Checkpoint the reformatted table on disk.
saveRDS(TRY.DATA.FORMATED, file = "./data/process/TRY.DATA.FORMATED.rds")
########################
########## READ RDS
## Reload the reformatted table from its RDS checkpoint.
TRY.DATA.FORMATED <- readRDS(file = "./data/process/TRY.DATA.FORMATED.rds")
## TRY.DATA.FORMATED[TRY.DATA.FORMATED$ObservationID==1034196,"StdValue.Seed.mass"] <- NA
## head(TRY.DATA.FORMATED)
### Export the distinct accepted species names as a one-column CSV, to be
### checked on the TNRS web site.
species.TRY <- unique(TRY.DATA.FORMATED[["AccSpeciesName"]])
write.csv(
  as.matrix(species.TRY),
  file = "./data/process/species.TRY.csv",
  row.names = FALSE
)
#######
## [line-number gutter (71-140) captured from the web repository viewer; not part of the script]
## Read the results returned by TNRS (tab-separated with a header row;
## empty fields are read as NA and strings are kept as character).
tnrs.TRY <- read.delim("./output/tnrs_results.TRY.txt", sep = "\t",
                       na.strings = "", stringsAsFactors = FALSE,
                       header = TRUE)
head(tnrs.TRY)
## fix() opens the interactive data editor and writes the edited object back.
## It is only offered in interactive sessions so that a batch run
## (Rscript / R CMD BATCH) neither blocks nor fails at this point.
if (interactive()) {
  fix(tnrs.TRY)
}
####################
####################
## Column names of the 6 key traits for which species mean and sd are
## computed for the French NFI species.
key.main.traits2 <- c(
  "StdValue.Leaf.nitrogen..N..content.per.dry.mass",  # leaf N content per dry mass
  "StdValue.Seed.mass",                               # seed mass
  "StdValue.Leaf.specific.area..SLA.",                # specific leaf area
  "StdValue.Stem.specific.density..SSD.",             # stem specific density
  "StdValue.Stem.conduit.area..vessel.and.tracheid.", # stem conduit area
  "StdValue.Leaf.lifespan"                            # leaf lifespan
)
###############################
##############################
## Read the table of Latin names and codes for the French NFI data
## (the file is tab-separated despite its .csv extension) and keep only the
## rows that have a Latin name.
species.tab2 <- read.delim("./data/species.list/species.csv", sep = "\t")
species.tab2 <- species.tab2[!is.na(species.tab2$Latin_name), ]
## Trigger a garbage collection after loading and filtering the table.
gc()
### Reformat the IFN species names.
## Clean the species names and the synonym names: replace underscores by
## spaces. (This table already holds the synonyms for the French species.)
species.tab2$Latin_name <- gsub("_", " ", species.tab2$Latin_name)
species.tab2$Latin_name_syn <- gsub("_", " ", species.tab2$Latin_name_syn)
## Remove trailing white space from the synonym names.
## NOTE(review): trim.trailing is not defined in this file; presumably it
## comes from ./R/FUN.TRY.R sourced at the top -- confirm.
species.tab2$Latin_name_syn <- trim.trailing(species.tab2$Latin_name_syn)
## Vector of distinct species names.
## (The original line read `pecies.tab2`, an undefined object, which stopped
## the script with "object not found".)
species.IFN <- unique(species.tab2$Latin_name)
## ######################################################################################
## #######################################################################################
## #### CHECKING SPECIES NAME TO GET ALL SYNONYMES
## ## NOT DONE YET !!!!!!!!!!!!!!!!
## ### export name to check in http://tnrs.iplantcollaborative.org/quick_start.html
## old.names <- unique(species.tab2$Latin_name)
## write.csv(as.matrix(old.names),file="./data/process/old.names.csv",row.names=FALSE)
## ## need to remove the first row (the column name) before submitting to the website
## ## read data from TNRS iPLANT
## tnrs.FRANCE <- read.delim("./output/tnrs_results.IFN.FRANCE.txt",sep="\t", na.strings="",stringsAsFactors=FALSE,header=TRUE)
## (cbind(test.tnrs$Name_submitted,test.tnrs$Accepted_name_species))[test.tnrs$Selected=="true",]
## ## need to do the same for TRY to have same match
## ## ACCORDING TO WILL THE BEST SOURCE IS http://www.theplantlist.org/ BUT NOT EASY TO ACCESS AND NOT WITH SYNO
###########################################################################
###########################################################################
##### EXTRACT SPECIES MEAN AND SD
### Store the TRY species names as plain character strings.
TRY.DATA.FORMATED$AccSpeciesName <- as.character(TRY.DATA.FORMATED$AccSpeciesName)
#### Per-species trait summary (original notes on fun.species.traits):
####  - mean and sd per species, without experimental data and with outlier
####    detection when there is enough data, otherwise including
####    experimental data;
####  - if there is no data, the mean of the genus is computed;
####  - outlier detection follows the method of Kattge et al. 2011
####    (univariate outliers only).
### One result per IFN species, returned as a list named by species.
res.list <- setNames(
  lapply(
    species.IFN,
    FUN = fun.species.traits,
    species.table = species.tab2,
    traits = key.main.traits2,
    data = TRY.DATA.FORMATED
  ),
  species.IFN
)
##### TRANSFORM LIST IN A TABLE