Commit e3bac863 authored by Georges Kunstler's avatar Georges Kunstler
Browse files

initial commit

No related merge requests found
Showing with 1711 additions and 0 deletions
+1711 -0
################################################
################################################
################################################
################################################
### LOAD and PROCEED CLIMATIC DATA
################################################
################################################
################################################
################################################
## Data extracted from C. Piedallu data base with
## Radition with cloud cover for each plots
## Piedallu, C, and J Gegout. 2008. “Efficient Assessment of Topographic Solar Radiation to Improve Plant Distribution Models.” Agricultural and Forest Meteorology 148 (11) (October): 1696–1706. doi:10.1016/j.agrformet.2008.06.001.
## Temperature and precipitation with temperature corrected by elevation based on
## Piedallu, C., J. C. Gégout, V. Perez, F. Lebourgeois, and R. Field. 2012. “Soil Water Balance Performs Better Than Climatic Water Variables in Tree Species Distribution Modelling.” Global Ecology and Biogeography. http://onlinelibrary.wiley.com/doi/10.1111/geb.12012/full.
######
## LOAD PIDEALLU DATA
## data.CLIM <- read.csv("./data/raw/DataFrance/climate_piedallu/placettesGK2.csv",
## header=T,sep=',',stringsAsFactors=FALSE)
data.CLIM <- read.csv("./data/raw/DataFrance/climate_piedallu/placettesGK_avec_2011.csv",
header=T,sep='\t',stringsAsFactors=FALSE,dec=",",na.strings="")
data.CLIM$rumkg_500 <- as.numeric(gsub(",",".",data.CLIM$rumkg_500))
head(data.CLIM)
names(data.CLIM)
range(data.CLIM$diff..altIFN...mnt50)
table(data.CLIM$YEAR)
### map of diff between mean altitude of the 1x1km cell and actual elevation of the plots
pdf("./figs/map.error.alti.pdf")
plot(data.CLIM$xl2,data.CLIM$yl2,col="grey",cex=0.3,xlab="X",ylab="Y")
data.temp <- data.CLIM[data.CLIM$diff..altIFN...mnt50 < (-100) | (data.CLIM$diff..altIFN...mnt50>100),]
points(data.temp$xl2,data.temp$yl2,col=c("red","coral3","coral","grey","coral","coral3","red")[cut(data.temp$diff..altIFN...mnt50,breaks=c(-500,-300,-200,-100,100,200,300,500),labels=F)],cex=0.5,pch=1)
dev.off()
##### PLOT HIST OF ALL VARIABLES
pdf('./figs/climatvar.hist.pdf')
par(mfrow=c(3,4))
for (i in 37:48) hist(data.CLIM[,i],xlab='Monthly Precip (mm)',main=names(data.CLIM)[i])
par(mfrow=c(3,4))
for (i in 49:60) hist(data.CLIM[,i],xlab='PET (mm)',main=names(data.CLIM)[i])
par(mfrow=c(3,4))
for (i in 66:77) hist(data.CLIM[,i],xlab='Radiation (?)',main=names(data.CLIM)[i])
par(mfrow=c(3,4))
for (i in 82:93) hist(data.CLIM[,i],xlab='temperature (C)',main=names(data.CLIM)[i])
dev.off()
pdf('./figs/xyplot.Tunco.Tcor.pdf')
par(mfrow=c(3,4))
for (i in 1:12) plot(data.CLIM[,(23:34)[i]],data.CLIM[,(82:94)[i]],xlab='temperature (C) uncorrected',ylab='temperature (C) corrected',
main=names(data.CLIM)[(23:34)[i]])
dev.off()
#########################################
#########################################
#### COMPUTE CLIMATIC VARIABLES !!!
### NEED TO COMPUTE DDG5
### NEED TO COMPUTE SOIL MAX WATER CONTENT
### NEED TO COMPUTE PET
### NEED TO COMPUTE WATER BUDGET
## function to compute sum of degree days above 5.56
##' .. Compute teh sum of degree days above 5.56C from monthly data with a spline ..
##'
##' .. A spline is used to smooth the variation of T over each day from mothly data. Require package 'season' ..
##' @title
##' @param temp monthly temperature in C
##' @param threshold.sgdd threshold of temperature to compte sum of degree day default 5.56
##' @return
##' @author Kunstler
fun.sgdd <- function(temp,threshold.sgdd = 5.56){
require(season)
temp <- unlist(temp)
ndays.month <- flagleap(data.frame(year=2013, month=1:12), F)$ndaysmonth
ndays.year <- sum(ndays.month)
x <- c(-ndays.month[12]/2,(ndays.month/2) + c(0, cumsum(ndays.month[1:11])),ndays.year+ndays.month[1]/2)
## plot(x,c(temp[12],temp,temp[1]),xlim=c(0,365),type='b')
myfit <- loess(y~x, data=data.frame(y=c(temp[12],temp,temp[1]), x=x),span=0.4,degree=2)
mypred <- predict(myfit, 1:ndays.year)
## lines( 1:ndays.year,mypred,col="red")
sgdd <- sum(mypred[mypred>=threshold.sgdd]-threshold.sgdd)
return(sgdd)
}
## remove NA lines
data.CLIM.na <- is.na(data.CLIM[["tmoy6190_1_cor"]])
data.CLIM2 <- data.CLIM[!data.CLIM.na,]
### apply function
sgdd.vec <- apply(data.CLIM2[,82:93],MARGIN=1,FUN=fun.sgdd)
MeanT.vec <- apply(data.CLIM2[,82:93],MARGIN=1,FUN=mean)
plot(MeanT.vec,sgdd.vec)
dim(data.CLIM)
#################################################
### Max Soil Water Content
### compute based on
## Piedallu, C., J. C. Gégout, A. Bruand, and I. Seynave. 2011. “Mapping Soil Water Holding Capacity over Large Areas to Predict Potential Production of Forest Stands.” Geoderma 160 (3): 355–366.
### read data texture as in Piedallu et al. 2011
data.texture <- read.table("./data/raw/DataFrance/climate_piedallu/texture.txt",header=T)
### load ecological data
load(file="./data/process/ecologie_tot.Rdata")
head(ecologie_tot)
codeprof <- c(2.5,10,20,30,40,50,60,70,80,92.5) # code prof en cm
codecaillou <- c(2.5,10,20,30,40,50,60,70,80,90,97.5) ## code percentage en %
#################################################################################################
### function to compute max soil water content based on Piedallu 2010 Geoderma
fun.swhc <- function(affroc,cailloux,text2,text1,prof2,prof1,codeprof,codecaillou,data.texture){
## browser()
### transform code of prof= soil depth in cm and code of percentage of rock in %
prof2.t <- prof2
prof2.t[is.na(prof2)] <- 0
prof2.cm.temp <- codeprof[prof2+1]
prof2.cm.temp[is.na(prof2)] <-0
prof1.t <- prof1
prof1.t[is.na(prof1)] <- 0
text1[is.na(text1)] <- 0
prof1.cm <- codeprof[prof1.t+1]
prof1.cm[is.na(prof1)] <- 0
cailloux.t <- cailloux
cailloux.t[is.na(cailloux)] <- 0
cailloux.perc <- codecaillou[cailloux.t+1]
cailloux.perc[is.na(cailloux)] <- 0
## compute depth of second horizon
prof2.cm <- prof2.cm.temp - prof1.cm
perc.top1 <- apply(cbind(10/prof1.cm,rep(1,length(prof1.cm))),MARGIN=1,FUN=min)
perc.top2 <- apply(cbind(apply(cbind(rep(0,length(prof1.cm)),(10-prof1.cm)/prof2.cm.temp),MARGIN=1,FUN=max),
rep(1,length(prof1.cm))),MARGIN=1,FUN=min)
caill <- (1-codecaillou[affroc+1]/100)*(1-(sqrt(cailloux.perc/100))^3)
## print(c(prof2.cm,perc.top2))
top.hor <- (prof1.cm*perc.top1*data.texture[text1+1,"U_almajou_top"]+prof2.cm*perc.top2*data.texture[text2+1,"U_almajou_top"])
sub.hor <- (prof1.cm*(1-perc.top1)*data.texture[text1+1,"U_almajou_sub"]+prof2.cm*(1-perc.top2)*data.texture[text2+1,"U_almajou_sub"])
swhc <- caill*(top.hor+sub.hor)
return(swhc)
}
#compute it
swhc <- fun.swhc(affroc=ecologie_tot$affroc,cailloux=ecologie_tot$cailloux,text2=ecologie_tot$text2,text1=ecologie_tot$text1,prof2=ecologie_tot$prof2,prof1=ecologie_tot$prof1,codeprof,codecaillou,data.texture)
swhc[is.na(ecologie_tot$prof2)] <- NA
### add to ecological data based
ecologie_tot$SWHC <- swhc
##################################
##################################
#### COMPUTE PET WITH TURC FORMULA
### ### radneb61_13 is the mean of radneb61_1 to 12
### unit to convert radneb61_ from J/cm2/month in MJ/m2/day /100/30 then to convert in KJ/m2/day as in Nick formula *1000
#################################################################
## function to compute PET
fun.PET <- function(i,rad,temp){
## conversion data
require(season)
ndays.month <- flagleap(data.frame(year=2013, month=1:12), F)$ndaysmonth
Rs <- unlist(rad[i,])/100/30*1000
Ta <- unlist(temp[i,])
# compute TURC FORMULA
vec.pet <- 0.4*((0.0239001*Rs)+50)*(Ta/(Ta+15))/30*ndays.month
## with Rs daily global (total) solar radiation (kJ/m2/day)
## and Ta monthly mean temp in C and ndays.month number of day in each month
return(vec.pet)
}
## RAD and Temperature of the 12 months
radneb61_1to12 <- 66:77
tmoy6190_1_cor.1to12 <- 82:93
### apply in parallel
library(doParallel)
registerDoParallel(cores=6) ## affect automaticaly half of the core detected to the foreach
getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6)
PET.matrix <-
foreach(i=1:length(data.CLIM$idp), .combine=rbind) %dopar%
{
fun.PET(i,rad=data.CLIM[,radneb61_1to12],temp=data.CLIM[,tmoy6190_1_cor.1to12])
}
PET.matrix <- PET.matrix[!data.CLIM.na,]
PET.matrix[PET.matrix<0] <- 0 ## affect zero if negative
## plot to check PET computed by me vs PET of Christian
par(mfrow=c(3,4))
for (i in 1:12) {plot(PET.matrix[,i],data.CLIM2[,i+48]) ; lines(0:100,0:100)}
colnames(PET.matrix) <- paste("PET.cor.",1:12,sep="")
##############
### MERGE CLIMATE
data.CLIM2 <- cbind(data.CLIM2,PET.matrix,sgdd=sgdd.vec)
### MERGE WITH ECOLOGICAL DATA
ecologie.clim <- merge(ecologie_tot,data.CLIM2,by="idp",all.x=T)
dim(ecologie_tot)
dim(ecologie.clim)
names(ecologie.clim)
####################################################################################
####################################################################################
####################################################################################
### FUNCTION TO COMPUTE WATER BUDGET based on Bugmann & Cramer 1998
fun.WaterBudget <-function(i,prcp.m,PET.m,Ta.m,SWHC.v,n=2){
WATER <- rep(NA,12*n)
WATER[1] <- unlist(SWHC.v)[i]
SWHC <- unlist(SWHC.v)[i]
AET <- rep(NA,12*n)
D <- rep(NA,12*n)
prcp <- rep(unlist(prcp.m[i,]),n)
PET <- rep(unlist(PET.m[i,]),n)
Ta <- rep(unlist(Ta.m[i,]),n)
for (i in 2:(12*n)){
Pi <- min(0.3*prcp[i] , PET[i])
Ps <- prcp[i] - Pi
S <- 120*WATER[i-1]/SWHC
D[i] <- PET[i]-Pi
AET[i] <- max(0,min(D[i],S))
WATER[i] <- max(0,min(WATER[i-1]+Ps-AET[i],SWHC))
}
## plot(1:(12*n),WATER,type="b")
WATER <- WATER[13:24]
Ta <- Ta[13:24]
AET <- AET[13:24]
D <- D[13:24]
## NEED to compute mean water availability of the growing season
WB.s <- mean(WATER[Ta>5.56],na.rm=T)
## NEED to compute mean water availability of the year
WB.y <- mean(WATER,na.rm=T)
## NEED to compute water stress index of the growing season
WS.s <- sum(na.omit(AET[Ta>5.56 & D>0]))/sum(na.omit(D[Ta>5.56 & D>0]))
## NEED to compute water stress index of the year
WS.y <- sum(na.omit(AET[D>0]))/sum(na.omit(D[D>0]))
return(c("WB.s"=WB.s,"WB.y"=WB.y,
"WS.s"=WS.s,"WS.y"=WS.y))
}
ecologie.clim2 <- ecologie.clim[!is.na(ecologie.clim$PET.cor.1) &!is.na(ecologie.clim$SWHC),]
### test function
fun.WaterBudget(i=16,prcp.m=(ecologie.clim2[,paste("prec6190_",1:12,sep="")]),
PET.m=(ecologie.clim2[,paste("PET.cor.",1:12,sep="")]),
Ta.m=(ecologie.clim2[,c("tmoy6190_1_cor",paste("tmoy6190_1_cor.",1:11,sep=""))]),
SWHC.v=(ecologie.clim2[ ,"SWHC"]),n=2)
### apply function in parallel
library(doParallel)
registerDoParallel(cores=6) ## affect automaticaly half of the core detected to the foreach
getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6)
WBWS.matrix <-
foreach(i=1:length(ecologie.clim2$idp), .combine=rbind) %dopar%
{
fun.WaterBudget(i,prcp.m=(ecologie.clim2[,paste("prec6190_",1:12,sep="")]),
PET.m=(ecologie.clim2[,paste("PET.cor.",1:12,sep="")]),
Ta.m=(ecologie.clim2[,c("tmoy6190_1_cor",paste("tmoy6190_1_cor.",1:11,sep=""))]),
SWHC.v=(ecologie.clim2[ ,"SWHC"]),n=2)
}
### MERGE all and saved
WBWS.matrix2 <- cbind("idp"=ecologie.clim2$idp,WBWS.matrix)
ecologie.clim.data <- merge(ecologie.clim,WBWS.matrix2,by="idp",all.x=T)
#####
# change tplant not good format
load("./data/process/placette_tot.Rdata")
ecologie.clim.data$tplant <- placette_tot$tplant
saveRDS(ecologie.clim.data,file="./data/process/ecologie.clim.data.rds")
#### NEED TO CHECK THAT !!! sensible value SWHC with pedallu map
ecologie.clim.data <- readRDS("./data/process/ecologie.clim.data.rds")
head(ecologie.clim.data)
###check climatic data
pdf("./figs/sgdd.tmin.map.pdf")
par(mfrow=c(2,2))
plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93,
col=rev(heat.colors(10))[cut(ecologie.clim.data$sgdd,quantile(ecologie.clim.data$sgdd,probs=(0:10)/10,na.rm=T),labels=F)],
cex=0.2,pty="s",xlab=NA,ylab=NA,main="SGDD")
plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93,
col=rev(heat.colors(10))[cut(ecologie.clim.data$tmin6190_min_cor,quantile(ecologie.clim.data$tmin6190_min_cor,probs=(0:10)/10,na.rm=T),labels=F)],
cex=0.2,pty="s",xlab=NA,ylab=NA,main="Tmin")
plot( ecologie.clim.data$tmin6190_min_cor,ecologie.clim.data$sgdd,cex=0.2,xlab="Tmin",ylab="SGDD")
dev.off()
pdf("./figs/Water.map.pdf")
par(mfrow=c(2,2),mar=c(1,1,1,1))
plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93,
col=rev(colorRampPalette(c("blue", "red"))( 10 ))[cut(ecologie.clim.data$WB.y,quantile(ecologie.clim.data$WB.y,probs=(0:10)/10,na.rm=T),labels=F)],
cex=0.2,pty="s",xlab=NA,ylab=NA)
plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93,
col=rev(colorRampPalette(c("blue", "red"))( 10 ))[cut(ecologie.clim.data$WB.s,quantile(ecologie.clim.data$WB.s,probs=(0:10)/10,na.rm=T),labels=F)],
cex=0.2,pty="s",xlab=NA,ylab=NA)
plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93,
col=rev(colorRampPalette(c("blue", "red"))( 10 ))[cut(ecologie.clim.data$WS.y,quantile(ecologie.clim.data$WS.y,probs=(0:10)/10,na.rm=T),labels=F)],
cex=0.2,pty="s",xlab=NA,ylab=NA)
plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93,
col=rev(colorRampPalette(c("blue", "red"))( 10 ))[cut(ecologie.clim.data$WS.s,quantile(ecologie.clim.data$WS.s,probs=(0:10)/10,na.rm=T),labels=F)],
cex=0.2,pty="s",xlab=NA,ylab=NA)
dev.off()
pdf("./figs/Water.var,cor.pdf")
par(mfrow=c(2,2))
plot(ecologie.clim.data[['WB.y']],ecologie.clim.data[['WS.y']],xlab="WB year",ylab="ratio AET/D year",cex=0.2)
plot(ecologie.clim.data[['WB.s']],ecologie.clim.data[['WS.s']],xlab="WB growing season",ylab="ratio AET/D growing season",cex=0.2)
plot(ecologie.clim.data[['WB.s']],ecologie.clim.data[['WB.y']],xlab="WB growing season",ylab="WB growing year",cex=0.2)
plot(ecologie.clim.data[['WS.s']],ecologie.clim.data[['WS.y']],xlab="ratio AET/D growing season",ylab="ratio AET/D year",cex=0.2)
dev.off()
pdf("./figs/sgdd.water.cor.pdf")
plot(ecologie.clim.data[['sgdd']],ecologie.clim.data[['WS.y']],xlab="SGDD",ylab="ratio AET/D year",cex=0.2)
dev.off()
### DO PCA OF CLIMATIC DATA
data.prc <- prcomp(ecologie.clim.data[apply(is.na(ecologie.clim.data[,c('sgdd','tmin6190_min_cor','WB.s','WS.s')]),MARGIN=1,FUN=sum)<1,c('sgdd','tmin6190_min_cor','WB.s','WS.s')],na.action=na.omit,scale=T)
summary(data.prc)
scores <-data.prc$x
pdf("./figs/climate.pca.pdf")
biplot(data.prc,pch=1,xlabs=rep('.',length(data.prc$x[,1])),ylabs=c('SGDD','Tmin','WB','WS'))
text(150,180,labels=paste("Var PC1 ", ((summary(data.prc)))$importance[2,1],sep=""))
text(150,165,labels=paste("Var PC2 ", ((summary(data.prc)))$importance[2,2],sep=""))
dev.off()
###############################################################
###############################################################
###############################################################
####### ----- COMPUTE COMPETITION INDEX ------ ################
source('./R/format.function.R')
#### LOAD MERGED DEAD AND ALIVE TREE DATA TO COMPUTE COMPETITION INDEX AT THE START OF THE 5 yr. PERIOD
load('./data/process/arbre.ALIVE.DEAD2.Rdata')
arbre.ALIVE.DEAD2$id.tree <- paste(arbre.ALIVE.DEAD2[["idp"]],arbre.ALIVE.DEAD2[["a"]],sep=".")
arbre.ALIVE.DEAD2$espar <- factor(arbre.ALIVE.DEAD2$espar)
##################
### COMPUTE BA in cm²/m² or m²/ha for each species on each plot without target species
start<- Sys.time()
data.BA.SP <- BA.SP.FUN(id.tree=as.vector(arbre.ALIVE.DEAD2[["id.tree"]]),
diam=as.vector(arbre.ALIVE.DEAD2[["c13"]])/pi,
sp=as.vector(arbre.ALIVE.DEAD2[["espar"]]),
id.plot=as.vector(arbre.ALIVE.DEAD2[["idp"]]),
weights=as.vector(arbre.ALIVE.DEAD2[["w"]])/10000,
weight.full.plot=1/(pi*(c(15))^2))
end <- Sys.time()
end-start
#### compute BA tot for all competitors
BATOT.COMPET <- apply(data.BA.SP[,-1],MARGIN=1,FUN=sum)
data.BA.SP$BATOT.COMPET <- BATOT.COMPET
### merge data
names(data.BA.SP) <- c("id.tree2",names(data.BA.SP)[-1])
arbre.tot.merge <- cbind(arbre.ALIVE.DEAD2,data.BA.SP[match(arbre.ALIVE.DEAD2$id.tree,data.BA.SP$id.tree2),])
save(arbre.tot.merge,file="./data/process/arbre.tot.merge.Rdata")
R/FUN.TRY.R 0 → 100644
############################################
############################################
## FUNCTION TO EXTRACT DECTED OUTLIER AND FORMAT TRY DATA
## Georges Kunstler 14/06/2013
########################################################
########################################################
########################################################
########################################################
###Build a function that extract the variables
##'Description of the function to extract data from original TRY data
##'
##' based on the data structure of extraction from TRY data base
##' @title fun.extract.try
##' @param ObservationID.t list of data identifier that we want to extract
##' @param data try data object
##' @param Non.Trait.Data list of names of non traits data that we want to extract
##' @param Trait.Data list of names of traits data that we want to extract
##' @return data.frame with one line per observation id with clumns with ObservationID Species Nontrait data for Traits: OrigValue OrigUnit StdValue
##' @author Kunstler
fun.extract.try <- function(ObservationID.t,data,Non.Trait.Data,Trait.Data){
data.temp <- data[data$ObservationID==ObservationID.t,]
## Non trait data
Vec.Non.Trait.Data <- rep(NA,length(Non.Trait.Data))
names(Vec.Non.Trait.Data) <- Non.Trait.Data
for (i in 1:length(Non.Trait.Data)){
if( sum(data.temp$DataName==Non.Trait.Data[i])==1){
Vec.Non.Trait.Data[i] <- data.temp[data.temp$DataName==Non.Trait.Data[i],"OrigValueStr"]
}
if(sum(data.temp$DataName==Non.Trait.Data[i])>1){
## if(sum(data.temp$DataName==Non.Trait.Data[i] & grepl("Mean",data.temp$ValueKindName,
## fixed=TRUE))!=1){ print("error in ValueKindName")}
Vec.Non.Trait.Data[i] <- data.temp[data.temp$DataName==Non.Trait.Data[i] ,
"OrigValueStr"][1]
}
}
## Trait data
Vec.Trait.Data.OrigValue <-Vec.Trait.Data.OrigUnit <- Vec.Trait.Data.StdValue <-
rep(NA,length(Trait.Data))
names(Vec.Trait.Data.OrigValue) <- paste("OrigValue",Trait.Data)
names(Vec.Trait.Data.OrigUnit) <- paste("OrigUnitName",Trait.Data)
names(Vec.Trait.Data.StdValue) <- paste("StdValue",Trait.Data)
for (i in 1:length(Trait.Data)){
if(sum(grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE))==1){
Vec.Trait.Data.OrigValue[i] <- data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE),"OrigValue"]
Vec.Trait.Data.OrigUnit[i] <- data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE),"OrigUnitStr"]
Vec.Trait.Data.StdValue[i] <- data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE),"StdValue"]
}
if( sum(grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE))>1){
if(sum((data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") & !is.na(data.temp$ValueKindName)))==1){
Vec.Trait.Data.OrigValue[i] <- mean(data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE)&
(data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") & !is.na(data.temp$ValueKindName)) ,"OrigValue"])
Vec.Trait.Data.OrigUnit[i] <- (data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE) &
(data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") & !is.na(data.temp$ValueKindName)),"OrigUnitStr"])[1]
Vec.Trait.Data.StdValue[i] <- mean(data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE) &
(data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") & !is.na(data.temp$ValueKindName)),"StdValue"])
}
if(sum(data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") )<1){
Vec.Trait.Data.OrigValue[i] <- mean(data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE),"OrigValue"],na.rm=T)
Vec.Trait.Data.OrigUnit[i] <- (data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE) ,"OrigUnitStr"])[1]
Vec.Trait.Data.StdValue[i] <- mean(data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE) ,"StdValue"],na.rm=T)
}
}
}
### EXPERIMENTAL DATA TYPE
TF.exp.data <- sum(grepl("Growth & measurement conditions - experimental tre",data.temp$NonTraitCategories, fixed=TRUE) )>0
names(TF.exp.data) <- 'TF.exp.data'
res.temp <- data.frame("ObservationID"=ObservationID.t,"AccSpeciesName"=unique(data.temp$AccSpeciesName) ,t(Vec.Non.Trait.Data),TF.exp.data,
t(Vec.Trait.Data.OrigValue),t(Vec.Trait.Data.OrigUnit),t(Vec.Trait.Data.StdValue))
return(res.temp)
}
##### function to flag outliers
# univar outliers
library(extremevalues)
##' Function to identify univariate outlier from package extrem value
##'
##' Not used in this version
##' @title
##' @param x.na
##' @param distribution
##' @param method
##' @return
##' @author Kunstler
fun.out.TF <- function(x.na,distribution="lognormal",method){
require(extremevalues)
x <- x.na[!is.na(x.na)]
x.num <- (1:length(x.na))[!is.na(x.na)]
TF.vec <- rep(FALSE,length(x.na))
outliers.temp <- getOutliers(x,distribution="lognormal",method=method)
TF.vec[x.num[c(outliers.temp$iLeft,outliers.temp$iRight)]] <- TRUE
return(list(TF.vec,outliers.temp))
}
## second outlier detection based on Kattage et al 2011
##' Detection of univar outlier based on method of Kattge et al. 2011
##'
##'
##' @title
##' @param x.na
##' @param log
##' @return TRUE FALSE vector to identify outlier TRUE : outlier
##' @author Kunstler
fun.out.TF2 <- function(x.na,log=TRUE){
x <- x.na[!is.na(x.na)]
x.num <- (1:length(x.na))[!is.na(x.na)]
TF.vec <- rep(FALSE,length(x.na))
if(log){
fit.dist <- fitdistr(log10(na.omit(x) ),'normal')
high.bound <- fit.dist$estimate["mean"]+2*(fit.dist$estimate["sd"]+fit.dist$sd["sd"])
low.bound <- fit.dist$estimate["mean"]-2*(fit.dist$estimate["sd"]+fit.dist$sd["sd"])
TF.vec[x.num[log10(x)>high.bound | log10(x)<low.bound]] <- TRUE
}else{
fit.dist <- fitdistr((na.omit(x) ),'normal')
high.bound <- fit.dist$estimate["mean"]+2*(fit.dist$estimate["sd"]+fit.dist$sd["sd"])
low.bound <- fit.dist$estimate["mean"]-2*(fit.dist$estimate["sd"]+fit.dist$sd["sd"])
TF.vec[x.num[(x)>high.bound | (x)<low.bound]] <- TRUE
}
return((TF.vec))
}
fun.univar.outlier <- function(data,traits,method="I"){
matrix.TF.outliers <- as.data.frame(matrix(NA,nrow=length(data[,1]),ncol=length(traits)))
names(matrix.TF.outliers) <- traits
pdf("./figs/univar.outliers.pdf")
for(i in traits){
x.na <- data[,i]
x <- data[!is.na(data[,i]),i]
if(sum(x<0.0000000001)<1){
list.out <- fun.out.TF(x.na,distribution="lognormal",method=method)
matrix.TF.outliers[,i] <- list.out[[1]]
outliers.temp <- list.out[[2]]
######
hist.obj <- hist(log10(x),main=i,breaks=40,xlim=range(log10(x),log10(outliers.temp$limit)))
hist(log10(x)[!data[!is.na(data[,i]),"TF.exp.data"]], breaks=hist.obj$breaks,add=TRUE,col="grey")
abline(v=log10(outliers.temp$limit[1]),col="red")
abline(v=log10(outliers.temp$limit[2]),col="red")
}else{print(paste(i,"negative"))
list.out <- fun.out.TF(x.na,distribution="lognormal",method=method)
matrix.TF.outliers[,i] <- list.out[[1]]
outliers.temp <- list.out[[2]]
TF.vec <- fun.out.TF(x,distribution="normal",method=method)
matrix.TF.outliers[,i] <- TF.vec
}
}
dev.off()
return(matrix.TF.outliers)
}
##################
## Multi variates outlier
fun.multi.measure <- function(X,Y,data){
return(sum(!is.na(data[,X]) & !is.na(data[,Y])))
}
fun.multi.measure.mat <- function(traits,data){
mat.multi <- matrix(NA,length(traits),length(traits))
for (i in 1:(length(traits)-1)){
for (j in (i+1):length(traits)){
mat.multi[i,j] <- fun.multi.measure(traits[i],traits[j],data=data)
}
}
rownames(mat.multi) <- traits
colnames(mat.multi) <- traits
return(mat.multi)
}
#### function detect outylier with two methods
fun.mv.out <- function(data.t,outbound,quant.treshold=0.995){
require(mvoutlier)
outliers.temp <- pcout(data.t,makeplot=FALSE,outbound)
Mahalanobi.dist <- covMcd(data.t)$mah
Mah.vec.01 <- rep(1,length(Mahalanobi.dist))
Mah.vec.01[Mahalanobi.dist>quantile(Mahalanobi.dist,probs=quant.treshold)] <- 0
return(data.frame(Mah.out=Mah.vec.01,pcout.01=outliers.temp$wfinal01))
}
####
fun.multivar.outlier <- function(data,traits,lim.obs=50,outbound=0.25){
## compute number of multi traits measurments
mat.multi <- fun.multi.measure.mat(traits,data)
## loop over traits combination and identify combination with enough observation
## mat to save outliers
mat.mv.outliers <- array(NA,dim=c(nrow(data),length(traits),length(traits)),dimnames=list(data$ObservationID,traits,traits))
pdf( "./figs/mv.outliers.pdf")
for (i in 1:(length(traits)-1)){
for (j in (i+1):length(traits)){
if(mat.multi[i,j]>lim.obs){
data.t <- log10(data[apply(is.na(data[,c(traits[i],traits[j]) ]),MARGIN=1,FUN=sum)<1,c(traits[i],traits[j])])
out.data <- fun.mv.out(data.t,outbound,quant.treshold=0.995)
mat.mv.outliers[apply(is.na(data[,c(traits[i],traits[j]) ]),MARGIN=1,FUN=sum)<1,i,j] <- out.data$Mah.out ##out.data$pcout.01
plot(data.t[,1],data.t[,2],col=c("grey","black")[out.data$pcout.01+1],xlab=traits[i],ylab=traits[j],
main=paste("outliers",i,j,"pdf",sep="."))
points(data.t[out.data$Mah.out==0,1],data.t[out.data$Mah.out==0,2],pch=3,col="red")
}
}
}
dev.off()
return(mat.mv.outliers)
}
############################################################
####################
## function for plot
fun.log.scatter.with.marg.hist <- function(x,y,name.x,name.y,lab.x,lab.y,outlier.x,outlier.y,array.outlier){
if(sum(apply(is.na(cbind(x,y)),MARGIN=1,FUN=sum)<1)>1){
data.t <- ((cbind(x,y)[apply(is.na(cbind(x,y)),MARGIN=1,FUN=sum)<1,]))
def.par <- par(no.readonly = TRUE) # save default, for resetting...
xhist <- hist(log10(x), breaks=20,plot=FALSE)
yhist <- hist(log10(y), breaks=20,plot=FALSE)
top <- max(c(xhist$counts, yhist$counts))
nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), TRUE)
#layout.show(nf)
## colors for outlier
col.vec <- rep(1,length(x[apply(is.na(cbind(x,y)),MARGIN=1,FUN=sum)<1]))
col.vec[apply((cbind(outlier.x,outlier.y)[apply(is.na(cbind(x,y)),MARGIN=1,FUN=sum)<1,]),MARGIN=1,FUN=sum)>0] <- 2
## col for mv outlier
col.mvout.vec <- array.outlier[apply(is.na(cbind(x,y)),MARGIN=1,FUN=sum)<1,name.x,name.y]
par(mar=c(5,5,1,1))
plot(data.t[,1],data.t[,2],log="xy"
,xlab=lab.x,ylab=lab.y,xlim=range(x,na.rm=TRUE),
ylim=range(y,na.rm=TRUE),col=c("black","red")[col.vec],pch=c(16,2)[col.vec],cex=c(0.5,1)[col.vec])
if ( sum(!is.na(col.mvout.vec))>1) {
points(data.t[col.mvout.vec==0 & !is.na(col.mvout.vec),1],data.t[col.mvout.vec==0 & !is.na(col.mvout.vec),2],pch=3,col="blue")
}
par(mar=c(0,5,1,1))
barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0)
high.out <- hist(log10(x[outlier.x])[x[outlier.x]>mean(x[outlier.x],na.rm=T)],breaks=xhist$breaks,plot=FALSE)
barplot(high.out$count,col="red",add=TRUE, space=0)
low.out <- hist(log10(x[outlier.x])[x[outlier.x]<mean(x[outlier.x],na.rm=T)],breaks=xhist$breaks,plot=FALSE)
barplot(low.out$count,col="red",add=TRUE, space=0)
par(mar=c(5,0,1,1))
barplot(yhist$counts,axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE)
high.out <- hist(log10(y[outlier.y])[y[outlier.y]>mean(y[outlier.y],na.rm=T)],breaks=yhist$breaks,plot=FALSE)
barplot(high.out$count,col="red",add=TRUE, space=0, horiz=TRUE)
low.out <- hist(log10(y[outlier.y])[y[outlier.y]<mean(y[outlier.y],na.rm=T)],breaks=yhist$breaks,plot=FALSE)
barplot(low.out$count,col="red",add=TRUE, space=0, horiz=TRUE)
par(def.par)}else{print("less than 2 observation in common in x and y")}
}
###################################
###################################
####### extract mean sd per species or genus
####### add species synonyme
fun.species.traits <- function(species,species.table,col.sp="Latin_name",col.sp.syno="Latin_name_syn",traits,data){
vec.mean <- vec.sd <- vec.nobs <- rep(NA,length(traits))
vec.exp <- vec.genus <- rep(FALSE,length(traits))
names(vec.mean) <- names(vec.sd) <- names(vec.exp) <- names(vec.genus) <- names(vec.nobs)<- traits
species.syno <- species.table[species.table[[col.sp]]==species,col.sp.syno]
#browser()
for(i in traits){
if(sum((data$AccSpeciesName %in% species.syno) & !is.na(data[[i]]))>0){ ## if data for this species or syno
if(sum((data$AccSpeciesName %in% species.syno) & (!is.na(data[[i]])) & (!data[["TF.exp.data"]]))>0){## if data with out experiments
x <- data[[i]][data$AccSpeciesName %in% species.syno & (!is.na(data[[i]])) &
(!data[["TF.exp.data"]])]
if(length(x)>50){## if more than 50 obs remove outlier
outlier <- fun.out.TF2(x.na=x,log=TRUE)
vec.mean[[i]] <- mean(log10(x[!outlier]))
vec.sd[[i]] <- sd(log10(x[!outlier]))
vec.nobs[[i]] <- length(x[!outlier])
}else{
vec.mean[[i]] <- mean(log10(x))
vec.sd[[i]] <- sd(log10(x))
vec.nobs[[i]] <- length(x)}
}else{### include experimental data
x <- data[[i]][data$AccSpeciesName %in% species.syno & (!is.na(data[[i]])) ]
if(length(x)>50){
outlier <- fun.out.TF2(x.na=x,log=TRUE)
vec.mean[[i]] <- mean(log10(x[!outlier]))
vec.sd[[i]] <- sd(log10(x[!outlier]))
vec.exp[[i]] <- TRUE
vec.nobs[[i]] <- length(x[!outlier])
}else{
vec.mean[[i]] <- mean(log10(x))
vec.sd[[i]] <- sd(log10(x))
vec.exp[[i]] <- TRUE
vec.nobs[[i]] <- length(x)}
}
}else{### compte data at genus level if no data for the species
genus <- sub(" .*","",species)
if(sum(grepl(genus,data$AccSpeciesName) & (!is.na(data[[i]])))>0){
x <- data[[i]][grepl(genus,data$AccSpeciesName,fixed=TRUE ) & (!is.na(data[[i]])) ]
if(length(x)>50){
outlier <- fun.out.TF2(x.na=x,log=TRUE)
vec.mean[[i]] <- mean(log10(x[!outlier]))
vec.sd[[i]] <- sd(log10(x[!outlier]))
vec.genus[[i]] <- TRUE
vec.nobs[[i]] <- length(x[!outlier])
}else{
vec.mean[[i]] <- mean(log10(x))
vec.sd[[i]] <- sd(log10(x))
vec.genus[[i]] <- TRUE
vec.nobs[[i]] <- length(x)}
}
}
}
return(list(mean=vec.mean,sd=vec.sd,exp=vec.exp,genus=vec.genus,nobs=vec.nobs))
}
#######################
### FUNCTIONS TO Manipulate species names
fun.get.genus <- function(x) gsub(paste(" ",gsub("^([a-zA-Z]* )","",x),sep=""),"",x,fixed=TRUE)
trim.trailing <- function (x) sub("\\s+$", "", x)
#####FUN to COMPUTE MEAN SD FOR GENUS OR SPECIES
fun.sd.sp.or.genus <- function(traits,species,genus=FALSE){
if(genus){
genus <- sapply(species,fun.get.genus)
data.t <- cbind(tapply((traits),
INDEX = genus,FUN=sd,na.rm=TRUE),
tapply(!is.na(traits),
INDEX = genus,FUN=sum,na.rm=TRUE))
colnames(data.t) <- c("sd","nobs")
return(data.t)
}else{
data.t <- cbind(tapply((traits),
INDEX = species,FUN=sd,na.rm=TRUE),
tapply(!is.na(traits),
INDEX = species,FUN=sum,na.rm=TRUE))
colnames(data.t) <- c("sd","nobs")
return(data.t)
}
}
###################################################
###################################################
###################################################
##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP ANALYSIS
#########################
##
##' .. Compute the basal area of competitor in a plot..
##'
##' .. content for \details{} ..
##' @title
##' @param diam diameters of each individuals in cm
##' @param weights weight to compute the basal area in cm^2/m^2
##' @return basal area in cm^2/m^2
##' @author Kunstler
BA.fun <- function(diam,weights){
((diam/2)^2)*pi*weights
}
####
##' .. function to compute the basal area of neighborood tree in plots ..
##'
##' .. content for \details{} ..
##' @title
##' @param id.tree plot identifier
##' @param diam diam of tree in cm
##' @param sp species name or code
##' @param id.plot identifier of the plot
##' @param weights weights to compute basal area in cm^2/m^2
##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2
##' @return data frame with tree.id and one column per species with basal area of the species (without the target tree)
##' @author Kunstler
BA.SP.FUN <- function(id.tree,diam,sp,id.plot,weights,weight.full.plot){
# compute BA tot per species per plot
BASP <- tapply(BA.fun(diam,weights),INDEX=list(id.plot,sp),FUN=sum,na.rm=T)
print(dim(BASP))
DATA.BASP <- data.frame(id.plot= rownames(BASP),BASP)
names( DATA.BASP) <- c("id.plot",colnames(BASP))
#### MERGE with indivudal tree
data.indiv <- data.frame(id.tree,sp,id.plot,diam)
data.merge <- merge(data.indiv,DATA.BASP,by="id.plot")
rm(data.indiv,DATA.BASP)
gc()
## substracte the BA of target species in the column of the good species (with transpose of the matrix )
t.arbre.BASP <- t(as.matrix(data.merge[,-(1:4)]))
t.arbre.BASP[t(outer(data.merge$sp,names(data.merge[,-(1:4)]),FUN="=="))] <-
t.arbre.BASP[t(outer(data.merge$sp,names(data.merge[,-(1:4)]),FUN="=="))] -BA.fun(data.merge$diam,rep(weight.full.plot,length(diam)))
data.res <- data.frame(data.merge$id.tree,t(t.arbre.BASP))
names(data.res) <- c("id.tree",colnames(BASP))
return( data.res)
}
#### function with X Y coordinates based on a neighborhood of radius R
BA.SP.FUN.XY <- function(id.tree,xy.table,diam,sp,Rlim){
dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE))
dist.mat[dist.mat <Rlim] <- 1
dist.mat[dist.mat >Rlim] <- 0
diag(dist.mat) <- 0
BA <- BA.fun(diam,weights=10000/(pi*Rlim^2))
BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE)
fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE)
return(t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp)))
}
############################################################
############################################################
########## READ, MERGE AND CLEAN ALL NFI DATA NEW METHODS
### read TREE table downloaded from the web
arbre2005 <- read.csv("./data/raw/DataFrance/2005/arbres_foret_2005.csv",sep=";", stringsAsFactors=FALSE)
summary(arbre2005)
arbre2006 <- read.csv("./data/raw/DataFrance/2006/arbres_foret_2006.csv",sep=";", stringsAsFactors=FALSE)
summary(arbre2006)
arbre2007 <- read.csv("./data/raw/DataFrance/2007/arbres_foret_2007.csv",sep=";", stringsAsFactors=FALSE)
summary(arbre2007)
arbre2008 <- read.csv("./data/raw/DataFrance/2008/arbres_foret_2008.csv",sep=";", stringsAsFactors=FALSE)
summary(arbre2005)
arbre2009 <- read.csv("./data/raw/DataFrance/2009/arbres_foret_2009.csv",sep=";", stringsAsFactors=FALSE)
summary(arbre2009)
arbre2010 <- read.csv("./data/raw/DataFrance/2010/arbres_foret_2010.csv",sep=";", stringsAsFactors=FALSE)
summary(arbre2010)
arbre2011 <- read.csv("./data/raw/DataFrance/2011/arbres_foret_2011.csv",sep=";", stringsAsFactors=FALSE)
#### ALL TREE after 2007 don't have the veget value as in 2005 aned 2006 because new variable ACCI for tree with accident (trunk broken ...)
### NEED TO UPDATE VEGET FROM ACCI
arbre2007$veget[arbre2007$acci>0] <- 1
arbre2008$veget[arbre2008$acci>0] <- 1
arbre2009$veget[arbre2009$acci>0] <- 1
arbre2010$veget[arbre2010$acci>0] <- 1
arbre2011$veget[arbre2011$acci>0] <- 1
##
arbre2005$veget <- unclass(arbre2005$veget)-1
arbre2006$veget <- unclass(arbre2006$veget)-1
######
## ORI NEED TO BE DEGRADED SINCE 2007 to two level from resprout or from seed
arbre2007$ori[arbre2007$ori==2] <- 0
arbre2008$ori[arbre2008$ori==2] <- 0
arbre2009$ori[arbre2009$ori==2] <- 0
arbre2010$ori[arbre2010$ori==2] <- 0
arbre2011$ori[arbre2011$ori==2] <- 0
##############################
### merge all table adding NA when no variable for that year
arbre.tot <- data.frame(idp=c(arbre2005$idp,arbre2006$idp,arbre2007$idp,arbre2008$idp,arbre2009$idp,arbre2010$idp,arbre2011$idp),
a=c(arbre2005$a,arbre2006$a,arbre2007$a,arbre2008$a,arbre2009$a,arbre2010$a,arbre2011$a),
veget=c(arbre2005$veget,arbre2006$veget,arbre2007$veget,arbre2008$veget,arbre2009$veget,arbre2010$veget,arbre2011$veget),
simplif=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),
rep(NA,length(arbre2008$idp)),arbre2009$simplif,arbre2010$simplif,arbre2011$simplif),
acci=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),arbre2007$acci,arbre2008$acci,arbre2009$acci,
arbre2010$acci,arbre2011$acci),
espar=c(as.character(arbre2005$espar),as.character(arbre2006$espar),as.character(arbre2007$espar),
as.character(arbre2008$espar),as.character(arbre2009$espar),as.character(arbre2010$espar),as.character(arbre2011$espar)),
ori=c(arbre2005$ori,arbre2006$ori,arbre2007$ori,arbre2008$ori,arbre2009$ori,arbre2010$ori,arbre2011$ori),
lib=c(arbre2005$lib,arbre2006$lib,arbre2007$lib,arbre2008$lib,arbre2009$lib,arbre2010$lib,arbre2011$lib),
forme=c(arbre2005$forme,arbre2006$forme,arbre2007$forme,arbre2008$forme,arbre2009$forme,arbre2010$forme,arbre2011$forme),
tige=c(arbre2005$tige,arbre2006$tige,arbre2007$tige,arbre2008$tige,arbre2009$tige,arbre2010$tige,arbre2011$tige),
mortb=c(rep(NA,length(arbre2005$idp)),arbre2006$mortb,arbre2007$mortb,arbre2008$mortb,arbre2009$mortb,arbre2010$mortb,
arbre2011$mortb),
sfgui=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$sfgui,
arbre2009$sfgui,arbre2010$sfgui,arbre2011$sfgui),
sfgeliv=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$sfgeliv,
arbre2009$sfgeliv,arbre2010$sfgeliv,arbre2011$sfgeliv),
sfpied=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$sfpied,
arbre2009$sfpied,arbre2010$sfpied,arbre2011$sfpied),
sfdorge=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$sfdorge,
arbre2009$sfdorge,arbre2010$sfdorge,arbre2011$sfdorge),
sfcoeur=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),
rep(NA,length(arbre2008$idp)),arbre2009$sfcoeur,arbre2010$sfcoeur,arbre2011$sfcoeur),
c13=c(arbre2005$c13,arbre2006$c13,arbre2007$c13,arbre2008$c13,arbre2009$c13,arbre2010$c13,arbre2011$c13),
ir5=c(arbre2005$ir5,arbre2006$ir5,arbre2007$ir5,arbre2008$ir5,arbre2009$ir5,arbre2010$ir5,arbre2011$ir5),
htot=c(arbre2005$htot,arbre2006$htot,arbre2007$htot,arbre2008$htot,arbre2009$htot,arbre2010$htot,arbre2011$htot),
hdec=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$hdec,
arbre2009$hdec,arbre2010$hdec,arbre2011$hdec),
decoupe=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$decoupe,
arbre2009$decoupe,arbre2010$decoupe,arbre2011$decoupe),
q1=c(arbre2005$q1,arbre2006$q1,arbre2007$q1,arbre2008$q1,arbre2009$q1,arbre2010$q1,arbre2011$q1),
q2=c(arbre2005$q2,arbre2006$q2,arbre2007$q2,arbre2008$q2,arbre2009$q2,arbre2010$q2,arbre2011$q2),
q3=c(arbre2005$q3,arbre2006$q3,arbre2007$q3,arbre2008$q3,arbre2009$q3,arbre2010$q3,arbre2011$q3),
r=c(arbre2005$r,arbre2006$r,arbre2007$r,arbre2008$r,arbre2009$r,arbre2010$r,arbre2011$r),
lfsd=c(arbre2005$lfsd,arbre2006$lfsd,arbre2007$lfsd,arbre2008$lfsd,arbre2009$lfsd,arbre2010$lfsd,arbre2011$lfsd),
age=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$age,
arbre2009$age,arbre2010$age,arbre2011$age),
v=c(arbre2005$v,arbre2006$v,arbre2007$v,arbre2008$v,arbre2009$v,arbre2010$v,arbre2011$v),
w=c(arbre2005$w,arbre2006$w,arbre2007$w,arbre2008$w,arbre2009$w,arbre2010$w,arbre2011$w),
YEAR=c(rep(2005,length(arbre2005$idp)),rep(2006,length(arbre2006$idp)),rep(2007,length(arbre2007$idp)),
rep(2008,length(arbre2008$idp)),rep(2009,length(arbre2009$simplif)),rep(2010,length(arbre2010$simplif)),
rep(2011,length(arbre2011$simplif))))
rm(arbre2005,arbre2006,arbre2007,arbre2008,arbre2009,arbre2010,arbre2011)
gc()
########################################################
## #### check problem of unit for c13 ir5 and htot by plotting the data
## plot(arbre.tot$c13,arbre.tot$ir5,col=unclass(factor(arbre.tot$YEAR)),cex=0.1)
## boxplot(arbre.tot$c13~arbre.tot$YEAR,ylab="c13")
## boxplot(arbre.tot$ir5~arbre.tot$YEAR,ylab="ir5")
## boxplot(arbre.tot$htot~arbre.tot$YEAR,ylab="htot")
## boxplot(arbre.tot$age~arbre.tot$YEAR,ylab="age")
## ##### SOMETHING VERY STRANGE FOR THE AGE WITH SEVERAL TREE OVER 1000 YEARS OLD
## boxplot(arbre.tot$mortb~arbre.tot$YEAR,ylab="mortality branche")
## boxplot(arbre.tot$veget~arbre.tot$YEAR,ylab="accident")
## boxplot(arbre.tot$simplif~arbre.tot$YEAR,ylab="accident")
## boxplot(arbre.tot$w~arbre.tot$YEAR,ylab="accident")
## boxplot(arbre.tot$ori~arbre.tot$YEAR,ylab="accident")
## boxplot(arbre.tot$lib~arbre.tot$YEAR,ylab="accident")
## boxplot(arbre.tot$forme~arbre.tot$YEAR,ylab="accident")
## boxplot(arbre.tot$tige~arbre.tot$YEAR,ylab="accident")
## ### NEED TO USE ONLY THE TIGE == 1 in the ANALYSIS
## boxplot(arbre.tot$sfgui~arbre.tot$YEAR,ylab="accident")
## boxplot(arbre.tot$sfgeliv~arbre.tot$YEAR,ylab="accident")
## ##### CHECK OTHER VARIABLE OK DONE
## ### USE BRANCH MORTALITY AS AN INDICATOR OF MORTALITY ?? ABIOTIC STRESS ?
## x11()
## plot(arbre.tot$c13,arbre.tot$htot,col=unclass(factor(arbre.tot$YEAR)))
save(arbre.tot,file="./data/process/arbre.tot.Rdata")
#########################################
###### DEAD
#########################################
##############################################################
#############################################################
## READ AND MERGE DEAD DATA
### MERGE WITH DEAD TREE and MERGE WITH PLOT DATA!!
### read DEAD TREE table downloaded from the web
arbre_mort2005 <- read.csv("./data/raw/DataFrance/2005/arbres_morts_foret_2005.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(arbre_mort2005)
arbre_mort2006 <- read.csv("./data/raw/DataFrance/2006/arbres_morts_foret_2006.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(arbre_mort2006)
arbre_mort2007 <- read.csv("./data/raw/DataFrance/2007/arbres_morts_foret_2007.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(arbre_mort2007)
arbre_mort2008 <- read.csv("./data/raw/DataFrance/2008/arbres_morts_foret_2008.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(arbre_mort2005)
arbre_mort2009 <- read.csv("./data/raw/DataFrance/2009/arbres_morts_foret_2009.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(arbre_mort2009)
arbre_mort2010 <- read.csv("./data/raw/DataFrance/2010/arbres_morts_foret_2010.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(arbre_mort2010)
arbre_mort2011 <- read.csv("./data/raw/DataFrance/2011/arbres_morts_foret_2011.csv",sep=";"
, stringsAsFactors=FALSE)
## names(arbre_mort2005)
## names(arbre_mort2006)
## names(arbre_mort2007)
## names(arbre_mort2008)
## names(arbre_mort2009)
## names(arbre_mort2010)
## names(arbre_mort2011)
### merge 2005 2006 2007 to compute c13
arbre_mort05_07 <- rbind(arbre_mort2005, arbre_mort2006, arbre_mort2007)
arbre_mort05_07$c13 <- rep(NA,length(arbre_mort05_07$c0))
arbre_mort05_07$espar2 <- as.numeric(substr(arbre_mort05_07$espar,1,2))
arbre_mort05_07$year <- c(rep(2005,length=length(arbre_mort2005[,1])),
rep(2006,length=length(arbre_mort2006[,1])),
rep(2007,length=length(arbre_mort2007[,1])))
#### NEED TO CONVERT c0 into c13 before 2008
### before 2008 no date dead but all tree died less than 5 years ago.
### need to do convertion between c0 and c13.
## for that use the NFI data from previous inventory that were reporting both c0 and c13 for all species
## fit an allometric relationship and then use it to predict c13 in this data base
### READ DATA CYCLE 3 ORGINAL /// NEED TO CONVERT C0 and C13 in cm *100
arbre.cycle3 <- read.table("./data/raw/DataFrance/cycle3/data.arbre.tot.txt",sep=" ", stringsAsFactors=FALSE)
### change the C from m to cm
arbre.cycle3$C0 <- arbre.cycle3$C0*100
arbre.cycle3$C13 <- arbre.cycle3$C13*100
## LOAD library RMA regression
library(lmodel2)
## the regression between C13 and C0 vary between species, but not same species in cycle 3 because less details (the classification is only based on number and no letters
## remove the letters and apply the same model to all species that have the same number code
species.list <- c(as.numeric(names(table(substr(arbre_mort05_07$espar,1,2))))[-1])
## length(table(arbre.cycle3$ESS))
for (i in species.list)
{
if (sum(arbre.cycle3$ESS==i)>50)
{
lmodel2.res <- lmodel2(C13~C0 ,data=arbre.cycle3[arbre.cycle3$ESS==i,],range.x="relative",range.y="relative")
arbre_mort05_07$c13[arbre_mort05_07$espar2==i & !is.na(arbre_mort05_07$espar2)] <-
lmodel2.res$regression.results[4,2] + arbre_mort05_07$c0[arbre_mort05_07$espar2==i &
!is.na(arbre_mort05_07$espar2)]*lmodel2.res$regression.results[4,3]
print(i)
print(lmodel2.res$regression.results[4,2:3])
print(range(arbre_mort05_07$c13[arbre_mort05_07$espar2==i &
!is.na(arbre_mort05_07$espar2)],na.rm=T))
}
else
{
lmodel2.res <- lmodel2(C13~C0 ,data=arbre.cycle3,range.x="relative",range.y="relative")
arbre_mort05_07$c13[arbre_mort05_07$espar2==i & !is.na(arbre_mort05_07$espar2)] <-
lmodel2.res$regression.results[4,2] + arbre_mort05_07$c0[arbre_mort05_07$espar2==i &
!is.na(arbre_mort05_07$espar2)]*lmodel2.res$regression.results[4,3]
print(i)
print(lmodel2.res$regression.results[4,2:3])
print(range(arbre_mort05_07$c13[arbre_mort05_07$espar2==i & !is.na(arbre_mort05_07$espar2)],na.rm=T))
}
}
### for species with NO DATA in cycle 3 apply mean model over all species
lmodel2.res <- lmodel2(C13~C0 ,data=arbre.cycle3,range.x="relative",range.y="relative")
arbre_mort05_07$c13[is.na(arbre_mort05_07$espar2)] <-
lmodel2.res$regression.results[4,2] + arbre_mort05_07$c0[is.na(arbre_mort05_07$espar2)]*lmodel2.res$regression.results[4,3]
## check predicted C13 from C0
head(cbind(arbre_mort05_07$c13,arbre_mort05_07$c0,arbre_mort05_07$espar) )
### ok donne c13 added
########################
## MERGE WITH other dead data
arbre_mort_tot <- data.frame(idp=c(arbre_mort05_07$idp,arbre_mort2008$idp,arbre_mort2009$idp,arbre_mort2010$idp,arbre_mort2011$idp),
a=c(arbre_mort05_07$a,arbre_mort2008$a,arbre_mort2009$a,arbre_mort2010$a,arbre_mort2011$a),
espar=c(as.character(arbre_mort05_07$espar),as.character(arbre_mort2008$espar),as.character(arbre_mort2009$espar),
as.character(arbre_mort2010$espar),as.character(arbre_mort2011$espar)),
ori=c(arbre_mort05_07$ori,arbre_mort2008$ori,arbre_mort2009$ori,arbre_mort2010$ori,arbre_mort2011$ori),
veget=c(arbre_mort05_07$veget,arbre_mort2008$veget,arbre_mort2009$veget,arbre_mort2010$veget,arbre_mort2011$veget),
datemort=c(rep(NA,length(arbre_mort05_07$ori)),arbre_mort2008$datemort,arbre_mort2009$datemort,arbre_mort2010$datemort
,arbre_mort2011$datemort),
c13=c(arbre_mort05_07$c13,arbre_mort2008$c13,arbre_mort2009$c13,arbre_mort2010$c13,arbre_mort2011$c13),
v=c(arbre_mort05_07$v,arbre_mort2008$v,arbre_mort2009$v,arbre_mort2010$v,arbre_mort2011$v),
w=c(arbre_mort05_07$w,arbre_mort2008$w,arbre_mort2009$w,arbre_mort2010$w,arbre_mort2011$w),
YEAR=c(rep(2005,length(arbre_mort2005$idp)),rep(2006,length(arbre_mort2006$idp)),rep(2007,length(arbre_mort2007$idp))
,rep(2008,length(arbre_mort2008$idp)),rep(2009,length(arbre_mort2009$idp)),rep(2010,length(arbre_mort2010$idp))
,rep(2011,length(arbre_mort2011$idp)))
)
rm(arbre.cycle3,arbre_mort2005,arbre_mort2006,arbre_mort2007,arbre_mort2008,arbre_mort2009,arbre_mort2010,arbre_mort2011)
gc()
save(arbre_mort_tot,file="./data/process/arbre_mort_tot.Rdata")
################################################################################
#### MERGE DEAD AND ALIVE TREE
head(arbre_mort_tot)
head(arbre.tot)
arbre.ALIVE.DEAD <- data.frame(
idp=c(arbre.tot$idp,arbre_mort_tot$idp),
a=c(arbre.tot$a,arbre_mort_tot$a),
veget=c(arbre.tot$veget,arbre_mort_tot$veget),
simplif=c(arbre.tot$simplif,rep(NA,length=length(arbre_mort_tot$idp))),
acci=c(arbre.tot$acci,rep(NA,length=length(arbre_mort_tot$idp))),
espar=c(as.character(arbre.tot$espar),as.character(arbre_mort_tot$espar)),
ori=c(arbre.tot$ori,arbre_mort_tot$ori),
lib=c(arbre.tot$lib,rep(NA,length=length(arbre_mort_tot$idp))),
forme=c(arbre.tot$forme,rep(NA,length=length(arbre_mort_tot$idp))),
tige=c(arbre.tot$tige,rep(NA,length=length(arbre_mort_tot$idp))),
mortb=c(arbre.tot$mortb,rep(NA,length=length(arbre_mort_tot$idp))),
sfgui=c(arbre.tot$sfgui,rep(NA,length=length(arbre_mort_tot$idp))),
sfgeliv=c(arbre.tot$sfgeliv,rep(NA,length=length(arbre_mort_tot$idp))),
sfpied=c(arbre.tot$sfpied,rep(NA,length=length(arbre_mort_tot$idp))),
sfdorge=c(arbre.tot$sfdorge,rep(NA,length=length(arbre_mort_tot$idp))),
sfcoeur=c(arbre.tot$sfcoeur,rep(NA,length=length(arbre_mort_tot$idp))),
c13=c(arbre.tot$c13,arbre_mort_tot$c13),
ir5=c(arbre.tot$ir5,rep(NA,length=length(arbre_mort_tot$idp))),
htot=c(arbre.tot$htot,rep(NA,length=length(arbre_mort_tot$idp))),
hdec=c(arbre.tot$hdec,rep(NA,length=length(arbre_mort_tot$idp))),
decoupe=c(arbre.tot$decoupe,rep(NA,length=length(arbre_mort_tot$idp))),
q1=c(arbre.tot$q1,rep(NA,length=length(arbre_mort_tot$idp))),
q2=c(arbre.tot$q2,rep(NA,length=length(arbre_mort_tot$idp))),
q3=c(arbre.tot$q3,rep(NA,length=length(arbre_mort_tot$idp))),
r=c(arbre.tot$r,rep(NA,length=length(arbre_mort_tot$idp))),
lfsd=c(arbre.tot$lfsd,rep(NA,length=length(arbre_mort_tot$idp))),
age=c(arbre.tot$age,rep(NA,length=length(arbre_mort_tot$idp))),
v=c(arbre.tot$v,arbre_mort_tot$v),
w=c(arbre.tot$w,rep(10000/(pi*(c(15))^2),length=length(arbre_mort_tot$w))),## assume that all dead tree are sampled on the whole plot as explained in the method
YEAR=c(arbre.tot$YEAR,arbre_mort_tot$YEAR),
datemort=c(rep(NA,length=length(arbre.tot$YEAR)),arbre_mort_tot$datemort),
dead=c(rep(0,length=length(arbre.tot$YEAR)),rep(1,length=length(arbre_mort_tot$idp))))## 1 = dead
## delete plot with DEAD tree missing because of no C13 or no espar
arbre.ALIVE.DEAD2 <- arbre.ALIVE.DEAD[!(arbre.ALIVE.DEAD$idp %in% unique(c(names(tapply(is.na(arbre.ALIVE.DEAD$c13),
INDEX=arbre.ALIVE.DEAD$idp,FUN=sum))[tapply(is.na(arbre.ALIVE.DEAD$c13),INDEX=arbre.ALIVE.DEAD$idp,FUN=sum)>0],
names(tapply(is.na(arbre.ALIVE.DEAD$espar),INDEX=arbre.ALIVE.DEAD$idp,FUN=sum))[tapply(is.na(arbre.ALIVE.DEAD$espar),
INDEX=arbre.ALIVE.DEAD$idp,FUN=sum)>0]))),]
save(arbre.ALIVE.DEAD2,file='./data/process/arbre.ALIVE.DEAD2.Rdata')
########################################################################################
#########################################
#########################################
#########################################
##### LOAD DATA FOR PLOT INFO
### read DEAD TREE table downloaded from the web
placette2005 <- read.csv("./data/raw/DataFrance/2005/placettes_foret_2005.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(placette2005)
placette2006 <- read.csv("./data/raw/DataFrance/2006/placettes_foret_2006.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(placette2006)
placette2007 <- read.csv("./data/raw/DataFrance/2007/placettes_foret_2007.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(placette2007)
placette2008 <- read.csv("./data/raw/DataFrance/2008/placettes_foret_2008.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(placette2005)
placette2009 <- read.csv("./data/raw/DataFrance/2009/placettes_foret_2009.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(placette2009)
placette2010 <- read.csv("./data/raw/DataFrance/2010/placettes_foret_2010.csv",sep=";"
, stringsAsFactors=FALSE)
## summary(placette2010)
placette2011 <- read.csv("./data/raw/DataFrance/2011/placettes_foret_2011.csv",sep=";"
, stringsAsFactors=FALSE)
## names(placette2005)
## ## uta -> uta1
## ## sfo NA
## ## plisi NA
## names(placette2006)
## ## plisi NA
## names(placette2007)
## ## dcespar1 dcespar2 tpespar1 tpespar2 iti pentexp NA
## ## deleted acces
## names(placette2008)
## ## gest incid peupnr portance asperite
## names(placette2009)
## names(placette2010)
## names(placette2011)
placette2005$tplant <- as.character(placette2005$tplant)
placette2005$tplant[placette2005$tplant==""] <-0
placette2006$tplant[placette2006$tplant==""] <-0
### for selection of plot use plisi=0 dc=0 tplant=0
## incid indicateur d incident récent utilisé ou pas ?? avec ou sans perturbatrion naturelle
placette_tot <-data.frame(idp=c(placette2005$idp,placette2006$idp,placette2007$idp,placette2008$idp,placette2009$idp,placette2010$idp,
placette2011$idp),
xl93=c(placette2005$xl93,placette2006$xl93,placette2007$xl93,placette2008$xl93,placette2009$xl93,placette2010$xl93,
placette2011$xl93),
yl93=c(placette2005$yl93,placette2006$yl93,placette2007$yl93,placette2008$yl93,placette2009$yl93,placette2010$yl93,
placette2011$yl93),
dep=c(placette2005$dep,placette2006$dep,placette2007$dep,placette2008$dep,placette2009$dep,placette2010$dep,
placette2011$dep),
csa=c(placette2005$csa,placette2006$csa,placette2007$csa,placette2008$csa,placette2009$csa,placette2010$csa,
placette2011$csa),
plisi=c(rep(NA,length(placette2005$tm2)),rep(NA,length(placette2006$idp)),placette2007$plisi,placette2008$plisi,
placette2009$plisi,placette2010$plisi,placette2011$plisi),
uta1=c(placette2005$uta,placette2006$uta1,placette2007$uta1,placette2008$uta1,placette2009$uta1,placette2010$uta1,
placette2011$uta1),
tm2=c(placette2005$tm2,placette2006$tm2,placette2007$tm2,placette2008$tm2,placette2009$tm2,placette2010$tm2,
placette2011$tm2),
sfo=c(rep(NA,length(placette2005$tm2)),placette2006$sfo,placette2007$sfo,placette2008$sfo,placette2009$sfo,
placette2010$sfo,placette2011$sfo),
incid=c(rep(NA,length(placette2005$idp)),rep(NA,length(placette2006$idp)),rep(NA,length(placette2007$idp)),
rep(NA,length(placette2008$idp)),placette2009$incid,placette2010$incid,placette2011$incid),
dc=c(placette2005$dc,placette2006$dc,placette2007$dc,placette2008$dc,placette2009$dc,placette2010$dc,placette2011$dc),
tplant=c(placette2005$tplant,placette2006$tplant,placette2007$tplant,placette2008$tplant,placette2009$tplant,
placette2010$tplant,placette2011$tplant),
esspre=c(placette2005$esspre,placette2006$esspre,placette2007$esspre,placette2008$esspre,placette2009$esspre,
placette2010$esspre,placette2011$esspre),
cac=c(placette2005$cac,placette2006$cac,placette2007$cac,placette2008$cac,placette2009$cac,placette2010$cac,
placette2011$cac),
ess_age_1=c(placette2005$ess_age_1,placette2006$ess_age_1,placette2007$ess_age_1,placette2008$ess_age_1,
placette2009$ess_age_1,placette2010$ess_age_1,placette2011$ess_age_1),
YEAR=c(rep(2005,length(placette2005$idp)),rep(2006,length(placette2006$idp)),rep(2007,length(placette2007$idp)),
rep(2008,length(placette2008$idp)),rep(2009,length(placette2009$idp)),rep(2010,length(placette2010$idp)),
rep(2011,length(placette2011$idp))))
rm(placette2005,placette2006,placette2007,placette2008,placette2009,placette2010,placette2011)
save(placette_tot,file="./data/process/placette_tot.Rdata")
#####################################
## LOAD elevation data
##### LOAD ALTITUDE DATA
## load("./data/process/placette_tot.Rdata")
alti <- read.csv("./data/raw/DataFrance/altitude/SER_alti.csv",header=T,sep=";", stringsAsFactors=FALSE)
alti2011 <- read.csv("./data/raw/DataFrance/altitude/SER_alti_2011.csv",header=T,sep=";",stringsAsFactors=FALSE)
names(alti2011) <- names(alti)
alti.tot <- rbind(alti,alti2011)
## sum( placette_tot$idp %in% alti.tot$IDP)/length(placette_tot$idp)
## table(placette_tot$YEAR[! placette_tot$idp %in% alti.tot$IDP])
placette_tot.alti <- merge(placette_tot,alti.tot,by.x="idp",by.y="IDP")
### write csv file for Piedallu
write.csv(placette_tot.alti,"./data/process/placette_tot.alti.csv")
### write new fiel for 2011
write.csv(placette_tot.alti[placette_tot.alti$YEAR==2011,],"./data/process/placette_tot.alti.2011.csv")
#########################################
#########################################
#########################################
##### LOAD DATA ECOLOGIE
### read DEAD TREE table downloaded from the web
ecologie2005 <- read.csv("./data/raw/DataFrance/2005/ecologie_2005.csv",sep=";", stringsAsFactors=FALSE)
## summary(ecologie2005)
ecologie2006 <- read.csv("./data/raw/DataFrance/2006/ecologie_2006.csv",sep=";", stringsAsFactors=FALSE)
## summary(ecologie2006)
ecologie2007 <- read.csv("./data/raw/DataFrance/2007/ecologie_2007.csv",sep=";", stringsAsFactors=FALSE)
## summary(ecologie2007)
ecologie2008 <- read.csv("./data/raw/DataFrance/2008/ecologie_2008.csv",sep=";", stringsAsFactors=FALSE)
## summary(ecologie2005)
ecologie2009 <- read.csv("./data/raw/DataFrance/2009/ecologie_2009.csv",sep=";", stringsAsFactors=FALSE)
## summary(ecologie2009)
ecologie2010 <- read.csv("./data/raw/DataFrance/2010/ecologie_2010.csv",sep=";", stringsAsFactors=FALSE)
## summary(ecologie2010)
ecologie2011 <- read.csv("./data/raw/DataFrance/2011/ecologie_2011.csv",sep=";", stringsAsFactors=FALSE)
ecologie_tot <- rbind(ecologie2005,ecologie2006,ecologie2007,ecologie2008,ecologie2009,ecologie2010,ecologie2011)
head(ecologie_tot)
rm(ecologie2005,ecologie2006,ecologie2007,ecologie2008,ecologie2009,ecologie2010,ecologie2011)
save(ecologie_tot,file="./data/process/ecologie_tot.Rdata")
TRY.R 0 → 100644
########################################################
########################################################
###### READ TRY AND FORMAT DATA CHECK ERROR
################
#### use AccSpeciesName because not author name
source("./R/FUN.TRY.R")
library(MASS)
library(doParallel)
library(mvoutlier)
## read TRY data
TRY.DATA <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_04_01.txt",
sep = "\t",header=TRUE,na.strings="", stringsAsFactors=FALSE)
TRY.DATA2 <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_07_23.txt",
sep = "\t",header=TRUE,na.strings="", stringsAsFactors=FALSE)
### combine both data set
TRY.DATA <- rbind(TRY.DATA,TRY.DATA2)
rm(TRY.DATA2)
##################################
### ERROR FOUND IN THE DATA BASE
########################
### problem with the seed mass of this obs seed mass = 0 DELETE
TRY.DATA <- TRY.DATA[!(TRY.DATA$ObservationID==1034196 & TRY.DATA$DataName=="Seed dry mass"),]
#### IS "Quercuscrispla sp" an error standing for Quercus crispula synonym of Quercus mongolica subsp. crispula (Blume) Menitsky ? ask Jens
## TRY.DATA[TRY.DATA$AccSpeciesName=="Quercuscrispla sp" ,]
########################
########################
### first create a table with one row per Observation.id and column for each traits and variable
Non.Trait.Data <- c("Latitude", "Longitude", "Reference", "Date of harvest / measurement",
"Altitude", "Mean annual temperature (MAT)","Mean sum of annual precipitation (PPT)",
"Plant developmental status / plant age","Maximum height reference",
"Source in Glopnet", "Number of replicates", "Sun vers. shade leaf qualifier" )
Trait.Data <- sort(names(((table(TRY.DATA$TraitName)))))
##########################
#### REFORMAT DATA from TRY
registerDoParallel(cores=5) ## affect automaticaly half of the core detected to the foreach here I decide to affect 4 cores
getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6)
TRY.DATA.FORMATED <- foreach(ObservationID.t=unique(TRY.DATA$ObservationID), .combine=rbind) %dopar%
{
fun.extract.try(ObservationID.t,data=TRY.DATA,Non.Trait.Data,Trait.Data)
}
## head(TRY.DATA.FORMATED)
## dim(TRY.DATA.FORMATED)
saveRDS(TRY.DATA.FORMATED,file="./data/TRY.DATA.FORMATED.rds")
########################
########## READ RDS
TRY.DATA.FORMATED <- readRDS("./data/TRY.DATA.FORMATED.rds")
## TRY.DATA.FORMATED[TRY.DATA.FORMATED$ObservationID==1034196,"StdValue.Seed.mass"] <- NA
## head(TRY.DATA.FORMATED)
####################
####################
## COMPUTE MEAN AND SD FOR SPECIES from FRENCH NFI
key.main.traits2 <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass",
"StdValue.Seed.mass",
"StdValue.Leaf.specific.area..SLA.",
"StdValue.Stem.specific.density..SSD.",
"StdValue.Stem.conduit.area..vessel.and.tracheid.",
"StdValue.Leaf.lifespan")
###############################
##############################
## READ CSV TABLE WITH LATIN NAME and CODE FOR FRENCH NFI DATA
### NEED TO UPDATE WITH ALL SPECIES LATER
species.tab <- read.csv("./data/species.list/species.csv",sep="\t")
species.tab2 <- species.tab[!is.na(species.tab$Latin_name),]
rm(species.tab)
gc()
### species IFN reformat names
species.IFN <- unique(gsub("_", " ", species.tab2$Latin_name))
## clean species names and synonyme names
species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name))
species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn))
## remove trailing white space
species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn)
## ##############
## ## ### TRY TO CHECK SPECIES NAME WITH taxize
## library(taxize)
## tpl_get(dir_ = "~/foo2", family = "Scrophulariaceae")
## dat <- read.csv("~/foo2/Scrophulariaceae.csv")
## library(plyr)
## species <- as.character(ddply(dat[, c("Genus", "Species")], .(), transform,
## gen_sp = as.factor(paste(Genus, Species, sep = " ")))[, 4])
## slice <- function(input, by = 2) {
## starts <- seq(1, length(input), by)
## tt <- lapply(starts, function(y) input[y:(y + (by - 1))])
## llply(tt, function(x) x[!is.na(x)])
## }
## species_split <- slice(species, by = 100)
## tnrs_safe <- failwith(NULL, tnrs) # in case some calls fail, will continue
## out <- llply(species_split, function(x) tnrs_safe(x, getpost = "POST", sleep = 3))
## check.species.ifn <- tnrs(unique(species.tab2$Latin_name_syn)[-151],getpost="POST")
## check.species.ifn <- tnrs(unique(species.tab2$Latin_name_syn)[],getpost="POST")
## ####### problem with phylotastic tnrs question asked to Scott
## tnrs(c("Fagus sylvatica"), getpost="POST")
## tnrs(c("Fagus sylvatica"))
## tnrs(c("Fagus sylvatica"), getpost="POST", source_ = "iPlant_TNRS")
## tnrs(c("Fagus sylvatica"), getpost="POST", source_ = "NCBI")
## tnrs(c("Fagus sylvatica"), getpost="POST", source_ = "MSW3")
## tnrs(c("Quercus robur"))
## tnrs(c("Quercus robur"), getpost="POST", source_ = "iPlant_TNRS")
## tnrs(c("Quercus robur"), getpost="POST", source_ = "NCBI")
## tnrs(c("Quercus robur"), getpost="POST", source_ = "MSW3")
## http://taxosaurus.org/submit?query=Fagus+sylvatica
## {"status":"OK","names":[{"matchCount":2,"matches":[{"acceptedName":"","sourceId":"iPlant_TNRS","score":"1","matchedName":"Fagus sylvatica","annotations":{"Authority":""},"uri":""},{"acceptedName":"Fagus sylvatica","sourceId":"NCBI","score":"1","matchedName":"Fagus sylvatica","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/taxonomy/28930"}],"submittedName":"Fagus sylvatica"}],"metadata":{"spellcheckers":[{"name":"NCBI","description":"NCBI Spell Checker","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/","sourceId":1,"publication":"http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2578899/","call":"python2.6 tnrs_spellchecker/ncbi_spell.py","rank":1}],"sources":[{"status":"200: OK","name":"NCBI","description":"NCBI Taxonomy","uri":"http://www.ncbi.nlm.nih.gov/taxonomy","annotations":{},"sourceId":"NCBI","publication":"Federhen S. The Taxonomy Project.2002 Oct 9 [Updated 2003 Aug 13]. In: McEntyre J., Ostell J., editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US);2002.","rank":3,"code":"ICZN,ICN,ICNB"},{"status":"200: OK","name":"iPlant Collaborative TNRS v3.1","description":"The iPlant Collaborative TNRS provides parsing and fuzzy matching for plant taxa.","uri":"http://tnrs.iplantcollaborative.org/","annotations":{"Authority":"Author attributed to the accepted name (where applicable)."},"sourceId":"iPlant_TNRS","publication":"Boyle, B. et.al. The taxonomic name resolution service: an online tool for automated standardization of plant names. BMC Bioinformatics. 2013, 14:16. doi:10.1186/1471-2105-14-16. If you use TNRS results in a publication, please also cite The Taxonomic Name Resolution Service; http://tnrs.iplantcollaborative.org; version 3.1.","rank":2,"code":"ICN"},{"status":"200: OK","name":"Mammal Species of the World v3.0","description":"Mammal Species of the World, 3rd edition (MSW3) is a database of mammalian taxonomy. Our adaptor searches the indexed database for both exact and loose mathces","uri":"http://www.bucknell.edu/msw3/","annotations":{"Authority":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)"},"sourceId":"MSW3","publication":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)","rank":4,"code":"ICZN"}],"sub_date":"Thu Jul 18 16:36:23 2013","resolver_version":"1.2.0","jobId":"f7e6e1feba55b8f5d41a208630385630"}}
## http://taxosaurus.org/submit?query=Quercus+robur
## {"status":"OK","names":[{"matchCount":2,"matches":[{"acceptedName":"Quercus robur","sourceId":"iPlant_TNRS","score":"1","matchedName":"Quercus robur","annotations":{"Authority":"(Ten.) A. DC."},"uri":"http://www.tropicos.org/Name/50280607"},{"acceptedName":"Quercus robur","sourceId":"NCBI","score":"1","matchedName":"Quercus robur","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/taxonomy/38942"}],"submittedName":"Quercus robur"}],"metadata":{"spellcheckers":[{"name":"NCBI","description":"NCBI Spell Checker","annotations":{},"uri":"http://www.ncbi.nlm.nih.gov/","sourceId":1,"publication":"http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2578899/","call":"python2.6 tnrs_spellchecker/ncbi_spell.py","rank":1}],"sources":[{"status":"200: OK","name":"NCBI","description":"NCBI Taxonomy","uri":"http://www.ncbi.nlm.nih.gov/taxonomy","annotations":{},"sourceId":"NCBI","publication":"Federhen S. The Taxonomy Project.2002 Oct 9 [Updated 2003 Aug 13]. In: McEntyre J., Ostell J., editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US);2002.","rank":3,"code":"ICZN,ICN,ICNB"},{"status":"200: OK","name":"iPlant Collaborative TNRS v3.1","description":"The iPlant Collaborative TNRS provides parsing and fuzzy matching for plant taxa.","uri":"http://tnrs.iplantcollaborative.org/","annotations":{"Authority":"Author attributed to the accepted name (where applicable)."},"sourceId":"iPlant_TNRS","publication":"Boyle, B. et.al. The taxonomic name resolution service: an online tool for automated standardization of plant names. BMC Bioinformatics. 2013, 14:16. doi:10.1186/1471-2105-14-16. If you use TNRS results in a publication, please also cite The Taxonomic Name Resolution Service; http://tnrs.iplantcollaborative.org; version 3.1.","rank":2,"code":"ICN"},{"status":"200: OK","name":"Mammal Species of the World v3.0","description":"Mammal Species of the World, 3rd edition (MSW3) is a database of mammalian taxonomy. Our adaptor searches the indexed database for both exact and loose mathces","uri":"http://www.bucknell.edu/msw3/","annotations":{"Authority":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)"},"sourceId":"MSW3","publication":"Don E. Wilson & DeeAnn M. Reeder (editors). 2005. Mammal Species of the World. A Taxonomic and Geographic Reference (3rd ed)","rank":4,"code":"ICZN"}],"sub_date":"Thu Jul 18 16:44:45 2013","resolver_version":"1.2.0","jobId":"a00b4210bdfeb8c7e6d2f6bf7f10df04"}}
## lapply(unique(species.tab2$Latin_name_syn)[1:10],FUN=tnrs)
### find synonyme
getsynonymnamesfromtsn(tsn = 502590)
### find synonyme
tp_synonyms(id =502590 )
# Example R script which calls the TNRS in the context of adding names to a phylogeny
## FROM Boyle et al. 2013 BMC Bioinformatics
library(ape)
library(rjson)
library(RCurl)
tnrs.api<-'http://tnrs.iplantc.org/tnrsm-svc'
#Tree topology from Ackerly, D. 2009. Conservatism and diversification of plant functional traits: Evolutionary rates versus phylogenetic signal. PNAS 106:19699--19706.
lobelioids.string<-'((((((Lobelia_kauaensis,Lobelia_villosa),Lobelia_gloria-montis),(Trematolobelia_kauaiensis,Trematolobelia_macrostachys)),((Lobelia_hypoleuca,Lobelia_yuccoides),Lobelia_niihauensis)),((Brighamia_insignis,Brighamia_rockii),(Delissea_rhytidosperma,Delissea_subcordata))),((((Cyanea_pilosa,Cyanea_acuminata),Cyanea_hirtella),(Cyanea_coriacea,Cyanea_leptostegia)),(((Clermontia_kakeana,Clermontia_parviflora),Clermontia_arborescens),Clermontia_fauriei)));'
#Transform the newick sting into an ape phylo object
tree<-read.tree(text=lobelioids.string)
#Obtain the taxa names
old.names<-tree$tip.label
#Change the underscore characters into blank spaces
old.names<-gsub('_',' ',old.names)
old.names <- species.tab2$Latin_name_syn
#Transporms the vector into a string
old.names<-paste(old.names,collapse=',')
#The string needs to be URL-encoded
old.names<-curlEscape(old.names)
#Send a request to the TNRS service
url<-paste(tnrs.api,'/matchNames?retrieve=best&names=',old.names,sep='')
tnrs.json<-getURL(url)
#The response needs to be converted from JSON
tnrs.results<-fromJSON(tnrs.json)
#The corrected names are extracted from the response
names<-sapply(tnrs.results[[1]], function(x) c(x$nameSubmitted,x$acceptedName))
names<-as.data.frame(t(names),stringsAsFactors=FALSE)
#If TNRS did not return any accepted name (no match, or name is already accepted), the submitted name is retained
names[names[,2]=="",2]<-names[names[,2]=="",1]
### SAME ERROR FOR FAGUS SYLVATICA TEH WEB SITE GIVE A GOOD RESULTS BUT NOT THE CALL FROM R ?
### change format try species names
TRY.DATA.FORMATED$AccSpeciesName <- as.character(TRY.DATA.FORMATED$AccSpeciesName)
#### extract mean and sd per species without experimental data and detection of outlier when enough data or if not enough data compute mean of genus
### The detection of outlier is based on teh method in Kattge et al. 2011 only for univariate outlier. I have try other univariate and multivariate method of detection of outlier but didn't work well
res.list <- lapply(species.IFN,FUN=fun.species.traits,species.table=species.tab2,traits=key.main.traits2,data=TRY.DATA.FORMATED)
names(res.list) <- species.IFN
##### TRANSFORM LIST IN A TABLE
data.mean <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$mean
,res.list=res.list))
data.sd <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$sd,res.list=res.list))
data.exp <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$exp
,res.list=res.list))
data.genus <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$genus
,res.list=res.list))
data.nobs <- t(sapply(species.IFN,FUN=function(i,res.list) res.list[[i]]$nobs
,res.list=res.list))
#### data frame with all
data.ifn.species.try.noout <- data.frame(data.mean,data.sd,data.exp,data.genus,data.nobs)
names(data.ifn.species.try.noout) <- c(paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"mean",sep=".")
,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"sd",sep=".")
,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"exp",sep=".")
,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"genus",sep=".")
,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density"
,"Vessel.area","Leaf.Lifespan"),"nobs",sep="."))
#### check species with genus mean
## data.ifn.species.try.noout[data.ifn.species.try.noout$SLA.genus,]
## data.ifn.species.try.noout[data.ifn.species.try.noout$SLA.genus |data.ifn.species.try.noout$Wood.Density.genus |data.ifn.species.try.noout$Seed.mass.genus ,]
saveRDS(data.ifn.species.try.noout ,file="./data/data.ifn.species.try.noout.rds")
####
data.ifn.species.try.noout <- readRDS("./data/data.ifn.species.try.noout.rds")
#####################################################################
#### assume that the SD is equal mean species if less than 10 obs same for genus
########
## USE TABLE 5 in Kattge et al. 2011
### LMA species sd log 0.09
### Nmass species sd log 0.08
### Seed Mass sd log 0.13
#### SPECIES mean sd
######
# for wood density no value reportedin Kattge et al so need to compute mean sd for species withe more than 5 obs
data.sd.WD.log <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Stem.specific.density..SSD.),TRY.DATA.FORMATED$AccSpeciesName)
sd.log.WD <- mean(data.sd.WD.log[data.sd.WD.log[,2]>4 & !is.na(data.sd.WD.log[,2]),1])
sd.log.SLA <- 0.09 ### based on Kattge et al. 2011
sd.log.Nmass <- 0.08 ### based on Kattge et al. 2011
sd.log.Seed.Mass <- 0.13 ### based on Kattge et al. 2011
sd.log.LL <- 0.03 ### based on Kattge et al. 2011
#######
##### COMPUTE GENUS MEAN SD IN THIS TRY DATA EXTRACTION AS NOT REPORTED IN Kattge et al. 2011
data.sd.WD.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Stem.specific.density..SSD.),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.WD.genus <- mean(data.sd.WD.log.genus[data.sd.WD.log.genus[,"nobs"]>10 & !is.na(data.sd.WD.log.genus[,"nobs"]),"sd"])
data.sd.SLA.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.specific.area..SLA.),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.SLA.genus <- mean(data.sd.SLA.log.genus[data.sd.SLA.log.genus[,"nobs"]>10 & !is.na(data.sd.SLA.log.genus[,"nobs"]),"sd"])
data.sd.LL.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.lifespan),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.LL.genus <- mean(data.sd.LL.log.genus[data.sd.LL.log.genus[,"nobs"]>10 & !is.na(data.sd.LL.log.genus[,"nobs"]),"sd"])
data.sd.Seed.Mass.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Seed.mass),TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.Seed.Mass.genus <- mean(data.sd.Seed.Mass.log.genus[data.sd.Seed.Mass.log.genus[,"nobs"]>10 &
!is.na(data.sd.Seed.Mass.log.genus[,"nobs"]),"sd"])
data.sd.Nmass.log.genus <- fun.sd.sp.or.genus(log10(TRY.DATA.FORMATED$StdValue.Leaf.nitrogen..N..content.per.dry.mass),
TRY.DATA.FORMATED$AccSpeciesName,genus=TRUE)
sd.log.Nmass.genus <- mean(data.sd.Nmass.log.genus[data.sd.Nmass.log.genus[,"nobs"]>10 & !is.na(data.sd.Nmass.log.genus[,"nobs"]),"sd"])
#############
### change value of sd if less than 10 obs assume sd mean
nobs.names <- paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Leaf.Lifespan")
,"nobs",sep=".")
sd.names <- paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Leaf.Lifespan")
,"sd",sep=".")
genus.names <- paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Leaf.Lifespan")
,"genus",sep=".")
sd.vec.sp <- c(sd.log.Nmass,sd.log.Seed.Mass,sd.log.SLA,sd.log.WD,sd.log.LL)
sd.vec.genus <- c(sd.log.Nmass.genus,sd.log.Seed.Mass.genus,sd.log.SLA.genus,sd.log.WD.genus,sd.log.LL.genus)
### function to select obs with less than Nthresh obs
fun.select.sd.with.to.few.obs.sp <- function(data,sd.names,nobs.names,genus.names,Nthreshold=2){
(data[[nobs.names[i]]]<Nthreshold & !is.na(data[[nobs.names[i]]])
& !data[[genus.names[i]]]) }
fun.select.sd.with.to.few.obs.genus <- function(data,sd.names,nobs.names,genus.names,Nthreshold=2){
(data[[nobs.names[i]]]<Nthreshold & !is.na(data[[nobs.names[i]]])
& data[[genus.names[i]]]) }
####
data.TRY.sd.update <- data.ifn.species.try.noout
for (i in 1:length(nobs.names)){
print( sd.names[i])
print("species")
print(fun.select.sd.with.to.few.obs.sp(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10))
data.TRY.sd.update[[sd.names[i]]][fun.select.sd.with.to.few.obs.sp(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10)] <-
sd.vec.sp[i]
print("genus")
print(fun.select.sd.with.to.few.obs.genus(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10))
data.TRY.sd.update[[sd.names[i]]][fun.select.sd.with.to.few.obs.genus(data=data.TRY.sd.update,sd.names,nobs.names,genus.names,Nthreshold=10)] <-
sd.vec.genus[i]
}
saveRDS(data.TRY.sd.update,file="./data/data.TRY.sd.update.rds")
###
# plot sd to show mark
pdf("./figs/sd.traits.pdf")
r <- barplot(sd.vec.sp ,names.arg=c("Leaf.N","SM","SLA","WD","LL"),las=2,ylim=c(0,0.6),ylab="sd log10")
for (i in 1:length(nobs.names)){
sd.obs <- data.TRY.sd.update[[sd.names[i]]][!data.TRY.sd.update[[genus.names[i]]]]
points(rep(r[i,1],length(sd.obs)),sd.obs)
points(r[i,1],sd.vec.genus[i],col="red",pch=16,cex=2)
sd.obs <- data.TRY.sd.update[[sd.names[i]]][data.TRY.sd.update[[genus.names[i]]]]
points(rep(r[i,1],length(sd.obs)),sd.obs,col="red",pch=4)
print(sd.obs)
}
dev.off()
species.list.R 0 → 100644
####################################################
####################################################
##### SPECIES LIST FOR WORKSHOP
#################################
#################################
#### READ SPECIES LIST I HAVE FOR NOW FOR EACH SITES NON TROPICAL
#### (ASSUMING THAT TROPICAL SITES COMES WITH TRAITS DATA
#############################3
### US based on FIA full species list
Data.Species.FIA <- read.csv("./data/species.list/REF_SPECIES.CSV",stringsAsFactors=FALSE,na.strings="")
## head(Data.Species.FIA)
## merge genus & species
Data.Species.FIA$sp <- gsub("spp.","sp",paste(Data.Species.FIA$GENUS,Data.Species.FIA$SPECIES))
Species.FIA.short <- Data.Species.FIA$sp[Data.Species.FIA$SPCD<6000]
Species.FIA.long <- Data.Species.FIA$sp
####################################
## US SPECIES Based on Burns and Honkala
BurnsAndHonkalaSpeciesSummary<- read.csv("./data/species.list/BurnsAndHonkalaSpeciesSummary.csv",
stringsAsFactors=FALSE,na.strings="",sep=";")
## head(BurnsAndHonkalaSpeciesSummary)
## clean species name
fun.get.sp <- function(x) gsub(paste(" ",gsub("^([a-zA-Z]* [a-zA-Z]* )","",x),sep=""),"",x,fixed=TRUE)
BurnsAndHonkalaSpeciesSummary$sp <- sapply(BurnsAndHonkalaSpeciesSummary[,"Scientific_name"],fun.get.sp)
Species.Burns <- BurnsAndHonkalaSpeciesSummary$sp
#########################################
### canada check http://en.wikipedia.org/wiki/List_of_trees_of_Canada
Data.canadian.species <- read.csv("./data/species.list/canadian.species.csv",stringsAsFactors=FALSE)
Species.Canada <- Data.canadian.species$species
#########################
## SPAIN BASED ON TABLE PROVIDED BY MIGUEL
Data.Species.Spain<- read.csv("./data/species.list/List_120_SPP.csv",
stringsAsFactors=FALSE)
## head(Data.Species.Spain)
Species.Spain <- Data.Species.Spain$Scoentific.Name
#########################################
### SPECIE Sin Swizerland not in FRANCE
Species.Swiss <- c('Pinus montana','Ulmus scabra')
####################################3
#### SWEDEN
Data.Sweden <- read.csv("./data/species.list/Swedish_NFI_tree_species.csv",
sep=";",stringsAsFactors=FALSE)
Species.Sweden <- Data.Sweden$Species
###########################
###########################
## NEW ZEALAND
load("./data/species.list/vec.code.nvs.Rdata") ### load list of species code from the NVS data I used to calibrate SORTIE
Data.NVS.species <- read.csv("./data/species.list/CurrentNVSNames.csv",sep=";",stringsAsFactors=FALSE)
table.NVS.tree <- Data.NVS.species[Data.NVS.species$NVS.Code %in% vec.code.nvs,]
write.csv(table.NVS.tree,file="./data/species.list/table.NVS.tree.csv")
Species.NZ <- table.NVS.tree$Species.Name
###########################
###########################
## NSW
Data.NSW <- read.csv("./data/species.list/Sub-trop_RF_trees.csv",sep=";",stringsAsFactors=FALSE)
Species.NSW <- Data.NSW$Species..trees...10cm.dbhob.
###########################################
###########################################
#### LOAD TRY SPECIES
try.species <- read.csv("./data/species.list/SPECIES.TRY.csv",
stringsAsFactors=FALSE,header=TRUE)
## head(try.species,20)
## dim(try.species)
## genus already asked to try
genus.asked.try <- read.csv("./data/species.list/genus.asked.try.csv")$genus.asked.try
#### LIST OF ALL SPECIES with short FIA
species.vector <- unique(c(Species.Spain,Species.Canada,Species.Burns
,Species.Swiss,Species.Sweden,Species.NZ,Species.NSW,Species.FIA.short))
#
### get genus
fun.get.genus <- function(x) gsub(paste(" ",gsub("^([a-zA-Z]* )","",x),sep=""),"",x,fixed=TRUE)
Genus.tot <- unique(sapply(species.vector,fun.get.genus))
## all FIA data
## species.vector2 <- unique(c(Species.Spain,Species.Canada,Species.Burns
## ,Species.Swiss,Species.Sweden,as.character(Species.NZ),Species.NSW,Species.FIA.long))
## Genus.tot2 <- unique(sapply(species.vector2,fun.get.genus))
#### GENUS TO ASK
GENUS.to.ASK <- Genus.tot[!(Genus.tot %in% genus.asked.try)]
## GENUS.to.ASK2 <- Genus.tot2[!(Genus.tot2 %in% genus.asked.try)]
## CHECK which genus are in TRY
GENUS.TRY <- unique(sapply(try.species$AccSpeciesName,fun.get.genus))
#############################3
genus.list.short <- sort(GENUS.to.ASK[(GENUS.to.ASK %in% GENUS.TRY)])
write.csv(as.data.frame(genus.list.short),file="./data/process/genus.list.try.csv")
sort(GENUS.to.ASK[!(GENUS.to.ASK %in% GENUS.TRY)])
## genus.lis.long <- sort(GENUS.to.ASK2[(GENUS.to.ASK2 %in% GENUS.TRY)])
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment