diff --git a/.gitignore b/.gitignore index 5b152fa262e65bfdc441348b13a27379b96ff368..9ad6799d8856b909b007abb1196c2150610cdaf8 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ figs *.html *.Rdata *.rds +*.R~ docs/metadata/sites .Rproj.user GROWTH.MODEL.* diff --git a/R/analysis/run.local.R b/R/analysis/run.local.R index eddc0a6c6ed736bfc78180bb24c5b8d1af5466f5..400d7bc47ca62273c34fb9750c1107749f9f0608 100644 --- a/R/analysis/run.local.R +++ b/R/analysis/run.local.R @@ -4,13 +4,14 @@ source("R/analysis/JAGS.run.R") run.jags("R/analysis/model.LOGLIN.linearized.HD.R", "SLA", "France", "AB", sample.size=5000,type.model=2) -run.jags("R/analysis/model.LOGLIN.HD.R", "SLA", "France", "AB", sample.size=5000,type.model=1) +run.jags("R/analysis/model.LOGLIN.HD.R", "SLA", "France", "AB", sample.size=500,type.model=1) model.files <- c("R/analysis/model.LOGLIN.NULL.R", "R/analysis/model.LOGLIN.2.R") run.multiple.jags.for.set.one.trait(model.files, "SLA", "France") #========== # Test lmer + run.lmer("R/analysis/model.lmer.LOGLIN.AD.R", "SLA", "France", "AB", sample.size=50000) run.lmer("R/analysis/model.lmer.LOGLIN.HD.R", "SLA", "France", "AB", sample.size=50000) run.lmer("R/analysis/model.lmer.LOGLIN.nocomp.R", "SLA", "France", "AB", sample.size=50000) @@ -51,6 +52,3 @@ r2.nocomp <- r2.calc(obs,pred.nocomp) # -0.48, Very bad model due to log and abs r2.AD <- r2.calc(obs,pred.AD) # 0.60 ! r2.HD <- r2.calc(obs,pred.HD) # -0.52 r2.simplecomp <- r2.calc(obs,pred.simplecomp) # -0.52 - - - diff --git a/R/analysis/theo.model.R b/R/analysis/theo.model.R index ecba673f7b9f61441adbb2e3c137f9b1298ab4fd..6018ccd2f0cb1349a78845cdb70ca385582d8dd4 100644 --- a/R/analysis/theo.model.R +++ b/R/analysis/theo.model.R @@ -3,112 +3,268 @@ ### explore the link between competitive effect/response and abs trait distance ### G. Kunstler 21/09/2013 +setwd("../..") -## range between -1 and 1 -min.t <- -1 +### explore models of link between traits and compet +## traits range between 0 and 1 + +min.t <- 0 max.t <- 1 a <- seq(min.t,max.t,length=100) b <- seq(min.t,max.t,length=100) -mat.abs.dist <- outer(a,b,FUN= function(x,y) abs(a-y)) -mat.hier.dist <- outer(a,b,FUN='-') - ## fit to response/effect model a.r <- runif(1000,min.t,max.t) b.r <- runif(1000,min.t,max.t) -abs.r <- abs(a.r-b.r) -hier.r <- (a.r-b.r) +abs.r <- 1-abs(a.r-b.r) +hier.r <- 0.5*(a.r-b.r) lm.abs.dist <- lm(abs.r~a.r*b.r) -library(nlme4) -nls.abs.dist <- nls(abs.r~(b*a.r+c*b.r+b*c*b.r),data=data.frame(abs.r=abs.r,a.r=a.r,b.r=b.r),start=list(b=0.1,c=0.2,d=0.2)) lm.hier.dist <- lm(hier.r~a.r*b.r) -## predict -mat.abs.dist.pred <- matrix(NA,100,100) -mat.hier.dist.pred <- matrix(NA,100,100) +lm.abs.dist.nointer <- lm(abs.r~a.r+b.r) + +logistic <- function(x) 1/(1+exp(-(x))) + +mat.c1.dist.pred <- matrix(NA,100,100) +mat.c2.dist.pred <- matrix(NA,100,100) +mat.c3.dist.pred <- matrix(NA,100,100) +mat.c4.dist.pred <- matrix(NA,100,100) +mat.c5.dist.pred <- matrix(NA,100,100) +mat.c6.dist.pred<-matrix(NA,100,100) +mat.c7.dist.pred<-matrix(NA,100,100) for (i in 1:100){ for (j in 1:100){ - mat.abs.dist.pred[i,j] <- predict(lm.abs.dist,newdata=data.frame(a.r=a[i],b.r=b[j])) - mat.hier.dist.pred[i,j] <- predict(lm.hier.dist,newdata=data.frame(a.r=a[i],b.r=b[j])) + mat.c1.dist.pred[i,j] <- 0.5+0.5*a[i]-0.5*b[j] + mat.c2.dist.pred[i,j] <- 1-abs(a[i]-b[j]) + mat.c3.dist.pred[i,j] <- 0.65+0.04*a[i] -0.04*b[j] #1.272-1.206*a[i]-1.206*b[j]+2.4604*a[i]*b[j] + mat.c4.dist.pred[i,j] <-(0.769-1.568*b[j])*(0.769-1.568*a[i]) # (1.127-1.07*b[j])*(1.127-1.07*a[i]) # + mat.c5.dist.pred[i,j] <- (1-1*b[j])*(1*a[i]) + mat.c6.dist.pred[i,j] <- 0.5-0.5*b[j]+0.5*a[i]-0*a[i]*b[j] + mat.c7.dist.pred[i,j] <- (0.5+1.5*b[j])*(-0.5+1.5*a[i]) + } } -par(mfrow=c(2,2)) -image(mat.abs.dist) -image(mat.abs.dist.pred) -image(mat.hier.dist) -image(mat.hier.dist.pred) +png(file="./figs/compet_model.png",width=600,height=900) +m <- matrix(c(1:2,7,3,4,7,5,6,7), 3, 3,byrow=TRUE) +nclass <- 12 +layout(m, widths=c(3, 3,0.5),heights=c(1, 1,1)) +zrange <- range(mat.c1.dist.pred,mat.c2.dist.pred,mat.c3.dist.pred,mat.c4.dist.pred,mat.c5.dist.pred) +library(RColorBrewer) +breakss <- c(seq(zrange[1],0,length=4),seq(0,zrange[2],length=nclass+1+1-4)[-1]) +col.pal <- c(rev(brewer.pal(4-1,"Blues")),brewer.pal(nclass-4+1,"Reds")) +image(x=a,y=b,z=mat.c1.dist.pred,zlim=zrange,xlab="tn",ylab="tf",main="Hierarchical distance",col=col.pal,breaks=breakss) +contour(a,b,mat.c1.dist.pred,levels=0,add=TRUE,col="red") +image(a,b,mat.c2.dist.pred,zlim=zrange,xlab="tn",ylab="tf",main="Absolute distance",col=col.pal,breaks=breakss) +contour(a,b,mat.c2.dist.pred,levels=0,add=TRUE,col="red") +image(a,b,mat.c6.dist.pred,zlim=zrange,xlab="tn",ylab="tf",main="linear with linear increase of both and no interaction",col=col.pal,breaks=breakss) +contour(a,b,mat.c6.dist.pred,levels=0,add=TRUE,col="red") +image(a,b,mat.c3.dist.pred,zlim=zrange,xlab="tn",ylab="tf",main="linear with best fit to abs dist",col=col.pal,breaks=breakss) +contour(a,b,mat.c3.dist.pred,levels=0,add=TRUE,col="red") +image(a,b,mat.c5.dist.pred,zlim=zrange,xlab="tn",ylab="tf",main="multiplicative with linear increase of both",col=col.pal,breaks=breakss) +contour(a,b,mat.c5.dist.pred,levels=0,add=TRUE,col="red") +image(a,b,mat.c4.dist.pred,zlim=zrange,xlab="tn",ylab="tf",main="multiplicative with close to best fit to abs dist",col=col.pal,breaks=breakss) +contour(a,b,mat.c4.dist.pred,levels=0,add=TRUE,col="red") +par(mar=c(0,0,0,0)) +image(x=1,y=breakss,matrix(breakss[-1],1,nclass),col=col.pal,breaks=breakss) +dev.off() -## range between 0 and 1 -min.t <- 0 -max.t <- 1 -a <- seq(min.t,max.t,length=100) -b <- seq(min.t,max.t,length=100) +png(file="./figs/compet_facill_abs.png",width=200,height=600) +par(mfrow=c(3,1)) +image(a,b,mat.c4.dist.pred,xlab="tn",ylab="tf",main="multiplicative with close best fit to abs dist",col=col.pal,breaks=breakss) +plot(a,-0.769+1.568*a,type="l",xlab="tn",ylab="competitive effect") +abline(h=0,lty=2) +plot(b,0.769-1.568*b,type="l",xlab="tf",ylab="competitive response") +abline(h=0,lty=2) +dev.off() -mat.abs.dist <- outer(a,b,FUN= function(x,y) abs(a-y)) -mat.hier.dist <- outer(a,b,FUN='-') +#### 4 different multiplicative +png(file="./figs/multi4models.png",width=1200,height=900) +mat.c7.dist.pred<-matrix(NA,100,100) +for (i in 1:100){ + for (j in 1:100){ + mat.c7.dist.pred[i,j] <- (0+1*b[j])*(0+1*a[i]) + } + } -## fit to response/effect model -a.r <- runif(1000,min.t,max.t) -b.r <- runif(1000,min.t,max.t) -abs.r <- abs(a.r-b.r) -hier.r <- (a.r-b.r) -lm.abs.dist <- lm(abs.r~a.r*b.r) -lm.hier.dist <- lm(hier.r~a.r*b.r) -## predict -mat.abs.dist.pred <- matrix(NA,100,100) -mat.hier.dist.pred <- matrix(NA,100,100) +par(mfcol=c(3,4)) +image(a,b,mat.c7.dist.pred,xlab="tn",ylab="tf",main="(0+b)(0+a)",col=col.pal,breaks=breakss) +plot(a,0+1*a,type="l",xlab="tn",ylab="competitive effect") +abline(h=0,lty=2) +plot(b,0+1*b,type="l",xlab="tf",ylab="competitive response") +abline(h=0,lty=2) + +mat.c7.dist.pred<-matrix(NA,100,100) for (i in 1:100){ for (j in 1:100){ - mat.abs.dist.pred[i,j] <- predict(lm.abs.dist,newdata=data.frame(a.r=a[i],b.r=b[j])) - mat.hier.dist.pred[i,j] <- predict(lm.hier.dist,newdata=data.frame(a.r=a[i],b.r=b[j])) + mat.c7.dist.pred[i,j] <- (1-1*b[j])*(0+1*a[i]) + } + } + +image(a,b,mat.c7.dist.pred,xlab="tn",ylab="tf",main="(1-b)(0+a)",col=col.pal,breaks=breakss) +plot(a,0+1*a,type="l",xlab="tn",ylab="competitive effect") +abline(h=0,lty=2) +plot(b,1-1*b,type="l",xlab="tf",ylab="competitive response") +abline(h=0,lty=2) +mat.c7.dist.pred<-matrix(NA,100,100) +for (i in 1:100){ + for (j in 1:100){ + mat.c7.dist.pred[i,j] <- (1-1*b[j])*(1-1*a[i]) } } -x11() -par(mfrow=c(2,2)) -image(mat.abs.dist) -image(mat.abs.dist.pred) -image(mat.hier.dist) -image(mat.hier.dist.pred) +image(a,b,mat.c7.dist.pred,xlab="tn",ylab="tf",main="(1-b)(1-a)",col=col.pal,breaks=breakss) +plot(a,1-a,type="l",xlab="tn",ylab="competitive effect") +abline(h=0,lty=2) +plot(b,1-1*b,type="l",xlab="tf",ylab="competitive response") +abline(h=0,lty=2) -### explore other shape -lm.hier.dist -lm.abs.dist +mat.c7.dist.pred<-matrix(NA,100,100) +for (i in 1:100){ + for (j in 1:100){ + mat.c7.dist.pred[i,j] <- (0+1*b[j])*(1-1*a[i]) + } + } -min.t <- 0 -max.t <- 1 -a <- seq(min.t,max.t,length=100) -b <- seq(min.t,max.t,length=100) +image(a,b,mat.c7.dist.pred,xlab="tn",ylab="tf",main="(0+b)(1-a)",col=col.pal,breaks=breakss) +plot(a,1-a,type="l",xlab="tn",ylab="competitive effect") +abline(h=0,lty=2) +plot(b,0+1*b,type="l",xlab="tf",ylab="competitive response") +abline(h=0,lty=2) +dev.off() +## 4 different additive +png(file="./figs/additiv4models.png",width=1200,height=900) +mat.c7.dist.pred<-matrix(NA,100,100) +for (i in 1:100){ + for (j in 1:100){ + mat.c7.dist.pred[i,j] <- 0.5*(1*b[j]+1*a[i]) + } + } -mat.c1.dist.pred <- matrix(NA,100,100) -mat.c2.dist.pred <- matrix(NA,100,100) -mat.c3.dist.pred <- matrix(NA,100,100) -mat.c4.dist.pred <- matrix(NA,100,100) -mat.c5.dist.pred <- matrix(NA,100,100) -mat.c6.dist.pred <- matrix(NA,100,100) +par(mfcol=c(3,4)) +image(a,b,mat.c7.dist.pred,xlab="tn",ylab="tf",main="(0.5*b+0.5*a)",col=col.pal,breaks=breakss) +plot(a,0+0.5*a,type="l",xlab="tn",ylab="competitive effect") +abline(h=0,lty=2) +plot(b,0+0.5*b,type="l",xlab="tf",ylab="competitive response") +abline(h=0,lty=2) + +mat.c7.dist.pred<-matrix(NA,100,100) +for (i in 1:100){ + for (j in 1:100){ + mat.c7.dist.pred[i,j] <- 0.5*(1-1*b[j]+1*a[i]) + } + } + +image(a,b,mat.c7.dist.pred,xlab="tn",ylab="tf",main="(0.5-0.5*b+0.5a)",col=col.pal,breaks=breakss) +plot(a,0+0.5*a,type="l",xlab="tn",ylab="competitive effect") +abline(h=0,lty=2) +plot(b,0.5-0.5*b,type="l",xlab="tf",ylab="competitive response") +abline(h=0,lty=2) + +mat.c7.dist.pred<-matrix(NA,100,100) for (i in 1:100){ for (j in 1:100){ - mat.c1.dist.pred[i,j] <- 0.01+a[i]-b[j] - mat.c2.dist.pred[i,j] <- 0.01+abs(a[i]-b[j]) - mat.c3.dist.pred[i,j] <- -0.25+1.17*a[i]+1.17*b[j]-2.32*a[i]*b[j] - mat.c4.dist.pred[i,j] <- (0.76-1.52*b[j])*(-0.76+1.52*a[i]) - mat.c5.dist.pred[i,j] <- (1-1*b[j])*(0+1*a[i]) - mat.c6.dist.pred[i,j] <- 1-1*b[j]+a[i]-0*a[i]*b[j] - + mat.c7.dist.pred[i,j] <- 0.5*(2-1*b[j]-1*a[i]) } } + + +image(a,b,mat.c7.dist.pred,xlab="tn",ylab="tf",main="(1-0.5*b-0.5a)",col=col.pal,breaks=breakss) +plot(a,0.5-0.5*a,type="l",xlab="tn",ylab="competitive effect") +abline(h=0,lty=2) +plot(b,0.5-0.5*b,type="l",xlab="tf",ylab="competitive response") +abline(h=0,lty=2) + +mat.c7.dist.pred<-matrix(NA,100,100) +for (i in 1:100){ + for (j in 1:100){ + mat.c7.dist.pred[i,j] <- 0.5*(1+1*b[j]-1*a[i]) + } + } + +image(a,b,mat.c7.dist.pred,xlab="tn",ylab="tf",main="(0.5+0.5*b-0.5a)",col=col.pal,breaks=breakss) +plot(a,0.5-0.5*a,type="l",xlab="tn",ylab="competitive effect") +abline(h=0,lty=2) +plot(b,0.5*b,type="l",xlab="tf",ylab="competitive response") +abline(h=0,lty=2) +dev.off() + + +plot(as.vector(mat.c2.dist.pred),as.vector(mat.c4.dist.pred),xlab="abs dist",ylab="multiplicative best fit") +plot(as.vector(mat.c2.dist.pred),as.vector(mat.c3.dist.pred),xlab="abs dist",ylab="multiplicative best fit") +### what is the meaning of negative competitive effect or response ? facilitation ? + + +## TRY TO UNDERSTAND HOW the multiplicative model reduce the space in comparison with full model +max.r <- 20 +a <- runif(100000,-max.r,max.r) +b <- runif(100000,-max.r,max.r) +c <- runif(100000,-max.r,max.r) +d <- runif(100000,-max.r,max.r) + +a.2 <- a*c +b.2 <- c*b +c.2 <- a*d +d.2 <- b*d + +mat.param <- cbind(a.2,b.2,c.2,d.2) +pairs(mat.param) +library(scatterplot3d) +png(file="./figs/param.space.png",width=20,height=20) +par(mfrow=c(2,2)) +scatterplot3d(c.2,b.2,d.2,highlight.3d=TRUE,cex.symbols=0.1) + +a.2.s <- b.2.s <- seq(-max.r,max.r,length=100) +mat.d <- outer(a.2.s,b.2.s,FUN=function(x,y,max.r) {x*y/(max.r*max.r)},max.r=max.r) +mat.d2 <- outer(a.2.s,b.2.s,FUN=function(x,y,max.r) {x*y/(-max.r*max.r)},max.r=max.r) +persp(a.2.s,b.2.s,mat.d, theta = 10, phi = 15, expand = 0.99, col = "lightblue") +persp(a.2.s,b.2.s,mat.d2, theta = 25, phi = 15, expand = 0.99, col = "lightblue") +scatterplot3d(c(c.2,c.2),c(b.2,b.2),c(d.2.a,d.2.b),color=c(rep("red",length(b.2)),rep("blue",length(b.2))),cex.symbols=0.1) +dev.off() + x11() -par(mfrow=c(3,2)) -image(a,b,mat.c1.dist.pred,xlab="tn",ylab="tf",main="Hierarchical distance") -image(a,b,mat.c2.dist.pred,xlab="tn",ylab="tf",main="Absolute distance") -image(a,b,mat.c6.dist.pred,xlab="tn",ylab="tf",main="linear with linear increase of both and negative interaction") -image(a,b,mat.c3.dist.pred,xlab="tn",ylab="tf",main="linear with best fit to abs dist") -image(a,b,mat.c5.dist.pred,xlab="tn",ylab="tf",main="multiplicative with linear increase of both") -image(a,b,mat.c4.dist.pred,xlab="tn",ylab="tf",main="multiplicative with best fit to abs dist") - -par(mfrow=c(1,2)) -plot(as.vector(mat.c1.dist.pred),as.vector(mat.c5.dist.pred)) -plot(as.vector(mat.c2.dist.pred),as.vector(mat.c4.dist.pred)) +par(mfrow=c(3,3)) + ss <- 1 +for (lim in c(-300,-200,-100,-50,0,50,100,200,300)){ + plot(c.2[b.2> (lim-ss) & b.2<(lim+ss)],d.2[b.2> (lim-ss) & b.2<(lim+ss)],xlab="c.2",ylab="d.2",main=paste("slice for b.2 + or - ",ss,"of",lim)) + lines((-400):400,(-400):400*lim/(max.r*max.r),col="red") + lines((-400):400,(-400):400*lim/(-max.r*max.r),col="red") +} + +#### DAVID CODE + +# response and effect coefficients +r0 = 1; r1 = .5; e0 = 1; e1 = 0.5 +#traits +t1 = 1:100 +t2 = 1:100 +#work out the response/effect model +l = matrix(data = 0, nrow = 100, ncol = 100) +for (i in 1:100) l[i,] = ((r0+ r1*t1[i])*(e0 + e1*t2)) +filled.contour(t1,t2, l, xlab = "trait1", ylab = "trait2", main = "hierarchical") +#work out absolute model +l1 = matrix(data = 0, nrow = 100, ncol = 100) +for (i in 1:100) l1[i,] = abs(t1[i]-t2) +filled.contour(t1,t2, l1, xlab = "trait1", ylab = "trait2", main = "absolute") + +### JOE + +# Joe's fiddling about with David's script +# response and effect coefficients +r0 = 1; r1 = .5; e0 = 1; e1 = 0.5 +#traits +l2 = matrix(data=0, nrow=10000, ncol=5) +l2[,1] = rep(1:100, times=100) +l2[,2] = rep(1:100, each=100) +l2[,3] = l2[,1]*l2[,2] +#work out absolute model +l2[,4] = abs(l2[,1]-l2[,2]) +#work out the response/effect model +l2[,5] = (r0+ r1*l2[,1])*(e0 + e1*l2[,2]) +# evaluate relationship between the absolute model & the components of the response/effect model +model <- lm(l2[,4]~1+l2[,1]+l2[,2]+l2[,3]) +summary(model) +plot(l2[,4],model$fitted.values) + diff --git a/R/find.trait/France.R b/R/find.trait/France.R index 5a44a96ce2480c2b7b92bfcd41d53c975758d4fa..342aba147727801152f1b7b04a7c1c3954ef647a 100644 --- a/R/find.trait/France.R +++ b/R/find.trait/France.R @@ -10,7 +10,7 @@ species.clean$Latin_name <- (gsub("_", " ", species.clean$Latin_name)) species.clean$Latin_name_syn<- (gsub("_", " ", species.clean$Latin_name_syn)) species.clean <- subset(species.clean,subset=!is.na(species.clean$Latin_name_syn)) species.clean <- subset(species.clean,subset=!duplicated(species.clean$Latin_name_syn)) -species.clean$sp <- species.clean$code +species.clean$sp <- paste("sp.",species.clean$code,sep="") species.clean$code <- NULL ## read in data data.TRY.std <- readRDS("output/formatted/TRY/data.TRY.std.rds") diff --git a/R/find.trait/NSW.R b/R/find.trait/NSW.R index 4880c0a80eac6b9b79e7ed9227cbf7353e165aa6..f86fce31b9d9b64f550297bee8c3abc0ac22582b 100644 --- a/R/find.trait/NSW.R +++ b/R/find.trait/NSW.R @@ -7,8 +7,9 @@ species.clean <- data.frame(sp=data.tree[!duplicated(data.tree[["sp"]]),"sp"], Latin_name=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"], Latin_name_syn=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"], stringsAsFactors =FALSE) +species.clean$sp <- species.clean$Latin_name - + ################ MASSAGE TRAIT DATA Obtain maximum height per species from data.trait no sd ################ available as we have only one observation for species ###################################### MASSAGE TRAIT DATA diff --git a/R/find.trait/Paracou.R b/R/find.trait/Paracou.R index 43d7034e8a57b23464b284e6d46f85bd735bd3c7..2e71faa5148e78b6913edb49de501c3314c53a27 100644 --- a/R/find.trait/Paracou.R +++ b/R/find.trait/Paracou.R @@ -272,5 +272,7 @@ names(data.species) <- c("Species","Genus", "Wood.density.mean","Wood.density.sd","Wood.density.n", "Leaf.N.gen","Seed.mass.gen","SLA.gen","Wood.density.gen","sp") +data.species$sp <- paste("sp.",data.species$sp,sep="") # write.table(data.genus,file="data.genus.txt",sep="\t",row.names=FALSE) write.csv(data.species,file="./output/formatted/Paracou/traits.csv",row.names=FALSE) + diff --git a/R/format.data/BCI.R b/R/format.data/BCI.R index abb766e877274f1462230b2d6d3ef35fa51ed7d8..43acab8490dfb093a2000397e0381df1a1974dba 100644 --- a/R/format.data/BCI.R +++ b/R/format.data/BCI.R @@ -9,28 +9,28 @@ library(reshape,quietly=TRUE) ############# READ DATA read individuals tree data Requires careful formatting of 7 census ############ datasets The raw data is such that, once a tree dies in census X, then it no ########## longer exists in census X+1, X+2 etc... -data.bci1 <- read.table("data/raw/BCI/census1/PlotsDataReport.txt", header = TRUE, +data.bci1 <- read.table("data/raw/BCI/census1/PlotsDataReport.txt", header = TRUE, stringsAsFactors = FALSE, sep = "\t") data.bci1$Date1 <- data.bci1$Date; data.bci1$Date <- NULL data.bci1$DBH1 <- data.bci1$DBH; data.bci1$DBH <- NULL big.bci <- NULL -max_census = 7 # only use first census for now +max_census = 7 # only use first census for now for (k in 2:max_census) { - new.directory <- paste("data/raw/BCI/census", k, "/PlotsDataReport.txt", + new.directory <- paste("data/raw/BCI/census", k, "/PlotsDataReport.txt", sep = "") - data.bci2 <- read.table(new.directory, header = TRUE, stringsAsFactors = FALSE, - sep = "\t"); + data.bci2 <- read.table(new.directory, header = TRUE, stringsAsFactors = FALSE, + sep = "\t"); if (!is.null(big.bci)) { - sub.bci <- merge(data.bci1[, c("Latin","Quadrat","Census","gx","gy","TreeID","Tag","Date1","DBH1")], data.frame(TreeID = data.bci2[["TreeID"]], - DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], dead = as.numeric(data.bci2[["Status"]] == + sub.bci <- merge(data.bci1[, c("Latin","Quadrat","Census","gx","gy","TreeID","Tag","Date1","DBH1")], data.frame(TreeID = data.bci2[["TreeID"]], + DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], dead = as.numeric(data.bci2[["Status"]] == "dead"),stringsAsFactors=F), sort = T, by = "TreeID") ## Uses the Date1 as the census number big.bci <- rbind(big.bci, sub.bci) } if (is.null(big.bci)) { - big.bci <- merge(data.bci1[, c("Latin","Quadrat","Census","gx","gy","TreeID","Tag","Date1","DBH1")], data.frame(TreeID = data.bci2[["TreeID"]], - DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], dead = as.numeric(data.bci2[["Status"]] == + big.bci <- merge(data.bci1[, c("Latin","Quadrat","Census","gx","gy","TreeID","Tag","Date1","DBH1")], data.frame(TreeID = data.bci2[["TreeID"]], + DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], dead = as.numeric(data.bci2[["Status"]] == "dead"),stringsAsFactors=F), sort = T, by = "TreeID") } data.bci1 <- data.bci2 @@ -44,14 +44,16 @@ big.bci <- big.bci[order(big.bci$TreeID), ] data.bci <- big.bci rm(big.bci) ### read species names -species.clean <- read.table("data/raw/BCI/TaxonomyDataReport.txt", stringsAsFactors = FALSE, +species.clean <- read.table("data/raw/BCI/TaxonomyDataReport.txt", stringsAsFactors = FALSE, header = T, sep = "\t") -species.clean$Latin_name <- paste(species.clean[["Genus"]], +species.clean$Latin_name <- paste(species.clean[["Genus"]], species.clean[["species"]],sep=" ") ## ## Try to relate SpeciesID in species.clean species names in data.bci #unique(data.bci$Latin) %in% species.clean$Latin_name data.bci <- merge(data.bci, data.frame(Latin = species.clean$Latin_name, sp = species.clean$SpeciesID, genus = species.clean$Genus, stringsAsFactors = F),by = "Latin", sort = F) - + +## format sp as sp. +data.bci$sp <- paste("sp",data.bci$sp,sep=".") ########################################## FORMAT INDIVIDUAL TREE DATA data.bci <- data.bci[order(data.bci[["TreeID"]]),] @@ -59,14 +61,15 @@ data.bci$Date1 <- as.Date(data.bci$Date1) data.bci$Date2 <- as.Date(data.bci$Date2) # data.bci$yr1 <- format(strptime(data.bci$Date1, format = '%Y-%m-%d'),'%Y') # data.bci$yr2 <- format(strptime(data.bci$Date2, format = '%Y-%m-%d'),'%Y') -data.bci$year <- as.numeric(difftime(data.bci$Date2, data.bci$Date1, units = "weeks")/52) ## Not rounded -data.bci$obs.id <- 1:nrow(data.bci) #apply(data.bci[,c("TreeID","Census")],1,paste,collapse="_") +data.bci$year <- as.numeric(difftime(data.bci$Date2, data.bci$Date1, units = "weeks")/52) ## Not rounded +data.bci$obs.id <- 1:nrow(data.bci) #apply(data.bci[,c("TreeID","Census")],1,paste,collapse="_") data.bci$tree.id <- data.bci$TreeID; data.bci$TreeID <- NULL data.bci$x <- data.bci$gx; data.bci$gx <- NULL data.bci$y <- data.bci$gy; data.bci$gy <- NULL data.bci$G <- (data.bci$DBH2 - data.bci$DBH1)/data.bci$year ## diameter growth in mm per year - BASED ON UNROUNDED YEARS +data.bci$BA.G <- (pi*(10/2*data.bci$DBH2)^2 -pi*(10/2* data.bci$DBH1)^2)/data.bci$year ## BA growth in cm2/yr data.bci$D <- data.bci[["DBH1"]]/10 ## diameter in cm -data.bci$plot <- data.bci[["Quadrat"]] +data.bci$plot <- data.bci[["Quadrat"]] data.bci$cluster <- rep("A",nrow(data.bci)) data.bci$htot <- rep(NA,nrow(data.bci)) ## height of tree in m data.bci$sp.name <- data.bci$Latin @@ -78,10 +81,10 @@ data.bci$dead <- as.numeric(is.na(data.bci$DBH2)) ## Not entire sure if this wor ## LIMIT TO TREES > 10CM dbh data.bci <- subset(data.bci,subset=data.bci[["DBH1"]]>10) ### select only tree above 10cm of dbh at census 1 data.bci <- subset(data.bci,subset=data.bci[["DBH2"]]>10) ### select only tree above 10cm of dbh at census 2 -data.bci <- subset(data.bci,subset=data.bci[["census"]]==6) ### select only census 6 +data.bci <- subset(data.bci,subset=data.bci[["census"]] %in% c(6,5,4,3)) ### select only census 6 ###################### PLOT SELECTION FOR THE ANALYSIS -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","plot", "cluster", "D", "G", "dead", +vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","plot", "cluster", "D", "G","BA.G", "dead", "year", "htot", "x", "y", "census") data.tree <- subset(data.bci, select = c(vec.basic.var)) @@ -89,3 +92,9 @@ data.tree[["Lon"]] <- -79.8461 data.tree[["Lat"]] <- 9.1543 data.tree[["ecocode"]] <- "tropical" write.csv(data.tree,file="output/formatted/BCI/tree.csv",row.names = FALSE) + +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code + +vec.basic.var.p <- c("plot", "cluster", "Lon","Lat","ecocode") +data.plot <- subset(data.tree, subset=!duplicated(data.tree$cluster),select = c(vec.basic.var.p)) +write.csv(data.plot,file="output/formatted/BCI/plot.csv",row.names = FALSE) diff --git a/R/format.data/Canada.R b/R/format.data/Canada.R index fa39890e24403ab23f87509947e8d35b957e8d7b..ca1ab3331107050c354628bba6fc6ad563d84346 100644 --- a/R/format.data/Canada.R +++ b/R/format.data/Canada.R @@ -3,7 +3,7 @@ ### MERGE canada DATA rm(list = ls()) source("R/format.data/format.fun.R") -library(reshape) +library(reshape,quietly=TRUE) dir.create("output/formatted/Canada", recursive=TRUE,showWarnings=FALSE) @@ -16,7 +16,7 @@ data.canada <- read.csv("data/raw/Canada/Canada_Data2George_20130910.csv", data.canada <- subset(data.canada,subset=(data.canada$SubPlotSize>=0.02 & data.canada$InitDBH>=10) ) sum(is.na(data.canada$species)) -data.canada$sp = data.canada$species_FIACode +data.canada$sp <- data.canada$species_FIACode data.canada$sp[data.canada$sp==8349] <- 762 #MCV replace incorrect sp code for Prunus serotina data.canada$species_FIACode <- NULL ### read species names and merge with data.canada @@ -32,7 +32,9 @@ data.canada$sp <- paste("sp",data.canada$sp,sep=".") ######### FORMAT INDIVIDUAL TREE DATA data.canada$FinalDBH[data.canada$FinalDBH < 0] <- NA data.canada$G <- 10 * (data.canada$FinalDBH - data.canada$InitDBH)/data.canada$Interval ## diameter growth in mm per year +data.canada$BA.G <- (pi*(data.canada$FinalDBH/2)^2 - pi*(data.canada$InitDBH/2)^2)/data.canada$Interval ## diameter growth in mm per year data.canada$G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA +data.canada$BA.G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA data.canada$year <- data.canada$Interval; data.canada$IntervalYears <- NULL ## number of year between measurement data.canada$D <- data.canada[["InitDBH"]] data.canada$D[data.canada$D == 0] <- NA ## diameter in cm @@ -46,7 +48,10 @@ data.canada$cluster[!is.na(data.canada$SUBPLOT_ID)] <- data.canada$PlotID_InitYe data.canada$tree.id <- data.canada[["PlotTree_ID"]]; data.canada[["PlotTree_ID"]] <- NULL ## tree unique id data.canada$census <- rep(1,nrow(data.canada)) data.canada$obs.id <- data.canada$tree.id -data.canada$weights <- 1/(data.canada$SubPlotSize*10000) ### there is some strange small value in SubPlotSize 20 m^2 even for large tree NEED TO ASK Hongcheng ## REPLY BETTER TO DElETE THIS PLOTS HE DON T KNOW WHY SO SMALLL +data.canada$weights <- 1/(data.canada$SubPlotSize*10000) ### there is some strange small value in SubPlotSize 20 m^2 even for large tree NEED TO ASK Hongcheng ## REPLY BETTER TO DElETE THIS PLOTS HE DON T KNOW WHY SO SMALL REMOVED ABOVE + + + ###################### ECOREGION merge to have no ecoregion with low number of observation greco <- read.csv(file = "data/raw/Canada/EcoregionCodes.csv", header = T, sep = "\t") @@ -80,11 +85,17 @@ data.canada <- merge(data.canada, data.frame(plot = names(perc.dead), perc.dead table(data.canada$dead) ## data.canada <- data.canada[data.canada$dead == 0,] vec.abio.var.names <- c("MAT", "MAP") -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", +vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G","BA.G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","weights","census") data.tree <- subset(data.canada, select = c(vec.basic.var, vec.abio.var.names)) write.csv(data.tree,file="output/formatted/Canada/tree.csv",row.names = FALSE) +### PLOTS DATA +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var.p <- c( "cluster", "plot", "ecocode", "Lon", "Lat", "perc.dead") +data.plot <- subset(data.canada, subset=!duplicated(data.canada$plot),select = c(vec.basic.var.p, vec.abio.var.names)) +write.csv(data.plot,file="output/formatted/Canada/plot.csv",row.names = FALSE) diff --git a/R/format.data/France.R b/R/format.data/France.R index 2e9e7530491066f93c90e74c7e487b35a1bbc915..b870019c229abc10df6d391ea7804131381467db 100644 --- a/R/format.data/France.R +++ b/R/format.data/France.R @@ -2,7 +2,7 @@ ############################################# MERGE FRENCH DATA rm(list = ls()); source("R/format.data/format.fun.R") -library(reshape) +library(reshape,quietly=TRUE) dir.create("output/formatted/France", recursive=TRUE,showWarnings=FALSE) @@ -10,7 +10,7 @@ dir.create("output/formatted/France", recursive=TRUE,showWarnings=FALSE) data.france <- read.csv("data/raw/France/dataIFN.FRANCE.csv", stringsAsFactors = FALSE) ### read IFN species names and clean species.clean <- fun.clean.species.tab(read.csv("data/raw/France/species.csv", stringsAsFactors = FALSE)) - +data.france$espar <- paste("sp", data.france[["espar"]],sep=".") ######## MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed ######## height to add variables to the traits data base Because we have two heights, ######## then take the max of the two heights and then bootstrap @@ -20,14 +20,15 @@ res.quant.boot <- t(sapply(levels(factor(data.france[["espar"]])), FUN = f.quant data.max.height <- data.frame(sp = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3]) rm(res.quant.boot) -write.csv(data.max.height,file='output/formatted/France/max.height.csv') +write.csv(data.max.height,file='output/formatted/France/max.height.csv',row.names = FALSE) rm(data.max.height) ########################################## FORMAT INDIVIDUAL TREE DATA change unit and names of variables to be the same ########################################## in all data for the tree data.france$G <- data.france[["ir5"]]/5 * 2 ## diameter growth in mm per year +data.france$BA.G <- (pi*(data.france[["c13"]]/pi/2)^2 -pi* (data.france[["c13"]]/pi/2 - 10*data.france[["ir5"]])^2)/5 ## BA growth in cm2 per year data.france$year <- rep(5, nrow(data.france)) ## number of year between measurement -data.france$D <- data.france[["c13"]]/pi ## diameter in cm +data.france$D <- data.france[["c13"]]/pi - 10*data.france[["ir5"]] * 2 ## diameter in cm need to be nack date data.france$sp <- as.character(data.france[["espar"]]); data.france$espar <- NULL ## species code data.france$sp.name <- species.clean[match(data.france$sp,species.clean$sp),"Latin_name"] data.france$plot <- (data.france[["idp"]]); data.france$idp <- NULL ## plot code @@ -77,7 +78,14 @@ data.france <- subset(data.france, subset = !is.na(data.france[["SER"]])) ## mi ## names of variables for abiotic conditions vec.abio.var.names <- c("MAT", "SAP", "sgdd", "WB.s", "WB.y", "WS.s", "WS.y") ## other var -vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G", "dead", +vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G","BA.G", "dead", "year", "htot", "Lon", "Lat", "perc.dead", "weights","census") data.tree <- subset(data.france, select = c(vec.basic.var, vec.abio.var.names)) write.csv(data.tree,file="output/formatted/France/tree.csv",row.names = FALSE) + +### PLOTS DATA +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code +vec.abio.var.names <- c("MAT", "SAP") +vec.basic.var.p <- c( "cluster", "plot", "ecocode", "Lon", "Lat", "perc.dead") +data.plot <- subset(data.france, subset=!duplicated(data.france$plot),select = c(vec.basic.var.p, vec.abio.var.names)) +write.csv(data.plot,file="output/formatted/France/plot.csv",row.names = FALSE) diff --git a/R/format.data/Fushan.R b/R/format.data/Fushan.R index dfdb69404cfbe5a547db867f1c0b921dbb5760ff..1eff96b71ba525debb52f7b55465f66a7a8fdfec 100644 --- a/R/format.data/Fushan.R +++ b/R/format.data/Fushan.R @@ -17,6 +17,7 @@ species.clean <- read.csv("data/raw/Fushan/Splist_Fushan_En.csv", stringsAsFacto ########################################## FORMAT INDIVIDUAL TREE DATA data.fushan$year <- rep(5, nrow(data.fushan)) ## number of year between measurement data.fushan$G <- 10*(data.fushan$dbh2 - data.fushan$dbh1)/data.fushan$year ## diameter growth in mm per year +data.fushan$BA.G <- (pi*(data.fushan$dbh2/2)^2 - pi*(data.fushan$dbh1/2)^2)/data.fushan$year ## BA growth in cm2 per year data.fushan$D <- data.fushan[["dbh1"]] ## diameter in cm data.fushan$dead <- as.numeric(data.fushan$status2 == "dead") ## dummy variable for dead tree 0 alive 1 dead data.fushan$cluster <- rep(1,nrow(data.fushan)) ## cluster code @@ -35,8 +36,7 @@ data.fushan$census <- rep(1,nrow(data.fushan)) data.fushan$x <- data.fushan$gx data.fushan$y <- data.fushan$gy - -####### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 +#data.fushan$sp <- paste("sp",data.fushan$sp,".") ### delete recruited tree data.fushan <- subset(data.fushan,subset=data.fushan$status1!="P") @@ -44,12 +44,12 @@ data.fushan <- subset(data.fushan,subset=data.fushan$status1!="P") ###################### PLOT SELECTION FOR THE ANALYSIS -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster", "plot", "D", "G", "dead", +vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster", "plot", "D", "G","BA.G", "dead", "year", "htot", "x", "y", "census") data.tree <- subset(data.fushan, select = c(vec.basic.var)) -## select only tree qbove 10cm DBH +## select only tree above 10cm DBH data.tree <- subset(data.tree, subset =data.tree$D>10) @@ -59,5 +59,9 @@ data.tree[["ecocode"]] <- "tropical" write.csv(data.tree,file="output/formatted/Fushan/tree.csv",row.names = FALSE) +## save plot data +vec.basic.var.p <- c("plot", "cluster", "Lon","Lat","ecocode") +data.plot <- subset(data.tree, subset=!duplicated(data.tree$cluster),select = c(vec.basic.var.p)) +write.csv(data.plot,file="output/formatted/Fushan/plot.csv",row.names = FALSE) diff --git a/R/format.data/NSW.R b/R/format.data/NSW.R index 404691bf83e4c129171d719f0122317dadc0f61d..84c0cc8bf0e176a8ccca6342f5a9663d8c4c4e83 100644 --- a/R/format.data/NSW.R +++ b/R/format.data/NSW.R @@ -3,7 +3,7 @@ rm(list = ls()) source("R/format.data/format.fun.R") dir.create("output/formatted/NSW", recursive=TRUE,showWarnings=FALSE) -library(reshape) +library(reshape, quietly=TRUE) ######################### READ DATA read individuals tree data data.nswbrc <- read.csv("data/raw/NSW/NSW_data_BRcontrols.csv", header = TRUE, stringsAsFactors = FALSE) @@ -20,7 +20,7 @@ data.nswbrt$Date.of.measure <- as.vector(sapply(data.nswbrt$Date.of.measure, fun data.nswbrt$cluster <- data.nswbrt$Plot data.nswbrt$plot <- rep(NA,nrow(data.nswbrt)) - +# data.nswbs1 <- read.csv("data/raw/NSW/NSW_data_BS1.csv", header = TRUE, stringsAsFactors = FALSE) data.nswbs1$Date.of.measure <- as.character(format(as.Date(data.nswbs1$Date.of.measure, @@ -34,8 +34,8 @@ data.nswbs1$plot <- rep(NA,nrow(data.nswbs1)) ## data.nswbs2 has a different format to the other datasets, so format to match ## the above data.nswbs2 <- read.csv("data/raw/NSW/NSW_data_BS2.csv", header = TRUE, stringsAsFactors = FALSE) -data.nswbs2$cluster <- data.nswbs2$Plot -data.nswbs2$plot <- data.nswbs2$Subplot +data.nswbs2$cluster <- "BS2" +data.nswbs2$plot <- paste(data.nswbs2$Plot) ### plot to be used for competition index 20 x 20 m data.nswbs2$Subplot <- NULL data.nswbs2$Family <- NULL data.nswbs2$species <- data.nswbs2$Species.Name @@ -60,24 +60,59 @@ data.nswtnd$plot <- rep(NA,nrow(data.nswtnd)) data.nsw <- rbind(data.nswbrc, data.nswbrt, data.nswbs1, data.nswbs2, data.nswtnd) data.nsw$Plot <- NULL -##################### CREATE PLOT BASED ON 10x10m cell from X Y -fun.quadrat <- function(x,y) { - vec.x <- seq(0,max(x,na.rm=T),by=10) - vec.y <- seq(0,max(y,na.rm=T),by=10) +##################### CREATE PLOT BASED ON square.sxsquare.s m cell from X Y +fun.quadrat <- function(x,y,square.s) { + + if(sum(!is.na(x))>10){ + vec.x <- seq(0,max(x,na.rm=T)+9,by=square.s) + vec.y <- seq(0,max(y,na.rm=T)+9,by=square.s) x.cut <- cut(x,breaks=vec.x) y.cut <- cut(y,breaks=vec.y) out <- apply(cbind(x.cut,y.cut),1,paste,collapse=".") out[is.na(x.cut)] <- NA out[is.na(y.cut)] <- NA - return(unclass(out)) } + return(unclass(out)) + }else{ + return(rep(NA,length(x))) + } + } -make.quad <- fun.quadrat(data.nsw$x, data.nsw$y) +## function to apply per cluster +fun.quadrat.cluster <- function(cluster.id,cluster,x,y,square.s){ +temp <- fun.quadrat(x[cluster==cluster.id], y[cluster==cluster.id],square.s=square.s) +temp[!is.na(temp)] <- paste((cluster[cluster==cluster.id])[!is.na(temp)],temp[!is.na(temp)],sep=".") +return(as.vector(temp)) +} + +square.s.t <- 20 +make.quad <- unlist(sapply(unique(data.nsw$cluster),FUN=fun.quadrat.cluster,cluster=data.nsw$cluster,x=data.nsw$x, y=data.nsw$y,square.s=square.s.t)) data.nsw <- cbind(data.nsw, make.quad) +## plot to check problem +nn <- 0 +for (i in unique(data.nsw$cluster)){ + +if (sum(!is.na(data.nsw$x[data.nsw$cluster == i]))>3){ +x11() +nn <- nn+1 +cat("\n\n\n",nn,"\n") +print(i) +print(range(data.nsw$x[data.nsw$cluster == i],na.rm=TRUE)) +print(table(factor(data.nsw$make.quad[data.nsw$cluster == i]))) +plot(data.nsw$x[data.nsw$cluster == i],data.nsw$y[data.nsw$cluster == i],col=unclass(factor(data.nsw$make.quad[data.nsw$cluster == i])),main=i) +points(data.nsw$x[data.nsw$cluster == i],data.nsw$y[data.nsw$cluster == i],cex=0.2) +vec.x <- seq(0,max(data.nsw$x[data.nsw$cluster == i],na.rm=T)+9,by=square.s.t) +vec.y <- seq(0,max(data.nsw$y[data.nsw$cluster == i],na.rm=T)+9,by=square.s.t) +abline(v=vec.x) +abline(h=vec.y) +} +} + + + ## subplot becomes plot; plot becomes combination of x and y -data.nsw$subplot <- data.nsw$plot -data.nsw$plot <- as.vector(unclass(data.nsw$make.quad)) ## plot code +data.nsw$plot[is.na(data.nsw$plot)] <- as.vector(unclass(data.nsw$make.quad))[is.na(data.nsw$plot)] ## plot code data.nsw$make.quad <- NULL ########################################## FORMAT INDIVIDUAL TREE DATA - Each tree has at most 3 observations @@ -107,30 +142,25 @@ data.nsw2 <- data.nsw2[-1, ] data.nsw2$Date.of.measure <- data.nsw2$Dbh <- NULL data.nsw2$dead <- as.numeric(data.nsw2$dbh2 == "M") ## dummy variable for dead tree 0 alive 1 dead data.nsw <- data.nsw2 -for (k in c("Dbh","x","y","plot","subplot","year1","year2","dbh1","dbh2")) data.nsw[[k]] <- as.numeric(data.nsw[[k]]) +for (k in c("x","y","year1","year2","dbh1","dbh2")) data.nsw[[k]] <- as.numeric(data.nsw[[k]]) ## change unit and names of variables to be the same in all data for the tree data.nsw$year <- (data.nsw$year2 - data.nsw$year1) ## number of year between measurements data.nsw$G <- 10 * (data.nsw$dbh2 - data.nsw$dbh1)/(data.nsw$year) ## diameter growth in mm per year +data.nsw$BA.G <- (pi*(data.nsw$dbh2/2)^2 - pi*(data.nsw$dbh1/2)^2)/(data.nsw$year) ## BA growth in cm2 per year data.nsw$D <- data.nsw[["dbh1"]] ## diameter in cm -data.nsw$sp <- as.numeric(factor(data.nsw[["species"]])); ## species code - use the spp name as code +#data.nsw$sp <- as.numeric(factor(data.nsw[["species"]])); ## species code - use the spp name as code +data.nsw$sp <- factor(data.nsw[["species"]]) ## species code - use the spp name as code data.nsw$sp.name <- data.nsw[["species"]]; data.nsw$species <- NULL data.nsw$htot <- rep(NA, nrow(data.nsw)) ## height of tree in m ### add plot weights for computation of competition index (in 1/m^2) - from the ### original excel file -data.nsw$weights <- rep(NA, nrow(data.nsw)) -data.nsw$weights[grep("AA", data.nsw$cluster)] <- 1/(20 * 80) -data.nsw$weights[grep("BB", data.nsw$cluster)] <- 1/(20 * 80) -data.nsw$weights[grep("CC", data.nsw$cluster)] <- 1/(20 * 80) -data.nsw$weights[grep("DD", data.nsw$cluster)] <- 1/(20 * 80) -data.nsw$weights[grep("BS", data.nsw$cluster)] <- 1/(25 * 30) -data.nsw$weights[grep("BR", data.nsw$cluster)] <- 1/(60.4 * 60.4) -data.nsw$weights[grep("END", data.nsw$cluster)] <- 1/(40 * 50) -data.nsw$weights[grep("TND", data.nsw$cluster)] <- 1/(40 * 50) -data.nsw$cenus <- data.nsw$year1 +data.nsw$weights <- rep(1/(square.s.t * square.s.t), nrow(data.nsw)) +data.nsw$census <- data.nsw$year1 data.nsw$obs.id <- 1:nrow(data.nsw) ###################### ECOREGION nsw has only 1 eco-region +data.nsw$ecocode <- "AA" ###################### PERCENT DEAD NO DATA ON MORTALITY perc.dead <- tapply(data.nsw[["dead"]], INDEX = data.nsw[["plot"]], FUN = function.perc.dead2) data.nsw <- merge(data.nsw, data.frame(plot = names(perc.dead), perc.dead = perc.dead), @@ -143,54 +173,28 @@ data.nsw <- subset(data.nsw, !is.na(plot)) ########################################### VARIABLES SELECTION FOR THE ANALYSIS data.latlong <- read.csv("data/raw/NSW/NSW_Lat_Long.csv", header = TRUE, stringsAsFactors = FALSE) -data.nsw <- merge(data.frame(plot = data.latlong$Site, Lat = data.latlong$Lat, Lon = data.latlong$Long, stringsAsFactors = F), data.nsw, by = "plot", all = T) +data.nsw <- merge(data.frame(cluster = data.latlong$Site, Lat = data.latlong$Lat, Lon = data.latlong$Long, stringsAsFactors = F), + data.nsw, + by = "cluster", all = T) ## There is an additional site = BS4 in data.latlong which is not in data.nsw, so delete it -data.nsw <- data.nsw[-which(data.nsw$plot == "BS4"),] - -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","plot", "subplot", "D", "G", "dead", - "year", "htot", "x", "y", "census") -data.tree <- subset(data.nsw, select = c(vec.basic.var)) +data.nsw <- data.nsw[-which(data.nsw$cluster %in% c("BS4","BS2")),] + +#### get wc climate +source("R/utils/climate.R") +clim <- GetClimate(data.nsw$Lat,data.nsw$Lon) +data.nsw$MAT <- clim$MAT +data.nsw$MAP <- clim$MAP + +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G","BA.G", "dead", + "year", "htot", "Lon", "Lat", "perc.dead","weights","census") +data.tree <- subset(data.nsw, select = c(vec.basic.var,vec.abio.var.names )) write.csv(data.tree,file="output/formatted/NSW/tree.csv",row.names = FALSE) +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var.p <- c( "cluster", "plot", "ecocode", "Lon", "Lat", "perc.dead") +data.plot <- subset(data.nsw, subset=!duplicated(data.nsw$cluster),select = c(vec.basic.var.p, vec.abio.var.names)) +write.csv(data.plot,file="output/formatted/NSW/plot.csv",row.names = FALSE) + -## ############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in -## ############################################## m^2/ha without the target species NEED TO COMPUTE BASED ON RADIUS AROUND TARGET -## ############################################## TREE species as factor because number -## data.nsw[["species"]] <- factor(data.nsw[["species"]]) -## data.nsw$spcode <- data.nsw[["species"]] -## levels(data.nsw$spcode) <- 1:length(levels(data.nsw$spcode)) -## data.BA.SP <- BA.SP.FUN(obs.id = as.vector(data.nsw[["tree.id"]]), diam = as.vector(data.nsw[["D"]]), -## sp = as.vector(data.nsw[["spcode"]]), id.plot = as.vector(data.nsw[["Plot"]]), -## weights = data.nsw[["weights"]], weight.full.plot = NA) -## ## change NA and <0 data for 0 -## data.BA.SP[is.na(data.BA.SP)] <- 0 -## data.BA.SP2 <- data.frame(data.BA.SP) -## colnames(data.BA.SP2) <- colnames(data.BA.SP) -## ### CHECK IF sp and sp name for column are the same -## if (sum(!(names(data.BA.SP2)[-1] %in% unique(data.nsw[["spcode"]]))) > 0) stop("competition index sp name not the same as in data.tree") -## #### compute BA tot for all competitors -## BATOT.COMPET <- apply(data.BA.SP2[, -1], 1, sum, na.rm = TRUE) -## data.BA.SP2$BATOT.COMPET <- BATOT.COMPET -## rm(BATOT.COMPET) -## data.BA.SP <- data.BA.SP2 -## # Rlim <- 15 # set size of neighborhood for competition index system.time(test <- -## # fun.compute.BA.SP.XY.per.plot(1,data.tree=data.nsw,Rlim=15,parallel=TRUE,rpuDist=FALSE)) -## # list.BA.SP.data <- -## # mclapply(unique(data.nsw[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.nsw,Rlim=Rlim,mc.cores=4) -## # data.BA.sp <- rbind.fill(list.BA.SP.data) dim(data.BA.SP) ### TEST DATA FORMAT -## # if(sum(! rownames(BA.SP.temp)==data.tree[['obs.id']]) >0) stop('rows not in the -## # good order') -## # if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree[['species']]))))>0) -## # stop('colnames does mot match species name') ## test same order as data.nsw -## # if(sum(!data.BA.SP[['obs.id']] == data.nsw[['obs.id']]) >0) stop('competition -## # index not in the same order than data.nsw') ## REMOVE TREE IN BUFFER ZONE -## # not.in.buffer.zone <- (data.nsw[['x']]<(250-Rlim) & data.nsw[['x']]>(0+Rlim) & -## # data.nsw[['y']]<(250-Rlim) & data.nsw[['y']]>(0+Rlim)) # remove subset data.nsw -## # <- subset(data.nsw,subset=not.in.buffer.zone) data.BA.sp <- -## # subset(data.BA.sp,subset=not.in.buffer.zone) ## plot each plot -## # pdf('./figs/plots.tree.pdf') -## # lapply(unique(data.nsw[['plot']]),FUN=fun.circles.plot,data.nsw[['x']],data.nsw[['y']],data.nsw[['plot']],data.nsw[['D']],inches=0.2,xlim=c(0,250),ylim=c(0,250)) -## # dev.off() -## ## save everything as a list -## list.nsw <- list(data.tree = data.nsw, data.BA.SP = data.BA.sp, data.traits = data.traits) -## save(list.nsw, file = "output/formatted/NSW/list.nsw.Rdata") diff --git a/R/format.data/NVS.R b/R/format.data/NVS.R index 82bd1bea74744a1e76c8c6b5fb1d16893249bafe..0188b6774de682854afd2d70f4e60a4959dcc057 100644 --- a/R/format.data/NVS.R +++ b/R/format.data/NVS.R @@ -3,7 +3,7 @@ ### MERGE NVS DATA rm(list = ls()) source("R/format.data/format.fun.R") -library(reshape) +library(reshape, quietly=TRUE) dir.create("output/formatted/NVS", recursive=TRUE,showWarnings=FALSE) ######################### READ DATA read individuals tree and environmental data @@ -25,7 +25,9 @@ rm(data.plot) ################################################################ FORMAT INDIVIDUAL TREE DATA ## change unit and names of variables to be the same in all data for the tree data.nz$G <- 10 * (data.nz[["D1"]] - data.nz[["D0"]])/(data.nz[["t1"]] - data.nz[["t0"]]) ## diameter growth in mm per year +data.nz$BA.G <- (pi*(data.nz[["D1"]]/2)^2 - pi*(data.nz[["D0"]]/2)^2)/(data.nz[["t1"]] - data.nz[["t0"]]) ## BA growth in cm2 per year data.nz$sp.name <- data.nz$sp ## no latin name so use that as not needed to get traits +data.nz$sp <- paste("sp",data.nz$sp,sep=".") data.nz$year <- (data.nz[["t1"]] - data.nz[["t0"]]) ## number of year between measurement data.nz$D <- data.nz[["D0"]] ## diameter in cm data.nz$dead <- as.numeric(is.na(data.nz[["D1"]])) ## dummy variable for dead tree 0 alive 1 dead @@ -48,9 +50,9 @@ data.nz$ecocode <- data.nz$Broad ########################################## CHANGE COORDINATE SYSTEM ########################################## system of Easting Northing to be in lat long WGS84 -library(sp) -library(dismo) -library(rgdal) +library(sp, quietly=TRUE) +library(dismo, quietly=TRUE) +library(rgdal, quietly=TRUE) data.sp <- data.nz[, c("tree.id", "Easting", "Northing")] coordinates(data.sp) <- c("Easting", "Northing") # define x y proj4string(data.sp) <- CRS("+init=epsg:27200") # define projection system of our data ## EPSG CODE 27200 NZGD49 / New Zealand Map Grid @@ -83,9 +85,15 @@ data.nz <- merge(data.nz, data.frame(plot = names(perc.dead), perc.dead = perc.d ###### REMOVE DEAD TREE at first census data.nz <- subset(data.nz,subset=!is.na(data.nz[["D"]])) vec.abio.var.names <- c("MAT", "MAP") -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", +vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G","BA.G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","weights","census") data.tree <- subset(data.nz, select = c(vec.basic.var, vec.abio.var.names)) write.csv(data.tree,file="output/formatted/NVS/tree.csv",row.names = FALSE) + +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var.p <- c( "cluster", "plot", "ecocode", "Lon", "Lat", "perc.dead") +data.plot <- subset(data.nz, subset=!duplicated(data.nz$plot),select = c(vec.basic.var.p, vec.abio.var.names)) +write.csv(data.plot,file="output/formatted/NVS/plot.csv",row.names = FALSE) diff --git a/R/format.data/Paracou.R b/R/format.data/Paracou.R index 39e4bd0a5498eabd23d29d817530eb3b4ca9c9f7..94d35e7b6d091a271e0864cea837c036cab40c74 100644 --- a/R/format.data/Paracou.R +++ b/R/format.data/Paracou.R @@ -25,7 +25,7 @@ for(k in numeric.col.name){ data.paracou[,k] <- gsub(",",".",data.paracou[,k]); data.paracou[,k] <- as.numeric(data.paracou[,k]) } ## Replace all , in decimals with . data.paracou$tree.id <- apply(data.paracou[,c("cluster","plot","tree")],1,paste,collapse="_"); -data.paracou$sp <- data.paracou[["taxonid"]] +data.paracou$sp <- paste("sp",data.paracou[["taxonid"]],sep=".") data.paracou <- data.paracou[,c(ncol(data.paracou),1:(ncol(data.paracou)-1))] ## plot each plot ## pdf("output/figs/plots.paracou.pdf") @@ -43,7 +43,7 @@ data.paracou <- subset(data.paracou,subset=!(as.numeric(data.paracou[["yeardied" ### read species names species.clean <- read.csv("data/raw/Paracou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";") -species.clean$sp <- species.clean[["idTaxon"]] +species.clean$sp <- paste("sp.",species.clean[["idTaxon"]],sep="") species.clean$Latin_name <- paste(species.clean[["Genre"]],species.clean[["Espece"]],sep=" ") ## keep only one row pers idTaxon species.clean <- subset(species.clean,subset=!duplicated(species.clean[["sp"]]),select=c("sp","Latin_name","Genre","Espece","Famille","idCIRAD")) @@ -72,6 +72,9 @@ data.paracou <- subset(data.paracou2,subset=!(data.paracou2[['yr1']] ==2005 & (a data.paracou$G <- 10*(data.paracou$dbh2-data.paracou$dbh1)/data.paracou$year ## diameter growth in mm per year data.paracou$G[data.paracou$code1>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh1 data.paracou$G[data.paracou$code2>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh2 +data.paracou$BA.G <- (pi*(data.paracou$dbh2/2)^2-pi*(data.paracou$dbh1/2)^2)/data.paracou$year ## BA growth in cm2 per year +data.paracou$BA.G[data.paracou$code1>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh1 +data.paracou$BA.G[data.paracou$code2>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh2 data.paracou$census <- data.paracou$yr1 data.paracou$D <- data.paracou[["dbh1"]]; data.paracou$D[data.paracou$D == 0] <- NA ;## diameter in cm data.paracou$cluster <- paste("p",data.paracou$cluster,sep=".")#apply(data.paracou[,c("forest","cluster","plot")],1,paste,collapse=".") ## plot code @@ -92,7 +95,7 @@ data.paracou$sp.name <- data.paracou$Latin_name ###################### PLOT SELECTION FOR THE ANALYSIS -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster", "plot", "D", "G", "dead", +vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster", "plot", "D", "G","BA.G", "dead", "year", "htot", "x", "y", "census") data.tree <- subset(data.paracou, select = c(vec.basic.var)) @@ -102,9 +105,14 @@ data.tree[["ecocode"]] <- "tropical" ## select tree above 10 cm and last census only data.tree <- subset(data.tree,subset=data.tree$D>10 & !is.na(data.tree$D)) -data.tree <- subset(data.tree,subset=data.tree$census==2005) +## data.tree <- subset(data.tree,subset=data.tree$census==2005) write.csv(data.tree,file="output/formatted/Paracou/tree.csv",row.names = FALSE) +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code + +vec.basic.var.p <- c("plot", "cluster", "Lon","Lat","ecocode") +data.plot <- subset(data.tree, subset=!duplicated(data.tree$cluster),select = c(vec.basic.var.p)) +write.csv(data.plot,file="output/formatted/Paracou/plot.csv",row.names = FALSE) diff --git a/R/format.data/Spain.R b/R/format.data/Spain.R index 21e8295cb05a91b709b0663989ca828f04474c75..0f49444bed05697fc86304ca5630390c59230f7c 100644 --- a/R/format.data/Spain.R +++ b/R/format.data/Spain.R @@ -3,7 +3,7 @@ ### MERGE spain DATA Edited by FH rm(list = ls()) source("R/format.data/format.fun.R") -library(reshape) +library(reshape,quietly=TRUE) dir.create("output/formatted/Spain", recursive=TRUE,showWarnings=FALSE) @@ -25,6 +25,7 @@ write.csv(data.max.height, file = "output/formatted/Spain/max.height.csv") ## I ################################## FORMAT INDIVIDUAL TREE DATA data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year +data.spain$BA.G <- (pi*(data.spain[["dbh2"]]/20 )^2 -pi*(data.spain[["dbh1"]]/20 )^2)/data.spain[["years"]] ## BA growth in cm2 data.spain$year <- data.spain[["years"]]; data.spain$years <- NULL; ## number of year between measurement data.spain$D <- data.spain[["dbh1"]]/10 ## diameter in mm convert to cm data.spain$dead <- as.numeric(data.spain[["Life_status"]] == "dead") ## dummy variable for dead tree 0 alive 1 dead @@ -39,9 +40,9 @@ data.spain$census <- rep(1,nrow(data.spain)) ## only one census in spain data.spain$weights <- as.vector(1/(pi * (data.spain[["R1"]])^2)) ## weights in 1/m^2 ################# #### change coordinates system of x y to be in lat long WGS84 -library(sp) -library(dismo) -library(rgdal) +library(sp,quietly=TRUE) +library(dismo,quietly=TRUE) +library(rgdal,quietly=TRUE) data.sp <- data.spain[, c("tree.id", "CX", "CY")] coordinates(data.sp) <- c("CX", "CY") # define x y proj4string(data.sp) <- CRS("+init=epsg:23030") # define projection system of our data ## EPSG CODE 23030 ED50 / UTM zone 30N @@ -51,7 +52,7 @@ data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection i data.spain$Lon <- coordinates(data.sp2)[, "CX"] data.spain$Lat <- coordinates(data.sp2)[, "CY"] ## ## plot on world map -## library(rworldmap) +## library(rworldmap,quietly=TRUE) ## newmap <- getMap(resolution = 'coarse') ## # different resolutions available plot(newmap) ## points(data.sp2,cex=0.2,col='red') @@ -67,7 +68,7 @@ rm(greco2) table(data.spain$ecocode) ## There's an eco-region with no code, and one with < 1000 sites The former we could drop as they were on the border of Spain # ### PLOT ECOREGION -library(RColorBrewer) +library(RColorBrewer,quietly=TRUE) mycols <- brewer.pal(10, "Set3") ecoreg <- unclass(data.spain$ecocode) @@ -115,7 +116,13 @@ colnames(data.spain)[colnames(data.spain) %in% c("mat", "pp", "PET")] <- c("MAT" data.spain$ecocode <- NULL colnames(data.spain)[names(data.spain) == "ecocode.merged"] <- c("ecocode") vec.abio.var.names <- c("MAT", "PP", "PET") -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name", "cluster","plot", "ecocode", "D", "G", "dead", +vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name", "cluster","plot", "ecocode", "D", "G","BA.G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","census","weights") data.tree <- subset(data.spain, select = c(vec.basic.var, vec.abio.var.names)) write.csv(data.tree,file="output/formatted/Spain/tree.csv",row.names = FALSE) + +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code +vec.abio.var.names <- c("MAT", "PP") +vec.basic.var.p <- c( "cluster", "plot", "ecocode", "Lon", "Lat", "perc.dead") +data.plot <- subset(data.spain, subset=!duplicated(data.spain$plot),select = c(vec.basic.var.p, vec.abio.var.names)) +write.csv(data.plot,file="output/formatted/Spain/plot.csv",row.names = FALSE) diff --git a/R/format.data/Sweden.R b/R/format.data/Sweden.R index 30944a68724144dec90c4095ba63be5f15e533cd..8ffaf8fbf934e70cac6f90b8d0cd72cb3036ebcf 100644 --- a/R/format.data/Sweden.R +++ b/R/format.data/Sweden.R @@ -3,7 +3,7 @@ ### MERGE sweden DATA rm(list = ls()); source("R/format.data/format.fun.R"); -library(reshape); +library(reshape,quietly=TRUE); dir.create("output/formatted/Sweden", recursive=TRUE,showWarnings=FALSE) ######################### READ DATA read individuals tree data @@ -33,30 +33,17 @@ data.swe2$DryW_t1 <- data.swe2$DryW_t2 <- data.swe2$DryW_t3 <- NULL data.swe2$census <- rep(1:2,nrow(data.swe2)/2) ## Using first census in the row data.swe2$Diameter <- data.swe2$Volume <- data.swe2$BrhAge <- NULL data.swe2$TractNr <- data.swe2$PlotNr <- data.swe2$TreeNr <- NULL -data.swe2$sp <- data.swe$TreeSpecies; data.swe$TreeSpecies <- NULL ## Species names are in the xlsx files if required +data.swe2$sp <- data.swe$TreeSpecies ; data.swe$TreeSpecies <- NULL ## Species names are in the xlsx files if required data.swe <- data.swe2 rm(data.swe2) -############## MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed -############## height to add variables to the traits data base Because we have two heights, -############## then take the max of the two heights and then bootstrap -## Obtain heights in dataset - I assume that they use the volume of a cylinder V = pi*r^2*h -data.swe$ht1 <- data.swe$vol1/(pi*(0.5*data.swe$dbh1/100)^2) ## m -data.swe$ht2 <- data.swe$vol2/(pi*(0.5*data.swe$dbh2/100)^2) ## m -res.quant.boot <- t(sapply(levels(factor(data.swe[["sp"]])), FUN = f.quantile.boot, - R = 1000, x = log10(apply(data.swe[, c("ht1", "ht2")], 1, max, na.rm = T)), - fac = factor(data.swe[["sp"]]))) -## create data base -data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, - 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3], stringsAsFactors =FALSE) -rm(res.quant.boot) -write.csv(data.max.height,file='output/formatted/Sweden/max.height.csv') ### GEORGES NOT SURE WE SHOULD USE THAT + ################# ##### change the projection of xy to wgs84 lat long. #### change coordinates system of x y to be in lat long WGS84 -library(sp) -library(dismo) -library(rgdal) +library(sp,quietly=TRUE) +library(dismo,quietly=TRUE) +library(rgdal,quietly=TRUE) data.sp <- data.swe[, c("tree.id", "ecoord", "ncoord")] coordinates(data.sp) <- c("ecoord", "ncoord") # define x y proj4string(data.sp) <- CRS("+init=epsg:3006") # define projection system of our data ## EPSG:3006 check with Bertil @@ -67,7 +54,7 @@ data.swe$Lon <- coordinates(data.sp2)[, "ecoord"] data.swe$Lat <- coordinates(data.sp2)[, "ncoord"] ## ## ## ## plot on world map -## library(rworldmap) +## library(rworldmap,quietly=TRUE) ## newmap <- getMap(resolution = 'coarse') ## ## # different resolutions available ## plot(newmap) @@ -93,6 +80,8 @@ data.swe$Year <- NULL; data.swe$year <- 5 ## number of year between measurement data.swe$G <- 10*(data.swe$dbh2-data.swe$dbh1)/data.swe$year ## diameter growth in mm per year data.swe$G[is.na(data.swe$dbh1)] <- NA +data.swe$BA.G <- (pi*(data.swe$dbh2/2)^2-pi*(data.swe$dbh1/2)^2)/data.swe$year ## BA growth in cm2 per year +data.swe$G[is.na(data.swe$dbh1)] <- NA sum(data.swe$G < 0, na.rm = T) data.swe$D <- data.swe[["dbh1"]]; ## diameter in cm data.swe$dead <- as.numeric(is.na(data.swe$dbh2)) ## dummy variable for dead tree 0 alive 1 dead @@ -103,18 +92,21 @@ data.swe$weights[data.swe$D<4] <- data.swe$PlotArea0139[data.swe$D<4] data.swe$weights[data.swe$D>=4 & data.swe$D<10] <- data.swe$PlotArea4099[data.swe$D>=4 & data.swe$D<10] data.swe$weights[data.swe$D>=10] <- data.swe$PlotArea100[data.swe$D>=10] data.swe$weights <- 1/data.swe$weights - +data.swe$sp <- paste("sp",data.swe$sp,sep=".") ######################## ECOREGION ## One ecoregion for Sweden only? -source("R/utils/ecoregions.r") +source("R/utils/ecoregions.R") data.swe$ecocode <- GetEcoregions(data.swe$Lon,data.swe$Lat) +data.swe$ecocode <- as.character(data.swe$ecocode) ### NEED TO VERIFY THAT ECOREGIONS ARE ASSIGNED PROPERLY +table(as.character(data.swe$ecocode)) +# ok -#data.swe$ecocode <- rep("A",nrow(data.swe)) -## NEED TO ASK SWEDEN - -source("R/utils/climate.r") -clim <- GetClimate(data.swe$Lon,data.swe$Lat) +#### get wc climate +source("R/utils/climate.R") +clim <- GetClimate(data.swe$Lat,data.swe$Lon) +data.swe$MAT <- clim$MAT +data.swe$MAP <- clim$MAP ######################## PERCENT DEAD @@ -129,7 +121,16 @@ data.swe <- merge(data.swe,data.frame(plot=names(perc.dead),perc.dead=perc.dead) ## data.swe <- subset(data.swe, dead == 0) ## vec.abio.var.names <- c("MAT", "MAP") ## TODO NO MAT MAP NEED TO LOAD FROM WORLDCLIM -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G","BA.G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","weights","census") -data.tree <- subset(data.swe, select = c(vec.basic.var)) +data.tree <- subset(data.swe, select = c(vec.basic.var, vec.abio.var.names)) write.csv(data.tree,file="output/formatted/Sweden/tree.csv",row.names = FALSE) + + + +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var.p <- c( "cluster", "plot", "ecocode", "Lon", "Lat", "perc.dead") +data.plot <- subset(data.swe, subset=!duplicated(data.swe$plot),select = c(vec.basic.var.p, vec.abio.var.names)) +write.csv(data.plot,file="output/formatted/Sweden/plot.csv",row.names = FALSE) diff --git a/R/format.data/Swiss.R b/R/format.data/Swiss.R index 95687c2e506f396efb6055038fc8b47e70bdb1f2..0f80ba82cd4fa700d6b1389b4f6073092f87495b 100644 --- a/R/format.data/Swiss.R +++ b/R/format.data/Swiss.R @@ -3,8 +3,8 @@ ### MERGE Swiss DATA rm(list = ls()) source("R/format.data/format.fun.R") -library(reshape) -library(foreign) +library(reshape,quietly=TRUE) +library(foreign,quietly=TRUE) dir.create("output/formatted/Swiss", recursive=TRUE,showWarnings=FALSE) ######################### READ DATA read individuals tree data @@ -24,14 +24,14 @@ colnames(data.swiss) <- c("Inventid", "siteid", "tree.id", "x", "y", "sp", "sp.n ################# ##### change the projection of xy to wgs84 lat long. #### change coordinates system of x y to be in lat long WGS84 -library(sp) -library(dismo) -library(rgdal) +library(sp,quietly=TRUE) +library(dismo,quietly=TRUE) +library(rgdal,quietly=TRUE) data.sp <- data.swiss[, c("tree.id", "x", "y")] coordinates(data.sp) <- c("x", "y") # define x y proj4string(data.sp) <- CRS("+init=epsg:21781") # define projection system of our data ## EPSG:21781 CH1903 / LV03 CHECK WITH NICK STRANGE summary(data.sp) -library(maptools) +library(maptools,quietly=TRUE) bioregion <- readShapePoly("data/raw/Swiss/bio_regions/biogregions.shp") ## plot(bioregion) ## plot(data.sp,add=TRUE) @@ -61,11 +61,12 @@ res.quant.boot <- (t(sapply(levels(factor(data.swiss2[["sp"]])), FUN = f.quantil ## create data base data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3], stringsAsFactors =FALSE) -rm(res.quant.boot) +rm(res.quant.boot,data.swiss2) write.csv(data.max.height,file='output/formatted/Swiss/max.height.csv') ########################################## FORMAT INDIVIDUAL TREE DATA data.swiss$G <- 10 * (data.swiss$dbh_diff)/data.swiss$year ## diameter growth in mm per year +data.swiss$BA.G <- (pi*data.swiss[["dbh2"]]^2/4 - pi*data.swiss[["dbh1"]]^2/4)/data.swiss$year data.swiss$D <- data.swiss[["dbh1"]] data.swiss$D[data.swiss$D == 0] <- NA ## diameter in cm data.swiss$dead <- rep(NA, nrow(data.swiss)) ## Mortality - MISSING @@ -92,8 +93,14 @@ data.swiss <- merge(data.swiss, data.frame(plot = data.climate$CLNR, swb = data. rm(data.climate) ### select good columns vec.abio.var.names <- c("MAT", "MAP", "swb") -vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G", "dead", +vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G","BA.G", "dead", "year", "htot", "Lon", "Lat", "perc.dead", "weights","census") data.tree <- subset(data.swiss, select = c(vec.basic.var, vec.abio.var.names)) write.csv(data.tree,file="output/formatted/Swiss/tree.csv",row.names = FALSE) warnings() + +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code +vec.abio.var.names <- c("MAT", "MAP","swb") +vec.basic.var.p <- c( "cluster", "plot", "ecocode", "Lon", "Lat", "perc.dead") +data.plot <- subset(data.swiss, subset=!duplicated(data.swiss$plot),select = c(vec.basic.var.p, vec.abio.var.names)) +write.csv(data.plot,file="output/formatted/Swiss/plot.csv",row.names = FALSE) diff --git a/R/format.data/US.R b/R/format.data/US.R index 4deefae321707a88d3496f64637f1926b15ec5e4..2f30eacd1e472f89c4e4cdc93d7fd2b342cf9d5d 100644 --- a/R/format.data/US.R +++ b/R/format.data/US.R @@ -38,6 +38,7 @@ data.us <- subset(data.us,subset=(data.us$InitDbh>=10) ) ############## FORMAT INDIVIDUAL TREE DATA ## change unit and names of variables to be the same in all data for the tree data.us$G <- 10 * (data.us$FinalDbh - data.us$InitDbh)/data.us$Interval ## diameter growth in mm per year +data.us$BA.G <- (pi*(data.us$FinalDbh/2)^2-pi*(data.us$InitDbh/2)^2)/data.us$Interval ## ba growth in cm^2/year data.us$G[which(data.us$InitDbh == 0 | data.us$FinalDbh == -999)] <- NA data.us$year <- data.us$Interval ## number of year between measuremen data.us$D <- data.us[["InitDbh"]] @@ -77,14 +78,25 @@ for (i in 1:length(sel.small.div)) { data.us$ecocode <- unlist(lapply(lapply(strsplit(data.us[["eco_codemerged"]], " "), FUN = substr, 1, 2), FUN = paste, collapse = ".")) -## ## plot map to check coordinates syste +## ### TEST Olson ecocoregion +## source("R/utils/ecoregions.R") +## data.us$ecocode2 <- GetEcoregions(data.us$Lon,data.us$Lat) +## data.us$ecocode2 <- as.character(data.us$ecocode2) +## ### NEED TO VERIFY THAT ECOREGIONS ARE ASSIGNED PROPERLY +## table(as.character(data.us$ecocode2)) +## table(substr(as.character(data.us$ecocode2),1,4)) +## table(as.character(data.us$ecocode)) + + + +## ## ## plot map to check coordinates syste ## library(RColorBrewer) ## library(rworldmap) ## newmap <- getMap(resolution = 'coarse') ## # different resolutions available ## plot(newmap,xlim=c(-160,-50),ylim=c(45,50)) -## ecoreg <- factor(data.us$ecocode); levels(ecoreg) <- mycols <- brewer.pal(length(levels(ecoreg)), "Set3") -## points(data.us[['Lon']],data.us[['Lat']],pty='.',cex=.2,col = as.character(ecoreg)) +## ecoreg <- factor(data.us$ecocode);# levels(ecoreg) <- mycols <- brewer.pal(length(levels(ecoreg)), "Set3") +## points(data.us[['Lon']],data.us[['Lat']],pty='.',cex=.2,col = (ecoreg)) ## legend('bottomleft', col = mycols, legend = names(table(data.us$ecocode)), pch = rep(19,length(levels(ecoreg)))) @@ -102,9 +114,16 @@ rm(greco, perc.dead, tab.small.div, sel.small.div) ## variables to keep vec.abio.var.names <- c("MAT", "MAP") -vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G", "dead", +vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G","BA.G", "dead", "year", "htot", "Lon", "Lat", "perc.dead", "weights","census") data.tree <- subset(data.us, select = c(vec.basic.var, vec.abio.var.names)) write.csv(data.tree,file="output/formatted/US/tree.csv",row.names = FALSE) + +### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var.p <- c( "cluster", "plot", "ecocode", "Lon", "Lat", "perc.dead") +data.plot <- subset(data.us, subset=!duplicated(data.us$plot),select = c(vec.basic.var.p, vec.abio.var.names)) +write.csv(data.plot,file="output/formatted/US/plot.csv",row.names = FALSE) + rm(data.us) gc() diff --git a/R/format.data/format.fun.R b/R/format.data/format.fun.R index f77429eb64f35c2a544b43b54f892dc7bc861a01..07d10ab243da086c9116cf80fc66a1eabd385bea 100644 --- a/R/format.data/format.fun.R +++ b/R/format.data/format.fun.R @@ -2,7 +2,7 @@ ################################################### ################################################### ################################################### -##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP +##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP #### G. Kunstler 11/09/2013 source("R/packages.R") check_packages(c("reshape", "boot","RColorBrewer","sp","dismo","rgdal","foreign","rworldmap")) @@ -44,7 +44,7 @@ return(species.clean) ##' @return a matrix with # col and 1 row with mean sd adn nobs ##' @author Kunstler f.quantile.boot <- function(i,x,fac,R,probs=0.99){ -require(boot) +require(boot,quietly=TRUE) if(length(na.exclude(x[fac==i]))>0){ quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs) return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1)) @@ -54,12 +54,12 @@ require(boot) } f.quantile <- function (x,ind,probs){ - require(boot) + require(boot,quietly=TRUE) quantile(x[ind],probs=probs,na.rm=TRUE) } ####################### -### function to compute number of dead per plot +### function to compute number of dead per plot function.perc.dead <- function(dead){ sum(dead)/length(dead)} @@ -79,16 +79,16 @@ function.perc.dead2 <- function(dead) { ##' .. Function to plot map of tree with circle function of their dbh.. ##' ##' .. content for \details{} .. -##' @title +##' @title ##' @param plot.select the plot for which draw the map -##' @param x -##' @param y +##' @param x +##' @param y ##' @param plot vectore of plot id for each tree ##' @param D diameter in cm ##' @param inches controling the circle size ##' @param fg.l color circle -##' @param ... -##' @return +##' @param ... +##' @return ##' @author Kunstler fun.circles.plot <- function(plot.select,x,y,plot,D,inches,fg.l,...){ x.t <- x[plot==plot.select] diff --git a/R/packages.R b/R/packages.R index 2fa5680f643cee43d9f544e7826bdf14a225d3f0..9eab8a44065d49ee2aac42a7b241951b247b17dc 100644 --- a/R/packages.R +++ b/R/packages.R @@ -8,5 +8,5 @@ check_packages <- function(packages=c("data.table", "dismo", "doParallel", "fore if(length(new.packages)) install.packages(new.packages) #load for(p in packages) - library(p, quietly=TRUE, character.only =TRUE) + suppressMessages(library(p, quietly=TRUE, character.only =TRUE) ) } diff --git a/R/process.data/georges.R b/R/process.data/georges.R new file mode 100644 index 0000000000000000000000000000000000000000..b66705c6956385cb8ef3aad90610f2253a1e0a59 --- /dev/null +++ b/R/process.data/georges.R @@ -0,0 +1,41 @@ +library(Rcpp) +setwd("../..") +sourceCpp("R/process.data/georges.cpp") + +set.seed(1) +n <- 1000 +x <- runif(n) +y <- runif(n) +d <- runif(n) +n.types <- 6 +s <- as.integer(sample(5, n, replace=TRUE)) +r <- .1 + +square <- function(x) x*x +f <- function(idx, r2) + which(square(x - x[idx]) + square(y - y[idx]) < r2) + +system.time(ans.R <- lapply(seq_len(n), f, square(r))) +system.time(ans.C <- lapply(seq_len(n), within_neighbourhood, x, y, r)) + +idx <- setdiff(f(1, square(r)), 1) +idx0 <- within_neighbourhood(0, x, y, r) + +area.R <- tapply(d[idx], s[idx], sum) +fun.areas.R <- function(id.0,x,y,r,d,s){ +idx <- setdiff(f(id.0, square(r)), 1) +return(tapply(d[idx], s[idx], sum)) +} +area.C <- areas_by_species_within_neighbourhood(0, x, y, r, d, s - 1L, n.types) +all.equal(as.numeric(area.R), area.C) + +system.time( +ans <-t( sapply(seq_len(n) - 1L, + areas_by_species_within_neighbourhood, + x, y, r, d, s - 1L, n.types))) + +system.time( +ans.R <- lapply(seq_len(n), + fun.areas.R, + x, y, r, d, factor(s))) + diff --git a/R/process.data/georges.cpp b/R/process.data/georges.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9b1fc2383c1851d9c4d23a94a6a1a336a838d92c --- /dev/null +++ b/R/process.data/georges.cpp @@ -0,0 +1,69 @@ +// compile with R CMD SHLIB georges.cpp +#include <Rcpp.h> +#include <vector> + +std::vector<int> within_neighbourhood(int idx, + std::vector<double> x, + std::vector<double> y, + double r); +std::vector<double> +areas_by_species_within_neighbourhood(int idx, + std::vector<double> x, + std::vector<double> y, + double r, + std::vector<double> d, + std::vector<int> type, + int n_types); +bool cmp(double x0, double y0, double x1, double y1, double r2); +double square(double x); + + +// NOTE: idx is in C (base 0) indexing, not R (base 1) indexing. +// [[Rcpp::export]] +std::vector<int> within_neighbourhood(int idx, + std::vector<double> x, + std::vector<double> y, + double r) { + std::vector<int> ret; + std::vector<double>::iterator xi = x.begin(), yi = y.begin(); + double r2 = square(r); + const double x0 = x[idx], y0 = y[idx]; + for (int i = 0; i < x.size(); i++, xi++, yi++) + if (cmp(x0, y0, *xi, *yi, r2) && i != idx) + ret.push_back(i); + return ret; +} + +// [[Rcpp::export]] +std::vector<double> areas_by_species(std::vector<int> idx, + std::vector<double> d, + std::vector<int> type, + int n_types) { + std::vector<double> ret(n_types); + for (std::vector<int>::const_iterator i = idx.begin(); + i != idx.end(); i++) { + ret[type[*i]] += d[*i]; + } + return ret; +} + +// [[Rcpp::export]] +std::vector<double> +areas_by_species_within_neighbourhood(int idx, + std::vector<double> x, + std::vector<double> y, + double r, + std::vector<double> d, + std::vector<int> type, + int n_types) { + return areas_by_species(within_neighbourhood(idx, x, y, r), + d, type, n_types); +} + +bool cmp(double x0, double y0, double x1, double y1, double r2) { + return square(x0 - x1) + square(y0 - y1) < r2; +} + +double square(double x) { + return x*x; +} diff --git a/R/process.data/process.fun.R b/R/process.data/process.fun.R index 8d01889e7fcb125c7862efa90d4297238c3d09fa..26de4ca647241eb280bca89943c3ada38dbaeb15 100644 --- a/R/process.data/process.fun.R +++ b/R/process.data/process.fun.R @@ -1,15 +1,15 @@ #################################################### function to process data install all unstallled packages source("R/packages.R") -check_packages(c("reshape", "data.table", "doParallel", "data.table")) +check_packages(c("reshape", "data.table", "doParallel", "data.table","Rcpp")) -######################### +######################### ##' .. Compute the basal area of competitor in a plot.. ##' ##' .. content for \details{} .. -##' @title +##' @title ##' @param diam diameters of each individuals in cm ##' @param weights weight in 1/m^2 to compute the basal area in cm^2/m^2 -##' @return basal area in cm^2/m^2 +##' @return basal area in cm^2/m^2 ##' @author Kunstler BA.fun <- function(diam, weights) { ((diam/2)^2) * pi * weights @@ -18,66 +18,50 @@ BA.fun <- function(diam, weights) { -#### +#### ##' .. function to compute the basal area of neighborood tree in plots .. ##' ##' .. content for \details{} .. -##' @title +##' @title ##' @param obs.id tree obs identifier ##' @param diam diam of tree in cm ##' @param sp species name or code ##' @param id.plot identifier of the plot ##' @param weights weights to compute basal area in cm^2/m^2 SO THE 1/AREA of teh plot (or subplot) with area in m^2 -##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots) ##' @return data frame with obs.id and one column per species with basal area of the species (without the target tree) ##' @author Kunstler -BA.SP.FUN <- function(obs.id, diam, sp, id.plot, weights, weight.full.plot) { - print(paste("Number of observation :", length(obs.id))) - require(data.table) +BA.SP.FUN <- function(obs.id, diam, sp, id.plot, weights) { + + cat("Number of observation :", length(obs.id),"\n") + require(data.table,quietly=TRUE) id.plot <- as.character(id.plot) obs.id <- as.character(obs.id) ## check equal length - if (!(length(obs.id) == length(diam) & length(obs.id) == length(sp) & length(obs.id) == - length(id.plot) & length(obs.id) == length(weights))) + if (!(length(obs.id) == length(diam) & length(obs.id) == length(sp) & length(obs.id) == + length(id.plot) & length(obs.id) == length(weights))) stop("length of obs.id diam,sp id.plot & weights need to be the same") - + ## check sp is not numeric - if (is.numeric(sp)) + if (is.numeric(sp)) stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')") - - # compute BA tot per species per plot + + # compute BA tot per species per plot and merge BASP <- tapply(BA.fun(diam, weights), INDEX = list(id.plot, sp), FUN = sum, na.rm = T) DATA.BASP <- data.table(id.plot = rownames(BASP), BASP) setnames(DATA.BASP, old = 1:ncol(DATA.BASP), c("id.plot", colnames(BASP))) setkeyv(DATA.BASP, c("id.plot")) sp.name <- colnames(BASP) - rm(BASP) print("plot level table created") #### MERGE with indivudal tree use library(data.table) - if (!is.na(weight.full.plot)) { - data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam, - BA.indiv = BA.fun(diam, rep(weight.full.plot, length(diam)))) - setkeyv(data.indiv, "id.plot") - print("tree level table created") - data.merge <- merge(data.indiv, DATA.BASP) - print("merge done") - # substract target BA - for (i in (sp.name)) { - eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]", - sep = ""))) - } - - } else { - data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam, - weights = weights, BA.indiv = BA.fun(diam, weights)) - setkeyv(data.indiv, "id.plot") - print("tree level table created") - data.merge <- merge(data.indiv, DATA.BASP) - print("merge done") - for (i in (sp.name)) { - eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]", - sep = ""))) - } + data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam, + weights = weights, BA.indiv = BA.fun(diam, weights)) + setkeyv(data.indiv, "id.plot") + print("tree level table created") + data.merge <- merge(data.indiv, DATA.BASP) + print("merge done") + for (i in (sp.name)) { + eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]", + sep = ""))) } print("substraction of target tree BA done") #### delete column not used @@ -90,12 +74,27 @@ BA.SP.FUN <- function(obs.id, diam, sp, id.plot, weights, weight.full.plot) { return((data.merge)) } -###################### apply BA.SP.FUN per census function to run on a subset to only one census -BA.SP.FUN.l <- function(census.id, census, obs.id, diam, sp, id.plot, weights, weight.full.plot) { - return(BA.SP.FUN(obs.id = obs.id[census == census.id], diam = diam[census == - census.id], sp = sp[census == census.id], id.plot = id.plot[census == census.id], - weights = weights[census == census.id], weight.full.plot = weight.full.plot)) +###################### +##' .. apply BA.SP.FUN per census. function to run on only one census .. +##' +##' .. content for \details{} .. +##' @title +##' @param census.id name of census to format other variables same as above +##' @param census +##' @param obs.id +##' @param diam +##' @param sp +##' @param id.plot +##' @param weights +##' @return +##' @author Kunstler +BA.SP.FUN.l <- function(census.id, census, obs.id, diam, sp, id.plot, weights) { + + return(BA.SP.FUN(obs.id = obs.id[census == census.id], diam = diam[census == + census.id], sp = sp[census == census.id], id.plot = id.plot[census == census.id], + weights = weights[census == census.id])) } + ## function to apply over all census and merge back together ##' .. function to apply competition index computation over all census and merge back together .. ##' @@ -107,116 +106,251 @@ BA.SP.FUN.l <- function(census.id, census, obs.id, diam, sp, id.plot, weights, w ##' @param sp vector of species of length N ##' @param id.plot vector of id.plot of length N ##' @param weights vector of weight in 1/m^2 of length N -##' @param weight.full.plot optional if want to use a different weight to remove the BA of target tree (if NA weights is use instead) ##' @return a dta.table of dim N & length(unique(sp))+1 (obs.id and basal area of all species ##' @author Kunstler -BA.SP.FUN.census <- function(census, obs.id, diam, sp, id.plot, weights, weight.full.plot) { - require(data.table) +BA.SP.FUN.census <- function(census, obs.id, diam, sp, id.plot, weights) { + require(data.table,quietly=TRUE) unique.census <- unique(census) - res.list <- lapply(unique.census, FUN = BA.SP.FUN.l, census, obs.id, diam, sp, - id.plot, weights, weight.full.plot) + res.list <- lapply(unique.census, FUN = BA.SP.FUN.l, census, obs.id, diam, sp, + id.plot, weights) res.mat <- rbind.fill(res.list) res.mat <- data.table(res.mat[match(obs.id, res.mat[["obs.id"]]), ]) return(res.mat) } -#### +#### ##' .. function compute competition index with X Y coordinates based on a neighborhood of radius Rlim using a global distance matrix .. ##' ##' .. content for \details{} .. -##' @title +##' @title ##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment ##' @param xy.table table with x.y of teh individual ##' @param diam diam in cm ##' @param sp species ##' @param Rlim radius of neighborhood search -##' @param parallel TRUE/FALSE run in paralle or not -##' @param rpuDist TRUE/FALSE run with GPU distance computation (need top install additional software only on linux) +##' @param parallel TRUE/FALSE run in paralle or not ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler -BA.SP.FUN.XY <- function(obs.id, xy.table, diam, sp, Rlim, parallel = FALSE, rpuDist = FALSE) { - print(paste("n observation = ", nrow(xy.table))) +BA.SP.FUN.XY <- function(obs.id, xy.table, diam, sp, Rlim, parallel) { + cat("n observation = ", nrow(xy.table),"\n") print("start computing distance matrix") - if (rpuDist) { - require(rpud) - dist.mat <- rpuDist(xy.table, upper = TRUE, diag = TRUE) - } else { - dist.mat <- as.matrix(dist(xy.table, upper = TRUE, diag = TRUE)) - } + dist.mat <- as.matrix(dist(xy.table, upper = TRUE, diag = TRUE)) print("distance matrix computed") dist.mat[dist.mat < Rlim] <- 1 dist.mat[dist.mat > Rlim] <- 0 diag(dist.mat) <- 0 print("distance matrix set to 0 1") BA <- BA.fun(diam, weights = 1/(pi * Rlim^2)) - print(c(length(BA), length(diam))) BA.mat <- matrix(rep(BA, length(BA)), nrow = length(BA), byrow = TRUE) print("starting tapply over species") fun.sum.sp <- function(x, sp) tapply(x, INDEX = sp, FUN = sum, na.rm = TRUE) if (parallel) { ## parallel version - require(doParallel) + require(doParallel,quietly=TRUE) registerDoParallel(cores = 4) mat <- dist.mat * BA.mat res.temp <- foreach(i = 1:nrow(mat), .combine = rbind) %dopar% { fun.sum.sp(mat[i, ], sp) } res.temp <- data.frame(obs.id = obs.id, res.temp) + ## replace NA per zero + print("replacing NA per zero") + res.temp[is.na(res.temp)] <- 0 + print("done") return((res.temp)) } else { - res.temp <- data.frame(obs.id = obs.id, t(apply(dist.mat * BA.mat, MARGIN = 1, + res.temp <- data.frame(obs.id = obs.id, t(apply(dist.mat * BA.mat, MARGIN = 1, FUN = fun.sum.sp, sp))) + ## replace NA per zero + print("replacing NA per zero") + res.temp[is.na(res.temp)] <- 0 + print("done") return(res.temp) } } +## TODO CHANGE THIS FUNCTION TO COMPUTE DIRECTLY the sum of weighted traits mean per traits for each traits with filling all or only species + genus or only species and compute the percentage of missing traits at species or genus and this for log(BA+1) or BA + + +fun.weighted.mean.traits <- function(BA,T){ +sum(BA/sum(BA)*T) +} + + + + +fun.add.missing.sp.NA <- function(trait.t,vec.sp,traits.data){ +trait.t <- c(trait.t,rep(NA,sum(! (vec.sp %in% traits.data[["sp"]])))) +names(trait.t) <- c(as.character(traits.data[traits.data[["sp"]] %in% vec.sp,"sp"]), + as.character(vec.sp[! (vec.sp %in% traits.data[["sp"]])])) +trait <- (trait.t)[match(vec.sp,names(trait.t))] +return(trait) +} + +fun.std.trait <- function(trait){ +(trait - mean(trait,na.rm=TRUE))/sd(trait,na.rm=TRUE) +} + +## assume that I will input standaridized traits data + +# FUNCTION TO EXTRACT mean and sd for a list of species from traits.data a data frame with sp mean and sd +fun.trait.format <- function(traits.data,trait.name,genus.name,vec.sp){ + ## mean traits + trait.t <- traits.data[traits.data[["sp"]] %in% vec.sp,trait.name] + ## genus traits + genus.t <- traits.data[traits.data[["sp"]] %in% vec.sp,genus.name] + ## add species missing in data base + trait <- fun.add.missing.sp.NA(trait.t,vec.sp,traits.data) + # genus + genus <- fun.add.missing.sp.NA(genus.t,vec.sp,traits.data) + ## fill with mean or keep only species value + trait.fill <- trait.species <-trait.genus <-trait + trait.fill[is.na(trait)] <- mean( traits.data[,"mean"],na.rm=TRUE) + trait.species[genus==TRUE & !is.na(genus)] <- NA + return(cbind(trait.species,trait.genus,trait.fill)) +} + +,traits.data,trait.name,genus.name +## function for one traits +format.one.trait.CWM <- function(trait.list,i,BA){ +mat.t <- trait.list[[1]] +trait.sp.id <- trait.list[[2]][i] +if(!is.na(trait.sp.id)){ + BA.no0 <- BA[BA>0] + mat.t.no0 <- mat.t[BA>0,] + t.species <- fun.weighted.mean.traits(BA.no0,mat.t.no0[,1]) + t.genus <- fun.weighted.mean.traits(BA.no0,mat.t.no0[,2]) + t.fill <- fun.weighted.mean.traits(BA.no0,mat.t.no0[,3]) + t.species.log <- fun.weighted.mean.traits(log(BA.no0+1),mat.t.no0[,1]) + t.genus.log <- fun.weighted.mean.traits(log(BA.no0+1),mat.t.no0[,2]) + t.fill.log <- fun.weighted.mean.traits(log(BA.no0+1),mat.t.no0[,3]) + #abs dist + abs.t.species <- fun.weighted.mean.traits(BA.no0,abs(trait.sp.id-mat.t.no0[,1])) + abs.t.genus <- fun.weighted.mean.traits(BA.no0,abs(trait.sp.id-mat.t.no0[,2])) + abs.t.fill <- fun.weighted.mean.traits(BA.no0,abs(trait.sp.id-mat.t.no0[,3])) + abs.t.species.log <- fun.weighted.mean.traits(log(BA.no0+1),abs(trait.sp.id-mat.t.no0[,1])) + abs.t.genus.log <- fun.weighted.mean.traits(log(BA.no0+1),abs(trait.sp.id-mat.t.no0[,2])) + abs.t.fill.log <- fun.weighted.mean.traits(log(BA.no0+1),abs(trait.sp.id-mat.t.no0[,3])) + # compute percentage of traits obs + perc.species <- sum(!is.na(mat.t.no0[,1]))/length(BA.no0) + perc.genus <- sum(!is.na(mat.t.no0[,2]))/length(BA.no0) + }else{ + t.species <- t.genus <- t.fill <- NA + t.species.log <- t.genus.log <- t.fill.log <- NA + abs.t.species <- abs.t.genus <- abs.t.fill <- NA + abs.t.species.log <- abs.t.genus.log <- abs.t.fill.log <- NA + prec.species <- perc.genus <- NA + } +return(c(trait.sp.id, + t.species,t.genus,t.fill,abs.t.species,abs.t.genus,abs.t.fill, + t.species.log,t.genus.log,t.fill.log,abs.t.species.log,abs.t.genus.log,abs.t.fill.log, + perc.species,perc.genus)) +} + + +format.one.trait.CWM.xy <- function(i,traits.list,sp.num.sp.length,xy.table,BA.a,Rlim){ + require(Rcpp,quietly=TRUE) +## compute local BA +BA.n <- areas_by_species_within_neighbourhood(i - 1L, + xy.table[,"x"], xy.table[,"y"], Rlim, BA.a, sp.num - 1L, sp.length) +BATOT <- sum(BA.n) +BATOT.log <- sum(log(BA.n+1)) +## compute community weighted mean traits +res.list <- lapply(traits.list,FUN=format.one.trait.CWM,i,BA.n) +## create matrix of data +res.vec <- c(i,unlist(res.list),BATOT,BATOT.log) +names(res.vec) <- c("obs.id",unlist(lapply(names(traits.list),FUN=function(x,y) paste(x,y,sep="."),y= + c("focal","CWM.species","CWM.genus","CWM.fill", + "abs.CWM.species","abs.CWM.genus","abs.CWM.fill", + "CWM.species.log","CWM.genus.log","CWM.fill.log", + "abs.CWM.species.log","abs.CWM.genus.log","abs.CWM.fill.log", + "perc.species","perc.genus"))),"BATOT","BATOT.log") +return(res.vec) +} + +## need same but with data from plot + + -#### +## function that generate the list traits data +format.traits.list <- function(sp.vec,data.TRAITS){ + + + + data.TRAITS.s <- subset(data.TRAITS, subset = data.TRAITS[[sp.code]] %in% data[[sp.code2]]) + +traits.mean <- c("Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.density.mean", + "Max.height.mean") + +traits.genus <- c("Leaf.N.genus","Seed.mass.genus","SLA.genus","Wood.density.genus", + "Max.height.genus") + +##TODO ADD TEST GOOD ORDER + +## TOD add test good range of BATOT + +## TODO +## 1. FUNCTION THAT COMPUTE TEH TRAITS MATRIX FOR ALL SPECIES +## 3. LLOP OVER ALL TRAITS + + + + +library(Rcpp) +sourceCpp("R/process.data/georges.cpp") + +#### ##' .. function to compute competition index with X Y coordinates based on a neighborhood of radius R without the global distance matrix.. ##' ##' .. content for \details{} .. -##' @title +##' @title ##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment -##' @param xy.table table with x.y of teh individual +##' @param xy.table table with x.y of the individual ##' @param diam diam in cm ##' @param sp species ##' @param Rlim radius of neighborhood search -##' @param parallel run in paralle or not ? +##' @param parallel run in paralle or not : TRUE / FALSE ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler -BA.SP.FUN.XY2 <- function(obs.id, xy.table, diam, sp, Rlim, parallel) { - require(plyr) - +BA.SP.FUN.XY2 <- function(obs.id, xy.table, diam, sp, Rlim,parallel) { + require(Rcpp,quietly=TRUE) + require(plyr,quietly=TRUE) rownames(xy.table) <- obs.id - print(paste("n observation = ", nrow(xy.table))) - print("start computing distance matrix") - ## create local function - fun.compute.local <- function(obs.id.t, obs.id, xy.table, diam, sp, Rlim) { - dist.t <- as.vector(sqrt((outer(xy.table[obs.id == obs.id.t, "x"], xy.table[, - "x"], FUN = "-"))^2 + (outer(xy.table[obs.id == obs.id.t, "y"], xy.table[, - "y"], FUN = "-"))^2)) - dist.t[dist.t <= Rlim & dist.t>0] <- 1 - dist.t[dist.t > Rlim] <- 0 - - res.BA.t <- tapply(BA.fun(diam*dist.t, weights = dist.t/(pi * Rlim^2)), INDEX = sp, - FUN = sum, na.rm = TRUE) - res.BA.t[is.na(res.BA.t)] <- 0 - return(data.frame(obs.id = obs.id.t, t(res.BA.t))) - } + cat("n observation = ", nrow(xy.table),"\n") +BA.a <- BA.fun(diam, weights = 1/(pi * Rlim^2)) +sp.num <- unclass(factor(sp)) +sp.name <- levels(factor(sp)) +n <- length(obs.id) + + # lapply function if (parallel) { - library(doParallel) - print("start mclapply") - list.BA.SP.data <- mclapply(obs.id, FUN = fun.compute.local, obs.id, xy.table, - diam, sp, Rlim, mc.cores = 4) - print("done") - data.BA.SP <- rbind.fill(list.BA.SP.data) - print("data.frame created") + ## parallel version + require(doParallel,quietly=TRUE) + registerDoParallel(cores = 4) + ans <- foreach(i = seq.int(n) - 1L, .combine = rbind) %dopar% { +areas_by_species_within_neighbourhood(i - 1L, + xy.table[,"x"], xy.table[,"y"], Rlim, BA.a, sp.num - 1L, length(table(sp.num))) + + } + print("lapply done") + BATOT <- apply(ans, MARGIN = 1, FUN = sum) + data.BA.SP <- cbind(obs.id,(ans),BATOT) + colnames(data.BA.SP) <- c("obs.id",sp.name,"BATOT") + print("data.frame created") + + } else { - list.BA.SP.data <- lapply(obs.id, FUN = fun.compute.local, obs.id, xy.table, - diam, sp, Rlim) - data.BA.SP <- rbind.fill(list.BA.SP.data) + print("start computing in C") + ans <- t(sapply(seq.int(n) - 1L, + areas_by_species_within_neighbourhood, + xy.table[,"x"], xy.table[,"y"], Rlim, BA.a, sp.num - 1L, length(table(sp.num))) ) + print("lapply done") + BATOT <- apply(ans, MARGIN = 1, FUN = sum) + data.BA.SP <- cbind(obs.id,(ans),BATOT) + colnames(data.BA.SP) <- c("obs.id",sp.name,"BATOT") + print("data.frame created") } return(data.BA.SP) } @@ -224,70 +358,78 @@ BA.SP.FUN.XY2 <- function(obs.id, xy.table, diam, sp, Rlim, parallel) { -## function for lapply per census -BA.SP.FUN.XY.l <- function(census.id, census, obs.id, xy.table, diam, sp, Rlim, parallel, - rpuDist) { - print(dim(xy.table)) +##test function +## source('R/process.data/process.fun.R'); +## ## system.time(process_bigplot_dataset('BCI')) + +## t2 <- c() +## t2[1] <- (system.time(BA.SP.FUN.XY2(obs.id=1:120, xy.table=data.frame(x=runif(n=120,0,100),y=runif(n=120,0,100)), diam=rep(1,120), sp=rep(c("a","b","c"),40), Rlim=15,parallel=T) ))[1] +## t2[2] <- (system.time(BA.SP.FUN.XY2(obs.id=1:1200, xy.table=data.frame(x=runif(n=1200,0,100),y=runif(n=1200,0,100)), diam=rep(1,1200), sp=rep(c("a","b","c"),400), Rlim=15,parallel=T) ))[1] +## t2[3] <- (system.time(BA.SP.FUN.XY2(obs.id=1:12000, xy.table=data.frame(x=runif(n=12000,0,100),y=runif(n=12000,0,100)), diam=rep(1,12000), sp=rep(c("a","b","c"),4000), Rlim=15,parallel=T) ))[1] +## t2[4] <- (system.time(BA.SP.FUN.XY2(obs.id=1:24000, xy.table=data.frame(x=runif(n=24000,0,100),y=runif(n=24000,0,100)), diam=rep(1,24000), sp=rep(c("a","b","c"),8000), Rlim=15,parallel=T) ))[1] + + +## t1 <- c() +## t1[1] <- (system.time(BA.SP.FUN.XY(obs.id=1:120, xy.table=data.frame(x=runif(n=120,0,100),y=runif(n=120,0,100)), diam=rep(1,120), sp=rep(c("a","b","c"),40), Rlim=15,parallel=F) ))[1] +## t1[2] <- (system.time(BA.SP.FUN.XY(obs.id=1:1200, xy.table=data.frame(x=runif(n=1200,0,100),y=runif(n=1200,0,100)), diam=rep(1,1200), sp=rep(c("a","b","c"),400), Rlim=15,parallel=F) ))[1] +## t1[3] <- (system.time(BA.SP.FUN.XY(obs.id=1:12000, xy.table=data.frame(x=runif(n=12000,0,100),y=runif(n=12000,0,100)), diam=rep(1,12000), sp=rep(c("a","b","c"),4000), Rlim=15,parallel=F) ))[1] +## t1[4] <- (system.time(BA.SP.FUN.XY(obs.id=1:24000, xy.table=data.frame(x=runif(n=24000,0,100),y=runif(n=24000,0,100)), diam=rep(1,24000), sp=rep(c("a","b","c"),8000), Rlim=15,parallel=F) ))[1] + + + + +## function for lapply per census apply global didtance matrix if less than 10000 obs or not if more because matrix to big for R memory +BA.SP.FUN.XY.l <- function(census.id, census, obs.id, xy.table, diam, sp, Rlim, parallel) { + cat("number of tree in census",census.id," is ",nrow(xy.table[census == census.id,]),"\n") if (length(obs.id[census == census.id]) < 10000) { - data.temp <- BA.SP.FUN.XY(obs.id = obs.id[census == census.id], xy.table = xy.table[census == - census.id, ], diam = diam[census == census.id], sp = sp[census == census.id], - Rlim = Rlim, parallel, rpuDist) + data.temp <- BA.SP.FUN.XY(obs.id = obs.id[census == census.id], xy.table = xy.table[census == + census.id, ], diam = diam[census == census.id], sp = sp[census == census.id], + Rlim = Rlim, parallel) } else { - data.temp <- BA.SP.FUN.XY2(obs.id = obs.id[census == census.id], xy.table = xy.table[census == - census.id, ], diam = diam[census == census.id], sp = sp[census == census.id], + data.temp <- BA.SP.FUN.XY2(obs.id = obs.id[census == census.id], xy.table = xy.table[census == + census.id, ], diam = diam[census == census.id], sp = sp[census == census.id], Rlim = Rlim, parallel) } return(data.temp) } -### wrapping function to run BA.SP.FUN.XY per census and merge together all census -BA.SP.FUN.XY.census <- function(census, obs.id, xy.table, diam, sp, Rlim, parallel = FALSE, - rpuDist = FALSE) { +### wrapping function to run BA.SP.FUN.XY per census and merge back together all census +BA.SP.FUN.XY.census <- function(census, obs.id, xy.table, diam, sp, Rlim, parallel) { unique.census <- unique(census) - print(paste("Vector of census to lapply over", paste(unique.census, collapse = ","), - sep = " ")) - res.list <- lapply(unique.census, FUN = BA.SP.FUN.XY.l, census = census, obs.id = obs.id, - xy.table = xy.table, diam = diam, sp = sp, Rlim = Rlim, parallel, rpuDist) + cat("Vector of census to lapply over", paste(unique.census, collapse = ","),"\n") + res.list <- lapply(unique.census, FUN = BA.SP.FUN.XY.l, census = census, obs.id = obs.id, + xy.table = xy.table, diam = diam, sp = sp, Rlim = Rlim, parallel) res.mat <- rbind.fill(res.list) res.DF <- res.mat[match(obs.id, res.mat[["obs.id"]]), ] return(res.DF) } -##################################### FUNCTION TO COMPUTE BA.SP.XY PER PLOT function to be lapply per site -##' .. FUNCTION TO COMPUTE BA.SP.XY PER PLOT to be lapply over all plot or only one plot .. +##################################### FUNCTION TO COMPUTE BA.SP.XY PER CLUSTER function to be lapply per site +##' .. FUNCTION TO COMPUTE BA.SP.XY PER CLUSTER to be lapply over all cluster or only one cluster .. ##' ##' .. content for \details{} .. -##' @title fun.compute.BA.SP.XY.per.plot -##' @param i selected plot to compute BA -##' @param data.tree data.frame with column plot, census, D, sp and xy.name +##' @title fun.compute.BA.SP.XY.per.cluster +##' @param i selected cluster to compute BA +##' @param data.tree data.frame with column cluster, census, D, sp and xy.name ##' @param Rlim maximum radius to compute local competition index ##' @param xy.name name of variable for x and y (vector of length 2) ##' @param parallel TRUE/FALSE run in parallel -##' @param rpuDist TRUE/FALSE use rpuDist to compute distance matrix only available on linux with additional software -##' @return a data.frame with nrow = length of obs.id[plot==i] and ncol =unique(sp)+ 2 (BATOT added) +##' @return a data.frame with nrow = length of obs.id[cluster==i] and ncol =unique(sp)+ 2 (BATOT added) ##' @author Kunstler -fun.compute.BA.SP.XY.per.plot <- function(i, data.tree, Rlim, xy.name = c("x", "y"), - parallel = FALSE, rpuDist = FALSE) { - data.tree.s <- subset(data.tree, subset = data.tree[["plot"]] == i) - print(paste("size of the data for plot", i, "is", nrow(data.tree.s))) - BA.SP.temp <- BA.SP.FUN.XY.census(census = data.tree.s[["census"]], obs.id = data.tree.s[["obs.id"]], - xy.table = data.tree.s[, xy.name], diam = data.tree.s[["D"]], sp = (data.tree.s[["sp"]]), - Rlim = Rlim, parallel, rpuDist) - ## replace NA per zero - print("replacing NA per zero") - BA.SP.temp[is.na(BA.SP.temp)] <- 0 - print("done") - print(head(BA.SP.temp)) +fun.compute.BA.SP.XY.per.cluster <- function(i, data.tree, Rlim, xy.name = c("x", "y"), + parallel = FALSE) { + data.tree.s <- subset(data.tree, subset = data.tree[["cluster"]] == i) + print(paste("size of the data for cluster", i, "is", nrow(data.tree.s))) + BA.SP.temp <- BA.SP.FUN.XY.census(census = data.tree.s[["census"]], obs.id = data.tree.s[["obs.id"]], + xy.table = data.tree.s[, xy.name], diam = data.tree.s[["D"]], sp = (data.tree.s[["sp"]]), + Rlim = Rlim, parallel) ### TEST GOOD ORDER - if (sum(!(BA.SP.temp[["obs.id"]]) == data.tree.s[["obs.id"]]) > 0) + if (sum(!(BA.SP.temp[["obs.id"]]) == data.tree.s[["obs.id"]]) > 0) stop("rows not in the good order") - if (sum(!colnames(BA.SP.temp[, -1]) == as.character((levels(data.tree.s[["sp"]])))) > - 0) + if (sum(!colnames(BA.SP.temp[, -c(1,ncol(BA.SP.temp))]) == as.character((levels(data.tree.s[["sp"]])))) > + 0) stop("colnames does mot match species name") - ### compute sum per row - BATOT <- apply(BA.SP.temp[, -1], MARGIN = 1, FUN = sum) - data.res <- data.frame(BA.SP.temp, BATOT = BATOT, stringsAsFactors = FALSE) - return(data.res) + return(BA.SP.temp) } @@ -295,9 +437,9 @@ fun.compute.BA.SP.XY.per.plot <- function(i, data.tree, Rlim, xy.name = c("x", " ########################## function to replace missing value per zero in competition matrix function.replace.NA.negative <- function(data.BA.SP) { for (i in 2:ncol(data.BA.SP)) { - eval(parse(text = paste("data.BA.SP[is.na(", names(data.BA.SP)[i], "),", + eval(parse(text = paste("data.BA.SP[is.na(", names(data.BA.SP)[i], "),", names(data.BA.SP)[i], ":=0]", sep = ""))) - eval(parse(text = paste("data.BA.SP[", names(data.BA.SP)[i], "<0,", names(data.BA.SP)[i], + eval(parse(text = paste("data.BA.SP[", names(data.BA.SP)[i], "<0,", names(data.BA.SP)[i], ":=0]", sep = ""))) } print("NA and negative replaced") @@ -305,93 +447,101 @@ function.replace.NA.negative <- function(data.BA.SP) { } ############################################################## function to generate data in good format per ecoregion -fun.data.per.ecoregion <- function(ecoregion, data.tot, weight.full.plot, site.name, +fun.data.per.ecoregion <- function(ecoregion, data.tot, site.name, data.TRAITS = NA, sp.code = "sp", sp.code2 = "sp", out.dir = "output/processed/") { - if (is.null(data.tot[["weights"]])) + if (is.null(data.tot[["weights"]])) stop("Please create a weights vector(1/m^2), even if it is completely NA") data.tot <- data.table(data.tot) data <- data.tot[ecocode == ecoregion, ] rm(data.tot) - - print(paste("number of obs in ecoregion", ecoregion, " = ", nrow(data))) - - path <- file.path(out.dir, site.name, ecoregion) - dir.create(path, recursive = TRUE, showWarnings = FALSE) - - print("start computing competition index") - data.BA.SP <- BA.SP.FUN.census(census = data[["census"]], obs.id = as.vector(data[["obs.id"]]), - diam = as.vector(data[["D"]]), sp = as.vector(data[["sp"]]), id.plot = as.vector(data[["plot"]]), - weights = data[["weights"]], weight.full.plot = weight.full.plot) - - data$obs.id <- as.character(data$obs.id) - data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id) - - print("competition index computed") - ## change NA and <0 data for 0 - data.BA.SP <- function.replace.NA.negative(data.BA.SP) - ### CHECK IF sp and sp name for column are the same - if (sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) > 0) - stop("competition index sp name not the same as in data") - #### compute BA tot for all competitors data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## - #### slower than apply why?? - BATOT.s <- apply(data.frame(data.BA.SP)[, -1], MARGIN = 1, FUN = sum) - data.BA.SP[, `:=`(BATOT, BATOT.s)] - print("BATOT COMPUTED") - ### create data frame - DT.temp <- data.table(obs.id = data[["obs.id"]], ecocode = data[["ecocode"]]) - setkeyv(DT.temp, "obs.id") - setkeyv(data.BA.SP, "obs.id") - data.BA.sp <- merge(DT.temp, data.BA.SP) - ## reorder data - data <- data.table(data) - setkeyv(data, "obs.id") - ## test if same order - if (sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) > 0) - stop("competition index not in the same order than data") - - ## ADD TRY DATA IF NEEDED - data.TRAITS.s <- NA - if (is.data.frame(data.TRAITS)) - data.TRAITS.s <- subset(data.TRAITS, subset = data.TRAITS[[sp.code]] %in% data[[sp.code2]]) - - write.csv(data, file = file.path(path, "data.tree.csv"), quote = FALSE, row.names = FALSE) - write.csv(data.BA.sp, file = file.path(path, "data.BA.SP.csv"), quote = FALSE, - row.names = FALSE) - write.csv(data.TRAITS.s, file = file.path(path, "data.traits.csv"), quote = FALSE, - row.names = FALSE) + ### TEST IF enough species diversity in the ecoregion to run analysis + thres.prop <- 0.05 + min.sp <- 3 + if(sum((table(data[["sp"]])/length(data[["sp"]]))>thres.prop)>min.sp){ + # enough species to analyse this ecoregion + print(paste("number of obs in ecoregion", ecoregion, " = ", nrow(data))) + print(paste("number of species ",sum((table(data[["sp"]])/length(data[["sp"]]))>thres.prop))) + path <- file.path(out.dir, site.name, ecoregion) + dir.create(path, recursive = TRUE, showWarnings = FALSE) + print("start computing competition index") + data.BA.SP <- BA.SP.FUN.census(census = data[["census"]], obs.id = as.vector(data[["obs.id"]]), + diam = as.vector(data[["D"]]), sp = as.vector(data[["sp"]]), id.plot = as.vector(data[["plot"]]), + weights = data[["weights"]]) + data$obs.id <- as.character(data$obs.id) + data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id) + print("competition index computed") + ## change NA and <0 data for 0 + data.BA.SP <- function.replace.NA.negative(data.BA.SP) + ### CHECK IF sp and sp name for column are the same + if (sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) > 0) + stop("competition index sp name not the same as in data") + #### compute BA tot for all competitors data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## + #### slower than apply why?? + BATOT.s <- apply(data.frame(data.BA.SP)[, -1], MARGIN = 1, FUN = sum) + data.BA.SP[, `:=`(BATOT, BATOT.s)] + print("BATOT COMPUTED") + # test if range of BATOT OK + if (max(BATOT.s,na.rm=TRUE)>250 | max(BATOT.s,na.rm=TRUE)<20) stop("max of BATOT outside 20 250m^2/ha") + ### create data frame and merge + DT.temp <- data.table(obs.id = data[["obs.id"]], ecocode = data[["ecocode"]]) + setkeyv(DT.temp, "obs.id") + setkeyv(data.BA.SP, "obs.id") + data.BA.sp <- merge(DT.temp, data.BA.SP) + ## reorder data + data <- data.table(data) + setkeyv(data, "obs.id") + ## test if same order + if (sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) > 0) + stop("competition index not in the same order than data") + ## ADD TRAITS DATA IF POSSIBLE + data.TRAITS.s <- NA + if (is.data.frame(data.TRAITS)) + data.TRAITS.s <- subset(data.TRAITS, subset = data.TRAITS[[sp.code]] %in% data[[sp.code2]]) + ## write data + write.csv(data, file = file.path(path, "data.tree.csv"), quote = FALSE, row.names = FALSE) + write.csv(data.BA.sp, file = file.path(path, "data.BA.SP.csv"), quote = FALSE, + row.names = FALSE) + write.csv(data.TRAITS.s, file = file.path(path, "data.traits.csv"), quote = FALSE, + row.names = FALSE) + }else{ + print(paste("Not enough species in ecoregion", ecoregion)) + print(paste("number of species ",sum((table(data[["sp"]])/length(data[["sp"]]))>thres.prop))) + } } ####### FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT -fun.data.per.bigplot <- function(data, site.name, data.TRAITS = NA, Rlim = 15, xy.name, - parallel = FALSE, rpuDist = FALSE, sp.code = "sp", sp.code2 = "sp", out.dir = "output/processed/") { - require(data.table) - - path <- file.path(out.dir, site.name) +fun.data.per.bigplot <- function(data, site.name, data.TRAITS = NA, Rlim = 15, xy.name, + parallel = FALSE, sp.code = "sp", sp.code2 = "sp", out.dir = "output/processed/") { + require(data.table,quietly=TRUE) + path <- file.path(out.dir, site.name,"tropical") dir.create(path, recursive = TRUE, showWarnings = FALSE) - data$sp <- factor(data$sp) data.TRAITS$sp <- factor(data.TRAITS$sp) - print(paste("plots :", unique(data[["plot"]]))) + print(paste("clusters :", unique(data[["cluster"]]))) ## compute competition matrix - list.BA.SP.data <- lapply(unique(data[["plot"]]), FUN = fun.compute.BA.SP.XY.per.plot, - data = data, Rlim = Rlim, xy.name = xy.name, parallel = TRUE, rpuDist = FALSE) + list.BA.SP.data <- lapply(unique(data[["cluster"]]), FUN = fun.compute.BA.SP.XY.per.cluster, + data = data, Rlim = Rlim, xy.name = xy.name, parallel = parallel) print("competition index computed in list") data.BA.SP <- rbind.fill(list.BA.SP.data) print("competition index computed") - - ### test - if (sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]), "BATOT"))) > 0) + ### test species + if (sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]), "BATOT"))) > 0) stop("competition index sp name not the same as in data") - + # test if range of BATOT OK + if (max(data.BA.SP$BATOT,na.rm=TRUE)>250 | max(data.BA.SP$BATOT,na.rm=TRUE)<20) stop("max of BATOT outside 20 250m^2/ha") + ### REMOVE TREE IN BUFFER ZONE - obs.id.no.buffer <- subset(data, subset = ((data[["x"]] - 15) > 0 & (data[["x"]] + - 15) < max(data[["x"]])) & ((data[["y"]] - 15) > 0 & (data[["y"]] + 15) < - max(data[["y"]])), select = "obs.id") - data <- subset(data, subset = data[["obs.id"]] %in% as.character(obs.id.no.buffer[, - 1])) - data.BA.SP <- subset(data.BA.SP, subset = data.BA.SP[["obs.id"]] %in% as.character(obs.id.no.buffer[, - 1])) + obs.id.no.buffer <- subset(data, subset = ((data[["x"]] - 15) > 0 & + (data[["x"]] + 15) < max(data[["x"]])) & + ((data[["y"]] - 15) > 0 & + (data[["y"]] + 15) < max(data[["y"]])), + select = "obs.id") + + data <- subset(data, subset = data[["obs.id"]] %in% + as.character(obs.id.no.buffer[,1])) + data.BA.SP <- subset(data.BA.SP,subset = data.BA.SP[["obs.id"]] %in% + as.character(obs.id.no.buffer[,1])) ## reorder data data$obs.id <- as.character(data$obs.id) data <- data.table(data) @@ -400,27 +550,27 @@ fun.data.per.bigplot <- function(data, site.name, data.TRAITS = NA, Rlim = 15, x data.BA.SP <- data.table(data.BA.SP) setkeyv(data.BA.SP, "obs.id") ## test if same order - if (sum(!as.character(data.BA.SP[["obs.id"]]) == as.character(data[["obs.id"]])) > - 0) + if (sum(!as.character(data.BA.SP[["obs.id"]]) == + as.character(data[["obs.id"]])) > 0) stop("competition index not in the same order than data") ## ADD TRAITS DATA IF POSSIBLE data.TRAITS.s <- NA - data.TRAITS.s <- subset(data.TRAITS, subset = data.TRAITS[[sp.code]] %in% data[[sp.code2]]) + data.TRAITS.s <- subset(data.TRAITS, subset = data.TRAITS[[sp.code]] %in% data[[sp.code2]]) write.csv(data, file = file.path(path, "data.tree.csv"), quote = FALSE, row.names = FALSE) - write.csv(data.BA.SP, file = file.path(path, "data.BA.SP.csv"), quote = FALSE, + write.csv(data.BA.SP, file = file.path(path, "data.BA.SP.csv"), quote = FALSE, row.names = FALSE) - write.csv(data.TRAITS.s, file = file.path(path, "data.traits.csv"), quote = FALSE, + write.csv(data.TRAITS.s, file = file.path(path, "data.traits.csv"), quote = FALSE, row.names = FALSE) } process_inventory_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed"){ - process_dataset(set, path.formatted = path.formatted, path.processed = path.processed, type = "inventory") + process_dataset(set, path.formatted = path.formatted, path.processed = path.processed, type = "inventory") } process_bigplot_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed", Rlim = 15){ - process_dataset(set, path.formatted = path.formatted, path.processed = path.processed, type = "bigplot", Rlim = Rlim) + process_dataset(set, path.formatted = path.formatted, path.processed = path.processed, type = "bigplot", Rlim = Rlim) } process_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed", type = "inventory", Rlim = 15) { @@ -429,19 +579,16 @@ process_dataset <- function(set, path.formatted = "output/formatted", path.proce data.traits <- read.csv(file.path(path.formatted, set, "traits.csv"), stringsAsFactors = FALSE) # remove nas data.tree <- subset(data.tree, subset = !is.na(data.tree[["D"]])) - - ## change sp - data.traits$sp <- paste("sp", data.traits$sp, sep = ".") - data.tree$sp <- paste("sp", data.tree$sp, sep = ".") - - if(type=="inventory"){ + + + if(type=="inventory"){ # split into ecoregions - tmp <- lapply(unique(data.tree[["ecocode"]]), FUN = fun.data.per.ecoregion, data.tot = data.tree, - weight.full.plot = NA, site.name = set, data.TRAITS = data.traits, out.dir = path.processed) + tmp <- lapply(unique(data.tree[["ecocode"]]), FUN = fun.data.per.ecoregion, data.tot = data.tree, + site.name = set, data.TRAITS = data.traits, out.dir = path.processed) } else { fun.data.per.bigplot(data=data.tree,site.name=set,data.TRAITS=data.traits,Rlim=Rlim,xy.name=c("x","y")) } - + cat("finished", file = file.path(path.processed, set, "Done.txt")) } diff --git a/R/utils/climate.R b/R/utils/climate.R index 55e9866f639851bbe827d9f14c8734cb4d2bb49f..fa5c4295e60829b34a6491ab71d66dfe40d5e1ab 100644 --- a/R/utils/climate.R +++ b/R/utils/climate.R @@ -1,4 +1,5 @@ -library(raster) +source("R/packages.R") +check_packages(c( "raster")) #Returns the index of the nearest plot(x2,y2) to each of the target plots (x1,y1) NearestPlot <- function(x1,y1,x2,y2) { @@ -16,7 +17,6 @@ NearestPlot <- function(x1,y1,x2,y2) { #looks up MAT and MAP for given lat/lon values GetClimate <-function(lats,lons) { - #create raster grid for tiles xNum<-0:11 yNum<-0:4 diff --git a/R/utils/ecoregions.R b/R/utils/ecoregions.R index 8fd73dd06927768a27d78d2e7deacf242671e4d5..c049fbf470ef798c6411b7461cb9c745ae8bd69e 100644 --- a/R/utils/ecoregions.R +++ b/R/utils/ecoregions.R @@ -1,13 +1,13 @@ -library(maptools) -library(rgdal) -library(rgeos) -library(raster) +source("R/packages.R") +check_packages(c("maptools", "rgdal", "rgeos", "raster")) +### function to get ecoregion from wwf ecoregion map + GetEcoregions <- function(lons,lats) { #read in ecoregions and transform shapefile to lat/lon - wwf.ecoregions <- readOGR(dsn="C:\\Users\\Mark\\Documents\\analyses\\FIA traits analysis\\wwf_ecoregion", layer="wwf_terr_ecos") + wwf.ecoregions <- readOGR(dsn="data/raw/wwf_ecoregion", layer="wwf_terr_ecos") geo.proj <- proj4string(wwf.ecoregions) diff --git a/docs/analysis.outline/Traits.compet.theo.md b/docs/analysis.outline/Traits.compet.theo.md new file mode 100644 index 0000000000000000000000000000000000000000..b841513730af3e6a75f546edca6ed2ab45da1682 --- /dev/null +++ b/docs/analysis.outline/Traits.compet.theo.md @@ -0,0 +1,192 @@ +% Notes on discussion of an alternatives model to link traits to competition based on the workshop 'How are competitive interactions influenced by traits? A global analysis based on tree radial growth' +% Georges Kunstler + +## Department of Biological Sciences Macquarie University, Sydney, NSW / Irstea EMGR Grenoble France <georges.kunstler@gmail.com> + + +# Growth model and competitive growth reduction/inhibition + +The general approach of the analysis rely on fitting an individual growth model in which the growth decrease with increasing abundance of neighbourhood trees. Then we consider that the relative growth decrease with an increase of neighbour abundance is a function of neighbourhood species $n$ trait $t_n$ and of the focal species $f$ trait $t_f$. + +The model to analyse the individual growth can be written in a generic way as: + +\begin{equation} \label{eq:G1} +G_{f,p,i} = Gmax_{f,p} \times f(D_i) \times g(-\sum_{n=1}^{N_p} \lambda_{n,f} \times B_{n}) +\end{equation} + +where $G_{f,p,i}$ is the growth (diameter or basal area growth) of the individual $i$ in the plot $p$ of the focal species $f$. $D_i$ is the diameter of the individual $i$. $B_{n}$ is the the basal area (or an other measure of abundance for instance log(B)) of neighbourhood species $n$. $Gmax_{f,p}$ is the maximum growth rate of the focal species $f$ on the plot $p$ and $\lambda_{n,f}$ is a parameter representing the growth reducing for a unit of neighbourhood basal area increase (or any other measurement of abundance) of species $n$ on species $f$ (here if $\lambda$ is positive this represents competition if negative facilitation). + +This document aims at discussing the different approach to model $\lambda_{n,f}$ in function of $t_n$ and $t_f$. + +# Approach to include traits effect on competitive effect and competitive response + +The null model is that $\lambda_{n,f}$ do not vary between species and are independent of traits in which case + +\begin{equation} +\lambda_{n,f} = \lambda +\end{equation} + +where $\lambda$ is the average competitive interaction (or effet/response). + +## Absolute and hierarchical distance +In Kunstler et al. 2012 I considered only two metrics of trait distance that could explain competitive interactions: either the absolute trait distance or the hierarchical trait distance (I first estimated each $\lambda_{n,f}$ and then analysed their link with the two type of distance, I didn t modeled directly in the growth model the effect of the two traits distance on $\lambda_{n,f}$). + +This two alternative trait distances can be used to model $\lambda_{f,n}$ as + +\begin{equation} +\lambda_{n,f} = a + b \times |t_{n} - t_{f}| +\end{equation} + + +\begin{equation} +\lambda_{n,f} = a +b \times (t_{n} - t_{f}) +\end{equation} + +The logical behind the hierarchical trait distance was that such a model could arise if trait are linked in exactly the same way to the competitive effect and competitive tolerance (trait value confering a high competitive effect also confer a high competition tolerance in the same way - through out the document I will use competitive response as the inverse of competition tolerance, so high competitive response means low competitive tolerance). During the workshop we have discussed in depth the possibility of including a model with separate effect of traits on competitive effect and competitive response. This is strongly connected to several paper by Goldberg *et al.* that framed competition in term of effect and response and their link to traits (ADD REFS).Two main approaches have been proposed during the workshop are a multiplicative model or an additive model of competitive effect and response. + +## Effect/response model + +### Additive model of competitive effect and response + +The general framework for this approach is to consider that $\lambda_{n,f} = r(t_f) +e(t_n)$ where $r$ and $e$ are respectively function that relate the competitive response and effect to the trait. We can test a series of model with increasing complexity to include the traits. + +1. $\lambda$ can be influence only by the variation in competitive effect through the trait of the neighbourhood species: +\begin{equation} +\lambda_{n,f} = a +b \times t_{n} +\end{equation} + +2. $\lambda$ can be influence only by the variation in competitive response through the trait of the focal species: +\begin{equation} +\lambda_{n,f} = a +b \times t_{f} +\end{equation} + +3. $\lambda$ can be influence by the variation of both competitive effect and response through the trait of the neighbourhood and focal species: +\begin{equation} +\lambda_{n,f} = a +b \times t_{f} +c \times t_{n} +\end{equation} + +4. $\lambda$ can be influence by the variation of both competitive effect and response through the trait of the neighbourhood and focal species and their interaction: +\begin{equation} +\lambda_{n,f} = a +b \times t_{f} +c \times t_{n}+d \times t_{n} \times t_{f} +\end{equation} + +It is then possible to develop this different models to relate the competition in term of community weighted mean trait of the neighbourhood species. For the most complex model 4 this gives: + +\begin{equation} +\sum_{n=1}^{N_p} \lambda_{n,f} B_n = B_{tot} \times (a +b \times t_{f} +c \times \bar{t_{n}}+d \times t_{f} \times \bar{t_{n}}) +\end{equation} + +where $B_{tot}$ is the sum of basal area of all neighbourhood species and $\bar{t_{n}}$ is weight mean of the trait of the neighbourhood species ($\bar(t_{n}) = \sum_{n=1}^{N_p} P_n t_n$ with $P_n$ the relative abundance of species $n$). + +### Multiplicative model of competitive effect and response + +The general framework for this approach is to consider that $\lambda_{n,f} = r(t_f) \times e(t_n)$ where $r$ and $e$ are respectively function that relate the competitive response and effect to the trait.We can test a series of model with increasing complexity of trait effect. + +1. $\lambda$ can be influence only by the variation in competitive effect through the trait of the neighbourhood species: +\begin{equation} +\lambda_{n,f} = a +b \times t_{n} +\end{equation} + +2. $\lambda$ can be influence only by the variation in competitive response through the trait of the focal species: +\begin{equation} +\lambda_{n,f} = a +b \times t_{f} +\end{equation} + +3. $\lambda$ can be influence by the variation of both competitive effect and response through the trait of the neighbourhood and focal species: +\begin{equation} +\lambda_{n,f} = (a +b \times t_{f}) \times (c +d \times t_{n}) +\end{equation} + +In this last multiplicative model and interaction term is already included. + +As for the additive model it is then possible to develop the multiplicative model 3 to relate the competition in term of community weighted mean trait of the neighbourhood species. + +\begin{equation} +\sum_{n=1}^{N_p} \lambda_{n,f} B_n= B_{tot} \times (a +b \times t_{f}) \times (c+d \times \bar{t_{n}}) +\end{equation} + + +## Comparison of the multiplicative and additive effect/response model + +Developing the multiplicative model gives: + +\begin{equation} +(a +b \times t_{f}) \times (c +d \times t_{n}) = ac+bc \times t_f +ad \times t_n +bd \times t_f \times t_n +\end{equation} + +In comparison the additive model plus interaction is + +\begin{equation} +a_{add}+b_{add} \times t_{f} +c_{add} \times t_{n}+d_{add} \times t_{n} \times t_{f} +\end{equation} + +Thus the two models are equal when: + +\begin{equation} +a_{add}=ac \mspace{3mu} ;\mspace{3mu} b_{add}=bc\mspace{3mu} ;\mspace{3mu} c_{add}=ad \mspace{5mu} and \mspace{5mu} d_{add}=bd +\end{equation} + +The multiplicative model is more constraining than the additive model plus interaction. In other word the additive model with interaction can be fitted to any multiplicative model but the inverse is not true (This would requires adding an interaction in the multiplicative model). For instance, it is not possible to match the hierarchical distance because if $b_{add}$ and $d_{add} \neq 0$ then $d_{add} \neq = 0$ as well. More generally, if parameters a, b , c and d vary between [-max.r, max.r] then $d_{add}>b_{add}*c_{add}/(ma.r^2)$ (or $d_{add}<b_{add}*c_{add}/(-ma.r^2)$). Thus it is not possible to have a strong traits effect on response and effect and no interaction. + +## How Competitive effect/response model compare with the hierarchical distance and absolute distance model + +During the workshop Joe Wright run a regression between the absolute distance model and the effect/response model additive plus an interaction and found a good fit ($R^2 >0.7$). In this section I have run several R models to explore this issue. First the multiplicative or the additive models can give rise to prediction similar to the hierarchical distance (see figure below). In that case high competitive effect and high competitive response (thus low tolerance to competition) are related inversely to the trait (thus trait value that give rise to high competitive effect also results in high competition tolerance) (see figure below). Both models can also gives rise to prediction that share some similarity with the absolute distance (see figure below). For the multiplicative model this arise only when the $\lambda$ changes of sign so with a shift from competition to facilitation. This represents thus something relatively different from the limiting similarity. The interpretation of a shift of the effect and response parameters to facilitation is however difficult to grasp in the multiplicative model. If species with high trait value have high competitive response this will either mean a strong competition decrease of growth if the competitive effect parameter is positive or a strong increase of growth by neighbour (facilitation) if the competitive effect is negative. + + + + + + + +Within the framework of competitive effect/response (additive or multiplicative) I thus think that to start it would be much more logical to test only for competitive interaction (constraining e and r to be positive). In that case, the difference between the additive with no interactions and the multiplicative models are less drastic (see the two following figures comparing four different effect of trait on e and r for both models). And the two models are more clearly different from teh absolute distance. + + + + + +## Are competitive effect and response additive or multiplicative? + +I think that a key point is to start to choose from first principle whether we beleive that competitive effect and response are multiplicative or additive. I tend to agree with David Coomes that if you consider competition for a single limiting ressource a multiplicative model is more logical, Competitive effect would decrease the percentage of resource from the maximum resource available, then the relative growth rate increase with the proportion of resource available from the maximum resource possible. This partly contradict the hierarchical distance of traits that I used previously, as the model is only possible, within the additive model. However, I think that the multiplicative model can give rise to prediction that are qualitatively similar to the hierarchical distance and can thus represents a similar alternative to the model of limiting similarity based on the absolute distance. + +## Choice of model to fit +In my view there is two options that would represents a relatively minor change from our initial plan but still allow to test distinct effect of trait on competitive effect and response. + +**1. using the additive model** +Test a family of model based on : + +\begin{equation} +\lambda_{n,f} = f(a +b \times t_{f} +c \times t_{n}) +\end{equation} + if b and c =0 no trait effects, if b no trait effect on competitive response, if c = 0 no trait effect on competitive effect, if b=-c this the traits hierarchy with same link between trait effect or response. + +This different model could be contrasted with the absolute distance. + +**2. using the multiplicative model** +Test a family of model based on : + +\begin{equation} +\lambda_{n,f} = f(a +b \times t_{f})f(c +d \times t_{n}) +\end{equation} + if b and c =0 no trait effects 9fit only one value for a*c), if b no trait effect on competitive response, if c = 0 no trait effect on competitive effect, if b=-d and a=c+d this the multiplicative equivalent to traits hierarchy with same link between trait effect or response. We can constrain the parameters e and r to be positive with a function $f$. + +**Because we will frame the question more in term of competitive effect and response I think it is more logical to use the multiplicative model.** + +A last question is whether as proposed by David we try to explore model that include both this effect response and a limiting similarity (either through adding also the absolute distance or and interaction between $t_n$ and $t_f$ - but the interpretation is less clear for me). I think that in first step it would be more simple to contrast this two alternative models. + + + +TODO DECIDE TO INCLUDE ADDITIONAL ABSOLUTE DISTANCE EFFECT? + +TODO FOR ME WORKOUT LINK WITH SHIFTING COMPETITIVE HIERARCHY + + + +TODO WORKOUT HOW TO LINK WITH ADLER FITNESS DIFF VS STABILIZING STRENGTH PLOT +Explain MAIN differences only traits distance or traits space, multiplicative vs additive effect (both model can be expanded up to the community weighted trait mean). + +One fundamental question is whether we should consider that competitive effect and competitive response are multiplicative or additive + +Then test all model with mew parametrization of ghislain. + + + diff --git a/docs/analysis.outline/Traits.compet.theo.tex b/docs/analysis.outline/Traits.compet.theo.tex new file mode 100644 index 0000000000000000000000000000000000000000..18eb1aaa86a4aa349a317a45f9d208b1175f2039 --- /dev/null +++ b/docs/analysis.outline/Traits.compet.theo.tex @@ -0,0 +1,356 @@ +\documentclass[11pt,a4paper]{article} +\usepackage{graphicx,epsfig,setspace,subfig,url,amsmath} +\usepackage{fullpage} +\title{Notes on discussion of an alternatives model to link traits to competition based on the workshop 'How are competitive interactions influenced by traits? A global analysis based on tree radial growth'} +\author{Georges Kunstler\thanks{georges.kunstler@gmail.com}\\Department of Biological Sciences\\Macquarie University, Sydney, NSW\\Irstea EMGR\\Grenoble, France} + +\begin{document} +\maketitle + +\section{Growth model and competitive growth reduction/inhibition} + +The general approach of the analysis relies on fitting an individual +growth model in which the growth decrease with increasing abundance of +neighbourhood trees. Then we consider that the relative growth decrease +per unit of increase of neighbour abundance is a function of neighbourhood +species $n$ trait $t_n$ and of the focal species $f$ trait $t_f$. + +The model to analyse the individual growth can be written in a generic +way as: + +\begin{equation} \label{eq:G1} +G_{f,p,i} = Gmax_{f,p} \times h(D_i) \times g(-\sum_{n=1}^{N_p} \lambda_{n,f} \times B_{n}) +\end{equation} + +where $G_{f,p,i}$ is the growth \footnote{Diameter or basal area growth.} of the +individual $i$ in the plot $p$ of the focal species $f$. $D_i$ is the +diameter of the individual $i$. $B_{n}$ is the basal area \footnote{or an +other measure of abundance for instance log(B)} of neighbourhood species +$n$. $Gmax_{f,p}$ is the maximum growth rate of the focal species $f$ on +the plot $p$ and $\lambda_{n,f}$ is a parameter representing the growth +reducing for a unit of neighbourhood basal area increase of species $n$ on species $f$ \footnote{here if +$\lambda$ is positive this represents competition if negative +facilitation to make David happy}. Different functions $h$ and $g$ can be used but this do not affect the rest of the document. + +This document aims at discussing the different approach to model +$\lambda_{n,f}$ in function of $t_n$ and $t_f$. + +\section{Approach to include traits effect on competitive effect and +competitive response} + +The null model is that $\lambda_{n,f}$ +are independent of traits in which case + +\begin{equation} +\lambda_{n,f} = \lambda +\end{equation} + +where $\lambda$ is the average competitive interaction (or effect/response). + +\subsection{Absolute and hierarchical distance} + +In Kunstler et al. 2012 I considered only two metrics of trait distance +: either the absolute trait +distance or the hierarchical trait distance \footnote{I first estimated each +$\lambda_{n,f}$ and then analysed their link with the two type of +distance, I didn't modeled directly in the growth model the effect of +the two traits distance on $\lambda_{n,f}$ in contrast to what we plan to do here.}. + +This two alternative trait distances can be used to model +$\lambda_{f,n}$ as + +\begin{equation} +\lambda_{n,f} = a + b \times |t_{n} - t_{f}| +\end{equation} + +\begin{equation} +\lambda_{n,f} = a +b \times (t_{n} - t_{f}) +\end{equation} + +The logical behind the hierarchical trait distance was that such a model +could arise if trait are linked in exactly the same way to the +competitive effect and competitive tolerance (trait value conferring a +high competitive effect also confer a high competition tolerance in the +same way \footnote{through out the document I will use competitive response as +the inverse of competition tolerance, so high competitive response means +low competitive tolerance}). During the workshop we have discussed in +depth the possibility of including a model with separate effect of +traits on competitive effect and competitive response. This is strongly +connected to several paper by Goldberg \emph{et al.} that framed +competition in term of effect and response and their link to traits (ADD +REFS).Two main approaches have been proposed during the workshop are a +multiplicative or additive model of competitive effect and +response. + + +Based on David Coomes document I describe two alternative effect/response models. +\subsection{Effect/response model} + +\subsubsection{Additive model of competitive effect and response} + +Here +$\lambda_{n,f} = r(t_f) +e(t_n)$ where $r$ and $e$ are respectively +function that relate the competitive response and effect to the trait. +We can test a series of model with increasing complexity to include the +traits. + +\begin{enumerate} +\def\labelenumi{\arabic{enumi}.} +\itemsep1pt\parskip0pt\parsep0pt +\item + $\lambda$ can be influence only by the variation in competitive effect + through the trait of the neighbourhood species: + + \begin{equation} + \lambda_{n,f} = a +b \times t_{n} + \end{equation} +\item + $\lambda$ can be influence only by the variation in competitive + response through the trait of the focal species: + + \begin{equation} + \lambda_{n,f} = a +b \times t_{f} + \end{equation} +\item + $\lambda$ can be influence by the variation of both competitive effect + and response through the trait of the neighbourhood and focal species: + + \begin{equation} + \lambda_{n,f} = a +b \times t_{f} +c \times t_{n} + \end{equation} +\item + $\lambda$ can be influence by the variation of both competitive effect + and response through the trait of the neighbourhood and focal species + and their interaction: + + \begin{equation} + \lambda_{n,f} = a +b \times t_{f} +c \times t_{n}+d \times t_{n} \times t_{f}\label{addinter} + \end{equation} +\end{enumerate} + +It is then possible to develop this different models to relate the +competition in term of community weighted mean trait of the +neighbourhood species. For the most complex model (equation \ref{addinter}) this gives: + +\begin{equation} +\sum_{n=1}^{N_p} \lambda_{n,f} B_n = B_{tot} \times (a +b \times t_{f} +c \times \bar{t_{n}}+d \times t_{f} \times \bar{t_{n}}) +\end{equation} + +where $B_{tot}$ is the sum of basal area of all neighbourhood species +and $\bar{t_{n}}$ is weight mean of the trait of the neighbourhood +species ($\bar{t_{n}} = \sum_{n=1}^{N_p} P_n t_n$ with $P_n$ the +relative abundance of species $n$). + +\subsubsection{Multiplicative model of competitive effect and response} + +Here +$\lambda_{n,f} = r(t_f) \times e(t_n)$ where $r$ and $e$ are +respectively function that relate the competitive response and effect to +the trait. We can test a series of model with increasing complexity of +trait effect. + +\begin{enumerate} +\def\labelenumi{\arabic{enumi}.} +\itemsep1pt\parskip0pt\parsep0pt +\item + $\lambda$ can be influence only by the variation in competitive effect + through the trait of the neighbourhood species: + + \begin{equation} + \lambda_{n,f} = a +b \times t_{n} + \end{equation} +\item + $\lambda$ can be influence only by the variation in competitive + response through the trait of the focal species: + + \begin{equation} + \lambda_{n,f} = a +b \times t_{f} + \end{equation} +\item + $\lambda$ can be influence by the variation of both competitive effect + and response through the trait of the neighbourhood and focal species: + + \begin{equation} + \lambda_{n,f} = (a +b \times t_{f}) \times (c +d \times t_{n})\label{mult} + \end{equation} +\end{enumerate} + + +As for the additive model it is then possible to develop the +multiplicative model (equation \ref{mult}) to relate the competition in term of community +weighted mean trait of the neighbourhood species. + +\begin{equation} +\sum_{n=1}^{N_p} \lambda_{n,f} B_n= B_{tot} \times (a +b \times t_{f}) \times (c+d \times \bar{t_{n}}) +\end{equation} + +\subsection{Comparison of the multiplicative and additive +effect/response model} + +Developing the multiplicative model gives: + +\begin{equation} +(a +b \times t_{f}) \times (c +d \times t_{n}) = ac+bc \times t_f +ad \times t_n +bd \times t_f \times t_n +\end{equation} + +In comparison the additive model plus interaction is + +\begin{equation} +a_{add}+b_{add} \times t_{f} +c_{add} \times t_{n}+d_{add} \times t_{n} \times t_{f} +\end{equation} + +Thus the two models are equal when: + +\begin{equation} +a_{add}=ac \mspace{3mu} ;\mspace{3mu} b_{add}=bc\mspace{3mu} ;\mspace{3mu} c_{add}=ad \mspace{5mu} and \mspace{5mu} d_{add}=bd +\end{equation} + +The multiplicative model is more constraining than the additive model +plus interaction. In other word the additive model with interaction can +be fitted to any multiplicative model but the inverse is not true. For +instance, it is not possible to match the equation 7 because +if $b_{add}$ and $d_{add} \neq 0$ then $d_{add} \neq = 0$ as well \footnote{More +generally, if parameters a, b , c and d vary between {[}-max.r, max.r{]} +then $d_{add}>b_{add}*c_{add}/(ma.r^2)$ (or +$d_{add}<b_{add}*c_{add}/(-ma.r^2)$). Thus it is not possible to have a +strong traits effect on response and effect and no interaction.}. + +\subsection{How Competitive effect/response model compare with the +hierarchical distance and absolute distance model} + +During the workshop Joe Wright run a regression between the absolute +distance model and the effect/response model additive plus an +interaction and found a good fit ($R^2 >0.7$). Indeed, both models can gives rise to prediction that share +some similarity with the absolute distance (Fig \ref{fig:competmodel}). For the +multiplicative model this arise only when the $\lambda$ changes of sign +so with a shift from competition to facilitation (Fig \ref{fig:competfacill}) and for the additive model this only possible when an interaction term is included. This represents thus +something relatively different from the limiting similarity. The +interpretation of a shift to +facilitation is difficult to grasp in the multiplicative model. A high competitive response will results either in a strong decrease of growth if the +competitive effect parameter is positive or a strong increase of growth +by neighbour (facilitation) if the competitive effect is negative. This seems not logical for me. + +In contrast, the multiplicative or the additive models can give rise to prediction similar to the hierarchical distance (Fig 1). In that case high competitive effect and high competitive response (thus low tolerance to competition) are related inversely to the trait 7 (Fig 1). This seems much more logical to me. + + +\begin{figure}[htbp] +\centering +\includegraphics[width=16cm]{compet_model.png} +\caption{Variation of competition parameter in function of $t_f$ and $t_n$ for +hierarchical distance and absolute distance and respective best fit of +additive or multiplicative model. For the hierarchical distance +$\lambda = 0.5-0.5*t_f+0.5*t_n$ , for the absolute distance +$\lambda = 1-\vert t_n-t_f\vert $} +\label{fig:competmodel} +\end{figure} + +\begin{figure}[htbp] +\centering +\includegraphics[width=8cm]{compet_facill_abs.png} +\caption{Shift in facilitation in the multiplicative model similar to +absolute distance.} +\label{fig:competfacill} +\end{figure} + +Within the framework of competitive effect/response (additive or +multiplicative) I thus think that to start it would be much more logical +to test only for competition and no facilitation (constraining $e$ and $r$ or their sum to be +positive). In that case, the difference between the additive model with no +interactions (equation 7) and the multiplicative models are less drastic (see Fig \ref{fig:add} and Fig \ref{fig:mult} comparing four different effect of trait on $e$ and $r$ for both models). And the two models are more different from the absolute distance. + +\begin{figure}[htbp] +\centering +\includegraphics[width=17cm]{additiv4models.png} +\caption{Four alternative additive E/R models.} +\label{fig:add} +\end{figure} + +\begin{figure}[htbp] +\centering +\includegraphics[width=17cm]{multi4models.png} +\caption{Four alternative multiplicative E/R models.} +\label{fig:mult} +\end{figure} + + +\section{Choice of model to fit} + +\subsection{Are competitive effect and response additive or +multiplicative?} + +I think that a key point is to start to choose from first principle +whether we believe that competitive effect and response are +multiplicative or additive. I tend to agree with David Coomes that if +you consider competition for a single limiting resource a +multiplicative model is more logical. This partly contradict the +hierarchical distance of traits that I used previously, as the model is +only possible, within the additive model. However, I think that the +multiplicative model can give rise to prediction that are qualitatively +similar to the hierarchical distance. + +\subsection{Alternative models} + +In my view there is two options that would represent a relatively minor +change from our initial plan but still allow to test distinct effect of +trait on competitive effect and response. + +\textbf{1. using the additive model} +Test a family of model based on : + +\begin{equation} +\lambda_{n,f} = f(a +b \times t_{f} +c \times t_{n}) +\end{equation} + +if b and c =0 no trait effects, if b no trait effect on competitive +response, if c = 0 no trait effect on competitive effect, if b=-c this +the traits hierarchy with same link between trait effect or response. + +This different model could be contrasted with the absolute distance. + +\textbf{2. using the multiplicative model} +Test a family of model based +on : + +\begin{equation} +\lambda_{n,f} = f(a +b \times t_{f})f(c +d \times t_{n}) +\end{equation} + +if b and c =0 no trait effects (fit only one value for a*c), if b no +trait effect on competitive response, if c = 0 no trait effect on +competitive effect, if b=-d and a=c+d this the multiplicative equivalent +to traits hierarchy with same link between trait effect or response. We +can constrain the parameters $e$ and $r$ to be positive with a function $f$. + + +\textbf{Because we will frame the question more in term of competitive +effect and response I think it is more logical to use the multiplicative +model.} + +More specifically, to start we can use the following equation in which we constrain $e$ and $r$ to be between 0 and 1 and add a max $\lambda$; +\begin{equation} +\lambda_{n,f} = \lambda_{max} \frac{1}{1 + e^{- (a +b \times t_{f})}} \times \frac{1}{1 + e^{- (c +d \times t_{n})}} +\end{equation} + + +A last question is whether as proposed by David we should try to explore models +that include both this effect/response and a limiting similarity (either +through adding also the absolute distance or by adding an interaction between +$t_n$ and $t_f$ - but the interpretation is less clear for me). I think +that in first step it would be more simple to contrast this two +alternative models and no explore such complex model. + +% TODO DECIDE TO INCLUDE ADDITIONAL ABSOLUTE DISTANCE EFFECT? + +% TODO FOR ME WORKOUT LINK WITH SHIFTING COMPETITIVE HIERARCHY + +% TODO WORKOUT HOW TO LINK WITH ADLER FITNESS DIFF VS STABILIZING STRENGTH +% PLOT Explain MAIN differences only traits distance or traits space, +% multiplicative vs additive effect (both model can be expanded up to the +% community weighted trait mean). + +% One fundamental question is whether we should consider that competitive +% effect and competitive response are multiplicative or additive + +% Then test all model with mew parametrization of ghislain. + +\end{document} diff --git a/docs/analysis.outline/additiv4models.png b/docs/analysis.outline/additiv4models.png new file mode 100644 index 0000000000000000000000000000000000000000..2d51bc6591b399107db9425548c44c93d065881a Binary files /dev/null and b/docs/analysis.outline/additiv4models.png differ diff --git a/docs/analysis.outline/analysis.md b/docs/analysis.outline/analysis.md index c7649d22a6fd9c5806ea6e4b6bb5563a28e6e4cd..0eda9990655e46be1cc1e3923d6f977bc129825a 100644 --- a/docs/analysis.outline/analysis.md +++ b/docs/analysis.outline/analysis.md @@ -5,19 +5,17 @@ # Workshop at Macquarie University 16-20 September 2013 -<!-- 3/9/2013 Discussion with Marie Uriarte. Paper in review in PNAS testing a method for incorporating trait data into demographic models which includes differences in average rates, niche differences and competitive hierarchies. Comment of Maria that my paper in Ecol Let mainly focus on average rate. Need to check in Chesson how average rate is linked to the competitive hierarchy. --> - -<!-- Need to propose assumption of how process are changing across ecoregion or abiotic gradient. Not a single hypothesis as it is now. She found that depending on traits some time it was competitive hierarchy and some time limiting similarity. --> - -<!-- Also need to keep in mind that traits represents the history of the species as well presents process see Sedio et Nature communication 2013. BUT I DON T THINK THIS IS SO DETERMINANT IN TERM OF HOW TRAITS CONNECT WITH PRESENT DAY COMPETITION. THIS MAINLY RELEVANT FOR THE ACTUAL COMPOSITION. --> # ADD DISCUSSION PROGRES AND EQUATION TO TEST AND RUN # Approach to include traits effect on competitive effect and competitive response NEED TO READ ALL GOLDBERG PAPERS. +Freckelton and Watkinson 2001 Fun Ecol present an idea of asymetric competitive hierarchy between species for ressource competition similar to the Ecol Let paper. + +Explain MAIN differences only traist distance or traits space, multiplicative vs additive effect (both model can be expanded up to the community weighted trait mean). -One fundamental question is wether we should consider that competitive effect and competitive response are multiplicative or additive +One fundamental question is whether we should consider that competitive effect and competitive response are multiplicative or additive Then test all model with mew parametrization of ghislain. diff --git a/docs/analysis.outline/compet_facill_abs.png b/docs/analysis.outline/compet_facill_abs.png new file mode 100644 index 0000000000000000000000000000000000000000..6f6d59fa301e3cd7ad18b39d57392098e7c02878 Binary files /dev/null and b/docs/analysis.outline/compet_facill_abs.png differ diff --git a/docs/analysis.outline/compet_model.png b/docs/analysis.outline/compet_model.png new file mode 100644 index 0000000000000000000000000000000000000000..ca65d2d3b5b2606ad9a789ab6f8ed35779ecec45 Binary files /dev/null and b/docs/analysis.outline/compet_model.png differ diff --git a/docs/analysis.outline/multi4models.png b/docs/analysis.outline/multi4models.png new file mode 100644 index 0000000000000000000000000000000000000000..397e3df4b675239b08bd1940de7a037899a25478 Binary files /dev/null and b/docs/analysis.outline/multi4models.png differ