diff --git a/R/CLIMATE.FRANCE.R b/R/CLIMATE.FRANCE.R index 7e046dee7e11515feb25ddd45b2b7d2ba133e874..0bd3ccab3de68b95a171168fa358ebaf44f4623b 100644 --- a/R/CLIMATE.FRANCE.R +++ b/R/CLIMATE.FRANCE.R @@ -1,243 +1,238 @@ -################################################ -################################################ -################################################ -################################################ -### LOAD and PROCEED CLIMATIC DATA -################################################ -################################################ -################################################ -################################################ +################################################ LOAD and PROCEED CLIMATIC DATA source("./R/FUN.climate.R") -## Data extracted from C. Piedallu data base with -## Radition with cloud cover for each plots -## Piedallu, C, and J Gegout. 2008. “Efficient Assessment of Topographic Solar Radiation to Improve Plant Distribution Models.†Agricultural and Forest Meteorology 148 (11) (October): 1696–1706. doi:10.1016/j.agrformet.2008.06.001. -## Temperature and precipitation with temperature corrected by elevation based on -## Piedallu, C., J. C. Gégout, V. Perez, F. Lebourgeois, and R. Field. 2012. “Soil Water Balance Performs Better Than Climatic Water Variables in Tree Species Distribution Modelling.†Global Ecology and Biogeography. http://onlinelibrary.wiley.com/doi/10.1111/geb.12012/full. - -###### -## LOAD PIDEALLU DATA -data.CLIM <- read.csv("./data/raw/DataFrance/climate_piedallu/placettesGK_avec_2011.csv", - header=T,sep='\t',stringsAsFactors=FALSE,dec=",",na.strings="") +## Data extracted from C. Piedallu data base with Radition with cloud cover for +## each plots Piedallu, C, and J Gegout. 2008. “Efficient Assessment of +## Topographic Solar Radiation to Improve Plant Distribution Models.†Agricultural +## and Forest Meteorology 148 (11) (October): 1696–1706. +## doi:10.1016/j.agrformet.2008.06.001. Temperature and precipitation with +## temperature corrected by elevation based on Piedallu, C., J. C. Gégout, V. +## Perez, F. Lebourgeois, and R. Field. 2012. “Soil Water Balance Performs Better +## Than Climatic Water Variables in Tree Species Distribution Modelling.†Global +## Ecology and Biogeography. +## http://onlinelibrary.wiley.com/doi/10.1111/geb.12012/full. + +###### LOAD PIDEALLU DATA +data.CLIM <- read.csv("./data/raw/DataFrance/climate_piedallu/placettesGK_avec_2011.csv", + header = T, sep = "\t", stringsAsFactors = FALSE, dec = ",", na.strings = "") print(names(data.CLIM)) -data.CLIM$rumkg_500 <- as.numeric(gsub(",",".",data.CLIM$rumkg_500)) +data.CLIM$rumkg_500 <- as.numeric(gsub(",", ".", data.CLIM$rumkg_500)) -######################################### -######################################### -#### COMPUTE CLIMATIC VARIABLES !!! +######################################### COMPUTE CLIMATIC VARIABLES !!! -### NEED TO COMPUTE DDG5 -### NEED TO COMPUTE SOIL MAX WATER CONTENT -### NEED TO COMPUTE PET +### NEED TO COMPUTE DDG5 NEED TO COMPUTE SOIL MAX WATER CONTENT NEED TO COMPUTE PET ### NEED TO COMPUTE WATER BUDGET ## remove NA lines data.CLIM.na <- is.na(data.CLIM[["tmoy6190_1_cor"]]) -data.CLIM2 <- data.CLIM[!data.CLIM.na,] +data.CLIM2 <- data.CLIM[!data.CLIM.na, ] ### apply function sgdd -sgdd.vec <- apply(data.CLIM2[,82:93],MARGIN=1,FUN=fun.sgdd) -MeanT.vec <- apply(data.CLIM2[,82:93],MARGIN=1,FUN=mean) -plot(MeanT.vec,sgdd.vec) +sgdd.vec <- apply(data.CLIM2[, 82:93], MARGIN = 1, FUN = fun.sgdd) +MeanT.vec <- apply(data.CLIM2[, 82:93], MARGIN = 1, FUN = mean) +plot(MeanT.vec, sgdd.vec) dim(data.CLIM) -################################################# -### Max Soil Water Content -### compute based on -## Piedallu, C., J. C. Gégout, A. Bruand, and I. Seynave. 2011. “Mapping Soil Water Holding Capacity over Large Areas to Predict Potential Production of Forest Stands.†Geoderma 160 (3): 355–366. +################################################# Max Soil Water Content compute based on Piedallu, C., J. C. Gégout, A. Bruand, +################################################# and I. Seynave. 2011. “Mapping Soil Water Holding Capacity over Large Areas to +################################################# Predict Potential Production of Forest Stands.†Geoderma 160 (3): 355–366. ### read data texture as in Piedallu et al. 2011 -data.texture <- read.table("./data/raw/DataFrance/climate_piedallu/texture.txt",header=T) +data.texture <- read.table("./data/raw/DataFrance/climate_piedallu/texture.txt", + header = T) ### load ecological data -load(file="./data/process/ecologie_tot.Rdata") +load(file = "./data/process/ecologie_tot.Rdata") head(ecologie_tot) -codeprof <- c(2.5,10,20,30,40,50,60,70,80,92.5) # code prof en cm -codecaillou <- c(2.5,10,20,30,40,50,60,70,80,90,97.5) ## code percentage en % +codeprof <- c(2.5, 10, 20, 30, 40, 50, 60, 70, 80, 92.5) # code prof en cm +codecaillou <- c(2.5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 97.5) ## code percentage en % -#compute swhc -swhc <- fun.swhc(affroc=ecologie_tot$affroc,cailloux=ecologie_tot$cailloux,text2=ecologie_tot$text2,text1=ecologie_tot$text1,prof2=ecologie_tot$prof2,prof1=ecologie_tot$prof1,codeprof,codecaillou,data.texture) -swhc[is.na(ecologie_tot$prof2)] <- NA +# compute swhc +swhc <- fun.swhc(affroc = ecologie_tot$affroc, cailloux = ecologie_tot$cailloux, + text2 = ecologie_tot$text2, text1 = ecologie_tot$text1, prof2 = ecologie_tot$prof2, + prof1 = ecologie_tot$prof1, codeprof, codecaillou, data.texture) +swhc[is.na(ecologie_tot$prof2)] <- NA -### add to ecological data based +### add to ecological data based ecologie_tot$SWHC <- swhc -################################## -################################## -#### COMPUTE PET WITH TURC FORMULA -### unit to convert radneb61_ from J/cm2/month in MJ/m2/day /100/30 then to convert in KJ/m2/day as in Nick formula *1000 +################################## COMPUTE PET WITH TURC FORMULA unit to convert radneb61_ from J/cm2/month in +################################## MJ/m2/day /100/30 then to convert in KJ/m2/day as in Nick formula *1000 -## RAD and Temperature of the 12 months +## RAD and Temperature of the 12 months radneb61_1to12 <- 66:77 tmoy6190_1_cor.1to12 <- 82:93 ### apply in parallel library(doParallel) -registerDoParallel(cores=6) ## affect automaticaly half of the core detected to the foreach -getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6) +registerDoParallel(cores = 6) ## affect automaticaly half of the core detected to the foreach +getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6) -PET.matrix <- -foreach(i=1:length(data.CLIM$idp), .combine=rbind) %dopar% - { -fun.PET(i,rad=data.CLIM[,radneb61_1to12],temp=data.CLIM[,tmoy6190_1_cor.1to12]) - } +PET.matrix <- foreach(i = 1:length(data.CLIM$idp), .combine = rbind) %dopar% { + fun.PET(i, rad = data.CLIM[, radneb61_1to12], temp = data.CLIM[, tmoy6190_1_cor.1to12]) +} -PET.matrix <- PET.matrix[!data.CLIM.na,] -PET.matrix[PET.matrix<0] <- 0 ## affect zero if negative PET +PET.matrix <- PET.matrix[!data.CLIM.na, ] +PET.matrix[PET.matrix < 0] <- 0 ## affect zero if negative PET -## plot to check PET computed by me vs PET of Christian -## par(mfrow=c(3,4)) -## for (i in 1:12) {plot(PET.matrix[,i],data.CLIM2[,i+48]) ; lines(0:100,0:100)} +## plot to check PET computed by me vs PET of Christian par(mfrow=c(3,4)) for (i +## in 1:12) {plot(PET.matrix[,i],data.CLIM2[,i+48]) ; lines(0:100,0:100)} -colnames(PET.matrix) <- paste("PET.cor.",1:12,sep="") -############## -### MERGE CLIMATE and PET -data.CLIM2 <- cbind(data.CLIM2,PET.matrix,sgdd=sgdd.vec) +colnames(PET.matrix) <- paste("PET.cor.", 1:12, sep = "") +############## MERGE CLIMATE and PET +data.CLIM2 <- cbind(data.CLIM2, PET.matrix, sgdd = sgdd.vec) ### MERGE WITH ECOLOGICAL DATA -ecologie.clim <- merge(ecologie_tot,data.CLIM2,by="idp",all.x=T) +ecologie.clim <- merge(ecologie_tot, data.CLIM2, by = "idp", all.x = T) dim(ecologie_tot) dim(ecologie.clim) names(ecologie.clim) -ecologie.clim2 <- ecologie.clim[!is.na(ecologie.clim$PET.cor.1) &!is.na(ecologie.clim$SWHC),] +ecologie.clim2 <- ecologie.clim[!is.na(ecologie.clim$PET.cor.1) & !is.na(ecologie.clim$SWHC), + ] -########################################################################################### -###### COMPUTE WATER BUDGET +########################################################################################### COMPUTE WATER BUDGET ## ### test function -## fun.WaterBudget(i=16,prcp.m=(ecologie.clim2[,paste("prec6190_",1:12,sep="")]), -## PET.m=(ecologie.clim2[,paste("PET.cor.",1:12,sep="")]), -## Ta.m=(ecologie.clim2[,c("tmoy6190_1_cor",paste("tmoy6190_1_cor.",1:11,sep=""))]), -## SWHC.v=(ecologie.clim2[ ,"SWHC"]),n=2) +## fun.WaterBudget(i=16,prcp.m=(ecologie.clim2[,paste('prec6190_',1:12,sep='')]), +## PET.m=(ecologie.clim2[,paste('PET.cor.',1:12,sep='')]), +## Ta.m=(ecologie.clim2[,c('tmoy6190_1_cor',paste('tmoy6190_1_cor.',1:11,sep=''))]), +## SWHC.v=(ecologie.clim2[ ,'SWHC']),n=2) ### apply function in parallel library(doParallel) -registerDoParallel(cores=6) ## affect automaticaly half of the core detected to the foreach -getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6) - -WBWS.matrix <- -foreach(i=1:length(ecologie.clim2$idp), .combine=rbind) %dopar% - { -fun.WaterBudget(i,prcp.m=(ecologie.clim2[,paste("prec6190_",1:12,sep="")]), - PET.m=(ecologie.clim2[,paste("PET.cor.",1:12,sep="")]), - Ta.m=(ecologie.clim2[,c("tmoy6190_1_cor",paste("tmoy6190_1_cor.",1:11,sep=""))]), - SWHC.v=(ecologie.clim2[ ,"SWHC"]),n=2) - } +registerDoParallel(cores = 6) ## affect automaticaly half of the core detected to the foreach +getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6) + +WBWS.matrix <- foreach(i = 1:length(ecologie.clim2$idp), .combine = rbind) %dopar% + { + fun.WaterBudget(i, prcp.m = (ecologie.clim2[, paste("prec6190_", 1:12, sep = "")]), + PET.m = (ecologie.clim2[, paste("PET.cor.", 1:12, sep = "")]), Ta.m = (ecologie.clim2[, + c("tmoy6190_1_cor", paste("tmoy6190_1_cor.", 1:11, sep = ""))]), + SWHC.v = (ecologie.clim2[, "SWHC"]), n = 2) + } ### MERGE all and saved -WBWS.matrix2 <- cbind("idp"=ecologie.clim2$idp,WBWS.matrix) -ecologie.clim.data <- merge(ecologie.clim,WBWS.matrix2,by="idp",all.x=T) +WBWS.matrix2 <- cbind(idp = ecologie.clim2$idp, WBWS.matrix) +ecologie.clim.data <- merge(ecologie.clim, WBWS.matrix2, by = "idp", all.x = T) -##### -# change tplant not good format +##### change tplant not good format load("./data/process/placette_tot.Rdata") -ecologie.clim.data$tplant <- placette_tot$tplant +ecologie.clim.data$tplant <- placette_tot$tplant ### Compute mean T and annual sum of precip -ecologie.clim.data$MAT <- apply(ecologie.clim.data[,114:125],MARGIN=1,FUN=mean) -ecologie.clim.data$SAP <- apply(ecologie.clim.data[,69:80],MARGIN=1,FUN=sum) +ecologie.clim.data$MAT <- apply(ecologie.clim.data[, 114:125], MARGIN = 1, FUN = mean) +ecologie.clim.data$SAP <- apply(ecologie.clim.data[, 69:80], MARGIN = 1, FUN = sum) -saveRDS(ecologie.clim.data,file="./data/process/ecologie.clim.data.rds") +saveRDS(ecologie.clim.data, file = "./data/process/ecologie.clim.data.rds") -######################################################### -######################################################### -###### FIGURES OF CLIMATIC DATA -############################# +######################################################### FIGURES OF CLIMATIC DATA ecologie.clim.data <- readRDS("./data/process/ecologie.clim.data.rds") - names(ecologie.clim.data) +names(ecologie.clim.data) -###check climatic data +### check climatic data pdf("./figs/sgdd.tmin.map.pdf") -par(mfrow=c(2,2)) -plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93, - col=rev(heat.colors(10))[cut(ecologie.clim.data$sgdd,quantile(ecologie.clim.data$sgdd,probs=(0:10)/10,na.rm=T),labels=F)], - cex=0.2,pty="s",xlab=NA,ylab=NA,main="SGDD") - -plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93, - col=rev(heat.colors(10))[cut(ecologie.clim.data$tmin6190_min_cor,quantile(ecologie.clim.data$tmin6190_min_cor,probs=(0:10)/10,na.rm=T),labels=F)], - cex=0.2,pty="s",xlab=NA,ylab=NA,main="Tmin") - plot( ecologie.clim.data$tmin6190_min_cor,ecologie.clim.data$sgdd,cex=0.2,xlab="Tmin",ylab="SGDD") +par(mfrow = c(2, 2)) +plot(ecologie.clim.data$xl93, ecologie.clim.data$yl93, col = rev(heat.colors(10))[cut(ecologie.clim.data$sgdd, + quantile(ecologie.clim.data$sgdd, probs = (0:10)/10, na.rm = T), labels = F)], + cex = 0.2, pty = "s", xlab = NA, ylab = NA, main = "SGDD") + +plot(ecologie.clim.data$xl93, ecologie.clim.data$yl93, col = rev(heat.colors(10))[cut(ecologie.clim.data$tmin6190_min_cor, + quantile(ecologie.clim.data$tmin6190_min_cor, probs = (0:10)/10, na.rm = T), + labels = F)], cex = 0.2, pty = "s", xlab = NA, ylab = NA, main = "Tmin") +plot(ecologie.clim.data$tmin6190_min_cor, ecologie.clim.data$sgdd, cex = 0.2, xlab = "Tmin", + ylab = "SGDD") dev.off() pdf("./figs/Water.map.pdf") - par(mfrow=c(2,2),mar=c(1,1,1,1)) -plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93, - col=rev(colorRampPalette(c("blue", "red"))( 10 ))[cut(ecologie.clim.data$WB.y,quantile(ecologie.clim.data$WB.y,probs=(0:10)/10,na.rm=T),labels=F)], - cex=0.2,pty="s",xlab=NA,ylab=NA) -plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93, - col=rev(colorRampPalette(c("blue", "red"))( 10 ))[cut(ecologie.clim.data$WB.s,quantile(ecologie.clim.data$WB.s,probs=(0:10)/10,na.rm=T),labels=F)], - cex=0.2,pty="s",xlab=NA,ylab=NA) -plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93, - col=rev(colorRampPalette(c("blue", "red"))( 10 ))[cut(ecologie.clim.data$WS.y,quantile(ecologie.clim.data$WS.y,probs=(0:10)/10,na.rm=T),labels=F)], - cex=0.2,pty="s",xlab=NA,ylab=NA) -plot(ecologie.clim.data$xl93,ecologie.clim.data$yl93, - col=rev(colorRampPalette(c("blue", "red"))( 10 ))[cut(ecologie.clim.data$WS.s,quantile(ecologie.clim.data$WS.s,probs=(0:10)/10,na.rm=T),labels=F)], - cex=0.2,pty="s",xlab=NA,ylab=NA) +par(mfrow = c(2, 2), mar = c(1, 1, 1, 1)) +plot(ecologie.clim.data$xl93, ecologie.clim.data$yl93, col = rev(colorRampPalette(c("blue", + "red"))(10))[cut(ecologie.clim.data$WB.y, quantile(ecologie.clim.data$WB.y, probs = (0:10)/10, + na.rm = T), labels = F)], cex = 0.2, pty = "s", xlab = NA, ylab = NA) +plot(ecologie.clim.data$xl93, ecologie.clim.data$yl93, col = rev(colorRampPalette(c("blue", + "red"))(10))[cut(ecologie.clim.data$WB.s, quantile(ecologie.clim.data$WB.s, probs = (0:10)/10, + na.rm = T), labels = F)], cex = 0.2, pty = "s", xlab = NA, ylab = NA) +plot(ecologie.clim.data$xl93, ecologie.clim.data$yl93, col = rev(colorRampPalette(c("blue", + "red"))(10))[cut(ecologie.clim.data$WS.y, quantile(ecologie.clim.data$WS.y, probs = (0:10)/10, + na.rm = T), labels = F)], cex = 0.2, pty = "s", xlab = NA, ylab = NA) +plot(ecologie.clim.data$xl93, ecologie.clim.data$yl93, col = rev(colorRampPalette(c("blue", + "red"))(10))[cut(ecologie.clim.data$WS.s, quantile(ecologie.clim.data$WS.s, probs = (0:10)/10, + na.rm = T), labels = F)], cex = 0.2, pty = "s", xlab = NA, ylab = NA) dev.off() pdf("./figs/Water.var,cor.pdf") -par(mfrow=c(2,2)) -plot(ecologie.clim.data[['WB.y']],ecologie.clim.data[['WS.y']],xlab="WB year",ylab="ratio AET/D year",cex=0.2) -plot(ecologie.clim.data[['WB.s']],ecologie.clim.data[['WS.s']],xlab="WB growing season",ylab="ratio AET/D growing season",cex=0.2) -plot(ecologie.clim.data[['WB.s']],ecologie.clim.data[['WB.y']],xlab="WB growing season",ylab="WB growing year",cex=0.2) -plot(ecologie.clim.data[['WS.s']],ecologie.clim.data[['WS.y']],xlab="ratio AET/D growing season",ylab="ratio AET/D year",cex=0.2) +par(mfrow = c(2, 2)) +plot(ecologie.clim.data[["WB.y"]], ecologie.clim.data[["WS.y"]], xlab = "WB year", + ylab = "ratio AET/D year", cex = 0.2) +plot(ecologie.clim.data[["WB.s"]], ecologie.clim.data[["WS.s"]], xlab = "WB growing season", + ylab = "ratio AET/D growing season", cex = 0.2) +plot(ecologie.clim.data[["WB.s"]], ecologie.clim.data[["WB.y"]], xlab = "WB growing season", + ylab = "WB growing year", cex = 0.2) +plot(ecologie.clim.data[["WS.s"]], ecologie.clim.data[["WS.y"]], xlab = "ratio AET/D growing season", + ylab = "ratio AET/D year", cex = 0.2) dev.off() pdf("./figs/sgdd.water.cor.pdf") -plot(ecologie.clim.data[['sgdd']],ecologie.clim.data[['WS.y']],xlab="SGDD",ylab="ratio AET/D year",cex=0.2) +plot(ecologie.clim.data[["sgdd"]], ecologie.clim.data[["WS.y"]], xlab = "SGDD", ylab = "ratio AET/D year", + cex = 0.2) dev.off() ### DO PCA OF CLIMATIC DATA -data.prc <- prcomp(ecologie.clim.data[apply(is.na(ecologie.clim.data[,c('sgdd','tmin6190_min_cor','WB.s','WS.s')]),MARGIN=1,FUN=sum)<1,c('sgdd','tmin6190_min_cor','WB.s','WS.s')],na.action=na.omit,scale=T) +data.prc <- prcomp(ecologie.clim.data[apply(is.na(ecologie.clim.data[, c("sgdd", + "tmin6190_min_cor", "WB.s", "WS.s")]), MARGIN = 1, FUN = sum) < 1, c("sgdd", + "tmin6190_min_cor", "WB.s", "WS.s")], na.action = na.omit, scale = T) summary(data.prc) -scores <-data.prc$x +scores <- data.prc$x pdf("./figs/climate.pca.pdf") -biplot(data.prc,pch=1,xlabs=rep('.',length(data.prc$x[,1])),ylabs=c('SGDD','Tmin','WB','WS')) - text(150,180,labels=paste("Var PC1 ", ((summary(data.prc)))$importance[2,1],sep="")) - text(150,165,labels=paste("Var PC2 ", ((summary(data.prc)))$importance[2,2],sep="")) +biplot(data.prc, pch = 1, xlabs = rep(".", length(data.prc$x[, 1])), ylabs = c("SGDD", + "Tmin", "WB", "WS")) +text(150, 180, labels = paste("Var PC1 ", ((summary(data.prc)))$importance[2, 1], + sep = "")) +text(150, 165, labels = paste("Var PC2 ", ((summary(data.prc)))$importance[2, 2], + sep = "")) dev.off() -##### FIGS on elevation correction of temperature done by Christian -### map of diff between mean altitude of the 1x1km cell and actual elevation of the plots +##### FIGS on elevation correction of temperature done by Christian map of diff +##### between mean altitude of the 1x1km cell and actual elevation of the plots pdf("./figs/map.error.alti.pdf") -plot(data.CLIM$xl2,data.CLIM$yl2,col="grey",cex=0.3,xlab="X",ylab="Y") -data.temp <- data.CLIM[data.CLIM$diff..altIFN...mnt50 < (-100) | (data.CLIM$diff..altIFN...mnt50>100),] -points(data.temp$xl2,data.temp$yl2,col=c("red","coral3","coral","grey","coral","coral3","red")[cut(data.temp$diff..altIFN...mnt50,breaks=c(-500,-300,-200,-100,100,200,300,500),labels=F)],cex=0.5,pch=1) +plot(data.CLIM$xl2, data.CLIM$yl2, col = "grey", cex = 0.3, xlab = "X", ylab = "Y") +data.temp <- data.CLIM[data.CLIM$diff..altIFN...mnt50 < (-100) | (data.CLIM$diff..altIFN...mnt50 > + 100), ] +points(data.temp$xl2, data.temp$yl2, col = c("red", "coral3", "coral", "grey", "coral", + "coral3", "red")[cut(data.temp$diff..altIFN...mnt50, breaks = c(-500, -300, -200, + -100, 100, 200, 300, 500), labels = F)], cex = 0.5, pch = 1) dev.off() ##### PLOT HIST OF ALL VARIABLES -pdf('./figs/climatvar.hist.pdf') -par(mfrow=c(3,4)) -for (i in 37:48) hist(data.CLIM[,i],xlab='Monthly Precip (mm)',main=names(data.CLIM)[i]) - -par(mfrow=c(3,4)) -for (i in 49:60) hist(data.CLIM[,i],xlab='PET (mm)',main=names(data.CLIM)[i]) +pdf("./figs/climatvar.hist.pdf") +par(mfrow = c(3, 4)) +for (i in 37:48) hist(data.CLIM[, i], xlab = "Monthly Precip (mm)", main = names(data.CLIM)[i]) -par(mfrow=c(3,4)) -for (i in 66:77) hist(data.CLIM[,i],xlab='Radiation (?)',main=names(data.CLIM)[i]) +par(mfrow = c(3, 4)) +for (i in 49:60) hist(data.CLIM[, i], xlab = "PET (mm)", main = names(data.CLIM)[i]) -par(mfrow=c(3,4)) -for (i in 82:93) hist(data.CLIM[,i],xlab='temperature (C)',main=names(data.CLIM)[i]) -dev.off() +par(mfrow = c(3, 4)) +for (i in 66:77) hist(data.CLIM[, i], xlab = "Radiation (?)", main = names(data.CLIM)[i]) -pdf('./figs/xyplot.Tunco.Tcor.pdf') -par(mfrow=c(3,4)) -for (i in 1:12) plot(data.CLIM[,(23:34)[i]],data.CLIM[,(82:94)[i]],xlab='temperature (C) uncorrected',ylab='temperature (C) corrected', - main=names(data.CLIM)[(23:34)[i]]) +par(mfrow = c(3, 4)) +for (i in 82:93) hist(data.CLIM[, i], xlab = "temperature (C)", main = names(data.CLIM)[i]) dev.off() - +pdf("./figs/xyplot.Tunco.Tcor.pdf") +par(mfrow = c(3, 4)) +for (i in 1:12) plot(data.CLIM[, (23:34)[i]], data.CLIM[, (82:94)[i]], xlab = "temperature (C) uncorrected", + ylab = "temperature (C) corrected", main = names(data.CLIM)[(23:34)[i]]) +dev.off() diff --git a/R/FUN.TRY.R b/R/FUN.TRY.R index 88028c85315af352e4aaa7f7ee04a99bc5751067..cbe4bb59eb1fe78510e543edef9201132b8d3d46 100644 --- a/R/FUN.TRY.R +++ b/R/FUN.TRY.R @@ -1,18 +1,12 @@ -############################################ -############################################ -## FUNCTION TO EXTRACT DECTED OUTLIER AND FORMAT TRY DATA -## Georges Kunstler 14/06/2013 +############################################ FUNCTION TO EXTRACT DECTED OUTLIER AND FORMAT TRY DATA Georges Kunstler +############################################ 14/06/2013 library(MASS) library(doParallel) library(mvoutlier) -######################################################## -######################################################## -######################################################## -######################################################## -###Build a function that extract the variables +######################################################## Build a function that extract the variables ##'Description of the function to extract data from original TRY data ##' ##' based on the data structure of extraction from TRY data base @@ -23,70 +17,88 @@ library(mvoutlier) ##' @param Trait.Data list of names of traits data that we want to extract ##' @return data.frame with one line per observation id with clumns with ObservationID Species Nontrait data for Traits: OrigValue OrigUnit StdValue ##' @author Kunstler -fun.extract.try <- function(ObservationID.t,data,Non.Trait.Data,Trait.Data){ -data.temp <- data[data$ObservationID==ObservationID.t,] -## Non trait data -Vec.Non.Trait.Data <- rep(NA,length(Non.Trait.Data)) -names(Vec.Non.Trait.Data) <- Non.Trait.Data - -for (i in 1:length(Non.Trait.Data)){ - if( sum(data.temp$DataName==Non.Trait.Data[i])==1){ -Vec.Non.Trait.Data[i] <- data.temp[data.temp$DataName==Non.Trait.Data[i],"OrigValueStr"] - } - if(sum(data.temp$DataName==Non.Trait.Data[i])>1){ - ## if(sum(data.temp$DataName==Non.Trait.Data[i] & grepl("Mean",data.temp$ValueKindName, - ## fixed=TRUE))!=1){ print("error in ValueKindName")} - Vec.Non.Trait.Data[i] <- data.temp[data.temp$DataName==Non.Trait.Data[i] , - "OrigValueStr"][1] - } - } - -## Trait data -Vec.Trait.Data.OrigValue <-Vec.Trait.Data.OrigUnit <- Vec.Trait.Data.StdValue <- - rep(NA,length(Trait.Data)) -names(Vec.Trait.Data.OrigValue) <- paste("OrigValue",Trait.Data) -names(Vec.Trait.Data.OrigUnit) <- paste("OrigUnitName",Trait.Data) -names(Vec.Trait.Data.StdValue) <- paste("StdValue",Trait.Data) - -for (i in 1:length(Trait.Data)){ - if(sum(grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE))==1){ -Vec.Trait.Data.OrigValue[i] <- data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE),"OrigValue"] -Vec.Trait.Data.OrigUnit[i] <- data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE),"OrigUnitStr"] -Vec.Trait.Data.StdValue[i] <- data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE),"StdValue"] - } - - if( sum(grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE))>1){ - if(sum((data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") & !is.na(data.temp$ValueKindName)))==1){ - Vec.Trait.Data.OrigValue[i] <- mean(data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE)& - (data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") & !is.na(data.temp$ValueKindName)) ,"OrigValue"]) - Vec.Trait.Data.OrigUnit[i] <- (data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE) & - (data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") & !is.na(data.temp$ValueKindName)),"OrigUnitStr"])[1] - Vec.Trait.Data.StdValue[i] <- mean(data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE) & - (data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") & !is.na(data.temp$ValueKindName)),"StdValue"]) - } - if(sum(data.temp$ValueKindName %in% c("Best estimate","Mean","Site specific mean") )<1){ - Vec.Trait.Data.OrigValue[i] <- mean(data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE),"OrigValue"],na.rm=T) - Vec.Trait.Data.OrigUnit[i] <- (data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE) ,"OrigUnitStr"])[1] - Vec.Trait.Data.StdValue[i] <- mean(data.temp[grepl(Trait.Data[i],data.temp$TraitName, fixed=TRUE) ,"StdValue"],na.rm=T) - } - - } - -} -### EXPERIMENTAL DATA TYPE -TF.exp.data <- sum(grepl("Growth & measurement conditions - experimental tre",data.temp$NonTraitCategories, fixed=TRUE) )>0 -names(TF.exp.data) <- 'TF.exp.data' -res.temp <- data.frame("ObservationID"=ObservationID.t,"AccSpeciesName"=unique(data.temp$AccSpeciesName) ,t(Vec.Non.Trait.Data),TF.exp.data, - t(Vec.Trait.Data.OrigValue),t(Vec.Trait.Data.OrigUnit),t(Vec.Trait.Data.StdValue)) -return(res.temp) - +fun.extract.try <- function(ObservationID.t, data, Non.Trait.Data, Trait.Data) { + data.temp <- data[data$ObservationID == ObservationID.t, ] + ## Non trait data + Vec.Non.Trait.Data <- rep(NA, length(Non.Trait.Data)) + names(Vec.Non.Trait.Data) <- Non.Trait.Data + + for (i in 1:length(Non.Trait.Data)) { + if (sum(data.temp$DataName == Non.Trait.Data[i]) == 1) { + Vec.Non.Trait.Data[i] <- data.temp[data.temp$DataName == Non.Trait.Data[i], + "OrigValueStr"] + } + if (sum(data.temp$DataName == Non.Trait.Data[i]) > 1) { + ## if(sum(data.temp$DataName==Non.Trait.Data[i] & + ## grepl('Mean',data.temp$ValueKindName, fixed=TRUE))!=1){ print('error in + ## ValueKindName')} + Vec.Non.Trait.Data[i] <- data.temp[data.temp$DataName == Non.Trait.Data[i], + "OrigValueStr"][1] + } + } + + ## Trait data + Vec.Trait.Data.OrigValue <- Vec.Trait.Data.OrigUnit <- Vec.Trait.Data.StdValue <- rep(NA, + length(Trait.Data)) + names(Vec.Trait.Data.OrigValue) <- paste("OrigValue", Trait.Data) + names(Vec.Trait.Data.OrigUnit) <- paste("OrigUnitName", Trait.Data) + names(Vec.Trait.Data.StdValue) <- paste("StdValue", Trait.Data) + + for (i in 1:length(Trait.Data)) { + if (sum(grepl(Trait.Data[i], data.temp$TraitName, fixed = TRUE)) == 1) { + Vec.Trait.Data.OrigValue[i] <- data.temp[grepl(Trait.Data[i], data.temp$TraitName, + fixed = TRUE), "OrigValue"] + Vec.Trait.Data.OrigUnit[i] <- data.temp[grepl(Trait.Data[i], data.temp$TraitName, + fixed = TRUE), "OrigUnitStr"] + Vec.Trait.Data.StdValue[i] <- data.temp[grepl(Trait.Data[i], data.temp$TraitName, + fixed = TRUE), "StdValue"] + } + + if (sum(grepl(Trait.Data[i], data.temp$TraitName, fixed = TRUE)) > 1) { + if (sum((data.temp$ValueKindName %in% c("Best estimate", "Mean", "Site specific mean") & + !is.na(data.temp$ValueKindName))) == 1) { + Vec.Trait.Data.OrigValue[i] <- mean(data.temp[grepl(Trait.Data[i], + data.temp$TraitName, fixed = TRUE) & (data.temp$ValueKindName %in% + c("Best estimate", "Mean", "Site specific mean") & !is.na(data.temp$ValueKindName)), + "OrigValue"]) + Vec.Trait.Data.OrigUnit[i] <- (data.temp[grepl(Trait.Data[i], data.temp$TraitName, + fixed = TRUE) & (data.temp$ValueKindName %in% c("Best estimate", + "Mean", "Site specific mean") & !is.na(data.temp$ValueKindName)), + "OrigUnitStr"])[1] + Vec.Trait.Data.StdValue[i] <- mean(data.temp[grepl(Trait.Data[i], + data.temp$TraitName, fixed = TRUE) & (data.temp$ValueKindName %in% + c("Best estimate", "Mean", "Site specific mean") & !is.na(data.temp$ValueKindName)), + "StdValue"]) + } + if (sum(data.temp$ValueKindName %in% c("Best estimate", "Mean", "Site specific mean")) < + 1) { + Vec.Trait.Data.OrigValue[i] <- mean(data.temp[grepl(Trait.Data[i], + data.temp$TraitName, fixed = TRUE), "OrigValue"], na.rm = T) + Vec.Trait.Data.OrigUnit[i] <- (data.temp[grepl(Trait.Data[i], data.temp$TraitName, + fixed = TRUE), "OrigUnitStr"])[1] + Vec.Trait.Data.StdValue[i] <- mean(data.temp[grepl(Trait.Data[i], + data.temp$TraitName, fixed = TRUE), "StdValue"], na.rm = T) + } + + } + + } + ### EXPERIMENTAL DATA TYPE + TF.exp.data <- sum(grepl("Growth & measurement conditions - experimental tre", + data.temp$NonTraitCategories, fixed = TRUE)) > 0 + names(TF.exp.data) <- "TF.exp.data" + res.temp <- data.frame(ObservationID = ObservationID.t, AccSpeciesName = unique(data.temp$AccSpeciesName), + t(Vec.Non.Trait.Data), TF.exp.data, t(Vec.Trait.Data.OrigValue), t(Vec.Trait.Data.OrigUnit), + t(Vec.Trait.Data.StdValue)) + return(res.temp) + } -## outlier detection based on Kattage et al 2011 +## outlier detection based on Kattage et al 2011 ##' Detection of univar outlier based on method of Kattge et al. 2011 ##' ##' @@ -95,184 +107,179 @@ return(res.temp) ##' @param log ##' @return TRUE FALSE vector to identify outlier TRUE : outlier ##' @author Kunstler -fun.out.TF2 <- function(x.na,log=TRUE){ - x <- x.na[!is.na(x.na)] - x.num <- (1:length(x.na))[!is.na(x.na)] - TF.vec <- rep(FALSE,length(x.na)) - if(log){ - fit.dist <- fitdistr(log10(na.omit(x) ),'normal') - high.bound <- fit.dist$estimate["mean"]+2*(fit.dist$estimate["sd"]+fit.dist$sd["sd"]) - low.bound <- fit.dist$estimate["mean"]-2*(fit.dist$estimate["sd"]+fit.dist$sd["sd"]) - TF.vec[x.num[log10(x)>high.bound | log10(x)<low.bound]] <- TRUE - }else{ - fit.dist <- fitdistr((na.omit(x) ),'normal') - high.bound <- fit.dist$estimate["mean"]+2*(fit.dist$estimate["sd"]+fit.dist$sd["sd"]) - low.bound <- fit.dist$estimate["mean"]-2*(fit.dist$estimate["sd"]+fit.dist$sd["sd"]) - TF.vec[x.num[(x)>high.bound | (x)<low.bound]] <- TRUE - } - return((TF.vec)) +fun.out.TF2 <- function(x.na, log = TRUE) { + x <- x.na[!is.na(x.na)] + x.num <- (1:length(x.na))[!is.na(x.na)] + TF.vec <- rep(FALSE, length(x.na)) + if (log) { + fit.dist <- fitdistr(log10(na.omit(x)), "normal") + high.bound <- fit.dist$estimate["mean"] + 2 * (fit.dist$estimate["sd"] + + fit.dist$sd["sd"]) + low.bound <- fit.dist$estimate["mean"] - 2 * (fit.dist$estimate["sd"] + fit.dist$sd["sd"]) + TF.vec[x.num[log10(x) > high.bound | log10(x) < low.bound]] <- TRUE + } else { + fit.dist <- fitdistr((na.omit(x)), "normal") + high.bound <- fit.dist$estimate["mean"] + 2 * (fit.dist$estimate["sd"] + + fit.dist$sd["sd"]) + low.bound <- fit.dist$estimate["mean"] - 2 * (fit.dist$estimate["sd"] + fit.dist$sd["sd"]) + TF.vec[x.num[(x) > high.bound | (x) < low.bound]] <- TRUE + } + return((TF.vec)) } -######################## -######################## -### FUNCTION TO COMPUTE QUANTILE FOR HEIGHT -f.quantile <- function (x,ind,probs){quantile(x[ind],probs=probs,na.rm=TRUE)} +######################## FUNCTION TO COMPUTE QUANTILE FOR HEIGHT +f.quantile <- function(x, ind, probs) { + quantile(x[ind], probs = probs, na.rm = TRUE) +} -f.quantile.boot2 <- function(x,R,probs=0.99){ +f.quantile.boot2 <- function(x, R, probs = 0.99) { require(boot) - if(length(na.exclude(x))>0){ -quant.boot <- boot(x,f.quantile,R=R,probs=probs) -return(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t), - nobs=length(na.exclude(x)))) -}else{ -return(c(mean=NA,sd=NA,nobs=NA)) -} + if (length(na.exclude(x)) > 0) { + quant.boot <- boot(x, f.quantile, R = R, probs = probs) + return(c(mean = mean(quant.boot$t), sd = sd(quant.boot$t), nobs = length(na.exclude(x)))) + } else { + return(c(mean = NA, sd = NA, nobs = NA)) + } } -##################### -##################### -##### FUNcCTION TO COMPUTE MEAN SD AND NOBS WITH OR WITHOUT OUTLIER -fun.mean.sd.nobs.out <- function(x,i){ - if(length(x)>50){## if more than 50 obs remove outlier - outlier <- fun.out.TF2(x.na=x,log=TRUE) - if(i=="StdValue.Plant.height.vegetative"){ - res.temp <- f.quantile.boot2(log10(x[!outlier]),R=1000,probs=0.99) - }else{ - res.temp <- c(mean(log10(x[!outlier])), - sd(log10(x[!outlier])), - length(x[!outlier]))} - }else{ - if(i=="StdValue.Plant.height.vegetative"){ - res.temp <- f.quantile.boot2(log10(x),R=1000,probs=0.99) - }else{ - res.temp <- c(mean(log10(x)), - sd(log10(x)), - length(x))} - } -return(res.temp) - } - -################################### -################################### -####### extract mean sd per species or genus -####### added species synonyme - -fun.species.traits <- function(species.code,species.table,col.sp="sp",col.sp.syno="Latin_name_syn",traits,data){ - vec.mean <- vec.sd <- vec.nobs <- rep(NA,length(traits)) - vec.exp <- vec.genus <- rep(FALSE,length(traits)) - names(vec.mean) <- names(vec.sd) <- names(vec.exp) <- names(vec.genus) <- names(vec.nobs)<- traits - species.syno <- species.table[species.table[[col.sp]]==species.code,col.sp.syno] - #browser() - for(i in traits){ - if(sum((data$AccSpeciesName %in% species.syno) & !is.na(data[[i]]))>0){ ## if data for this species or syno - if(sum((data$AccSpeciesName %in% species.syno) & (!is.na(data[[i]])) & (!data[["TF.exp.data"]]))>0){## if data with out experiments - x <- data[[i]][data$AccSpeciesName %in% species.syno & (!is.na(data[[i]])) & - (!data[["TF.exp.data"]])] - res.temp <- fun.mean.sd.nobs.out(x,i) - vec.mean[[i]] <- res.temp[1] - vec.sd[[i]] <- res.temp[2] - vec.nobs[[i]] <- res.temp[3] - - }else{### include experimental data - x <- data[[i]][data$AccSpeciesName %in% species.syno & (!is.na(data[[i]])) ] - res.temp <- fun.mean.sd.nobs.out(x,i) - vec.mean[[i]] <- res.temp[1] - vec.sd[[i]] <- res.temp[2] - vec.nobs[[i]] <- res.temp[3] - vec.exp[[i]] <- TRUE - } - }else{### compute data at genus level if no data for the species +##################### FUNcCTION TO COMPUTE MEAN SD AND NOBS WITH OR WITHOUT OUTLIER +fun.mean.sd.nobs.out <- function(x, i) { + if (length(x) > 50) { + ## if more than 50 obs remove outlier + outlier <- fun.out.TF2(x.na = x, log = TRUE) + if (i == "StdValue.Plant.height.vegetative") { + res.temp <- f.quantile.boot2(log10(x[!outlier]), R = 1000, probs = 0.99) + } else { + res.temp <- c(mean(log10(x[!outlier])), sd(log10(x[!outlier])), length(x[!outlier])) + } + } else { + if (i == "StdValue.Plant.height.vegetative") { + res.temp <- f.quantile.boot2(log10(x), R = 1000, probs = 0.99) + } else { + res.temp <- c(mean(log10(x)), sd(log10(x)), length(x)) + } + } + return(res.temp) +} - genus <- sub(" .*","",species.syno) - if(sum(grepl(genus,data$AccSpeciesName) & (!is.na(data[[i]])))>0){ - x <- data[[i]][grepl(genus,data$AccSpeciesName,fixed=TRUE ) & (!is.na(data[[i]])) ] - res.temp <- fun.mean.sd.nobs.out(x,i) - vec.mean[[i]] <- res.temp[1] - vec.sd[[i]] <- res.temp[2] - vec.nobs[[i]] <- res.temp[3] - vec.genus[[i]] <- TRUE - } +################################### extract mean sd per species or genus added species synonyme + +fun.species.traits <- function(species.code, species.table, col.sp = "sp", col.sp.syno = "Latin_name_syn", + traits, data) { + vec.mean <- vec.sd <- vec.nobs <- rep(NA, length(traits)) + vec.exp <- vec.genus <- rep(FALSE, length(traits)) + names(vec.mean) <- names(vec.sd) <- names(vec.exp) <- names(vec.genus) <- names(vec.nobs) <- traits + species.syno <- species.table[species.table[[col.sp]] == species.code, col.sp.syno] + # browser() + for (i in traits) { + if (sum((data$AccSpeciesName %in% species.syno) & !is.na(data[[i]])) > 0) { + ## if data for this species or syno if data with out experiments + if (sum((data$AccSpeciesName %in% species.syno) & (!is.na(data[[i]])) & + (!data[["TF.exp.data"]])) > 0) { + x <- data[[i]][data$AccSpeciesName %in% species.syno & (!is.na(data[[i]])) & + (!data[["TF.exp.data"]])] + res.temp <- fun.mean.sd.nobs.out(x, i) + vec.mean[[i]] <- res.temp[1] + vec.sd[[i]] <- res.temp[2] + vec.nobs[[i]] <- res.temp[3] + + } else { + ### include experimental data + x <- data[[i]][data$AccSpeciesName %in% species.syno & (!is.na(data[[i]]))] + res.temp <- fun.mean.sd.nobs.out(x, i) + vec.mean[[i]] <- res.temp[1] + vec.sd[[i]] <- res.temp[2] + vec.nobs[[i]] <- res.temp[3] + vec.exp[[i]] <- TRUE + } + } else { + ### compute data at genus level if no data for the species + + genus <- sub(" .*", "", species.syno) + if (sum(grepl(genus, data$AccSpeciesName) & (!is.na(data[[i]]))) > 0) { + x <- data[[i]][grepl(genus, data$AccSpeciesName, fixed = TRUE) & + (!is.na(data[[i]]))] + res.temp <- fun.mean.sd.nobs.out(x, i) + vec.mean[[i]] <- res.temp[1] + vec.sd[[i]] <- res.temp[2] + vec.nobs[[i]] <- res.temp[3] + vec.genus[[i]] <- TRUE + } + } } - } -return(list(mean=vec.mean,sd=vec.sd,exp=vec.exp,genus=vec.genus,nobs=vec.nobs)) + return(list(mean = vec.mean, sd = vec.sd, exp = vec.exp, genus = vec.genus, nobs = vec.nobs)) } -####################### -### FUNCTIONS TO Manipulate species names -fun.get.genus <- function(x) gsub(paste(" ",gsub("^([a-zA-Z]* )","",x),sep=""),"",x,fixed=TRUE) -trim.trailing <- function (x) sub("\\s+$", "", x) +####################### FUNCTIONS TO Manipulate species names +fun.get.genus <- function(x) gsub(paste(" ", gsub("^([a-zA-Z]* )", "", x), sep = ""), + "", x, fixed = TRUE) +trim.trailing <- function(x) sub("\\s+$", "", x) -####################################### -####################################### -##### FUN TO EXTRACT FOR A GIVEN DATA BASE +####################################### FUN TO EXTRACT FOR A GIVEN DATA BASE -fun.turn.list.in.DF <- function(sp,res.list){ -data.mean <- t(sapply(sp,FUN=function(i,res.list) res.list[[i]]$mean - ,res.list=res.list)) -data.sd <- t(sapply(sp,FUN=function(i,res.list) res.list[[i]]$sd,res.list=res.list)) -data.exp <- t(sapply(sp,FUN=function(i,res.list) res.list[[i]]$exp - ,res.list=res.list)) -data.genus <- t(sapply(sp,FUN=function(i,res.list) res.list[[i]]$genus - ,res.list=res.list)) -data.nobs <- t(sapply(sp,FUN=function(i,res.list) res.list[[i]]$nobs - ,res.list=res.list)) -## create data.frame withh all observation -extract.species.try <- data.frame(data.mean,data.sd,data.exp,data.genus,data.nobs) -names(extract.species.try) <- c(paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Height") - ,"mean",sep=".") - ,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Height") - ,"sd",sep=".") - ,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Height") - ,"exp",sep=".") - ,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Height") - ,"genus",sep=".") - ,paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Height") - ,"nobs",sep=".")) -return(extract.species.try) +fun.turn.list.in.DF <- function(sp, res.list) { + data.mean <- t(sapply(sp, FUN = function(i, res.list) res.list[[i]]$mean, res.list = res.list)) + data.sd <- t(sapply(sp, FUN = function(i, res.list) res.list[[i]]$sd, res.list = res.list)) + data.exp <- t(sapply(sp, FUN = function(i, res.list) res.list[[i]]$exp, res.list = res.list)) + data.genus <- t(sapply(sp, FUN = function(i, res.list) res.list[[i]]$genus, res.list = res.list)) + data.nobs <- t(sapply(sp, FUN = function(i, res.list) res.list[[i]]$nobs, res.list = res.list)) + ## create data.frame withh all observation + extract.species.try <- data.frame(data.mean, data.sd, data.exp, data.genus, data.nobs) + names(extract.species.try) <- c(paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density", + "Height"), "mean", sep = "."), paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density", + "Height"), "sd", sep = "."), paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density", + "Height"), "exp", sep = "."), paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density", + "Height"), "genus", sep = "."), paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density", + "Height"), "nobs", sep = ".")) + return(extract.species.try) } - - -fun.extract.format.sp.traits.TRY <- function(sp,sp.syno.table,data){ -## check syno data if not create a table with column syno repating the species - -### test data sp and sp.syno.table match -if(sum(!(sp %in% sp.syno.table[["sp"]] ))>0) stop('not same species name in sp and sp.syno.table') -if(sum((sp.syno.table[["Latin_name_syn"]] %in% data[["AccSpeciesName"]] ))==0) stop('not a single similar species name in sp and TRY') -## extract -traits <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", - "StdValue.Seed.mass", - "StdValue.Leaf.specific.area..SLA.", - "StdValue.Stem.specific.density..SSD.", - "StdValue.Plant.height.vegetative") - -res.list <- lapply(sp,FUN=fun.species.traits,species.table=sp.syno.table,traits=traits,data=data) -names(res.list) <- sp - -##### TRANSFORM LIST IN A TABLE -extract.species.try <- fun.turn.list.in.DF(sp,res.list) - -############### add mean sd of species or genus if we want to use that -sd.vec.sp <- readRDS(file="./data/process/sd.vec.sp.rds") -sd.vec.genus <- readRDS(file="./data/process/sd.vec.genus.rds") -sd.names <- paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Height") - ,"sd",sep=".") -genus.names <- paste(c("Leaf.N","Seed.mass","SLA","Wood.Density","Height") - ,"genus",sep=".") -### add columns -extract.species.try.2 <- data.frame(extract.species.try, - extract.species.try[,sd.names]) -## update value -sd.names.1 <- paste(sd.names,1,sep=".") -for (i in 1:length(sd.names.1)){ - extract.species.try.2[[sd.names.1[i]]][!extract.species.try.2[[genus.names[i]]]] <- sd.vec.sp[i] - extract.species.try.2[[sd.names.1[i]]][extract.species.try.2[[genus.names[i]]]] <- sd.vec.genus[i] - } -data.frame.TRY <- data.frame(sp=sp,Latin_name=sp.syno.table[["Latin_name_syn"]],extract.species.try.2) -if(sum(!data.frame.TRY[["sp"]]==sp)>0) stop('Wrong order of species code') -return(data.frame.TRY) -} +fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) { + ## check syno data if not create a table with column syno repating the species + + ### test data sp and sp.syno.table match + if (sum(!(sp %in% sp.syno.table[["sp"]])) > 0) + stop("not same species name in sp and sp.syno.table") + if (sum((sp.syno.table[["Latin_name_syn"]] %in% data[["AccSpeciesName"]])) == + 0) + stop("not a single similar species name in sp and TRY") + ## extract + traits <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", "StdValue.Seed.mass", + "StdValue.Leaf.specific.area..SLA.", "StdValue.Stem.specific.density..SSD.", + "StdValue.Plant.height.vegetative") + + res.list <- lapply(sp, FUN = fun.species.traits, species.table = sp.syno.table, + traits = traits, data = data) + names(res.list) <- sp + + ##### TRANSFORM LIST IN A TABLE + extract.species.try <- fun.turn.list.in.DF(sp, res.list) + + ############### add mean sd of species or genus if we want to use that + sd.vec.sp <- readRDS(file = "./data/process/sd.vec.sp.rds") + sd.vec.genus <- readRDS(file = "./data/process/sd.vec.genus.rds") + + sd.names <- paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density", "Height"), + "sd", sep = ".") + genus.names <- paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density", "Height"), + "genus", sep = ".") + ### add columns + extract.species.try.2 <- data.frame(extract.species.try, extract.species.try[, + sd.names]) + + ## update value + sd.names.1 <- paste(sd.names, 1, sep = ".") + for (i in 1:length(sd.names.1)) { + extract.species.try.2[[sd.names.1[i]]][!extract.species.try.2[[genus.names[i]]]] <- sd.vec.sp[i] + extract.species.try.2[[sd.names.1[i]]][extract.species.try.2[[genus.names[i]]]] <- sd.vec.genus[i] + } + data.frame.TRY <- data.frame(sp = sp, Latin_name = sp.syno.table[["Latin_name_syn"]], + extract.species.try.2) + if (sum(!data.frame.TRY[["sp"]] == sp) > 0) + stop("Wrong order of species code") + return(data.frame.TRY) +} diff --git a/R/FUN.climate.R b/R/FUN.climate.R index 5142d7c99cb805365b26d480539a5f3139523fa4..d20735431c49578f42c8015fbd88aab50e3a3b5e 100644 --- a/R/FUN.climate.R +++ b/R/FUN.climate.R @@ -1,6 +1,4 @@ -##################################### -##################################### -#### FUNCTION TO COMPUTE CLIMATIC DATA FOR FRANCE +##################################### FUNCTION TO COMPUTE CLIMATIC DATA FOR FRANCE ## function to compute sum of degree days above 5.56 ##' .. Compute teh sum of degree days above 5.56C from monthly data with a spline .. @@ -11,110 +9,110 @@ ##' @param threshold.sgdd threshold of temperature to compte sum of degree day default 5.56 ##' @return ##' @author Kunstler -fun.sgdd <- function(temp,threshold.sgdd = 5.56){ - require(season) - temp <- unlist(temp) - ndays.month <- flagleap(data.frame(year=2013, month=1:12), F)$ndaysmonth - ndays.year <- sum(ndays.month) - x <- c(-ndays.month[12]/2,(ndays.month/2) + c(0, cumsum(ndays.month[1:11])),ndays.year+ndays.month[1]/2) - ## plot(x,c(temp[12],temp,temp[1]),xlim=c(0,365),type='b') - myfit <- loess(y~x, data=data.frame(y=c(temp[12],temp,temp[1]), x=x),span=0.4,degree=2) - mypred <- predict(myfit, 1:ndays.year) - ## lines( 1:ndays.year,mypred,col="red") - sgdd <- sum(mypred[mypred>=threshold.sgdd]-threshold.sgdd) - return(sgdd) +fun.sgdd <- function(temp, threshold.sgdd = 5.56) { + require(season) + temp <- unlist(temp) + ndays.month <- flagleap(data.frame(year = 2013, month = 1:12), F)$ndaysmonth + ndays.year <- sum(ndays.month) + x <- c(-ndays.month[12]/2, (ndays.month/2) + c(0, cumsum(ndays.month[1:11])), + ndays.year + ndays.month[1]/2) + ## plot(x,c(temp[12],temp,temp[1]),xlim=c(0,365),type='b') + myfit <- loess(y ~ x, data = data.frame(y = c(temp[12], temp, temp[1]), x = x), + span = 0.4, degree = 2) + mypred <- predict(myfit, 1:ndays.year) + ## lines( 1:ndays.year,mypred,col='red') + sgdd <- sum(mypred[mypred >= threshold.sgdd] - threshold.sgdd) + return(sgdd) } -################################################################################################# -### function to compute max soil water content based on Piedallu 2010 Geoderma -fun.swhc <- function(affroc,cailloux,text2,text1,prof2,prof1,codeprof,codecaillou,data.texture){ -## browser() - -### transform code of prof= soil depth in cm and code of percentage of rock in % -prof2.t <- prof2 -prof2.t[is.na(prof2)] <- 0 -prof2.cm.temp <- codeprof[prof2+1] -prof2.cm.temp[is.na(prof2)] <-0 -prof1.t <- prof1 -prof1.t[is.na(prof1)] <- 0 -text1[is.na(text1)] <- 0 -prof1.cm <- codeprof[prof1.t+1] -prof1.cm[is.na(prof1)] <- 0 -cailloux.t <- cailloux -cailloux.t[is.na(cailloux)] <- 0 -cailloux.perc <- codecaillou[cailloux.t+1] -cailloux.perc[is.na(cailloux)] <- 0 - -## compute depth of second horizon -prof2.cm <- prof2.cm.temp - prof1.cm -perc.top1 <- apply(cbind(10/prof1.cm,rep(1,length(prof1.cm))),MARGIN=1,FUN=min) -perc.top2 <- apply(cbind(apply(cbind(rep(0,length(prof1.cm)),(10-prof1.cm)/prof2.cm.temp),MARGIN=1,FUN=max), - rep(1,length(prof1.cm))),MARGIN=1,FUN=min) -caill <- (1-codecaillou[affroc+1]/100)*(1-(sqrt(cailloux.perc/100))^3) -## print(c(prof2.cm,perc.top2)) -top.hor <- (prof1.cm*perc.top1*data.texture[text1+1,"U_almajou_top"]+prof2.cm*perc.top2*data.texture[text2+1,"U_almajou_top"]) - -sub.hor <- (prof1.cm*(1-perc.top1)*data.texture[text1+1,"U_almajou_sub"]+prof2.cm*(1-perc.top2)*data.texture[text2+1,"U_almajou_sub"]) - -swhc <- caill*(top.hor+sub.hor) -return(swhc) -} - -################################################################# -## function to compute PET -fun.PET <- function(i,rad,temp){ -## conversion data - require(season) - ndays.month <- flagleap(data.frame(year=2013, month=1:12), F)$ndaysmonth -Rs <- unlist(rad[i,])/100/30*1000 -Ta <- unlist(temp[i,]) -# compute TURC FORMULA -vec.pet <- 0.4*((0.0239001*Rs)+50)*(Ta/(Ta+15))/30*ndays.month -## with Rs daily global (total) solar radiation (kJ/m2/day) -## and Ta monthly mean temp in C and ndays.month number of day in each month -return(vec.pet) +################################################################################################# function to compute max soil water content based on Piedallu 2010 Geoderma +fun.swhc <- function(affroc, cailloux, text2, text1, prof2, prof1, codeprof, codecaillou, + data.texture) { + ## browser() + + ### transform code of prof= soil depth in cm and code of percentage of rock in % + prof2.t <- prof2 + prof2.t[is.na(prof2)] <- 0 + prof2.cm.temp <- codeprof[prof2 + 1] + prof2.cm.temp[is.na(prof2)] <- 0 + prof1.t <- prof1 + prof1.t[is.na(prof1)] <- 0 + text1[is.na(text1)] <- 0 + prof1.cm <- codeprof[prof1.t + 1] + prof1.cm[is.na(prof1)] <- 0 + cailloux.t <- cailloux + cailloux.t[is.na(cailloux)] <- 0 + cailloux.perc <- codecaillou[cailloux.t + 1] + cailloux.perc[is.na(cailloux)] <- 0 + + ## compute depth of second horizon + prof2.cm <- prof2.cm.temp - prof1.cm + perc.top1 <- apply(cbind(10/prof1.cm, rep(1, length(prof1.cm))), MARGIN = 1, + FUN = min) + perc.top2 <- apply(cbind(apply(cbind(rep(0, length(prof1.cm)), (10 - prof1.cm)/prof2.cm.temp), + MARGIN = 1, FUN = max), rep(1, length(prof1.cm))), MARGIN = 1, FUN = min) + caill <- (1 - codecaillou[affroc + 1]/100) * (1 - (sqrt(cailloux.perc/100))^3) + ## print(c(prof2.cm,perc.top2)) + top.hor <- (prof1.cm * perc.top1 * data.texture[text1 + 1, "U_almajou_top"] + + prof2.cm * perc.top2 * data.texture[text2 + 1, "U_almajou_top"]) + + sub.hor <- (prof1.cm * (1 - perc.top1) * data.texture[text1 + 1, "U_almajou_sub"] + + prof2.cm * (1 - perc.top2) * data.texture[text2 + 1, "U_almajou_sub"]) + + swhc <- caill * (top.hor + sub.hor) + return(swhc) } +################################################################# function to compute PET +fun.PET <- function(i, rad, temp) { + ## conversion data + require(season) + ndays.month <- flagleap(data.frame(year = 2013, month = 1:12), F)$ndaysmonth + Rs <- unlist(rad[i, ])/100/30 * 1000 + Ta <- unlist(temp[i, ]) + # compute TURC FORMULA + vec.pet <- 0.4 * ((0.0239001 * Rs) + 50) * (Ta/(Ta + 15))/30 * ndays.month + ## with Rs daily global (total) solar radiation (kJ/m2/day) and Ta monthly mean + ## temp in C and ndays.month number of day in each month + return(vec.pet) +} -#################################################################################### -#################################################################################### -#################################################################################### -### FUNCTION TO COMPUTE WATER BUDGET based on Bugmann & Cramer 1998 - -fun.WaterBudget <-function(i,prcp.m,PET.m,Ta.m,SWHC.v,n=2){ -WATER <- rep(NA,12*n) -WATER[1] <- unlist(SWHC.v)[i] -SWHC <- unlist(SWHC.v)[i] -AET <- rep(NA,12*n) -D <- rep(NA,12*n) -prcp <- rep(unlist(prcp.m[i,]),n) -PET <- rep(unlist(PET.m[i,]),n) -Ta <- rep(unlist(Ta.m[i,]),n) -for (i in 2:(12*n)){ - Pi <- min(0.3*prcp[i] , PET[i]) - Ps <- prcp[i] - Pi - S <- 120*WATER[i-1]/SWHC - D[i] <- PET[i]-Pi - AET[i] <- max(0,min(D[i],S)) - WATER[i] <- max(0,min(WATER[i-1]+Ps-AET[i],SWHC)) - } -## plot(1:(12*n),WATER,type="b") -WATER <- WATER[13:24] -Ta <- Ta[13:24] -AET <- AET[13:24] -D <- D[13:24] -## NEED to compute mean water availability of the growing season -WB.s <- mean(WATER[Ta>5.56],na.rm=T) -## NEED to compute mean water availability of the year -WB.y <- mean(WATER,na.rm=T) -## NEED to compute water stress index of the growing season -WS.s <- sum(na.omit(AET[Ta>5.56 & D>0]))/sum(na.omit(D[Ta>5.56 & D>0])) +#################################################################################### FUNCTION TO COMPUTE WATER BUDGET based on Bugmann & Cramer 1998 -## NEED to compute water stress index of the year -WS.y <- sum(na.omit(AET[D>0]))/sum(na.omit(D[D>0])) -return(c("WB.s"=WB.s,"WB.y"=WB.y, - "WS.s"=WS.s,"WS.y"=WS.y)) -} +fun.WaterBudget <- function(i, prcp.m, PET.m, Ta.m, SWHC.v, n = 2) { + WATER <- rep(NA, 12 * n) + WATER[1] <- unlist(SWHC.v)[i] + SWHC <- unlist(SWHC.v)[i] + AET <- rep(NA, 12 * n) + D <- rep(NA, 12 * n) + prcp <- rep(unlist(prcp.m[i, ]), n) + PET <- rep(unlist(PET.m[i, ]), n) + Ta <- rep(unlist(Ta.m[i, ]), n) + + for (i in 2:(12 * n)) { + Pi <- min(0.3 * prcp[i], PET[i]) + Ps <- prcp[i] - Pi + S <- 120 * WATER[i - 1]/SWHC + D[i] <- PET[i] - Pi + AET[i] <- max(0, min(D[i], S)) + WATER[i] <- max(0, min(WATER[i - 1] + Ps - AET[i], SWHC)) + } + ## plot(1:(12*n),WATER,type='b') + WATER <- WATER[13:24] + Ta <- Ta[13:24] + AET <- AET[13:24] + D <- D[13:24] + ## NEED to compute mean water availability of the growing season + WB.s <- mean(WATER[Ta > 5.56], na.rm = T) + ## NEED to compute mean water availability of the year + WB.y <- mean(WATER, na.rm = T) + ## NEED to compute water stress index of the growing season + WS.s <- sum(na.omit(AET[Ta > 5.56 & D > 0]))/sum(na.omit(D[Ta > 5.56 & D > 0])) + + ## NEED to compute water stress index of the year + WS.y <- sum(na.omit(AET[D > 0]))/sum(na.omit(D[D > 0])) + return(c(WB.s = WB.s, WB.y = WB.y, WS.s = WS.s, WS.y = WS.y)) +} diff --git a/R/READ.DATA.NFI.FRANCE.R b/R/READ.DATA.NFI.FRANCE.R index bb84aa0aa4e17119d65923f6a38d755a57ada4d7..fe4f8c5cf6c28a9707d8d1b443afc2aad8c4be2b 100644 --- a/R/READ.DATA.NFI.FRANCE.R +++ b/R/READ.DATA.NFI.FRANCE.R @@ -1,443 +1,432 @@ -############################################################ -############################################################ -########## READ, MERGE AND CLEAN ALL NFI DATA NEW METHODS +############################################################ READ, MERGE AND CLEAN ALL NFI DATA NEW METHODS ### read TREE table downloaded from the web -arbre2005 <- read.csv("./data/raw/DataFrance/2005/arbres_foret_2005.csv",sep=";", stringsAsFactors=FALSE) +arbre2005 <- read.csv("./data/raw/DataFrance/2005/arbres_foret_2005.csv", sep = ";", + stringsAsFactors = FALSE) summary(arbre2005) -arbre2006 <- read.csv("./data/raw/DataFrance/2006/arbres_foret_2006.csv",sep=";", stringsAsFactors=FALSE) +arbre2006 <- read.csv("./data/raw/DataFrance/2006/arbres_foret_2006.csv", sep = ";", + stringsAsFactors = FALSE) summary(arbre2006) -arbre2007 <- read.csv("./data/raw/DataFrance/2007/arbres_foret_2007.csv",sep=";", stringsAsFactors=FALSE) +arbre2007 <- read.csv("./data/raw/DataFrance/2007/arbres_foret_2007.csv", sep = ";", + stringsAsFactors = FALSE) summary(arbre2007) -arbre2008 <- read.csv("./data/raw/DataFrance/2008/arbres_foret_2008.csv",sep=";", stringsAsFactors=FALSE) +arbre2008 <- read.csv("./data/raw/DataFrance/2008/arbres_foret_2008.csv", sep = ";", + stringsAsFactors = FALSE) summary(arbre2005) -arbre2009 <- read.csv("./data/raw/DataFrance/2009/arbres_foret_2009.csv",sep=";", stringsAsFactors=FALSE) +arbre2009 <- read.csv("./data/raw/DataFrance/2009/arbres_foret_2009.csv", sep = ";", + stringsAsFactors = FALSE) summary(arbre2009) -arbre2010 <- read.csv("./data/raw/DataFrance/2010/arbres_foret_2010.csv",sep=";", stringsAsFactors=FALSE) +arbre2010 <- read.csv("./data/raw/DataFrance/2010/arbres_foret_2010.csv", sep = ";", + stringsAsFactors = FALSE) summary(arbre2010) -arbre2011 <- read.csv("./data/raw/DataFrance/2011/arbres_foret_2011.csv",sep=";", stringsAsFactors=FALSE) - - - - - -#### ALL TREE after 2007 don't have the veget value as in 2005 aned 2006 because new variable ACCI for tree with accident (trunk broken ...) -### NEED TO UPDATE VEGET FROM ACCI -arbre2007$veget[arbre2007$acci>0] <- 1 -arbre2008$veget[arbre2008$acci>0] <- 1 -arbre2009$veget[arbre2009$acci>0] <- 1 -arbre2010$veget[arbre2010$acci>0] <- 1 -arbre2011$veget[arbre2011$acci>0] <- 1 -## -arbre2005$veget <- unclass(arbre2005$veget)-1 -arbre2006$veget <- unclass(arbre2006$veget)-1 - -###### -## ORI NEED TO BE DEGRADED SINCE 2007 to two level from resprout or from seed -arbre2007$ori[arbre2007$ori==2] <- 0 -arbre2008$ori[arbre2008$ori==2] <- 0 -arbre2009$ori[arbre2009$ori==2] <- 0 -arbre2010$ori[arbre2010$ori==2] <- 0 -arbre2011$ori[arbre2011$ori==2] <- 0 - - -############################## -### merge all table adding NA when no variable for that year -arbre.tot <- data.frame(idp=c(arbre2005$idp,arbre2006$idp,arbre2007$idp,arbre2008$idp,arbre2009$idp,arbre2010$idp,arbre2011$idp), - a=c(arbre2005$a,arbre2006$a,arbre2007$a,arbre2008$a,arbre2009$a,arbre2010$a,arbre2011$a), - veget=c(arbre2005$veget,arbre2006$veget,arbre2007$veget,arbre2008$veget,arbre2009$veget,arbre2010$veget,arbre2011$veget), - simplif=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)), - rep(NA,length(arbre2008$idp)),arbre2009$simplif,arbre2010$simplif,arbre2011$simplif), - acci=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),arbre2007$acci,arbre2008$acci,arbre2009$acci, - arbre2010$acci,arbre2011$acci), - espar=c(as.character(arbre2005$espar),as.character(arbre2006$espar),as.character(arbre2007$espar), - as.character(arbre2008$espar),as.character(arbre2009$espar),as.character(arbre2010$espar),as.character(arbre2011$espar)), - ori=c(arbre2005$ori,arbre2006$ori,arbre2007$ori,arbre2008$ori,arbre2009$ori,arbre2010$ori,arbre2011$ori), - lib=c(arbre2005$lib,arbre2006$lib,arbre2007$lib,arbre2008$lib,arbre2009$lib,arbre2010$lib,arbre2011$lib), - forme=c(arbre2005$forme,arbre2006$forme,arbre2007$forme,arbre2008$forme,arbre2009$forme,arbre2010$forme,arbre2011$forme), - tige=c(arbre2005$tige,arbre2006$tige,arbre2007$tige,arbre2008$tige,arbre2009$tige,arbre2010$tige,arbre2011$tige), - mortb=c(rep(NA,length(arbre2005$idp)),arbre2006$mortb,arbre2007$mortb,arbre2008$mortb,arbre2009$mortb,arbre2010$mortb, - arbre2011$mortb), - sfgui=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$sfgui, - arbre2009$sfgui,arbre2010$sfgui,arbre2011$sfgui), - sfgeliv=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$sfgeliv, - arbre2009$sfgeliv,arbre2010$sfgeliv,arbre2011$sfgeliv), - sfpied=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$sfpied, - arbre2009$sfpied,arbre2010$sfpied,arbre2011$sfpied), - sfdorge=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$sfdorge, - arbre2009$sfdorge,arbre2010$sfdorge,arbre2011$sfdorge), - sfcoeur=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)), - rep(NA,length(arbre2008$idp)),arbre2009$sfcoeur,arbre2010$sfcoeur,arbre2011$sfcoeur), - c13=c(arbre2005$c13,arbre2006$c13,arbre2007$c13,arbre2008$c13,arbre2009$c13,arbre2010$c13,arbre2011$c13), - ir5=c(arbre2005$ir5,arbre2006$ir5,arbre2007$ir5,arbre2008$ir5,arbre2009$ir5,arbre2010$ir5,arbre2011$ir5), - htot=c(arbre2005$htot,arbre2006$htot,arbre2007$htot,arbre2008$htot,arbre2009$htot,arbre2010$htot,arbre2011$htot), - hdec=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$hdec, - arbre2009$hdec,arbre2010$hdec,arbre2011$hdec), - decoupe=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$decoupe, - arbre2009$decoupe,arbre2010$decoupe,arbre2011$decoupe), - q1=c(arbre2005$q1,arbre2006$q1,arbre2007$q1,arbre2008$q1,arbre2009$q1,arbre2010$q1,arbre2011$q1), - q2=c(arbre2005$q2,arbre2006$q2,arbre2007$q2,arbre2008$q2,arbre2009$q2,arbre2010$q2,arbre2011$q2), - q3=c(arbre2005$q3,arbre2006$q3,arbre2007$q3,arbre2008$q3,arbre2009$q3,arbre2010$q3,arbre2011$q3), - r=c(arbre2005$r,arbre2006$r,arbre2007$r,arbre2008$r,arbre2009$r,arbre2010$r,arbre2011$r), - lfsd=c(arbre2005$lfsd,arbre2006$lfsd,arbre2007$lfsd,arbre2008$lfsd,arbre2009$lfsd,arbre2010$lfsd,arbre2011$lfsd), - age=c(rep(NA,length(arbre2005$idp)),rep(NA,length(arbre2006$idp)),rep(NA,length(arbre2007$idp)),arbre2008$age, - arbre2009$age,arbre2010$age,arbre2011$age), - v=c(arbre2005$v,arbre2006$v,arbre2007$v,arbre2008$v,arbre2009$v,arbre2010$v,arbre2011$v), - w=c(arbre2005$w,arbre2006$w,arbre2007$w,arbre2008$w,arbre2009$w,arbre2010$w,arbre2011$w), - YEAR=c(rep(2005,length(arbre2005$idp)),rep(2006,length(arbre2006$idp)),rep(2007,length(arbre2007$idp)), - rep(2008,length(arbre2008$idp)),rep(2009,length(arbre2009$simplif)),rep(2010,length(arbre2010$simplif)), - rep(2011,length(arbre2011$simplif)))) - -rm(arbre2005,arbre2006,arbre2007,arbre2008,arbre2009,arbre2010,arbre2011) +arbre2011 <- read.csv("./data/raw/DataFrance/2011/arbres_foret_2011.csv", sep = ";", + stringsAsFactors = FALSE) + + + + + +#### ALL TREE after 2007 don't have the veget value as in 2005 aned 2006 because new +#### variable ACCI for tree with accident (trunk broken ...) NEED TO UPDATE VEGET +#### FROM ACCI +arbre2007$veget[arbre2007$acci > 0] <- 1 +arbre2008$veget[arbre2008$acci > 0] <- 1 +arbre2009$veget[arbre2009$acci > 0] <- 1 +arbre2010$veget[arbre2010$acci > 0] <- 1 +arbre2011$veget[arbre2011$acci > 0] <- 1 +## +arbre2005$veget <- unclass(arbre2005$veget) - 1 +arbre2006$veget <- unclass(arbre2006$veget) - 1 + +###### ORI NEED TO BE DEGRADED SINCE 2007 to two level from resprout or from seed +arbre2007$ori[arbre2007$ori == 2] <- 0 +arbre2008$ori[arbre2008$ori == 2] <- 0 +arbre2009$ori[arbre2009$ori == 2] <- 0 +arbre2010$ori[arbre2010$ori == 2] <- 0 +arbre2011$ori[arbre2011$ori == 2] <- 0 + + +############################## merge all table adding NA when no variable for that year +arbre.tot <- data.frame(idp = c(arbre2005$idp, arbre2006$idp, arbre2007$idp, arbre2008$idp, + arbre2009$idp, arbre2010$idp, arbre2011$idp), a = c(arbre2005$a, arbre2006$a, + arbre2007$a, arbre2008$a, arbre2009$a, arbre2010$a, arbre2011$a), veget = c(arbre2005$veget, + arbre2006$veget, arbre2007$veget, arbre2008$veget, arbre2009$veget, arbre2010$veget, + arbre2011$veget), simplif = c(rep(NA, length(arbre2005$idp)), rep(NA, length(arbre2006$idp)), + rep(NA, length(arbre2007$idp)), rep(NA, length(arbre2008$idp)), arbre2009$simplif, + arbre2010$simplif, arbre2011$simplif), acci = c(rep(NA, length(arbre2005$idp)), + rep(NA, length(arbre2006$idp)), arbre2007$acci, arbre2008$acci, arbre2009$acci, + arbre2010$acci, arbre2011$acci), espar = c(as.character(arbre2005$espar), as.character(arbre2006$espar), + as.character(arbre2007$espar), as.character(arbre2008$espar), as.character(arbre2009$espar), + as.character(arbre2010$espar), as.character(arbre2011$espar)), ori = c(arbre2005$ori, + arbre2006$ori, arbre2007$ori, arbre2008$ori, arbre2009$ori, arbre2010$ori, arbre2011$ori), + lib = c(arbre2005$lib, arbre2006$lib, arbre2007$lib, arbre2008$lib, arbre2009$lib, + arbre2010$lib, arbre2011$lib), forme = c(arbre2005$forme, arbre2006$forme, + arbre2007$forme, arbre2008$forme, arbre2009$forme, arbre2010$forme, arbre2011$forme), + tige = c(arbre2005$tige, arbre2006$tige, arbre2007$tige, arbre2008$tige, arbre2009$tige, + arbre2010$tige, arbre2011$tige), mortb = c(rep(NA, length(arbre2005$idp)), + arbre2006$mortb, arbre2007$mortb, arbre2008$mortb, arbre2009$mortb, arbre2010$mortb, + arbre2011$mortb), sfgui = c(rep(NA, length(arbre2005$idp)), rep(NA, length(arbre2006$idp)), + rep(NA, length(arbre2007$idp)), arbre2008$sfgui, arbre2009$sfgui, arbre2010$sfgui, + arbre2011$sfgui), sfgeliv = c(rep(NA, length(arbre2005$idp)), rep(NA, length(arbre2006$idp)), + rep(NA, length(arbre2007$idp)), arbre2008$sfgeliv, arbre2009$sfgeliv, arbre2010$sfgeliv, + arbre2011$sfgeliv), sfpied = c(rep(NA, length(arbre2005$idp)), rep(NA, length(arbre2006$idp)), + rep(NA, length(arbre2007$idp)), arbre2008$sfpied, arbre2009$sfpied, arbre2010$sfpied, + arbre2011$sfpied), sfdorge = c(rep(NA, length(arbre2005$idp)), rep(NA, length(arbre2006$idp)), + rep(NA, length(arbre2007$idp)), arbre2008$sfdorge, arbre2009$sfdorge, arbre2010$sfdorge, + arbre2011$sfdorge), sfcoeur = c(rep(NA, length(arbre2005$idp)), rep(NA, length(arbre2006$idp)), + rep(NA, length(arbre2007$idp)), rep(NA, length(arbre2008$idp)), arbre2009$sfcoeur, + arbre2010$sfcoeur, arbre2011$sfcoeur), c13 = c(arbre2005$c13, arbre2006$c13, + arbre2007$c13, arbre2008$c13, arbre2009$c13, arbre2010$c13, arbre2011$c13), + ir5 = c(arbre2005$ir5, arbre2006$ir5, arbre2007$ir5, arbre2008$ir5, arbre2009$ir5, + arbre2010$ir5, arbre2011$ir5), htot = c(arbre2005$htot, arbre2006$htot, arbre2007$htot, + arbre2008$htot, arbre2009$htot, arbre2010$htot, arbre2011$htot), hdec = c(rep(NA, + length(arbre2005$idp)), rep(NA, length(arbre2006$idp)), rep(NA, length(arbre2007$idp)), + arbre2008$hdec, arbre2009$hdec, arbre2010$hdec, arbre2011$hdec), decoupe = c(rep(NA, + length(arbre2005$idp)), rep(NA, length(arbre2006$idp)), rep(NA, length(arbre2007$idp)), + arbre2008$decoupe, arbre2009$decoupe, arbre2010$decoupe, arbre2011$decoupe), + q1 = c(arbre2005$q1, arbre2006$q1, arbre2007$q1, arbre2008$q1, arbre2009$q1, + arbre2010$q1, arbre2011$q1), q2 = c(arbre2005$q2, arbre2006$q2, arbre2007$q2, + arbre2008$q2, arbre2009$q2, arbre2010$q2, arbre2011$q2), q3 = c(arbre2005$q3, + arbre2006$q3, arbre2007$q3, arbre2008$q3, arbre2009$q3, arbre2010$q3, arbre2011$q3), + r = c(arbre2005$r, arbre2006$r, arbre2007$r, arbre2008$r, arbre2009$r, arbre2010$r, + arbre2011$r), lfsd = c(arbre2005$lfsd, arbre2006$lfsd, arbre2007$lfsd, arbre2008$lfsd, + arbre2009$lfsd, arbre2010$lfsd, arbre2011$lfsd), age = c(rep(NA, length(arbre2005$idp)), + rep(NA, length(arbre2006$idp)), rep(NA, length(arbre2007$idp)), arbre2008$age, + arbre2009$age, arbre2010$age, arbre2011$age), v = c(arbre2005$v, arbre2006$v, + arbre2007$v, arbre2008$v, arbre2009$v, arbre2010$v, arbre2011$v), w = c(arbre2005$w, + arbre2006$w, arbre2007$w, arbre2008$w, arbre2009$w, arbre2010$w, arbre2011$w), + YEAR = c(rep(2005, length(arbre2005$idp)), rep(2006, length(arbre2006$idp)), + rep(2007, length(arbre2007$idp)), rep(2008, length(arbre2008$idp)), rep(2009, + length(arbre2009$simplif)), rep(2010, length(arbre2010$simplif)), rep(2011, + length(arbre2011$simplif)))) + +rm(arbre2005, arbre2006, arbre2007, arbre2008, arbre2009, arbre2010, arbre2011) gc() -######################################################## -## #### check problem of unit for c13 ir5 and htot by plotting the data -## plot(arbre.tot$c13,arbre.tot$ir5,col=unclass(factor(arbre.tot$YEAR)),cex=0.1) -## boxplot(arbre.tot$c13~arbre.tot$YEAR,ylab="c13") -## boxplot(arbre.tot$ir5~arbre.tot$YEAR,ylab="ir5") -## boxplot(arbre.tot$htot~arbre.tot$YEAR,ylab="htot") -## boxplot(arbre.tot$age~arbre.tot$YEAR,ylab="age") -## ##### SOMETHING VERY STRANGE FOR THE AGE WITH SEVERAL TREE OVER 1000 YEARS OLD -## boxplot(arbre.tot$mortb~arbre.tot$YEAR,ylab="mortality branche") -## boxplot(arbre.tot$veget~arbre.tot$YEAR,ylab="accident") -## boxplot(arbre.tot$simplif~arbre.tot$YEAR,ylab="accident") -## boxplot(arbre.tot$w~arbre.tot$YEAR,ylab="accident") -## boxplot(arbre.tot$ori~arbre.tot$YEAR,ylab="accident") -## boxplot(arbre.tot$lib~arbre.tot$YEAR,ylab="accident") -## boxplot(arbre.tot$forme~arbre.tot$YEAR,ylab="accident") -## boxplot(arbre.tot$tige~arbre.tot$YEAR,ylab="accident") -## ### NEED TO USE ONLY THE TIGE == 1 in the ANALYSIS -## boxplot(arbre.tot$sfgui~arbre.tot$YEAR,ylab="accident") -## boxplot(arbre.tot$sfgeliv~arbre.tot$YEAR,ylab="accident") +######################################################## #### check problem of unit for c13 ir5 and htot by plotting the data +######################################################## plot(arbre.tot$c13,arbre.tot$ir5,col=unclass(factor(arbre.tot$YEAR)),cex=0.1) +######################################################## boxplot(arbre.tot$c13~arbre.tot$YEAR,ylab='c13') +######################################################## boxplot(arbre.tot$ir5~arbre.tot$YEAR,ylab='ir5') +######################################################## boxplot(arbre.tot$htot~arbre.tot$YEAR,ylab='htot') +######################################################## boxplot(arbre.tot$age~arbre.tot$YEAR,ylab='age') ##### SOMETHING VERY STRANGE +######################################################## FOR THE AGE WITH SEVERAL TREE OVER 1000 YEARS OLD +######################################################## boxplot(arbre.tot$mortb~arbre.tot$YEAR,ylab='mortality branche') +######################################################## boxplot(arbre.tot$veget~arbre.tot$YEAR,ylab='accident') +######################################################## boxplot(arbre.tot$simplif~arbre.tot$YEAR,ylab='accident') +######################################################## boxplot(arbre.tot$w~arbre.tot$YEAR,ylab='accident') +######################################################## boxplot(arbre.tot$ori~arbre.tot$YEAR,ylab='accident') +######################################################## boxplot(arbre.tot$lib~arbre.tot$YEAR,ylab='accident') +######################################################## boxplot(arbre.tot$forme~arbre.tot$YEAR,ylab='accident') +######################################################## boxplot(arbre.tot$tige~arbre.tot$YEAR,ylab='accident') ### NEED TO USE ONLY THE +######################################################## TIGE == 1 in the ANALYSIS +######################################################## boxplot(arbre.tot$sfgui~arbre.tot$YEAR,ylab='accident') +######################################################## boxplot(arbre.tot$sfgeliv~arbre.tot$YEAR,ylab='accident') -## ##### CHECK OTHER VARIABLE OK DONE -## ### USE BRANCH MORTALITY AS AN INDICATOR OF MORTALITY ?? ABIOTIC STRESS ? +## ##### CHECK OTHER VARIABLE OK DONE ### USE BRANCH MORTALITY AS AN INDICATOR OF +## MORTALITY ?? ABIOTIC STRESS ? -## x11() -## plot(arbre.tot$c13,arbre.tot$htot,col=unclass(factor(arbre.tot$YEAR))) +## x11() plot(arbre.tot$c13,arbre.tot$htot,col=unclass(factor(arbre.tot$YEAR))) -save(arbre.tot,file="./data/process/arbre.tot.Rdata") +save(arbre.tot, file = "./data/process/arbre.tot.Rdata") -######################################### -###### DEAD -######################################### +######################################### DEAD -############################################################## -############################################################# -## READ AND MERGE DEAD DATA -### MERGE WITH DEAD TREE and MERGE WITH PLOT DATA!! +############################################################## READ AND MERGE DEAD DATA MERGE WITH DEAD TREE and MERGE WITH PLOT DATA!! ### read DEAD TREE table downloaded from the web -arbre_mort2005 <- read.csv("./data/raw/DataFrance/2005/arbres_morts_foret_2005.csv",sep=";" - , stringsAsFactors=FALSE) +arbre_mort2005 <- read.csv("./data/raw/DataFrance/2005/arbres_morts_foret_2005.csv", + sep = ";", stringsAsFactors = FALSE) ## summary(arbre_mort2005) -arbre_mort2006 <- read.csv("./data/raw/DataFrance/2006/arbres_morts_foret_2006.csv",sep=";" - , stringsAsFactors=FALSE) +arbre_mort2006 <- read.csv("./data/raw/DataFrance/2006/arbres_morts_foret_2006.csv", + sep = ";", stringsAsFactors = FALSE) ## summary(arbre_mort2006) -arbre_mort2007 <- read.csv("./data/raw/DataFrance/2007/arbres_morts_foret_2007.csv",sep=";" - , stringsAsFactors=FALSE) +arbre_mort2007 <- read.csv("./data/raw/DataFrance/2007/arbres_morts_foret_2007.csv", + sep = ";", stringsAsFactors = FALSE) ## summary(arbre_mort2007) -arbre_mort2008 <- read.csv("./data/raw/DataFrance/2008/arbres_morts_foret_2008.csv",sep=";" - , stringsAsFactors=FALSE) +arbre_mort2008 <- read.csv("./data/raw/DataFrance/2008/arbres_morts_foret_2008.csv", + sep = ";", stringsAsFactors = FALSE) ## summary(arbre_mort2005) -arbre_mort2009 <- read.csv("./data/raw/DataFrance/2009/arbres_morts_foret_2009.csv",sep=";" - , stringsAsFactors=FALSE) +arbre_mort2009 <- read.csv("./data/raw/DataFrance/2009/arbres_morts_foret_2009.csv", + sep = ";", stringsAsFactors = FALSE) ## summary(arbre_mort2009) -arbre_mort2010 <- read.csv("./data/raw/DataFrance/2010/arbres_morts_foret_2010.csv",sep=";" - , stringsAsFactors=FALSE) +arbre_mort2010 <- read.csv("./data/raw/DataFrance/2010/arbres_morts_foret_2010.csv", + sep = ";", stringsAsFactors = FALSE) ## summary(arbre_mort2010) -arbre_mort2011 <- read.csv("./data/raw/DataFrance/2011/arbres_morts_foret_2011.csv",sep=";" - , stringsAsFactors=FALSE) - -## names(arbre_mort2005) -## names(arbre_mort2006) -## names(arbre_mort2007) -## names(arbre_mort2008) -## names(arbre_mort2009) -## names(arbre_mort2010) +arbre_mort2011 <- read.csv("./data/raw/DataFrance/2011/arbres_morts_foret_2011.csv", + sep = ";", stringsAsFactors = FALSE) + +## names(arbre_mort2005) names(arbre_mort2006) names(arbre_mort2007) +## names(arbre_mort2008) names(arbre_mort2009) names(arbre_mort2010) ## names(arbre_mort2011) ### merge 2005 2006 2007 to compute c13 arbre_mort05_07 <- rbind(arbre_mort2005, arbre_mort2006, arbre_mort2007) -arbre_mort05_07$c13 <- rep(NA,length(arbre_mort05_07$c0)) -arbre_mort05_07$espar2 <- as.numeric(substr(arbre_mort05_07$espar,1,2)) -arbre_mort05_07$year <- c(rep(2005,length=length(arbre_mort2005[,1])), - rep(2006,length=length(arbre_mort2006[,1])), - rep(2007,length=length(arbre_mort2007[,1]))) - -#### NEED TO CONVERT c0 into c13 before 2008 -### before 2008 no date dead but all tree died less than 5 years ago. -### need to do convertion between c0 and c13. -## for that use the NFI data from previous inventory that were reporting both c0 and c13 for all species -## fit an allometric relationship and then use it to predict c13 in this data base +arbre_mort05_07$c13 <- rep(NA, length(arbre_mort05_07$c0)) +arbre_mort05_07$espar2 <- as.numeric(substr(arbre_mort05_07$espar, 1, 2)) +arbre_mort05_07$year <- c(rep(2005, length = length(arbre_mort2005[, 1])), rep(2006, + length = length(arbre_mort2006[, 1])), rep(2007, length = length(arbre_mort2007[, + 1]))) + +#### NEED TO CONVERT c0 into c13 before 2008 before 2008 no date dead but all tree +#### died less than 5 years ago. need to do convertion between c0 and c13. for +#### that use the NFI data from previous inventory that were reporting both c0 and +#### c13 for all species fit an allometric relationship and then use it to predict +#### c13 in this data base ### READ DATA CYCLE 3 ORGINAL /// NEED TO CONVERT C0 and C13 in cm *100 -arbre.cycle3 <- read.table("./data/raw/DataFrance/cycle3/data.arbre.tot.txt",sep=" ", stringsAsFactors=FALSE) +arbre.cycle3 <- read.table("./data/raw/DataFrance/cycle3/data.arbre.tot.txt", sep = " ", + stringsAsFactors = FALSE) ### change the C from m to cm -arbre.cycle3$C0 <- arbre.cycle3$C0*100 -arbre.cycle3$C13 <- arbre.cycle3$C13*100 +arbre.cycle3$C0 <- arbre.cycle3$C0 * 100 +arbre.cycle3$C13 <- arbre.cycle3$C13 * 100 ## LOAD library RMA regression library(lmodel2) -## the regression between C13 and C0 vary between species, but not same species in cycle 3 because less details (the classification is only based on number and no letters -## remove the letters and apply the same model to all species that have the same number code -species.list <- c(as.numeric(names(table(substr(arbre_mort05_07$espar,1,2))))[-1]) +## the regression between C13 and C0 vary between species, but not same species in +## cycle 3 because less details (the classification is only based on number and no +## letters remove the letters and apply the same model to all species that have +## the same number code +species.list <- c(as.numeric(names(table(substr(arbre_mort05_07$espar, 1, 2))))[-1]) ## length(table(arbre.cycle3$ESS)) -for (i in species.list) -{ -if (sum(arbre.cycle3$ESS==i)>50) - { -lmodel2.res <- lmodel2(C13~C0 ,data=arbre.cycle3[arbre.cycle3$ESS==i,],range.x="relative",range.y="relative") -arbre_mort05_07$c13[arbre_mort05_07$espar2==i & !is.na(arbre_mort05_07$espar2)] <- - lmodel2.res$regression.results[4,2] + arbre_mort05_07$c0[arbre_mort05_07$espar2==i & - !is.na(arbre_mort05_07$espar2)]*lmodel2.res$regression.results[4,3] -print(i) -print(lmodel2.res$regression.results[4,2:3]) -print(range(arbre_mort05_07$c13[arbre_mort05_07$espar2==i & - !is.na(arbre_mort05_07$espar2)],na.rm=T)) - } -else - { -lmodel2.res <- lmodel2(C13~C0 ,data=arbre.cycle3,range.x="relative",range.y="relative") -arbre_mort05_07$c13[arbre_mort05_07$espar2==i & !is.na(arbre_mort05_07$espar2)] <- - lmodel2.res$regression.results[4,2] + arbre_mort05_07$c0[arbre_mort05_07$espar2==i & - !is.na(arbre_mort05_07$espar2)]*lmodel2.res$regression.results[4,3] -print(i) -print(lmodel2.res$regression.results[4,2:3]) -print(range(arbre_mort05_07$c13[arbre_mort05_07$espar2==i & !is.na(arbre_mort05_07$espar2)],na.rm=T)) - - } -} +for (i in species.list) { + if (sum(arbre.cycle3$ESS == i) > 50) { + lmodel2.res <- lmodel2(C13 ~ C0, data = arbre.cycle3[arbre.cycle3$ESS == + i, ], range.x = "relative", range.y = "relative") + arbre_mort05_07$c13[arbre_mort05_07$espar2 == i & !is.na(arbre_mort05_07$espar2)] <- lmodel2.res$regression.results[4, + 2] + arbre_mort05_07$c0[arbre_mort05_07$espar2 == i & !is.na(arbre_mort05_07$espar2)] * + lmodel2.res$regression.results[4, 3] + print(i) + print(lmodel2.res$regression.results[4, 2:3]) + print(range(arbre_mort05_07$c13[arbre_mort05_07$espar2 == i & !is.na(arbre_mort05_07$espar2)], + na.rm = T)) + } else { + lmodel2.res <- lmodel2(C13 ~ C0, data = arbre.cycle3, range.x = "relative", + range.y = "relative") + arbre_mort05_07$c13[arbre_mort05_07$espar2 == i & !is.na(arbre_mort05_07$espar2)] <- lmodel2.res$regression.results[4, + 2] + arbre_mort05_07$c0[arbre_mort05_07$espar2 == i & !is.na(arbre_mort05_07$espar2)] * + lmodel2.res$regression.results[4, 3] + print(i) + print(lmodel2.res$regression.results[4, 2:3]) + print(range(arbre_mort05_07$c13[arbre_mort05_07$espar2 == i & !is.na(arbre_mort05_07$espar2)], + na.rm = T)) + + } +} ### for species with NO DATA in cycle 3 apply mean model over all species -lmodel2.res <- lmodel2(C13~C0 ,data=arbre.cycle3,range.x="relative",range.y="relative") -arbre_mort05_07$c13[is.na(arbre_mort05_07$espar2)] <- - lmodel2.res$regression.results[4,2] + arbre_mort05_07$c0[is.na(arbre_mort05_07$espar2)]*lmodel2.res$regression.results[4,3] +lmodel2.res <- lmodel2(C13 ~ C0, data = arbre.cycle3, range.x = "relative", range.y = "relative") +arbre_mort05_07$c13[is.na(arbre_mort05_07$espar2)] <- lmodel2.res$regression.results[4, + 2] + arbre_mort05_07$c0[is.na(arbre_mort05_07$espar2)] * lmodel2.res$regression.results[4, + 3] ## check predicted C13 from C0 -head(cbind(arbre_mort05_07$c13,arbre_mort05_07$c0,arbre_mort05_07$espar) ) +head(cbind(arbre_mort05_07$c13, arbre_mort05_07$c0, arbre_mort05_07$espar)) ### ok donne c13 added -######################## -## MERGE WITH other dead data - -arbre_mort_tot <- data.frame(idp=c(arbre_mort05_07$idp,arbre_mort2008$idp,arbre_mort2009$idp,arbre_mort2010$idp,arbre_mort2011$idp), - a=c(arbre_mort05_07$a,arbre_mort2008$a,arbre_mort2009$a,arbre_mort2010$a,arbre_mort2011$a), - espar=c(as.character(arbre_mort05_07$espar),as.character(arbre_mort2008$espar),as.character(arbre_mort2009$espar), - as.character(arbre_mort2010$espar),as.character(arbre_mort2011$espar)), - ori=c(arbre_mort05_07$ori,arbre_mort2008$ori,arbre_mort2009$ori,arbre_mort2010$ori,arbre_mort2011$ori), - veget=c(arbre_mort05_07$veget,arbre_mort2008$veget,arbre_mort2009$veget,arbre_mort2010$veget,arbre_mort2011$veget), - datemort=c(rep(NA,length(arbre_mort05_07$ori)),arbre_mort2008$datemort,arbre_mort2009$datemort,arbre_mort2010$datemort - ,arbre_mort2011$datemort), - c13=c(arbre_mort05_07$c13,arbre_mort2008$c13,arbre_mort2009$c13,arbre_mort2010$c13,arbre_mort2011$c13), - v=c(arbre_mort05_07$v,arbre_mort2008$v,arbre_mort2009$v,arbre_mort2010$v,arbre_mort2011$v), - w=c(arbre_mort05_07$w,arbre_mort2008$w,arbre_mort2009$w,arbre_mort2010$w,arbre_mort2011$w), - YEAR=c(rep(2005,length(arbre_mort2005$idp)),rep(2006,length(arbre_mort2006$idp)),rep(2007,length(arbre_mort2007$idp)) - ,rep(2008,length(arbre_mort2008$idp)),rep(2009,length(arbre_mort2009$idp)),rep(2010,length(arbre_mort2010$idp)) - ,rep(2011,length(arbre_mort2011$idp))) - ) -rm(arbre.cycle3,arbre_mort2005,arbre_mort2006,arbre_mort2007,arbre_mort2008,arbre_mort2009,arbre_mort2010,arbre_mort2011) +######################## MERGE WITH other dead data + +arbre_mort_tot <- data.frame(idp = c(arbre_mort05_07$idp, arbre_mort2008$idp, arbre_mort2009$idp, + arbre_mort2010$idp, arbre_mort2011$idp), a = c(arbre_mort05_07$a, arbre_mort2008$a, + arbre_mort2009$a, arbre_mort2010$a, arbre_mort2011$a), espar = c(as.character(arbre_mort05_07$espar), + as.character(arbre_mort2008$espar), as.character(arbre_mort2009$espar), as.character(arbre_mort2010$espar), + as.character(arbre_mort2011$espar)), ori = c(arbre_mort05_07$ori, arbre_mort2008$ori, + arbre_mort2009$ori, arbre_mort2010$ori, arbre_mort2011$ori), veget = c(arbre_mort05_07$veget, + arbre_mort2008$veget, arbre_mort2009$veget, arbre_mort2010$veget, arbre_mort2011$veget), + datemort = c(rep(NA, length(arbre_mort05_07$ori)), arbre_mort2008$datemort, arbre_mort2009$datemort, + arbre_mort2010$datemort, arbre_mort2011$datemort), c13 = c(arbre_mort05_07$c13, + arbre_mort2008$c13, arbre_mort2009$c13, arbre_mort2010$c13, arbre_mort2011$c13), + v = c(arbre_mort05_07$v, arbre_mort2008$v, arbre_mort2009$v, arbre_mort2010$v, + arbre_mort2011$v), w = c(arbre_mort05_07$w, arbre_mort2008$w, arbre_mort2009$w, + arbre_mort2010$w, arbre_mort2011$w), YEAR = c(rep(2005, length(arbre_mort2005$idp)), + rep(2006, length(arbre_mort2006$idp)), rep(2007, length(arbre_mort2007$idp)), + rep(2008, length(arbre_mort2008$idp)), rep(2009, length(arbre_mort2009$idp)), + rep(2010, length(arbre_mort2010$idp)), rep(2011, length(arbre_mort2011$idp)))) +rm(arbre.cycle3, arbre_mort2005, arbre_mort2006, arbre_mort2007, arbre_mort2008, + arbre_mort2009, arbre_mort2010, arbre_mort2011) gc() -save(arbre_mort_tot,file="./data/process/arbre_mort_tot.Rdata") +save(arbre_mort_tot, file = "./data/process/arbre_mort_tot.Rdata") -################################################################################ -#### MERGE DEAD AND ALIVE TREE +################################################################################ MERGE DEAD AND ALIVE TREE head(arbre_mort_tot) head(arbre.tot) -arbre.ALIVE.DEAD <- data.frame( - idp=c(arbre.tot$idp,arbre_mort_tot$idp), - a=c(arbre.tot$a,arbre_mort_tot$a), - veget=c(arbre.tot$veget,arbre_mort_tot$veget), - simplif=c(arbre.tot$simplif,rep(NA,length=length(arbre_mort_tot$idp))), - acci=c(arbre.tot$acci,rep(NA,length=length(arbre_mort_tot$idp))), - espar=c(as.character(arbre.tot$espar),as.character(arbre_mort_tot$espar)), - ori=c(arbre.tot$ori,arbre_mort_tot$ori), - lib=c(arbre.tot$lib,rep(NA,length=length(arbre_mort_tot$idp))), - forme=c(arbre.tot$forme,rep(NA,length=length(arbre_mort_tot$idp))), - tige=c(arbre.tot$tige,rep(NA,length=length(arbre_mort_tot$idp))), - mortb=c(arbre.tot$mortb,rep(NA,length=length(arbre_mort_tot$idp))), - sfgui=c(arbre.tot$sfgui,rep(NA,length=length(arbre_mort_tot$idp))), - sfgeliv=c(arbre.tot$sfgeliv,rep(NA,length=length(arbre_mort_tot$idp))), - sfpied=c(arbre.tot$sfpied,rep(NA,length=length(arbre_mort_tot$idp))), - sfdorge=c(arbre.tot$sfdorge,rep(NA,length=length(arbre_mort_tot$idp))), - sfcoeur=c(arbre.tot$sfcoeur,rep(NA,length=length(arbre_mort_tot$idp))), - c13=c(arbre.tot$c13,arbre_mort_tot$c13), - ir5=c(arbre.tot$ir5,rep(NA,length=length(arbre_mort_tot$idp))), - htot=c(arbre.tot$htot,rep(NA,length=length(arbre_mort_tot$idp))), - hdec=c(arbre.tot$hdec,rep(NA,length=length(arbre_mort_tot$idp))), - decoupe=c(arbre.tot$decoupe,rep(NA,length=length(arbre_mort_tot$idp))), - q1=c(arbre.tot$q1,rep(NA,length=length(arbre_mort_tot$idp))), - q2=c(arbre.tot$q2,rep(NA,length=length(arbre_mort_tot$idp))), - q3=c(arbre.tot$q3,rep(NA,length=length(arbre_mort_tot$idp))), - r=c(arbre.tot$r,rep(NA,length=length(arbre_mort_tot$idp))), - lfsd=c(arbre.tot$lfsd,rep(NA,length=length(arbre_mort_tot$idp))), - age=c(arbre.tot$age,rep(NA,length=length(arbre_mort_tot$idp))), - v=c(arbre.tot$v,arbre_mort_tot$v), - w=c(arbre.tot$w,rep(10000/(pi*(c(15))^2),length=length(arbre_mort_tot$w))),## assume that all dead tree are sampled on the whole plot as explained in the method - YEAR=c(arbre.tot$YEAR,arbre_mort_tot$YEAR), - datemort=c(rep(NA,length=length(arbre.tot$YEAR)),arbre_mort_tot$datemort), - dead=c(rep(0,length=length(arbre.tot$YEAR)),rep(1,length=length(arbre_mort_tot$idp))))## 1 = dead +arbre.ALIVE.DEAD <- data.frame(idp = c(arbre.tot$idp, arbre_mort_tot$idp), a = c(arbre.tot$a, + arbre_mort_tot$a), veget = c(arbre.tot$veget, arbre_mort_tot$veget), simplif = c(arbre.tot$simplif, + rep(NA, length = length(arbre_mort_tot$idp))), acci = c(arbre.tot$acci, rep(NA, + length = length(arbre_mort_tot$idp))), espar = c(as.character(arbre.tot$espar), + as.character(arbre_mort_tot$espar)), ori = c(arbre.tot$ori, arbre_mort_tot$ori), + lib = c(arbre.tot$lib, rep(NA, length = length(arbre_mort_tot$idp))), forme = c(arbre.tot$forme, + rep(NA, length = length(arbre_mort_tot$idp))), tige = c(arbre.tot$tige, rep(NA, + length = length(arbre_mort_tot$idp))), mortb = c(arbre.tot$mortb, rep(NA, + length = length(arbre_mort_tot$idp))), sfgui = c(arbre.tot$sfgui, rep(NA, + length = length(arbre_mort_tot$idp))), sfgeliv = c(arbre.tot$sfgeliv, rep(NA, + length = length(arbre_mort_tot$idp))), sfpied = c(arbre.tot$sfpied, rep(NA, + length = length(arbre_mort_tot$idp))), sfdorge = c(arbre.tot$sfdorge, rep(NA, + length = length(arbre_mort_tot$idp))), sfcoeur = c(arbre.tot$sfcoeur, rep(NA, + length = length(arbre_mort_tot$idp))), c13 = c(arbre.tot$c13, arbre_mort_tot$c13), + ir5 = c(arbre.tot$ir5, rep(NA, length = length(arbre_mort_tot$idp))), htot = c(arbre.tot$htot, + rep(NA, length = length(arbre_mort_tot$idp))), hdec = c(arbre.tot$hdec, rep(NA, + length = length(arbre_mort_tot$idp))), decoupe = c(arbre.tot$decoupe, rep(NA, + length = length(arbre_mort_tot$idp))), q1 = c(arbre.tot$q1, rep(NA, length = length(arbre_mort_tot$idp))), + q2 = c(arbre.tot$q2, rep(NA, length = length(arbre_mort_tot$idp))), q3 = c(arbre.tot$q3, + rep(NA, length = length(arbre_mort_tot$idp))), r = c(arbre.tot$r, rep(NA, + length = length(arbre_mort_tot$idp))), lfsd = c(arbre.tot$lfsd, rep(NA, length = length(arbre_mort_tot$idp))), + age = c(arbre.tot$age, rep(NA, length = length(arbre_mort_tot$idp))), v = c(arbre.tot$v, + arbre_mort_tot$v), w = c(arbre.tot$w, rep(10000/(pi * (c(15))^2), length = length(arbre_mort_tot$w))), + YEAR = c(arbre.tot$YEAR, arbre_mort_tot$YEAR), datemort = c(rep(NA, length = length(arbre.tot$YEAR)), + arbre_mort_tot$datemort), dead = c(rep(0, length = length(arbre.tot$YEAR)), + rep(1, length = length(arbre_mort_tot$idp)))) +## dead: 1 = dead w: assume that all dead tree are sampled on the whole plot as +## explained in the method ## delete plot with DEAD tree missing because of no C13 or no espar -arbre.ALIVE.DEAD2 <- arbre.ALIVE.DEAD[!(arbre.ALIVE.DEAD$idp %in% unique(c(names(tapply(is.na(arbre.ALIVE.DEAD$c13), - INDEX=arbre.ALIVE.DEAD$idp,FUN=sum))[tapply(is.na(arbre.ALIVE.DEAD$c13),INDEX=arbre.ALIVE.DEAD$idp,FUN=sum)>0], - names(tapply(is.na(arbre.ALIVE.DEAD$espar),INDEX=arbre.ALIVE.DEAD$idp,FUN=sum))[tapply(is.na(arbre.ALIVE.DEAD$espar), - INDEX=arbre.ALIVE.DEAD$idp,FUN=sum)>0]))),] +arbre.ALIVE.DEAD2 <- arbre.ALIVE.DEAD[!(arbre.ALIVE.DEAD$idp %in% unique(c(names(tapply(is.na(arbre.ALIVE.DEAD$c13), + INDEX = arbre.ALIVE.DEAD$idp, FUN = sum))[tapply(is.na(arbre.ALIVE.DEAD$c13), + INDEX = arbre.ALIVE.DEAD$idp, FUN = sum) > 0], names(tapply(is.na(arbre.ALIVE.DEAD$espar), + INDEX = arbre.ALIVE.DEAD$idp, FUN = sum))[tapply(is.na(arbre.ALIVE.DEAD$espar), + INDEX = arbre.ALIVE.DEAD$idp, FUN = sum) > 0]))), ] -save(arbre.ALIVE.DEAD2,file='./data/process/arbre.ALIVE.DEAD2.Rdata') +save(arbre.ALIVE.DEAD2, file = "./data/process/arbre.ALIVE.DEAD2.Rdata") -######################################################################################## -######################################### -######################################### -######################################### -##### LOAD DATA FOR PLOT INFO -### read DEAD TREE table downloaded from the web -placette2005 <- read.csv("./data/raw/DataFrance/2005/placettes_foret_2005.csv",sep=";" - , stringsAsFactors=FALSE) +######################################################################################## LOAD DATA FOR PLOT INFO read DEAD TREE table downloaded from the web +placette2005 <- read.csv("./data/raw/DataFrance/2005/placettes_foret_2005.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(placette2005) -placette2006 <- read.csv("./data/raw/DataFrance/2006/placettes_foret_2006.csv",sep=";" - , stringsAsFactors=FALSE) +placette2006 <- read.csv("./data/raw/DataFrance/2006/placettes_foret_2006.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(placette2006) -placette2007 <- read.csv("./data/raw/DataFrance/2007/placettes_foret_2007.csv",sep=";" - , stringsAsFactors=FALSE) +placette2007 <- read.csv("./data/raw/DataFrance/2007/placettes_foret_2007.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(placette2007) -placette2008 <- read.csv("./data/raw/DataFrance/2008/placettes_foret_2008.csv",sep=";" - , stringsAsFactors=FALSE) +placette2008 <- read.csv("./data/raw/DataFrance/2008/placettes_foret_2008.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(placette2005) -placette2009 <- read.csv("./data/raw/DataFrance/2009/placettes_foret_2009.csv",sep=";" - , stringsAsFactors=FALSE) +placette2009 <- read.csv("./data/raw/DataFrance/2009/placettes_foret_2009.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(placette2009) -placette2010 <- read.csv("./data/raw/DataFrance/2010/placettes_foret_2010.csv",sep=";" - , stringsAsFactors=FALSE) +placette2010 <- read.csv("./data/raw/DataFrance/2010/placettes_foret_2010.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(placette2010) -placette2011 <- read.csv("./data/raw/DataFrance/2011/placettes_foret_2011.csv",sep=";" - , stringsAsFactors=FALSE) - -## names(placette2005) -## ## uta -> uta1 -## ## sfo NA -## ## plisi NA -## names(placette2006) -## ## plisi NA -## names(placette2007) -## ## dcespar1 dcespar2 tpespar1 tpespar2 iti pentexp NA -## ## deleted acces -## names(placette2008) -## ## gest incid peupnr portance asperite -## names(placette2009) -## names(placette2010) -## names(placette2011) - -placette2005$tplant <- as.character(placette2005$tplant) -placette2005$tplant[placette2005$tplant==""] <-0 -placette2006$tplant[placette2006$tplant==""] <-0 - - - -### for selection of plot use plisi=0 dc=0 tplant=0 -## incid indicateur d incident récent utilisé ou pas ?? avec ou sans perturbatrion naturelle - -placette_tot <-data.frame(idp=c(placette2005$idp,placette2006$idp,placette2007$idp,placette2008$idp,placette2009$idp,placette2010$idp, - placette2011$idp), - xl93=c(placette2005$xl93,placette2006$xl93,placette2007$xl93,placette2008$xl93,placette2009$xl93,placette2010$xl93, - placette2011$xl93), - yl93=c(placette2005$yl93,placette2006$yl93,placette2007$yl93,placette2008$yl93,placette2009$yl93,placette2010$yl93, - placette2011$yl93), - dep=c(placette2005$dep,placette2006$dep,placette2007$dep,placette2008$dep,placette2009$dep,placette2010$dep, - placette2011$dep), - csa=c(placette2005$csa,placette2006$csa,placette2007$csa,placette2008$csa,placette2009$csa,placette2010$csa, - placette2011$csa), - plisi=c(rep(NA,length(placette2005$tm2)),rep(NA,length(placette2006$idp)),placette2007$plisi,placette2008$plisi, - placette2009$plisi,placette2010$plisi,placette2011$plisi), - uta1=c(placette2005$uta,placette2006$uta1,placette2007$uta1,placette2008$uta1,placette2009$uta1,placette2010$uta1, - placette2011$uta1), - tm2=c(placette2005$tm2,placette2006$tm2,placette2007$tm2,placette2008$tm2,placette2009$tm2,placette2010$tm2, - placette2011$tm2), - sfo=c(rep(NA,length(placette2005$tm2)),placette2006$sfo,placette2007$sfo,placette2008$sfo,placette2009$sfo, - placette2010$sfo,placette2011$sfo), - incid=c(rep(NA,length(placette2005$idp)),rep(NA,length(placette2006$idp)),rep(NA,length(placette2007$idp)), - rep(NA,length(placette2008$idp)),placette2009$incid,placette2010$incid,placette2011$incid), - dc=c(placette2005$dc,placette2006$dc,placette2007$dc,placette2008$dc,placette2009$dc,placette2010$dc,placette2011$dc), - tplant=c(placette2005$tplant,placette2006$tplant,placette2007$tplant,placette2008$tplant,placette2009$tplant, - placette2010$tplant,placette2011$tplant), - esspre=c(placette2005$esspre,placette2006$esspre,placette2007$esspre,placette2008$esspre,placette2009$esspre, - placette2010$esspre,placette2011$esspre), - cac=c(placette2005$cac,placette2006$cac,placette2007$cac,placette2008$cac,placette2009$cac,placette2010$cac, - placette2011$cac), - ess_age_1=c(placette2005$ess_age_1,placette2006$ess_age_1,placette2007$ess_age_1,placette2008$ess_age_1, - placette2009$ess_age_1,placette2010$ess_age_1,placette2011$ess_age_1), - YEAR=c(rep(2005,length(placette2005$idp)),rep(2006,length(placette2006$idp)),rep(2007,length(placette2007$idp)), - rep(2008,length(placette2008$idp)),rep(2009,length(placette2009$idp)),rep(2010,length(placette2010$idp)), - rep(2011,length(placette2011$idp)))) - -rm(placette2005,placette2006,placette2007,placette2008,placette2009,placette2010,placette2011) - - -save(placette_tot,file="./data/process/placette_tot.Rdata") - -##################################### -## LOAD elevation data -##### LOAD ALTITUDE DATA -## load("./data/process/placette_tot.Rdata") -alti <- read.csv("./data/raw/DataFrance/altitude/SER_alti.csv",header=T,sep=";", stringsAsFactors=FALSE) -alti2011 <- read.csv("./data/raw/DataFrance/altitude/SER_alti_2011.csv",header=T,sep=";",stringsAsFactors=FALSE) +placette2011 <- read.csv("./data/raw/DataFrance/2011/placettes_foret_2011.csv", sep = ";", + stringsAsFactors = FALSE) + +## names(placette2005) ## uta -> uta1 ## sfo NA ## plisi NA names(placette2006) ## +## plisi NA names(placette2007) ## dcespar1 dcespar2 tpespar1 tpespar2 iti pentexp +## NA ## deleted acces names(placette2008) ## gest incid peupnr portance asperite +## names(placette2009) names(placette2010) names(placette2011) + +placette2005$tplant <- as.character(placette2005$tplant) +placette2005$tplant[placette2005$tplant == ""] <- 0 +placette2006$tplant[placette2006$tplant == ""] <- 0 + + + +### for selection of plot use plisi=0 dc=0 tplant=0 incid indicateur d incident +### récent utilisé ou pas ?? avec ou sans perturbatrion naturelle + +placette_tot <- data.frame(idp = c(placette2005$idp, placette2006$idp, placette2007$idp, + placette2008$idp, placette2009$idp, placette2010$idp, placette2011$idp), xl93 = c(placette2005$xl93, + placette2006$xl93, placette2007$xl93, placette2008$xl93, placette2009$xl93, placette2010$xl93, + placette2011$xl93), yl93 = c(placette2005$yl93, placette2006$yl93, placette2007$yl93, + placette2008$yl93, placette2009$yl93, placette2010$yl93, placette2011$yl93), + dep = c(placette2005$dep, placette2006$dep, placette2007$dep, placette2008$dep, + placette2009$dep, placette2010$dep, placette2011$dep), csa = c(placette2005$csa, + placette2006$csa, placette2007$csa, placette2008$csa, placette2009$csa, placette2010$csa, + placette2011$csa), plisi = c(rep(NA, length(placette2005$tm2)), rep(NA, length(placette2006$idp)), + placette2007$plisi, placette2008$plisi, placette2009$plisi, placette2010$plisi, + placette2011$plisi), uta1 = c(placette2005$uta, placette2006$uta1, placette2007$uta1, + placette2008$uta1, placette2009$uta1, placette2010$uta1, placette2011$uta1), + tm2 = c(placette2005$tm2, placette2006$tm2, placette2007$tm2, placette2008$tm2, + placette2009$tm2, placette2010$tm2, placette2011$tm2), sfo = c(rep(NA, length(placette2005$tm2)), + placette2006$sfo, placette2007$sfo, placette2008$sfo, placette2009$sfo, placette2010$sfo, + placette2011$sfo), incid = c(rep(NA, length(placette2005$idp)), rep(NA, length(placette2006$idp)), + rep(NA, length(placette2007$idp)), rep(NA, length(placette2008$idp)), placette2009$incid, + placette2010$incid, placette2011$incid), dc = c(placette2005$dc, placette2006$dc, + placette2007$dc, placette2008$dc, placette2009$dc, placette2010$dc, placette2011$dc), + tplant = c(placette2005$tplant, placette2006$tplant, placette2007$tplant, placette2008$tplant, + placette2009$tplant, placette2010$tplant, placette2011$tplant), esspre = c(placette2005$esspre, + placette2006$esspre, placette2007$esspre, placette2008$esspre, placette2009$esspre, + placette2010$esspre, placette2011$esspre), cac = c(placette2005$cac, placette2006$cac, + placette2007$cac, placette2008$cac, placette2009$cac, placette2010$cac, placette2011$cac), + ess_age_1 = c(placette2005$ess_age_1, placette2006$ess_age_1, placette2007$ess_age_1, + placette2008$ess_age_1, placette2009$ess_age_1, placette2010$ess_age_1, placette2011$ess_age_1), + YEAR = c(rep(2005, length(placette2005$idp)), rep(2006, length(placette2006$idp)), + rep(2007, length(placette2007$idp)), rep(2008, length(placette2008$idp)), + rep(2009, length(placette2009$idp)), rep(2010, length(placette2010$idp)), + rep(2011, length(placette2011$idp)))) + +rm(placette2005, placette2006, placette2007, placette2008, placette2009, placette2010, + placette2011) + + +save(placette_tot, file = "./data/process/placette_tot.Rdata") + +##################################### LOAD elevation data LOAD ALTITUDE DATA +##################################### load('./data/process/placette_tot.Rdata') +alti <- read.csv("./data/raw/DataFrance/altitude/SER_alti.csv", header = T, sep = ";", + stringsAsFactors = FALSE) +alti2011 <- read.csv("./data/raw/DataFrance/altitude/SER_alti_2011.csv", header = T, + sep = ";", stringsAsFactors = FALSE) names(alti2011) <- names(alti) -alti.tot <- rbind(alti,alti2011) +alti.tot <- rbind(alti, alti2011) ## sum( placette_tot$idp %in% alti.tot$IDP)/length(placette_tot$idp) ## table(placette_tot$YEAR[! placette_tot$idp %in% alti.tot$IDP]) -placette_tot.alti <- merge(placette_tot,alti.tot,by.x="idp",by.y="IDP") +placette_tot.alti <- merge(placette_tot, alti.tot, by.x = "idp", by.y = "IDP") ### write csv file for Piedallu -write.csv(placette_tot.alti,"./data/process/placette_tot.alti.csv") +write.csv(placette_tot.alti, "./data/process/placette_tot.alti.csv") ### write new fiel for 2011 -write.csv(placette_tot.alti[placette_tot.alti$YEAR==2011,],"./data/process/placette_tot.alti.2011.csv") +write.csv(placette_tot.alti[placette_tot.alti$YEAR == 2011, ], "./data/process/placette_tot.alti.2011.csv") -######################################### -######################################### -######################################### -##### LOAD DATA ECOLOGIE -### read DEAD TREE table downloaded from the web -ecologie2005 <- read.csv("./data/raw/DataFrance/2005/ecologie_2005.csv",sep=";", stringsAsFactors=FALSE) +######################################### LOAD DATA ECOLOGIE read DEAD TREE table downloaded from the web +ecologie2005 <- read.csv("./data/raw/DataFrance/2005/ecologie_2005.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(ecologie2005) -ecologie2006 <- read.csv("./data/raw/DataFrance/2006/ecologie_2006.csv",sep=";", stringsAsFactors=FALSE) +ecologie2006 <- read.csv("./data/raw/DataFrance/2006/ecologie_2006.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(ecologie2006) -ecologie2007 <- read.csv("./data/raw/DataFrance/2007/ecologie_2007.csv",sep=";", stringsAsFactors=FALSE) +ecologie2007 <- read.csv("./data/raw/DataFrance/2007/ecologie_2007.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(ecologie2007) -ecologie2008 <- read.csv("./data/raw/DataFrance/2008/ecologie_2008.csv",sep=";", stringsAsFactors=FALSE) +ecologie2008 <- read.csv("./data/raw/DataFrance/2008/ecologie_2008.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(ecologie2005) -ecologie2009 <- read.csv("./data/raw/DataFrance/2009/ecologie_2009.csv",sep=";", stringsAsFactors=FALSE) +ecologie2009 <- read.csv("./data/raw/DataFrance/2009/ecologie_2009.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(ecologie2009) -ecologie2010 <- read.csv("./data/raw/DataFrance/2010/ecologie_2010.csv",sep=";", stringsAsFactors=FALSE) +ecologie2010 <- read.csv("./data/raw/DataFrance/2010/ecologie_2010.csv", sep = ";", + stringsAsFactors = FALSE) ## summary(ecologie2010) -ecologie2011 <- read.csv("./data/raw/DataFrance/2011/ecologie_2011.csv",sep=";", stringsAsFactors=FALSE) +ecologie2011 <- read.csv("./data/raw/DataFrance/2011/ecologie_2011.csv", sep = ";", + stringsAsFactors = FALSE) -ecologie_tot <- rbind(ecologie2005,ecologie2006,ecologie2007,ecologie2008,ecologie2009,ecologie2010,ecologie2011) +ecologie_tot <- rbind(ecologie2005, ecologie2006, ecologie2007, ecologie2008, ecologie2009, + ecologie2010, ecologie2011) head(ecologie_tot) -rm(ecologie2005,ecologie2006,ecologie2007,ecologie2008,ecologie2009,ecologie2010,ecologie2011) - -save(ecologie_tot,file="./data/process/ecologie_tot.Rdata") +rm(ecologie2005, ecologie2006, ecologie2007, ecologie2008, ecologie2009, ecologie2010, + ecologie2011) +save(ecologie_tot, file = "./data/process/ecologie_tot.Rdata") + diff --git a/R/format.function.R b/R/format.function.R index 1f5255e5228c89e2361730b5b033c935a100ed60..b5c4288ed9e3abcb4fba85bf42414063cc6d7820 100644 --- a/R/format.function.R +++ b/R/format.function.R @@ -1,13 +1,8 @@ -################################################### -################################################### -################################################### -##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP ANALYSIS +################################################### FUNCTION TO FORMAT DATA FOR THE WORKSHOP ANALYSIS -###### -###### -## FUNCTION TO PLOT MAP OF TREE +###### FUNCTION TO PLOT MAP OF TREE ##' .. Function to plot map of tree with circle function of their dbh.. ##' ##' .. content for \details{} .. @@ -21,18 +16,17 @@ ##' @param ... ##' @return ##' @author Kunstler -fun.circles.plot <- function(plot.select,x,y,plot,D,inches,...){ -x.t <- x[plot==plot.select] -y.t <- y[plot==plot.select] -D.t <- D[plot==plot.select] -D.t[is.na(D.t)] <- 0 -symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches,...) +fun.circles.plot <- function(plot.select, x, y, plot, D, inches, ...) { + x.t <- x[plot == plot.select] + y.t <- y[plot == plot.select] + D.t <- D[plot == plot.select] + D.t[is.na(D.t)] <- 0 + symbols(x.t, y.t, circles = D.t, main = plot.select, inches = inches, ...) } -######################### -## +######################### ##' .. Compute the basal area of competitor in a plot.. ##' ##' .. content for \details{} .. @@ -41,8 +35,8 @@ symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches,...) ##' @param weights weight to compute the basal area in cm^2/m^2 ##' @return basal area in cm^2/m^2 ##' @author Kunstler -BA.fun <- function(diam,weights){ -((diam/2)^2)*pi*weights +BA.fun <- function(diam, weights) { + ((diam/2)^2) * pi * weights } @@ -61,64 +55,65 @@ BA.fun <- function(diam,weights){ ##' @param weights.full.plot weights for the whole plot to compute basal area in cm^2/m^2 or if NA use weights of the individuals (for simple plots) ##' @return data frame with tree.id and one column per species with basal area of the species (without the target tree) ##' @author Kunstler -BA.SP.FUN <- function(obs.id,diam,sp,id.plot,weights,weight.full.plot){ -require(data.table) -id.plot <- as.character(id.plot) -obs.id <- as.character(obs.id) - -## check equal length -if(!(length(obs.id)==length(diam) & length(obs.id)==length(sp) & length(obs.id)==length(id.plot) & length(obs.id)==length(weights))) - stop("length of obs.id diam,sp id.plot & weights need to be the same") - -## check sp is not numeric -if(is.numeric(sp)) stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')") - -# compute BA tot per species per plot -BASP <- tapply(BA.fun(diam,weights),INDEX=list(id.plot,sp),FUN=sum,na.rm=T) -print(dim(BASP)) -DATA.BASP <- data.table(id.plot= rownames(BASP),BASP) -setnames( DATA.BASP,old=1:ncol(DATA.BASP), c("id.plot",colnames(BASP))) -setkeyv(DATA.BASP,c("id.plot")) -sp.name <- colnames(BASP) -rm(BASP) -print("first table created") -#### MERGE with indivudal tree -## use library(data.table) -if(!is.na(weight.full.plot)){ - data.indiv <- data.table(obs.id=obs.id,sp=sp, - id.plot=id.plot,diam=diam, - BA.indiv=BA.fun(diam,rep(weight.full.plot,length(diam)))) - setkeyv(data.indiv,"id.plot") - print("second table created") - data.merge <- merge(data.indiv,DATA.BASP) - print("merge done") - # substract target BA - for (i in (sp.name)){ - eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep=""))) - } - -}else{ - data.indiv <- data.table(obs.id=obs.id,sp=sp,id.plot=id.plot, - diam=diam,weights=weights, - BA.indiv=BA.fun(diam,weights)) - setkeyv(data.indiv,"id.plot") - print("second table created") - data.merge <- merge(data.indiv,DATA.BASP) - print("merge done") - for (i in (sp.name)){ - eval(parse(text=paste("data.merge[sp==\'",i,"\',",i,":=",i,"-BA.indiv]",sep=""))) - } -} -print("replacment done") -data.merge[,BA.indiv:=NULL] -print("first column removed") -#### delete column not used -data.merge[,sp:=NULL] -data.merge[,diam:=NULL] -data.merge[,id.plot:=NULL] -data.merge[,weights:=NULL] -print("columns removed") -return( (data.merge)) +BA.SP.FUN <- function(obs.id, diam, sp, id.plot, weights, weight.full.plot) { + require(data.table) + id.plot <- as.character(id.plot) + obs.id <- as.character(obs.id) + + ## check equal length + if (!(length(obs.id) == length(diam) & length(obs.id) == length(sp) & length(obs.id) == + length(id.plot) & length(obs.id) == length(weights))) + stop("length of obs.id diam,sp id.plot & weights need to be the same") + + ## check sp is not numeric + if (is.numeric(sp)) + stop("sp can not be numeric need to be charatcer do paste('sp',sp,sep='.')") + + # compute BA tot per species per plot + BASP <- tapply(BA.fun(diam, weights), INDEX = list(id.plot, sp), FUN = sum, na.rm = T) + print(dim(BASP)) + DATA.BASP <- data.table(id.plot = rownames(BASP), BASP) + setnames(DATA.BASP, old = 1:ncol(DATA.BASP), c("id.plot", colnames(BASP))) + setkeyv(DATA.BASP, c("id.plot")) + sp.name <- colnames(BASP) + rm(BASP) + print("first table created") + #### MERGE with indivudal tree use library(data.table) + if (!is.na(weight.full.plot)) { + data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam, + BA.indiv = BA.fun(diam, rep(weight.full.plot, length(diam)))) + setkeyv(data.indiv, "id.plot") + print("second table created") + data.merge <- merge(data.indiv, DATA.BASP) + print("merge done") + # substract target BA + for (i in (sp.name)) { + eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]", + sep = ""))) + } + + } else { + data.indiv <- data.table(obs.id = obs.id, sp = sp, id.plot = id.plot, diam = diam, + weights = weights, BA.indiv = BA.fun(diam, weights)) + setkeyv(data.indiv, "id.plot") + print("second table created") + data.merge <- merge(data.indiv, DATA.BASP) + print("merge done") + for (i in (sp.name)) { + eval(parse(text = paste("data.merge[sp=='", i, "',", i, ":=", i, "-BA.indiv]", + sep = ""))) + } + } + print("replacment done") + data.merge[, `:=`(BA.indiv, NULL)] + print("first column removed") + #### delete column not used + data.merge[, `:=`(sp, NULL)] + data.merge[, `:=`(diam, NULL)] + data.merge[, `:=`(id.plot, NULL)] + data.merge[, `:=`(weights, NULL)] + print("columns removed") + return((data.merge)) } #### @@ -135,177 +130,179 @@ return( (data.merge)) ##' @param rpuDist run with GPU distance computation ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler -BA.SP.FUN.XY <- function(obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ -rownames(xy.table) <- obs.id -if(rpuDist){ - require(rpud) - dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE) -}else{ - dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE)) +BA.SP.FUN.XY <- function(obs.id, xy.table, diam, sp, Rlim, parallel = FALSE, rpuDist = FALSE) { + rownames(xy.table) <- obs.id + if (rpuDist) { + require(rpud) + dist.mat <- rpuDist(xy.table, upper = TRUE, diag = TRUE) + } else { + dist.mat <- as.matrix(dist(xy.table, upper = TRUE, diag = TRUE)) + } + print("distance matrix computed") + dist.mat[dist.mat < Rlim] <- 1 + dist.mat[dist.mat > Rlim] <- 0 + diag(dist.mat) <- 0 + print("distance matrix set to 0 1") + BA <- BA.fun(diam, weights = 1/(pi * Rlim^2)) + BA.mat <- matrix(rep(BA, length(BA)), nrow = length(BA), byrow = TRUE) + print("starting tapply over species") + fun.sum.sp <- function(x, sp) tapply(x, INDEX = sp, FUN = sum, na.rm = TRUE) + if (parallel) { + ## parallel version + require(doParallel) + registerDoParallel(cores = 4) + mat <- dist.mat * BA.mat + res.temp <- foreach(i = 1:nrow(mat), .combine = rbind) %dopar% { + fun.sum.sp(mat[i, ], sp) + } + rownames(res.temp) <- obs.id + return((res.temp)) + } else { + res.temp <- t(apply(dist.mat * BA.mat, MARGIN = 1, FUN = fun.sum.sp, sp)) + return(res.temp) + } } -print('distance matrix computed') -dist.mat[dist.mat <Rlim] <- 1 -dist.mat[dist.mat >Rlim] <- 0 -diag(dist.mat) <- 0 -print('distance matrix set to 0 1') -BA <- BA.fun(diam,weights=1/(pi*Rlim^2)) -BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE) -print('starting tapply over species') -fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE) - if(parallel){ - ## parallel version - require(doParallel) - registerDoParallel(cores=4) - mat <- dist.mat*BA.mat - res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% { - fun.sum.sp(mat[i,],sp) - } - rownames(res.temp) <- obs.id - return((res.temp)) - }else{ - res.temp <- t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp)) - return(res.temp) - } -} - -############################ -## FUNCTION remove trailing white space -trim.trailing <- function (x) sub("\\s+$", "", x) -## clean species.tab -fun.clean.species.tab <- function(species.tab){ -species.tab2 <- species.tab[!is.na(species.tab$Latin_name),] - -### species IFN reformat names -## clean species names and synonyme names -species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) -species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn)) -## remove trailing white space -species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) +############################ FUNCTION remove trailing white space +trim.trailing <- function(x) sub("\\s+$", "", x) -species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), - c("code","Latin_name","Exotic_Native_cultivated")] -return(species.clean)} +## clean species.tab +fun.clean.species.tab <- function(species.tab) { + species.tab2 <- species.tab[!is.na(species.tab$Latin_name), ] + + ### species IFN reformat names clean species names and synonyme names + species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) + species.tab2$Latin_name_syn <- (gsub("_", " ", species.tab2$Latin_name_syn)) + ## remove trailing white space + species.tab2$Latin_name_syn <- trim.trailing(species.tab2$Latin_name_syn) + + species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), c("code", + "Latin_name", "Exotic_Native_cultivated")] + return(species.clean) +} ### compute quantile 99% and sd with a bootstrap library(boot) -f.quantile <- function (x,ind,probs){quantile(x[ind],probs=probs,na.rm=TRUE)} +f.quantile <- function(x, ind, probs) { + quantile(x[ind], probs = probs, na.rm = TRUE) +} -f.quantile.boot <- function(i,x,fac,R,probs=0.99){ +f.quantile.boot <- function(i, x, fac, R, probs = 0.99) { require(boot) - if(length(na.exclude(x[fac==i]))>0){ -quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs) -return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1)) -}else{ -return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1)) + if (length(na.exclude(x[fac == i])) > 0) { + quant.boot <- boot(x[fac == i], f.quantile, R = R, probs = probs) + return(as.matrix(c(mean = mean(quant.boot$t), sd = sd(quant.boot$t), nobs = length(na.exclude(x[fac == + i]))), ncol = 3, nrow = 1)) + } else { + return(as.matrix(c(mean = NA, sd = NA, nobs = NA), ncol = 3, nrow = 1)) + } } -} - -####################### -### function to compute number of dead per plot -function.perc.dead <- function(dead){ - sum(dead)/length(dead)} - - -function.perc.dead2 <- function(dead) { out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]); if(!is.finite(out)) out <- NA; return(out) } - -########################## -### GENERATE A R.object per ecoregion - -function.replace.NA.negative <- function(data.BA.SP){ - for (i in 2:ncol(data.BA.SP)){ -eval(parse(text=paste("data.BA.SP[is.na(",names(data.BA.SP)[i],"),",names(data.BA.SP)[i],":=0]",sep=""))) -eval(parse(text=paste("data.BA.SP[",names(data.BA.SP)[i],"<0,",names(data.BA.SP)[i],":=0]",sep=""))) +####################### function to compute number of dead per plot +function.perc.dead <- function(dead) { + sum(dead)/length(dead) } -print('NA and negative replaced') -return(data.BA.SP) -} -############################################################## -##function to generate data in good format per ecoregion -fun.data.per.ecoregion <- function(ecoregion,data.tot,plot.name,weight.full.plot,name.country,data.TRY=NA,species.lookup=NA){ -require(data.table) -data.tot <- data.table(data.tot) -data <- data.tot[ecocode==ecoregion,] -rm(data.tot) -data.BA.SP <- BA.SP.FUN(obs.id=as.vector(data[['obs.id']]), - diam=as.vector(data[['D']]), - sp=as.vector(data[['sp']]), - id.plot=as.vector(data[[plot.name]]), - weights=data[['weights']], - weight.full.plot=weight.full.plot) -print('competition index computed') -## change NA and <0 data for 0 -data.BA.SP <- function.replace.NA.negative(data.BA.SP) -### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) >0) stop("competition index sp name not the same as in data") -#### compute BA tot for all competitors -## data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## slower than apply why?? -BATOT.s <- apply(data.frame(data.BA.SP)[,-1],MARGIN=1,FUN=sum) -data.BA.SP[,BATOT:=BATOT.s] -print('BATOT COMPUTED') -### create data frame -DT.temp <- data.table(obs.id=data[["obs.id"]],ecocode=data[["ecocode"]]) -setkeyv(DT.temp,"obs.id") -setkeyv(data.BA.SP,"obs.id") -print('starting last merge') -data.BA.sp <- merge(DT.temp,data.BA.SP) -## reorder data -data <- data.table(data) -setkeyv(data,"obs.id") -## test if same order -if(sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) >0) stop("competition index not in the same order than data") -##### -## ADD TRY DATA OR TRAITS IF NEEDED -if(!is.na(data.TRY)){ -sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]),] -data.traits <- fun.extract.format.sp.traits.TRY(sp=sp.extract[["sp"]],sp.syno.table=sp.extract,data.TRY) -## save everything as a list -list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) -}else{ -list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA) -save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) +function.perc.dead2 <- function(dead) { + out <- sum(dead, na.rm = T)/length(dead[!is.na(dead)]) + if (!is.finite(out)) + out <- NA + return(out) } -} - -##################################### -##################################### -### FUNCTION TO COMPUTE BA.SP.XY PER PLOT AND MERGE TOGETHER -#### function to be apply per site -fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){ -data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i) -BA.SP.temp <- BA.SP.FUN.XY(obs.id=data.tree.s[['obs.id']], - xy.table=data.tree.s[,xy.name], - diam=data.tree.s[['D']], - sp=(data.tree.s[['sp']]), - Rlim=15, - parallel=FALSE, - rpuDist=FALSE) -## replace NA per zero -print('replacing NA per zero') -BA.SP.temp[is.na(BA.SP.temp)] <- 0 -print('done') -### rpud installation very cumbersome not needed ? -### longer in parallel why ? -if(sum(! rownames(BA.SP.temp)==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') -if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') +########################## GENERATE A R.object per ecoregion -### compute sum per row -BATOT <- apply(BA.SP.temp,MARGIN=1,FUN=sum) -data.res <- data.frame(obs.id=data.tree.s[['obs.id']],BA.SP.temp,BATOT=BATOT) -return(data.res) +function.replace.NA.negative <- function(data.BA.SP) { + for (i in 2:ncol(data.BA.SP)) { + eval(parse(text = paste("data.BA.SP[is.na(", names(data.BA.SP)[i], "),", + names(data.BA.SP)[i], ":=0]", sep = ""))) + eval(parse(text = paste("data.BA.SP[", names(data.BA.SP)[i], "<0,", names(data.BA.SP)[i], + ":=0]", sep = ""))) + } + print("NA and negative replaced") + return(data.BA.SP) } +############################################################## function to generate data in good format per ecoregion +fun.data.per.ecoregion <- function(ecoregion, data.tot, plot.name, weight.full.plot, + name.country, data.TRY = NA, species.lookup = NA) { + require(data.table) + data.tot <- data.table(data.tot) + data <- data.tot[ecocode == ecoregion, ] + rm(data.tot) + data.BA.SP <- BA.SP.FUN(obs.id = as.vector(data[["obs.id"]]), diam = as.vector(data[["D"]]), + sp = as.vector(data[["sp"]]), id.plot = as.vector(data[[plot.name]]), weights = data[["weights"]], + weight.full.plot = weight.full.plot) + + print("competition index computed") + ## change NA and <0 data for 0 + data.BA.SP <- function.replace.NA.negative(data.BA.SP) + ### CHECK IF sp and sp name for column are the same + if (sum(!(names(data.BA.SP)[-1] %in% unique(data[["sp"]]))) > 0) + stop("competition index sp name not the same as in data") + #### compute BA tot for all competitors data.BA.SP[,BATOT:=sum(.SD),by=obs.id] ## + #### slower than apply why?? + BATOT.s <- apply(data.frame(data.BA.SP)[, -1], MARGIN = 1, FUN = sum) + data.BA.SP[, `:=`(BATOT, BATOT.s)] + print("BATOT COMPUTED") + ### create data frame + DT.temp <- data.table(obs.id = data[["obs.id"]], ecocode = data[["ecocode"]]) + setkeyv(DT.temp, "obs.id") + setkeyv(data.BA.SP, "obs.id") + print("starting last merge") + data.BA.sp <- merge(DT.temp, data.BA.SP) + ## reorder data + data <- data.table(data) + setkeyv(data, "obs.id") + ## test if same order + if (sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) > 0) + stop("competition index not in the same order than data") + ##### ADD TRY DATA OR TRAITS IF NEEDED + if (!is.na(data.TRY)) { + sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]), + ] + data.traits <- fun.extract.format.sp.traits.TRY(sp = sp.extract[["sp"]], + sp.syno.table = sp.extract, data.TRY) + ## save everything as a list + list.temp <- list(data.tree = data, data.BA.SP = data.BA.sp, data.traits = data.traits) + save(list.temp, file = paste("./data/process/list", name.country, ecoregion, + "Rdata", sep = ".")) + } else { + list.temp <- list(data.tree = data, data.BA.SP = data.BA.sp, data.traits = NA) + save(list.temp, file = paste("./data/process/list", name.country, ecoregion, + "Rdata", sep = ".")) + } +} - - +##################################### FUNCTION TO COMPUTE BA.SP.XY PER PLOT AND MERGE TOGETHER function to be apply +##################################### per site +fun.compute.BA.SP.XY.per.plot <- function(i, data.tree, Rlim, xy.name = c("x", "y"), + parallel = FALSE, rpuDist = FALSE) { + data.tree.s <- subset(data.tree, subset = data.tree[["plot"]] == i) + BA.SP.temp <- BA.SP.FUN.XY(obs.id = data.tree.s[["obs.id"]], xy.table = data.tree.s[, + xy.name], diam = data.tree.s[["D"]], sp = (data.tree.s[["sp"]]), Rlim = 15, + parallel = FALSE, rpuDist = FALSE) + + ## replace NA per zero + print("replacing NA per zero") + BA.SP.temp[is.na(BA.SP.temp)] <- 0 + print("done") + ### rpud installation very cumbersome not needed ? longer in parallel why ? + if (sum(!rownames(BA.SP.temp) == data.tree.s[["obs.id"]]) > 0) + stop("rows not in the good order") + if (sum(!colnames(BA.SP.temp) == as.character((levels(data.tree.s[["sp"]])))) > + 0) + stop("colnames does mot match species name") + + ### compute sum per row + BATOT <- apply(BA.SP.temp, MARGIN = 1, FUN = sum) + data.res <- data.frame(obs.id = data.tree.s[["obs.id"]], BA.SP.temp, BATOT = BATOT) + return(data.res) +} diff --git a/R/formatR.R b/R/formatR.R new file mode 100644 index 0000000000000000000000000000000000000000..4fc6e7e54684fbf6636b44446384b45209ec1e13 --- /dev/null +++ b/R/formatR.R @@ -0,0 +1,25 @@ + +# use formatR package to tidy code +library(formatR) + +for (f in dir(".", pattern = "*.R")) { + cat("Cleaning ", f, "\n") + tidy.source(f, file = f) +} + + +for (f in c(dir(".", pattern = "*.R"), dir("R", pattern = "*.R", full.names = TRUE))) { + cat("Cleaning ", f, "\n") + tidy.source(f, file = f) +} + + + + + + + + + + + diff --git a/TRY.R b/TRY.R index 07c67642dcb5852f0cb67cc4d35a5109a294984d..378fbcd613ba800341123e1e58e14388fcde63ba 100644 --- a/TRY.R +++ b/TRY.R @@ -1,10 +1,7 @@ -######################################################## -######################################################## -###### READ TRY AND FORMAT DATA CHECK ERROR +######################################################## READ TRY AND FORMAT DATA CHECK ERROR -################ -#### use AccSpeciesName because not author name +################ use AccSpeciesName because not author name source("./R/FUN.TRY.R") library(MASS) @@ -12,219 +9,187 @@ library(doParallel) library(mvoutlier) ## read TRY data -TRY.DATA <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_04_01.txt", - sep = "\t",header=TRUE,na.strings="", stringsAsFactors=FALSE) +TRY.DATA <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_04_01.txt", + sep = "\t", header = TRUE, na.strings = "", stringsAsFactors = FALSE) -TRY.DATA2 <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_07_23.txt", - sep = "\t",header=TRUE,na.strings="", stringsAsFactors=FALSE) +TRY.DATA2 <- read.table("./data/raw/DataTRY/TRY_Proposal_177_DataRelease_2013_07_23.txt", + sep = "\t", header = TRUE, na.strings = "", stringsAsFactors = FALSE) ### combine both data set -TRY.DATA <- rbind(TRY.DATA,TRY.DATA2) +TRY.DATA <- rbind(TRY.DATA, TRY.DATA2) rm(TRY.DATA2) -################################## -### ERROR FOUND IN THE DATA BASE -#1 -######################## -### problem with the seed mass of this obs seed mass = 0 DELETE -TRY.DATA <- TRY.DATA[!(TRY.DATA$ObservationID==1034196 & TRY.DATA$DataName=="Seed dry mass"),] -#### IS "Quercuscrispla sp" an error standing for Quercus crispula synonym of Quercus mongolica subsp. crispula (Blume) Menitsky ? ask Jens -## TRY.DATA[TRY.DATA$AccSpeciesName=="Quercuscrispla sp" ,] - - -######################## -######################## -### first create a table with one row per Observation.id and column for each traits and variable -Non.Trait.Data <- c("Latitude", "Longitude", "Reference", "Date of harvest / measurement", -"Altitude", "Mean annual temperature (MAT)","Mean sum of annual precipitation (PPT)", - "Plant developmental status / plant age","Maximum height reference", - "Source in Glopnet", "Number of replicates", "Sun vers. shade leaf qualifier" ) +################################## ERROR FOUND IN THE DATA BASE 1 problem with the seed mass of this obs seed mass +################################## = 0 DELETE +TRY.DATA <- TRY.DATA[!(TRY.DATA$ObservationID == 1034196 & TRY.DATA$DataName == "Seed dry mass"), + ] +#### IS 'Quercuscrispla sp' an error standing for Quercus crispula synonym of +#### Quercus mongolica subsp. crispula (Blume) Menitsky ? ask Jens +#### TRY.DATA[TRY.DATA$AccSpeciesName=='Quercuscrispla sp' ,] + + +######################## first create a table with one row per Observation.id and column for each traits +######################## and variable +Non.Trait.Data <- c("Latitude", "Longitude", "Reference", "Date of harvest / measurement", + "Altitude", "Mean annual temperature (MAT)", "Mean sum of annual precipitation (PPT)", + "Plant developmental status / plant age", "Maximum height reference", "Source in Glopnet", + "Number of replicates", "Sun vers. shade leaf qualifier") Trait.Data <- sort(names(((table(TRY.DATA$TraitName))))) -########################## -#### REFORMAT DATA from TRY -registerDoParallel(cores=5) ## affect automaticaly half of the core detected to the foreach here I decide to affect 4 cores -getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6) +########################## REFORMAT DATA from TRY +registerDoParallel(cores = 5) ## affect automaticaly half of the core detected to the foreach here I decide to affect 4 cores +getDoParWorkers() ## here 8 core so 4 core if want to use more registerDoParallel(cores=6) - TRY.DATA.FORMATED <- foreach(ObservationID.t=unique(TRY.DATA$ObservationID), .combine=rbind) %dopar% - { - fun.extract.try(ObservationID.t,data=TRY.DATA,Non.Trait.Data,Trait.Data) - } +TRY.DATA.FORMATED <- foreach(ObservationID.t = unique(TRY.DATA$ObservationID), .combine = rbind) %dopar% + { + fun.extract.try(ObservationID.t, data = TRY.DATA, Non.Trait.Data, Trait.Data) + } -## head(TRY.DATA.FORMATED) -## dim(TRY.DATA.FORMATED) +## head(TRY.DATA.FORMATED) dim(TRY.DATA.FORMATED) -saveRDS(TRY.DATA.FORMATED,file="./data/process/TRY.DATA.FORMATED.rds") +saveRDS(TRY.DATA.FORMATED, file = "./data/process/TRY.DATA.FORMATED.rds") -######################## -########## READ RDS +######################## READ RDS TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds") -#################### -#################### -## COMPUTE MEAN AND SD FOR SPECIES from FRENCH NFI for 6 key traits -key.main.traits2 <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", -"StdValue.Seed.mass", -"StdValue.Leaf.specific.area..SLA.", -"StdValue.Stem.specific.density..SSD.", -"StdValue.Stem.conduit.area..vessel.and.tracheid.", -"StdValue.Leaf.lifespan") +#################### COMPUTE MEAN AND SD FOR SPECIES from FRENCH NFI for 6 key traits +key.main.traits2 <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", "StdValue.Seed.mass", + "StdValue.Leaf.specific.area..SLA.", "StdValue.Stem.specific.density..SSD.", + "StdValue.Stem.conduit.area..vessel.and.tracheid.", "StdValue.Leaf.lifespan") -############################### -############################## -## READ CSV TABLE WITH LATIN NAME and CODE FOR FRENCH NFI DATA -species.tab <- read.csv("./data/species.list/species.csv",sep="\t") -species.tab2 <- species.tab[!is.na(species.tab$Latin_name),] +############################### READ CSV TABLE WITH LATIN NAME and CODE FOR FRENCH NFI DATA +species.tab <- read.csv("./data/species.list/species.csv", sep = "\t") +species.tab2 <- species.tab[!is.na(species.tab$Latin_name), ] rm(species.tab) gc() -### species IFN reformat names -## clean species names and synonyme names +### species IFN reformat names clean species names and synonyme names species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) -species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn)) ## THIS TABLE HAS ALREADY THE SYNONYME FOR THE FRENCH SPECIES +species.tab2$Latin_name_syn <- (gsub("_", " ", species.tab2$Latin_name_syn)) ## THIS TABLE HAS ALREADY THE SYNONYME FOR THE FRENCH SPECIES ## remove trailing white space -species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) +species.tab2$Latin_name_syn <- trim.trailing(species.tab2$Latin_name_syn) ## create vector of species name -species.IFN <- unique(pecies.tab2$Latin_name ) +species.IFN <- unique(pecies.tab2$Latin_name) -########################################################################### -########################################################################### -##### EXTRACT SPECIES MEAN AND SD -### change format try species names +########################################################################### EXTRACT SPECIES MEAN AND SD change format try species names TRY.DATA.FORMATED$AccSpeciesName <- as.character(TRY.DATA.FORMATED$AccSpeciesName) -key.main.traits2 <- +key.main.traits2 <- +##################################################################### COMPUTE mean SD species:genus for each traits +######## The table 5 in Kattge et al. 2011 GCB provides estimation of mean species sd +######## SLA species sd log 0.09 Nmass species sd log 0.08 Seed Mass sd log 0.13 +## # SEE sd.log.SLA <- 0.09 ### based on Kattge et al. 2011 sd.log.Nmass <- 0.08 +## ### based on Kattge et al. 2011 sd.log.Seed.Mass <- 0.13 ### based on Kattge et +## al. 2011 sd.log.LL <- 0.03 ### based on Kattge et al. 2011 -##################################################################### -#### COMPUTE mean SD species:genus for each traits - -######## -## The table 5 in Kattge et al. 2011 GCB provides estimation of mean species sd -### SLA species sd log 0.09 -### Nmass species sd log 0.08 -### Seed Mass sd log 0.13 - -## # SEE -## sd.log.SLA <- 0.09 ### based on Kattge et al. 2011 -## sd.log.Nmass <- 0.08 ### based on Kattge et al. 2011 -## sd.log.Seed.Mass <- 0.13 ### based on Kattge et al. 2011 -## sd.log.LL <- 0.03 ### based on Kattge et al. 2011 - - -###################### -### Computed sd over the data in log10 we have under the assumption sd constant over species with lm per species -traits <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", - "StdValue.Seed.mass", - "StdValue.Leaf.specific.area..SLA.", - "StdValue.Stem.specific.density..SSD.", - "StdValue.Plant.height.vegetative") +###################### Computed sd over the data in log10 we have under the assumption sd constant +###################### over species with lm per species +traits <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", "StdValue.Seed.mass", + "StdValue.Leaf.specific.area..SLA.", "StdValue.Stem.specific.density..SSD.", + "StdValue.Plant.height.vegetative") ## minimum number of observation per species to be incldue N.min <- 3 -########################### -### SPECIES MEAN SD -sd.vec.sp <- rep(NA,5) +########################### SPECIES MEAN SD +sd.vec.sp <- rep(NA, 5) -for(i in 1:(length(traits)-1)){ - table.sp.tmp <- table(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[i]]]),"AccSpeciesName"]) - data.t <- TRY.DATA.FORMATED[TRY.DATA.FORMATED[["AccSpeciesName"]] %in% names( - table.sp.tmp)[table.sp.tmp>N.min], - c("AccSpeciesName",traits[i])] - names(data.t) <-c("sp","trait") - lm.obj <-lm(log10(trait)~sp,data=data.t) +for (i in 1:(length(traits) - 1)) { + table.sp.tmp <- table(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[i]]]), + "AccSpeciesName"]) + data.t <- TRY.DATA.FORMATED[TRY.DATA.FORMATED[["AccSpeciesName"]] %in% names(table.sp.tmp)[table.sp.tmp > + N.min], c("AccSpeciesName", traits[i])] + names(data.t) <- c("sp", "trait") + lm.obj <- lm(log10(trait) ~ sp, data = data.t) print(i) sd.vec.sp[i] <- sd(residuals(lm.obj)) } -### compute 99% quantile of height and its sd -## TODO COMPUTE WITH ONLY SPECIES WITH AT LEAST TWO OBSERVATIONS +### compute 99% quantile of height and its sd TODO COMPUTE WITH ONLY SPECIES WITH +### AT LEAST TWO OBSERVATIONS library(quantreg) -table.sp.tmp <- table(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[5]]]), - "AccSpeciesName"]) - data.t <- TRY.DATA.FORMATED[TRY.DATA.FORMATED[["AccSpeciesName"]] %in% names( - table.sp.tmp)[table.sp.tmp>N.min],] -res.rq <- rq(log10(StdValue.Plant.height.vegetative)~ AccSpeciesName-1, - tau=0.99,data=data.t) -summary.res.rq <- summary(res.rq,se='boot') -sd.vec.sp[5] <- mean(summary.res.rq$coefficients[,"Std. Error"]) +table.sp.tmp <- table(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[5]]]), "AccSpeciesName"]) +data.t <- TRY.DATA.FORMATED[TRY.DATA.FORMATED[["AccSpeciesName"]] %in% names(table.sp.tmp)[table.sp.tmp > + N.min], ] +res.rq <- rq(log10(StdValue.Plant.height.vegetative) ~ AccSpeciesName - 1, tau = 0.99, + data = data.t) +summary.res.rq <- summary(res.rq, se = "boot") +sd.vec.sp[5] <- mean(summary.res.rq$coefficients[, "Std. Error"]) ## higher than the one reported in Kattge et al. 2011 -####################### -### Computed sd over the data we have under the assumption sd constant over genus -sd.vec.genus <- rep(NA,5) -for(i in 1:(length(traits)-1)){ - table.sp.tmp <- table(sapply(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[i]]), - "AccSpeciesName"],FUN=fun.get.genus)) - data.t <- TRY.DATA.FORMATED[sapply(TRY.DATA.FORMATED[["AccSpeciesName"]], - fun.get.genus) %in% names( - table.sp.tmp)[table.sp.tmp>N.min], - c("AccSpeciesName",traits[i])] - names(data.t) <-c("sp","trait") - data.t$gs <- sapply(data.t[["sp"]],fun.get.genus) - lm.obj <-lm(log10(trait)~gs,data=data.t) +####################### Computed sd over the data we have under the assumption sd constant over genus +sd.vec.genus <- rep(NA, 5) +for (i in 1:(length(traits) - 1)) { + table.sp.tmp <- table(sapply(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[i]]), + "AccSpeciesName"], FUN = fun.get.genus)) + data.t <- TRY.DATA.FORMATED[sapply(TRY.DATA.FORMATED[["AccSpeciesName"]], fun.get.genus) %in% + names(table.sp.tmp)[table.sp.tmp > N.min], c("AccSpeciesName", traits[i])] + names(data.t) <- c("sp", "trait") + data.t$gs <- sapply(data.t[["sp"]], fun.get.genus) + lm.obj <- lm(log10(trait) ~ gs, data = data.t) sd.vec.genus[i] <- sd(residuals(lm.obj)) } ## quantile for Height with quantreg -table.sp.tmp <- table(sapply(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[5]]]),"AccSpeciesName"],FUN=fun.get.genus)) - data.t <- TRY.DATA.FORMATED[sapply(TRY.DATA.FORMATED[["AccSpeciesName"]],fun.get.genus) %in% names(table.sp.tmp)[table.sp.tmp>N.min],] +table.sp.tmp <- table(sapply(TRY.DATA.FORMATED[!is.na(TRY.DATA.FORMATED[[traits[5]]]), + "AccSpeciesName"], FUN = fun.get.genus)) +data.t <- TRY.DATA.FORMATED[sapply(TRY.DATA.FORMATED[["AccSpeciesName"]], fun.get.genus) %in% + names(table.sp.tmp)[table.sp.tmp > N.min], ] -res.rq <- rq(log10(TRY.DATA.FORMATED$StdValue.Plant.height.vegetative)~ sapply(TRY.DATA.FORMATED$AccSpeciesName,FUN=fun.get.genus)-1, - tau=0.99) -summary.res.rq <- summary(res.rq,se='boot') -sd.vec.genus[5] <- mean(summary.res.rq$coefficients[,"Std. Error"]) +res.rq <- rq(log10(TRY.DATA.FORMATED$StdValue.Plant.height.vegetative) ~ sapply(TRY.DATA.FORMATED$AccSpeciesName, + FUN = fun.get.genus) - 1, tau = 0.99) +summary.res.rq <- summary(res.rq, se = "boot") +sd.vec.genus[5] <- mean(summary.res.rq$coefficients[, "Std. Error"]) -##### -### SET NAME VECTORS +##### SET NAME VECTORS -names(sd.vec.sp) <- c("sdlog10.sp.Nmass","sdlog10.sp.Seed.Mass","sdlog10.sp.SLA", - "sdlog10.sp.WD","sdlog10.sp.Height") +names(sd.vec.sp) <- c("sdlog10.sp.Nmass", "sdlog10.sp.Seed.Mass", "sdlog10.sp.SLA", + "sdlog10.sp.WD", "sdlog10.sp.Height") -names(sd.vec.genus) <- c("sdlog10.gs.Nmass","sdlog10.gs.Seed.Mass","sdlog10.gs.SLA", - "sdlog10.gs.WD","sdlog10.gs.Height") +names(sd.vec.genus) <- c("sdlog10.gs.Nmass", "sdlog10.gs.Seed.Mass", "sdlog10.gs.SLA", + "sdlog10.gs.WD", "sdlog10.gs.Height") ## save mean species and genus sd -saveRDS(sd.vec.sp,file="./data/process/sd.vec.sp.rds") -saveRDS(sd.vec.genus,file="./data/process/sd.vec.genus.rds") +saveRDS(sd.vec.sp, file = "./data/process/sd.vec.sp.rds") +saveRDS(sd.vec.genus, file = "./data/process/sd.vec.genus.rds") -sd.vec.sp <- readRDS(file="./data/process/sd.vec.sp.rds") -sd.vec.genus <- readRDS(file="./data/process/sd.vec.genus.rds") +sd.vec.sp <- readRDS(file = "./data/process/sd.vec.sp.rds") +sd.vec.genus <- readRDS(file = "./data/process/sd.vec.genus.rds") -###################################################################################################### -### add columns with mean sd per species or per genus depending on whether species or genus data +###################################################################################################### add columns with mean sd per species or per genus depending on whether species +###################################################################################################### or genus data #### add column with the mean sd species or genus -data.TRY.sd.update <- data.frame(data.ifn.species.try.noout, - data.ifn.species.try.noout[,sd.names]) +data.TRY.sd.update <- data.frame(data.ifn.species.try.noout, data.ifn.species.try.noout[, + sd.names]) -sd.names.1 <- paste(sd.names,1,sep=".") +sd.names.1 <- paste(sd.names, 1, sep = ".") -for (i in 1:length(sd.names.1)){ -data.TRY.sd.update[[sd.names.1[i]]][!data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.sp[i] -data.TRY.sd.update[[sd.names.1[i]]][data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.genus[i] +for (i in 1:length(sd.names.1)) { + data.TRY.sd.update[[sd.names.1[i]]][!data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.sp[i] + data.TRY.sd.update[[sd.names.1[i]]][data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.genus[i] } -head(data.TRY.sd.update,10) +head(data.TRY.sd.update, 10) -saveRDS(data.TRY.sd.update,file="./data/process/data.TRY.sd.update.rds") +saveRDS(data.TRY.sd.update, file = "./data/process/data.TRY.sd.update.rds") @@ -238,27 +203,14 @@ saveRDS(data.TRY.sd.update,file="./data/process/data.TRY.sd.update.rds") -### -# plot sd to show mark +### plot sd to show mark pdf("./figs/sd.traits.pdf") -r <- barplot(sd.vec.sp ,names.arg=c("Leaf.N","SM","SLA","WD","Vessel","LL"),las=2,ylim=c(0,0.9),ylab="sd log10") -points(r[,1],sd.vec.genus,col="red",pch=16,cex=2) -## for (i in 1:length(nobs.names)){ -## ## sd.obs <- data.TRY.sd.update[[sd.names[i]]][!data.TRY.sd.update[[genus.names[i]]]] -## ## points(rep(r[i,1],length(sd.obs)),sd.obs) -## ## sd.obs <- data.TRY.sd.update[[sd.names[i]]][data.TRY.sd.update[[genus.names[i]]]] -## ## points(rep(r[i,1],length(sd.obs)),sd.obs,col="red",pch=4) -## print(sd.obs) -## } -dev.off() - - - - - - - - - - - +r <- barplot(sd.vec.sp, names.arg = c("Leaf.N", "SM", "SLA", "WD", "Vessel", "LL"), + las = 2, ylim = c(0, 0.9), ylab = "sd log10") +points(r[, 1], sd.vec.genus, col = "red", pch = 16, cex = 2) +## for (i in 1:length(nobs.names)){ ## sd.obs <- +## data.TRY.sd.update[[sd.names[i]]][!data.TRY.sd.update[[genus.names[i]]]] ## +## points(rep(r[i,1],length(sd.obs)),sd.obs) ## sd.obs <- +## data.TRY.sd.update[[sd.names[i]]][data.TRY.sd.update[[genus.names[i]]]] ## +## points(rep(r[i,1],length(sd.obs)),sd.obs,col='red',pch=4) print(sd.obs) } +dev.off() diff --git a/merge.data.BCI.R b/merge.data.BCI.R index 8499760bb56201265f9e7de0edf2493de8d47547..bf65743fa2dc8582ed0df07475525e24a619896c 100644 --- a/merge.data.BCI.R +++ b/merge.data.BCI.R @@ -1,128 +1,119 @@ -### MERGE BCI DATA -### Edited by FH +### MERGE BCI DATA Edited by FH rm(list = ls()) source("./R/format.function.R") library(reshape) -######################### -## READ DATA -#################### -### read individuals tree data -## Requires careful formatting of 7 census datasets -## The raw data is such that, once a tree dies in census X, then it no longer exists in census X+1, X+2 etc... -data.bci1 <- read.table("./data/raw/DataBCI/census1/PlotsDataReport.txt",header=TRUE,stringsAsFactors=FALSE,sep = "\t") +######################### READ DATA read individuals tree data Requires careful formatting of 7 census +######################### datasets The raw data is such that, once a tree dies in census X, then it no +######################### longer exists in census X+1, X+2 etc... +data.bci1 <- read.table("./data/raw/DataBCI/census1/PlotsDataReport.txt", header = TRUE, + stringsAsFactors = FALSE, sep = "\t") big.bci <- NULL -for(k in 2:7) { - new.directory <- paste("./data/raw/DataBCI/census",k,"/PlotsDataReport.txt",sep="") - data.bci2 <- read.table(new.directory,header=TRUE,stringsAsFactors=FALSE,sep = "\t") - if(!is.null(big.bci)) { - sub.bci <- merge(data.bci1[,c(2:7,11,13)], - data.frame(TreeID = data.bci2[["TreeID"]], DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], - dead = as.numeric(data.bci2[["Status"]] == "dead")),sort = T, by = "TreeID") - big.bci <- rbind(big.bci, sub.bci) } - if(is.null(big.bci)) { - big.bci <- merge(data.bci1[,c(2:7,11,13)], - data.frame(TreeID = data.bci2[["TreeID"]], DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], - dead = as.numeric(data.bci2[["Status"]] == "dead")), - sort = T, by = "TreeID") } - data.bci1 <- data.bci2 - cat("Census", k, "now included\n") } +for (k in 2:7) { + new.directory <- paste("./data/raw/DataBCI/census", k, "/PlotsDataReport.txt", + sep = "") + data.bci2 <- read.table(new.directory, header = TRUE, stringsAsFactors = FALSE, + sep = "\t") + if (!is.null(big.bci)) { + sub.bci <- merge(data.bci1[, c(2:7, 11, 13)], data.frame(TreeID = data.bci2[["TreeID"]], + DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], dead = as.numeric(data.bci2[["Status"]] == + "dead")), sort = T, by = "TreeID") + big.bci <- rbind(big.bci, sub.bci) + } + if (is.null(big.bci)) { + big.bci <- merge(data.bci1[, c(2:7, 11, 13)], data.frame(TreeID = data.bci2[["TreeID"]], + DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], dead = as.numeric(data.bci2[["Status"]] == + "dead")), sort = T, by = "TreeID") + } + data.bci1 <- data.bci2 + cat("Census", k, "now included\n") +} rm(data.bci1, data.bci2, sub.bci) -big.bci <- big.bci[order(big.bci$TreeID),] -colnames(big.bci)[c(7:8)] <- c("DBH1","Date1") +big.bci <- big.bci[order(big.bci$TreeID), ] +colnames(big.bci)[c(7:8)] <- c("DBH1", "Date1") data.bci <- big.bci rm(big.bci) ### read species names -species.clean <- read.table("./data/raw/DataBCI/TaxonomyDataReport.txt",stringsAsFactors=FALSE, header = T, sep = "\t") +species.clean <- read.table("./data/raw/DataBCI/TaxonomyDataReport.txt", stringsAsFactors = FALSE, + header = T, sep = "\t") ## Try to relate SpeciesID in species.clean species names in data.bci -data.bci$sp2 = species.clean$SpeciesID[match(data.bci$Latin, - paste(species.clean[["Genus"]], species.clean[["species"]]))] +data.bci$sp2 = species.clean$SpeciesID[match(data.bci$Latin, paste(species.clean[["Genus"]], + species.clean[["species"]]))] length(unique(data.bci$sp)) -###################################### -## MASSAGE TRAIT DATA -############################ -## Use HEIGHT_AVG, LMALAM_AVD, SEED_DRY, BUT I DO NOT KNOW WHICH WOOD DENSITY VARIABLE TO USE -data.trait <- read.csv("./data/raw/DataBCI/BCITRAITS_20101220.csv",stringsAsFactors=FALSE, header = T) -data.trait$Latin <- apply(data.trait[,1:2], 1, paste, collapse = " ") -data.bci <- merge(data.bci, data.trait[,c(ncol(data.trait),3,7:10,13,15,18,20:21)], by = "Latin", all.x = T) - -data.bci <- data.bci[order(data.bci$TreeID),] -########################################## -## FORMAT INDIVIDUAL TREE DATA -############# +###################################### MASSAGE TRAIT DATA Use HEIGHT_AVG, LMALAM_AVD, SEED_DRY, BUT I DO NOT KNOW +###################################### WHICH WOOD DENSITY VARIABLE TO USE +data.trait <- read.csv("./data/raw/DataBCI/BCITRAITS_20101220.csv", stringsAsFactors = FALSE, + header = T) +data.trait$Latin <- apply(data.trait[, 1:2], 1, paste, collapse = " ") +data.bci <- merge(data.bci, data.trait[, c(ncol(data.trait), 3, 7:10, 13, 15, 18, + 20:21)], by = "Latin", all.x = T) + +data.bci <- data.bci[order(data.bci$TreeID), ] +########################################## FORMAT INDIVIDUAL TREE DATA data.bci$Date1 <- as.Date(data.bci$Date1) data.bci$Date2 <- as.Date(data.bci$Date2) -#data.bci$yr1 <- format(strptime(data.bci$Date1, format = "%Y-%m-%d"),"%Y") -#data.bci$yr2 <- format(strptime(data.bci$Date2, format = "%Y-%m-%d"),"%Y") -data.bci$year <- as.numeric(difftime(data.bci$Date2, data.bci$Date1, units = "weeks")/52) ## Not rounded +# data.bci$yr1 <- format(strptime(data.bci$Date1, format = '%Y-%m-%d'),'%Y') +# data.bci$yr2 <- format(strptime(data.bci$Date2, format = '%Y-%m-%d'),'%Y') +data.bci$year <- as.numeric(difftime(data.bci$Date2, data.bci$Date1, units = "weeks")/52) ## Not rounded -## change unit and names of variables to be the same in all data for the tree -data.bci$G <- 10*(data.bci$DBH1-data.bci$DBH1)/data.bci$year ## diameter growth in mm per year - BASED ON UNROUNDED YEARS +## change unit and names of variables to be the same in all data for the tree +data.bci$G <- 10 * (data.bci$DBH1 - data.bci$DBH1)/data.bci$year ## diameter growth in mm per year - BASED ON UNROUNDED YEARS data.bci$D <- data.bci[["DBH1"]] -data.bci$plot <- data.bci[["Quadrat"]] ## plot code? +data.bci$plot <- data.bci[["Quadrat"]] ## plot code? data.bci$htot <- data.bci$HEIGHT_AVG data.bci$sp.name <- data.bci$Latin -###################### -## ECOREGION -################### -## bci has only 1 eco-region - -###################### -## PERCENT DEAD -################### -## variable percent dead/cannot do with since dead variable is missing -## compute numer of dead per plot to remove plot with disturbance - -function.perc.dead2 <- function(dead) { - out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]) - if(!is.finite(out)) - out <- NA - return(out) +###################### ECOREGION bci has only 1 eco-region + +###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is +###################### missing compute numer of dead per plot to remove plot with disturbance + +function.perc.dead2 <- function(dead) { + out <- sum(dead, na.rm = T)/length(dead[!is.na(dead)]) + if (!is.finite(out)) + out <- NA + return(out) } -perc.dead <- tapply(data.bci[["dead"]],INDEX=data.bci[["plot"]],FUN=function.perc.dead2) -data.bci <- merge(data.bci,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) +perc.dead <- tapply(data.bci[["dead"]], INDEX = data.bci[["plot"]], FUN = function.perc.dead2) +data.bci <- merge(data.bci, data.frame(plot = names(perc.dead), perc.dead = perc.dead), + by = "plot", sort = FALSE) -########################################################### -### PLOT SELECTION FOR THE ANALYSIS -################### -## Remove data with dead == 1 +########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 table(data.bci$dead) -vec.abio.var.names <- c("MAT","MAP") ## MISSING -vec.basic.var <- c("treeid","sp","sp.name","plot","D","G","dead","year","htot","x","y","perc.dead") -data.tree <- subset(data.bci,select=c(vec.basic.var,vec.abio.var.names)) +vec.abio.var.names <- c("MAT", "MAP") ## MISSING +vec.basic.var <- c("treeid", "sp", "sp.name", "plot", "D", "G", "dead", "year", "htot", + "x", "y", "perc.dead") +data.tree <- subset(data.bci, select = c(vec.basic.var, vec.abio.var.names)) -############################################## -## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species -########################### -## DON'T KNOW SUBPLOT SIZE! -data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.bci[["TreeID"]]), diam=as.vector(data.bci[["D"]]), - sp=as.vector(data.bci[["sp"]]), id.plot=as.vector(data.bci[["plot"]]), - weights=1/(pi*(0.5*data.bci$D/100)^2), weight.full.plot=NA) +############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in +############################################## m^2/ha without the target species DON'T KNOW SUBPLOT SIZE! +data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.bci[["TreeID"]]), diam = as.vector(data.bci[["D"]]), + sp = as.vector(data.bci[["sp"]]), id.plot = as.vector(data.bci[["plot"]]), weights = 1/(pi * + (0.5 * data.bci$D/100)^2), weight.full.plot = NA) ## change NA and <0 data for 0 data.BA.SP[is.na(data.BA.SP)] <- 0 -data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 +data.BA.SP[, -1][data.BA.SP[, -1] < 0] <- 0 ### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data.bci[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") +if (sum(!(names(data.BA.SP)[-1] %in% unique(data.bci[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE) +BATOT.COMPET <- apply(data.BA.SP[, -1], 1, sum, na.rm = TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET rm(BATOT.COMPET) ### create data frame -names(data.BA.SP) <- c("TreeID",names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(TreeID=data.bci[["TreeID"]],ecocode=data.bci[["ecocode"]]),data.BA.SP,by="TreeID",sort=FALSE) +names(data.BA.SP) <- c("TreeID", names(data.BA.SP)[-1]) +data.BA.sp <- merge(data.frame(TreeID = data.bci[["TreeID"]], ecocode = data.bci[["ecocode"]]), + data.BA.SP, by = "TreeID", sort = FALSE) ## test -if(sum(!data.BA.sp[["TreeID"]] == data.tree[["TreeID"]]) >0) stop("competition index not in the same order than data.tree") +if (sum(!data.BA.sp[["TreeID"]] == data.tree[["TreeID"]]) > 0) stop("competition index not in the same order than data.tree") ## save everything as a list -list.bci <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.bci,file="./data/process/list.bci.Rdata") - +list.bci <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) +save(list.bci, file = "./data/process/list.bci.Rdata") diff --git a/merge.data.CANADA.R b/merge.data.CANADA.R index 9506aba6765dbc8a37866fc210cf8f506b98fcfd..4abfd334f944b11a3cd53c99459d0724f743ab5f 100644 --- a/merge.data.CANADA.R +++ b/merge.data.CANADA.R @@ -1,106 +1,109 @@ -### MERGE canada DATA -### Edited by FH -rm(list = ls()); source("./R/format.function.R"); library(reshape) - -######################### -## READ DATA -#################### -### read individuals tree data -#data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130816.csv",header=TRUE,stringsAsFactors =FALSE) -data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130818.csv",header=TRUE,stringsAsFactors =FALSE) -data.canada <- data.canada[which(!is.na(data.canada$Species)),] +### MERGE canada DATA Edited by FH +rm(list = ls()) +source("./R/format.function.R") +library(reshape) + +######################### READ DATA read individuals tree data data.canada <- +######################### read.csv('./data/raw/DataCanada/Canada_Data2George_20130816.csv',header=TRUE,stringsAsFactors +######################### =FALSE) +data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130818.csv", + header = TRUE, stringsAsFactors = FALSE) +data.canada <- data.canada[which(!is.na(data.canada$Species)), ] colnames(data.canada)[2] <- "Species" ### read species names -species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv",stringsAsFactors=FALSE) +species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv", stringsAsFactors = FALSE) -###################################### -## MASSAGE TRAIT DATA -############################ -## HEIGHT DATA FOR TREE MISSING -## BRING US DATA FOR HEIGHT OVER WHEN WE ANALYZE THAT DATASET LATER ON +###################################### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER +###################################### WHEN WE ANALYZE THAT DATASET LATER ON -########################################## -## FORMAT INDIVIDUAL TREE DATA -############# +########################################## FORMAT INDIVIDUAL TREE DATA -## change unit and names of variables to be the same in all data for the tree -data.canada$G <- 10*(data.canada$FinalDBH-data.canada$InitDBH)/data.canada$Interval ## diameter growth in mm per year +## change unit and names of variables to be the same in all data for the tree +data.canada$G <- 10 * (data.canada$FinalDBH - data.canada$InitDBH)/data.canada$Interval ## diameter growth in mm per year data.canada$G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA -data.canada$year <- data.canada$Interval ## number of year between measuremen -data.canada$D <- data.canada[["InitDBH"]]; data.canada$D[data.canada$D == 0] <- NA ;## diameter in cm -data.canada$dead <- as.numeric(data.canada$FinalDBH > 0) ## dummy variable for dead tree 0 alive 1 dead -data.canada$sp <- as.character(data.canada[["Species"]]) ## species code -data.canada$plot <- (data.canada[["PLOT_ID"]]) ## plot code -data.canada$htot <- rep(NA,length(data.canada[["Species"]])) ## height of tree in m - MISSING -data.canada$tree.id <- gsub("_",".",data.canada$PLOTTREE); ## tree unique id -data.canada$sp.name <- NA; -for(i in 1:length(unique(data.canada$sp))) { - v <- species.clean$SPCD - data.canada$sp.name[which(data.canada$sp == unique(data.canada$sp)[i])] <- species.clean$COMMON_NAME[which(v == unique(data.canada$sp)[i])] } - - -###################### -## ECOREGION -################### -## merge greco to have no ecoregion with low number of observation -greco <- read.csv(file = "./data/raw/DataCanada/EcoregionCodes.csv", header = T, sep = "\t") - -table(data.canada$Ecocode) -## Some ecoregions still have small # of individuals, so either drop off for analysis later on or wait for Quebec data to come in -# -# library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); -# ecoreg <- unclass(data.canada$eco_code); -# plot(data.canada[["CX"]][order(ecoreg)],data.canada[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg)))); -# legend("bottomright", col = mycols, legend = levels(data.canada$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2) -# points(data.canada[["CX"]][ecoreg == 9],data.canada[["CY"]][ecoreg == 9],pty=".",cex=.2, col = "black"); ## Highlight the region with 55 sites -# ## PA1219 looks to be similar to PA1209; merge them together -# data.canada$eco_codemerged <- combine_factor(data.canada$eco_code, c(1:8,6,9)) - -###################### -## PERCENT DEAD -################### -## variable percent dead/cannot do with since dead variable is missing -## compute numer of dead per plot to remove plot with disturbance -perc.dead <- tapply(data.canada[["dead"]],INDEX=data.canada[["plot"]],FUN=function.perc.dead) -# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) -data.canada <- merge(data.canada,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) - -########################################################### -### PLOT SELECTION FOR THE ANALYSIS -################### -## Remove data with dead == 1 +data.canada$year <- data.canada$Interval ## number of year between measuremen +data.canada$D <- data.canada[["InitDBH"]] +data.canada$D[data.canada$D == 0] <- NA +## diameter in cm +data.canada$dead <- as.numeric(data.canada$FinalDBH > 0) ## dummy variable for dead tree 0 alive 1 dead +data.canada$sp <- as.character(data.canada[["Species"]]) ## species code +data.canada$plot <- (data.canada[["PLOT_ID"]]) ## plot code +data.canada$htot <- rep(NA, length(data.canada[["Species"]])) ## height of tree in m - MISSING +data.canada$tree.id <- gsub("_", ".", data.canada$PLOTTREE) +## tree unique id +data.canada$sp.name <- NA + +for (i in 1:length(unique(data.canada$sp))) { + v <- species.clean$SPCD + data.canada$sp.name[which(data.canada$sp == unique(data.canada$sp)[i])] <- species.clean$COMMON_NAME[which(v == + unique(data.canada$sp)[i])] +} + + +###################### ECOREGION merge greco to have no ecoregion with low number of observation +greco <- read.csv(file = "./data/raw/DataCanada/EcoregionCodes.csv", header = T, + sep = "\t") + +table(data.canada$Ecocode) +## Some ecoregions still have small # of individuals, so either drop off for +## analysis later on or wait for Quebec data to come in library(RColorBrewer) +mycols <- brewer.pal(10, "Set3") + +# ecoreg <- unclass(data.canada$eco_code) + +# plot(data.canada[['CX']][order(ecoreg)],data.canada[['CY']][order(ecoreg)],pty='.',cex=.2, +# col = rep(mycols,as.vector(table(ecoreg)))) + +# legend('bottomright', col = mycols, legend = levels(data.canada$eco_code), pch +# = rep(19,length(levels(ecoreg))),cex=2) points(data.canada[['CX']][ecoreg == +# 9],data.canada[['CY']][ecoreg == 9],pty='.',cex=.2, col = 'black') Highlight +# the region with 55 sites ## PA1219 looks to be similar to PA1209, merge them +# together data.canada$eco_codemerged <- combine_factor(data.canada$eco_code, +# c(1:8,6,9)) + +###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is +###################### missing compute numer of dead per plot to remove plot with disturbance +perc.dead <- tapply(data.canada[["dead"]], INDEX = data.canada[["plot"]], FUN = function.perc.dead) +# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF +# AVAILABLE (disturbance record) +data.canada <- merge(data.canada, data.frame(plot = names(perc.dead), perc.dead = perc.dead), + by = "plot", sort = FALSE) + +########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 table(data.canada$dead) ## Nothing to remove -colnames(data.canada)[c(3,1,11,13)] <- c("sp","plot","w","ecocode") -vec.abio.var.names <- c("MAT","MAP") -vec.basic.var <- c("tree.id","sp","sp.name","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead") -data.tree <- subset(data.canada,select=c(vec.basic.var,vec.abio.var.names)) +colnames(data.canada)[c(3, 1, 11, 13)] <- c("sp", "plot", "w", "ecocode") +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var <- c("tree.id", "sp", "sp.name", "plot", "ecocode", "D", "G", "dead", + "year", "htot", "Lon", "Lat", "perc.dead") +data.tree <- subset(data.canada, select = c(vec.basic.var, vec.abio.var.names)) -############################################## -## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species -########################### -data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.canada[["tree.id"]]), diam=as.vector(data.canada[["D"]]), - sp=as.vector(data.canada[["sp"]]), id.plot=as.vector(data.canada[["plot"]]), - weights=1/(10000*data.canada[["SubPlot_Size"]]), weight.full.plot=NA) +############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in +############################################## m^2/ha without the target species +data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.canada[["tree.id"]]), diam = as.vector(data.canada[["D"]]), + sp = as.vector(data.canada[["sp"]]), id.plot = as.vector(data.canada[["plot"]]), + weights = 1/(10000 * data.canada[["SubPlot_Size"]]), weight.full.plot = NA) ## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 +data.BA.SP[is.na(data.BA.SP)] <- 0 +data.BA.SP[, -1][data.BA.SP[, -1] < 0] <- 0 ### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data.canada[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") +if (sum(!(names(data.BA.SP)[-1] %in% unique(data.canada[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE) -data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) +BATOT.COMPET <- apply(data.BA.SP[, -1], 1, sum, na.rm = TRUE) +data.BA.SP$BATOT.COMPET <- BATOT.COMPET +rm(BATOT.COMPET) ### create data frame -names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id=data.canada[["tree.id"]],ecocode=data.canada[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) +names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) +data.BA.sp <- merge(data.frame(tree.id = data.canada[["tree.id"]], ecocode = data.canada[["ecocode"]]), + data.BA.SP, by = "tree.id", sort = FALSE) ## test -if(sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") +if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") ## save everything as a list -list.canada <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.spain,file="./data/process/list.canada.Rdata") - +list.canada <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) +save(list.spain, file = "./data/process/list.canada.Rdata") diff --git a/merge.data.FRANCE.R b/merge.data.FRANCE.R index 191350d74e5cee0d958805a114cfb4b90dcda0b9..36aec35322bf5aa26c85500a1bdfbf38424d3cf9 100644 --- a/merge.data.FRANCE.R +++ b/merge.data.FRANCE.R @@ -1,75 +1,69 @@ -############################################# -############################################# -############################################# -### MERGE FRENCH DATA +############################################# MERGE FRENCH DATA source("./R/format.function.R") -## source("./R/READ.DATA.NFI.FRANCE.R") need to be run if no already doen (long ...) -## source("./R/CLIMATE.FRANCE.R") +## source('./R/READ.DATA.NFI.FRANCE.R') need to be run if no already doen (long +## ...) source('./R/CLIMATE.FRANCE.R') -########################################################################### -######################### READ DATA (test) +########################################################################### READ DATA (test) ### read individuals tree data -load('./data/process/arbre.ALIVE.DEAD2.Rdata') +load("./data/process/arbre.ALIVE.DEAD2.Rdata") ### read climate ecologie.clim.data <- readRDS("./data/process/ecologie.clim.data.rds") #### merge -dataIFN.FRANCE.t <- merge(arbre.ALIVE.DEAD2, - subset(ecologie.clim.data,select= - c("idp","SER","sgdd","WB.y","WB.s","WS.y","WS.s","MAT","SAP")),by="idp") -rm(arbre.ALIVE.DEAD2,ecologie.clim.data) +dataIFN.FRANCE.t <- merge(arbre.ALIVE.DEAD2, subset(ecologie.clim.data, select = c("idp", + "SER", "sgdd", "WB.y", "WB.s", "WS.y", "WS.s", "MAT", "SAP")), by = "idp") +rm(arbre.ALIVE.DEAD2, ecologie.clim.data) #### load plot data and merge load("./data/process/placette_tot.Rdata") -dataIFN.FRANCE <- merge(dataIFN.FRANCE.t,placette_tot[,names(placette_tot) != "YEAR"],by="idp") -rm(placette_tot,dataIFN.FRANCE.t) +dataIFN.FRANCE <- merge(dataIFN.FRANCE.t, placette_tot[, names(placette_tot) != "YEAR"], + by = "idp") +rm(placette_tot, dataIFN.FRANCE.t) ### read IFN species names and clean -species.clean <- fun.clean.species.tab(read.csv("./data/species.list/species.csv", - sep="\t",stringsAsFactors=FALSE)) +species.clean <- fun.clean.species.tab(read.csv("./data/species.list/species.csv", + sep = "\t", stringsAsFactors = FALSE)) ### read TRY data -data.TRY.sd.update <- readRDS("./data/process/data.TRY.sd.update.rds") -data.frame.TRY <- data.frame(Latin_name=rownames(data.TRY.sd.update ),data.TRY.sd.update) +data.TRY.sd.update <- readRDS("./data/process/data.TRY.sd.update.rds") +data.frame.TRY <- data.frame(Latin_name = rownames(data.TRY.sd.update), data.TRY.sd.update) rm(data.TRY.sd.update) ### merge with code and species name -merge.TRY <- merge(species.clean,data.frame.TRY,by="Latin_name") -rm(species.clean,data.frame.TRY) +merge.TRY <- merge(species.clean, data.frame.TRY, by = "Latin_name") +rm(species.clean, data.frame.TRY) -######################################################################## -### Compute maximum height per species plus sd from observed height in NFI data to add variables to the traits data base -res.quant.boot <- t(sapply(levels(factor(dataIFN.FRANCE[["espar"]])), - FUN=f.quantile.boot, - R=1000, - x=log10(dataIFN.FRANCE[["htot"]]), - fac=factor(dataIFN.FRANCE[["espar"]]))) +######################################################################## Compute maximum height per species plus sd from observed height in NFI data to +######################################################################## add variables to the traits data base +res.quant.boot <- t(sapply(levels(factor(dataIFN.FRANCE[["espar"]])), FUN = f.quantile.boot, + R = 1000, x = log10(dataIFN.FRANCE[["htot"]]), fac = factor(dataIFN.FRANCE[["espar"]]))) ## create data base -data.max.height <- data.frame(code=rownames(res.quant.boot),Max.height.mean=res.quant.boot[,1],Max.height.sd=res.quant.boot[,2],Max.height.nobs=res.quant.boot[,3]) +data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, + 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3]) rm(res.quant.boot) -## write.csv(data.max.height,file="./data/process/data.max.height.csv") +## write.csv(data.max.height,file='./data/process/data.max.height.csv') -############## -## merge TRY with max height -merge.TRY <- merge(merge.TRY,data.max.height,by="code") +############## merge TRY with max height +merge.TRY <- merge(merge.TRY, data.max.height, by = "code") rm(data.max.height) # use mean sd of max tree height over all species -merge.TRY$Max.height.sd.1 <- rep(mean(merge.TRY[["Max.height.sd"]],na.rm=TRUE),length=nrow(merge.TRY)) +merge.TRY$Max.height.sd.1 <- rep(mean(merge.TRY[["Max.height.sd"]], na.rm = TRUE), + length = nrow(merge.TRY)) ### keep only variables needed in traits data -names.traits.data <- c("code","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean", - "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd.1","Seed.mass.sd.1","SLA.sd.1", "Wood.Density.sd.1", - "Leaf.Lifespan.sd.1","Max.height.sd.1") +names.traits.data <- c("code", "Latin_name", "Leaf.N.mean", "Seed.mass.mean", "SLA.mean", + "Wood.Density.mean", "Leaf.Lifespan.mean", "Max.height.mean", "Leaf.N.sd.1", + "Seed.mass.sd.1", "SLA.sd.1", "Wood.Density.sd.1", "Leaf.Lifespan.sd.1", "Max.height.sd.1") -data.traits <- merge.TRY[,names.traits.data] -names(data.traits) <- c("sp","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean", - "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd","Seed.mass.sd","SLA.sd", "Wood.Density.sd", - "Leaf.Lifespan.sd","Max.height.sd") ## rename to have standard variables name -rm(merge.TRY,names.traits.data) +data.traits <- merge.TRY[, names.traits.data] +names(data.traits) <- c("sp", "Latin_name", "Leaf.N.mean", "Seed.mass.mean", "SLA.mean", + "Wood.Density.mean", "Leaf.Lifespan.mean", "Max.height.mean", "Leaf.N.sd", "Seed.mass.sd", + "SLA.sd", "Wood.Density.sd", "Leaf.Lifespan.sd", "Max.height.sd") ## rename to have standard variables name +rm(merge.TRY, names.traits.data) @@ -77,86 +71,79 @@ rm(merge.TRY,names.traits.data) -################################################################ -############# FORMAT INDIVIDUAL TREE DATA -############# +################################################################ FORMAT INDIVIDUAL TREE DATA -## change unit and names of variables to be the same in all data for the tree -dataIFN.FRANCE$G <- dataIFN.FRANCE[["ir5"]]/5*2 ##diameter growth in mm per year -dataIFN.FRANCE$year <- rep(5,length(dataIFN.FRANCE[["ir5"]])) ## number of year between measurement -dataIFN.FRANCE$D <- dataIFN.FRANCE[["c13"]]/pi ## diameter in cm -dataIFN.FRANCE$dead <- dataIFN.FRANCE[["dead"]] ## dummy variable for dead tree 0 alive 1 dead -dataIFN.FRANCE$sp <- as.character(dataIFN.FRANCE[["espar"]]) ## species code -dataIFN.FRANCE$plot <- (dataIFN.FRANCE[["idp"]]) ## plot code -dataIFN.FRANCE$htot <- (dataIFN.FRANCE[["htot"]]) ## height of tree in m -dataIFN.FRANCE$tree.id <- paste(dataIFN.FRANCE[["idp"]],dataIFN.FRANCE[["a"]],sep=".") ## tree unique id +## change unit and names of variables to be the same in all data for the tree +dataIFN.FRANCE$G <- dataIFN.FRANCE[["ir5"]]/5 * 2 ##diameter growth in mm per year +dataIFN.FRANCE$year <- rep(5, length(dataIFN.FRANCE[["ir5"]])) ## number of year between measurement +dataIFN.FRANCE$D <- dataIFN.FRANCE[["c13"]]/pi ## diameter in cm +dataIFN.FRANCE$dead <- dataIFN.FRANCE[["dead"]] ## dummy variable for dead tree 0 alive 1 dead +dataIFN.FRANCE$sp <- as.character(dataIFN.FRANCE[["espar"]]) ## species code +dataIFN.FRANCE$plot <- (dataIFN.FRANCE[["idp"]]) ## plot code +dataIFN.FRANCE$htot <- (dataIFN.FRANCE[["htot"]]) ## height of tree in m +dataIFN.FRANCE$tree.id <- paste(dataIFN.FRANCE[["idp"]], dataIFN.FRANCE[["a"]], sep = ".") ## tree unique id #### change coordinates system of x y to be in lat long WGS84 library(sp) library(dismo) library(rgdal) -data.sp <- dataIFN.FRANCE[,c("idp","xl93","yl93")] -coordinates(data.sp) <- c("xl93", "yl93") ## EPSG CODE 2154 +data.sp <- dataIFN.FRANCE[, c("idp", "xl93", "yl93")] +coordinates(data.sp) <- c("xl93", "yl93") ## EPSG CODE 2154 proj4string(data.sp) <- CRS("+init=epsg:2154") # define projection system of our data ## EPSG CODE 2154 summary(data.sp) -data.sp2 <- spTransform(data.sp,CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon -dataIFN.FRANCE$Lon <- coordinates(data.sp2)[,"xl93"] -dataIFN.FRANCE$Lat <- coordinates(data.sp2)[,"yl93"] -rm(data.sp,data.sp2) -## ## plot on world map -## library(rworldmap) -## newmap <- getMap(resolution = "coarse") # different resolutions available -## plot(newmap) -## points(data.sp2,cex=0.2,col="red") - -############################ -## merge greco to have no ecoregion with low number of observation - -## merge A and B Grand Ouest cristallin and oceanique and Center semi-oceanique -## merge G D E Vosges Jura massif cemtral (low mountain) -## merge H and I Alpes and Pyrenees -## Merge J and K Corse and Mediteraneen -dataIFN.FRANCE$GRECO <- substr( dataIFN.FRANCE[["SER"]],1,1) ## get GRECO from SER (smaller division by keeping only the first letter +data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon +dataIFN.FRANCE$Lon <- coordinates(data.sp2)[, "xl93"] +dataIFN.FRANCE$Lat <- coordinates(data.sp2)[, "yl93"] +rm(data.sp, data.sp2) +## ## plot on world map library(rworldmap) newmap <- getMap(resolution = 'coarse') +## # different resolutions available plot(newmap) +## points(data.sp2,cex=0.2,col='red') + +############################ merge greco to have no ecoregion with low number of observation + +## merge A and B Grand Ouest cristallin and oceanique and Center semi-oceanique +## merge G D E Vosges Jura massif cemtral (low mountain) merge H and I Alpes and +## Pyrenees Merge J and K Corse and Mediteraneen +dataIFN.FRANCE$GRECO <- substr(dataIFN.FRANCE[["SER"]], 1, 1) ## get GRECO from SER (smaller division by keeping only the first letter GRECO.temp <- dataIFN.FRANCE[["GRECO"]] -GRECO.temp <- sub("[AB]","AB",GRECO.temp) -GRECO.temp <- sub("[GDE]","GDE",GRECO.temp) -GRECO.temp <- sub("[HI]","HI",GRECO.temp) -GRECO.temp <- sub("[JK]","JK",GRECO.temp) -## plot(dataIFN.FRANCE[["xl93"]],dataIFN.FRANCE[["yl93"]],col=unclass(factor(GRECO.temp))) +GRECO.temp <- sub("[AB]", "AB", GRECO.temp) +GRECO.temp <- sub("[GDE]", "GDE", GRECO.temp) +GRECO.temp <- sub("[HI]", "HI", GRECO.temp) +GRECO.temp <- sub("[JK]", "JK", GRECO.temp) +## plot(dataIFN.FRANCE[['xl93']],dataIFN.FRANCE[['yl93']],col=unclass(factor(GRECO.temp))) ## add NEW GRECO variable to data base -dataIFN.FRANCE$ecocode <- GRECO.temp ## a single code for each ecoregion +dataIFN.FRANCE$ecocode <- GRECO.temp ## a single code for each ecoregion -## variable percent dead -####################### -###compute numer of dead per plot to remove plot with disturbance -perc.dead <- tapply(dataIFN.FRANCE[["dead"]],INDEX=dataIFN.FRANCE[["idp"]],FUN=function.perc.dead) -## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) -dataIFN.FRANCE <- merge(dataIFN.FRANCE,data.frame(idp=as.numeric(names(perc.dead)),perc.dead=perc.dead), - sort=FALSE) +## variable percent dead compute numer of dead per plot to remove plot with +## disturbance +perc.dead <- tapply(dataIFN.FRANCE[["dead"]], INDEX = dataIFN.FRANCE[["idp"]], FUN = function.perc.dead) +## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF +## AVAILABLE (disturbance record) +dataIFN.FRANCE <- merge(dataIFN.FRANCE, data.frame(idp = as.numeric(names(perc.dead)), + perc.dead = perc.dead), sort = FALSE) -########################################################################################### -### PLOT SELECTION FOR THE ANALYSIS +########################################################################################### PLOT SELECTION FOR THE ANALYSIS -## dataIFN.FRANCE <- subset(dataIFN.FRANCE,subset= dataIFN.FRANCE[["YEAR"]] != 2005)## year 2005 bad data according to IFN -dataIFN.FRANCE <- subset(dataIFN.FRANCE,subset= dataIFN.FRANCE[["plisi"]] == 0) # no plot on forest edge -dataIFN.FRANCE <- subset(dataIFN.FRANCE,subset= dataIFN.FRANCE[["dc"]] == 0) # no harvesting -dataIFN.FRANCE <- subset(dataIFN.FRANCE,subset= dataIFN.FRANCE[["tplant"]] == 0) # no plantation -dataIFN.FRANCE <- subset(dataIFN.FRANCE,subset= !is.na(dataIFN.FRANCE[["SER"]])) # missing SER +## dataIFN.FRANCE <- subset(dataIFN.FRANCE,subset= dataIFN.FRANCE[['YEAR']] != +## 2005)## year 2005 bad data according to IFN +dataIFN.FRANCE <- subset(dataIFN.FRANCE, subset = dataIFN.FRANCE[["plisi"]] == 0) # no plot on forest edge +dataIFN.FRANCE <- subset(dataIFN.FRANCE, subset = dataIFN.FRANCE[["dc"]] == 0) # no harvesting +dataIFN.FRANCE <- subset(dataIFN.FRANCE, subset = dataIFN.FRANCE[["tplant"]] == 0) # no plantation +dataIFN.FRANCE <- subset(dataIFN.FRANCE, subset = !is.na(dataIFN.FRANCE[["SER"]])) # missing SER -#################################### -################################### -## SELECT GOOD COLUMNS +#################################### SELECT GOOD COLUMNS -##names of variables for abiotic conditions -vec.abio.var.names <- c("MAT","SAP","sgdd","WB.s","WB.y","WS.s","WS.y") +## names of variables for abiotic conditions +vec.abio.var.names <- c("MAT", "SAP", "sgdd", "WB.s", "WB.y", "WS.s", "WS.y") ## other var -vec.basic.var <- c("tree.id","sp","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead") +vec.basic.var <- c("tree.id", "sp", "plot", "ecocode", "D", "G", "dead", "year", + "htot", "Lon", "Lat", "perc.dead") -data.tree <- subset(dataIFN.FRANCE,select=c(vec.basic.var,vec.abio.var.names)) +data.tree <- subset(dataIFN.FRANCE, select = c(vec.basic.var, vec.abio.var.names)) @@ -164,39 +151,34 @@ data.tree <- subset(dataIFN.FRANCE,select=c(vec.basic.var,vec.abio.var.names)) -################################################################################################################## -################################################################################################################## -################################################################################################################## -############################################## -## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species +################################################################################################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in +################################################################################################################## m^2/ha without the target species -data.BA.SP <- BA.SP.FUN(id.tree=as.vector(dataIFN.FRANCE[["tree.id"]]), -diam=as.vector(dataIFN.FRANCE[["D"]]), -sp=as.vector(dataIFN.FRANCE[["sp"]]), -id.plot=as.vector(dataIFN.FRANCE[["idp"]]), -weights=as.vector(dataIFN.FRANCE[["w"]])/10000, -weight.full.plot=1/(pi*(c(15))^2)) +data.BA.SP <- BA.SP.FUN(id.tree = as.vector(dataIFN.FRANCE[["tree.id"]]), diam = as.vector(dataIFN.FRANCE[["D"]]), + sp = as.vector(dataIFN.FRANCE[["sp"]]), id.plot = as.vector(dataIFN.FRANCE[["idp"]]), + weights = as.vector(dataIFN.FRANCE[["w"]])/10000, weight.full.plot = 1/(pi * + (c(15))^2)) ## change NA and <0 data for 0 -data.BA.SP[which(is.na(data.BA.SP),arr.ind=TRUE)] <- 0 -data.BA.SP[,-1][which(data.BA.SP[,-1]<0,arr.ind=TRUE)] <- 0 +data.BA.SP[which(is.na(data.BA.SP), arr.ind = TRUE)] <- 0 +data.BA.SP[, -1][which(data.BA.SP[, -1] < 0, arr.ind = TRUE)] <- 0 ### CHECK IF sp and sp name for column are the same -if(sum(! (names(data.BA.SP)[-1] %in% unique(dataIFN.FRANCE[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") +if (sum(!(names(data.BA.SP)[-1] %in% unique(dataIFN.FRANCE[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[,-1],MARGIN=1,FUN=sum,na.rm=TRUE) +BATOT.COMPET <- apply(data.BA.SP[, -1], MARGIN = 1, FUN = sum, na.rm = TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET ### create data frame -names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id=dataIFN.FRANCE[["tree.id"]],ecocode=dataIFN.FRANCE[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) +names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) +data.BA.sp <- merge(data.frame(tree.id = dataIFN.FRANCE[["tree.id"]], ecocode = dataIFN.FRANCE[["ecocode"]]), + data.BA.SP, by = "tree.id", sort = FALSE) ## test -if( sum(! data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") +if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") ## save everything as a list -list.FRANCE <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.FRANCE,file="./data/process/list.FRANCE.Rdata") - +list.FRANCE <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) +save(list.FRANCE, file = "./data/process/list.FRANCE.Rdata") diff --git a/merge.data.FUSHAN.R b/merge.data.FUSHAN.R index 4f9c3b2238f0495c8af9fb84e3024e8c6ac11db8..8e3b4bf24ff56e8462309d2c3edfb2b0dd18fb55 100644 --- a/merge.data.FUSHAN.R +++ b/merge.data.FUSHAN.R @@ -1,97 +1,91 @@ -### MERGE Fushan DATA -### Edited by FH -rm(list = ls()); source("./R/format.function.R"); library(reshape) - -######################### -## READ DATA -#################### -### read individuals tree data -data.fushan <- load("./data/raw/DataFushan/fushan.RData"); data.fushan <- data.frame(fushan); rm(fushan) -data.trait <- read.table("./data/raw/DataFushan/fs_trait_Kunstler.txt",header=T, sep = "\t") -colnames(data.trait) <- c("sp","sla","wd","seedmassmg","meanN","maxheightm") +### MERGE Fushan DATA Edited by FH +rm(list = ls()) +source("./R/format.function.R") +library(reshape) + +######################### READ DATA read individuals tree data +data.fushan <- load("./data/raw/DataFushan/fushan.RData") +data.fushan <- data.frame(fushan) +rm(fushan) +data.trait <- read.table("./data/raw/DataFushan/fs_trait_Kunstler.txt", header = T, + sep = "\t") +colnames(data.trait) <- c("sp", "sla", "wd", "seedmassmg", "meanN", "maxheightm") ### read species names -species.clean <- read.csv("./data/raw/DataFushan/Splist_Fushan_En.csv",stringsAsFactors=FALSE) +species.clean <- read.csv("./data/raw/DataFushan/Splist_Fushan_En.csv", stringsAsFactors = FALSE) -###################################### -## MASSAGE TRAIT DATA -############################ -## Obtain maximum height per species from data.trait; no sd available as we have only one observation for species +###################################### MASSAGE TRAIT DATA Obtain maximum height per species from data.trait no sd +###################################### available as we have only one observation for species data.max.height <- data.frame(sp = data.trait$sp, Max.height = log10(data.trait$maxheightm)) data.fushan <- merge(data.fushan, data.max.height, by = "sp") -########################################## -## FORMAT INDIVIDUAL TREE DATA -############# - -## change unit and names of variables to be the same in all data for the tree - DON'T HAVE A YEAR MEASUREMENT -data.fushan$G <- 10*(data.fushan$dbh2 -data.fushan$dbh1)/data.fushan$Interval ## diameter growth in mm per year -data.fushan$year <- data.fushan$Interval ## number of year between measurement - MISSING -data.fushan$D <- data.fushan[["dbh1"]]; ## diameter in cm -data.fushan$dead <- as.numeric(data.fushan$status2 == "alive") ## dummy variable for dead tree 0 alive 1 dead -data.fushan$plot <- (data.fushan[["PLOT_ID"]]) ## plot code - MISSING -data.fushan$htot <- rep(NA,length(data.fushan[["dbh1"]]))## height of tree in m - MISSING -data.fushan$tree.id <- data.fushan$tag ## tree unique id - MISSING -data.fushan$sp.name <- NA; v <- species.clean$SP -for(i in 1:length(unique(data.fushan$sp))) { - sel.spp <- which(data.fushan$sp == unique(data.fushan$sp)[i]) - data.fushan$sp.name[sel.spp] <- paste(species.clean$family[sel.spp],species.clean$genus[sel.spp], species.clean$epithet[sel.spp], sep = ".") } - -########################################## -## CHANGE COORDINATE SYSTEM -############# -## DON'T KNOW! - -###################### -## ECOREGION -################### -## DON'T SEE DATA FOR THIS AT THE MOMENT, ALTHOUGH IT'S PROBABLY ALL IN ONE ECOREGION ANYWAY? - -###################### -## PERCENT DEAD -################### -## CANNOT DO THIS WITHOUT A PLOTID -## compute numer of dead per plot to remove plot with disturbance -perc.dead <- tapply(data.fushan[["dead"]],INDEX=data.fushan[["plot"]],FUN=function.perc.dead) -# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) -data.fushan <- merge(data.fushan,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) - -########################################################### -### PLOT SELECTION FOR THE ANALYSIS -################### -## Remove data with dead == 1 +########################################## FORMAT INDIVIDUAL TREE DATA + +## change unit and names of variables to be the same in all data for the tree - +## DON'T HAVE A YEAR MEASUREMENT +data.fushan$G <- 10 * (data.fushan$dbh2 - data.fushan$dbh1)/data.fushan$Interval ## diameter growth in mm per year +data.fushan$year <- data.fushan$Interval ## number of year between measurement - MISSING +data.fushan$D <- data.fushan[["dbh1"]] +## diameter in cm +data.fushan$dead <- as.numeric(data.fushan$status2 == "alive") ## dummy variable for dead tree 0 alive 1 dead +data.fushan$plot <- (data.fushan[["PLOT_ID"]]) ## plot code - MISSING +data.fushan$htot <- rep(NA, length(data.fushan[["dbh1"]])) ## height of tree in m - MISSING +data.fushan$tree.id <- data.fushan$tag ## tree unique id - MISSING +data.fushan$sp.name <- NA +v <- species.clean$SP +for (i in 1:length(unique(data.fushan$sp))) { + sel.spp <- which(data.fushan$sp == unique(data.fushan$sp)[i]) + data.fushan$sp.name[sel.spp] <- paste(species.clean$family[sel.spp], species.clean$genus[sel.spp], + species.clean$epithet[sel.spp], sep = ".") +} + +########################################## CHANGE COORDINATE SYSTEM DON'T KNOW! + +###################### ECOREGION DON'T SEE DATA FOR THIS AT THE MOMENT, ALTHOUGH IT'S PROBABLY ALL IN +###################### ONE ECOREGION ANYWAY? + +###################### PERCENT DEAD CANNOT DO THIS WITHOUT A PLOTID compute numer of dead per plot to +###################### remove plot with disturbance +perc.dead <- tapply(data.fushan[["dead"]], INDEX = data.fushan[["plot"]], FUN = function.perc.dead) +# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF +# AVAILABLE (disturbance record) +data.fushan <- merge(data.fushan, data.frame(plot = names(perc.dead), perc.dead = perc.dead), + by = "plot", sort = FALSE) + +########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 table(data.fushan$dead) -data.fushan <- data.fushan[data.fushan$dead == 1,] +data.fushan <- data.fushan[data.fushan$dead == 1, ] -colnames(data.fushan)[c(3,1,11,13)] <- c("sp","plot","w") -vec.abio.var.names <- c("MAT","MAP") -vec.basic.var <- c("tree.id","sp","sp.name","plot","D","G","dead","year","htot","gx","gy","perc.dead") -data.tree <- subset(data.fushan,select=c(vec.basic.var,vec.abio.var.names)) +colnames(data.fushan)[c(3, 1, 11, 13)] <- c("sp", "plot", "w") +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var <- c("tree.id", "sp", "sp.name", "plot", "D", "G", "dead", "year", + "htot", "gx", "gy", "perc.dead") +data.tree <- subset(data.fushan, select = c(vec.basic.var, vec.abio.var.names)) -############################################## -## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species -########################### -## CANNOT DO WITHOUT A PLOT ID -data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.fushan[["tree.id"]]), diam=as.vector(data.fushan[["D"]]), - sp=as.vector(data.fushan[["sp"]]), id.plot=as.vector(data.fushan[["plot"]]), - weights=1/(pi*(0.5*data.fushan[["dbh1"]])^2), weight.full.plot=NA) +############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in +############################################## m^2/ha without the target species CANNOT DO WITHOUT A PLOT ID +data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.fushan[["tree.id"]]), diam = as.vector(data.fushan[["D"]]), + sp = as.vector(data.fushan[["sp"]]), id.plot = as.vector(data.fushan[["plot"]]), + weights = 1/(pi * (0.5 * data.fushan[["dbh1"]])^2), weight.full.plot = NA) ## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 +data.BA.SP[is.na(data.BA.SP)] <- 0 +data.BA.SP[, -1][data.BA.SP[, -1] < 0] <- 0 ### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data.fushan[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") +if (sum(!(names(data.BA.SP)[-1] %in% unique(data.fushan[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE) -data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) +BATOT.COMPET <- apply(data.BA.SP[, -1], 1, sum, na.rm = TRUE) +data.BA.SP$BATOT.COMPET <- BATOT.COMPET +rm(BATOT.COMPET) ### create data frame -names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id=data.fushan[["tree.id"]],ecocode=data.fushan[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) +names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) +data.BA.sp <- merge(data.frame(tree.id = data.fushan[["tree.id"]], ecocode = data.fushan[["ecocode"]]), + data.BA.SP, by = "tree.id", sort = FALSE) ## test -if(sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") +if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") ## save everything as a list -list.canada <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.spain,file="./data/process/list.canada.Rdata") - +list.canada <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) +save(list.spain, file = "./data/process/list.canada.Rdata") diff --git a/merge.data.NSW.R b/merge.data.NSW.R index 64e60cc358ca330e804bfd596c78962f6fdfc0c7..4dc6dc0e8c6f45050c46bf8ed372dc36c90cdbfc 100644 --- a/merge.data.NSW.R +++ b/merge.data.NSW.R @@ -1,149 +1,168 @@ -### MERGE NSW DATA -### Edited by FH -rm(list = ls()); source("./R/format.function.R"); library(reshape) - -######################### -## READ DATA -#################### -### read individuals tree data -data.nswbrc <- read.csv("./data/raw/DataNSW/NSW_data_BRcontrols.csv",header=TRUE,stringsAsFactors=FALSE,sep="\t") -data.nswbrc$Date.of.measure <- as.vector(sapply(data.nswbrc$Date.of.measure, function(x) { unlist(strsplit(x,"/"))[3] })) ## Extract the years only -data.nswbrt <- read.csv("./data/raw/DataNSW/NSW_data_BRtreatments.csv",header=TRUE,stringsAsFactors=FALSE,sep="\t") -data.nswbrt$Date.of.measure <- as.vector(sapply(data.nswbrt$Date.of.measure, function(x) { unlist(strsplit(x,"/"))[3] })) ## Extract the years only -data.nswbs1 <- read.csv("./data/raw/DataNSW/NSW_data_BS1.csv",header=TRUE,stringsAsFactors=FALSE,sep="\t") -data.nswbs1$Date.of.measure <- as.character(format(as.Date(data.nswbs1$Date.of.measure, format = "%d-%b-%y"), format = "%d/%b/%Y")) -data.nswbs1$Date.of.measure <- as.vector(sapply(data.nswbs1$Date.of.measure, function(x) { unlist(strsplit(x,"/"))[3] })) ## Extract the years only - -## data.nswbs2 has a different format to the other datasets, so format to match the above -data.nswbs2 <- read.csv("./data/raw/DataNSW/NSW_data_BS2.csv",header=TRUE,stringsAsFactors=FALSE,sep = "\t") -data.nswbs2$Plot <- apply(data.nswbs2[,1:2],1,paste,collapse=""); -data.nswbs2$Subplot <- NULL; data.nswbs2$Family <- NULL; colnames(data.nswbs2)[3] <- "species"; -data.nswbs2$x <- data.nswbs2$y <- rep(NA,nrow(data.nswbs2)) -data.nswbs22 <- data.nswbs2[rep(1:nrow(data.nswbs2),each=2),] -data.nswbs22$Date.of.measure <- rep(c(1988,2000),nrow(data.nswbs2)) +### MERGE NSW DATA Edited by FH +rm(list = ls()) +source("./R/format.function.R") +library(reshape) + +######################### READ DATA read individuals tree data +data.nswbrc <- read.csv("./data/raw/DataNSW/NSW_data_BRcontrols.csv", header = TRUE, + stringsAsFactors = FALSE, sep = "\t") +data.nswbrc$Date.of.measure <- as.vector(sapply(data.nswbrc$Date.of.measure, function(x) { + unlist(strsplit(x, "/"))[3] +})) ## Extract the years only +data.nswbrt <- read.csv("./data/raw/DataNSW/NSW_data_BRtreatments.csv", header = TRUE, + stringsAsFactors = FALSE, sep = "\t") +data.nswbrt$Date.of.measure <- as.vector(sapply(data.nswbrt$Date.of.measure, function(x) { + unlist(strsplit(x, "/"))[3] +})) ## Extract the years only +data.nswbs1 <- read.csv("./data/raw/DataNSW/NSW_data_BS1.csv", header = TRUE, stringsAsFactors = FALSE, + sep = "\t") +data.nswbs1$Date.of.measure <- as.character(format(as.Date(data.nswbs1$Date.of.measure, + format = "%d-%b-%y"), format = "%d/%b/%Y")) +data.nswbs1$Date.of.measure <- as.vector(sapply(data.nswbs1$Date.of.measure, function(x) { + unlist(strsplit(x, "/"))[3] +})) ## Extract the years only + +## data.nswbs2 has a different format to the other datasets, so format to match +## the above +data.nswbs2 <- read.csv("./data/raw/DataNSW/NSW_data_BS2.csv", header = TRUE, stringsAsFactors = FALSE, + sep = "\t") +data.nswbs2$Plot <- apply(data.nswbs2[, 1:2], 1, paste, collapse = "") + +data.nswbs2$Subplot <- NULL +data.nswbs2$Family <- NULL +colnames(data.nswbs2)[3] <- "species" + +data.nswbs2$x <- data.nswbs2$y <- rep(NA, nrow(data.nswbs2)) +data.nswbs22 <- data.nswbs2[rep(1:nrow(data.nswbs2), each = 2), ] +data.nswbs22$Date.of.measure <- rep(c(1988, 2000), nrow(data.nswbs2)) data.nswbs22$Dbh <- c(rbind(data.nswbs2[["DBH.cm..1988."]], data.nswbs2[["DBH.cm..2000."]])) data.nswbs22[["DBH.cm..1988."]] <- data.nswbs22[["DBH.cm..2000."]] <- NULL -data.nswbs2 <- data.nswbs22[,c(1:5,8:9,7,6)]; rm(data.nswbs22) +data.nswbs2 <- data.nswbs22[, c(1:5, 8:9, 7, 6)] +rm(data.nswbs22) -data.nswtnd <- read.csv("./data/raw/DataNSW/NSW_data_TND.csv",header=TRUE,stringsAsFactors=FALSE,sep = "\t") -data.nswtnd$Date.of.measure <- as.character(format(as.Date(data.nswtnd$Date.of.measure, format = "%d-%b-%y"), format = "%d/%b/%Y")) -data.nswtnd$Date.of.measure <- as.vector(sapply(data.nswtnd$Date.of.measure, function(x) { unlist(strsplit(x,"/"))[3] })) ## Extract the years only +data.nswtnd <- read.csv("./data/raw/DataNSW/NSW_data_TND.csv", header = TRUE, stringsAsFactors = FALSE, + sep = "\t") +data.nswtnd$Date.of.measure <- as.character(format(as.Date(data.nswtnd$Date.of.measure, + format = "%d-%b-%y"), format = "%d/%b/%Y")) +data.nswtnd$Date.of.measure <- as.vector(sapply(data.nswtnd$Date.of.measure, function(x) { + unlist(strsplit(x, "/"))[3] +})) ## Extract the years only data.nsw <- rbind(data.nswbrc, data.nswbrt, data.nswbs1, data.nswbs2, data.nswtnd) -###################################### -## MASSAGE TRAIT DATA -############################ -data.traits <- read.csv("./data/raw/DataNSW/NSW_traits.csv",header=TRUE,stringsAsFactors=FALSE) - -########################################## -## FORMAT INDIVIDUAL TREE DATA -############# -## Each tree has at most 3 observations (from prelim checks of the data) -data.nsw$treeid <- apply(data.nsw[,1:2],1,paste,collapse=".") -data.nsw2 <- data.frame(data.nsw[1,], year1 = NA, year2 = NA, dbh1 = NA, dbh2 = NA) -for(k in 1:length(unique(data.nsw$treeid))) { - sub.datansw <- as.data.frame(data.nsw[which(data.nsw$treeid == unique(data.nsw$treeid)[k]),]) - if(nrow(sub.datansw) == 1) { data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw, year1 = sub.datansw$Date.of.measure[1], year2 = NA, dbh1 = sub.datansw$Dbh[1], dbh2 = NA)) } - if(nrow(sub.datansw) == 2) { - data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw[1,], year1 = sub.datansw$Date.of.measure[1], year2 = sub.datansw$Date.of.measure[2], dbh1 = sub.datansw$Dbh[1], dbh2 = sub.datansw$Dbh[2])) } - if(nrow(sub.datansw) == 3) { - data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw[1,], year1 = sub.datansw$Date.of.measure[1], year2 = sub.datansw$Date.of.measure[2], dbh1 = sub.datansw$Dbh[1], dbh2 = sub.datansw$Dbh[2])) - data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw[1,], year1 = sub.datansw$Date.of.measure[2], year2 = sub.datansw$Date.of.measure[3], dbh1 = sub.datansw$Dbh[2], dbh2 = sub.datansw$Dbh[3])) } - } -data.nsw2 <- data.nsw2[-1,]; data.nsw2$Date.of.measure <- data.nsw2$Dbh <- NULL -data.nsw <- data.nsw2; for(k in 9:12) data.nsw[,k] <- as.numeric(data.nsw[,k]) - -## change unit and names of variables to be the same in all data for the tree -data.nsw$year <- (data.nsw$year2-data.nsw$year1) ## number of year between measurements -data.nsw$G <- 10*(data.nsw$dbh2-data.nsw$dbh1)/(data.nsw$year) ## diameter growth in mm per year +###################################### MASSAGE TRAIT DATA +data.traits <- read.csv("./data/raw/DataNSW/NSW_traits.csv", header = TRUE, stringsAsFactors = FALSE) + +########################################## FORMAT INDIVIDUAL TREE DATA Each tree has at most 3 observations (from prelim +########################################## checks of the data) +data.nsw$treeid <- apply(data.nsw[, 1:2], 1, paste, collapse = ".") +data.nsw2 <- data.frame(data.nsw[1, ], year1 = NA, year2 = NA, dbh1 = NA, dbh2 = NA) +for (k in 1:length(unique(data.nsw$treeid))) { + sub.datansw <- as.data.frame(data.nsw[which(data.nsw$treeid == unique(data.nsw$treeid)[k]), + ]) + if (nrow(sub.datansw) == 1) { + data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw, year1 = sub.datansw$Date.of.measure[1], + year2 = NA, dbh1 = sub.datansw$Dbh[1], dbh2 = NA)) + } + if (nrow(sub.datansw) == 2) { + data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw[1, ], year1 = sub.datansw$Date.of.measure[1], + year2 = sub.datansw$Date.of.measure[2], dbh1 = sub.datansw$Dbh[1], dbh2 = sub.datansw$Dbh[2])) + } + if (nrow(sub.datansw) == 3) { + data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw[1, ], year1 = sub.datansw$Date.of.measure[1], + year2 = sub.datansw$Date.of.measure[2], dbh1 = sub.datansw$Dbh[1], dbh2 = sub.datansw$Dbh[2])) + data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw[1, ], year1 = sub.datansw$Date.of.measure[2], + year2 = sub.datansw$Date.of.measure[3], dbh1 = sub.datansw$Dbh[2], dbh2 = sub.datansw$Dbh[3])) + } +} +data.nsw2 <- data.nsw2[-1, ] +data.nsw2$Date.of.measure <- data.nsw2$Dbh <- NULL +data.nsw <- data.nsw2 +for (k in 9:12) data.nsw[, k] <- as.numeric(data.nsw[, k]) + +## change unit and names of variables to be the same in all data for the tree +data.nsw$year <- (data.nsw$year2 - data.nsw$year1) ## number of year between measurements +data.nsw$G <- 10 * (data.nsw$dbh2 - data.nsw$dbh1)/(data.nsw$year) ## diameter growth in mm per year ## THERE ARE SOME ROWS WITH STRONG NEGATIVE GROWTH THAT YOU MIGHT WANT TO REMOVE -head(data.nsw[order(data.nsw$G),]) - -data.nsw$D <- data.nsw[["dbh1"]]; ## diameter in cm -data.nsw$dead <- rep(NA, nrow(data.nsw)) ## dummy variable for dead tree 0 alive 1 dead - MISSING -data.nsw$sp <- as.character(data.nsw[["species"]]) ## species code - use the spp name as code -data.nsw$plot <- as.character(data.nsw[["Plot"]]) ## plot code -data.nsw$htot <- rep(NA,nrow(data.nsw)) ## height of tree in m - MISSING -### add plot weights for computation of competition index (in 1/m^2) - from the original excel file -data.nsw$weights <- rep(NA,nrow(data.nsw)) -data.nsw$weights[grep("AA",data.nsw$Plot)] <- 1/(20*80); data.nsw$weights[grep("BB",data.nsw$Plot)] <- 1/(20*80) -data.nsw$weights[grep("CC",data.nsw$Plot)] <- 1/(20*80); data.nsw$weights[grep("DD",data.nsw$Plot)] <- 1/(20*80) -data.nsw$weights[grep("BS",data.nsw$Plot)] <- 1/(25*30); -data.nsw$weights[grep("BR",data.nsw$Plot)] <- 1/(60.4*60.4); -data.nsw$weights[grep("END",data.nsw$Plot)] <- 1/(40*50); data.nsw$weights[grep("TND",data.nsw$Plot)] <- 1/(40*50); +head(data.nsw[order(data.nsw$G), ]) + +data.nsw$D <- data.nsw[["dbh1"]] +## diameter in cm +data.nsw$dead <- rep(NA, nrow(data.nsw)) ## dummy variable for dead tree 0 alive 1 dead - MISSING +data.nsw$sp <- as.character(data.nsw[["species"]]) ## species code - use the spp name as code +data.nsw$plot <- as.character(data.nsw[["Plot"]]) ## plot code +data.nsw$htot <- rep(NA, nrow(data.nsw)) ## height of tree in m - MISSING +### add plot weights for computation of competition index (in 1/m^2) - from the +### original excel file +data.nsw$weights <- rep(NA, nrow(data.nsw)) +data.nsw$weights[grep("AA", data.nsw$Plot)] <- 1/(20 * 80) +data.nsw$weights[grep("BB", data.nsw$Plot)] <- 1/(20 * 80) +data.nsw$weights[grep("CC", data.nsw$Plot)] <- 1/(20 * 80) +data.nsw$weights[grep("DD", data.nsw$Plot)] <- 1/(20 * 80) +data.nsw$weights[grep("BS", data.nsw$Plot)] <- 1/(25 * 30) + +data.nsw$weights[grep("BR", data.nsw$Plot)] <- 1/(60.4 * 60.4) + +data.nsw$weights[grep("END", data.nsw$Plot)] <- 1/(40 * 50) +data.nsw$weights[grep("TND", data.nsw$Plot)] <- 1/(40 * 50) + data.nsw$obs.id <- 1:nrow(data.nsw) -###################### -## ECOREGION -################### -## nsw has only 1 eco-region - -###################### -## PERCENT DEAD -################### -## NO DATA ON MORTALITY - -########################################### -### VARIABLES SELECTION FOR THE ANALYSIS -################### -vec.abio.var.names <- c("MAT","MAP") ## MISSING -#vec.basic.var <- c("obs.id","treeid","sp","plot","D","G","dead","year","htot","x","y","perc.dead") -#data.nsw <- subset(data.nsw,select=c(vec.basic.var)) #,vec.abio.var.names - -############################################## -## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species -########################### -## NEED TO COMPUTE BASED ON RADIUS AROUND TARGET TREE -### species as factor because number +###################### ECOREGION nsw has only 1 eco-region + +###################### PERCENT DEAD NO DATA ON MORTALITY + +########################################### VARIABLES SELECTION FOR THE ANALYSIS +vec.abio.var.names <- c("MAT", "MAP") ## MISSING +# vec.basic.var <- +# c('obs.id','treeid','sp','plot','D','G','dead','year','htot','x','y','perc.dead') +# data.nsw <- subset(data.nsw,select=c(vec.basic.var)) #,vec.abio.var.names + +############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in +############################################## m^2/ha without the target species NEED TO COMPUTE BASED ON RADIUS AROUND TARGET +############################################## TREE species as factor because number data.nsw[["species"]] <- factor(data.nsw[["species"]]) -data.nsw$spcode <- data.nsw[["species"]]; levels(data.nsw$spcode) <- 1:length(levels(data.nsw$spcode)) +data.nsw$spcode <- data.nsw[["species"]] +levels(data.nsw$spcode) <- 1:length(levels(data.nsw$spcode)) -data.BA.SP <- BA.SP.FUN(obs.id=as.vector(data.nsw[["treeid"]]), diam=as.vector(data.nsw[["D"]]), - sp=as.vector(data.nsw[["spcode"]]), id.plot=as.vector(data.nsw[["Plot"]]), - weights=data.nsw[["weights"]], weight.full.plot=NA) +data.BA.SP <- BA.SP.FUN(obs.id = as.vector(data.nsw[["treeid"]]), diam = as.vector(data.nsw[["D"]]), + sp = as.vector(data.nsw[["spcode"]]), id.plot = as.vector(data.nsw[["Plot"]]), + weights = data.nsw[["weights"]], weight.full.plot = NA) ## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0; -data.BA.SP2 <- data.frame(data.BA.SP); colnames(data.BA.SP2) <- colnames(data.BA.SP) +data.BA.SP[is.na(data.BA.SP)] <- 0 + +data.BA.SP2 <- data.frame(data.BA.SP) +colnames(data.BA.SP2) <- colnames(data.BA.SP) ### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP2)[-1] %in% unique(data.nsw[["spcode"]]))) >0) stop("competition index sp name not the same as in data.tree") +if (sum(!(names(data.BA.SP2)[-1] %in% unique(data.nsw[["spcode"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP2[,-1],1,sum,na.rm=TRUE) -data.BA.SP2$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) +BATOT.COMPET <- apply(data.BA.SP2[, -1], 1, sum, na.rm = TRUE) +data.BA.SP2$BATOT.COMPET <- BATOT.COMPET +rm(BATOT.COMPET) data.BA.SP <- data.BA.SP2 -# Rlim <- 15 # set size of neighborhood for competition index -# system.time(test <- fun.compute.BA.SP.XY.per.plot(1,data.tree=data.nsw,Rlim=15,parallel=TRUE,rpuDist=FALSE)) -# -# list.BA.SP.data <- mclapply(unique(data.nsw[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.nsw,Rlim=Rlim,mc.cores=4) -# data.BA.sp <- rbind.fill(list.BA.SP.data) -# dim(data.BA.SP) -# -# ### TEST DATA FORMAT -# if(sum(! rownames(BA.SP.temp)==data.tree[['obs.id']]) >0) stop('rows not in the good order') -# if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree[['species']]))))>0) stop('colnames does mot match species name') -# ## test same order as data.nsw -# if(sum(!data.BA.SP[["obs.id"]] == data.nsw[["obs.id"]]) >0) stop("competition index not in the same order than data.nsw") -# -# ## REMOVE TREE IN BUFFER ZONE -# not.in.buffer.zone <- (data.nsw[['x']]<(250-Rlim) & -# data.nsw[['x']]>(0+Rlim) & -# data.nsw[['y']]<(250-Rlim) & -# data.nsw[['y']]>(0+Rlim)) -# -# # remove subset -# data.nsw <- subset(data.nsw,subset=not.in.buffer.zone) -# data.BA.sp <- subset(data.BA.sp,subset=not.in.buffer.zone) -# -# ## plot each plot -# pdf("./figs/plots.tree.pdf") -# lapply(unique(data.nsw[["plot"]]),FUN=fun.circles.plot,data.nsw[['x']],data.nsw[['y']],data.nsw[["plot"]],data.nsw[["D"]],inches=0.2,xlim=c(0,250),ylim=c(0,250)) +# Rlim <- 15 # set size of neighborhood for competition index system.time(test <- +# fun.compute.BA.SP.XY.per.plot(1,data.tree=data.nsw,Rlim=15,parallel=TRUE,rpuDist=FALSE)) +# list.BA.SP.data <- +# mclapply(unique(data.nsw[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.nsw,Rlim=Rlim,mc.cores=4) +# data.BA.sp <- rbind.fill(list.BA.SP.data) dim(data.BA.SP) ### TEST DATA FORMAT +# if(sum(! rownames(BA.SP.temp)==data.tree[['obs.id']]) >0) stop('rows not in the +# good order') +# if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree[['species']]))))>0) +# stop('colnames does mot match species name') ## test same order as data.nsw +# if(sum(!data.BA.SP[['obs.id']] == data.nsw[['obs.id']]) >0) stop('competition +# index not in the same order than data.nsw') ## REMOVE TREE IN BUFFER ZONE +# not.in.buffer.zone <- (data.nsw[['x']]<(250-Rlim) & data.nsw[['x']]>(0+Rlim) & +# data.nsw[['y']]<(250-Rlim) & data.nsw[['y']]>(0+Rlim)) # remove subset data.nsw +# <- subset(data.nsw,subset=not.in.buffer.zone) data.BA.sp <- +# subset(data.BA.sp,subset=not.in.buffer.zone) ## plot each plot +# pdf('./figs/plots.tree.pdf') +# lapply(unique(data.nsw[['plot']]),FUN=fun.circles.plot,data.nsw[['x']],data.nsw[['y']],data.nsw[['plot']],data.nsw[['D']],inches=0.2,xlim=c(0,250),ylim=c(0,250)) # dev.off() ## save everything as a list -list.nsw <- list(data.tree=data.nsw,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.nsw,file="./data/process/list.nsw.Rdata") - +list.nsw <- list(data.tree = data.nsw, data.BA.SP = data.BA.sp, data.traits = data.traits) +save(list.nsw, file = "./data/process/list.nsw.Rdata") diff --git a/merge.data.NZ.R b/merge.data.NZ.R index c0aa1760f8ea88a57b192b767897f930c7ab05db..89673ab008ccd47992b3de06a6eac819507e3610 100644 --- a/merge.data.NZ.R +++ b/merge.data.NZ.R @@ -1,108 +1,108 @@ ### MERGE NZ DATA -rm(list = ls()); source("./R/format.function.R"); library(reshape) +rm(list = ls()) +source("./R/format.function.R") +library(reshape) -######################### -## READ DATA -#################### -### read individuals tree data -data.nz <- read.csv('./data/raw/DataNVS/nz_treedata_growth_130801.csv',header=TRUE,stringsAsFactors=FALSE,skip = 9); data.nz <- data.nz[,-1] -data.trait <- read.csv('./data/raw/DataNVS/nz_traits_130801.csv',,header=TRUE,stringsAsFactors=FALSE); data.trait <- data.trait[,-1] -data.plot <- read.csv('./data/raw/DataNVS/nz_plotinfo_130801.csv',,header=TRUE,stringsAsFactors=FALSE); data.plot <- data.plot[,-1] +######################### READ DATA read individuals tree data +data.nz <- read.csv("./data/raw/DataNVS/nz_treedata_growth_130801.csv", header = TRUE, + stringsAsFactors = FALSE, skip = 9) +data.nz <- data.nz[, -1] +data.trait <- read.csv("./data/raw/DataNVS/nz_traits_130801.csv", , header = TRUE, + stringsAsFactors = FALSE) +data.trait <- data.trait[, -1] +data.plot <- read.csv("./data/raw/DataNVS/nz_plotinfo_130801.csv", , header = TRUE, + stringsAsFactors = FALSE) +data.plot <- data.plot[, -1] colnames(data.trait)[1] <- "sp" -data.plot$plid <- gsub("__", "_",data.plot$plid); data.plot$plid <- gsub("_", ".",data.plot$plid) ## Replace all underscores with a single dot -data.nz$plid <- gsub("__", "_",data.nz$plid); data.nz$plid <- gsub("_", ".",data.nz$plid) ## Replace all underscores with a single dot +data.plot$plid <- gsub("__", "_", data.plot$plid) +data.plot$plid <- gsub("_", ".", data.plot$plid) ## Replace all underscores with a single dot +data.nz$plid <- gsub("__", "_", data.nz$plid) +data.nz$plid <- gsub("_", ".", data.nz$plid) ## Replace all underscores with a single dot -###################################### -## MASSAGE TRAIT DATA -############################ -## Maximum height per species is already available from data.trait (in m); so no sd's and only obs per spp -data.max.height <- data.frame(code=data.trait[["sp"]],Max.height.mean=log10(data.trait[["height.m"]]),Max.height.sd=NA,Max.height.nobs=1) -#write.csv(data.max.height,file="./data/process/data.max.height.nz.csv") ## I was planning to save processed data in that folder +###################################### MASSAGE TRAIT DATA Maximum height per species is already available from +###################################### data.trait (in m); so no sd's and only obs per spp +data.max.height <- data.frame(code = data.trait[["sp"]], Max.height.mean = log10(data.trait[["height.m"]]), + Max.height.sd = NA, Max.height.nobs = 1) +# write.csv(data.max.height,file='./data/process/data.max.height.nz.csv') ## I +# was planning to save processed data in that folder -################################################################ -## FORMAT INDIVIDUAL TREE DATA -############# +################################################################ FORMAT INDIVIDUAL TREE DATA -## change unit and names of variables to be the same in all data for the tree -data.nz$G <- 10*(data.nz[["D1"]]-data.nz[["D0"]])/(data.nz[["t1"]]-data.nz[["t0"]]) ## diameter growth in mm per year -data.nz$year <- (data.nz[["t1"]]-data.nz[["t0"]]) ## number of year between measurement -data.nz$D <- data.nz[["D0"]] ## diameter in mm convert to cm -data.nz$dead <- as.numeric(is.na(data.nz[["D1"]])) ## dummy variable for dead tree 0 alive 1 dead +## change unit and names of variables to be the same in all data for the tree +data.nz$G <- 10 * (data.nz[["D1"]] - data.nz[["D0"]])/(data.nz[["t1"]] - data.nz[["t0"]]) ## diameter growth in mm per year +data.nz$year <- (data.nz[["t1"]] - data.nz[["t0"]]) ## number of year between measurement +data.nz$D <- data.nz[["D0"]] ## diameter in mm convert to cm +data.nz$dead <- as.numeric(is.na(data.nz[["D1"]])) ## dummy variable for dead tree 0 alive 1 dead data.nz$sp <- data.nz$sp data.nz$plot <- data.nz$plid -data.nz$htot <- rep(NA,length(data.nz[["sp"]])) ## Max height is already available so have as missing -data.nz$tree.id <- data.nz[["tag"]]; data.nz$tree.id <- gsub("__", "_",data.nz$tree.id); data.nz$tree.id <- gsub("_", ".",data.nz$tree.id) ## tree unique id +data.nz$htot <- rep(NA, length(data.nz[["sp"]])) ## Max height is already available so have as missing +data.nz$tree.id <- data.nz[["tag"]] +data.nz$tree.id <- gsub("__", "_", data.nz$tree.id) +data.nz$tree.id <- gsub("_", ".", data.nz$tree.id) ## tree unique id -########################################## -## CHANGE COORDINATE SYSTEM -############# -## DON'T KNOW THE EPSG CODE HERE -#### change coordinates system of Easting Northing to be in lat long WGS84 -data.nz <- merge(data.plot[,c(1,3,4)], data.nz, by = "plid") -library(sp); -#library(dismo); library(rgdal); -data.sp <- data.nz[,c("tree.id","Easting","Northing")] -coordinates(data.sp) <- c("Easting", "Northing") # define x y +########################################## CHANGE COORDINATE SYSTEM DON'T KNOW THE EPSG CODE HERE change coordinates +########################################## system of Easting Northing to be in lat long WGS84 +data.nz <- merge(data.plot[, c(1, 3, 4)], data.nz, by = "plid") +library(sp) +# library(dismo); library(rgdal); +data.sp <- data.nz[, c("tree.id", "Easting", "Northing")] +coordinates(data.sp) <- c("Easting", "Northing") # define x y proj4string(data.sp) <- CRS("+init=epsg:23030") # define projection system of our data ## EPSG CODE 23030 ED50 / UTM zone 30N summary(data.sp) detach(package:rgdal) -data.sp2 <- spTransform(data.sp,CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon -data.nz$Lon <- coordinates(data.sp2)[,"Easting"]; data.nz$Lat <- coordinates(data.sp2)[,"Northing"] -## ## plot on world map -## library(rworldmap) -## newmap <- getMap(resolution = "coarse") # different resolutions available -## plot(newmap) -## points(data.sp2,cex=0.2,col="red") -rm(data.sp,data.sp2) +data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon +data.nz$Lon <- coordinates(data.sp2)[, "Easting"] +data.nz$Lat <- coordinates(data.sp2)[, "Northing"] +## ## plot on world map library(rworldmap) newmap <- getMap(resolution = 'coarse') +## # different resolutions available plot(newmap) +## points(data.sp2,cex=0.2,col='red') +rm(data.sp, data.sp2) -###################### -## ECOREGION -################### -## merge greco to have no ecoregion with low number of observation -data.nz <- merge(data.nz, data.frame(plot = data.plot$plid, data.plot[,11:12]), by = "plot") -table(data.nz$Broad); +###################### ECOREGION merge greco to have no ecoregion with low number of observation +data.nz <- merge(data.nz, data.frame(plot = data.plot$plid, data.plot[, 11:12]), + by = "plot") +table(data.nz$Broad) -###################### -## PERCENT DEAD -################### -perc.dead <- tapply(data.nz[["dead"]],INDEX=data.nz[["plot"]],FUN=function.perc.dead) -# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) -data.nz <- merge(data.nz,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) +###################### PERCENT DEAD +perc.dead <- tapply(data.nz[["dead"]], INDEX = data.nz[["plot"]], FUN = function.perc.dead) +# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF +# AVAILABLE (disturbance record) +data.nz <- merge(data.nz, data.frame(plot = names(perc.dead), perc.dead = perc.dead), + by = "plot", sort = FALSE) -########################################################### -### PLOT SELECTION FOR THE ANALYSIS -################### -data.nz <- merge(data.nz, data.plot[,c(1,8:10)], by = "plid") -colnames(data.nz)[colnames(data.nz) %in% c("mat","map")] <- c("MAT","MAP") -#colnames(data.nz)[names(data.nz) =="eco_codemerged" ] <- c("ecocode") -vec.abio.var.names <- c("MAT","MAP") -vec.basic.var <- c("tree.id","sp","spname","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead") -data.tree <- subset(data.nz,select=c(vec.basic.var,vec.abio.var.names)) +########################################################### PLOT SELECTION FOR THE ANALYSIS +data.nz <- merge(data.nz, data.plot[, c(1, 8:10)], by = "plid") +colnames(data.nz)[colnames(data.nz) %in% c("mat", "map")] <- c("MAT", "MAP") +# colnames(data.nz)[names(data.nz) =='eco_codemerged' ] <- c('ecocode') +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var <- c("tree.id", "sp", "spname", "plot", "ecocode", "D", "G", "dead", + "year", "htot", "Lon", "Lat", "perc.dead") +data.tree <- subset(data.nz, select = c(vec.basic.var, vec.abio.var.names)) -############################################## -## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species -########################### -data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.nz[["tree.id"]]), diam=as.vector(data.nz[["D"]]), - sp=as.vector(data.nz[["sp"]]), id.plot=as.vector(data.nz[["plot"]]), - weights=as.vector(1/(pi*(0.5*data.nz[["D0"]]/100)^2)), weight.full.plot=NA) +############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in +############################################## m^2/ha without the target species +data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.nz[["tree.id"]]), diam = as.vector(data.nz[["D"]]), + sp = as.vector(data.nz[["sp"]]), id.plot = as.vector(data.nz[["plot"]]), weights = as.vector(1/(pi * + (0.5 * data.nz[["D0"]]/100)^2)), weight.full.plot = NA) ## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 +data.BA.SP[is.na(data.BA.SP)] <- 0 +data.BA.SP[, -1][data.BA.SP[, -1] < 0] <- 0 ### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data.nz[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") +if (sum(!(names(data.BA.SP)[-1] %in% unique(data.nz[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[,-1],MARGIN=1,FUN=sum,na.rm=TRUE) +BATOT.COMPET <- apply(data.BA.SP[, -1], MARGIN = 1, FUN = sum, na.rm = TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET ### create data frame -names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id=dataIFN.spain[["tree.id"]],ecocode=dataIFN.spain[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) +names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) +data.BA.sp <- merge(data.frame(tree.id = dataIFN.spain[["tree.id"]], ecocode = dataIFN.spain[["ecocode"]]), + data.BA.SP, by = "tree.id", sort = FALSE) ## test -if(sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]])>0) stop("competition index not in the same order than data.tree") +if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") ## save everything as a list -list.nz <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.trait) -save(list.nz,file="./data/process/list.nz.Rdata") - +list.nz <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.trait) +save(list.nz, file = "./data/process/list.nz.Rdata") diff --git a/merge.data.PARACOU.R b/merge.data.PARACOU.R index f614a4ce4ecd005f1440a8f0743b2b436387b491..72f3d5f33a437e15314abef7f9dd465a706637a7 100644 --- a/merge.data.PARACOU.R +++ b/merge.data.PARACOU.R @@ -1,174 +1,172 @@ -### MERGE paracou DATA -### Edited by FH +### MERGE paracou DATA Edited by FH rm(list = ls()) source("./R/format.function.R") library(reshape) -######################### -## READ DATA -#################### -### read individuals tree data -data.paracou <- read.table("./data/raw/DataParacou/20130717_paracou_1984_2012.csv", - header=TRUE,stringsAsFactors=FALSE,sep = ";", na.strings = "NULL") -#barplot(apply(!is.na(data.paracou[,paste("circ_",1984:2012,sep="")]),MARGIN=2,FUN=sum),las=3) +######################### READ DATA read individuals tree data +data.paracou <- read.table("./data/raw/DataParacou/20130717_paracou_1984_2012.csv", + header = TRUE, stringsAsFactors = FALSE, sep = "\n", na.strings = "NULL") +# barplot(apply(!is.na(data.paracou[,paste('circ_',1984:2012,sep='')]),MARGIN=2,FUN=sum),las=3) # select good columns -data.paracou <- data.paracou[,c("foret","parcelle","carre","arbre","vernaculaire","idtaxon", - "x","y","circ_2001","code_2001","circ_2005","code_2005", - "circ_2009","code_2009","campagne_mort","type_mort")] -colnames(data.paracou) <- c("forest","plot","subplot","tree","vernacular","taxonid","x","y","circum2001","code2001","circum2005","code2005","circum2009","code2009","yeardied","typedeath") +data.paracou <- data.paracou[, c("foret", "parcelle", "carre", "arbre", "vernaculaire", + "idtaxon", "x", "y", "circ_2001", "code_2001", "circ_2005", "code_2005", "circ_2009", + "code_2009", "campagne_mort", "type_mort")] +colnames(data.paracou) <- c("forest", "plot", "subplot", "tree", "vernacular", "taxonid", + "x", "y", "circum2001", "code2001", "circum2005", "code2005", "circum2009", "code2009", + "yeardied", "typedeath") ### change numeric separator -numeric.col.name <- c("x","y","circum2001","code2001","circum2005","code2005","circum2009","code2009") -for(k in numeric.col.name){ - data.paracou[,k] <- gsub(",",".",data.paracou[,k]); data.paracou[,k] <- as.numeric(data.paracou[,k]) - } ## Replace all , in decimals with . - -data.paracou$treeid <- apply(data.paracou[,c("plot","subplot","tree")],1,paste,collapse="."); ## Create a tree id -data.paracou <- data.paracou[,c(ncol(data.paracou),1:(ncol(data.paracou)-1))] - -## ## plot each plot -## pdf("./figs/plots.paracou.pdf") -## lapply(unique(data.paracou[["plot"]]),FUN=fun.circles.plot,data.paracou[['x']],data.paracou[['y']],data.paracou[["plot"]],data.paracou[["circum2009"]],inches=0.2) +numeric.col.name <- c("x", "y", "circum2001", "code2001", "circum2005", "code2005", + "circum2009", "code2009") +for (k in numeric.col.name) { + data.paracou[, k] <- gsub(",", ".", data.paracou[, k]) + data.paracou[, k] <- as.numeric(data.paracou[, k]) +} ## Replace all , in decimals with . + +data.paracou$treeid <- apply(data.paracou[, c("plot", "subplot", "tree")], 1, paste, + collapse = ".") ## Create a tree id +data.paracou <- data.paracou[, c(ncol(data.paracou), 1:(ncol(data.paracou) - 1))] + +## ## plot each plot pdf('./figs/plots.paracou.pdf') +## lapply(unique(data.paracou[['plot']]),FUN=fun.circles.plot,data.paracou[['x']],data.paracou[['y']],data.paracou[['plot']],data.paracou[['circum2009']],inches=0.2) ## dev.off() -####################### -###### SELECT OBSERVATION WITHOUT PROBLEMS -## REMOVE ALL TREES WITH X OR Y >250 m -data.paracou <- subset(data.paracou,subset=(!is.na(data.paracou[["x"]])) & data.paracou[["x"]]<251 & data.paracou[["y"]]<251) -#### REMOVE PLOTs 16 17 18 ACCORDING TO GHSILAIN -data.paracou <- subset(data.paracou,subset=! data.paracou[["plot"]] %in% 16:18) +####################### SELECT OBSERVATION WITHOUT PROBLEMS REMOVE ALL TREES WITH X OR Y >250 m +data.paracou <- subset(data.paracou, subset = (!is.na(data.paracou[["x"]])) & data.paracou[["x"]] < + 251 & data.paracou[["y"]] < 251) +#### REMOVE PLOTs 16 17 18 ACCORDING TO GHSILAIN +data.paracou <- subset(data.paracou, subset = !data.paracou[["plot"]] %in% 16:18) ## keep only tree alive in 2001 -data.paracou <- subset(data.paracou,subset=!(as.numeric(data.paracou[["yeardied"]])<=2001 & !is.na(data.paracou[["yeardied"]]))) - - -###################################### -## MASSAGE TRAIT DATA -############################ - -########################################## -## FORMAT INDIVIDUAL TREE DATA -############# -data.paracou2 <- data.paracou[rep(1:nrow(data.paracou),each=2),c(1:10,(ncol(data.paracou)-2):ncol(data.paracou))] -rownames(data.paracou2) <- 1:nrow(data.paracou2); data.paracou2 <- as.data.frame(data.paracou2) -data.paracou2$yr1 <- rep(c(2001,2001+4),nrow(data.paracou)); data.paracou2$yr2 <- rep(c(2005,2005+4),nrow(data.paracou)) -data.paracou2$year <- rep(c(4,4),nrow(data.paracou)) -data.paracou2$dbh1 <- c(rbind(data.paracou$circum2001/pi,data.paracou$circum2005/pi)) -data.paracou2$dbh2 <- c(rbind(data.paracou$circum2005/pi,data.paracou$circum2009/pi)) -data.paracou2$code1 <- c(as.numeric(rbind(data.paracou$code2001,data.paracou$code2005))) -data.paracou2$code2 <- c(as.numeric(rbind(data.paracou$code2005,data.paracou$code2009))) -data.paracou2$dead <- rep(0,nrow(data.paracou)*2) -data.paracou2$dead[c(as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]])), - as.numeric(data.paracou[["yeardied"]]) %in% 2006:2009 & (!is.na(data.paracou[["yeardied"]])))] <- 1 +data.paracou <- subset(data.paracou, subset = !(as.numeric(data.paracou[["yeardied"]]) <= + 2001 & !is.na(data.paracou[["yeardied"]]))) + + +###################################### MASSAGE TRAIT DATA + +########################################## FORMAT INDIVIDUAL TREE DATA +data.paracou2 <- data.paracou[rep(1:nrow(data.paracou), each = 2), c(1:10, (ncol(data.paracou) - + 2):ncol(data.paracou))] +rownames(data.paracou2) <- 1:nrow(data.paracou2) +data.paracou2 <- as.data.frame(data.paracou2) +data.paracou2$yr1 <- rep(c(2001, 2001 + 4), nrow(data.paracou)) +data.paracou2$yr2 <- rep(c(2005, 2005 + 4), nrow(data.paracou)) +data.paracou2$year <- rep(c(4, 4), nrow(data.paracou)) +data.paracou2$dbh1 <- c(rbind(data.paracou$circum2001/pi, data.paracou$circum2005/pi)) +data.paracou2$dbh2 <- c(rbind(data.paracou$circum2005/pi, data.paracou$circum2009/pi)) +data.paracou2$code1 <- c(as.numeric(rbind(data.paracou$code2001, data.paracou$code2005))) +data.paracou2$code2 <- c(as.numeric(rbind(data.paracou$code2005, data.paracou$code2009))) +data.paracou2$dead <- rep(0, nrow(data.paracou) * 2) +data.paracou2$dead[c(as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]])), + as.numeric(data.paracou[["yeardied"]]) %in% 2006:2009 & (!is.na(data.paracou[["yeardied"]])))] <- 1 data.paracou2$sp <- data.paracou[["taxonid"]] ## remove tree dead at first census for both date (census 2001-2005 2005-2009) -data.paracou <- subset(data.paracou2,subset=!(data.paracou2[['yr1']] ==2005 & (as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]]))))) +data.paracou <- subset(data.paracou2, subset = !(data.paracou2[["yr1"]] == 2005 & + (as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]]))))) -## change unit and names of variables to be the same in all data for the tree -data.paracou$G <- 10*(data.paracou$dbh2-data.paracou$dbh1)/data.paracou$year ## diameter growth in mm per year -data.paracou$G[data.paracou$code1>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh1 -data.paracou$G[data.paracou$code2>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh2 +## change unit and names of variables to be the same in all data for the tree +data.paracou$G <- 10 * (data.paracou$dbh2 - data.paracou$dbh1)/data.paracou$year ## diameter growth in mm per year +data.paracou$G[data.paracou$code1 > 0] <- NA ## indivs with code indicating problem in dbh measurment at dbh1 +data.paracou$G[data.paracou$code2 > 0] <- NA ## indivs with code indicating problem in dbh measurment at dbh2 -data.paracou[which(data.paracou$G < -50),] ## THERE SEEMS TO BE SOME PROBLEMS WITH THE DBH DATA ## much less issue after removing diam problem -data.paracou$D <- data.paracou[["dbh1"]]; data.paracou$D[data.paracou$D == 0] <- NA ;## diameter in cm -data.paracou$plot <- data.paracou$plot#apply(data.paracou[,c("forest","plot","subplot")],1,paste,collapse=".") ## plot code -data.paracou$htot <- rep(NA,length(data.paracou[["G"]])) ## height of tree in m - MISSING +data.paracou[which(data.paracou$G < -50), ] ## THERE SEEMS TO BE SOME PROBLEMS WITH THE DBH DATA ## much less issue after removing diam problem +data.paracou$D <- data.paracou[["dbh1"]] +data.paracou$D[data.paracou$D == 0] <- NA +## diameter in cm +data.paracou$plot <- data.paracou$plot #apply(data.paracou[,c('forest','plot','subplot')],1,paste,collapse='.') ## plot code +data.paracou$htot <- rep(NA, length(data.paracou[["G"]])) ## height of tree in m - MISSING data.paracou$obs.id <- 1:nrow(data.paracou) ### delete recruit in 2001 or 2005 for first census -data.paracou <- subset(data.paracou,subset=!is.na(data.paracou$D)) +data.paracou <- subset(data.paracou, subset = !is.na(data.paracou$D)) ## minimum circumfer 30 delete all tree with a dbh <30/pi, -data.paracou <- subset(data.paracou,subset= data.paracou[["D"]]>(30/pi)) +data.paracou <- subset(data.paracou, subset = data.paracou[["D"]] > (30/pi)) -###################### -## ECOREGION -################### -## paracou has only 1 eco-region YES NO ECOREGION +###################### ECOREGION paracou has only 1 eco-region YES NO ECOREGION -###################### -## PERCENT DEAD -################### -## variable percent dead -## compute numer of dead per plot to remove plot with disturbance -## THERE ARE LOTS OF NAs - DID YOU WANT TO REMOVE THEM OR TREAT THEM AS ALIVE +###################### PERCENT DEAD variable percent dead compute numer of dead per plot to remove +###################### plot with disturbance THERE ARE LOTS OF NAs - DID YOU WANT TO REMOVE THEM OR +###################### TREAT THEM AS ALIVE -perc.dead <- tapply(data.paracou[["dead"]],INDEX=data.paracou[["plot"]],FUN=function.perc.dead2) -data.paracou <- merge(data.paracou,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) +perc.dead <- tapply(data.paracou[["dead"]], INDEX = data.paracou[["plot"]], FUN = function.perc.dead2) +data.paracou <- merge(data.paracou, data.frame(plot = names(perc.dead), perc.dead = perc.dead), + by = "plot", sort = FALSE) -########################################################### -### VARIABLES SELECTION FOR THE ANALYSIS -################### +########################################################### VARIABLES SELECTION FOR THE ANALYSIS -#vec.abio.var.names <- c("MAT","MAP") ## MISSING NEED OTHER BASED ON TOPOGRAPHY ASK BRUNO -vec.basic.var <- c("obs.id","treeid","sp","plot","D","G","dead","year","htot","x","y","perc.dead") -data.tree <- subset(data.paracou,select=c(vec.basic.var)) #,vec.abio.var.names +# vec.abio.var.names <- c('MAT','MAP') ## MISSING NEED OTHER BASED ON TOPOGRAPHY +# ASK BRUNO +vec.basic.var <- c("obs.id", "treeid", "sp", "plot", "D", "G", "dead", "year", "htot", + "x", "y", "perc.dead") +data.tree <- subset(data.paracou, select = c(vec.basic.var)) #,vec.abio.var.names -############################################## -## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species -########################### -## NEED TO COMPUTE BASED ON RADIUS AROUND TARGET TREE +############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in +############################################## m^2/ha without the target species NEED TO COMPUTE BASED ON RADIUS AROUND TARGET +############################################## TREE ### species as factor because number -data.tree[['sp']] <- factor(data.tree[['sp']]) -Rlim <- 15 # set size of neighborhood for competition index +data.tree[["sp"]] <- factor(data.tree[["sp"]]) +Rlim <- 15 # set size of neighborhood for competition index -## system.time(test <- fun.compute.BA.SP.XY.per.plot(1,data.tree=data.tree,Rlim=Rlim,parallel=TRUE,rpuDist=FALSE)) +## system.time(test <- +## fun.compute.BA.SP.XY.per.plot(1,data.tree=data.tree,Rlim=Rlim,parallel=TRUE,rpuDist=FALSE)) library(doParallel) -list.BA.SP.data <- mclapply(unique(data.tree[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.tree,Rlim=Rlim,mc.cores=4) +list.BA.SP.data <- mclapply(unique(data.tree[["plot"]]), FUN = fun.compute.BA.SP.XY.per.plot, + data.tree = data.tree, Rlim = Rlim, mc.cores = 4) data.BA.sp <- rbind.fill(list.BA.SP.data) dim(data.BA.SP) ### TEST DATA FORMAT -if(sum(! rownames(BA.SP.temp)==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') -if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') +if (sum(!rownames(BA.SP.temp) == data.tree.s[["obs.id"]]) > 0) stop("rows not in the good order") +if (sum(!colnames(BA.SP.temp) == as.character((levels(data.tree.s[["sp"]])))) > 0) stop("colnames does mot match species name") ## test same order as data.tree -if(sum(!data.BA.SP[["obs.id"]] == data.tree[["obs.id"]]) >0) stop("competition index not in the same order than data.tree") +if (sum(!data.BA.SP[["obs.id"]] == data.tree[["obs.id"]]) > 0) stop("competition index not in the same order than data.tree") -################################################ -## REMOVE TREE IN BUFFER ZONE BUFFER ZONE -not.in.buffer.zone <- (data.tree[['x']]<(250-Rlim) & -data.tree[['x']]>(0+Rlim) & -data.tree[['y']]<(250-Rlim) & -data.tree[['y']]>(0+Rlim)) +################################################ REMOVE TREE IN BUFFER ZONE BUFFER ZONE +not.in.buffer.zone <- (data.tree[["x"]] < (250 - Rlim) & data.tree[["x"]] > (0 + + Rlim) & data.tree[["y"]] < (250 - Rlim) & data.tree[["y"]] > (0 + Rlim)) # remove subset -data.tree <- subset(data.tree,subset=not.in.buffer.zone) -data.BA.sp <- subset(data.BA.sp,subset=not.in.buffer.zone) +data.tree <- subset(data.tree, subset = not.in.buffer.zone) +data.BA.sp <- subset(data.BA.sp, subset = not.in.buffer.zone) -######################## -######################### -##### TRAITS +######################## TRAITS ### read species names -species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";") +species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv", + stringsAsFactors = FALSE, header = T, sep = "\n") species.clean$sp <- species.clean[["idTaxon"]] -species.clean$Latin_name <- paste(species.clean[["Genre"]],species.clean[["Espece"]],sep=" ") +species.clean$Latin_name <- paste(species.clean[["Genre"]], species.clean[["Espece"]], + sep = " ") ## keep only one row pers idTaxon -species.clean <- subset(species.clean,subset=!duplicated(species.clean[["sp"]]),select=c("sp","Latin_name","Genre","Espece","Famille")) +species.clean <- subset(species.clean, subset = !duplicated(species.clean[["sp"]]), + select = c("sp", "Latin_name", "Genre", "Espece", "Famille")) ## select only species present in data base -species.clean <- subset(species.clean,subset=species.clean[["sp"]] %in% data.tree[["sp"]]) -## percentage of species with no taxonomic identification -length(grep("Indet",species.clean[["Latin_name"]]))/nrow(species.clean) ## 25% +species.clean <- subset(species.clean, subset = species.clean[["sp"]] %in% data.tree[["sp"]]) +## percentage of species with no taxonomic identification +length(grep("Indet", species.clean[["Latin_name"]]))/nrow(species.clean) ## 25% ### need to read the different traits data based and merge ..... -bridge <- read.csv("./data/raw/DataParacou/BridgeDATA.g.csv",stringsAsFactors=FALSE, header = T, sep = ";") -bridge$Latin_name <- paste(bridge[["Genus"]],bridge[["species"]],sep=" ") -dataWD <- read.csv("./data/raw/DataParacou/WD-Species-Paracou-Ervan_GV.csv",stringsAsFactors=FALSE, header = T,sep=" ") -seed.traits <- read.csv("./data/raw/DataParacou/Autour de Paracou - Releves par trait et taxon.txt",stringsAsFactors=FALSE, header = T, sep = "\t") - -### +bridge <- read.csv("./data/raw/DataParacou/BridgeDATA.g.csv", stringsAsFactors = FALSE, + header = T, sep = "\n") +bridge$Latin_name <- paste(bridge[["Genus"]], bridge[["species"]], sep = " ") +dataWD <- read.csv("./data/raw/DataParacou/WD-Species-Paracou-Ervan_GV.csv", stringsAsFactors = FALSE, + header = T, sep = " ") +seed.traits <- read.csv("./data/raw/DataParacou/Autour de Paracou - Releves par trait et taxon.txt", + stringsAsFactors = FALSE, header = T, sep = "\t") + +### sum(species.clean[["Latin_name"]] %in% bridge[["Latin_name"]])/length(species.clean[["Latin_name"]]) ## only 307 species /775 are in teh traits data .... ## save everything as a list -list.paracou <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.spain,file="./data/process/list.paracou.Rdata") - +list.paracou <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) +save(list.spain, file = "./data/process/list.paracou.Rdata") diff --git a/merge.data.SPAIN.R b/merge.data.SPAIN.R index 2e33f299f15e164a6aba67c15307b633d9438c63..a9b6f70abb53109a63a5d045fa2e52a38324fbd3 100644 --- a/merge.data.SPAIN.R +++ b/merge.data.SPAIN.R @@ -1,149 +1,153 @@ -### MERGE spain DATA -### Edited by FH -rm(list = ls()); source("./R/format.function.R"); library(reshape) - -######################### -## READ DATA -#################### -### read individuals tree data -#data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI.txt',header=TRUE,stringsAsFactors=FALSE,sep = "\t") -data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI_aug13_alldata.txt',header=TRUE,stringsAsFactors=FALSE,sep = "\t") - -###################################### -## MASSAGE TRAIT DATA -############################ -## Compute maximum height per species plus sd from observed height to add variables to the traits data base -## Because we have two heights, then take the max of the two heights and then bootstrap -res.quant.boot <- t(sapply(levels(factor(data.spain[["SP_code"]])),FUN=f.quantile.boot,R=1000,x=log10(apply(data.spain[,c("ht1","ht2")],1,max,na.rm=T)),fac=factor(data.spain[["SP_code"]]))) +### MERGE spain DATA Edited by FH +rm(list = ls()) +source("./R/format.function.R") +library(reshape) + +######################### READ DATA read individuals tree data data.spain <- +######################### read.table('./data/raw/DataSpain/Tree_data_SFI.txt',header=TRUE,stringsAsFactors=FALSE,sep +######################### = '\t') +data.spain <- read.table("./data/raw/DataSpain/Tree_data_SFI_aug13_alldata.txt", + header = TRUE, stringsAsFactors = FALSE, sep = "\t") + +###################################### MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed +###################################### height to add variables to the traits data base Because we have two heights, +###################################### then take the max of the two heights and then bootstrap +res.quant.boot <- t(sapply(levels(factor(data.spain[["SP_code"]])), FUN = f.quantile.boot, + R = 1000, x = log10(apply(data.spain[, c("ht1", "ht2")], 1, max, na.rm = T)), + fac = factor(data.spain[["SP_code"]]))) ## create data base -data.max.height <- data.frame(code=rownames(res.quant.boot),Max.height.mean=res.quant.boot[,1],Max.height.sd=res.quant.boot[,2],Max.height.nobs=res.quant.boot[,3]) +data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, + 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3]) rm(res.quant.boot) -write.csv(data.max.height,file="./data/process/data.max.height.spain.csv") # I was planning to save processed data in that folder -# -# ## merge TRY with max height -# merge.TRY <- merge(merge.TRY,data.max.height,by="code") -# rm(data.max.height) -# ## use mean sd of max tree height over all species -# merge.TRY$Max.height.sd.1 <- rep(mean(merge.TRY[["Max.height.sd"]],na.rm=TRUE),length=nrow(merge.TRY)) -# -# ### keep only variables needed in traits data -# names.traits.data <- c("code","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean", -# "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd.1","Seed.mass.sd.1","SLA.sd.1", "Wood.Density.sd.1", -# "Leaf.Lifespan.sd.1","Max.height.sd.1") -# -# data.traits <- merge.TRY[,names.traits.data] -# names(data.traits) <- c("sp","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean", -# "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd","Seed.mass.sd","SLA.sd", "Wood.Density.sd", -# "Leaf.Lifespan.sd","Max.height.sd") ## rename to have standard variables name -# rm(merge.TRY,names.traits.data) - -################################################################ -## FORMAT INDIVIDUAL TREE DATA -############# - -## change unit and names of variables to be the same in all data for the tree -data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year -data.spain$year <- data.spain[["years"]] ## number of year between measurement - MISSING -data.spain$D <- data.spain[["dbh1"]]/10 ## diameter in mm convert to cm -data.spain$dead <- as.numeric(data.spain[["Life_status"]] == "dead") ## dummy variable for dead tree 0 alive 1 dead - MIGHT WANT TO CHANGE THIS TO BE BASED ON MORTALITY_CUT -data.spain$sp <- as.character(data.spain[["SP_code"]]) ## species code -data.spain$plot <- (data.spain[["Plot_ID_SFI"]]) ## plot code -data.spain$htot <- data.spain[["ht1"]]## height of tree in m -data.spain$tree.id <- data.spain$Tree_ID_SFI ## tree unique id - -#### change coordinates system of x y to be in lat long WGS84/don't know how to do this -library(sp); library(dismo); library(rgdal); -data.sp <- data.spain[,c("Tree_ID_SFI","CX","CY")] -coordinates(data.sp) <- c("CX", "CY") # define x y +write.csv(data.max.height, file = "./data/process/data.max.height.spain.csv") # I was planning to save processed data in that folder +# ## merge TRY with max height merge.TRY <- +# merge(merge.TRY,data.max.height,by='code') rm(data.max.height) ## use mean sd +# of max tree height over all species merge.TRY$Max.height.sd.1 <- +# rep(mean(merge.TRY[['Max.height.sd']],na.rm=TRUE),length=nrow(merge.TRY)) ### +# keep only variables needed in traits data names.traits.data <- +# c('code','Latin_name','Leaf.N.mean','Seed.mass.mean','SLA.mean','Wood.Density.mean', +# 'Leaf.Lifespan.mean','Max.height.mean','Leaf.N.sd.1','Seed.mass.sd.1','SLA.sd.1', +# 'Wood.Density.sd.1', 'Leaf.Lifespan.sd.1','Max.height.sd.1') data.traits <- +# merge.TRY[,names.traits.data] names(data.traits) <- +# c('sp','Latin_name','Leaf.N.mean','Seed.mass.mean','SLA.mean','Wood.Density.mean', +# 'Leaf.Lifespan.mean','Max.height.mean','Leaf.N.sd','Seed.mass.sd','SLA.sd', +# 'Wood.Density.sd', 'Leaf.Lifespan.sd','Max.height.sd') ## rename to have +# standard variables name rm(merge.TRY,names.traits.data) + +################################################################ FORMAT INDIVIDUAL TREE DATA + +## change unit and names of variables to be the same in all data for the tree +data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year +data.spain$year <- data.spain[["years"]] ## number of year between measurement - MISSING +data.spain$D <- data.spain[["dbh1"]]/10 ## diameter in mm convert to cm +data.spain$dead <- as.numeric(data.spain[["Life_status"]] == "dead") ## dummy variable for dead tree 0 alive 1 dead - MIGHT WANT TO CHANGE THIS TO BE BASED ON MORTALITY_CUT +data.spain$sp <- as.character(data.spain[["SP_code"]]) ## species code +data.spain$plot <- (data.spain[["Plot_ID_SFI"]]) ## plot code +data.spain$htot <- data.spain[["ht1"]] ## height of tree in m +data.spain$tree.id <- data.spain$Tree_ID_SFI ## tree unique id + +#### change coordinates system of x y to be in lat long WGS84/don't know how to do +#### this +library(sp) +library(dismo) +library(rgdal) + +data.sp <- data.spain[, c("Tree_ID_SFI", "CX", "CY")] +coordinates(data.sp) <- c("CX", "CY") # define x y proj4string(data.sp) <- CRS("+init=epsg:23030") # define projection system of our data ## EPSG CODE 23030 ED50 / UTM zone 30N summary(data.sp) detach(package:rgdal) -data.sp2 <- spTransform(data.sp,CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon -data.spain$Lon <- coordinates(data.sp2)[,"CX"] -data.spain$Lat <- coordinates(data.sp2)[,"CY"] -## ## plot on world map -## library(rworldmap) -## newmap <- getMap(resolution = "coarse") # different resolutions available -## plot(newmap) -## points(data.sp2,cex=0.2,col="red") -rm(data.sp,data.sp2) - -###################### -## ECOREGION -################### -## merge greco to have no ecoregion with low number of observation -greco <- read.csv(file = "./data/raw/DataSpain/R_Ecoregion.csv", header = T) -greco <- greco[,c("Plot_ID_SFI","BIOME","eco_code")] -greco2 <- greco[!duplicated(greco$Plot),]; +data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon +data.spain$Lon <- coordinates(data.sp2)[, "CX"] +data.spain$Lat <- coordinates(data.sp2)[, "CY"] +## ## plot on world map library(rworldmap) newmap <- getMap(resolution = 'coarse') +## # different resolutions available plot(newmap) +## points(data.sp2,cex=0.2,col='red') +rm(data.sp, data.sp2) + +###################### ECOREGION merge greco to have no ecoregion with low number of observation +greco <- read.csv(file = "./data/raw/DataSpain/R_Ecoregion.csv", header = T) +greco <- greco[, c("Plot_ID_SFI", "BIOME", "eco_code")] +greco2 <- greco[!duplicated(greco$Plot), ] + rm(greco) data.spain <- merge(data.spain, greco2, by = "Plot_ID_SFI") rm(greco2) table(data.spain$eco_code) -## There's an eco-region with no code, and one with < 1000 sites -## The former we could drop as they were on the border of Spain - -library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); -ecoreg <- unclass(data.spain$eco_code); -plot(data.spain[["CX"]][order(ecoreg)],data.spain[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg)))); -legend("topleft", col = mycols, legend = levels(data.spain$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2) -points(data.spain[["CX"]][ecoreg == 9],data.spain[["CY"]][ecoreg == 9],pty=".",cex=.5, col = "black"); ## Highlight the "rare" ecoregions -points(data.spain[["CX"]][ecoreg == 1],data.spain[["CY"]][ecoreg == 1],pty=".",cex=.5, col = "black"); ## Highlight the "rare" ecoregions -## PA1219 looks to be similar to PA1209; merge them together -data.spain$eco_codemerged <- combine_factor(data.spain$eco_code, c(1:8,6,9)) -data.spain <- data.spain[-which(data.spain$eco_codemerged == ""),] - -###################### -## PERCENT DEAD -################### -## variable percent dead/cannot do with since dead variable is missing -###compute numer of dead per plot to remove plot with disturbance -perc.dead <- tapply(data.spain[["dead"]],INDEX=data.spain[["plot"]],FUN=function.perc.dead) +## There's an eco-region with no code, and one with < 1000 sites The former we +## could drop as they were on the border of Spain + +library(RColorBrewer) +mycols <- brewer.pal(10, "Set3") + +ecoreg <- unclass(data.spain$eco_code) + +plot(data.spain[["CX"]][order(ecoreg)], data.spain[["CY"]][order(ecoreg)], pty = ".", + cex = 0.2, col = rep(mycols, as.vector(table(ecoreg)))) + +legend("topleft", col = mycols, legend = levels(data.spain$eco_code), pch = rep(19, + length(levels(ecoreg))), cex = 2) +points(data.spain[["CX"]][ecoreg == 9], data.spain[["CY"]][ecoreg == 9], pty = ".", + cex = 0.5, col = "black") +## Highlight the 'rare' ecoregions +points(data.spain[["CX"]][ecoreg == 1], data.spain[["CY"]][ecoreg == 1], pty = ".", + cex = 0.5, col = "black") +## Highlight the 'rare' ecoregions PA1219 looks to be similar to PA1209, merge +## them together +data.spain$eco_codemerged <- combine_factor(data.spain$eco_code, c(1:8, 6, 9)) +data.spain <- data.spain[-which(data.spain$eco_codemerged == ""), ] + +###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is +###################### missing compute numer of dead per plot to remove plot with disturbance +perc.dead <- tapply(data.spain[["dead"]], INDEX = data.spain[["plot"]], FUN = function.perc.dead) table(data.spain$dead) -## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) -data.spain <- merge(data.spain,data.frame(plot=as.numeric(names(perc.dead)),perc.dead=perc.dead),sort=FALSE, by = "plot") +## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF +## AVAILABLE (disturbance record) +data.spain <- merge(data.spain, data.frame(plot = as.numeric(names(perc.dead)), perc.dead = perc.dead), + sort = FALSE, by = "plot") -########################################################### -### PLOT SELECTION FOR THE ANALYSIS -################### -## Remove data with mortality == 1 or 2 +########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with mortality == 1 or 2 table(data.spain$Mortality_Cut) -data.spain <- subset(data.spain,subset= (data.spain[["Mortality_Cut"]] == 0 | data.spain[["Mortality_Cut"]] == "")) - -colnames(data.spain)[colnames(data.spain) %in% c("mat","pp","PET")] <- c("MAT","PP","PET") -colnames(data.spain)[names(data.spain) =="eco_codemerged"] <- c("ecocode") -vec.abio.var.names <- c("MAT","PP","PET") -vec.basic.var <- c("tree.id","sp","sp.name","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead") -data.tree <- subset(data.spain,select=c(vec.basic.var,vec.abio.var.names)) +data.spain <- subset(data.spain, subset = (data.spain[["Mortality_Cut"]] == 0 | data.spain[["Mortality_Cut"]] == + "")) + +colnames(data.spain)[colnames(data.spain) %in% c("mat", "pp", "PET")] <- c("MAT", + "PP", "PET") +colnames(data.spain)[names(data.spain) == "eco_codemerged"] <- c("ecocode") +vec.abio.var.names <- c("MAT", "PP", "PET") +vec.basic.var <- c("tree.id", "sp", "sp.name", "plot", "ecocode", "D", "G", "dead", + "year", "htot", "Lon", "Lat", "perc.dead") +data.tree <- subset(data.spain, select = c(vec.basic.var, vec.abio.var.names)) save(data.spain, file = "./data/process/datspain.RData") -############################################## -## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species -########################### -data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.spain[["tree.id"]]), diam=as.vector(data.spain[["D"]]), - sp=as.vector(data.spain[["sp"]]), id.plot=as.vector(data.spain[["plot"]]), - weights=as.vector(1/(pi*(data.spain[["R1"]])^2)), weight.full.plot=1/(pi*(25)^2)) +############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in +############################################## m^2/ha without the target species +data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.spain[["tree.id"]]), diam = as.vector(data.spain[["D"]]), + sp = as.vector(data.spain[["sp"]]), id.plot = as.vector(data.spain[["plot"]]), + weights = as.vector(1/(pi * (data.spain[["R1"]])^2)), weight.full.plot = 1/(pi * + (25)^2)) ## change NA and <0 data for 0 -data.BA.SP[which(is.na(data.BA.SP),arr.ind=TRUE)] <- 0 -data.BA.SP[,-1][which(data.BA.SP[,-1]<0,arr.ind=TRUE)] <- 0 +data.BA.SP[which(is.na(data.BA.SP), arr.ind = TRUE)] <- 0 +data.BA.SP[, -1][which(data.BA.SP[, -1] < 0, arr.ind = TRUE)] <- 0 ### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data.spain[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") +if (sum(!(names(data.BA.SP)[-1] %in% unique(data.spain[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[,-1],MARGIN=1,FUN=sum,na.rm=TRUE) +BATOT.COMPET <- apply(data.BA.SP[, -1], MARGIN = 1, FUN = sum, na.rm = TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET ### create data frame -names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id=dataIFN.spain[["tree.id"]],ecocode=dataIFN.spain[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) +names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) +data.BA.sp <- merge(data.frame(tree.id = dataIFN.spain[["tree.id"]], ecocode = dataIFN.spain[["ecocode"]]), + data.BA.SP, by = "tree.id", sort = FALSE) ## test -if( sum(! data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") +if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") ## save everything as a list -list.spain <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.spain,file="./data/process/list.spain.Rdata") - +list.spain <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) +save(list.spain, file = "./data/process/list.spain.Rdata") diff --git a/merge.data.SWISS.R b/merge.data.SWISS.R index 96ce7cc91dde631f44ff9f225d5d3bf9035348d9..5f0d792c78748efcecae4d64788513b8ffc3da89 100644 --- a/merge.data.SWISS.R +++ b/merge.data.SWISS.R @@ -1,98 +1,95 @@ -### MERGE Swiss DATA -### Edited by FH -rm(list = ls()); source("./R/format.function.R"); library(reshape); library(foreign) - -######################### -## READ DATA -#################### -### read individuals tree data -data.swiss1 <- read.csv("./data/raw/DataSwiss/LFI12.csv",header=TRUE,stringsAsFactors =FALSE) -data.swiss2 <- read.csv("./data/raw/DataSwiss/LFI23.csv",header=TRUE,stringsAsFactors =FALSE) -data.swiss3 <- read.csv("./data/raw/DataSwiss/LFI34.csv",header=TRUE,stringsAsFactors =FALSE) +### MERGE Swiss DATA Edited by FH +rm(list = ls()) +source("./R/format.function.R") +library(reshape) +library(foreign) + +######################### READ DATA read individuals tree data +data.swiss1 <- read.csv("./data/raw/DataSwiss/LFI12.csv", header = TRUE, stringsAsFactors = FALSE) +data.swiss2 <- read.csv("./data/raw/DataSwiss/LFI23.csv", header = TRUE, stringsAsFactors = FALSE) +data.swiss3 <- read.csv("./data/raw/DataSwiss/LFI34.csv", header = TRUE, stringsAsFactors = FALSE) data.swiss <- rbind(data.swiss1, data.swiss2, data.swiss3) rm(data.swiss1, data.swiss2, data.swiss3) -data.swiss <- data.swiss[order(data.swiss$BANR),] -data.swiss <- data.swiss[,c("INVNR","CLNR","BANR","X","Y","BAUMART","TEXT","VEGPER", - "BHD1","BHD2","BA1","BA2","BAI","BHD_DIFF","RPSTZ1","RPSTZ2","HOEHE1","HOEHE2")] +data.swiss <- data.swiss[order(data.swiss$BANR), ] +data.swiss <- data.swiss[, c("INVNR", "CLNR", "BANR", "X", "Y", "BAUMART", "TEXT", + "VEGPER", "BHD1", "BHD2", "BA1", "BA2", "BAI", "BHD_DIFF", "RPSTZ1", "RPSTZ2", + "HOEHE1", "HOEHE2")] -colnames(data.swiss) <- c("Inventid","siteid","treeid","x","y","spcode","sp.name","year", - "dbh1","dbh2","ba1","ba2","ba_diff","dbh_diff","repfactor1","repfactor2","ht1","ht2") +colnames(data.swiss) <- c("Inventid", "siteid", "treeid", "x", "y", "spcode", "sp.name", + "year", "dbh1", "dbh2", "ba1", "ba2", "ba_diff", "dbh_diff", "repfactor1", "repfactor2", + "ht1", "ht2") ## Do not need to read in spp list as it is already available in data.swiss -###################################### -## MASSAGE TRAIT DATA -############################ -## Compute maximum height per species plus sd from observed height to add variables to the traits data base -## Because we have two heights, then take the max of the two heights and then bootstrap -res.quant.boot <- t(sapply(levels(factor(data.swiss[["spcode"]])),FUN=f.quantile.boot,R=1000,x=log10(apply(data.swiss[,c("ht1","ht2")],1,max,na.rm=T)),fac=factor(data.swiss[["spcode"]]))) +###################################### MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed +###################################### height to add variables to the traits data base Because we have two heights, +###################################### then take the max of the two heights and then bootstrap +res.quant.boot <- t(sapply(levels(factor(data.swiss[["spcode"]])), FUN = f.quantile.boot, + R = 1000, x = log10(apply(data.swiss[, c("ht1", "ht2")], 1, max, na.rm = T)), + fac = factor(data.swiss[["spcode"]]))) ## create data base -data.max.height <- data.frame(code=rownames(res.quant.boot),Max.height.mean=res.quant.boot[,1],Max.height.sd=res.quant.boot[,2],Max.height.nobs=res.quant.boot[,3]) +data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, + 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3]) rm(res.quant.boot) -#write.csv(data.max.height,file="./data/process/data.max.height.swiss.csv") - -########################################## -## FORMAT INDIVIDUAL TREE DATA -############# -## change unit and names of variables to be the same in all data for the tree -data.swiss$G <- 10*(data.swiss$dbh_diff)/data.swiss$year ## diameter growth in mm per year - SOME EXTREMELY NEGATIVE HERE! -data.swiss$D <- data.swiss[["dbh1"]]; data.swiss$D[data.swiss$D == 0] <- NA ;## diameter in cm -data.swiss$dead <- rep(NA, length(data.swiss[["dbh1"]])) ## Mortality - MISSING -data.swiss$plot <- data.swiss$siteid ## plot code -data.swiss$htot <- data.swiss$ht1 ## height of tree in m - -###################### -## ECOREGION -################### -## Ecoregion not available for swiss data -data.swiss$ecocode <- rep("A",nrow(data.swiss)) -###################### -## PERCENT DEAD -################### -## variable percent dead/cannot do with since dead variable is missing -## compute numer of dead per plot to remove plot with disturbance -perc.dead <- tapply(data.swiss[["dead"]],INDEX=data.swiss[["plot"]],FUN=function.perc.dead) -# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) -data.swiss <- merge(data.swiss,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) - -########################################################### -### PLOT SELECTION FOR THE ANALYSIS -################### -## Remove data with dead == 1 +# write.csv(data.max.height,file='./data/process/data.max.height.swiss.csv') + +########################################## FORMAT INDIVIDUAL TREE DATA change unit and names of variables to be the same +########################################## in all data for the tree +data.swiss$G <- 10 * (data.swiss$dbh_diff)/data.swiss$year ## diameter growth in mm per year - SOME EXTREMELY NEGATIVE HERE! +data.swiss$D <- data.swiss[["dbh1"]] +data.swiss$D[data.swiss$D == 0] <- NA +## diameter in cm +data.swiss$dead <- rep(NA, length(data.swiss[["dbh1"]])) ## Mortality - MISSING +data.swiss$plot <- data.swiss$siteid ## plot code +data.swiss$htot <- data.swiss$ht1 ## height of tree in m + +###################### ECOREGION Ecoregion not available for swiss data +data.swiss$ecocode <- rep("A", nrow(data.swiss)) +###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is +###################### missing compute numer of dead per plot to remove plot with disturbance +perc.dead <- tapply(data.swiss[["dead"]], INDEX = data.swiss[["plot"]], FUN = function.perc.dead) +# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF +# AVAILABLE (disturbance record) +data.swiss <- merge(data.swiss, data.frame(plot = names(perc.dead), perc.dead = perc.dead), + by = "plot", sort = FALSE) + +########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 table(data.swiss$dead) ## Nothing to remove data.climate <- read.dbf(file = "./data/raw/DataSwiss/LFI14_climate.dbf") -data.climate <- data.climate[,c(1,7,15:19)] -data.climate$MAP <- apply(data.climate[,4:7],1,sum) +data.climate <- data.climate[, c(1, 7, 15:19)] +data.climate$MAP <- apply(data.climate[, 4:7], 1, sum) -data.swiss <- merge(data.swiss, data.frame(siteid = data.climate$CLNR, swb = data.climate$swb_100, MAT = data.climate$tave_68, MAP = data.climate$MAP), sort = F, all.x = T) +data.swiss <- merge(data.swiss, data.frame(siteid = data.climate$CLNR, swb = data.climate$swb_100, + MAT = data.climate$tave_68, MAP = data.climate$MAP), sort = F, all.x = T) rm(data.climate) -############################################## -## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species -########################### -data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.swiss[["treeid"]]), diam=as.vector(data.swiss[["D"]]), - sp=as.vector(data.swiss[["spcode"]]), id.plot=as.vector(data.swiss[["siteid"]]), - weights=data.swiss[["repfactor1"]], weight.full.plot=NA) +############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in +############################################## m^2/ha without the target species +data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.swiss[["treeid"]]), diam = as.vector(data.swiss[["D"]]), + sp = as.vector(data.swiss[["spcode"]]), id.plot = as.vector(data.swiss[["siteid"]]), + weights = data.swiss[["repfactor1"]], weight.full.plot = NA) ## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 +data.BA.SP[is.na(data.BA.SP)] <- 0 +data.BA.SP[, -1][data.BA.SP[, -1] < 0] <- 0 ### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data.swiss[["spcode"]]))) >0) stop("competition index sp name not the same as in data.tree") +if (sum(!(names(data.BA.SP)[-1] %in% unique(data.swiss[["spcode"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE) -data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) +BATOT.COMPET <- apply(data.BA.SP[, -1], 1, sum, na.rm = TRUE) +data.BA.SP$BATOT.COMPET <- BATOT.COMPET +rm(BATOT.COMPET) ### create data frame -names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id=data.swiss[["tree.id"]],ecocode=data.swiss[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) +names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) +data.BA.sp <- merge(data.frame(tree.id = data.swiss[["tree.id"]], ecocode = data.swiss[["ecocode"]]), + data.BA.SP, by = "tree.id", sort = FALSE) ## test -if(sum(!data.BA.SP[["treeid"]] == data.swiss[["treeid"]]) >0) stop("competition index not in the same order than data.tree") +if (sum(!data.BA.SP[["treeid"]] == data.swiss[["treeid"]]) > 0) stop("competition index not in the same order than data.tree") ## save everything as a list -list.swiss <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.swiss,file="./data/process/list.swiss.Rdata") - +list.swiss <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) +save(list.swiss, file = "./data/process/list.swiss.Rdata") diff --git a/merge.data.US.R b/merge.data.US.R index 80d0c0c8d00124140cc2694b5350f775cc2f8781..275c7c10b3b411573e04403500fd8ffcf12c2854 100644 --- a/merge.data.US.R +++ b/merge.data.US.R @@ -1,92 +1,93 @@ -### MERGE us DATA -### Edited by FH -rm(list = ls()); source("./R/format.function.R"); library(reshape) +### MERGE us DATA Edited by FH +rm(list = ls()) +source("./R/format.function.R") +library(reshape) source("./R/FUN.TRY.R") -######################### -## READ DATA -#################### -### read individuals tree data -data.us <- read.csv("./data/raw/DataUS/FIA51_trees_w_supp.csv",header=TRUE,stringsAsFactors =FALSE) +######################### READ DATA read individuals tree data +data.us <- read.csv("./data/raw/DataUS/FIA51_trees_w_supp.csv", header = TRUE, stringsAsFactors = FALSE) ### read species names -species.clean <- read.csv("./data/species.list/REF_SPECIES.CSV",stringsAsFactors=FALSE) +species.clean <- read.csv("./data/species.list/REF_SPECIES.CSV", stringsAsFactors = FALSE) ## select column to keep -species.clean <- subset(species.clean,select=c("SPCD","GENUS","SPECIES","VARIETY","SUBSPECIES","SPECIES_SYMBOL")) -species.clean$Latin_name <- paste(species.clean[["GENUS"]],species.clean[["SPECIES"]],sep=" ") -species.clean$Latin_name_syn<- paste(species.clean[["GENUS"]],species.clean[["SPECIES"]],sep=" ") +species.clean <- subset(species.clean, select = c("SPCD", "GENUS", "SPECIES", "VARIETY", + "SUBSPECIES", "SPECIES_SYMBOL")) +species.clean$Latin_name <- paste(species.clean[["GENUS"]], species.clean[["SPECIES"]], + sep = " ") +species.clean$Latin_name_syn <- paste(species.clean[["GENUS"]], species.clean[["SPECIES"]], + sep = " ") names(species.clean)[1] <- "sp" -species.clean[["sp"]] <- paste("sp",species.clean[["sp"]],sep=".") +species.clean[["sp"]] <- paste("sp", species.clean[["sp"]], sep = ".") -###################################### -## MASSAGE TRAIT DATA -############################ -## HEIGHT DATA FOR TREE MISSING -## BRING US DATA FOR HEIGHT OVER WHEN WE ANALYZE THAT DATASET LATER ON +###################################### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER +###################################### WHEN WE ANALYZE THAT DATASET LATER ON -##################################### -## FORMAT INDIVIDUAL TREE DATA -############# +##################################### FORMAT INDIVIDUAL TREE DATA -## change unit and names of variables to be the same in all data for the tree -data.us$G <- 10*(data.us$FinalDbh-data.us$InitDbh)/data.us$Interval ## diameter growth in mm per year +## change unit and names of variables to be the same in all data for the tree +data.us$G <- 10 * (data.us$FinalDbh - data.us$InitDbh)/data.us$Interval ## diameter growth in mm per year data.us$G[which(data.us$InitDbh == 0 | data.us$FinalDbh == -999)] <- NA -data.us$year <- data.us$Interval ## number of year between measuremen -data.us$D <- data.us[["InitDbh"]]; data.us$D[data.us$D == 0] <- NA ;## diameter in cm -data.us$dead <- as.numeric(data.us$FinalDbh > 0) ## dummy variable for dead tree 0 alive 1 dead -data.us$sp <- as.character(data.us[["Species"]]) ## species code -data.us$plot <- as.character(data.us[["PlotID"]]) ## plot code -data.us$subplot <- paste(as.character(data.us[["PlotID"]]),as.character(data.us[["SubplotNumber"]]),sep=".") ## plot code -data.us$htot <- rep(NA,length(data.us[["Species"]])) ## height of tree in m - MISSING -data.us$tree.id <- as.character(data.us$TreeID); ## tree unique id -data.us$sp.name <- NA; +data.us$year <- data.us$Interval ## number of year between measuremen +data.us$D <- data.us[["InitDbh"]] +data.us$D[data.us$D == 0] <- NA +## diameter in cm +data.us$dead <- as.numeric(data.us$FinalDbh > 0) ## dummy variable for dead tree 0 alive 1 dead +data.us$sp <- as.character(data.us[["Species"]]) ## species code +data.us$plot <- as.character(data.us[["PlotID"]]) ## plot code +data.us$subplot <- paste(as.character(data.us[["PlotID"]]), as.character(data.us[["SubplotNumber"]]), + sep = ".") ## plot code +data.us$htot <- rep(NA, length(data.us[["Species"]])) ## height of tree in m - MISSING +data.us$tree.id <- as.character(data.us$TreeID) +## tree unique id +data.us$sp.name <- NA + ### add plot weights for computation of competition index (in 1/m^2) -data.us$weights <- 1/(10000*data.us[['PlotSize']]) - -###################### -## ECOREGION -################### -## merge greco to have no ecoregion with low number of observation -greco <- read.csv(file = "./data/raw/DataUS/EcoregionCodes.csv", header = T); colnames(greco)[1] <- "Ecocode" - -table(data.us$Ecocode) -data.us <- merge(data.us, greco[,-4], by = "Ecocode"); data.us$DIVISION <- factor(data.us$DIVISION) -## Some ecoregions still have small # of individuals, so create a variable which does division if # ind < 10000; else it reads Domain -# -data.us$eco_codemerged <- as.character(data.us$DIVISION); tab.small.div <- table(data.us$eco_codemerged) +data.us$weights <- 1/(10000 * data.us[["PlotSize"]]) + +###################### ECOREGION merge greco to have no ecoregion with low number of observation +greco <- read.csv(file = "./data/raw/DataUS/EcoregionCodes.csv", header = T) +colnames(greco)[1] <- "Ecocode" + +table(data.us$Ecocode) +data.us <- merge(data.us, greco[, -4], by = "Ecocode") +data.us$DIVISION <- factor(data.us$DIVISION) +## Some ecoregions still have small # of individuals, so create a variable which +## does division if # ind < 10000 else it reads Domain +data.us$eco_codemerged <- as.character(data.us$DIVISION) +tab.small.div <- table(data.us$eco_codemerged) sel.small.div <- which(table(data.us$eco_codemerged) < 10000) -for(i in 1:length(sel.small.div)) { - find.ind <- which(data.us$eco_codemerged == names(tab.small.div)[sel.small.div[i]]); print(length(find.ind)) - data.us$eco_codemerged[find.ind] <- as.character(data.us$DOMAIN)[find.ind] - } - -###################### -## PERCENT DEAD -################### -## variable percent dead/cannot do with since dead variable is missing -## compute numer of dead per plot to remove plot with disturbance -perc.dead <- tapply(data.us[["dead"]],INDEX=data.us[["plot"]],FUN=function.perc.dead) -# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) -data.us <- merge(data.us,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) - - -########################################################### -### PLOT SELECTION FOR THE ANALYSIS -################### +for (i in 1:length(sel.small.div)) { + find.ind <- which(data.us$eco_codemerged == names(tab.small.div)[sel.small.div[i]]) + print(length(find.ind)) + data.us$eco_codemerged[find.ind] <- as.character(data.us$DOMAIN)[find.ind] +} + +###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is +###################### missing compute numer of dead per plot to remove plot with disturbance +perc.dead <- tapply(data.us[["dead"]], INDEX = data.us[["plot"]], FUN = function.perc.dead) +# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF +# AVAILABLE (disturbance record) +data.us <- merge(data.us, data.frame(plot = names(perc.dead), perc.dead = perc.dead), + by = "plot", sort = FALSE) + + +########################################################### PLOT SELECTION FOR THE ANALYSIS ## remove everything from memory not need before computation -rm(greco,perc.dead,tab.small.div,sel.small.div) +rm(greco, perc.dead, tab.small.div, sel.small.div) #### create good data format to be run per ecoregion -data.us$ecocode <- unlist(lapply(lapply(strsplit(data.us[["eco_codemerged"]]," "),FUN=substr,1,2),FUN=paste,collapse=".")) -data.us[["sp"]] <- paste("sp",data.us[["sp"]],sep=".") - -##variables to keep -vec.abio.var.names <- c("MAT","MAP") -vec.basic.var <- c("tree.id","sp","plot","subplot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead","weights") -data.tree <- subset(data.us,select=c(vec.basic.var,vec.abio.var.names)) +data.us$ecocode <- unlist(lapply(lapply(strsplit(data.us[["eco_codemerged"]], " "), + FUN = substr, 1, 2), FUN = paste, collapse = ".")) +data.us[["sp"]] <- paste("sp", data.us[["sp"]], sep = ".") + +## variables to keep +vec.abio.var.names <- c("MAT", "MAP") +vec.basic.var <- c("tree.id", "sp", "plot", "subplot", "ecocode", "D", "G", "dead", + "year", "htot", "Lon", "Lat", "perc.dead", "weights") +data.tree <- subset(data.us, select = c(vec.basic.var, vec.abio.var.names)) rm(data.us) ## creat row unique id data.tree$obs.id <- as.character(1:nrow(data.tree)) @@ -95,13 +96,13 @@ gc() ### read TRY data TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds") -#################### -#### GENERATE ONE OBJECT PER ECOREGION +#################### GENERATE ONE OBJECT PER ECOREGION # vector of ecoregion name ecoregion.unique <- unique(data.tree[["ecocode"]]) #### lapply function -system.time(lapply(ecoregion.unique,FUN=fun.data.per.ecoregion,data.tot=data.tree,plot.name='subplot',weight.full.plot=NA,name.country="US",data.TRY=TRY.DATA.FORMATED,species.lookup=species.clean) -) +system.time(lapply(ecoregion.unique, FUN = fun.data.per.ecoregion, data.tot = data.tree, + plot.name = "subplot", weight.full.plot = NA, name.country = "US", data.TRY = TRY.DATA.FORMATED, + species.lookup = species.clean)) diff --git a/species.list.R b/species.list.R index 1b229c7c99f89e02897a2764515af7249689d5ee..beff60a67a893236ddf18a2ee9ff298bc30ff0da 100644 --- a/species.list.R +++ b/species.list.R @@ -1,107 +1,91 @@ -#################################################### -#################################################### -##### SPECIES LIST FOR WORKSHOP - - -################################# -################################# -#### READ SPECIES LIST I HAVE NOW FOR EACH SITES NON TROPICAL -#### (ASSUMING THAT TROPICAL SITES COMES WITH TRAITS DATA - -#############################3 -### US based on FIA full species list -Data.Species.FIA <- read.csv("./data/species.list/REF_SPECIES.CSV",stringsAsFactors=FALSE,na.strings="") -## head(Data.Species.FIA) -## merge genus & species -Data.Species.FIA$sp <- gsub("spp.","sp",paste(Data.Species.FIA$GENUS,Data.Species.FIA$SPECIES)) -Species.FIA.short <- Data.Species.FIA$sp[Data.Species.FIA$SPCD<6000] +#################################################### SPECIES LIST FOR WORKSHOP + + +################################# READ SPECIES LIST I HAVE NOW FOR EACH SITES NON TROPICAL (ASSUMING THAT +################################# TROPICAL SITES COMES WITH TRAITS DATA + +############################# 3 US based on FIA full species list +Data.Species.FIA <- read.csv("./data/species.list/REF_SPECIES.CSV", stringsAsFactors = FALSE, + na.strings = "") +## head(Data.Species.FIA) merge genus & species +Data.Species.FIA$sp <- gsub("spp.", "sp", paste(Data.Species.FIA$GENUS, Data.Species.FIA$SPECIES)) +Species.FIA.short <- Data.Species.FIA$sp[Data.Species.FIA$SPCD < 6000] Species.FIA.long <- Data.Species.FIA$sp -#################################### -## US SPECIES Based on Burns and Honkala -BurnsAndHonkalaSpeciesSummary<- read.csv("./data/species.list/BurnsAndHonkalaSpeciesSummary.csv", - stringsAsFactors=FALSE,na.strings="",sep=";") -## head(BurnsAndHonkalaSpeciesSummary) -## clean species name -fun.get.sp <- function(x) gsub(paste(" ",gsub("^([a-zA-Z]* [a-zA-Z]* )","",x),sep=""),"",x,fixed=TRUE) -BurnsAndHonkalaSpeciesSummary$sp <- sapply(BurnsAndHonkalaSpeciesSummary[,"Scientific_name"],fun.get.sp) +#################################### US SPECIES Based on Burns and Honkala +BurnsAndHonkalaSpeciesSummary <- read.csv("./data/species.list/BurnsAndHonkalaSpeciesSummary.csv", + stringsAsFactors = FALSE, na.strings = "", sep = ";") +## head(BurnsAndHonkalaSpeciesSummary) clean species name +fun.get.sp <- function(x) gsub(paste(" ", gsub("^([a-zA-Z]* [a-zA-Z]* )", "", x), + sep = ""), "", x, fixed = TRUE) +BurnsAndHonkalaSpeciesSummary$sp <- sapply(BurnsAndHonkalaSpeciesSummary[, "Scientific_name"], + fun.get.sp) Species.Burns <- BurnsAndHonkalaSpeciesSummary$sp -######################################### -### canada check http://en.wikipedia.org/wiki/List_of_trees_of_Canada -Data.canadian.species <- read.csv("./data/species.list/canadian.species.csv",stringsAsFactors=FALSE) -Species.Canada <- Data.canadian.species$species +######################################### canada check http://en.wikipedia.org/wiki/List_of_trees_of_Canada +Data.canadian.species <- read.csv("./data/species.list/canadian.species.csv", stringsAsFactors = FALSE) +Species.Canada <- Data.canadian.species$species -######################### -## SPAIN BASED ON TABLE PROVIDED BY MIGUEL -Data.Species.Spain<- read.csv("./data/species.list/List_120_SPP.csv", - stringsAsFactors=FALSE) +######################### SPAIN BASED ON TABLE PROVIDED BY MIGUEL +Data.Species.Spain <- read.csv("./data/species.list/List_120_SPP.csv", stringsAsFactors = FALSE) ## head(Data.Species.Spain) Species.Spain <- Data.Species.Spain$Scoentific.Name -######################################### -### SPECIE Sin Swizerland not in FRANCE -Species.Swiss <- c('Pinus montana','Ulmus scabra') - -####################################3 -#### SWEDEN -Data.Sweden <- read.csv("./data/species.list/Swedish_NFI_tree_species.csv", - sep=";",stringsAsFactors=FALSE) +######################################### SPECIE Sin Swizerland not in FRANCE +Species.Swiss <- c("Pinus montana", "Ulmus scabra") + +#################################### 3 SWEDEN +Data.Sweden <- read.csv("./data/species.list/Swedish_NFI_tree_species.csv", sep = ";", + stringsAsFactors = FALSE) Species.Sweden <- Data.Sweden$Species -########################### -########################### -## NEW ZEALAND -load("./data/species.list/vec.code.nvs.Rdata") ### load list of species code from the NVS data I used to calibrate SORTIE -Data.NVS.species <- read.csv("./data/species.list/CurrentNVSNames.csv",sep=";",stringsAsFactors=FALSE) +########################### NEW ZEALAND +load("./data/species.list/vec.code.nvs.Rdata") ### load list of species code from the NVS data I used to calibrate SORTIE +Data.NVS.species <- read.csv("./data/species.list/CurrentNVSNames.csv", sep = ";", + stringsAsFactors = FALSE) -table.NVS.tree <- Data.NVS.species[Data.NVS.species$NVS.Code %in% vec.code.nvs,] -write.csv(table.NVS.tree,file="./data/species.list/table.NVS.tree.csv") +table.NVS.tree <- Data.NVS.species[Data.NVS.species$NVS.Code %in% vec.code.nvs, ] +write.csv(table.NVS.tree, file = "./data/species.list/table.NVS.tree.csv") Species.NZ <- table.NVS.tree$Species.Name -########################### -########################### -## NSW -Data.NSW <- read.csv("./data/species.list/Sub-trop_RF_trees.csv",sep=";",stringsAsFactors=FALSE) +########################### NSW +Data.NSW <- read.csv("./data/species.list/Sub-trop_RF_trees.csv", sep = ";", stringsAsFactors = FALSE) Species.NSW <- Data.NSW$Species..trees...10cm.dbhob. -########################################### -########################################### -#### LOAD TRY SPECIES -try.species <- read.csv("./data/species.list/SPECIES.TRY.csv", - stringsAsFactors=FALSE,header=TRUE) -## head(try.species,20) -## dim(try.species) +########################################### LOAD TRY SPECIES +try.species <- read.csv("./data/species.list/SPECIES.TRY.csv", stringsAsFactors = FALSE, + header = TRUE) +## head(try.species,20) dim(try.species) ## genus already asked to try genus.asked.try <- read.csv("./data/species.list/genus.asked.try.csv")$genus.asked.try #### LIST OF ALL SPECIES with short FIA -species.vector <- unique(c(Species.Spain,Species.Canada,Species.Burns - ,Species.Swiss,Species.Sweden,Species.NZ,Species.NSW,Species.FIA.short)) -# +species.vector <- unique(c(Species.Spain, Species.Canada, Species.Burns, Species.Swiss, + Species.Sweden, Species.NZ, Species.NSW, Species.FIA.short)) +# ### get genus -fun.get.genus <- function(x) gsub(paste(" ",gsub("^([a-zA-Z]* )","",x),sep=""),"",x,fixed=TRUE) -Genus.tot <- unique(sapply(species.vector,fun.get.genus)) +fun.get.genus <- function(x) gsub(paste(" ", gsub("^([a-zA-Z]* )", "", x), sep = ""), + "", x, fixed = TRUE) +Genus.tot <- unique(sapply(species.vector, fun.get.genus)) -## all FIA data -## species.vector2 <- unique(c(Species.Spain,Species.Canada,Species.Burns -## ,Species.Swiss,Species.Sweden,as.character(Species.NZ),Species.NSW,Species.FIA.long)) +## all FIA data species.vector2 <- +## unique(c(Species.Spain,Species.Canada,Species.Burns +## ,Species.Swiss,Species.Sweden,as.character(Species.NZ),Species.NSW,Species.FIA.long)) ## Genus.tot2 <- unique(sapply(species.vector2,fun.get.genus)) -#### GENUS TO ASK +#### GENUS TO ASK GENUS.to.ASK <- Genus.tot[!(Genus.tot %in% genus.asked.try)] ## GENUS.to.ASK2 <- Genus.tot2[!(Genus.tot2 %in% genus.asked.try)] ## CHECK which genus are in TRY -GENUS.TRY <- unique(sapply(try.species$AccSpeciesName,fun.get.genus)) -#############################3 +GENUS.TRY <- unique(sapply(try.species$AccSpeciesName, fun.get.genus)) +############################# 3 genus.list.short <- sort(GENUS.to.ASK[(GENUS.to.ASK %in% GENUS.TRY)]) -write.csv(as.data.frame(genus.list.short),file="./data/process/genus.list.try.csv") +write.csv(as.data.frame(genus.list.short), file = "./data/process/genus.list.try.csv") sort(GENUS.to.ASK[!(GENUS.to.ASK %in% GENUS.TRY)]) -## genus.lis.long <- sort(GENUS.to.ASK2[(GENUS.to.ASK2 %in% GENUS.TRY)]) - +## genus.lis.long <- sort(GENUS.to.ASK2[(GENUS.to.ASK2 %in% GENUS.TRY)])