diff --git a/R/FUN.TRY.R b/R/FUN.TRY.R index fe88a61992c03cbb7eac2762dbace3c4c18dcf5f..a43fcaa3753e85114867a38dd4b38cf23f923bdb 100644 --- a/R/FUN.TRY.R +++ b/R/FUN.TRY.R @@ -238,7 +238,6 @@ fun.turn.list.in.DF <- function(sp, res.list) { fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) { - ## check syno data if not create a table with column syno repating the species ### test data sp and sp.syno.table match if (sum(!(sp %in% sp.syno.table[["sp"]])) > 0) @@ -247,8 +246,7 @@ fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) { 0) stop("not a single similar species name in sp and TRY") ## extract - traits <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", "StdValue.Seed.mass", - "StdValue.Leaf.specific.area..SLA.", "StdValue.Stem.specific.density..SSD.") + traits <- c("Leaf.N", "Seed.mass", "SLA", "Wood.Density") res.list <- lapply(sp, FUN = fun.species.traits, species.table = sp.syno.table, traits = traits, data = data) @@ -257,7 +255,7 @@ fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) { ##### TRANSFORM LIST IN A TABLE extract.species.try <- fun.turn.list.in.DF(sp, res.list) - ##### TODO ADD A TEST OF GOOD EXTRACTION OF TRAITS + ##### ADD A TEST OF GOOD EXTRACTION OF TRAITS test.num <- sample((1:length(sp))[!is.na(extract.species.try[["SLA.mean"]])],1) if( extract.species.try[test.num,"SLA.mean"] != fun.species.traits(sp[test.num], species.table = sp.syno.table, @@ -288,62 +286,97 @@ fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) { } +############################## +############################## +### NO TRY TRAITS -fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data,name.match.sp="Latin_name_syn",name.match.traits="Latin_name") { - ## check syno data if not create a table with column syno repating the species + + +### function to return mean and sd of traits per species or at genus level in a single line data.frame + +fun.spe.traits.notry <- function(Latin_name,data.tot,traits.mean,traits.sd,name.match.traits="Latin_name",SD.TF){ + +mean.vec <- c() +sd.vec <- c() +genus.vec <- c() +for(i in 1:length(traits.mean)){ +if(sum(!is.na(data.tot[[traits.mean[i]]]))>0){ + data <- data.tot[!is.na(data.tot[[traits.mean[i]]]),] + if(Latin_name %in% data[[name.match.traits]] ){ + mean.vec[i] <-mean( data[data[[name.match.traits]] %in% Latin_name,traits.mean[i]]) + genus.vec[i] <- FALSE + if(SD.TF){sd.vec[i] <- mean(data[data[[name.match.traits]] %in% Latin_name,traits.sd[i]],na.rm=TRUE) + }else{sd.vec[i] <- NA} + }else{## do genus mean + genus <- sub(" .*", "", Latin_name) + genus.species <- sub(" .*", "", data[[name.match.traits]]) + genus.vec[i] <- TRUE + mean.vec[i] <- mean(data[genus.species %in% genus,traits.mean[i]],na.rm=TRUE) + if(SD.TF){sd.vec[i] <- mean(data[genus.species %in% genus ,traits.sd[i]],na.rm=TRUE) + }else{sd.vec[i] <- NA} + + } + }else{ + mean.vec[i] <- NA + sd.vec[i] <- NA + genus.vec[i] <- TRUE + + + } +} +names(mean.vec) <- traits.mean +names(sd.vec) <- traits.sd +names(genus.vec) <- sub("sd","genus",traits.sd) + + +extract.species.traits <- data.frame(Latin_name,t(mean.vec),t(sd.vec),t(genus.vec)) +return(extract.species.traits) +} + + + +########### +### FUNCTION TO EXTRACT ALL SPECIES + +fun.extract.format.sp.traits.NOT.TRY <- function(sp, Latin_name, data,name.match.traits="Latin_name") { ### test data sp and sp.syno.table match - if (sum(!(sp %in% sp.syno.table[["sp"]])) > 0) - stop("not same species name in sp and sp.syno.table") - if (sum((sp.syno.table[[name.match.sp]] %in% data[[name.match.traits]])) == + if (sum((Latin_name %in% data[[name.match.traits]])) == 0) stop("not a single similar species name in sp and Traits data") ## traits to extract traits <- c("Leaf.N", "Seed.mass", - "SLA", "Wood.Density") + "SLA", "Wood.Density","Max.height") ### NEED TO ADD TEST IF SD available in the data if(sum(grepl("sd",names(data)))>0) SD.TF <- TRUE - ## check traits available -traits.mean <- names(data)[(names(data) %in% paste(traits,"mean",sep="."))] -if (SD.TF) traits.sd <- names(data)[(names(data) %in% paste(traits,"sd",sep="."))] - #### TO CHANGE HERE !!!! - res.list <- lapply(sp, FUN = fun.species.traits, species.table = sp.syno.table, - traits = traits, data = data) - names(res.list) <- sp - - ##### TRANSFORM LIST IN A TABLE - extract.species.try <- fun.turn.list.in.DF(sp, res.list) +traits.mean <- paste(traits,"mean",sep=".") +traits.genus <- paste(traits,"genus",sep=".") +if (SD.TF) traits.sd <- paste(traits,"sd",sep=".") - ##### TODO ADD A TEST OF GOOD EXTRACTION OF TRAITS - test.num <- sample((1:length(sp))[!is.na(extract.species.try[["SLA.mean"]])],1) +## extract data +extract.species.traits <- rbind.fill(lapply(Latin_name,FUN=fun.spe.traits.notry ,data,traits.mean,traits.sd,name.match.traits,SD.TF)) - if( extract.species.try[test.num,"SLA.mean"] != fun.species.traits(sp[test.num], species.table = sp.syno.table, - traits = traits, data = data)$mean[grep("SLA",traits)]) stop('traits value not good for the species in extraction from TRY') ############### add mean sd of species or genus if we want to use that sd.vec.sp <- readRDS(file = "./data/process/sd.vec.sp.rds") sd.vec.genus <- readRDS(file = "./data/process/sd.vec.genus.rds") - sd.names <- paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), - "sd", sep = ".") - genus.names <- paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), - "genus", sep = ".") ### add columns - extract.species.try.2 <- data.frame(extract.species.try, - extract.species.try[,sd.names], stringsAsFactors =FALSE) + extract.species.traits.2 <- data.frame(extract.species.traits, + extract.species.traits[,traits.sd], stringsAsFactors =FALSE) ## update value - sd.names.1 <- paste(sd.names, 1, sep = ".") - for (i in 1:length(sd.names.1)) { - extract.species.try.2[[sd.names.1[i]]][!extract.species.try.2[[genus.names[i]]]] <- sd.vec.sp[i] - extract.species.try.2[[sd.names.1[i]]][extract.species.try.2[[genus.names[i]]]] <- sd.vec.genus[i] + traits.sd.1 <- paste(traits.sd, 1, sep = ".") + for (i in 1:length(traits.sd.1)) { + extract.species.traits.2[[traits.sd.1[i]]][!extract.species.traits.2[[traits.genus[i]]]] <- sd.vec.sp[i] + extract.species.traits.2[[traits.sd.1[i]]][extract.species.traits.2[[traits.genus[i]]]] <- sd.vec.genus[i] } - data.frame.TRY <- data.frame(sp = sp, Latin_name = sp.syno.table[["Latin_name_syn"]], - extract.species.try.2, stringsAsFactors =FALSE) - if (sum(!data.frame.TRY[["sp"]] == sp) > 0) + data.frame.TRAITS <- data.frame(sp = sp, Latin_name , + extract.species.traits.2, stringsAsFactors =FALSE) + if (sum(!data.frame.TRAITS[["sp"]] == sp) > 0) stop("Wrong order of species code") - return(data.frame.TRY) + return(data.frame.TRAITS) } diff --git a/R/format.function.R b/R/format.function.R index caca0189db76759d501cd392c663391b51359071..a65b31221b637762ba8083931ad0190c5ebacbb1 100644 --- a/R/format.function.R +++ b/R/format.function.R @@ -5,6 +5,52 @@ ##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP ANALYSIS +############################ +## FUNCTION remove trailing white space +trim.trailing <- function (x) sub("\\s+$", "", x) + + +############################################# +## FUN to clean species name +fun.clean.species.tab <- function(species.tab){ +species.tab2 <- species.tab[!is.na(species.tab$Latin_name),] +### species IFN reformat names +## clean species names and synonyme names +species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) +species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn)) +## remove trailing white space +species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) +species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), + c("code","Latin_name","Exotic_Native_cultivated")] +return(species.clean)} + + + +##################################################### +### compute quantile 99% and sd with a bootstrap +f.quantile <- function (x,ind,probs){ + require(boot) + quantile(x[ind],probs=probs,na.rm=TRUE)} + +f.quantile.boot <- function(i,x,fac,R,probs=0.99){ + require(boot) + if(length(na.exclude(x[fac==i]))>0){ +quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs) +return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1)) +}else{ +return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1)) +} +} + +####################### +### function to compute number of dead per plot +function.perc.dead <- function(dead){ + sum(dead)/length(dead)} + +function.perc.dead2 <- function(dead) { out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]); if(!is.finite(out)) out <- NA; return(out) } + + + ###### ###### ## FUNCTION TO PLOT MAP OF TREE @@ -153,7 +199,8 @@ return(res.mat) ##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) ##' @author Kunstler BA.SP.FUN.XY <- function(obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ -rownames(xy.table) <- obs.id +print(paste("n observation = ",nrow(xy.table))) +print("start computing distance matrix") if(rpuDist){ require(rpud) dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE) @@ -166,6 +213,7 @@ dist.mat[dist.mat >Rlim] <- 0 diag(dist.mat) <- 0 print('distance matrix set to 0 1') BA <- BA.fun(diam,weights=1/(pi*Rlim^2)) +print(c(length(BA),length(diam))) BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE) print('starting tapply over species') fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE) @@ -177,79 +225,122 @@ fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE) res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% { fun.sum.sp(mat[i,],sp) } - rownames(res.temp) <- obs.id + res.temp <- data.frame(obs.id=obs.id,res.temp) return((res.temp)) }else{ - res.temp <- t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp)) + res.temp <- data.frame(obs.id=obs.id,t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp))) return(res.temp) } } -## function for lapply per census -BA.SP.FUN.XY.l <- function(census.id,census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ -return(BA.SP.FUN.XY(obs.id[census==census.id],xy.table[census==census.id,],diam[census==census.id],sp[census==census.id],Rlim,parallel,rpuDist)) +#### +##' .. function to compute competition index with X Y coordinates based on a neighborhood of radius R WITHOUT THE DISTANCE MATRIX.. +##' +##' .. content for \details{} .. +##' @title +##' @param obs.id id of the observation (if one tree as multiple growth measurement one obs.id per measurment +##' @param xy.table table with x.y of teh individual +##' @param diam diam in cm +##' @param sp species +##' @param Rlim radius of neighborhood search +##' @param parallel run in paralle or not ? +##' @param rpuDist run with GPU distance computation +##' @return a data frame with nrow = length of obs.id and ncol =unique(sp) +##' @author Kunstler +BA.SP.FUN.XY2 <- function(obs.id,xy.table,diam,sp,Rlim,parallel){ +rownames(xy.table) <- obs.id +print(paste("n observation = ",nrow(xy.table))) +print("start computing distance matrix") +## create local function +fun.compute.local <- function(obs.id.t,obs.id,xy.table,diam,sp,Rlim){ +dist.t <- as.vector(sqrt((outer(xy.table[obs.id==obs.id.t,"x"],xy.table[,"x"],FUN="-"))^2 + + (outer(xy.table[obs.id==obs.id.t,"y"],xy.table[,"y"],FUN="-"))^2)) +dist.t[dist.t>Rlim] <- 0 +dist.t[dist.t<=Rlim] <- 1 +res.BA.t <- tapply(BA.fun(diam,weights=dist.t/(pi*Rlim^2)),INDEX=sp,FUN=sum,na.rm=TRUE) +res.BA.t[is.na(res.BA.t)] <- 0 +return(data.frame(obs.id=obs.id.t,t(res.BA.t))) +} +#lapply function +if(parallel){ + library(doParallel) + print("start mclapply") + list.BA.SP.data <- mclapply(obs.id,FUN=fun.compute.local,obs.id,xy.table, + diam,sp,Rlim,mc.cores=4) + prin("done") + data.BA.SP <- rbind.fill(list.BA.SP.data) + }else{ + list.BA.SP.data <- lapply(obs.id,FUN=fun.compute.local,obs.id,xy.table, + diam,sp,Rlim) + data.BA.SP <- rbind.fill(list.BA.SP.data) + } +return(data.BA.SP) +} + + + +## function for lapply per census +BA.SP.FUN.XY.l <- function(census.id,census,obs.id,xy.table,diam,sp,Rlim,parallel,rpuDist){ +print(dim(xy.table)) +if(length(obs.id[census==census.id]) <10000){ +data.temp<-BA.SP.FUN.XY(obs.id=obs.id[census==census.id], + xy.table=xy.table[census==census.id,], + diam=diam[census==census.id],sp=sp[census==census.id], + Rlim=Rlim,parallel,rpuDist) + }else{ +data.temp<-BA.SP.FUN.XY2(obs.id=obs.id[census==census.id], + xy.table=xy.table[census==census.id,], + diam=diam[census==census.id],sp=sp[census==census.id], + Rlim=Rlim,parallel) + } +return(data.temp) } ### wrapping function to run BA.SP.FUN.XY per census BA.SP.FUN.XY.census <- function(census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ unique.census <- unique(census) -print(unique.census) -res.list <- lapply(unique.census,FUN=BA.SP.FUN.XY.l,obs.id,xy.table,diam,sp,Rlim,parallel,rpuDist) +print(paste("Vector of census to lapply over" ,paste(unique.census,collapse=","),sep=" ")) +res.list <- lapply(unique.census,FUN=BA.SP.FUN.XY.l,census=census,obs.id=obs.id,xy.table=xy.table,diam=diam,sp=sp,Rlim=Rlim,parallel,rpuDist) res.mat <- rbind.fill(res.list ) -res.mat <- res.mat[match(obs.id,rownames(res.mat)),] -return(res.mat) +res.DF <- res.mat[match(obs.id,res.mat[["obs.id"]]),] +return(res.DF) } -############################ -## FUNCTION remove trailing white space -trim.trailing <- function (x) sub("\\s+$", "", x) - -## clean species.tab -fun.clean.species.tab <- function(species.tab){ -species.tab2 <- species.tab[!is.na(species.tab$Latin_name),] - -### species IFN reformat names -## clean species names and synonyme names -species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) -species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn)) -## remove trailing white space -species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) - -species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), - c("code","Latin_name","Exotic_Native_cultivated")] -return(species.clean)} - - -### compute quantile 99% and sd with a bootstrap - -library(boot) - -f.quantile <- function (x,ind,probs){quantile(x[ind],probs=probs,na.rm=TRUE)} +##################################### +##################################### +### FUNCTION TO COMPUTE BA.SP.XY PER PLOT AND MERGE TOGETHER +#### function to be apply per site +fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){ +data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i) +print(paste("size of the data for plot",i,"is",nrow(data.tree.s))) +BA.SP.temp <- BA.SP.FUN.XY.census(census=data.tree[["census"]], + obs.id=data.tree.s[['obs.id']], + xy.table=data.tree.s[,xy.name], + diam=data.tree.s[['D']], + sp=(data.tree.s[['sp']]), + Rlim=Rlim, + parallel, + rpuDist) +## replace NA per zero +print('replacing NA per zero') +BA.SP.temp[is.na(BA.SP.temp)] <- 0 +print('done') +### TEST GOOD ORDER +if(sum(! (BA.SP.temp[["obs.id"]])==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') +if(sum(!colnames(BA.SP.temp[,-1])==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') -f.quantile.boot <- function(i,x,fac,R,probs=0.99){ - require(boot) - if(length(na.exclude(x[fac==i]))>0){ -quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs) -return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1)) -}else{ -return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1)) -} +### compute sum per row +BATOT <- apply(BA.SP.temp[,-1],MARGIN=1,FUN=sum) +data.res <- data.frame(BA.SP.temp,BATOT=BATOT, stringsAsFactors =FALSE) +return(data.res) } -####################### -### function to compute number of dead per plot -function.perc.dead <- function(dead){ - sum(dead)/length(dead)} - - -function.perc.dead2 <- function(dead) { out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]); if(!is.finite(out)) out <- NA; return(out) } ########################## -### GENERATE A R.object per ecoregion - +### function to replace missing value per zero in competition matrix function.replace.NA.negative <- function(data.BA.SP){ for (i in 2:ncol(data.BA.SP)){ eval(parse(text=paste("data.BA.SP[is.na(",names(data.BA.SP)[i],"),",names(data.BA.SP)[i],":=0]",sep=""))) @@ -272,7 +363,7 @@ data.BA.SP <- BA.SP.FUN.census(census=data[['census']], obs.id=as.vector(data[['obs.id']]), diam=as.vector(data[['D']]), sp=as.vector(data[['sp']]), - id.plot=as.vector(data[['plot']]), + id.plot=as.vector(data[[plot.name]]), weights=data[['weights']], weight.full.plot=weight.full.plot) @@ -314,36 +405,47 @@ if(is.data.frame(data.TRY)){ } -##################################### -##################################### -### FUNCTION TO COMPUTE BA.SP.XY PER PLOT AND MERGE TOGETHER -#### function to be apply per site -fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){ -data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i) -BA.SP.temp <- BA.SP.FUN.XY(obs.id=data.tree.s[['obs.id']], - xy.table=data.tree.s[,xy.name], - diam=data.tree.s[['D']], - sp=(data.tree.s[['sp']]), - Rlim=15, - parallel=FALSE, - rpuDist=FALSE) - -## replace NA per zero -print('replacing NA per zero') -BA.SP.temp[is.na(BA.SP.temp)] <- 0 -print('done') -### rpud installation very cumbersome not needed ? -### longer in parallel why ? -if(sum(! rownames(BA.SP.temp)==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') -if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') - -### compute sum per row -BATOT <- apply(BA.SP.temp,MARGIN=1,FUN=sum) -data.res <- data.frame(obs.id=data.tree.s[['obs.id']],BA.SP.temp,BATOT=BATOT, stringsAsFactors =FALSE) -return(data.res) -} +####### +#### NEED TO WRITE FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT +fun.data.per.bigplot <- function(data,name.site,data.TRAITS=NA,Rlim=15,xy.name,parallel=FALSE,rpuDist=FALSE){ +require(data.table) +## compute competition matrix +list.BA.SP.data <- lapply(unique(data[["plot"]]),FUN=fun.compute.BA.SP.XY.per.plot,data=data,Rlim=Rlim,xy.name=xy.name,parallel=TRUE,rpuDist=FALSE) +print('competition index computed in list') +data.BA.SP <- rbind.fill(list.BA.SP.data) +print('competition index computed') +### test +if(sum(!(names(data.BA.SP)[-1] %in% c(levels(data[["sp"]]),"BATOT"))) >0) stop("competition index sp name not the same as in data") + +### TODO REMOVE TREE IN BUFFER ZONE +obs.id.no.buffer <- subset(data,subset=((data[["x"]]-15)>0 & (data[["x"]]+15)<max(data[["x"]])) & + ((data[["y"]]-15)>0 & (data[["y"]]+15)<max(data[["y"]])), + select="obs.id") +data <- subset(data,subset=data[["obs.id"]] %in% as.character(obs.id.no.buffer[,1])) +data.BA.SP <- subset(data.BA.SP,subset=data.BA.SP[["obs.id"]] %in% as.character(obs.id.no.buffer[,1])) +data.BA.SP$obs.id <- as.character(data.BA.SP$obs.id) +## reorder data +data <- data.table(data) +setkeyv(data,"obs.id") +data.BA.SP <- data.table(data.BA.SP) +setkeyv(data.BA.SP,"obs.id") +## test if same order +if(sum(!as.character(data.BA.SP[["obs.id"]]) == data[["obs.id"]]) >0) stop("competition index not in the same order than data") +##### +## ADD TRY DATA OR TRAITS IF NEEDED +if(is.data.frame(data.TRAITS)){ + data.traits <- fun.extract.format.sp.traits.NOT.TRY(sp=unique(data[["sp"]]), + Latin_name=unique(data[["sp.name"]]), + data=data.TRAITS,name.match.traits="Latin_name") + list.temp <- list(data.tree=data,data.BA.SP=data.BA.SP,data.traits=data.traits) + save(list.temp,file=paste("./data/process/list",name.site,"Rdata",sep=".")) + }else{ + list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA) + saveRDS(list.temp,file=paste("./data/process/list",name.site,"Rdata",sep=".")) + } +} diff --git a/TRY.R b/TRY.R index 378fbcd613ba800341123e1e58e14388fcde63ba..b3d7b4da0ac3aca425784ba202a24f458035aea2 100644 --- a/TRY.R +++ b/TRY.R @@ -50,40 +50,27 @@ TRY.DATA.FORMATED <- foreach(ObservationID.t = unique(TRY.DATA$ObservationID), . saveRDS(TRY.DATA.FORMATED, file = "./data/process/TRY.DATA.FORMATED.rds") +#### TODO MODIFY THE FUNCTION TO KEEP TRAITS SD AND INCLUDE IT LATER ON + ######################## READ RDS TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds") +TRY.DATA.FORMATED$AccSpeciesName <- as.character(TRY.DATA.FORMATED$AccSpeciesName) - -#################### COMPUTE MEAN AND SD FOR SPECIES from FRENCH NFI for 6 key traits -key.main.traits2 <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", "StdValue.Seed.mass", +### select only the five traits we will use +traits <- c("StdValue.Leaf.nitrogen..N..content.per.dry.mass", "StdValue.Seed.mass", "StdValue.Leaf.specific.area..SLA.", "StdValue.Stem.specific.density..SSD.", - "StdValue.Stem.conduit.area..vessel.and.tracheid.", "StdValue.Leaf.lifespan") - - - -############################### READ CSV TABLE WITH LATIN NAME and CODE FOR FRENCH NFI DATA -species.tab <- read.csv("./data/species.list/species.csv", sep = "\t") -species.tab2 <- species.tab[!is.na(species.tab$Latin_name), ] -rm(species.tab) -gc() - -### species IFN reformat names clean species names and synonyme names -species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) -species.tab2$Latin_name_syn <- (gsub("_", " ", species.tab2$Latin_name_syn)) ## THIS TABLE HAS ALREADY THE SYNONYME FOR THE FRENCH SPECIES -## remove trailing white space -species.tab2$Latin_name_syn <- trim.trailing(species.tab2$Latin_name_syn) -## create vector of species name -species.IFN <- unique(pecies.tab2$Latin_name) + "StdValue.Plant.height.vegetative") +data.TRY.std <- subset(TRY.DATA.FORMATED,select=c("ObservationID","AccSpeciesName","Reference","Sun.vers..shade.leaf.qualifier",traits)) +names(data.TRY.std) <- c("obs.id","Latin_name","Reference","Sun.vs.shade.leaf","Leaf.N", "Seed.mass", "SLA", "Wood.Density","height") +saveRDS(data.TRY.std,file="./data/process/data.TRY.std.rds") -########################################################################### EXTRACT SPECIES MEAN AND SD change format try species names -TRY.DATA.FORMATED$AccSpeciesName <- as.character(TRY.DATA.FORMATED$AccSpeciesName) -key.main.traits2 <- -##################################################################### COMPUTE mean SD species:genus for each traits +##################################################################### +### COMPUTE mean SD species:genus for each traits ######## The table 5 in Kattge et al. 2011 GCB provides estimation of mean species sd ######## SLA species sd log 0.09 Nmass species sd log 0.08 Seed Mass sd log 0.13 @@ -167,38 +154,6 @@ names(sd.vec.genus) <- c("sdlog10.gs.Nmass", "sdlog10.gs.Seed.Mass", "sdlog10.gs saveRDS(sd.vec.sp, file = "./data/process/sd.vec.sp.rds") saveRDS(sd.vec.genus, file = "./data/process/sd.vec.genus.rds") -sd.vec.sp <- readRDS(file = "./data/process/sd.vec.sp.rds") -sd.vec.genus <- readRDS(file = "./data/process/sd.vec.genus.rds") - - -###################################################################################################### add columns with mean sd per species or per genus depending on whether species -###################################################################################################### or genus data - - - - -#### add column with the mean sd species or genus -data.TRY.sd.update <- data.frame(data.ifn.species.try.noout, data.ifn.species.try.noout[, - sd.names]) - -sd.names.1 <- paste(sd.names, 1, sep = ".") - -for (i in 1:length(sd.names.1)) { - data.TRY.sd.update[[sd.names.1[i]]][!data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.sp[i] - data.TRY.sd.update[[sd.names.1[i]]][data.TRY.sd.update[[genus.names[i]]]] <- sd.vec.genus[i] -} -head(data.TRY.sd.update, 10) - -saveRDS(data.TRY.sd.update, file = "./data/process/data.TRY.sd.update.rds") - - - - - - - - - diff --git a/merge.data.BCI.R b/merge.data.BCI.R index 2c93036a16131ee8a187758976bfcd3a28db9e1c..6ff08ef067ab506805f87541e317ab4f94e96965 100644 --- a/merge.data.BCI.R +++ b/merge.data.BCI.R @@ -1,11 +1,12 @@ ### MERGE BCI DATA rm(list = ls()) source("./R/format.function.R") +source("./R/FUN.TRY.R") library(reshape) -######################### READ DATA read individuals tree data Requires careful formatting of 7 census -######################### datasets The raw data is such that, once a tree dies in census X, then it no -######################### longer exists in census X+1, X+2 etc... +############# READ DATA read individuals tree data Requires careful formatting of 7 census +############ datasets The raw data is such that, once a tree dies in census X, then it no +########## longer exists in census X+1, X+2 etc... data.bci1 <- read.table("./data/raw/DataBCI/census1/PlotsDataReport.txt", header = TRUE, stringsAsFactors = FALSE, sep = "\t") data.bci1$Date1 <- data.bci1$Date; data.bci1$Date <- NULL @@ -41,39 +42,42 @@ rm(big.bci) ### read species names species.clean <- read.table("./data/raw/DataBCI/TaxonomyDataReport.txt", stringsAsFactors = FALSE, header = T, sep = "\t") +species.clean$Latin_name <- paste(species.clean[["Genus"]], + species.clean[["species"]],sep=" ") -## Try to relate SpeciesID in species.clean species names in data.bci -data.bci$sp = species.clean$SpeciesID[match(data.bci$Latin, paste(species.clean[["Genus"]], - species.clean[["species"]]))] -length(unique(data.bci$sp)) +## ## Try to relate SpeciesID in species.clean species names in data.bci +## unique(data.bci$Latin) %in% species.clean$Latin_name ###################################### MASSAGE TRAIT DATA Use HEIGHT_AVG, LMALAM_AVD, SEED_DRY data.trait <- read.csv("./data/raw/DataBCI/BCITRAITS_20101220.csv", stringsAsFactors = FALSE, header = T) data.trait$Latin <- apply(data.trait[, 1:2], 1, paste, collapse = " ") data.trait <- data.trait[,c("GENUS.","SP.","Latin","SEED_DRY","LMALAM_AVD","LMALAM_SED","LMALAM_ND","SG100C_AVG","SG100C_SEM","SG100C_N","HEIGHT_AVG","HEIGHT_SEM","HEIGHT_N")] -data.trait$sp <- data.trait[["SP."]]; data.trait[["SP."]] <- NULL -data.trait$Leaf.N.mean <- NA; -data.trait$Leaf.N.sd <- NA; -data.trait$Seed.mass.mean <- data.trait$SEED_DRY*1000; data.trait$SEED_DRY <- NULL -data.trait$Seed.mass.sd <- NA; -data.trait$SLA.mean <- 1/data.trait$LMALAM_AVD; data.trait$SLA.mean <- data.trait$SLA.mean*1000 ## Conversion from g m^-2 to mm2 mg^-1 -data.trait$SLA.sd <- 1/data.trait$LMALAM_SED; data.trait$SLA.sd <- data.trait$SLA.sd*1000 ## Conversion from g m^-2 to mm2 mg^-1 -data.trait$SLA.sd <- data.trait$SLA.sd*sqrt(data.trait$LMALAM_ND) +data.trait$sp <- data.trait[["SP."]] +data.trait[["SP."]] <- NULL +data.trait$Leaf.N.mean <- NA +data.trait$Leaf.N.sd <- NA +data.trait$Seed.mass.mean <- data.trait$SEED_DRY*1000 +data.trait$SEED_DRY <- NULL +data.trait$Seed.mass.sd <- NA +data.trait$SLA.mean <- 1/data.trait$LMALAM_AVD +data.trait$SLA.mean <- data.trait$SLA.mean*1000 ## Conversion from g m^-2 to mm2 mg^-1 +data.trait$SLA.sd <- 1/data.trait$LMALAM_SED +data.trait$SLA.sd <- data.trait$SLA.sd*1000 ## Conversion from g m^-2 to mm2 mg^-1 +data.trait$SLA.sd <- data.trait$SLA.sd*sqrt(data.trait$LMALAM_ND) ## conversion of SEM in SD data.trait$LMALAM_AVD <- data.trait$LMALAM_SED <- data.trait$LMALAM_ND <- NULL data.trait$Wood.density.mean <- data.trait$SG100C_AVG; -data.trait$Wood.density.sd <- data.trait$SG100C_SEM*sqrt(data.trait$SG100C_N) +data.trait$Wood.density.sd <- data.trait$SG100C_SEM*sqrt(data.trait$SG100C_N) ## conversion of SEM in SD data.trait$SG100C_AVG <- data.trait$SG100C_N <- data.trait$SG100C_SEM <- NULL -data.trait$Max.height.mean <- log10(data.trait$HEIGHT_AVG) -data.trait$Max.height.sd <- log10(data.trait$HEIGHT_SEM*sqrt(data.trait$HEIGHT_N)) +data.trait$Max.height.mean <- (data.trait$HEIGHT_AVG) +data.trait$Max.height.sd <- (data.trait$HEIGHT_SEM*sqrt(data.trait$HEIGHT_N)) data.trait$HEIGHT_SEM <- data.trait$HEIGHT_N <- data.trait$HEIGHT_AVG <- NULL +data.trait$Latin_name <- sub(" ","_",data.trait$Latin) -#data.bci <- merge(data.bci, data.trait[, c(ncol(data.trait), 3, 7:10, 13, 15, 18, -# 20:21)], by = "Latin", all.x = T) +## THIS NEW FUNCTION IS WORKING +fun.extract.format.sp.traits.NOT.TRY(sp=unique(data.bci$Latin), Latin_name=unique(data.bci$Latin), data=data.trait,name.match.traits="Latin") -## NO NEED TO MERGE NEED TO DECIDE STANDARD STRUCTURE FOR TRAITS AS TRY !! - ########################################## FORMAT INDIVIDUAL TREE DATA data.bci <- data.bci[order(data.bci[["TreeID"]]),] data.bci$Date1 <- as.Date(data.bci$Date1) @@ -82,58 +86,49 @@ data.bci$Date2 <- as.Date(data.bci$Date2) # data.bci$yr2 <- format(strptime(data.bci$Date2, format = '%Y-%m-%d'),'%Y') data.bci$year <- as.numeric(difftime(data.bci$Date2, data.bci$Date1, units = "weeks")/52) ## Not rounded data.bci$obs.id <- apply(data.bci[,c("TreeID","Census")],1,paste,collapse="_") -data.bci$treeid <- data.bci$TreeID +data.bci$tree.id <- data.bci$TreeID data.bci$x <- data.bci$gx data.bci$y <- data.bci$gy ## change unit and names of variables to be the same in all data for the tree data.bci$G <- 10 * (data.bci$DBH1 - data.bci$DBH1)/data.bci$year ## diameter growth in mm per year - BASED ON UNROUNDED YEARS -data.bci$D <- data.bci[["DBH1"]] -data.bci$plot <- data.bci[["Quadrat"]] ## plot code? +data.bci$D <- data.bci[["DBH1"]]/10 +data.bci$subplot <- data.bci[["Quadrat"]] ## +data.bci$plot <- rep(1,nrow(data.bci)) data.bci$htot <- NA data.bci$sp.name <- data.bci$Latin -###################### ECOREGION bci has only 1 eco-region +data.bci$sp <- sub(" ","_",data.bci$sp.name) +data.bci$sp.name <- data.bci$sp +data.bci$census <- data.bci$Census -###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is -###################### missing compute numer of dead per plot to remove plot with disturbance +###################### ECOREGION bci has only 1 eco-region -perc.dead <- tapply(data.bci[["dead"]], INDEX = data.bci[["plot"]], FUN = function.perc.dead2) +###################### PERCENT DEAD +perc.dead <- tapply(data.bci[["dead"]], INDEX = data.bci[["plot"]], + FUN = function.perc.dead2) data.bci <- merge(data.bci, data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) -########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 -table(data.bci$dead) -data.bci <- data.bci[data.bci$dead == 0,] ## vec.abio.var.names <- NA ## MISSING -vec.basic.var <- c("treeid", "sp", "sp.name", "plot", "D", "G", "dead", "year", "htot", - "x", "y", "perc.dead") +vec.basic.var <- c("tree.id","obs.id", "sp", "sp.name", "plot", "D", "G", "dead", "year", "htot", + "x", "y", "perc.dead","census") data.tree <- subset(data.bci, select = c(vec.basic.var)) ############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in ############################################## m^2/ha without the target species -data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.bci[["TreeID"]]), diam = as.vector(data.bci[["D"]]), - sp = as.vector(data.bci[["sp"]]), id.plot = as.vector(data.bci[["plot"]]), weights = data.bic[["weights"]], weight.full.plot = NA) - -## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0 -data.BA.SP[, -1][data.BA.SP[, -1] < 0] <- 0 - -### CHECK IF sp and sp name for column are the same -if (sum(!(names(data.BA.SP)[-1] %in% unique(data.bci[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") - -#### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[, -1], 1, sum, na.rm = TRUE) -data.BA.SP$BATOT.COMPET <- BATOT.COMPET -rm(BATOT.COMPET) - -### create data frame -names(data.BA.SP) <- c("TreeID", names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(TreeID = data.bci[["TreeID"]], ecocode = data.bci[["ecocode"]]), - data.BA.SP, by = "TreeID", sort = FALSE) -## test -if (sum(!data.BA.sp[["TreeID"]] == data.tree[["TreeID"]]) > 0) stop("competition index not in the same order than data.tree") - -## save everything as a list -list.bci <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) -save(list.bci, file = "./data/process/list.bci.Rdata") + +data.tree <- subset(data.tree,subset=!is.na(data.tree[["D"]])) +data.tree <- subset(data.tree,subset=data.tree[["D"]]>5) ### select only tree below 5cm of dbh to start + +### species as factor because number +data.tree[['sp']] <- factor(data.tree[['sp']]) +Rlim <- 15 # set size of neighborhood for competition index + +## for each census compute competition index + +## FOR CENSUS 1 +data.tree1 <- data.tree[1:1000,] + +fun.data.per.bigplot(data=data.tree1,name.site="BCI",data.TRAITS=data.trait,Rlim=15,xy.name=c("x","y")) +ls() diff --git a/merge.data.SPAIN.R b/merge.data.SPAIN.R index 3ebf106a73995d89844880606e0b641fae830053..c8033cbfdb6a0ac4c9bf76771f2ecefda321c379 100644 --- a/merge.data.SPAIN.R +++ b/merge.data.SPAIN.R @@ -134,7 +134,7 @@ ecoregion.unique <- unique(data.spain[["ecocode"]]) sum(data.spain[["ecocode"]] == ecoregion.unique[1]) ### read TRY data -TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds") +TRY.DATA.FORMATED <- readRDS("./data/process/data.TRY.std.rds") ## create lookup table for spain species.lookup <- data.frame(sp=data.spain[!duplicated(data.spain[["sp"]]),"sp"], diff --git a/merge.data.SWISS.R b/merge.data.SWISS.R index 71896ef1cf9a582c35f2fdf31bcc931dfb553cbc..1bc1458c9e7f6b00bca647e2f64d348669ef6e87 100644 --- a/merge.data.SWISS.R +++ b/merge.data.SWISS.R @@ -108,7 +108,7 @@ ecoregion.unique <- unique(data.swiss[["ecocode"]]) ### read TRY data -TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds") +TRY.DATA.FORMATED <- readRDS("./data/process/data.TRY.std.rds") ## create lookup table for swiss species.lookup <- data.frame(sp=data.swiss[!duplicated(data.swiss[["sp"]]),"sp"], diff --git a/merge.data.US.R b/merge.data.US.R index 307cfcc9c334ff3b366941f8844e834f58df9c7f..4ecc8f1b9f42fdc254e0e46ea7ba1146c65d9250 100644 --- a/merge.data.US.R +++ b/merge.data.US.R @@ -92,7 +92,7 @@ rm(data.us) gc() ### read TRY data -TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds") +TRY.DATA.FORMATED <- readRDS("./data/process/data.TRY.std.rds") #### GENERATE ONE OBJECT PER ECOREGION