diff --git a/R/format.function.R b/R/format.function.R index c3cc78ca31a090dae049fc4896a5f0667272422b..e352396d18379fc2062b78bb36d84e5177f48fd5 100644 --- a/R/format.function.R +++ b/R/format.function.R @@ -110,15 +110,37 @@ return( (data.merge)) #### function with X Y coordinates based on a neighborhood of radius R -BA.SP.FUN.XY <- function(obs.id,xy.table,diam,sp,Rlim){ -dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE)) +BA.SP.FUN.XY <- function(obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ +rownames(xy.table) <- obs.id +if(rpuDist){ + require(rpud) + dist.mat <- rpuDist(xy.table,upper=TRUE,diag=TRUE) +}else{ + dist.mat <- as.matrix(dist(xy.table,upper=TRUE,diag=TRUE)) +} +print('distance matrix computed') dist.mat[dist.mat <Rlim] <- 1 dist.mat[dist.mat >Rlim] <- 0 diag(dist.mat) <- 0 -BA <- BA.fun(diam,weights=10000/(pi*Rlim^2)) +print('distance matrix set to 0 1') +BA <- BA.fun(diam,weights=1/(pi*Rlim^2)) BA.mat <- matrix(rep(BA,length(BA)),nrow=length(BA),byrow=TRUE) +print('starting tapply over species') fun.sum.sp <- function(x,sp) tapply(x,INDEX=sp,FUN=sum,na.rm=TRUE) -return(t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp))) + if(parallel){ + ## parallel version + require(doParallel) + registerDoParallel(cores=4) + mat <- dist.mat*BA.mat + res.temp <- foreach(i=1:nrow(mat), .combine=rbind) %dopar% { + fun.sum.sp(mat[i,],sp) + } + rownames(res.temp) <- obs.id + return((res.temp)) + }else{ + res.temp <- t(apply(dist.mat*BA.mat,MARGIN=1,FUN=fun.sum.sp ,sp)) + return(res.temp) + } } diff --git a/merge.data.PARACOU.R b/merge.data.PARACOU.R index fd412fa7c84a2fac059acfc9e32f13ae471db51b..41163e869094739565bedf6d73daa6497d6ade2c 100644 --- a/merge.data.PARACOU.R +++ b/merge.data.PARACOU.R @@ -15,40 +15,26 @@ data.paracou <- data.paracou[,c("foret","parcelle","carre","arbre","vernaculaire "circ_2009","code_2009","campagne_mort","type_mort")] colnames(data.paracou) <- c("forest","plot","subplot","tree","vernacular","taxonid","x","y","circum2001","code2001","circum2005","code2005","circum2009","code2009","yeardied","typedeath") +### change numeric separator for(k in 7:14) { data.paracou[,k] <- gsub(",",".",data.paracou[,k]); data.paracou[,k] <- as.numeric(data.paracou[,k]) } ## Replace all , in decimals with . data.paracou$treeid <- apply(data.paracou[,1:4],1,paste,collapse="."); ## Create a tree id data.paracou <- data.paracou[,c(ncol(data.paracou),1:(ncol(data.paracou)-1))] -## REMOVE ALL TREES WITH X OR Y >250 m or NA ASK GHISLAIN!!! -data.paracou <- subset(data.paracou,subset=(!is.na(data.paracou[["x"]])) & data.paracou[["x"]]<251 & data.paracou[["y"]]<251) - - -## plot each -pdf("./figs/plots.paracou.pdf") -lapply(unique(data.paracou[["plot"]]),FUN=fun.circles.plot,data.paracou[['x']],data.paracou[['y']],data.paracou[["plot"]],data.paracou[["circum2009"]],inches=0.2) -dev.off() +## ## plot each plot +## pdf("./figs/plots.paracou.pdf") +## lapply(unique(data.paracou[["plot"]]),FUN=fun.circles.plot,data.paracou[['x']],data.paracou[['y']],data.paracou[["plot"]],data.paracou[["circum2009"]],inches=0.2) +## dev.off() -pdf("./figs/plots.paracou.pdf") -by(data.paracou[["circum2009"]],INDICES=data.paracou[["plot"]],FUN=function(x) hist(x,breaks=50)) -dev.off() - -#################### -#### PLOT 16 17 18 ARE STRANGE ASK GHSILAIN -##################### +####################### +###### SELECT OBSERVATION WITHOUT PROBLEMS +## REMOVE ALL TREES WITH X OR Y >250 m +data.paracou <- subset(data.paracou,subset=(!is.na(data.paracou[["x"]])) & data.paracou[["x"]]<251 & data.paracou[["y"]]<251) +#### REMOVE PLOTs 16 17 18 ACCORDING TO GHSILAIN data.paracou <- subset(data.paracou,subset=! data.paracou[["plot"]] %in% 16:18) - ## keep only tree alive in 2001 data.paracou <- subset(data.paracou,subset=!(as.numeric(data.paracou[["yeardied"]])<=2001 & !is.na(data.paracou[["yeardied"]]))) -### read species names -species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";") - -## SPECIES CODE COME FROM idTaxon in paracou_taxonomie and taxonid in paracou_1984_2012 to match the traits data we need to use the "Genus species" -## we better work not work with vernacular because this doesn't match necesseraly the Genus species taxonomie - -species.clean$sp <- species.clean[["idTaxon"]] -## data.paracou <- merge(data.paracou, as.data.frame(species.clean[!duplicated(species.clean[["sp"]]),c("Genre","Espece","sp")]), by = "sp", sort = FALSE) ###################################### ## MASSAGE TRAIT DATA @@ -70,7 +56,7 @@ data.paracou2$dead[c(as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!i as.numeric(data.paracou[["yeardied"]]) %in% 2006:2009 & (!is.na(data.paracou[["yeardied"]])))] <- 1 data.paracou2$sp <- data.paracou[["taxonid"]] -## remove tree dead in 2005 in second census (2005-2009) +## remove tree dead at first census for both date (census 2001-2005 2005-2009) data.paracou <- subset(data.paracou2,subset=!(data.paracou2[['yr1']] ==2005 & (as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]]))))) @@ -79,7 +65,7 @@ data.paracou$G <- 10*(data.paracou$dbh2-data.paracou$dbh1)/data.paracou$year ## data.paracou$G[data.paracou$code1>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh1 data.paracou$G[data.paracou$code2>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh2 -data.paracou[which(data.paracou$G < -50),] ## THERE SEEMS TO BE SOME PROBLEMS WITH THE DBH DATA ## much less issue +data.paracou[which(data.paracou$G < -50),] ## THERE SEEMS TO BE SOME PROBLEMS WITH THE DBH DATA ## much less issue after removing diam problem data.paracou$D <- data.paracou[["dbh1"]]; data.paracou$D[data.paracou$D == 0] <- NA ;## diameter in cm data.paracou$plot <- data.paracou$plot#apply(data.paracou[,c("forest","plot","subplot")],1,paste,collapse=".") ## plot code data.paracou$htot <- rep(NA,length(data.paracou[["G"]])) ## height of tree in m - MISSING @@ -106,14 +92,12 @@ perc.dead <- tapply(data.paracou[["dead"]],INDEX=data.paracou[["plot"]],FUN=func data.paracou <- merge(data.paracou,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) ########################################################### -### PLOT SELECTION FOR THE ANALYSIS +### VARIABLES SELECTION FOR THE ANALYSIS ################### -## Nothing to remove - #vec.abio.var.names <- c("MAT","MAP") ## MISSING NEED OTHER BASED ON TOPOGRAPHY ASK BRUNO vec.basic.var <- c("obs.id","treeid","sp","plot","D","G","dead","year","htot","x","y","perc.dead") -data.tree <- subset(data.paracou,select=c(vec.basic.var)) +data.tree <- subset(data.paracou,select=c(vec.basic.var)) #,vec.abio.var.names ############################################## ## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species @@ -121,30 +105,74 @@ data.tree <- subset(data.paracou,select=c(vec.basic.var)) ## NEED TO COMPUTE BASED ON RADIUS AROUND TARGET TREE -## TODO NEED TO ADD BUFFER ZONE -data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==1) -BA.SP.FUN.XY(obs.id,xy.table,diam,sp,Rlim){ +### species as factor because number +data.tree[['sp']] <- factor(data.tree[['sp']]) + + +#### function to be apply per site +fun.compute.BA.SP.XY.per.plot <- function(i,data.tree,Rlim,xy.name=c('x','y'),parallel=FALSE,rpuDist=FALSE){ +data.tree.s <- subset(data.tree,subset=data.tree[["plot"]] ==i) +BA.SP.temp <- BA.SP.FUN.XY(obs.id=data.tree.s[['obs.id']], + xy.table=data.tree.s[,xy.name], + diam=data.tree.s[['D']], + sp=(data.tree.s[['sp']]), + Rlim=15, + parallel=FALSE, + rpuDist=FALSE) + +## replace NA per zero +print('replacing NA per zero') +BA.SP.temp[is.na(BA.SP.temp)] <- 0 +print('done') +### rpud installation very cumbersome not needed ? +### longer in parallel why ? +if(sum(! rownames(BA.SP.temp)==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') +if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') + +### compute sum per row +BATOT <- apply(BA.SP.temp,MARGIN=1,FUN=sum) +data.res <- data.frame(obs.id=data.tree.s[['obs.id']],BA.SP.temp,BATOT=BATOT) +return(data.res) +} + +test <- fun.compute.BA.SP.XY.per.plot(1,data.tree=data.tree,Rlim=15) + +list.BA.SP.data <- mclapply(unique(data.tree[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.tree,Rlim=15,mc.cores=4) +data.BA.SP <- rbind.fill(list.BA.SP.data) +dim(data.BA.SP) + +### TEST DATA FORMAT +if(sum(! rownames(BA.SP.temp)==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') +if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') +## test same order as data.tree +if(sum(!data.BA.SP[["obs.id"]] == data.tree[["obs.id"]]) >0) stop("competition index not in the same order than data.tree") + +## REMOVE TREE IN BUFFER ZONE BUFFER ZONE +## plot each plot +pdf("./figs/plots.tree.pdf") +lapply(unique(data.tree[["plot"]]),FUN=fun.circles.plot,data.tree[['x']],data.tree[['y']],data.tree[["plot"]],data.tree[["D"]],inches=0.2) +dev.off() + + +######################## +######################### +##### TRAITS -data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.paracou[["treeid"]]), diam=as.vector(data.paracou[["D"]]), - sp=as.vector(data.paracou[["sp"]]), id.plot=as.vector(data.paracou[["plot"]]), - weights=1/(pi*(0.5*data.paracou$D/100)^2), weight.full.plot=NA) +### read species names +species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";") -## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 +### need to read the different traits data based and merge ..... + + +## SPECIES CODE COME FROM idTaxon in paracou_taxonomie and taxonid in paracou_1984_2012 to match the traits data we need to use the "Genus species" +## we better work not work with vernacular because this doesn't match necesseraly the Genus species taxonomie + +species.clean$sp <- species.clean[["idTaxon"]] +## data.paracou <- merge(data.paracou, as.data.frame(species.clean[!duplicated(species.clean[["sp"]]),c("Genre","Espece","sp")]), by = "sp", sort = FALSE) -### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data.paracou[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") -#### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE) -data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) -### create data frame -names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id=data.paracou[["tree.id"]],ecocode=data.paracou[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) -## test -if(sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") ## save everything as a list list.paracou <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits)