### merge.data.PARACOU.R
### MERGE paracou DATA
### Edited by FH
## NOTE(review): rm(list = ls()) wipes every object in the calling workspace.
## It is kept so the script behaves as before, but consider running the script
## in a fresh session instead.
rm(list = ls())
source("./R/format.function.R")
library(reshape)
#########################
## READ DATA
####################
### read individual tree data (the string "NULL" in the raw file is read as NA)
data.paracou <- read.table(
  "./data/raw/DataParacou/20130717_paracou_1984_2012.csv",
  header = TRUE, stringsAsFactors = FALSE, sep = ";", na.strings = "NULL"
)
#barplot(apply(!is.na(data.paracou[,paste("circ_",1984:2012,sep="")]),MARGIN=2,FUN=sum),las=3)
## keep only the columns used below and rename them
raw.cols <- c("foret", "parcelle", "carre", "arbre", "vernaculaire", "idtaxon",
              "x", "y", "circ_2001", "code_2001", "circ_2005", "code_2005",
              "circ_2009", "code_2009", "campagne_mort", "type_mort")
new.cols <- c("forest", "plot", "subplot", "tree", "vernacular", "taxonid",
              "x", "y", "circum2001", "code2001", "circum2005", "code2005",
              "circum2009", "code2009", "yeardied", "typedeath")
data.paracou <- data.paracou[, raw.cols]
colnames(data.paracou) <- new.cols
### change numeric separator: replace the decimal comma with a dot, then
### convert to numeric. Columns are addressed by name rather than by position
### so the loop keeps working if the column selection above changes.
num.cols <- c("x", "y", "circum2001", "code2001", "circum2005", "code2005",
              "circum2009", "code2009")
for (k in num.cols) {
  data.paracou[[k]] <- as.numeric(gsub(",", ".", data.paracou[[k]], fixed = TRUE))
}
## Create a tree id of the form forest.plot.subplot.tree.
## paste() is called on the columns directly: apply() over a data frame with
## mixed column types goes through as.matrix(), which format()s the numeric
## columns to a common width and therefore embeds padding spaces in the ids.
data.paracou$treeid <- paste(data.paracou[["forest"]], data.paracou[["plot"]],
                             data.paracou[["subplot"]], data.paracou[["tree"]],
                             sep = ".")
## move treeid to the first column
data.paracou <- data.paracou[, c("treeid", new.cols)]
## ## plot each plot
## pdf("./figs/plots.paracou.pdf")
## lapply(unique(data.paracou[["plot"]]),FUN=fun.circles.plot,data.paracou[['x']],data.paracou[['y']],data.paracou[["plot"]],data.paracou[["circum2009"]],inches=0.2)
## dev.off()
#######################
###### SELECT OBSERVATIONS WITHOUT PROBLEMS
## Keep trees with a known x coordinate and with x and y both below 251 m;
## which() also drops the rows for which the test evaluates to NA.
data.paracou <- data.paracou[
  with(data.paracou, which(!is.na(x) & x < 251 & y < 251)), , drop = FALSE]
#### REMOVE PLOTS 16 17 18 ACCORDING TO GHSILAIN
data.paracou <- data.paracou[
  with(data.paracou, which(!(plot %in% 16:18))), , drop = FALSE]
## keep only trees alive in 2001: a tree stays if it has no recorded year of
## death or if that year is later than 2001
data.paracou <- data.paracou[
  with(data.paracou, which(as.numeric(yeardied) > 2001 | is.na(yeardied))), ,
  drop = FALSE]
######################################
## MASSAGE TRAIT DATA
############################
##########################################
## FORMAT INDIVIDUAL TREE DATA
#############
## Reshape to one row per tree and census interval (2001-2005 and 2005-2009).
## Rows are interleaved: tree 1 interval 1, tree 1 interval 2, tree 2 interval 1, ...
## Every per-interval vector built below must follow that same interleaved
## order, which is what c(rbind(first, second)) produces.
n.tree <- nrow(data.paracou)
keep.cols <- c(1:10, (ncol(data.paracou) - 2):ncol(data.paracou))
data.paracou2 <- data.paracou[rep(seq_len(n.tree), each = 2), keep.cols]
rownames(data.paracou2) <- seq_len(nrow(data.paracou2))
data.paracou2$yr1 <- rep(c(2001, 2005), n.tree)
data.paracou2$yr2 <- rep(c(2005, 2009), n.tree)
data.paracou2$year <- rep(4, 2 * n.tree) ## length of each census interval in years
data.paracou2$dbh1 <- c(rbind(data.paracou$circum2001/pi, data.paracou$circum2005/pi))
data.paracou2$dbh2 <- c(rbind(data.paracou$circum2005/pi, data.paracou$circum2009/pi))
data.paracou2$code1 <- c(as.numeric(rbind(data.paracou$code2001, data.paracou$code2005)))
data.paracou2$code2 <- c(as.numeric(rbind(data.paracou$code2005, data.paracou$code2009)))
## Death flag per interval. %in% never returns NA, so a missing year of death
## counts as "not dead". The two per-tree masks are interleaved with rbind();
## concatenating them (all first intervals, then all second intervals) would
## flag the wrong rows.
year.died <- as.numeric(data.paracou[["yeardied"]])
died.first <- year.died %in% 2002:2005
died.second <- year.died %in% 2006:2009
data.paracou2$dead <- as.numeric(c(rbind(died.first, died.second)))
## Species code: taken from the already duplicated table so it stays aligned
## with the rows (assigning the n-row vector would be silently recycled in the
## wrong order).
data.paracou2$sp <- data.paracou2[["taxonid"]]
## remove the second interval (2005-2009) of trees that died during the first
## one (2001-2005); the per-tree mask is expanded with each = 2 to match rows
dead.before.second <- data.paracou2[["yr1"]] == 2005 & rep(died.first, each = 2)
data.paracou <- data.paracou2[!dead.before.second, , drop = FALSE]
## change unit and names of variables to be the same in all data for the tree
data.paracou$G <- 10*(data.paracou$dbh2 - data.paracou$dbh1)/data.paracou$year ## diameter growth in mm per year
data.paracou$G[data.paracou$code1 > 0] <- NA ## indivs with code indicating problem in dbh measurement at dbh1
data.paracou$G[data.paracou$code2 > 0] <- NA ## indivs with code indicating problem in dbh measurement at dbh2
data.paracou[which(data.paracou$G < -50), ] ## THERE SEEMS TO BE SOME PROBLEMS WITH THE DBH DATA ## much less issue after removing diam problem
data.paracou$D <- data.paracou[["dbh1"]]
data.paracou$D[data.paracou$D == 0] <- NA ## diameter in cm
## plot code: the existing plot column is used unchanged; alternative kept for reference:
## apply(data.paracou[,c("forest","plot","subplot")],1,paste,collapse=".")
## This section was collapsed onto single lines, so everything after the first
## '#' on each line was treated as a comment and never executed. The statements
## are restored one per line below.
data.paracou$htot <- rep(NA, length(data.paracou[["G"]])) ## height of tree in m - MISSING
data.paracou$obs.id <- seq_len(nrow(data.paracou))
### delete recruit in 2001 or 2005 for first census
data.paracou <- subset(data.paracou, subset = !is.na(data.paracou$D))
## minimum circumference 30: delete all trees with a dbh < 30/pi
data.paracou <- subset(data.paracou, subset = data.paracou[["D"]] > (30/pi))

######################
## ECOREGION
###################
## paracou has only 1 eco-region YES NO ECOREGION

######################
## PERCENT DEAD
###################
## variable percent dead
## compute number of dead per plot to remove plot with disturbance
## THERE ARE LOTS OF NAs - DID YOU WANT TO REMOVE THEM OR TREAT THEM AS ALIVE
perc.dead <- tapply(data.paracou[["dead"]], INDEX = data.paracou[["plot"]], FUN = function.perc.dead2)
data.paracou <- merge(data.paracou, data.frame(plot = names(perc.dead), perc.dead = perc.dead),
                      by = "plot", sort = FALSE)

###########################################################
### VARIABLES SELECTION FOR THE ANALYSIS
###################
#vec.abio.var.names <- c("MAT","MAP") ## MISSING NEED OTHER BASED ON TOPOGRAPHY ASK BRUNO
vec.basic.var <- c("obs.id", "treeid", "sp", "plot", "D", "G", "dead", "year", "htot", "x", "y", "perc.dead")
data.tree <- subset(data.paracou, select = c(vec.basic.var)) #,vec.abio.var.names

##############################################
## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species
###########################
## NEED TO COMPUTE BASED ON RADIUS AROUND TARGET TREE
### species as factor because number
data.tree[["sp"]] <- factor(data.tree[["sp"]])
Rlim <- 15 # set size of neighborhood for competition index
## system.time(test <- fun.compute.BA.SP.XY.per.plot(1,data.tree=data.tree,Rlim=Rlim,parallel=TRUE,rpuDist=FALSE))
library(doParallel)
list.BA.SP.data <- mclapply(unique(data.tree[["plot"]]), FUN = fun.compute.BA.SP.XY.per.plot,
                            data.tree = data.tree, Rlim = Rlim, mc.cores = 4)
data.BA.sp <- rbind.fill(list.BA.SP.data)
## R is case-sensitive: the object is data.BA.sp, not data.BA.SP
dim(data.BA.sp)

### TEST DATA FORMAT
## NOTE(review): BA.SP.temp and data.tree.s are not created anywhere in the
## visible script, so these two checks only run when both objects exist.
## Confirm whether they are left over from another data set.
if (exists("BA.SP.temp") && exists("data.tree.s")) {
  if (sum(!rownames(BA.SP.temp) == data.tree.s[["obs.id"]]) > 0) stop('rows not in the good order')
  if (sum(!colnames(BA.SP.temp) == as.character(levels(data.tree.s[["sp"]]))) > 0) stop('colnames does mot match species name')
}
## test same order as data.tree (a different number of rows is also an error,
## because the buffer-zone mask below is applied to both tables)
if (nrow(data.BA.sp) != nrow(data.tree) ||
    sum(!data.BA.sp[["obs.id"]] == data.tree[["obs.id"]]) > 0) {
  stop("competition index not in the same order than data.tree")
}

################################################
## REMOVE TREE IN BUFFER ZONE
## a tree is kept only if it is more than Rlim away from every edge of the
## 0-250 coordinate range
not.in.buffer.zone <- (data.tree[["x"]] < (250 - Rlim) & data.tree[["x"]] > (0 + Rlim) &
                       data.tree[["y"]] < (250 - Rlim) & data.tree[["y"]] > (0 + Rlim))
# apply the same mask to both tables so they stay row-aligned
data.tree <- subset(data.tree, subset = not.in.buffer.zone)
data.BA.sp <- subset(data.BA.sp, subset = not.in.buffer.zone)

########################
#########################
##### TRAITS
## This section was collapsed onto a single comment line, so none of it was
## executed. The statements are restored one per line below.
### read species names
species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",
                          stringsAsFactors = FALSE, header = TRUE, sep = ";")
species.clean$sp <- species.clean[["idTaxon"]]
species.clean$Latin_name <- paste(species.clean[["Genre"]], species.clean[["Espece"]], sep = " ")
## keep only one row per idTaxon
species.clean <- subset(species.clean, subset = !duplicated(species.clean[["sp"]]),
                        select = c("sp", "Latin_name", "Genre", "Espece", "Famille"))
## select only species present in data base
species.clean <- subset(species.clean, subset = species.clean[["sp"]] %in% data.tree[["sp"]])
## percentage of species with no taxonomic identification
length(grep("Indet", species.clean[["Latin_name"]]))/nrow(species.clean) ## 25%

### need to read the different traits data based and merge .....
bridge <- read.csv("./data/raw/DataParacou/BridgeDATA.g.csv",
                   stringsAsFactors = FALSE, header = TRUE, sep = ";")
bridge$Latin_name <- paste(bridge[["Genus"]], bridge[["species"]], sep = " ")
dataWD <- read.csv("./data/raw/DataParacou/WD-Species-Paracou-Ervan_GV.csv",
                   stringsAsFactors = FALSE, header = TRUE, sep = " ")
seed.traits <- read.csv("./data/raw/DataParacou/Autour de Paracou - Releves par trait et taxon.txt",
                        stringsAsFactors = FALSE, header = TRUE, sep = "\t")
### share of species whose Latin name is found in the bridge trait table
sum(species.clean[["Latin_name"]] %in% bridge[["Latin_name"]])/length(species.clean[["Latin_name"]])
## only 307 species /775 are in the traits data ....

## save everything as a list
## NOTE(review): nothing in the visible script builds data.traits (the trait
## tables above are read but not merged). A NULL placeholder is stored so the
## list keeps its three elements; replace it once the merge is written.
if (!exists("data.traits")) {
  warning("data.traits has not been built; saving NULL in its place", call. = FALSE)
  data.traits <- NULL
}
list.paracou <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits)
## save the list built here (the previous call named list.spain, an object
## that this script never creates)
save(list.paracou, file = "./data/process/list.paracou.Rdata")