From e700ea0c06b6f757e5dd6a6789805374a80de15e Mon Sep 17 00:00:00 2001
From: fhui28 <fhui28@gmail.com>
Date: Tue, 20 Aug 2013 11:11:15 +1000
Subject: [PATCH] merge data script paracou

---
 merge.data.PARACOU.R | 110 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 merge.data.PARACOU.R

diff --git a/merge.data.PARACOU.R b/merge.data.PARACOU.R
new file mode 100644
index 0000000..5a3c196
--- /dev/null
+++ b/merge.data.PARACOU.R
@@ -0,0 +1,110 @@
+### MERGE paracou DATA
+### Edited by FH
+rm(list = ls()); source("./R/format.function.R"); library(reshape)
+
+#########################
+## READ DATA
+####################
+### Load the individual-tree census table (semicolon-separated; the string "NULL" is read as a missing value)
+data.paracou <- read.table(file = "./data/raw/DataParacou/20130717_paracou_1984_2012.csv", sep = ";", header = TRUE, na.strings = "NULL", stringsAsFactors = FALSE)
+#barplot(apply(!is.na(data.paracou[,paste("circ_",1984:2012,sep="")]),MARGIN=2,FUN=sum),las=3)
+
+data.paracou <- data.paracou[, c(1:8, 61:62, 73:74, 85:86, 96:97)] ## keep the first eight columns plus the eight columns renamed circum/dead 2001-2009, yeardied and typedeath below
+names(data.paracou) <- c("forest","plot","subplot","tree","vernacular","taxonid","x","y","circum2001","dead2001","circum2005","dead2005","circum2009","dead2009","yeardied","typedeath")
+## Columns 7 to 14 (x through dead2009): replace the decimal comma with a point, then convert to numeric
+data.paracou[7:14] <- lapply(data.paracou[7:14], function(v) as.numeric(gsub(",", ".", v)))
+data.paracou$treeid <- apply(data.paracou[, 1:4], MARGIN = 1, FUN = paste, collapse = ".") ## tree id = forest.plot.subplot.tree
+data.paracou <- data.paracou[, c("treeid", setdiff(names(data.paracou), "treeid"))] ## move treeid to the first column
+
+### read species names (taxonomy table, semicolon-separated)
+species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";")
+
+## Create a species code to try and merge species.clean and data.paracou - NEED TO CHECK WHETHER SPCODE COMES FROM TAXONID OR VERNACULAR OR BOTH
+species.clean$sp <- apply(species.clean[,1:5],1,paste,collapse=".") ## key = first five columns of the taxonomy table joined with "."
+data.paracou$sp <- apply(data.paracou[,3:7],1,paste,collapse=".") ## key = plot, subplot, tree, vernacular, taxonid (treeid is column 1 here) - NOTE(review): confirm these match the five taxonomy columns
+data.paracou <- merge(data.paracou, as.data.frame(species.clean[,c(10,13)]), by = "sp", sort = F) ## default merge keeps only rows whose key matches; "sp" becomes column 1 - presumably column 13 is the sp key added above, TODO confirm
+## "Simplify" the species.code to just vernacular and taxonid, which is hopefully sufficient to identify species
+species.clean$sp = apply(species.clean[,4:5],1,paste,collapse=".")
+data.paracou$sp = apply(data.paracou[,7:8],1,paste,collapse=".") ## after the merge, columns 7:8 of data.paracou are vernacular and taxonid
+length(unique(data.paracou$sp)) ## number of distinct species codes (shown only when the script is run with echo / interactively)
+
+######################################
+## MASSAGE TRAIT DATA
+############################
+## HEIGHT DATA IS AVAILABLE IN BRIDGE.DATA, BUT THE PLOT_IDS, TAXONIDS ETC...DO NOT APPEAR TO MATCH DATA.PARACOU
+
+##########################################
+## FORMAT INDIVIDUAL TREE DATA
+#############
+## Reshape to one row per tree and census interval (2001-2005, then 2005-2009): each tree row is duplicated and the two copies stay adjacent
+data.paracou2 <- as.data.frame(data.paracou[rep(seq_len(nrow(data.paracou)),each=2),c(1:10,(ncol(data.paracou)-2):ncol(data.paracou))]); rownames(data.paracou2) <- seq_len(nrow(data.paracou2))
+data.paracou2$yr1 <- rep(c(2001,2005),nrow(data.paracou)); data.paracou2$yr2 <- rep(c(2005,2009),nrow(data.paracou)) ## first and last census year of each interval
+data.paracou2$year <- data.paracou2$yr2 - data.paracou2$yr1 ## interval length in years, derived from the census years (was hard-coded to 5 although yr2 - yr1 is 4)
+data.paracou2$dbh1 <- c(rbind(data.paracou$circum2001/pi,data.paracou$circum2005/pi)) ## rbind + c interleaves the two censuses tree by tree; diameter = circumference/pi
+data.paracou2$dbh2 <- c(rbind(data.paracou$circum2005/pi,data.paracou$circum2009/pi))
+data.paracou2$dead <- as.numeric(rbind(data.paracou$dead2005 > 0,data.paracou$dead2009 > 0)) ## 1 when the dead column of the closing census is > 0, 0 otherwise, NA when it is missing
+
+data.paracou <- data.paracou2
+
+## Convert units and rename variables to the names shared by all data sets
+data.paracou[["G"]] <- (data.paracou[["dbh2"]] - data.paracou[["dbh1"]]) * 10 / data.paracou[["year"]] ## diameter growth in mm per year
+data.paracou[which(data.paracou[["G"]] < -50), ] ## THERE SEEMS TO BE SOME PROBLEMS WITH THE DBH DATA
+data.paracou[["D"]] <- replace(data.paracou[["dbh1"]], which(data.paracou[["dbh1"]] == 0), NA) ## diameter in cm, with a diameter of 0 treated as missing
+data.paracou[["plot"]] <- apply(data.paracou[, c("forest", "plot", "subplot")], MARGIN = 1, FUN = paste, collapse = ".") ## plot code = forest.plot.subplot
+data.paracou[["htot"]] <- rep(NA, nrow(data.paracou)) ## height of tree in m - MISSING
+data.paracou$sp.name <- data.paracou$NomVern ## NOTE(review): confirm a NomVern column exists at this point; if it does not, this assigns NULL and no sp.name column is created
+
+######################
+## ECOREGION
+###################
+## paracou has only 1 eco-region?
+
+######################
+## PERCENT DEAD
+###################
+## variable percent dead/cannot do with since dead variable is missing
+## compute number of dead per plot to remove plot with disturbance
+## THERE ARE LOTS OF NAs - DID YOU WANT TO REMOVE THEM OR TREAT THEM AS ALIVE
+
+function.perc.dead2 <- function(dead) { n.known <- sum(!is.na(dead)); share <- sum(dead, na.rm = TRUE)/n.known; if (is.finite(share)) share else NA } ## share of dead among the non-NA entries; NA when the ratio is not finite (e.g. no known status)
+perc.dead <- tapply(X = data.paracou[["dead"]], INDEX = data.paracou[["plot"]], FUN = function.perc.dead2) ## one value per plot code
+data.paracou <- merge(x = data.paracou, y = data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) ## attach the plot-level value to every row of that plot
+
+###########################################################
+### PLOT SELECTION FOR THE ANALYSIS
+###################
+## Remove data with dead == 1
+table(data.paracou$dead)
+## Nothing to remove
+vec.abio.var.names <-  c("MAT","MAP") ## MISSING
+for (abio.var in setdiff(vec.abio.var.names, names(data.paracou))) data.paracou[[abio.var]] <- rep(NA, nrow(data.paracou)) ## add the missing abiotic variables as all-NA columns (as done for htot) so that the selection below does not stop on undefined columns
+vec.basic.var <-  c("treeid","sp","sp.name","plot","D","G","dead","year","htot","x","y","perc.dead") ## NOTE(review): sp.name only exists if data.paracou had a NomVern column - confirm
+data.tree <- subset(data.paracou,select=c(vec.basic.var,vec.abio.var.names)) ## tree-level table: basic variables followed by the abiotic ones
+
+##############################################
+## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species
+###########################
+## DON'T KNOW SUBPLOT SIZE! NOTE(review): no SubPlot_Size column is created in this script; if it is absent the weights below are a zero-length vector
+data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.paracou[["treeid"]]), diam=as.vector(data.paracou[["D"]]),
+	sp=as.vector(data.paracou[["sp"]]), id.plot=as.vector(data.paracou[["plot"]]),
+	weights=1/(10000*data.paracou[["SubPlot_Size"]]), weight.full.plot=NA)
+
+## change NA and <0 data for 0 (the first column is left out of the <0 replacement)
+data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0
+
+### CHECK IF sp and sp name for column are the same
+if(sum(!(names(data.BA.SP)[-1] %in% unique(data.paracou[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree")
+
+#### compute BA tot for all competitors (row sum over every column except the first)
+BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE)
+data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET)
+### create data frame - the tree id column of data.paracou is named "treeid" (not "tree.id"); NOTE(review): each treeid occurs twice (one row per census interval) and ecocode is never created here - confirm the intended key and columns
+names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1])
+data.BA.sp <- merge(data.frame(tree.id=data.paracou[["treeid"]],ecocode=data.paracou[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE)
+## test: same number of rows and same tree ids, row by row, as data.tree (whose id column is "treeid")
+if(length(data.BA.sp[["tree.id"]]) != length(data.tree[["treeid"]]) || any(as.character(data.BA.sp[["tree.id"]]) != as.character(data.tree[["treeid"]]))) stop("competition index not in the same order than data.tree")
+
+## save everything as a list
+list.paracou <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) ## NOTE(review): data.traits is not created in this script (see MASSAGE TRAIT DATA) - confirm it is provided by format.function.R
+save(list.paracou,file="./data/process/list.paracou.Rdata") ## save the list built above (the call previously named list.spain, which is not defined in this script)
+
-- 
GitLab