From ecc447d9e6be2bdcfb053ccce51f464920e7464f Mon Sep 17 00:00:00 2001
From: fhui28 <fhui28@gmail.com>
Date: Fri, 16 Aug 2013 19:54:03 +1000
Subject: [PATCH] mergecanadafile

---
 merge.data.CANADA-fhv1.R | 111 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 merge.data.CANADA-fhv1.R

diff --git a/merge.data.CANADA-fhv1.R b/merge.data.CANADA-fhv1.R
new file mode 100644
index 0000000..fd5496d
--- /dev/null
+++ b/merge.data.CANADA-fhv1.R
@@ -0,0 +1,111 @@
+### MERGE canada DATA
+### Edited by FH
+rm(list = ls()); source("./R/format.function.R"); library(reshape)
+
+#########################
+## READ DATA
+####################
+### read individuals tree data
+data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130815.csv",header=TRUE,stringsAsFactors =FALSE)
+data.canada <- data.canada[which(!is.na(data.canada$Species)),]
+colnames(data.canada)[2] <- "Species"
+
+### read species names
+species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv",stringsAsFactors=FALSE)
+
+######################################
+## MASSAGE TRAIT DATA
+############################
+# ## Compute maximum height per species plus sd from observed height to add variables to the traits data base
+# ## Because we have two heights, then take the max of the two heights and then bootstrap 
+# q <- log10(apply(data.canada[,c("ht1","ht2")],1,max,na.rm=T)); q[!is.finite(q)] <- NA
+# res.quant.boot <- t(sapply(levels(factor(data.canada[["Species"]])),FUN=f.quantile.boot,R=1000,x=q,fac=(data.canada[["Species"]])))
+# #max.heights <- read.csv("/media/fhui/Lexar/Career & Work/GKunstler_competition/data/raw/DataCanada/MaximumHeigth.csv", header = T)
+# # 
+# # ## create data base
+# data.max.height <- data.frame(code=rownames(res.quant.boot),Max.height.mean=res.quant.boot[,1],Max.height.sd=res.quant.boot[,2],Max.height.nobs=res.quant.boot[,3])
+# rm(res.quant.boot)
+
+##########################################
+## FORMAT INDIVIDUAL TREE DATA
+#############
+
+## change unit and names of variables to be the same in all data for the tree 
+data.canada$G <- (data.canada[["FinalDBH"]]-data.canada[["InitDBH"]])/data.canada$Interval ## diameter growth in mm per year
+data.canada$year <- data.canada$Interval ## number of year between measurement/missing!
+data.canada$D <- data.canada[["InitDBH"]] ## diameter in mm
+data.canada$dead <- rep(NA,length(data.canada[["Species"]])) ## dummy variable for dead tree 0 alive 1 dead/missing!
+data.canada$sp <- as.character(data.canada[["Species"]]) ## species code
+data.canada$plot <- (data.canada[["PlotID"]]) ## plot code
+data.canada$htot <- rep(NA,length(data.canada[["Species"]]))## height of tree in m / missing
+get.treeid <- function(x) { out <- strsplit(x,"_")[[1]]; out <- out[length(out)]; return(out) }
+data.canada$tree.id <- sapply(data.canada$PLOTTREE,get.treeid) ## tree unique id
+
+############################
+## merge greco to have no ecoregion with low number of observation
+# greco <- read.csv(file = "./data/raw/DataSpain/R_Ecoregion.csv", header = T) 
+# greco 	<- greco[,c("Plot_ID_SFI","BIOME","eco_code")]
+# greco2 <- greco[!duplicated(greco$Plot),]; 
+# rm(greco)
+# 
+# data.canada <- merge(data.canada, greco2, by = "Plot_ID_SFI")
+# rm(greco2)
+# 
+# table(data.canada$eco_code)
+# ## There's an eco-region with no code, and one with 55 sites
+# 
+# library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); 
+# ecoreg <- unclass(data.canada$eco_code); 
+# plot(data.canada[["CX"]][order(ecoreg)],data.canada[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg)))); 
+# legend("bottomright", col = mycols, legend = levels(data.canada$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2)
+# points(data.canada[["CX"]][ecoreg == 9],data.canada[["CY"]][ecoreg == 9],pty=".",cex=.2, col = "black"); ## Highlight the region with 55 sites
+# ## PA1219 looks to be similar to PA1209; merge them together
+# data.canada$eco_codemerged <- combine_factor(data.canada$eco_code, c(1:8,6,9))
+
+#######################
+# ## variable percent dead/cannot do with since dead variable is missing
+# ###compute numer of dead per plot to remove plot with disturbance
+# perc.dead <- tapply(data.canada[["dead"]],INDEX=data.canada[["idp"]],FUN=function.perc.dead)
+# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record)
+# data.canada <- merge(data.canada,data.frame(idp=as.numeric(names(perc.dead)),perc.dead=perc.dead),sort=FALSE)
+data.canada$perc.dead <- NA
+
+###########################################################
+### PLOT SELECTION FOR THE ANALYSIS
+###################
+# ## Remove data with mortality == 1 or 2
+# table(data.canada$Mortality_Cut)
+# data.canada <- subset(data.canada,subset= (data.canada[["Mortality_Cut"]] == 0 |  data.canada[["Mortality_Cut"]] == ""))
+
+colnames(data.canada)[c(2,5,10)] <- c("sp","plot","w")
+vec.abio.var.names <-  c("MAT","PP","PET")
+vec.basic.var <-  c("tree.id","sp","sp.name","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead")
+data.tree <- subset(data.canada,select=c(vec.basic.var,vec.abio.var.names))
+
+##############################################
+## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species
+###########################
+data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.canada[["tree.id"]]), diam=as.vector(data.canada[["D"]]),
+	sp=as.vector(data.canada[["sp"]]), id.plot=as.vector(data.canada[["plot"]]),
+	weights=1/(10000*data.canada[["SubPlotSize"]]), weight.full.plot=NA)
+
+## change NA and <0 data for 0
+data.BA.SP[which(is.na(data.BA.SP),arr.ind=TRUE)] <- 0
+data.BA.SP[,-1][which(data.BA.SP[,-1]<0,arr.ind=TRUE)] <- 0
+
+### CHECK IF sp and sp name for column are the same
+if(sum(! (names(data.BA.SP)[-1] %in% unique(dataIFN.spain[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree")
+
+#### compute BA tot for all competitors
+BATOT.COMPET <- apply(data.BA.SP[,-1],MARGIN=1,FUN=sum,na.rm=TRUE)
+data.BA.SP$BATOT.COMPET <- BATOT.COMPET
+### create data frame
+names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1])
+data.BA.sp <- merge(data.frame(tree.id=dataIFN.spain[["tree.id"]],ecocode=dataIFN.spain[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE)
+## test
+if( sum(! data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree")
+
+## save everything as a list
+list.spain <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits)
+save(list.spain,file="./data/process/list.spain.Rdata")
+
-- 
GitLab