### merge.data.CANADA-fhv1.R

### MERGE canada DATA
### Edited by FH
rm(list = ls()); source("./R/format.function.R"); library(reshape)

#########################
## READ DATA
####################
### Read the individual-tree data (one row per tree record).
data.canada <- read.csv(
  "./data/raw/DataCanada/Canada_Data2George_20130815.csv",
  header = TRUE, stringsAsFactors = FALSE
)
## Name the second column "Species" BEFORE filtering on it. If the raw header
## differs, `data.canada$Species` would be NULL and the NA filter below would
## silently drop every row.
colnames(data.canada)[2] <- "Species"
## Drop the records that have no species code.
data.canada <- data.canada[!is.na(data.canada$Species), ]

### Load the species reference table (species codes and their names)
species.clean <- read.csv(
  file = "./data/raw/DataCanada/FIA_REF_SPECIES.csv",
  stringsAsFactors = FALSE
)

######################################
## MASSAGE TRAIT DATA
############################
# ## Compute the maximum height per species (mean and sd, from observed heights) to add as variables to the traits database
# ## Because there are two height measurements per record, take the max of the two and then bootstrap
# q <- log10(apply(data.canada[,c("ht1","ht2")],1,max,na.rm=T)); q[!is.finite(q)] <- NA
# res.quant.boot <- t(sapply(levels(factor(data.canada[["Species"]])),FUN=f.quantile.boot,R=1000,x=q,fac=(data.canada[["Species"]])))
# #max.heights <- read.csv("/media/fhui/Lexar/Career & Work/GKunstler_competition/data/raw/DataCanada/MaximumHeigth.csv", header = T)
# #
# # ## create data base
# data.max.height <- data.frame(code=rownames(res.quant.boot),Max.height.mean=res.quant.boot[,1],Max.height.sd=res.quant.boot[,2],Max.height.nobs=res.quant.boot[,3])
# rm(res.quant.boot)

##########################################
## FORMAT INDIVIDUAL TREE DATA
#############

## Harmonise units and variable names so that the tree table has the same
## columns as the other data sets.
## diameter growth in mm per year
data.canada$G <- (data.canada[["FinalDBH"]] - data.canada[["InitDBH"]]) /
  data.canada$Interval
## number of years between measurements
data.canada$year <- data.canada$Interval
## initial diameter in mm
data.canada$D <- data.canada[["InitDBH"]]
## dead-tree indicator (0 alive, 1 dead): not available here, so filled with NA
data.canada$dead <- rep(NA, nrow(data.canada))
## species code, stored as character
data.canada$sp <- as.character(data.canada[["Species"]])
## plot code
data.canada$plot <- data.canada[["PlotID"]]
## total tree height in m: not available here, so filled with NA
data.canada$htot <- rep(NA, nrow(data.canada))
## unique tree identifier
data.canada$tree.id <- data.canada$PLOTTREE
## Species common name, looked up from the reference table by species code.
## match() does the lookup in one vectorised pass and returns NA for a code
## that is absent from species.clean$SPCD (the previous per-species loop
## stopped with "replacement has length zero" in that case). When a code is
## listed more than once, the first entry is used. `incomparables = NA` keeps
## a missing code from matching a missing SPCD.
data.canada$sp.name <- species.clean$COMMON_NAME[
  match(data.canada$sp, species.clean$SPCD, incomparables = NA)
]


############################
## merge greco ecoregions so that no ecoregion has a low number of observations
# greco <- read.csv(file = "./data/raw/DataSpain/R_Ecoregion.csv", header = T)
# greco 	<- greco[,c("Plot_ID_SFI","BIOME","eco_code")]
# greco2 <- greco[!duplicated(greco$Plot),];
# rm(greco)
#
# data.canada <- merge(data.canada, greco2, by = "Plot_ID_SFI")
# rm(greco2)
#
# table(data.canada$eco_code)
# ## There's an eco-region with no code, and one with 55 sites
#
# library(RColorBrewer); mycols <- brewer.pal(10,"Set3");
# ecoreg <- unclass(data.canada$eco_code);
# plot(data.canada[["CX"]][order(ecoreg)],data.canada[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg))));
# legend("bottomright", col = mycols, legend = levels(data.canada$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2)
# points(data.canada[["CX"]][ecoreg == 9],data.canada[["CY"]][ecoreg == 9],pty=".",cex=.2, col = "black"); ## Highlight the region with 55 sites
# ## PA1219 looks to be similar to PA1209; merge them together
# data.canada$eco_codemerged <- combine_factor(data.canada$eco_code, c(1:8,6,9))

#######################
# ## variable percent dead: cannot be computed since the dead variable is missing