merge.data.CANADA.R 5.37 KiB
### MERGE canada DATA
### Edited by FH
rm(list = ls()); source("./R/format.function.R"); library(reshape)
#########################
## READ DATA
####################
### read individuals tree data
#data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130816.csv",header=TRUE,stringsAsFactors =FALSE)
data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130818.csv",header=TRUE,stringsAsFactors =FALSE)
data.canada <- data.canada[which(!is.na(data.canada$Species)),]
colnames(data.canada)[2] <- "Species"
### read species names
species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv",stringsAsFactors=FALSE)
######################################
## MASSAGE TRAIT DATA
############################
## HEIGHT DATA FOR TREE MISSING
## BRING US DATA FOR HEIGHT OVER WHEN WE ANALYZE THAT DATASET LATER ON
##########################################
## FORMAT INDIVIDUAL TREE DATA
#############
## change unit and names of variables to be the same in all data for the tree 
data.canada$G <- 10*(data.canada$FinalDBH-data.canada$InitDBH)/data.canada$Interval ## diameter growth in mm per year
data.canada$G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA
data.canada$year <- data.canada$Interval ## number of year between measuremen
data.canada$D <- data.canada[["InitDBH"]]; data.canada$D[data.canada$D == 0] <- NA ;## diameter in cm
data.canada$dead <- as.numeric(data.canada$FinalDBH > 0) ## dummy variable for dead tree 0 alive 1 dead
data.canada$sp <- as.character(data.canada[["Species"]]) ## species code
data.canada$plot <- (data.canada[["PLOT_ID"]]) ## plot code
data.canada$htot <- rep(NA,length(data.canada[["Species"]])) ## height of tree in m - MISSING
data.canada$tree.id <- gsub("_",".",data.canada$PLOTTREE); ## tree unique id
data.canada$sp.name <- NA; 
for(i in 1:length(unique(data.canada$sp))) { 
	v <- species.clean$SPCD
	data.canada$sp.name[which(data.canada$sp == unique(data.canada$sp)[i])] <- species.clean$COMMON_NAME[which(v == unique(data.canada$sp)[i])] }
######################
## ECOREGION
###################
## merge greco to have no ecoregion with low number of observation
greco <- read.csv(file = "./data/raw/DataCanada/EcoregionCodes.csv", header = T, sep = "\t") 
table(data.canada$Ecocode) 
## Some ecoregions still have small # of individuals, so either drop off for analysis later on or wait for Quebec data to come in
# library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); 
# ecoreg <- unclass(data.canada$eco_code); 
# plot(data.canada[["CX"]][order(ecoreg)],data.canada[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg)))); 
# legend("bottomright", col = mycols, legend = levels(data.canada$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2)
# points(data.canada[["CX"]][ecoreg == 9],data.canada[["CY"]][ecoreg == 9],pty=".",cex=.2, col = "black"); ## Highlight the region with 55 sites
# ## PA1219 looks to be similar to PA1209; merge them together
# data.canada$eco_codemerged <- combine_factor(data.canada$eco_code, c(1:8,6,9))
######################
## PERCENT DEAD
###################
## variable percent dead/cannot do with since dead variable is missing
## compute numer of dead per plot to remove plot with disturbance
perc.dead <- tapply(data.canada[["dead"]],INDEX=data.canada[["plot"]],FUN=function.perc.dead)
# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record)
data.canada <- merge(data.canada,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE)
###########################################################
### PLOT SELECTION FOR THE ANALYSIS
###################
7172737475767778798081828384858687888990919293949596979899100101102103104105106
## Remove data with dead == 1 table(data.canada$dead) ## Nothing to remove colnames(data.canada)[c(3,1,11,13)] <- c("sp","plot","w","ecocode") vec.abio.var.names <- c("MAT","MAP") vec.basic.var <- c("tree.id","sp","sp.name","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead") data.tree <- subset(data.canada,select=c(vec.basic.var,vec.abio.var.names)) ############################################## ## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species ########################### data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.canada[["tree.id"]]), diam=as.vector(data.canada[["D"]]), sp=as.vector(data.canada[["sp"]]), id.plot=as.vector(data.canada[["plot"]]), weights=1/(10000*data.canada[["SubPlot_Size"]]), weight.full.plot=NA) ## change NA and <0 data for 0 data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 ### CHECK IF sp and sp name for column are the same if(sum(!(names(data.BA.SP)[-1] %in% unique(data.canada[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) ### create data frame names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) data.BA.sp <- merge(data.frame(tree.id=data.canada[["tree.id"]],ecocode=data.canada[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) ## test if(sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") ## save everything as a list list.canada <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) save(list.spain,file="./data/process/list.canada.Rdata")