merge.data.CANADA.R 5.48 KiB
### MERGE canada DATA
rm(list = ls())
source("./R/format.function.R")
source("./R/FUN.TRY.R")
library(reshape)
######################### READ DATA read individuals tree data
#data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130818.csv", 
#    header = TRUE, stringsAsFactors = FALSE)
data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130910.csv", 
    header = TRUE, stringsAsFactors = FALSE)
sum(is.na(data.canada$species))
data.canada$sp = data.canada$species_FIACode
data.canada$species_FIACode <- NULL
### read species names and merge with data.canada
species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv", stringsAsFactors = FALSE)
data.canada <- merge(data.canada, data.frame(sp = species.clean$SPCD, species.clean[,2:3], stringsAsFactors = F), by = "sp", all.x = T)
###################################### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER
###################################### WHEN WE ANALYZE THAT DATASET LATER ON
########################################## FORMAT INDIVIDUAL TREE DATA
data.canada$FinalDBH[data.canada$FinalDBH < 0] <- NA 
data.canada$G <- 10 * (data.canada$FinalDBH - data.canada$InitDBH)/data.canada$Interval  ## diameter growth in mm per year
data.canada$G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA
data.canada$year <- data.canada$Interval; data.canada$IntervalYears <- NULL ## number of year between measurement
data.canada$D <- data.canada[["InitDBH"]]
data.canada$D[data.canada$D == 0] <- NA ## diameter in cm
data.canada$dead <- as.numeric(is.na(data.canada$FinalDBH))  ## dummy variable for dead tree 0 alive 1 dead
data.canada$htot <- rep(NA, nrow(data.canada))  ## height of tree in m
data.canada$plot.id <- data.canada$PlotID_InitYear; data.canada$PlotID_InitYear <- NULL ## plot id
data.canada$tree.id <- data.canada[["PlotTree_ID"]]; data.canada[["PlotTree_ID"]] <- NULL ## tree unique id
data.canada$census.id <- rep(1,nrow(data.canada))
data.canada$obs.id <- data.canada$tree.id
data.canada$weights <- 1/(data.canada$SubPlotSize*10000) ### there is some strange small value in SubPlotSize 20 m^2 even for large tree NEED TO ASK Hongcheng
###################### ECOREGION merge to have no ecoregion with low number of observation
greco <- read.csv(file = "./data/raw/DataCanada/EcoregionCodes.csv", header = T, 
    sep = "\t")
table(data.canada$Ecocode)
## Some ecoregions still have small # of individuals, so either drop off for
## analysis later on or wait for Quebec data to come in 
library(RColorBrewer)
ecoreg <- factor(data.canada$Ecocode); levels(ecoreg) <- mycols <- brewer.pal(length(levels(ecoreg)), "Set1")
plot(data.canada[['Lon']],data.canada[['Lat']],pty='.',cex=.2,col = as.character(ecoreg))
legend('bottomleft', col = mycols, legend = names(table(data.canada$Ecocode)), pch = rep(19,length(levels(ecoreg)))) 
points(data.canada[['Lon']][data.canada$Ecocode == "-331"], data.canada[['Lat']][data.canada$Ecocode == "-331"],pty=2,cex=1, col = 'black') 
points(data.canada[['Lon']][data.canada$Ecocode == "-251"], data.canada[['Lat']][data.canada$Ecocode == "-251"],pty=2,cex=1, col = 'red') 
#data.canada$eco_codemerged <- combine_factor(data.canada$eco_code,c(1:8,6,9))
###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is
###################### missing compute numer of dead per plot to remove plot with disturbance
perc.dead <- tapply(data.canada[["dead"]], INDEX = data.canada[["plot.id"]], FUN = function.perc.dead2)
data.canada <- merge(data.canada, data.frame(plot.id = names(perc.dead), perc.dead = perc.dead), 
    by = "plot.id", sort = FALSE)
####################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1
table(data.canada$dead)
data.canada <- data.canada[data.canada$dead == 0,]
vec.abio.var.names <- c("MAT", "MAP")
vec.basic.var <- c("tree.id", "sp", "sp.name", "plot", "ecocode", "D", "G", "dead", 
    "year", "htot", "Lon", "Lat", "perc.dead")
data.tree <- subset(data.canada, select = c(vec.basic.var, vec.abio.var.names))
############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in
717273747576777879808182838485868788899091929394959697
############################################## m^2/ha without the target species data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.canada[["tree.id"]]), diam = as.vector(data.canada[["D"]]), sp = as.vector(data.canada[["sp"]]), id.plot = as.vector(data.canada[["plot"]]), weights = 1/(10000 * data.canada[["SubPlot_Size"]]), weight.full.plot = NA) ## change NA and <0 data for 0 data.BA.SP[is.na(data.BA.SP)] <- 0 data.BA.SP[, -1][data.BA.SP[, -1] < 0] <- 0 ### CHECK IF sp and sp name for column are the same if (sum(!(names(data.BA.SP)[-1] %in% unique(data.canada[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors BATOT.COMPET <- apply(data.BA.SP[, -1], 1, sum, na.rm = TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET rm(BATOT.COMPET) ### create data frame names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) data.BA.sp <- merge(data.frame(tree.id = data.canada[["tree.id"]], ecocode = data.canada[["ecocode"]]), data.BA.SP, by = "tree.id", sort = FALSE) ## test if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") ## save everything as a list list.canada <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) save(list.canada, file = "./data/process/list.canada.Rdata")