diff --git a/merge.data.CANADA.R b/merge.data.CANADA.R index ed1858bd0939f843f0662e064d8fdebb13a1eb6f..9506aba6765dbc8a37866fc210cf8f506b98fcfd 100644 --- a/merge.data.CANADA.R +++ b/merge.data.CANADA.R @@ -6,7 +6,7 @@ rm(list = ls()); source("./R/format.function.R"); library(reshape) ## READ DATA #################### ### read individuals tree data -#data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130815.csv",header=TRUE,stringsAsFactors =FALSE) +#data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130816.csv",header=TRUE,stringsAsFactors =FALSE) data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130818.csv",header=TRUE,stringsAsFactors =FALSE) data.canada <- data.canada[which(!is.na(data.canada$Species)),] colnames(data.canada)[2] <- "Species" @@ -18,6 +18,7 @@ species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv",stringsAsF ## MASSAGE TRAIT DATA ############################ ## HEIGHT DATA FOR TREE MISSING +## BRING US DATA FOR HEIGHT OVER WHEN WE ANALYZE THAT DATASET LATER ON ########################################## ## FORMAT INDIVIDUAL TREE DATA @@ -26,13 +27,13 @@ species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv",stringsAsF ## change unit and names of variables to be the same in all data for the tree data.canada$G <- 10*(data.canada$FinalDBH-data.canada$InitDBH)/data.canada$Interval ## diameter growth in mm per year data.canada$G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA -data.canada$year <- data.canada$Interval ## number of year between measurement - MISSING +data.canada$year <- data.canada$Interval ## number of year between measuremen data.canada$D <- data.canada[["InitDBH"]]; data.canada$D[data.canada$D == 0] <- NA ;## diameter in cm -data.canada$dead <- as.numeric(data.canada$FinalDBH == -999) ## dummy variable for dead tree 0 alive 1 dead +data.canada$dead <- as.numeric(data.canada$FinalDBH > 0) ## dummy variable for dead tree 0 alive 1 dead data.canada$sp <- as.character(data.canada[["Species"]]) ## species code data.canada$plot <- (data.canada[["PLOT_ID"]]) ## plot code data.canada$htot <- rep(NA,length(data.canada[["Species"]])) ## height of tree in m - MISSING -data.canada$tree.id <- data.canada$PLOTTREE ## tree unique id +data.canada$tree.id <- gsub("_",".",data.canada$PLOTTREE); ## tree unique id data.canada$sp.name <- NA; for(i in 1:length(unique(data.canada$sp))) { v <- species.clean$SPCD @@ -46,7 +47,7 @@ for(i in 1:length(unique(data.canada$sp))) { greco <- read.csv(file = "./data/raw/DataCanada/EcoregionCodes.csv", header = T, sep = "\t") table(data.canada$Ecocode) -## There is only four ecoregions though; do you want to aggregate into divsion still? +## Some ecoregions still have small # of individuals, so either drop off for analysis later on or wait for Quebec data to come in # # library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); # ecoreg <- unclass(data.canada$eco_code); diff --git a/merge.data.NZ.R b/merge.data.NZ.R index c9cb1d5b0ed12a3018c15ece4bc24c279ef4e726..f429b477e7d69802a8b44e9067b58b8674c72f82 100644 --- a/merge.data.NZ.R +++ b/merge.data.NZ.R @@ -17,7 +17,7 @@ data.nz$plid <- gsub("__", "_",data.nz$plid); data.nz$plid <- gsub("_", ".",data ############################ ## Maximum height per species is already available from data.trait (in m); so no sd's and only obs per spp data.max.height <- data.frame(code=data.trait[["sp"]],Max.height.mean=log10(data.trait[["height.m"]]),Max.height.sd=NA,Max.height.nobs=1) -write.csv(data.max.height,file="./data/process/data.max.height.nz.csv") ## I was planning to save processed data in that folder +#write.csv(data.max.height,file="./data/process/data.max.height.nz.csv") ## I was planning to save processed data in that folder ################################################################ ## FORMAT INDIVIDUAL TREE DATA @@ -27,10 +27,10 @@ write.csv(data.max.height,file="./data/process/data.max.height.nz.csv") ## I was data.nz$G <- 10*(data.nz[["D1"]]-data.nz[["D0"]])/(data.nz[["t1"]]-data.nz[["t0"]]) ## diameter growth in mm per year data.nz$year <- (data.nz[["t1"]]-data.nz[["t0"]]) ## number of year between measurement data.nz$D <- data.nz[["D0"]] ## diameter in mm convert to cm -data.nz$dead <- rep(NA,length(data.nz[["sp"]])) ## dummy variable for dead tree 0 alive 1 dead - MISSING +data.nz$dead <- as.numeric(is.na(data.nz[["D1"]])) ## dummy variable for dead tree 0 alive 1 dead data.nz$sp <- data.nz$sp -data.nz$plot <- data.nz$plid -data.nz$htot <- rep(NA,length(data.nz[["sp"]])) ## I ASSUME THE "height.m" IN data.trait IS A MAX HEIGHT, WHICH IS NOT WHAT WE WANT HERE? +data.nz$plot <- data.nz$plid +data.nz$htot <- rep(NA,length(data.nz[["sp"]])) ## Max height is already available so have as missing data.nz$tree.id <- data.nz[["tag"]]; data.nz$tree.id <- gsub("__", "_",data.nz$tree.id); data.nz$tree.id <- gsub("_", ".",data.nz$tree.id) ## tree unique id ########################################## @@ -59,22 +59,24 @@ rm(data.sp,data.sp2) ###################### ## ECOREGION ################### -## DON'T KNOW WHICH VARIABLE TO USE FOR ECOREGION -greco <- greco[,c("tree.id","BIOME","eco_code")] +## merge greco to have no ecoregion with low number of observation +data.nz <- merge(data.nz, data.frame(plot = data.plot$plid, data.plot[,11:12]), by = "plot") +table(data.nz$Broad); ###################### ## PERCENT DEAD ################### -## variable percent dead - MISSING -data.nz$perc.dead <- NA +perc.dead <- tapply(data.nz[["dead"]],INDEX=data.nz[["plot"]],FUN=function.perc.dead) +# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) +data.nz <- merge(data.nz,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) ########################################################### ### PLOT SELECTION FOR THE ANALYSIS ################### data.nz <- merge(data.nz, data.plot[,c(1,8:10)], by = "plid") -colnames(data.nz)[colnames(data.nz) %in% c("mat","map")] <- c("MAT","PP") +colnames(data.nz)[colnames(data.nz) %in% c("mat","map")] <- c("MAT","MAP") #colnames(data.nz)[names(data.nz) =="eco_codemerged" ] <- c("ecocode") -vec.abio.var.names <- c("MAT","PP") +vec.abio.var.names <- c("MAT","MAP") vec.basic.var <- c("tree.id","sp","spname","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead") data.tree <- subset(data.nz,select=c(vec.basic.var,vec.abio.var.names)) @@ -86,8 +88,7 @@ data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.nz[["tree.id"]]), diam=as.vector( weights=as.vector(1/(pi*(0.5*data.nz[["D0"]])^2)), weight.full.plot=NA) ## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0 -data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 +data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 ### CHECK IF sp and sp name for column are the same if(sum(!(names(data.BA.SP)[-1] %in% unique(data.nz[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") diff --git a/merge.data.SPAIN.R b/merge.data.SPAIN.R index 459f6c764d41d5dafc538a5b5808e6a274754537..c842cef61fc3948b0483efd4514185d645205f20 100644 --- a/merge.data.SPAIN.R +++ b/merge.data.SPAIN.R @@ -86,6 +86,7 @@ rm(greco2) table(data.spain$eco_code) ## There's an eco-region with no code, and one with 55 sites +## The former we could drop as they were on the border of Spain library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); ecoreg <- unclass(data.spain$eco_code); @@ -94,6 +95,7 @@ legend("bottomright", col = mycols, legend = levels(data.spain$eco_code), pch = points(data.spain[["CX"]][ecoreg == 9],data.spain[["CY"]][ecoreg == 9],pty=".",cex=.2, col = "black"); ## Highlight the region with 55 sites ## PA1219 looks to be similar to PA1209; merge them together data.spain$eco_codemerged <- combine_factor(data.spain$eco_code, c(1:8,6,9)) +data.spain <- data.spain[-which(data.spain$eco_codemerged == ""),] ####################### ## variable percent dead/cannot do with since dead variable is missing diff --git a/merge.data.US.R b/merge.data.US.R new file mode 100644 index 0000000000000000000000000000000000000000..5acd84e22694e440027f440de0bb48d89c9d3e4d --- /dev/null +++ b/merge.data.US.R @@ -0,0 +1,101 @@ +### MERGE us DATA +### Edited by FH +rm(list = ls()); source("./R/format.function.R"); library(reshape) + +######################### +## READ DATA +#################### +### read individuals tree data +data.us <- read.csv("./data/raw/DataUS/FIA51_trees_w_supp.csv",header=TRUE,stringsAsFactors =FALSE) + +### read species names +species.clean <- read.csv("./data/species.list/REF_SPECIES.CSV",stringsAsFactors=FALSE) + +###################################### +## MASSAGE TRAIT DATA +############################ +## HEIGHT DATA FOR TREE MISSING +## BRING US DATA FOR HEIGHT OVER WHEN WE ANALYZE THAT DATASET LATER ON + +##################################### +## FORMAT INDIVIDUAL TREE DATA +############# + +## change unit and names of variables to be the same in all data for the tree +data.us$G <- 10*(data.us$FinalDbh-data.us$InitDbh)/data.us$Interval ## diameter growth in mm per year +data.us$G[which(data.us$InitDbh == 0 | data.us$FinalDbh == -999)] <- NA +data.us$year <- data.us$Interval ## number of year between measuremen +data.us$D <- data.us[["InitDbh"]]; data.us$D[data.us$D == 0] <- NA ;## diameter in cm +data.us$dead <- as.numeric(data.us$FinalDbh > 0) ## dummy variable for dead tree 0 alive 1 dead +data.us$sp <- as.character(data.us[["Species"]]) ## species code +data.us$plot <- (data.us[["PlotID"]]) ## plot code +data.us$htot <- rep(NA,length(data.us[["Species"]])) ## height of tree in m - MISSING +data.us$tree.id <- data.us$TreeID; ## tree unique id +data.us$sp.name <- NA; +v <- species.clean$SPCD; for(i in 1:length(unique(data.us$sp))) { + data.us$sp.name[which(data.us$sp == unique(data.us$sp)[i])] <- species.clean$COMMON_NAME[which(v == unique(data.us$sp)[i])] } + +###################### +## ECOREGION +################### +## merge greco to have no ecoregion with low number of observation +greco <- read.csv(file = "./data/raw/DataUS/EcoregionCodes.csv", header = T); colnames(greco)[1] <- "Ecocode" + +table(data.us$Ecocode) +data.us <- merge(data.us, greco[,-4], by = "Ecocode"); data.us$DIVISION <- factor(data.us$DIVISION) +## Some ecoregions still have small # of individuals, so create a variable which does division if # ind < 10000; else it reads Domain +# +data.us$eco_codemerged <- as.character(data.us$DIVISION); tab.small.div <- table(data.us$eco_codemerged) +sel.small.div <- which(table(data.us$eco_codemerged) < 10000) +for(i in 1:length(sel.small.div)) { + find.ind <- which(data.us$eco_codemerged == names(tab.small.div)[sel.small.div[i]]); print(length(find.ind)) + data.us$eco_codemerged[find.ind] <- as.character(data.us$DOMAIN)[find.ind] + } + +data.us <- data.us[,-c(2,24,25)] ## Remove other ecocode related stuff to save space +###################### +## PERCENT DEAD +################### +## variable percent dead/cannot do with since dead variable is missing +## compute numer of dead per plot to remove plot with disturbance +perc.dead <- tapply(data.us[["dead"]],INDEX=data.us[["plot"]],FUN=function.perc.dead) +# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) +data.us <- merge(data.us,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) + +########################################################### +### PLOT SELECTION FOR THE ANALYSIS +################### +## Remove data with dead == 1 +table(data.us$dead) +data.us <- data.us[data.us$dead == 1,] + +vec.abio.var.names <- c("MAT","MAP") +vec.basic.var <- c("tree.id","sp","sp.name","plot","eco_codemerged","D","G","dead","year","htot","Lon","Lat","perc.dead") +data.tree <- subset(data.us,select=c(vec.basic.var,vec.abio.var.names)) + +############################################## +## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species +########################### +data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.us[["tree.id"]]), diam=as.vector(data.us[["D"]]), + sp=as.vector(data.us[["sp"]]), id.plot=as.vector(data.us[["plot"]]), + weights=1/(10000*data.us[["PlotSize"]]), weight.full.plot=NA) + +## change NA and <0 data for 0 +data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 + +### CHECK IF sp and sp name for column are the same +if(sum(!(names(data.BA.SP)[-1] %in% unique(data.us[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") + +#### compute BA tot for all competitors +BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE) +data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) +### create data frame +names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) +data.BA.sp <- merge(data.frame(tree.id=data.us[["tree.id"]],ecocode=data.us[["eco_codemerged"]]),data.BA.SP,by="tree.id",sort=FALSE) +## test +if(sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") + +## save everything as a list +list.us <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) +save(list.spain,file="./data/process/list.us.Rdata") +