diff --git a/merge.data.NSW.R b/merge.data.NSW.R new file mode 100644 index 0000000000000000000000000000000000000000..64e60cc358ca330e804bfd596c78962f6fdfc0c7 --- /dev/null +++ b/merge.data.NSW.R @@ -0,0 +1,149 @@ +### MERGE NSW DATA +### Edited by FH +rm(list = ls()); source("./R/format.function.R"); library(reshape) + +######################### +## READ DATA +#################### +### read individuals tree data +data.nswbrc <- read.csv("./data/raw/DataNSW/NSW_data_BRcontrols.csv",header=TRUE,stringsAsFactors=FALSE,sep="\t") +data.nswbrc$Date.of.measure <- as.vector(sapply(data.nswbrc$Date.of.measure, function(x) { unlist(strsplit(x,"/"))[3] })) ## Extract the years only +data.nswbrt <- read.csv("./data/raw/DataNSW/NSW_data_BRtreatments.csv",header=TRUE,stringsAsFactors=FALSE,sep="\t") +data.nswbrt$Date.of.measure <- as.vector(sapply(data.nswbrt$Date.of.measure, function(x) { unlist(strsplit(x,"/"))[3] })) ## Extract the years only +data.nswbs1 <- read.csv("./data/raw/DataNSW/NSW_data_BS1.csv",header=TRUE,stringsAsFactors=FALSE,sep="\t") +data.nswbs1$Date.of.measure <- as.character(format(as.Date(data.nswbs1$Date.of.measure, format = "%d-%b-%y"), format = "%d/%b/%Y")) +data.nswbs1$Date.of.measure <- as.vector(sapply(data.nswbs1$Date.of.measure, function(x) { unlist(strsplit(x,"/"))[3] })) ## Extract the years only + +## data.nswbs2 has a different format to the other datasets, so format to match the above +data.nswbs2 <- read.csv("./data/raw/DataNSW/NSW_data_BS2.csv",header=TRUE,stringsAsFactors=FALSE,sep = "\t") +data.nswbs2$Plot <- apply(data.nswbs2[,1:2],1,paste,collapse=""); +data.nswbs2$Subplot <- NULL; data.nswbs2$Family <- NULL; colnames(data.nswbs2)[3] <- "species"; +data.nswbs2$x <- data.nswbs2$y <- rep(NA,nrow(data.nswbs2)) +data.nswbs22 <- data.nswbs2[rep(1:nrow(data.nswbs2),each=2),] +data.nswbs22$Date.of.measure <- rep(c(1988,2000),nrow(data.nswbs2)) +data.nswbs22$Dbh <- c(rbind(data.nswbs2[["DBH.cm..1988."]], data.nswbs2[["DBH.cm..2000."]])) +data.nswbs22[["DBH.cm..1988."]] <- data.nswbs22[["DBH.cm..2000."]] <- NULL +data.nswbs2 <- data.nswbs22[,c(1:5,8:9,7,6)]; rm(data.nswbs22) + +data.nswtnd <- read.csv("./data/raw/DataNSW/NSW_data_TND.csv",header=TRUE,stringsAsFactors=FALSE,sep = "\t") +data.nswtnd$Date.of.measure <- as.character(format(as.Date(data.nswtnd$Date.of.measure, format = "%d-%b-%y"), format = "%d/%b/%Y")) +data.nswtnd$Date.of.measure <- as.vector(sapply(data.nswtnd$Date.of.measure, function(x) { unlist(strsplit(x,"/"))[3] })) ## Extract the years only + +data.nsw <- rbind(data.nswbrc, data.nswbrt, data.nswbs1, data.nswbs2, data.nswtnd) + +###################################### +## MASSAGE TRAIT DATA +############################ +data.traits <- read.csv("./data/raw/DataNSW/NSW_traits.csv",header=TRUE,stringsAsFactors=FALSE) + +########################################## +## FORMAT INDIVIDUAL TREE DATA +############# +## Each tree has at most 3 observations (from prelim checks of the data) +data.nsw$treeid <- apply(data.nsw[,1:2],1,paste,collapse=".") +data.nsw2 <- data.frame(data.nsw[1,], year1 = NA, year2 = NA, dbh1 = NA, dbh2 = NA) +for(k in 1:length(unique(data.nsw$treeid))) { + sub.datansw <- as.data.frame(data.nsw[which(data.nsw$treeid == unique(data.nsw$treeid)[k]),]) + if(nrow(sub.datansw) == 1) { data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw, year1 = sub.datansw$Date.of.measure[1], year2 = NA, dbh1 = sub.datansw$Dbh[1], dbh2 = NA)) } + if(nrow(sub.datansw) == 2) { + data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw[1,], year1 = sub.datansw$Date.of.measure[1], year2 = sub.datansw$Date.of.measure[2], dbh1 = sub.datansw$Dbh[1], dbh2 = sub.datansw$Dbh[2])) } + if(nrow(sub.datansw) == 3) { + data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw[1,], year1 = sub.datansw$Date.of.measure[1], year2 = sub.datansw$Date.of.measure[2], dbh1 = sub.datansw$Dbh[1], dbh2 = sub.datansw$Dbh[2])) + data.nsw2 <- rbind(data.nsw2, data.frame(sub.datansw[1,], year1 = sub.datansw$Date.of.measure[2], year2 = sub.datansw$Date.of.measure[3], dbh1 = sub.datansw$Dbh[2], dbh2 = sub.datansw$Dbh[3])) } + } +data.nsw2 <- data.nsw2[-1,]; data.nsw2$Date.of.measure <- data.nsw2$Dbh <- NULL +data.nsw <- data.nsw2; for(k in 9:12) data.nsw[,k] <- as.numeric(data.nsw[,k]) + +## change unit and names of variables to be the same in all data for the tree +data.nsw$year <- (data.nsw$year2-data.nsw$year1) ## number of year between measurements +data.nsw$G <- 10*(data.nsw$dbh2-data.nsw$dbh1)/(data.nsw$year) ## diameter growth in mm per year +## THERE ARE SOME ROWS WITH STRONG NEGATIVE GROWTH THAT YOU MIGHT WANT TO REMOVE +head(data.nsw[order(data.nsw$G),]) + +data.nsw$D <- data.nsw[["dbh1"]]; ## diameter in cm +data.nsw$dead <- rep(NA, nrow(data.nsw)) ## dummy variable for dead tree 0 alive 1 dead - MISSING +data.nsw$sp <- as.character(data.nsw[["species"]]) ## species code - use the spp name as code +data.nsw$plot <- as.character(data.nsw[["Plot"]]) ## plot code +data.nsw$htot <- rep(NA,nrow(data.nsw)) ## height of tree in m - MISSING +### add plot weights for computation of competition index (in 1/m^2) - from the original excel file +data.nsw$weights <- rep(NA,nrow(data.nsw)) +data.nsw$weights[grep("AA",data.nsw$Plot)] <- 1/(20*80); data.nsw$weights[grep("BB",data.nsw$Plot)] <- 1/(20*80) +data.nsw$weights[grep("CC",data.nsw$Plot)] <- 1/(20*80); data.nsw$weights[grep("DD",data.nsw$Plot)] <- 1/(20*80) +data.nsw$weights[grep("BS",data.nsw$Plot)] <- 1/(25*30); +data.nsw$weights[grep("BR",data.nsw$Plot)] <- 1/(60.4*60.4); +data.nsw$weights[grep("END",data.nsw$Plot)] <- 1/(40*50); data.nsw$weights[grep("TND",data.nsw$Plot)] <- 1/(40*50); +data.nsw$obs.id <- 1:nrow(data.nsw) + +###################### +## ECOREGION +################### +## nsw has only 1 eco-region + +###################### +## PERCENT DEAD +################### +## NO DATA ON MORTALITY + +########################################### +### VARIABLES SELECTION FOR THE ANALYSIS +################### +vec.abio.var.names <- c("MAT","MAP") ## MISSING +#vec.basic.var <- c("obs.id","treeid","sp","plot","D","G","dead","year","htot","x","y","perc.dead") +#data.nsw <- subset(data.nsw,select=c(vec.basic.var)) #,vec.abio.var.names + +############################################## +## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species +########################### +## NEED TO COMPUTE BASED ON RADIUS AROUND TARGET TREE +### species as factor because number +data.nsw[["species"]] <- factor(data.nsw[["species"]]) +data.nsw$spcode <- data.nsw[["species"]]; levels(data.nsw$spcode) <- 1:length(levels(data.nsw$spcode)) + +data.BA.SP <- BA.SP.FUN(obs.id=as.vector(data.nsw[["treeid"]]), diam=as.vector(data.nsw[["D"]]), + sp=as.vector(data.nsw[["spcode"]]), id.plot=as.vector(data.nsw[["Plot"]]), + weights=data.nsw[["weights"]], weight.full.plot=NA) + +## change NA and <0 data for 0 +data.BA.SP[is.na(data.BA.SP)] <- 0; +data.BA.SP2 <- data.frame(data.BA.SP); colnames(data.BA.SP2) <- colnames(data.BA.SP) + +### CHECK IF sp and sp name for column are the same +if(sum(!(names(data.BA.SP2)[-1] %in% unique(data.nsw[["spcode"]]))) >0) stop("competition index sp name not the same as in data.tree") + +#### compute BA tot for all competitors +BATOT.COMPET <- apply(data.BA.SP2[,-1],1,sum,na.rm=TRUE) +data.BA.SP2$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) +data.BA.SP <- data.BA.SP2 + +# Rlim <- 15 # set size of neighborhood for competition index +# system.time(test <- fun.compute.BA.SP.XY.per.plot(1,data.tree=data.nsw,Rlim=15,parallel=TRUE,rpuDist=FALSE)) +# +# list.BA.SP.data <- mclapply(unique(data.nsw[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.nsw,Rlim=Rlim,mc.cores=4) +# data.BA.sp <- rbind.fill(list.BA.SP.data) +# dim(data.BA.SP) +# +# ### TEST DATA FORMAT +# if(sum(! rownames(BA.SP.temp)==data.tree[['obs.id']]) >0) stop('rows not in the good order') +# if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree[['species']]))))>0) stop('colnames does mot match species name') +# ## test same order as data.nsw +# if(sum(!data.BA.SP[["obs.id"]] == data.nsw[["obs.id"]]) >0) stop("competition index not in the same order than data.nsw") +# +# ## REMOVE TREE IN BUFFER ZONE +# not.in.buffer.zone <- (data.nsw[['x']]<(250-Rlim) & +# data.nsw[['x']]>(0+Rlim) & +# data.nsw[['y']]<(250-Rlim) & +# data.nsw[['y']]>(0+Rlim)) +# +# # remove subset +# data.nsw <- subset(data.nsw,subset=not.in.buffer.zone) +# data.BA.sp <- subset(data.BA.sp,subset=not.in.buffer.zone) +# +# ## plot each plot +# pdf("./figs/plots.tree.pdf") +# lapply(unique(data.nsw[["plot"]]),FUN=fun.circles.plot,data.nsw[['x']],data.nsw[['y']],data.nsw[["plot"]],data.nsw[["D"]],inches=0.2,xlim=c(0,250),ylim=c(0,250)) +# dev.off() + +## save everything as a list +list.nsw <- list(data.tree=data.nsw,data.BA.SP=data.BA.sp,data.traits=data.traits) +save(list.nsw,file="./data/process/list.nsw.Rdata") +