diff --git a/merge.data.BCI.R b/merge.data.BCI.R index bf65743fa2dc8582ed0df07475525e24a619896c..62f936e85137c48d701cec259b0729674f36a911 100644 --- a/merge.data.BCI.R +++ b/merge.data.BCI.R @@ -1,4 +1,4 @@ -### MERGE BCI DATA Edited by FH +### MERGE BCI DATA rm(list = ls()) source("./R/format.function.R") library(reshape) @@ -8,29 +8,33 @@ library(reshape) ######################### longer exists in census X+1, X+2 etc... data.bci1 <- read.table("./data/raw/DataBCI/census1/PlotsDataReport.txt", header = TRUE, stringsAsFactors = FALSE, sep = "\t") +data.bci1$Date1 <- data.bci1$Date; data.bci1$Date <- NULL +data.bci1$DBH1 <- data.bci1$DBH; data.bci1$DBH <- NULL big.bci <- NULL for (k in 2:7) { new.directory <- paste("./data/raw/DataBCI/census", k, "/PlotsDataReport.txt", sep = "") data.bci2 <- read.table(new.directory, header = TRUE, stringsAsFactors = FALSE, - sep = "\t") + sep = "\t"); if (!is.null(big.bci)) { - sub.bci <- merge(data.bci1[, c(2:7, 11, 13)], data.frame(TreeID = data.bci2[["TreeID"]], + sub.bci <- merge(data.bci1[, c("Latin","Quadrat","Census","gx","gy","TreeID","Tag","Date1","DBH1")], data.frame(TreeID = data.bci2[["TreeID"]], DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], dead = as.numeric(data.bci2[["Status"]] == - "dead")), sort = T, by = "TreeID") + "dead"),stringsAsFactors=F), sort = T, by = "TreeID") ## Uses the Date1 as the census number big.bci <- rbind(big.bci, sub.bci) } if (is.null(big.bci)) { - big.bci <- merge(data.bci1[, c(2:7, 11, 13)], data.frame(TreeID = data.bci2[["TreeID"]], + big.bci <- merge(data.bci1[, c("Latin","Quadrat","Census","gx","gy","TreeID","Tag","Date1","DBH1")], data.frame(TreeID = data.bci2[["TreeID"]], DBH2 = data.bci2[["DBH"]], Date2 = data.bci2[["Date"]], dead = as.numeric(data.bci2[["Status"]] == - "dead")), sort = T, by = "TreeID") + "dead"),stringsAsFactors=F), sort = T, by = "TreeID") } - data.bci1 <- data.bci2 + data.bci1 <- data.bci2 + data.bci1$Date1 <- data.bci1$Date; data.bci1$Date <- NULL + data.bci1$DBH1 <- data.bci1$DBH; data.bci1$DBH <- NULL cat("Census", k, "now included\n") + print(summary(big.bci$DBH1)); print(summary(big.bci$DBH2)) } rm(data.bci1, data.bci2, sub.bci) big.bci <- big.bci[order(big.bci$TreeID), ] -colnames(big.bci)[c(7:8)] <- c("DBH1", "Date1") data.bci <- big.bci rm(big.bci) @@ -39,25 +43,25 @@ species.clean <- read.table("./data/raw/DataBCI/TaxonomyDataReport.txt", strings header = T, sep = "\t") ## Try to relate SpeciesID in species.clean species names in data.bci -data.bci$sp2 = species.clean$SpeciesID[match(data.bci$Latin, paste(species.clean[["Genus"]], +data.bci$sp = species.clean$SpeciesID[match(data.bci$Latin, paste(species.clean[["Genus"]], species.clean[["species"]]))] length(unique(data.bci$sp)) -###################################### MASSAGE TRAIT DATA Use HEIGHT_AVG, LMALAM_AVD, SEED_DRY, BUT I DO NOT KNOW -###################################### WHICH WOOD DENSITY VARIABLE TO USE +###################################### MASSAGE TRAIT DATA Use HEIGHT_AVG, LMALAM_AVD, SEED_DRY data.trait <- read.csv("./data/raw/DataBCI/BCITRAITS_20101220.csv", stringsAsFactors = FALSE, header = T) data.trait$Latin <- apply(data.trait[, 1:2], 1, paste, collapse = " ") data.bci <- merge(data.bci, data.trait[, c(ncol(data.trait), 3, 7:10, 13, 15, 18, 20:21)], by = "Latin", all.x = T) -data.bci <- data.bci[order(data.bci$TreeID), ] ########################################## FORMAT INDIVIDUAL TREE DATA +data.bci <- data.bci[order(data.bci[["TreeID"]]),] data.bci$Date1 <- as.Date(data.bci$Date1) data.bci$Date2 <- as.Date(data.bci$Date2) # data.bci$yr1 <- format(strptime(data.bci$Date1, format = '%Y-%m-%d'),'%Y') # data.bci$yr2 <- format(strptime(data.bci$Date2, format = '%Y-%m-%d'),'%Y') data.bci$year <- as.numeric(difftime(data.bci$Date2, data.bci$Date1, units = "weeks")/52) ## Not rounded +data.bci$obs.id <- apply(data.bci[,c("TreeID","Census")],1,paste,collapse="_") ## change unit and names of variables to be the same in all data for the tree data.bci$G <- 10 * (data.bci$DBH1 - data.bci$DBH1)/data.bci$year ## diameter growth in mm per year - BASED ON UNROUNDED YEARS @@ -65,24 +69,19 @@ data.bci$D <- data.bci[["DBH1"]] data.bci$plot <- data.bci[["Quadrat"]] ## plot code? data.bci$htot <- data.bci$HEIGHT_AVG data.bci$sp.name <- data.bci$Latin - +data.bci$weights <- 1/(pi*(0.5*data.bci$D/100)^2) ###################### ECOREGION bci has only 1 eco-region ###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is ###################### missing compute numer of dead per plot to remove plot with disturbance -function.perc.dead2 <- function(dead) { - out <- sum(dead, na.rm = T)/length(dead[!is.na(dead)]) - if (!is.finite(out)) - out <- NA - return(out) -} perc.dead <- tapply(data.bci[["dead"]], INDEX = data.bci[["plot"]], FUN = function.perc.dead2) data.bci <- merge(data.bci, data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) ########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 table(data.bci$dead) +data.bci <- data.bci[data.bci$dead == 0,] vec.abio.var.names <- c("MAT", "MAP") ## MISSING vec.basic.var <- c("treeid", "sp", "sp.name", "plot", "D", "G", "dead", "year", "htot", @@ -90,10 +89,9 @@ vec.basic.var <- c("treeid", "sp", "sp.name", "plot", "D", "G", "dead", "year", data.tree <- subset(data.bci, select = c(vec.basic.var, vec.abio.var.names)) ############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in -############################################## m^2/ha without the target species DON'T KNOW SUBPLOT SIZE! +############################################## m^2/ha without the target species data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.bci[["TreeID"]]), diam = as.vector(data.bci[["D"]]), - sp = as.vector(data.bci[["sp"]]), id.plot = as.vector(data.bci[["plot"]]), weights = 1/(pi * - (0.5 * data.bci$D/100)^2), weight.full.plot = NA) + sp = as.vector(data.bci[["sp"]]), id.plot = as.vector(data.bci[["plot"]]), weights = data.bic[["weights"]], weight.full.plot = NA) ## change NA and <0 data for 0 data.BA.SP[is.na(data.BA.SP)] <- 0 diff --git a/merge.data.CANADA.R b/merge.data.CANADA.R index 8711afb850fdf8b5e004691c34623493ccc04035..1831a8ee8492ff98cef4462e7fd6d5cf38569dc3 100644 --- a/merge.data.CANADA.R +++ b/merge.data.CANADA.R @@ -1,14 +1,14 @@ -### MERGE canada DATA Edited by FH +### MERGE canada DATA rm(list = ls()) source("./R/format.function.R") source("./R/FUN.TRY.R") library(reshape) -######################### READ DATA read individuals tree data data.canada <- +######################### READ DATA read individuals tree data data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130818.csv", header = TRUE, stringsAsFactors = FALSE) -data.canada <- data.canada[which(!is.na(data.canada$Species)), ] -colnames(data.canada)[2] <- "Species" +data.canada <- data.canada[which(!is.na(data.canada$Species)),] +colnames(data.canada)[2] <- "sp" ### read species names species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv", stringsAsFactors = FALSE) @@ -18,26 +18,22 @@ species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv", stringsAs ########################################## FORMAT INDIVIDUAL TREE DATA -## change unit and names of variables to be the same in all data for the tree data.canada$G <- 10 * (data.canada$FinalDBH - data.canada$InitDBH)/data.canada$Interval ## diameter growth in mm per year data.canada$G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA -data.canada$year <- data.canada$Interval ## number of year between measuremen +data.canada$year <- data.canada$Interval ## number of year between measurement data.canada$D <- data.canada[["InitDBH"]] -data.canada$D[data.canada$D == 0] <- NA -## diameter in cm -data.canada$dead <- as.numeric(data.canada$FinalDBH > 0) ## dummy variable for dead tree 0 alive 1 dead -data.canada$sp <- as.character(data.canada[["Species"]]) ## species code -data.canada$plot <- (data.canada[["PLOT_ID"]]) ## plot code -data.canada$htot <- rep(NA, length(data.canada[["Species"]])) ## height of tree in m - MISSING -data.canada$tree.id <- gsub("_", ".", data.canada$PLOTTREE) -## tree unique id +data.canada$D[data.canada$D == 0] <- NA ## diameter in cm +data.canada$dead <- as.numeric(data.canada$FinalDBH < 0) ## dummy variable for dead tree 0 alive 1 dead +data.canada$plot <- (data.canada[["PLOT_ID"]]); data.canada[["PLOT_ID"]] <- NULL ## plot code +data.canada$htot <- rep(NA, nrow(data.canada)) ## height of tree in m - MISSING +data.canada$tree.id <- data.canada$PLOTTREE_I; data.canada$PLOTTREE_I <- NULL ## tree unique id data.canada$sp.name <- NA - for (i in 1:length(unique(data.canada$sp))) { v <- species.clean$SPCD data.canada$sp.name[which(data.canada$sp == unique(data.canada$sp)[i])] <- species.clean$COMMON_NAME[which(v == unique(data.canada$sp)[i])] } +data.canada$weights <- 1/(pi*(0.5*data.bci$D/100)^2) ###################### ECOREGION merge greco to have no ecoregion with low number of observation @@ -63,17 +59,14 @@ mycols <- brewer.pal(10, "Set3") ###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is ###################### missing compute numer of dead per plot to remove plot with disturbance -perc.dead <- tapply(data.canada[["dead"]], INDEX = data.canada[["plot"]], FUN = function.perc.dead) -# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF -# AVAILABLE (disturbance record) +perc.dead <- tapply(data.canada[["dead"]], INDEX = data.canada[["plot"]], FUN = function.perc.dead2) data.canada <- merge(data.canada, data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) ########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 table(data.canada$dead) -## Nothing to remove +data.canada <- data.canada[data.canada$dead == 0,] -colnames(data.canada)[c(3, 1, 11, 13)] <- c("sp", "plot", "w", "ecocode") vec.abio.var.names <- c("MAT", "MAP") vec.basic.var <- c("tree.id", "sp", "sp.name", "plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead") diff --git a/merge.data.FUSHAN.R b/merge.data.FUSHAN.R index 8e3b4bf24ff56e8462309d2c3edfb2b0dd18fb55..8e725a882aa36a00cacac761ab758d88eb4e3995 100644 --- a/merge.data.FUSHAN.R +++ b/merge.data.FUSHAN.R @@ -1,4 +1,4 @@ -### MERGE Fushan DATA Edited by FH +### MERGE Fushan DATA rm(list = ls()) source("./R/format.function.R") library(reshape) @@ -20,43 +20,37 @@ data.max.height <- data.frame(sp = data.trait$sp, Max.height = log10(data.trait$ data.fushan <- merge(data.fushan, data.max.height, by = "sp") ########################################## FORMAT INDIVIDUAL TREE DATA - -## change unit and names of variables to be the same in all data for the tree - -## DON'T HAVE A YEAR MEASUREMENT -data.fushan$G <- 10 * (data.fushan$dbh2 - data.fushan$dbh1)/data.fushan$Interval ## diameter growth in mm per year data.fushan$year <- data.fushan$Interval ## number of year between measurement - MISSING -data.fushan$D <- data.fushan[["dbh1"]] -## diameter in cm -data.fushan$dead <- as.numeric(data.fushan$status2 == "alive") ## dummy variable for dead tree 0 alive 1 dead -data.fushan$plot <- (data.fushan[["PLOT_ID"]]) ## plot code - MISSING -data.fushan$htot <- rep(NA, length(data.fushan[["dbh1"]])) ## height of tree in m - MISSING -data.fushan$tree.id <- data.fushan$tag ## tree unique id - MISSING -data.fushan$sp.name <- NA -v <- species.clean$SP +data.fushan$G <- 10*(data.fushan$dbh2 - data.fushan$dbh1)/data.fushan$Interval ## diameter growth in mm per year +data.fushan$D <- data.fushan[["dbh1"]] ## diameter in cm +data.fushan$dead <- as.numeric(data.fushan$status2 == "dead") ## dummy variable for dead tree 0 alive 1 dead +data.fushan$plot <- rep(NA,nrow(data.fushan)) ## plot code - MISSING +data.fushan$htot <- rep(NA,nrow(data.fushan)) ## height of tree in m - MISSING +data.fushan$tree.id <- data.fushan$tag ## tree unique id +data.fushan$sp.name <- rep(NA,nrow(data.fushan)) +v <- species.clean$sp for (i in 1:length(unique(data.fushan$sp))) { sel.spp <- which(data.fushan$sp == unique(data.fushan$sp)[i]) data.fushan$sp.name[sel.spp] <- paste(species.clean$family[sel.spp], species.clean$genus[sel.spp], - species.clean$epithet[sel.spp], sep = ".") + species.clean$epithet[sel.spp], sep = "_") } +data.fushan$weights <- 1/(pi*(0.5*data.bci$D/100)^2) +data.fushan$obs.id <- data.fushan$tag ########################################## CHANGE COORDINATE SYSTEM DON'T KNOW! -###################### ECOREGION DON'T SEE DATA FOR THIS AT THE MOMENT, ALTHOUGH IT'S PROBABLY ALL IN -###################### ONE ECOREGION ANYWAY? +###################### 1 ECOREGION ONLY? ###################### PERCENT DEAD CANNOT DO THIS WITHOUT A PLOTID compute numer of dead per plot to ###################### remove plot with disturbance -perc.dead <- tapply(data.fushan[["dead"]], INDEX = data.fushan[["plot"]], FUN = function.perc.dead) -# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF -# AVAILABLE (disturbance record) +perc.dead <- tapply(data.fushan[["dead"]], INDEX = data.fushan[["plot"]], FUN = function.perc.dead2) data.fushan <- merge(data.fushan, data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) ########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 table(data.fushan$dead) -data.fushan <- data.fushan[data.fushan$dead == 1, ] +data.fushan <- data.fushan[data.fushan$dead == 0, ] -colnames(data.fushan)[c(3, 1, 11, 13)] <- c("sp", "plot", "w") vec.abio.var.names <- c("MAT", "MAP") vec.basic.var <- c("tree.id", "sp", "sp.name", "plot", "D", "G", "dead", "year", "htot", "gx", "gy", "perc.dead") @@ -66,7 +60,7 @@ data.tree <- subset(data.fushan, select = c(vec.basic.var, vec.abio.var.names)) ############################################## m^2/ha without the target species CANNOT DO WITHOUT A PLOT ID data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.fushan[["tree.id"]]), diam = as.vector(data.fushan[["D"]]), sp = as.vector(data.fushan[["sp"]]), id.plot = as.vector(data.fushan[["plot"]]), - weights = 1/(pi * (0.5 * data.fushan[["dbh1"]])^2), weight.full.plot = NA) + weights = data.fushan$weights, weight.full.plot = NA) ## change NA and <0 data for 0 data.BA.SP[is.na(data.BA.SP)] <- 0