merge.data.US.R

#!/usr/bin/env Rscript

### MERGE us DATA Edited by FH
library(reshape, quietly=TRUE)
source("./R/format.function.R")
source("./R/FUN.TRY.R")

### read species names
species.clean <- read.csv("./data/species.list/REF_SPECIES.CSV", stringsAsFactors = FALSE)
## select column to keep
species.clean <- subset(species.clean, select = c("SPCD", "GENUS", "SPECIES", "VARIETY",
    "SUBSPECIES", "SPECIES_SYMBOL"))
species.clean$Latin_name <- paste(species.clean[["GENUS"]], species.clean[["SPECIES"]],
    sep = " ")
species.clean$Latin_name_syn <- paste(species.clean[["GENUS"]], species.clean[["SPECIES"]],
    sep = " ")

names(species.clean)[1] <- "sp"
species.clean[["sp"]] <- paste("sp", species.clean[["sp"]], sep = ".")

###################################### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER
###################################### WHEN WE ANALYZE THAT DATASET LATER ON

##################################### FORMAT INDIVIDUAL TREE DATA

## change unit and names of variables to be the same in all data for the tree
data.us$G <- 10 * (data.us$FinalDbh - data.us$InitDbh)/data.us$Interval  ## diameter growth in mm per year
data.us$G[which(data.us$InitDbh == 0 | data.us$FinalDbh == -999)] <- NA
data.us$year <- data.us$Interval  ## number of year between measuremen
data.us$D <- data.us[["InitDbh"]]
data.us$D[data.us$D == 0] <- NA
## diameter in cm
data.us$dead <- as.numeric(data.us$FinalDbh > 0)  ## dummy variable for dead tree 0 alive 1 dead
data.us$sp <- as.character(data.us[["Species"]])  ## species code
data.us$plot <- as.character(data.us[["PlotID"]])  ## plot code
data.us$subplot <- paste(as.character(data.us[["PlotID"]]), as.character(data.us[["SubplotNumber"]]),
    sep = ".")  ## plot code
data.us$htot <- rep(NA, length(data.us[["Species"]]))  ## height of tree in m - MISSING
data.us$tree.id <- as.character(data.us$TreeID)
## tree unique id
data.us$sp.name <- NA
## census is missing use as only one census and check with mark
data.us$census <- rep(1,nrow(data.us))
### add plot weights for computation of competition index (in 1/m^2)
data.us$weights <- 1/(10000 * data.us[["PlotSize"]])

###################### ECOREGION merge greco to have no ecoregion with low number of observation
greco <- read.csv(file = "./data/raw/DataUS/EcoregionCodes.csv", header = T)
colnames(greco)[1] <- "Ecocode"

table(data.us$Ecocode)
data.us <- merge(data.us, greco[, -4], by = "Ecocode")
data.us$DIVISION <- factor(data.us$DIVISION)

## Some ecoregions still have small # of individuals, so create a variable which
## does division if # ind < 10000 else it reads Domain
data.us$eco_codemerged <- as.character(data.us$DIVISION)
tab.small.div <- table(data.us$eco_codemerged)
sel.small.div <- which(table(data.us$eco_codemerged) < 10000)
for (i in 1:length(sel.small.div)) {
    find.ind <- which(data.us$eco_codemerged == names(tab.small.div)[sel.small.div[i]])
    print(length(find.ind))
    data.us$eco_codemerged[find.ind] <- as.character(data.us$DOMAIN)[find.ind]
}

###### PERCENT DEAD variable percent dead/cannot do with since dead variable is
###### missing compute numer of dead per plot to remove plot with disturbance
perc.dead <- tapply(data.us[["dead"]], INDEX = data.us[["plot"]], FUN = function.perc.dead)
# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF
# AVAILABLE (disturbance record)