Commit 92dabaed authored by Daniel Falster's avatar Daniel Falster
Browse files

split US scripts into functions

No related merge requests found
Showing with 1073 additions and 347 deletions
+1073 -347
This diff is collapsed.
#!/usr/bin/env Rscript
### MERGE us DATA Edited by FH
rm(list = ls())
library(reshape, quietly=TRUE)
source("./R/format.function.R")
library(reshape)
source("./R/FUN.TRY.R")
######################### READ DATA read individuals tree data
data.us <- read.csv("./data/raw/DataUS/FIA51_trees_w_supp.csv", header = TRUE, stringsAsFactors = FALSE)
### read species names
species.clean <- read.csv("./data/species.list/REF_SPECIES.CSV", stringsAsFactors = FALSE)
## select column to keep
species.clean <- subset(species.clean, select = c("SPCD", "GENUS", "SPECIES", "VARIETY",
"SUBSPECIES", "SPECIES_SYMBOL"))
species.clean$Latin_name <- paste(species.clean[["GENUS"]], species.clean[["SPECIES"]],
sep = " ")
species.clean$Latin_name_syn <- paste(species.clean[["GENUS"]], species.clean[["SPECIES"]],
sep = " ")
names(species.clean)[1] <- "sp"
species.clean[["sp"]] <- paste("sp", species.clean[["sp"]], sep = ".")
###################################### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER
###################################### WHEN WE ANALYZE THAT DATASET LATER ON
##################################### FORMAT INDIVIDUAL TREE DATA
## change unit and names of variables to be the same in all data for the tree
data.us$G <- 10 * (data.us$FinalDbh - data.us$InitDbh)/data.us$Interval ## diameter growth in mm per year
data.us$G[which(data.us$InitDbh == 0 | data.us$FinalDbh == -999)] <- NA
data.us$year <- data.us$Interval ## number of year between measuremen
data.us$D <- data.us[["InitDbh"]]
data.us$D[data.us$D == 0] <- NA
## diameter in cm
data.us$dead <- as.numeric(data.us$FinalDbh > 0) ## dummy variable for dead tree 0 alive 1 dead
data.us$sp <- as.character(data.us[["Species"]]) ## species code
data.us$plot <- as.character(data.us[["PlotID"]]) ## plot code
data.us$subplot <- paste(as.character(data.us[["PlotID"]]), as.character(data.us[["SubplotNumber"]]),
sep = ".") ## plot code
data.us$htot <- rep(NA, length(data.us[["Species"]])) ## height of tree in m - MISSING
data.us$tree.id <- as.character(data.us$TreeID)
## tree unique id
data.us$sp.name <- NA
### add plot weights for computation of competition index (in 1/m^2)
data.us$weights <- 1/(10000 * data.us[["PlotSize"]])
###################### ECOREGION merge greco to have no ecoregion with low number of observation
load_species_list <- function(filename = "./data/raw/DataUS/REF_SPECIES.CSV"){
data <- read.csv(filename, stringsAsFactors = FALSE)
## select column to keep
data <- subset(data, select = c("SPCD", "GENUS", "SPECIES", "VARIETY",
"SUBSPECIES", "SPECIES_SYMBOL"))
data$Latin_name <- paste(data[["GENUS"]], data[["SPECIES"]], sep = " ")
data$Latin_name_syn <- paste(data[["GENUS"]], data[["SPECIES"]], sep = " ")
names(data)[1] <- "sp"
data[["sp"]] <- paste("sp", data[["sp"]], sep = ".")
data
}
load_inventory_data <- function(filename = "./data/raw/DataUS/FIA51_trees_w_supp.csv"){
## READ DATA read individuals tree data
data <- read.csv(filename, header = TRUE, stringsAsFactors = FALSE)
####### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER
####### WHEN WE ANALYZE THAT DATASET LATER ON
###### FORMAT INDIVIDUAL TREE DATA
## change unit and names of variables to be the same in all data for the tree
data$G <- 10 * (data$FinalDbh - data$InitDbh)/data$Interval ## diameter growth in mm per year
data$G[which(data$InitDbh == 0 | data$FinalDbh == -999)] <- NA
data$year <- data$Interval ## number of year between measuremen
data$D <- data[["InitDbh"]] ## diameter in cm
data$D[data$D == 0] <- NA
data$dead <- as.numeric(data$FinalDbh > 0) ## dummy variable for dead tree 0 alive 1 dead
data$sp <- as.character(data[["Species"]]) ## species code
data$plot <- as.character(data[["PlotID"]]) ## plot code
data$subplot <- paste(as.character(data[["PlotID"]]), as.character(data[["SubplotNumber"]]),
sep = ".") ## plot code
data$htot <- rep(NA, length(data[["Species"]])) ## height of tree in m - MISSING
data$tree.id <- as.character(data$TreeID) ## unique id
data$sp.name <- NA
### add plot weights for computation of competition index (in 1/m^2)
data$weights <- 1/(10000 * data.us[["PlotSize"]])
data
}
data.us <- load_inventory_data()
###### ECOREGION merge greco to have no ecoregion with low number of observation
greco <- read.csv(file = "./data/raw/DataUS/EcoregionCodes.csv", header = T)
colnames(greco)[1] <- "Ecocode"
table(data.us$Ecocode)
data.us <- merge(data.us, greco[, -4], by = "Ecocode")
data.us$DIVISION <- factor(data.us$DIVISION)
## Some ecoregions still have small # of individuals, so create a variable which
## does division if # ind < 10000 else it reads Domain
data.us$eco_codemerged <- as.character(data.us$DIVISION)
......@@ -63,8 +69,8 @@ for (i in 1:length(sel.small.div)) {
data.us$eco_codemerged[find.ind] <- as.character(data.us$DOMAIN)[find.ind]
}
###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is
###################### missing compute numer of dead per plot to remove plot with disturbance
###### PERCENT DEAD variable percent dead/cannot do with since dead variable is
###### missing compute numer of dead per plot to remove plot with disturbance
perc.dead <- tapply(data.us[["dead"]], INDEX = data.us[["plot"]], FUN = function.perc.dead)
# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF
# AVAILABLE (disturbance record)
......@@ -72,8 +78,7 @@ data.us <- merge(data.us, data.frame(plot = names(perc.dead), perc.dead = perc.d
by = "plot", sort = FALSE)
########################################################### PLOT SELECTION FOR THE ANALYSIS
##### PLOT SELECTION FOR THE ANALYSIS
## remove everything from memory not need before computation
rm(greco, perc.dead, tab.small.div, sel.small.div)
......@@ -89,6 +94,7 @@ vec.basic.var <- c("tree.id", "sp", "plot", "subplot", "ecocode", "D", "G", "dea
"year", "htot", "Lon", "Lat", "perc.dead", "weights")
data.tree <- subset(data.us, select = c(vec.basic.var, vec.abio.var.names))
rm(data.us)
## creat row unique id
data.tree$obs.id <- as.character(1:nrow(data.tree))
gc()
......@@ -96,13 +102,17 @@ gc()
### read TRY data
TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds")
#################### GENERATE ONE OBJECT PER ECOREGION
#### GENERATE ONE OBJECT PER ECOREGION
# vector of ecoregion name
ecoregion.unique <- unique(data.tree[["ecocode"]])
#### lapply function
system.time(lapply(ecoregion.unique, FUN = fun.data.per.ecoregion, data.tot = data.tree,
plot.name = "subplot", weight.full.plot = NA, name.country = "US", data.TRY = TRY.DATA.FORMATED,
species.lookup = species.clean))
#### split data by ecoregion and save to file
system.time(lapply(ecoregion.unique, FUN = fun.data.per.ecoregion,
data.tot = data.tree,
plot.name = "subplot",
weight.full.plot = NA,
name.country = "US",
data.TRY = TRY.DATA.FORMATED,
species.lookup = load_species_list(),
output.dir = "output"))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment