# analysis-fun.R
# Load the standard formatted input tables ("tree" and "traits") for one site.
#
# Args:
#   path: directory expected to contain `tree.csv` and/or `traits.csv`.
#
# Returns:
#   A named list holding one data frame per file that exists; a file that is
#   missing simply has no entry in the list. Any column named "X" is removed
#   (presumably the row-name column produced by write.csv -- TODO confirm).
load.formatted.data <- function(path){
  data <- list()
  for(f in c("tree", "traits")){
    fname <- file.path(path, paste0(f, ".csv"))
    if(file.exists(fname)){
      tab <- read.csv(fname, stringsAsFactors = FALSE)
      # drop = FALSE keeps a data frame even when a single column remains
      data[[f]] <- tab[, names(tab) != "X", drop = FALSE]
    }
  }
  #  TODO: add check on column names to make sure essential ones are correct
  data
}
# Load the formatted data of every site found under `path`.
#
# Args:
#   path: directory whose entries are site directories; the entry named
#         "TRY" is skipped.
#
# Returns:
#   A list named by site, each element being the result of
#   load.formatted.data() for that site's directory.
load_all_formatted_data <- function(path){
  # list the sites from the argument itself, not from a global variable
  sites <- dir(path)
  sites <- sites[sites != "TRY"]
  data <- list()
  for(site in sites){
    data[[site]] <- load.formatted.data(file.path(path, site))
  }
  data
}
# Summarise both tables of a single site.
#
# Args:
#   data.site: list with optional elements "tree" and "traits".
#
# Returns:
#   A list with elements `tree` (from summarise.tree) and `traits`
#   (from summarise.traits).
summarise.data <- function(data.site){
  list(tree = summarise.tree(data.site[["tree"]]),
       traits = summarise.traits(data.site[["traits"]]))
}
# Summarise a site's tree table.
#
# Args:
#   data.tree: data frame with columns "sp.name", "ecocode" and "D", or NULL
#              when the site has no tree data.
#
# Returns:
#   An empty list when `data.tree` is NULL; otherwise a list with
#     n.species: number of distinct values of sp.name,
#     n.indivs:  table of row counts per sp.name,
#     n.eco:     table of row counts per ecocode,
#     dbh.range: range of D, ignoring NA,
#     abio.vars: column names not listed in the `var` column of
#                docs/workflow/cols-tree.csv.
summarise.tree <- function(data.tree){
  out <- list()
  if(!is.null(data.tree)){
    # only read the column specification when there is data to check
    expected <- read.csv("docs/workflow/cols-tree.csv", stringsAsFactors = FALSE)
    out[["n.species"]] <- length(unique(data.tree[["sp.name"]]))
    out[["n.indivs"]] <- table(data.tree[["sp.name"]])
    out[["n.eco"]] <- table(data.tree[["ecocode"]])
    out[["dbh.range"]] <- range(data.tree[["D"]], na.rm = TRUE)
    out[["abio.vars"]] <- names(data.tree)[!(names(data.tree) %in% expected$var)]
  }
  out
}
# Summarise a site's traits table.
#
# Args:
#   data.traits: data frame with a "Latin_name" column plus trait columns,
#                or NULL when the site has no traits data.
#
# Returns:
#   An empty list when `data.traits` is NULL; otherwise a list with
#     n.species: number of rows (length of the Latin_name column),
#     <var>:     for each variable flagged numeric == 1 in
#                docs/workflow/cols-traits.csv, the count of non-NA values.
summarise.traits <- function(data.traits){
  out <- list()
  if(!is.null(data.traits)){
    # only read the column specification when there is data to summarise
    expected <- read.csv("docs/workflow/cols-traits.csv", stringsAsFactors = FALSE)
    # filter to mean, numeric; %in% keeps NA flags from leaking into `var`
    var <- expected$var[expected$numeric %in% 1]
    # TODO: CHANGE Latin_name to sp.name so consistent with tree
    out[["n.species"]] <- length(data.traits[["Latin_name"]])
    for(v in var){
      out[[v]] <- sum(!is.na(data.traits[[v]]))
    }
  }
  out
}
# Build the one-row summary table for a site.
#
# Args:
#   x: list with elements `tree` (using n.indivs, n.species, n.eco) and
#      `traits` (using n.species and the five "<trait>.mean" counts below).
#
# Returns:
#   A one-row data frame with N.indivs, N.species, N.ecoregion and, for each
#   of the five traits, the trait count divided by x$traits$n.species.
summary.table.1 <- function(x){
  traits <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean",
              "Wood.density.mean", "Max.height.mean")
  trait.coverage <- (as.data.frame(x$traits) / x$traits$n.species)[, traits]
  # strip the literal ".mean" suffix (anchored, dot escaped)
  names(trait.coverage) <- sub("\\.mean$", "", traits)
  data.frame(
    N.indivs = sum(x$tree$n.indivs),
    N.species = x$tree$n.species,
    # report at least one ecoregion even when none is recorded
    N.ecoregion = max(1, length(x$tree$n.eco)),
    trait.coverage)
}