########################### ########################### ### FUNCTION TO RUN LMER ESTIMATION WITH NO logBA for all in one big model library(lme4) run.multiple.model.for.set.one.trait <- function(model.files, fun.model, trait, data.type = 'simple', ...){ for (m in model.files) (run.model.for.set.one.trait (m, fun.model, trait, data.type = data.type, ...)) } run.model.for.set.one.trait <- function(model.file, fun.model, trait, data.type, ...){ fun.model <- match.fun(fun.model) try(fun.model(model.file, trait, data.type = data.type, ...)) } #===================================================================== # Run lmer() (in package lme4) for one trait, one model #===================================================================== path.model <- "R/analysis/model.lmer" model.files.lmer.Tf.0 <- file.path(path.model, c("model.lmer.LOGLIN.r.set.species.R", "model.lmer.LOGLIN.r.set.fixed.biomes.species.R")) model.files.lmer.Tf.1 <- file.path(path.model, c("model.lmer.LOGLIN.ER.AD.Tf.r.set.species.R", "model.lmer.LOGLIN.ER.AD.Tf.r.set.fixed.biomes.species.R")) model.files.lmer.Tf.2 <- file.path(path.model, c("model.lmer.LOGLIN.ER.AD.Tf.r.ecocode.species.R", "model.lmer.LOGLIN.ER.AD.Tf.r.ecocode.fixed.biomes.species.R")) model.files.lmer.Tf.3 <- file.path(path.model, c("model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.set.species.R", "model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.set.fixed.biomes.species.R")) model.files.lmer.Tf.4 <- file.path(path.model, c("model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.ecocode.species.R", "model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.ecocode.fixed.biomes.species.R")) model.files.lmer.Tf.intra.1 <- file.path(path.model, c("model.lmer.LOGLIN.ER.AD.Tf.intra.r.set.species.R", "model.lmer.LOGLIN.ER.AD.Tf.intra.r.set.fixed.biomes.species.R")) model.files.lmer.Tf.intra.2 <- file.path(path.model, c("model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.intra.r.set.species.R", "model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.intra.r.set.fixed.biomes.species.R")) call.lmer.and.save <- function(formula, df.lmer, path.out, var.sample, ecocode.var, select.biome, list.sd){ lmer.output <- lmer(formula = formula, data = df.lmer, REML = FALSE, control = lmerControl(optCtrl = list(maxfun = 40000) ) ) print(warnings()) print(summary(lmer.output)) relgrad <- with(lmer.output@optinfo$derivs,solve(Hessian,gradient)) print('test convergence of relative gradient of hessian') print(max(abs(relgrad)) ) if(is.na(select.biome)){ name.file <- paste(var.sample,ecocode.var, "results.nolog.all.rds", sep ='.') } if(!is.na(select.biome)){ name.file <- paste(var.sample, ecocode.var, gsub(' ', '_',select.biome), "results.nolog.all.rds", sep ='.') } saveRDS(list(output = lmer.output, relgrad = relgrad, list.sd = list.sd), file = file.path(path.out, name.file)) } run.lmer <- function (model.file, trait, min.obs = 10, sample.size = NA, ecocode.var = 'wwf', data.type ='simple', var.sample = NA, select.biome = NA, merge.biomes.TF = FALSE, sample.vec.TF = FALSE) { require(lme4) source(model.file, local = TRUE) model <- load.model() #= Path for output path.out <- output.dir('lmer', model$name, trait, data.type) print(path.out) dir.create(path.out, recursive = TRUE, showWarnings = FALSE) cat("run lmer for model", model.file, " for trait", trait, "\n") list.df.lmer <- load.data.for.lmer(trait, data.type = data.type, sample.size. = sample.size, ecocode.var. = ecocode.var, var.sample. = var.sample, select.biome. = select.biome, merge.biomes.TF = merge.biomes.TF, sample.vec.TF. = sample.vec.TF) # return a list of DF cat("Ok data with Nobs", nrow(list.df.lmer[[1]]), "\n") print(names(list.df.lmer[[1]])) print(dim(list.df.lmer[[1]])) #= Run model call.lmer.and.save(formula = model$lmer.formula.tree.id, list.df.lmer[[1]], path.out = path.out, var.sample = var.sample, ecocode.var = ecocode.var, select.biome = select.biome, list.sd = list.df.lmer[[2]]) gc() } #======================================================================== output.dir <- function (type , model, trait, set) { file.path("output", type, set, trait, model) } #============================================================ # Function to prepare data for lmer #============================================================ ## add sample equivalent per ecocode add.sampling.prob.by.var.sample <- function(df, var.sample){ vec.prob <- 1/table(df[[var.sample]]) df1 <- data.frame(ecocode = names(vec.prob), prob.sample = as.numeric(vec.prob)) names(df1) <- c(var.sample, 'prob.sample') df2 <- left_join(df, df1, by = var.sample) return(df2) } resample.plot.by.var <- function(df, var.sample, sample.size){ df2 <- add.sampling.prob.by.var.sample(df, var.sample) df1 <- df2 %>% group_by(plot) %>% summarise(prob.sample = mean(prob.sample)) num.sample.plot <- sample(1:nrow(df1), size = sample.size, prob = df1$prob.sample) vec.sample.plot <- df1[['plot']][num.sample.plot] return(vec.sample.plot) } load.data.for.lmer <- function(trait, data.type, base.dir = "output/processed", sample.size. , ecocode.var., var.sample. = 'wwf', select.biome. = NA, select.set. = NA, sample.vec.TF. = FALSE, merge.biomes.TF = FALSE){ require(dplyr) if (! data.type %in% c('simple', 'intra', 'all.census')) stop('not good data.type') df <- readRDS( file.path(base.dir,paste('data', trait, data.type, 'rds', sep = '.'))) df <- dplyr::mutate(df,sp.name = ifelse(is.na(sp.name), 'missing.sp', sp.name)) if(merge.biomes.TF) df <- merge.biomes(df) if(!is.na(select.biome.)) { df <- dplyr::filter(df, biomes == select.biome.) } if(!is.na(select.set.)) { df <- dplyr::filter(df, set == select.set.) } if(!is.na(sample.size.)){ if(sample.size. < length(unique(df$plot))){ if(!sample.vec.TF.){ if(sample.size. > length(unique(df$plot))){ sample.size. <- length(unique(df$plot))} plot.selected <- resample.plot.by.var(df, var.sample., sample.size.) sample.vec <- (1:nrow(df))[df$plot %in% plot.selected] df <- df[sample.vec, ] saveRDS(sample.vec,file = file.path(base.dir,paste('sample.vec', trait, data.type,'rds', sep = '.'))) print(paste('sub-sampled by',var.sample.)) }else{ sample.vec <- readRDS(file = file.path(base.dir,paste('sample.vec', trait, data.type,'rds', sep = '.'))) df <- df[sample.vec, ] print(paste('sub-sampled by',var.sample., 'by loading sample.vec')) } } } if(data.type != 'all.census') { df <- select.one.census.per.plot(df) } list.sd <- get.sd.lmer(df, trait, min.obs = 10) print('sd ok') res <- format.data.for.lmer(df, trait, data.type = data.type, ecocode.var = ecocode.var.) return( list(res, list.sd)) } merge.biomes <- function(data){ data$biomes <- factor(data$biomes) levels.name <- levels(data$biomes) levels.name[levels.name == 'Tundra'] <- 'Boreal forest' levels.name[levels.name == 'Subtropical desert'] <- 'Temperate grassland desert' levels.name[levels.name == "Tropical rain forest"] <- "Tropical forest savanna" levels(data$biomes) <- levels.name print('biomes merged') return(data) } select.one.census.per.plot <- function(df){ require(dplyr) cat(dim(df)) df <- df %>% mutate(plot.c = paste(plot, census)) df.select <- df %>% sample_n(., nrow(df)) %>% group_by( plot) %>% summarise( plot.cs = first(plot.c)) %>% dplyr::select(plot.cs) plots.select <- as.vector(df.select$plot.cs) df2 <- filter(df, plot.c %in% plots.select) print(dim(df2)) if(!(nrow(df2) < nrow(df))) stop('selection census error') return(df2) } select.data.for.analysis <- function(data.tree, abs.CWM.tntf, perc.neigh.sp, perc.neigh.gs, BATOT, min.obs, min.perc.neigh.sp = 0.90, min.perc.neigh.gs = 0.95, min.BA.G = -40, max.BA.G = 180){ select.temp <-(1:nrow(data.tree))[!is.na(data.tree[["BA.G"]]) & !is.na(data.tree[["D"]]) & !is.na(data.tree[["biomes"]]) & !is.na(data.tree[["MAT"]]) & !is.na(data.tree[["MAP"]]) & data.tree[["BA.G"]]>min.BA.G & data.tree[["BA.G"]]9.9 & !is.na(data.tree[[abs.CWM.tntf]]) & !is.na(data.tree[[BATOT]]) & data.tree[[perc.neigh.sp]] > min.perc.neigh.sp & !is.na(data.tree[[perc.neigh.sp]]) & data.tree[[perc.neigh.gs]] > min.perc.neigh.gs & !is.na(data.tree[[perc.neigh.gs]])] ## remove tree with NA data.tree <- data.tree[select.temp ,] # select species with minimum obs select.temp <- (1:nrow(data.tree))[data.tree[["sp"]] %in% names(table(factor(data.tree[["sp"]])))[ table(factor(data.tree[["sp"]]))>min.obs]] data.tree <- data.tree[select.temp, ] DT <- drop(data.tree) rm(data.tree) gc() return(DT) } #============================================================ # Function to prepare data for lmer with new CWM data # that will be used in a simple linear model #============================================================ select.data.trait <- function(data.tree, trait, min.obs = 10) { if(! trait %in% c("SLA", "Leaf.N", "Seed.mass", "Wood.density", "Max.height")) stop("trait need to be in SLA, Leaf.N, Seed.mass, Wood.density, or Max.height ") # get vars names CWM.tn <- paste(trait, "CWM", 'fill', sep = ".") abs.CWM.tntf <- paste(trait, "abs.CWM", 'fill', sep = ".") tf <- paste(trait, "focal", sep = ".") BATOT <- "BATOT" perc.neigh.sp <- paste(trait, "perc", 'species', sep = ".") perc.neigh.gs <- paste(trait, "perc", 'genus', sep = ".") data.tree <- select.data.for.analysis(data.tree, abs.CWM.tntf, perc.neigh.sp, perc.neigh.gs, BATOT, min.obs) return(data.tree) } load.and.save.data.for.lmer <- function(trait, min.obs= 10, data.type = 'simple', base.dir = "output/processed"){ fname <- 'data.all.no.log.all.census.rds' data.tree.tot <- readRDS(file.path(base.dir, fname)) df <- select.data.trait(data.tree.tot, trait) saveRDS(df,file = file.path(base.dir,paste('data', trait, data.type, 'rds', sep = '.'))) } ################### ## FORMAT DATA ### get variables for analysis three type simple cat multi scale.d <- function(x, ...){ return(as.vector(scale(x, ...))) } scale.nc <- function(x, center = FALSE){ return(as.vector(scale(x, center = center, scale = sd(x, na.rm = TRUE)))) } get.variables <- function(data.tree, BATOT, CWM.tn, abs.CWM.tntf, tf, ecocode.var = 'wwf', min.BA.G = 40, min.MAT = 10){ logG <- scale.d(log(data.tree[["BA.G"]] + min.BA.G+1)) logD <- scale.d(log(data.tree[["D"]])) MAT <- scale.d(log(data.tree[["MAT"]]+min.MAT)) MAP <- scale.d(log(data.tree[["MAP"]])) species.id <- as.character(factor(data.tree[["sp.name"]])) plot.id <- as.character(factor(data.tree[["plot"]])) tree.id <- as.character(factor(data.tree[["tree.id"]])) set.id <- as.character(factor(data.tree[["set"]])) ecocode.id <- as.character(factor(paste(data.tree[[ecocode.var]], data.tree[["set"]]))) biomes.id <- factor(data.tree[['biomes']]) #= multiply CWMs by BATOT sumTnTfBn.abs <- data.tree[[abs.CWM.tntf]]* data.tree[[BATOT]] sumTnBn <- data.tree[[CWM.tn]]*data.tree[[BATOT]] sumTfBn <- data.tree[[tf]]*data.tree[[BATOT]] sumBn <- data.tree[[BATOT]] return(data.frame(logG = logG, logD = logD, MAT = MAT, MAP = MAP, species.id = species.id, set.id = set.id, ecocode.id = ecocode.id, biomes.id = biomes.id, plot.id = plot.id, tree.id = tree.id, sumTnTfBn.abs = scale.nc(sumTnTfBn.abs, center = FALSE), Tf = scale.d(data.tree[[tf]]), sumTnBn = scale.nc(sumTnBn, center = FALSE), sumTfBn = scale.nc(sumTfBn, center = FALSE), sumBn = scale.nc(sumBn, center = FALSE), stringsAsFactors = FALSE)) } get.variables.intra <- function(data.tree, BATOT, CWM.tn, abs.CWM.tntf, tf, ecocode.var = 'wwf', min.BA.G = 40, min.MAT = 10){ logG <- scale.d(log(data.tree[["BA.G"]] + min.BA.G+1)) logD <- scale.d(log(data.tree[["D"]])) MAT <- scale.d(log(data.tree[["MAT"]]+min.MAT)) MAP <- scale.d(log(data.tree[["MAP"]])) species.id <- as.character(factor(data.tree[["sp.name"]])) plot.id <- as.character(factor(data.tree[["plot"]])) tree.id <- as.character(factor(data.tree[["tree.id"]])) set.id <- as.character(factor(data.tree[["set"]])) ecocode.id <- as.character(factor(paste(data.tree[[ecocode.var]], data.tree[["set"]]))) biomes.id <- factor(data.tree[['biomes']]) #= multiply CWMs by BATOT sumTnTfBn.abs <- data.tree[[abs.CWM.tntf]]* (data.tree[[BATOT]] - data.tree[['BAintra']]) sumTnBn <- data.tree[[CWM.tn]]*data.tree[[BATOT]] sumTfBn <- data.tree[[tf]]*data.tree[[BATOT]] sumBn.inter <- (data.tree[[BATOT]] - data.tree[['BAintra']]) return(data.frame(logG = logG, logD = logD, MAT = MAT, MAP = MAP, species.id = species.id, set.id = set.id, ecocode.id = ecocode.id, biomes.id = biomes.id, plot.id = plot.id, tree.id = tree.id, sumTnTfBn.abs = scale.nc(sumTnTfBn.abs, center = FALSE), Tf = scale.d(data.tree[[tf]]), sumTnBn = scale.nc(sumTnBn, center = FALSE), sumTfBn = scale.nc(sumTfBn, center = FALSE), sumBn.inter = scale.nc(sumBn.inter, center = FALSE), sumBn.intra = scale.nc(data.tree[['BAintra']], center = FALSE), stringsAsFactors = FALSE)) } #============================================================ # Function to prepare data for lmer with CWM data #============================================================ format.data.for.lmer <- function(data.tree, trait, min.obs = 10, data.type = 'simple', ecocode.var = 'wwf') { if(! trait %in% c("SLA", "Leaf.N", "Seed.mass", "SLA", "Wood.density", "Max.height")) stop("need trait to be in SLA Leaf.N Seed.mass SLA Wood.density or Max.height") # get vars names CWM.tn <- paste(trait, "CWM", 'fill', sep = ".") abs.CWM.tntf <- paste(trait, "abs.CWM", 'fill', sep = ".") tf <- paste(trait, "focal", sep = ".") BATOT <- "BATOT" #= DATA LIST FOR LMER if(data.type == 'simple' | data.type == 'all.census') { print('simple') lmer.data <- get.variables(data.tree, BATOT, CWM.tn, abs.CWM.tntf, tf, ecocode.var) } if(data.type =='intra') { print('intra') lmer.data <- get.variables.intra(data.tree, BATOT, CWM.tn, abs.CWM.tntf, tf, ecocode.var) } return(lmer.data) } ## SD for plot get.sd.lmer <- function(data.tree, trait, min.obs = 10) { if(! trait %in% c("SLA", "Leaf.N", "Seed.mass", "SLA", "Wood.density", "Max.height")) stop("need trait to be in SLA Leaf.N Seed.mass SLA Wood.density or Max.height") # get vars names CWM.tn <- paste(trait, "CWM", 'fill', sep = ".") abs.CWM.tntf <- paste(trait, "abs.CWM", 'fill', sep = ".") tf <- paste(trait, "focal", sep = ".") BATOT <- "BATOT" #= DATA SD LIST FOR LMER list.sd <- compute.sd.mean.var.lmer(data.tree, tf, CWM.tn, abs.CWM.tntf, min.BA.G = 40) return(list.sd) } compute.sd.mean.var.lmer <- function(data.tree, tf, CWM.tn, abs.CWM.tntf, min.BA.G = 40){ list.sd <- list( 'mean.logG' = mean(log(data.tree[["BA.G"]] + min.BA.G+1), na.rm = TRUE), 'sd.logG' = sd(log(data.tree[["BA.G"]] + min.BA.G+1), na.rm = TRUE), 'mean.logD' = mean(log(data.tree[["D"]] ), na.rm = TRUE), 'sd.logD' = sd(log(data.tree[["D"]] ), na.rm = TRUE), 'mean.tf' = mean(data.tree[[tf]] , na.rm = TRUE), 'sd.tf' = sd(data.tree[[tf]] , na.rm = TRUE), 'sd.sumBn' = sd( data.tree[['BATOT']], na.rm = TRUE), 'sd.sumBn.inter' = sd( data.tree[['BATOT']] - data.tree[['BAintra']], na.rm = TRUE), 'sd.sumBn.intra' = sd( data.tree[['BAintra']], na.rm = TRUE), 'sd.sumTfBn' = sd(data.tree[[tf]]* data.tree[['BATOT']], na.rm = TRUE), 'sd.sumTnTfBn.abs' = sd(data.tree[[abs.CWM.tntf]]* data.tree[['BATOT']], na.rm = TRUE), 'sd.sumTnBn' = sd(data.tree[[CWM.tn]]*data.tree[['BATOT']], na.rm = TRUE) ) return(list.sd) }