Commit 436ef78d authored by kunstler's avatar kunstler
Browse files

new version of MS in latex

parent 9b4323db
#!/usr/bin/env Rscript
source("R/analysis/glmer.output-fun.R")
source("R/analysis/glmer.run.R")
## LOOP OVER FILES AND NOT SCAN ALL FILES
traits <- c("SLA", "Wood.density","Max.height","Leaf.N","Seed.mass")
type.filling <- 'species'
var.sample <- 'ecocode.id'
files <- c()
for (trait in traits){
for(model in c(model.files.glmer.Tf.1,
model.files.glmer.Tf.2,
model.files.glmer.Tf.3)){
source(model, local = TRUE)
model.obj <- load.model()
pathout <- output.dir.glmer('all.no.log', model.obj$name, trait,
type.filling=type.filling)
files <- c(files,file.path(pathout,paste(var.sample, "glmer.results.global.rds")))
}
}
out <- lapply(files, summarise.glmer.output.all.list)
names(out) <- lapply(lapply(files,files.details.all),function(x) paste(as.vector(x[-5]),collapse="_"))
saveRDS(out,file='output/glmer.global.list.out.ecocode.id.rds')
## LOOP OVER FILES AND NOT SCAN ALL FILES
traits <- c("SLA", "Wood.density","Max.height","Leaf.N","Seed.mass")
type.filling <- 'species'
var.sample <- 'species.id'
files <- c()
for (trait in traits){
for(model in c(model.files.glmer.Tf.1,
model.files.glmer.Tf.2,
model.files.glmer.Tf.3)){
source(model, local = TRUE)
model.obj <- load.model()
pathout <- output.dir.glmer('all.no.log', model.obj$name, trait,
type.filling=type.filling)
files <- c(files,file.path(pathout,paste(var.sample, "glmer.results.global.rds")))
}
}
out <- lapply(files, summarise.glmer.output.all.list)
names(out) <- lapply(lapply(files,files.details.all),function(x) paste(as.vector(x[-5]),collapse="_"))
saveRDS(out,file='output/glmer.global.list.out.species.id.rds')
#### function to analyse glmer output
library(lme4)
read.glmer.output <- function(file.name){
tryCatch(readRDS(file.name), error=function(cond)return(NULL)) # Choose a return value in case of error
}
summarise.glmer.output <- function(x){
list( nobs = nobs(x),
R2m =Rsquared.glmm.lmer(x)$Marginal,
R2c =Rsquared.glmm.lmer(x)$Conditional,
AIC = AIC(x),
deviance = deviance(x),
conv=x@optinfo$conv,
effect.response.var=variance.fixed.glmm.lmer.effect.and.response(x),
fixed.coeff.E=fixef(x),
fixed.coeff.Std.Error=sqrt(diag(vcov(x))),
fixed.var=variance.fixed.glmm.lmer(x))
}
summarise.glmer.all.output <- function(x, random.name = 'biomes.id'){
list( nobs = nobs(x),
R2m =Rsquared.glmm.lmer(x)$Marginal,
R2c =Rsquared.glmm.lmer(x)$Conditional,
AIC = AIC(x),
deviance = deviance(x),
conv=x@optinfo$conv,
effect.response.var=variance.fixed.glmm.lmer.effect.and.response(x),
fixed.coeff.E=fixef(x),
fixed.coeff.Std.Error=sqrt(diag(vcov(x))),
fixed.var=variance.fixed.glmm.lmer(x),
set.BLUP=coef(x)[[random.name]])
}
summarise.glmer.output.list <- function(f ){
output.glmer <- read.glmer.output(f)
if(!is.null(output.glmer)){
res <- list(files.details=files.details(f),
glmer.summary=summarise.glmer.output( output.glmer))
}else{
res <- NULL
}
return(res)
}
summarise.glmer.output.all.list <- function(f, random.name = 'biomes.id' ){
output.glmer <- read.glmer.output(f)
if(!is.null(output.glmer)){
res <- list(files.details=files.details.all(f),
glmer.summary=summarise.glmer.all.output( output.glmer, random.name = random.name))
}else{
res <- NULL
}
return(res)
}
files.details <- function(x){
s <- data.frame(t(strsplit(x, "/", fixed=TRUE)[[1]]), stringsAsFactors= FALSE)
names(s) <- c("d1", "d2", "set", "ecocode", "trait", "filling", "model", "file" )
s[-(1:2)]
}
files.details.all <- function(x){
s <- data.frame(t(strsplit(x, "/", fixed=TRUE)[[1]]), stringsAsFactors= FALSE)
names(s) <- c("d1", "d2", "trait", "filling", "model", "file" )
s[-(1:2)]
}
#### R squred functions
# Function rsquared.glmm requires models to be input as a list (can include fixed-
# effects only models,but not a good idea to mix models of class "mer" with models
# of class "lme") FROM http://jslefche.wordpress.com/2013/03/13/r2-for-linear-mixed-effects-models/
Rsquared.glmm.lmer <- function(i){
{
# Get variance of fixed effects by multiplying coefficients by design matrix
VarF=var(as.vector(fixef(i) %*% t(i@pp$X)))
# Get variance of random effects by extracting variance components
VarRand=colSums(do.call(rbind,lapply(VarCorr(i),function(j) j[1])))
# Calculate marginal R-squared
Rm=VarF/(VarF+VarRand+pi^2/3)
# Calculate conditional R-squared (fixed effects+random effects/total variance)
Rc=(VarF+VarRand)/(VarF+VarRand+pi^2/3)
# Bind R^2s into a matrix and return with AIC values
Rsquared.mat=data.frame(Class=class(i),Family=summary(i)$family,Marginal=Rm,
Conditional=Rc,AIC=AIC(i)) }
return(Rsquared.mat)
}
variance.fixed.glmm.lmer <- function(i){
# Get variance of for each fixed effects by multiplying coefficients by design matrix
var.vec <- apply(fixef(i) * t(i@pp$X),MARGIN=1,var)
# Get variance of fixed effects by multiplying coefficients by design matrix
VarF=var(as.vector(fixef(i) %*% t(i@pp$X)))
# Get variance of random effects by extracting variance components
VarRand=colSums(do.call(rbind,lapply(VarCorr(i),function(j) j[1])))
var.vec <- var.vec/(VarF+VarRand+pi^2/3)
names(var.vec) <- paste(names(var.vec),"VAR",sep=".")
return(var.vec)
}
variance.fixed.glmm.lmer.effect.and.response <- function(i){
if(sum(c("sumTfBn","sumTnBn") %in% names(fixef(i)))==2){
# Get variance of for each fixed effects by multiplying coefficients by design matrix
var.vec <- var(as.vector(fixef(i)[c("sumTfBn","sumTnBn")] %*% t(i@pp$X[,c("sumTfBn","sumTnBn")])))
# Get variance of fixed effects by multiplying coefficients by design matrix
VarF <- var(as.vector(fixef(i) %*% t(i@pp$X)))
# Get variance of random effects by extracting variance components
VarRand <- colSums(do.call(rbind,lapply(VarCorr(i),function(j) j[1])))
# Get residual variance
var.vec <- var.vec/(VarF+VarRand)
}else{
var.vec <- NA
}
names(var.vec) <- paste("effect.response","VAR",sep=".")
return(var.vec)
}
## function to turn lmer output from list to DF
fun.format.in.data.frame <- function(list.res,names.param){
dat.t <- data.frame(t(rep(NA,3*length(names.param))))
names(dat.t) <- c(names.param,paste(names.param,"Std.Error",sep=".")
,paste(names.param,"VAR",sep="."))
dat.t[,match(names(list.res$glmer.summary$fixed.coeff.E),names(dat.t))] <-
list.res$glmer.summary$fixed.coeff.E
dat.t[,length(names.param)+match(names(list.res$glmer.summary$fixed.coeff.E),names(dat.t))] <-
list.res$glmer.summary$fixed.coeff.Std.Error
dat.t[,match(names(list.res$glmer.summary$fixed.var),names(dat.t))] <-
list.res$glmer.summary$fixed.var
res <- data.frame(list.res$files.details,list.res$glmer.summary[1:7],dat.t)
return(res)
}
###########################
###########################
### FUNCTION TO RUN GLMER ESTIMATION
library(lme4)
run.multiple.model.for.set.one.trait <- function(model.files, fun.model, trait,
type.filling,
fname = 'data.all.no.log.rds',
cat.TF = FALSE, ...){
for (m in model.files)
try(run.model.for.set.one.trait (m, fun.model, trait,
type.filling = type.filling,
fname = fname, cat.TF = cat.TF, ...))
}
run.model.for.set.one.trait <- function(model.file, fun.model, trait,
type.filling, fname, cat.TF, ...){
fun.model <- match.fun(fun.model)
try(fun.model(model.file, trait, type.filling = type.filling,
fname = fname, cat.TF = cat.TF, ...))
}
#=====================================================================
# Run glmer() (in package lme4) for one ecoregion, one trait, one model
#=====================================================================
model.files.glmer.Tf.1 <-
c("R/analysis/model.glmer/model.glmer.LOGLIN.ER.AD.Tf.r.species.R")
model.files.glmer.Tf.2 <-
c("R/analysis/model.glmer/model.glmer.LOGLIN.ER.AD.Tf.r.biomes.species.R")
model.files.glmer.Tf.3 <-
c("R/analysis/model.glmer/model.glmer.LOGLIN.ER.AD.Tf.r.ecocode.species.R")
model.files.glmer.Tf.4 <-
c("R/analysis/model.glmer/model.glmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.biomes.species.R")
model.files.glmer.Tf.5 <-
c("R/analysis/model.glmer/model.glmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.ecocode.species.R")
### TODO ER AND AD TO CHAbNGE NEED TO ADD CAT
fun.call.glmer.and.save <- function(formula, df.glmer, path.out, var.sample){
library(MASS)
logexp <- function(exposure = 1)
{
linkfun <- function(mu) qlogis(mu^(1/exposure))
## FIXME: is there some trick we can play here to allow
## evaluation in the context of the 'data' argument?
linkinv <- function(eta) plogis(eta)^exposure
logit_mu_eta <- function(eta) {
ifelse(abs(eta)>30, .Machine$double.eps,
exp(eta)/(1+exp(eta))^2)
## OR .Call(stats:::C_logit_mu_eta, eta, PACKAGE = "stats")
}
mu.eta <- function(eta) {
exposure * plogis(eta)^(exposure-1) *
logit_mu_eta(eta)
}
valideta <- function(eta) TRUE
link <- paste("logexp(", deparse(substitute(exposure)), ")",
sep = "")
structure(list(linkfun = linkfun, linkinv = linkinv,
mu.eta = mu.eta, valideta = valideta,
name = link),
class = "link-glm")
}
Start <- Sys.time()
## glmer.output <- glmer(formula = formula, data = df.glmer,
## family = binomial(link = 'cloglog'))
glmer.output <- glmer(formula = formula, data = df.glmer,
family = binomial(link = logexp(exp(df.glmer$logyear))),
control=glmerControl(optimizer = "bobyqa",
tolPwrss = 1e-1,
optCtrl = list(maxfun=500)))
end <- Sys.time()
print(end - Start)
print(summary(glmer.output))
saveRDS(glmer.output,
file = file.path(path.out,
paste(var.sample,
"glmer.results.global.rds", sep = '.')))
}
run.glmer <- function (model.file, trait,
min.obs = 5, sample.size = NA,
type.filling, fname , cat.TF,
var.sample = 'ecocode.id') {
require(lme4)
source(model.file, local = TRUE)
model <- load.model()
#= Path for output
if(fname == 'data.all.no.log.rds') dir <- 'all.no.log'
if(fname == 'data.all.rds') dir <- 'all.log'
path.out <- output.dir.glmer(dir, model = model$name,
trait = trait,
type.filling = type.filling)
print(path.out)
dir.create(path.out, recursive = TRUE, showWarnings = FALSE)
cat("run glmer for model", model.file, " for set",
'all', "trait",
trait, "\n")
df.glmer <- load.data.for.glmer(trait,
fname = fname,
cat.TF = cat.TF,
sample.size = sample.size,
var.sample = var.sample) # return a DF
cat("Ok data with Nobs", nrow(df.glmer), "\n")
#= Run model
fun.call.glmer.and.save(formula = model$glmer.formula,
df.glmer = df.glmer,
path.out = path.out,
var.sample = var.sample)
}
#========================================================================
output.dir.glmer <- function (dir, model, trait, type.filling) {
file.path("output/glmer", dir, trait, type.filling, model)
}
#============================================================
# Function to prepare data for lmer
#============================================================
## add sample equivalent per ecocode
add.sampling.prob.by.var.sample <- function(df, var.sample){
vec.prob <- 1/table(df[[var.sample]])
return(df)
}
load.data.for.glmer <- function(trait, cat.TF,
fname = 'data.all.no.log.rds',
base.dir = "output/processed",
sample.size ,
var.sample = 'ecocode.id'){
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
if (cat.TF) cat <- 'cat'
if (!cat.TF) cat <- 'no.cat'
df <- readRDS(file = file.path(base.dir,
paste('data.glmer',
trait,
type,
cat,
'rds',
sep = '.')))
df$species.id[is.na(df$species.id)] <- 'missing.sp'
if(!is.na(sample.size)){
df$species.id[is.na(df$species.id)] <- 'missing.sp'
df <- add.sampling.prob.by.var.sample(df, var.sample = var.sample)
df <- df[sample(1:nrow(df), size = sample.size,
prob = df$prob.sample), ]
print(paste('sub-sampled by',var.sample))
}
return( df)
}
load.and.save.data.for.glmer <- function(trait,
min.obs= 10,
type.filling = species,
cat.TF= FALSE,
fname = 'data.all.no.log.rds',
base.dir = "output/processed",
data.table.TF = FALSE){
data.tree.tot <- readRDS(file.path(base.dir, fname))
df <- fun.data.for.glmer(data.tree.tot, trait,
type.filling = type.filling,
cat.TF = cat.TF)
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
if (cat.TF) cat <- 'cat'
if (!cat.TF) cat <- 'no.cat'
saveRDS(df,file = file.path(base.dir,paste('data.glmer', trait, type, cat,'rds',
sep = '.')))
}
fun.select.data.for.analysis <- function(data.tree, abs.CWM.tntf,
perc.neigh.sp,
perc.neigh.gs,
BATOT, min.obs,
min.perc.neigh.sp = 0.90,
min.perc.neigh.gs = 0.95){
## remove tree with NA
data.tree <- subset(data.tree, subset = (!is.na(data.tree[["dead"]])) &
(!is.na(data.tree[["D"]])) &
(!is.na(data.tree[['year']])))
## remove tree with zero
data.tree <- subset(data.tree, subset = data.tree[["D"]]>9.9)
## select species with missing traits
data.tree <- data.tree[(!is.na(data.tree[[abs.CWM.tntf]]) &
!is.na(data.tree[[BATOT]])), ]
# select obs abov min perc.neigh species
data.tree <- subset(data.tree, subset = data.tree[[perc.neigh.sp]] >
min.perc.neigh.sp &
!is.na(data.tree[[perc.neigh.sp]]))
# select obs abov min perc.neigh genus
data.tree <- subset(data.tree, subset = data.tree[[perc.neigh.gs]] >
min.perc.neigh.gs & !is.na(data.tree[[perc.neigh.gs]]))
# select species with minimum obs
data.tree <- subset(data.tree, subset = data.tree[["sp"]] %in%
names(table(factor(data.tree[["sp"]])))[table(factor(
data.tree[["sp"]]))>min.obs])
return(data.tree)
}
fun.center.and.standardized.var <- function(x){
return((x-mean(x))/sd(x))
}
fun.standardized.var <- function(x){
return(x/sd(x))
}
### get variables for lmer
fun.get.the.variables.for.glmer.no.tree.id <- function(data.tree, BATOT, CWM.tn,
abs.CWM.tntf, tf,
min.BA.G = 50){
dead <- data.tree[["dead"]]
logD <- fun.center.and.standardized.var(log(data.tree[["D"]]))
logyear <- log(data.tree[["year"]])
MAT <- fun.center.and.standardized.var(data.tree[["MAT"]])
MAP <- fun.center.and.standardized.var(data.tree[["MAP"]])
species.id <- factor(data.tree[["sp"]])
tree.id <- factor(data.tree[["tree.id"]])
plot.id <- factor(data.tree[["plot"]])
set.id <- factor(data.tree[["set"]])
ecocode.id <- factor(data.tree[['ecocode']])
biomes.id <- factor(data.tree[['biomes']])
#= multiply CWMs by BATOT
sumTnTfBn.abs <- data.tree[[abs.CWM.tntf]]*data.tree[[BATOT]]
sumTnBn <- data.tree[[CWM.tn]]*data.tree[[BATOT]]
sumTfBn <- data.tree[[tf]]*data.tree[[BATOT]]
sumTnTfBn.diff <- sumTnBn-sumTfBn
sumBn <- data.tree[[BATOT]]
return(
data.frame(dead = dead,
logD = logD,
logyear = logyear,
MAT = MAT,
MAP = MAP,
species.id = species.id,
tree.id = tree.id,
plot.id = plot.id,
set.id = set.id,
ecocode.id = ecocode.id,
biomes.id = biomes.id,
sumTnTfBn.diff = fun.standardized.var(sumTnTfBn.diff),
sumTnTfBn.abs = fun.standardized.var(sumTnTfBn.abs),
Tf = fun.standardized.var(data.tree[[tf]]),
sumTnBn = fun.standardized.var(sumTnBn),
sumTfBn = fun.standardized.var(sumTfBn),
sumBn = fun.standardized.var(sumBn),
stringsAsFactors = FALSE))
}
### get variables for lmer
fun.get.the.variables.for.glmer.no.tree.id.cat <- function(data.tree, BATOT,
CWM.tn,
abs.CWM.tntf, tf,
min.BA.G = 50){
dead <- data.tree[["dead"]]
logD <- fun.center.and.standardized.var(log(data.tree[["D"]]))
logyear <- log(data.tree[["year"]])
MAT <- fun.center.and.standardized.var(data.tree[["MAT"]])
MAP <- fun.center.and.standardized.var(data.tree[["MAP"]])
species.id <- factor(data.tree[["sp"]])
tree.id <- factor(data.tree[["tree.id"]])
plot.id <- factor(data.tree[["plot"]])
set.id <- factor(data.tree[["set"]])
ecocode.id <- factor(data.tree[['ecocode']])
biomes.id <- factor(data.tree[['biomes']])
#get the three cwm per cat
vec.CWM.tn <- paste(CWM.tn, c('A_EV', 'A_D', 'C'), sep = '.')
vec.abs.CWM.tntf <- paste(abs.CWM.tntf, c('A_EV', 'A_D', 'C'), sep = '.')
sumTnTfBn.abs.A_EV<- data.tree[[vec.abs.CWM.tntf[1]]]
sumTnTfBn.abs.A_D <- data.tree[[vec.abs.CWM.tntf[2]]]
sumTnTfBn.abs.C <- data.tree[[vec.abs.CWM.tntf[3]]]
sumTnBn.A_EV<- data.tree[[vec.CWM.tn[1]]]
sumTnBn.A_D <- data.tree[[vec.CWM.tn[2]]]
sumTnBn.C <- data.tree[[vec.CWM.tn[3]]]
sumTfBn <- data.tree[[tf]]*data.tree[[BATOT]]
sumTfBn.A_EV <- as.numeric(data.tree[['SLA.cat']] == 1) *sumTfBn
sumTfBn.A_D <- as.numeric(data.tree[['SLA.cat']] == 2) *sumTfBn
sumTfBn.C <- as.numeric(data.tree[['SLA.cat']] == 3) *sumTfBn
sumBn <- data.tree[[BATOT]]
Tf.A_EV <- as.numeric(data.tree[['SLA.cat']] == 1) *data.tree[[tf]]
Tf.A_D <- as.numeric(data.tree[['SLA.cat']] == 2) *data.tree[[tf]]
Tf.C <- as.numeric(data.tree[['SLA.cat']] == 3) *data.tree[[tf]]
return(data.frame(dead = dead,
logD = logD,
logyear = logyear,
MAT = MAT,
MAP = MAP,
species.id = species.id,
tree.id = tree.id,
plot.id = plot.id,
set.id = set.id,
ecocode.id = ecocode.id,
sumTnTfBn.abs.A_EV = fun.standardized.var(sumTnTfBn.abs.A_EV),
sumTnTfBn.abs.A_D = fun.standardized.var(sumTnTfBn.abs.A_D),
sumTnTfBn.abs.C = fun.standardized.var(sumTnTfBn.abs.C),
Tf.A_EV = fun.standardized.var(Tf.A_EV),
Tf.A_D = fun.standardized.var(Tf.A_D),
Tf.C = fun.standardized.var(Tf.C),
sumTnBn.A_EV = fun.standardized.var(sumTnBn.A_EV),
sumTnBn.A_D = fun.standardized.var(sumTnBn.A_D),
sumTnBn.C = fun.standardized.var(sumTnBn.C),
sumTfBn.A_EV = fun.standardized.var(sumTfBn.A_EV),
sumTfBn.A_D = fun.standardized.var(sumTfBn.A_D),
sumTfBn.C = fun.standardized.var(sumTfBn.C),
sumBn = fun.standardized.var(sumBn),
stringsAsFactors = FALSE))
}
#============================================================
# Function to prepare data for lmer with new CWM data
# that will be used in a simple linear model
#============================================================
fun.data.for.glmer <- function(data.tree, trait, min.obs = 10,
type.filling = 'species', cat.TF = FALSE) {
if(! trait %in% c("SLA", "Leaf.N","Seed.mass","SLA",
"Wood.density", "Max.height"))
stop("need trait to be in SLA Leaf.N Seed.mass SLA Wood.density or Max.height ")
# get vars names
CWM.tn <- paste(trait, "CWM", 'fill', sep = ".")
abs.CWM.tntf <- paste(trait, "abs.CWM", 'fill', sep = ".")
tf <- paste(trait, "focal", sep = ".")
BATOT <- "BATOT"
perc.neigh.sp <- paste(trait, "perc", 'species', sep = ".")
perc.neigh.gs <- paste(trait, "perc", 'genus', sep = ".")
data.tree <- fun.select.data.for.analysis(data.tree,
abs.CWM.tntf,
perc.neigh.sp,
perc.neigh.gs,
BATOT, min.obs)
#= DATA LIST FOR JAGS
if(!cat.TF) {
print('no cat')
glmer.data <- fun.get.the.variables.for.glmer.no.tree.id(data.tree,
BATOT,
CWM.tn,
abs.CWM.tntf,
tf)
}
if(cat.TF) {
print('cat')
glmer.data <- fun.get.the.variables.for.glmer.no.tree.id.cat(data.tree,
BATOT,
CWM.tn,
abs.CWM.tntf,
tf)
}
return(glmer.data)
}