Commit 3d41364d authored by Georges Kunstler's avatar Georges Kunstler
Browse files

progress on JAGS simplest model

parent c698aae5
### test jags
source('R/analysis/jags.run.R')
run.multiple.model.for.set.one.trait(model.files.jags.Tf.1, run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 10000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 3,
init.TF = TRUE)
t1000 <- system.time(
res <- run.jags(model.files.jags.Tf.1, trait = 'Wood.density', type.filling = 'species',
init.TF = FALSE, cat.TF = FALSE, iter = 200,
warmup = 10, chains = 2,
thin = 1)
)
t1000.4 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.1,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 10000,
var.sample = 'ecocode.id',
iter = 10000, warmup = 1000,
chains = 3, thin = 10,
init.TF = FALSE))
t1000.8 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.1,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 10000,
var.sample = 'ecocode.id',
iter = 50000, warmup = 20,
chains = 3, thin = 50,
init.TF = FALSE))
## 50000 obs
run.multiple.model.for.set.one.trait(model.files.jags.Tf.1, run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 100000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 3,
init.TF = TRUE)
t10000 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.1,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 100000,
var.sample = 'ecocode.id',
iter = 200,
warmup = 10, chains = 2,
thin = 5,
init.TF = FALSE))
t10000.4 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.1,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 10000,
var.sample = 'ecocode.id',
iter = 10000, warmup = 1000,
chains = 3, thin = 10,
init.TF = FALSE))
t10000.8 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.1,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 100000,
var.sample = 'ecocode.id',
iter = 50000, warmup = 20,
chains = 3, thin = 50,
init.TF = FALSE))
sample.vec <- c(5000, 10000, 50000)
t <- c(t1000['elapsed'], t1000.4['elapsed'], t1000.8['elapsed'])
t2 <- c(t10000['elapsed'], t10000.4['elapsed'], t10000.8['elapsed'])
pdf('time.jags.pdf')
plot(sample.vec, t, ylim = range(t, t2), type = 'b')
lines(sample.vec, t2, type = 'b', col = 'green')
dev.off()
###
run.multiple.model.for.set.one.trait(model.files.jags.Tf.2, run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 10000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 3,
init.TF = TRUE)
t1000 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.2,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 10000,
var.sample = 'ecocode.id',
iter = 5000,
warmup = 500, chains = 3,
thin = 5,
init.TF = FALSE))
t1000.4 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.2,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 10000,
var.sample = 'ecocode.id',
iter = 10000, warmup = 1000,
chains = 3, thin = 10,
init.TF = FALSE))
t1000.8 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.2,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 10000,
var.sample = 'ecocode.id',
iter = 50000, warmup = 20,
chains = 3, thin = 50,
init.TF = FALSE))
## 50000 obs
run.multiple.model.for.set.one.trait(model.files.jags.Tf.2, run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 100000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 3,
init.TF = TRUE)
t10000 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.2,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 100000,
var.sample = 'ecocode.id',
iter = 5000,
warmup = 500, chains = 3,
thin = 5,
init.TF = FALSE))
t10000.4 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.2,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 10000,
var.sample = 'ecocode.id',
iter = 10000, warmup = 1000,
chains = 3, thin = 10,
init.TF = FALSE))
t10000.8 <- system.time(
res <- run.multiple.model.for.set.one.trait(model.files.jags.Tf.2,
run.jags,
trait = 'Wood.density',
type.filling='species',
sample.size = 100000,
var.sample = 'ecocode.id',
iter = 50000, warmup = 20,
chains = 3, thin = 50,
init.TF = FALSE))
sample.vec <- c(5000, 10000, 50000)
t <- c(t1000['elapsed'], t1000.4['elapsed'], t1000.8['elapsed'])
t2 <- c(t10000['elapsed'], t10000.4['elapsed'], t10000.8['elapsed'])
pdf('time.jags.r.pdf')
plot(sample.vec, t, ylim = range(t, t2), type = 'b')
lines(sample.vec, t2, type = 'b', col = 'green')
dev.off()
###
###########################
###########################
### FUNCTION TO RUN LMER ESTIMATION WITH NO logBA for all in one big model
library(lme4)
run.multiple.model.for.set.one.trait <- function(model.files, fun.model, trait,
type.filling, cat.TF = FALSE,
fname = 'data.all.no.log.rds',
...){
for (m in model.files)
(run.model.for.set.one.trait (m, fun.model, trait,
type.filling = type.filling, cat.TF = cat.TF,
fname, ...))
}
run.model.for.set.one.trait <- function(model.file, fun.model, trait,
type.filling, cat.TF, fname, ...){
fun.model <- match.fun(fun.model)
try(fun.model(model.file, trait, type.filling = type.filling,
cat.TF = cat.TF, fname = fname, ...))
}
#=====================================================================
# Run lmer() (in package lme4) for one ecoregion, one trait, one model
#=====================================================================
model.files.lmer.Tf.0 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.Tf.r.biomes.species.R",
"R/analysis/model.lmer/model.lmer.LOGLIN.AD.Tf.r.biomes.species.R",
"R/analysis/model.lmer/model.lmer.LOGLIN.Tf.r.biomes.species.R")
model.files.lmer.Tf.1 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.r.species.R")
model.files.lmer.Tf.2 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.species.R")
model.files.lmer.Tf.3 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.r.biomes.species.R")
model.files.lmer.Tf.3b <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.fixed.biomes.species.R")
model.files.lmer.Tf.4 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.r.ecocode.species.R")
model.files.lmer.Tf.5 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.biomes.species.R")
model.files.lmer.Tf.CAT.1 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.CAT.r.species.R")
model.files.lmer.Tf.CAT.2 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.CAT.r.biomes.species.R")
model.files.lmer.Tf.CAT.3 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.CAT.MAT.MAP.r.species.R")
fun.call.lmer.and.save <- function(formula, df.lmer, path.out,
var.sample, select.biome){
lmer.output <- lmer(formula = formula, data = df.lmer, REML = FALSE,
control = lmerControl(optCtrl = list(maxfun = 40000) ) )
print(summary(lmer.output))
if(is.na(select.biome)){
name.file <- paste(var.sample,
"results.nolog.all.rds", sep ='.')
}
if(!is.na(select.biome)){
name.file <- paste(var.sample, gsub(' ', '_',select.biome),
"results.nolog.all.rds", sep ='.')
}
saveRDS(lmer.output, file = file.path(path.out, name.file))
}
run.lmer <- function (model.file, trait,
min.obs = 10, sample.size = NA,
type.filling, cat.TF, fname = 'data.all.no.log.rds',
var.sample = 'ecocode',
select.biome = NA) {
require(lme4)
source(model.file, local = TRUE)
model <- load.model()
#= Path for output
if(fname == 'data.all.no.log.rds') dir <- 'all.no.log'
if(fname == 'data.all.log.rds') dir <- 'all.log'
path.out <- output.dir('lmer', model$name, trait, dir,
type.filling = type.filling)
print(path.out)
dir.create(path.out, recursive = TRUE, showWarnings = FALSE)
cat("run lmer for model", model.file, " for trait",
trait, "\n")
df.lmer <- load.data.for.lmer(trait,fname = fname,
cat.TF = cat.TF,
sample.size. = sample.size,
var.sample. = var.sample,
select.biome. = select.biome)
# return a DF
cat("Ok data with Nobs", nrow(df.lmer),
"\n")
#= Run model
fun.call.lmer.and.save(formula = model$lmer.formula.tree.id,
df.lmer, path.out = path.out,
var.sample = var.sample,
select.biome = select.biome)
rm(df.lmer)
gc()
}
#========================================================================
output.dir <- function (type , model, trait, set, type.filling) {
file.path("output", type, set, trait, type.filling, model)
}
#============================================================
# Function to prepare data for lmer
#============================================================
#
## add sample equivalent per ecocode
add.sampling.prob.by.var.sample <- function(df, var.sample){
vec.prob <- 1/table(df[[var.sample]])
return(df)
}
load.data.for.lmer <- function(trait, cat.TF,
fname = 'data.all.no.log.rds',
base.dir = "output/processed",
sample.size. ,
var.sample. = 'ecocode',
select.biome. = NA,
sample.vec.TF. = FALSE){
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
if (cat.TF) cat <- 'cat'
if (!cat.TF) cat <- 'no.cat'
df <- readRDS(file = file.path(base.dir,paste('data', trait, type,'rds',
sep = '.')))
df$sp.name[is.na(df$sp.name)] <- 'missing.sp'
if(!is.na(select.biome.)) {
df <- df[df$biomes == select.biome., ]
}
if(!is.na(sample.size.)){
if(!sample.vec.TF.){
if(sample.size. >nrow(df)){ sample.size. <- nrow(df)}
df <- add.sampling.prob.by.var.sample(df, var.sample = var.sample.)
sample.vec <- sample(1:nrow(df), size = sample.size.,
prob = df$prob.sample)
df <- df[sample.vec, ]
saveRDS(sample.vec,file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
print(paste('sub-sampled by',var.sample.))
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
}else{
sample.vec <- readRDS(file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
df <- df[sample.vec, ]
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
print(paste('sub-sampled by',var.sample., 'by loading sample.vec'))
}
}else{
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
}
return( res)
}
load.and.save.data.for.lmer <- function(trait,
min.obs= 10,
type.filling = species,
fname = 'data.all.no.log.rds',
base.dir = "output/processed"){
data.tree.tot <- readRDS(file.path(base.dir, fname))
df <- fun.select.data.for.lmer(data.tree.tot, trait,
type.filling = type.filling)
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
saveRDS(df,file = file.path(base.dir,paste('data', trait, type, 'rds',
sep = '.')))
}
fun.select.data.for.analysis <- function(data.tree, abs.CWM.tntf, perc.neigh.sp,
perc.neigh.gs, BATOT, min.obs,
min.perc.neigh.sp = 0.90,
min.perc.neigh.gs = 0.95,
min.BA.G = -100,
max.BA.G = 500){
select.temp <-(1:nrow(data.tree))[!is.na(data.tree[["BA.G"]]) &
!is.na(data.tree[["D"]]) &
data.tree[["BA.G"]]>min.BA.G &
data.tree[["BA.G"]]<max.BA.G &
data.tree[["D"]]>9.9 &
!is.na(data.tree[[abs.CWM.tntf]]) &
!is.na(data.tree[[BATOT]]) &
data.tree[[perc.neigh.sp]] >
min.perc.neigh.sp &
!is.na(data.tree[[perc.neigh.sp]]) &
data.tree[[perc.neigh.gs]] >
min.perc.neigh.gs &
!is.na(data.tree[[perc.neigh.gs]])]
## remove tree with NA
data.tree <- data.tree[select.temp ,]
# select species with minimum obs
select.temp <- (1:nrow(data.tree))[data.tree[["sp"]] %in%
names(table(factor(data.tree[["sp"]])))[
table(factor(data.tree[["sp"]]))>min.obs]]
data.tree <- data.tree[select.temp, ]
DT <- drop(data.tree)
rm(data.tree)
gc()
return(DT)
}
fun.center.and.standardized.var <- function(x){
return((x-mean(x))/sd(x))
}
fun.standardized.var <- function(x){
return(x/sd(x))
}
### get variables for lmer
fun.get.the.variables.for.lmer.tree.id <- function(data.tree, BATOT, CWM.tn,
abs.CWM.tntf, tf,
min.BA.G = 100){
logG <- fun.center.and.standardized.var(log(data.tree[["BA.G"]] + min.BA.G+1))
logD <- fun.center.and.standardized.var(log(data.tree[["D"]]))
MAT <- fun.center.and.standardized.var(data.tree[["MAT"]])
MAP <- fun.center.and.standardized.var(data.tree[["MAP"]])
species.id <- as.character(factor(data.tree[["sp.name"]]))
tree.id <- as.character(factor(data.tree[["tree.id"]]))
plot.id <- as.character(factor(data.tree[["plot"]]))
set.id <- as.character(factor(data.tree[["set"]]))
ecocode.id <- as.character(factor(data.tree[['ecocode']]))
biomes.id <- factor(data.tree[['biomes']])
#= multiply CWMs by BATOT
sumTnTfBn.abs <- data.tree[[abs.CWM.tntf]]*
data.tree[[BATOT]]
sumTnBn <- data.tree[[CWM.tn]]*data.tree[[BATOT]]
sumTfBn <- fun.center.and.standardized.var(data.tree[[tf]])*data.tree[[BATOT]]
sumTnTfBn.diff <- sumTnBn-sumTfBn
sumBn <- data.tree[[BATOT]]
return(data.frame(logG = logG,
logD = logD,
MAT = MAT,
MAP = MAP,
species.id = species.id,
tree.id = tree.id,
set.id = set.id,
ecocode.id = ecocode.id,
biomes.id = biomes.id,
plot.id = plot.id,
sumTnTfBn.diff = fun.standardized.var(sumTnTfBn.diff),
sumTnTfBn.abs = fun.standardized.var(sumTnTfBn.abs),
Tf = fun.center.and.standardized.var(data.tree[[tf]]),
sumTnBn = fun.standardized.var(sumTnBn),
sumTfBn = fun.standardized.var(sumTfBn),
sumBn = fun.standardized.var(sumBn),
stringsAsFactors = FALSE))
}
fun.get.the.variables.for.lmer.tree.id.cat <- function(data.tree, BATOT, CWM.tn,
abs.CWM.tntf, tf,
min.BA.G = 100){
logG <- fun.center.and.standardized.var(log(data.tree[["BA.G"]]+min.BA.G+1))
logD <- fun.center.and.standardized.var(log(data.tree[["D"]]))
MAT <- fun.center.and.standardized.var(data.tree[["MAT"]])
MAP <- fun.center.and.standardized.var(data.tree[["MAP"]])
species.id <- factor(data.tree[["sp.name"]])
tree.id <- factor(data.tree[["tree.id"]])
plot.id <- factor(data.tree[["plot"]])
set.id <- factor(data.tree[["set"]])
ecocode.id <- factor(data.tree[['ecocode']])
biomes.id <- factor(data.tree[['biomes']])
#get the three cwm per cat
vec.CWM.tn <- paste(CWM.tn, c('A_EV', 'A_D', 'C'), sep = '.')
vec.abs.CWM.tntf <- abs.CWM.tntf
sumTnTfBn.abs<- data.tree[[vec.abs.CWM.tntf]]
sumTnBn.A_EV<- data.tree[[vec.CWM.tn[1]]]
sumTnBn.A_D <- data.tree[[vec.CWM.tn[2]]]
sumTnBn.C <- data.tree[[vec.CWM.tn[3]]]
sumTfBn <- data.tree[[tf]]*data.tree[[BATOT]]
sumTfBn.A_EV <- as.numeric(data.tree[['SLA.cat']] == 1) *sumTfBn
sumTfBn.A_D <- as.numeric(data.tree[['SLA.cat']] == 2) *sumTfBn
sumTfBn.C <- as.numeric(data.tree[['SLA.cat']] == 3) *sumTfBn
sumBn <- data.tree[[BATOT]]
Tf.A_EV <- as.numeric(data.tree[['SLA.cat']] == 1) *data.tree[[tf]]
Tf.A_D <- as.numeric(data.tree[['SLA.cat']] == 2) *data.tree[[tf]]
Tf.C <- as.numeric(data.tree[['SLA.cat']] == 3) *data.tree[[tf]]
return(
data.frame(logG = logG,
logD = logD,
MAT = MAT,
MAP = MAP,
species.id = species.id,
tree.id = tree.id,
set.id = set.id,
ecocode.id = ecocode.id,
biomes.id = biomes.id,
plot.id = plot.id,
sumTnTfBn.abs =
fun.standardized.var(sumTnTfBn.abs),
Tf.A_EV= fun.standardized.var(Tf.A_EV),
Tf.A_D= fun.standardized.var(Tf.A_D),
Tf.C= fun.standardized.var(Tf.C),
sumTnBn.A_EV= fun.standardized.var(sumTnBn.A_EV),
sumTnBn.A_D= fun.standardized.var(sumTnBn.A_D),
sumTnBn.C = fun.standardized.var(sumTnBn.C),
sumTfBn.A_EV= fun.standardized.var(sumTfBn.A_EV),
sumTfBn.A_D= fun.standardized.var(sumTfBn.A_D),
sumTfBn.C = fun.standardized.var(sumTfBn.C),
sumBn = fun.standardized.var(sumBn)))
}
#============================================================
# Function to prepare data for lmer with new CWM data
# that will be used in a simple linear model
#============================================================
fun.select.data.for.lmer <- function(data.tree, trait, min.obs = 10,
type.filling = 'species') {
if(! trait %in% c("SLA", "Leaf.N", "Seed.mass",
"SLA", "Wood.density", "Max.height"))
stop("need trait to be in SLA Leaf.N Seed.mass
SLA Wood.density or Max.height ")
# get vars names
CWM.tn <- paste(trait, "CWM", 'fill', sep = ".")
abs.CWM.tntf <- paste(trait, "abs.CWM", 'fill', sep = ".")
tf <- paste(trait, "focal", sep = ".")
BATOT <- "BATOT"
perc.neigh.sp <- paste(trait, "perc", 'species', sep = ".")
perc.neigh.gs <- paste(trait, "perc", 'genus', sep = ".")
data.tree <- fun.select.data.for.analysis(data.tree,
abs.CWM.tntf,
perc.neigh.sp,
perc.neigh.gs,
BATOT,
min.obs)
return(data.tree)
}
#============================================================
# Function to prepare data for lmer with new CWM data
# that will be used in a simple linear model
#============================================================
fun.format.data.for.lmer <- function(data.tree, trait, min.obs = 10,
type.filling = 'species', cat.TF) {
if(! trait %in% c("SLA", "Leaf.N", "Seed.mass",
"SLA", "Wood.density", "Max.height"))
stop("need trait to be in SLA Leaf.N Seed.mass
SLA Wood.density or Max.height ")
# get vars names
CWM.tn <- paste(trait, "CWM", 'fill', sep = ".")
abs.CWM.tntf <- paste(trait, "abs.CWM", 'fill', sep = ".")
tf <- paste(trait, "focal", sep = ".")
BATOT <- "BATOT"
#= DATA LIST FOR LMER
if(!cat.TF) {
print('no cat')
lmer.data <- fun.get.the.variables.for.lmer.tree.id(data.tree,
BATOT,
CWM.tn,
abs.CWM.tntf,
tf)
}
if(cat.TF) {
print('cat')
lmer.data <- fun.get.the.variables.for.lmer.tree.id.cat(data.tree,
BATOT,
CWM.tn,
abs.CWM.tntf,