Commit e92a6834 authored by Georges Kunstler's avatar Georges Kunstler
Browse files

changed to subsampling per plot close #99

parent fad23301
### test stan
## I changed the coding of the random effect with norm(0,10*sigma it seems more slow!
## PROBLEM WITH STANDARDIZED VARIABLE DO I NEED TO INCLUDE AN INTERCEPT??
## VERY BAD CONVERGENCE ???
## Need to compare the two code
source('R/analysis/stan.run.R')
### TEST simple model on France only
df.lmer <- load.data.for.lmer(trait = 'SLA',fname = 'data.all.no.log.rds', cat.TF = FALSE,
sample.size = NA,
var.sample = NA,
sample.vec.TF. = FALSE,
select.set. = NA)
stan.list <- fun.turn.in.list.for.jags.stan(df.lmer,
cat.TF = FALSE)
source('R/analysis/model.stan/model.stan.LOGLIN.size.fixed.R', local = TRUE)
model <- load.model()
fun.init.stan <- function(chain_id= 1, stan.list){
init <- list(intercept = 0+(chain_id-1)/10,
intercept_species = rnorm(stan.list$N_species, 0, 0.5+(chain_id-1)/10),
intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +(chain_id-1)/10),
mean_logD = 2/3+(chain_id-1)/10,
sigma_inter_species = 0.5 +(chain_id-1)/10,
sigma_inter_plot = 0.5 + +(chain_id-1)/10,
sigma = 0.5 +(chain_id-1)/10)
}
inits <- list(model$init.fun(1, stan.list),
model$init.fun(2, stan.list),
model$init.fun(3, stan.list))
library(rstan)
system.time( stan.output <- stan(model_code = model$stan,
data = stan.list,
pars = model$pars,
init = inits,
iter = 1000,
warmup = 500,
chains = 3,
verbose = FALSE))
source('R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R', local = TRUE)
model <- load.model()
fun.init.stan <- function(chain_id= 1, stan.list){
step <- (chain_id-1)/10
init <- list(intercept = 0+step,
intercept_species = rnorm(stan.list$N_species, 0, 0.5+step),
param_sumBn_species = rnorm(stan.list$N_species, 0, 0.5+step),
param_logD_species = rnorm(stan.list$N_species, 0, 0.5+step),
intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +step),
mean_logD = 2/3+step,
param_Tf = 2/3+step,
param_sumBn = 2/3+step,
param_sumTfBn = 2/3+step,
param_sumTnBn = 2/3+step,
param_sumTnTfBn_abs= 2/3+step,
sigma_inter_species = 0.5 +step,
sigma_inter_plot = 0.5 + +step,
sigma = 0.5 +step,
sigma_sumBn_species = 0.5 +step,
sigma_logD_species = 0.5 +step
)
}
inits <- list(fun.init.stan(1, stan.list),
fun.init.stan(2, stan.list),
fun.init.stan(3, stan.list))
library(rstan)
system.time( stan.output2 <- stan(model_code = model$stan,
data = stan.list,
pars = model$pars,
init = inits,
iter = 1000,
warmup = 500,
chains = 3,
verbose = FALSE))
save.image('test.stan.Rdata')
stan.output
plot(stan.output)
traceplot(stan.output)
pairs(stan.output)
library(ggmcmc)
S <- ggs(stan.output)
extract(stan.output, permuted = FALSE, inc_warmup=FALSE))
ggs_crosscorrelation(S, absolute_scale = FALSE)
ggs_density(S)
ggs_traceplot(S)
ggs_running(S)
ggs_autocorrelation(S)
ggs_Rhat(S)
ggs_geweke(S)
ggs_caterpillar(S)
kunstler@grp2233-Latitude-E6420.12167:1409576364
\ No newline at end of file
......@@ -27,34 +27,15 @@ run.model.for.set.one.trait <- function(model.file, fun.model, trait,
#=====================================================================
# Run lmer() (in package lme4) for one ecoregion, one trait, one model
#=====================================================================
model.files.lmer.Tf.0 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.Tf.r.biomes.species.R",
"R/analysis/model.lmer/model.lmer.LOGLIN.AD.Tf.r.biomes.species.R",
"R/analysis/model.lmer/model.lmer.LOGLIN.Tf.r.biomes.species.R")
model.files.lmer.Tf.1 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.r.species.R")
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.fixed.biomes.species.R",
"R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.fixed.species.R")
model.files.lmer.Tf.2 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.species.R")
model.files.lmer.Tf.3 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.r.biomes.species.R",
"R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.fixed.biomes.species.R",
"R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.fixed.species.R")
model.files.lmer.Tf.4 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.r.ecocode.species.R")
model.files.lmer.Tf.5 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.biomes.species.R")
"R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.r.ecocode.species.R")
model.files.lmer.Tf.CAT.1 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.CAT.r.species.R")
model.files.lmer.Tf.CAT.2 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.CAT.r.biomes.species.R")
model.files.lmer.Tf.CAT.3 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.CAT.MAT.MAP.r.species.R")
fun.call.lmer.and.save <- function(formula, df.lmer, path.out,
var.sample, select.biome){
......@@ -131,6 +112,18 @@ add.sampling.prob.by.var.sample <- function(df, var.sample){
return(df2)
}
resample.plot.by.var <- function(df, var.sample, sample.size){
df2 <- add.sampling.prob.by.var.sample(df, var.sample)
mean.prob.per.plot <- tapply(df2[['prob.sample']], df2[['plot']], mean,
na.rm =TRUE)
rm(df2,df1)
df1 <- data.frame(plot = names(mean.prob.per.plot),
prob.sample = as.numeric(mean.prob.per.plot))
num.sample.plot <- sample(1:nrow(df1), size = sample.size,
prob = df1$prob.sample)
vec.sample.plot <- df1[['plot']][num.sample.plot]
return(vec.sample.plot)
}
load.data.for.lmer <- function(trait, cat.TF,
......@@ -157,31 +150,27 @@ load.data.for.lmer <- function(trait, cat.TF,
}
if(!is.na(sample.size.)){
if(!sample.vec.TF.){
if(sample.size. >nrow(df)){ sample.size. <- nrow(df)}
df <- add.sampling.prob.by.var.sample(df, var.sample = var.sample.)
sample.vec <- sample(1:nrow(df), size = sample.size.,
prob = df$prob.sample)
if(sample.size. > length(unique(df$plot))){
sample.size. <- length(unique(df$plot))}
plot.selected <- resample.plot.by.var(df, var.sample., sample.size.)
sample.vec <- (1:nrow(df))[df$plot %in% plot.selected]
df <- df[sample.vec, ]
saveRDS(sample.vec,file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
saveRDS(sample.vec,file = file.path(base.dir,paste('sample.vec',
trait, type, cat,'rds',
sep = '.')))
print(paste('sub-sampled by',var.sample.))
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
}else{
sample.vec <- readRDS(file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
sample.vec <- readRDS(file = file.path(base.dir,paste('sample.vec', trait,
type, cat,'rds',
sep = '.')))
df <- df[sample.vec, ]
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
print(paste('sub-sampled by',var.sample., 'by loading sample.vec'))
}
}else{
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
}
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
return( res)
}
......
load.model <- function () {
list(name="lmer.LOGLIN.AD.Tf.r.biomes.species",
var.BLUP = 'biomes.id',
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTnTfBn.abs+(1|species.id) +(logD-1|species.id) +(sumBn-1|species.id)+(Tf-1|biomes.id)+(sumBn-1|biomes.id)+(sumTnTfBn.abs-1|biomes.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.CAT.MAT.MAP.r.species",
var.BLUP = 'species.id',
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|plot.id)+MAT+MAP+Tf.A_EV+Tf.A_EV:MAT+Tf.A_EV:MAP+Tf.A_D+Tf.A_D:MAT+Tf.A_D:MAP+Tf.C+Tf.C:MAT+Tf.C:MAP+logD+sumBn+sumBn:MAT+sumBn:MAP+sumTfBn.A_EV+sumTfBn.A_EV:MAT+sumTfBn.A_EV:MAP+sumTfBn.A_D+sumTfBn.A_D:MAT+sumTfBn.A_D:MAP+sumTfBn.C+sumTfBn.C:MAT+sumTfBn.C:MAP+sumTnBn.A_EV+sumTnBn.A_EV:MAT+sumTnBn.A_EV:MAP+sumTnBn.A_D+sumTnBn.A_D:MAT+sumTnBn.A_D:MAP+sumTnBn.C+sumTnBn.C:MAT+sumTnBn.C:MAP+sumTnTfBn.abs+sumTnTfBn.abs:MAT+sumTnTfBn.abs:MAP+(1|species.id)+(logD-1|species.id)+(sumBn-1|species.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.CAT",
lmer.formula.tree.id=formula("logG~1+(1|plot.id)+Tf.A_EV+Tf.A_D+Tf.C+logD+sumBn+sumTfBn.A_EV+sumTfBn.A_D+sumTfBn.C+sumTnBn.A_EV+sumTnBn.A_D+sumTnBn.C+sumTnTfBn.abs+(1+logD|species.id)+(1+Tf.A_EV +Tf.A_D-1+Tf.C+sumBn+sumTfBn.A_EV+sumTfBn.A_D+sumTfBn.C+sumTnBn.A_EV+sumTnBn.A_D+sumTnBn.C+sumTnTfBn.abs|set.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.CAT.norandom",
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|plot.id)+Tf.A_EV+Tf.A_D+Tf.C+logD+sumBn+sumTfBn.A_EV+sumTfBn.A_D+sumTfBn.C+sumTnBn.A_EV+sumTnBn.A_D+sumTnBn.C+sumTnTfBn.abs+(1+logD|species.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.CAT.r.biomes.species",
var.BLUP = 'biomes.id',
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|plot.id)+Tf.A_EV+Tf.A_D+Tf.C+logD+sumBn+sumTfBn.A_EV+sumTfBn.A_D+sumTfBn.C+sumTnBn.A_EV+sumTnBn.A_D+sumTnBn.C+sumTnTfBn.abs+(1|species.id) +(logD-1|species.id) +(sumBn-1|species.id)+(Tf.A_EV-1|biomes.id)+(Tf.A_D-1|biomes.id)+(Tf.C-1|biomes.id)+(sumBn-1|biomes.id)+(sumTfBn.A_EV-1|biomes.id)+(sumTfBn.A_D-1|biomes.id)+(sumTfBn.C-1|biomes.id)+(sumTnBn.A_EV-1|biomes.id)+(sumTnBn.A_D-1|biomes.id)+(sumTnBn.C-1|biomes.id)+(sumTnTfBn.abs-1|biomes.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.CAT.r.species",
var.BLUP = 'species.id',
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|plot.id)+Tf.A_EV+Tf.A_D+Tf.C+logD+sumBn+sumTfBn.A_EV+sumTfBn.A_D+sumTfBn.C+sumTnBn.A_EV+sumTnBn.A_D+sumTnBn.C+sumTnTfBn.abs+(1|species.id) +(logD-1|species.id) +(sumBn-1|species.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.MAT.MAP",
lmer.formula.tree.id=formula("logG~1+(1|plot.id)+Tf+Tf:MAT+Tf:MAP+MAT+MAP+logD+sumBn+sumBn:MAT+sumBn:MAP+sumTfBn+sumTfBn:MAT+sumTfBn:MAP+sumTnBn+sumTnBn:MAT+sumTnBn:MAP+sumTnTfBn.abs+sumTnTfBn.abs:MAT+sumTnTfBn.abs:MAP+(1+logD|species.id)+(1+Tf+sumBn+sumTfBn+sumTnBn+sumTnTfBn.abs|set.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.MAT.MAP.norandom",
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|plot.id)+Tf+Tf:MAT+Tf:MAP+MAT+MAP+logD+sumBn+sumBn:MAT+sumBn:MAP+sumTfBn+sumTfBn:MAT+sumTfBn:MAP+sumTnBn+sumTnBn:MAT+sumTnBn:MAP+sumTnTfBn.abs+sumTnTfBn.abs:MAT+sumTnTfBn.abs:MAP+(1+logD|species.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.biomes.species",
var.BLUP = 'biomes.id',
lmer.formula.tree.id=formula("logG~1+(1|plot.id)+Tf+Tf:MAT+Tf:MAP+MAT+MAP+logD+sumBn+sumBn:MAT+sumBn:MAP+sumTfBn+sumTfBn:MAT+sumTfBn:MAP+sumTnBn+sumTnBn:MAT+sumTnBn:MAP+sumTnTfBn.abs+sumTnTfBn.abs:MAT+sumTnTfBn.abs:MAP+(1|species.id)+(logD-1|species.id)+(sumBn-1|species.id)+(MAT-1|species.id)+(MAP-1|species.id)+(Tf-1|biomes.id)+(sumBn-1|biomes.id)+(sumTfBn-1|biomes.id)+(sumTnBn-1|biomes.id)+(sumTnTfBn.abs-1|biomes.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.MAT.MAP.r.species",
var.BLUP = 'species.id',
lmer.formula.tree.id=formula("logG~1+(1|plot.id)+Tf+Tf:MAT+Tf:MAP+MAT+MAP+logD+sumBn+sumBn:MAT+sumBn:MAP+sumTfBn+sumTfBn:MAT+sumTfBn:MAP+sumTnBn+sumTnBn:MAT+sumTnBn:MAP+sumTnTfBn.abs+sumTnTfBn.abs:MAT+sumTnTfBn.abs:MAP+(1|species.id)+(logD-1|species.id)+(sumBn-1|species.id)+(MAT-1|species.id)+(MAP-1|species.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf",
lmer.formula.tree.id=formula("logG~1+(1|plot.id)+(1|set.id)+Tf+logD+sumBn+sumTfBn+sumTnBn+sumTnTfBn.abs+(1+logD|species.id)+(Tf-1|set.id)+(sumBn-1|set.id)+(sumTfBn-1|set.id)+(sumTnBn-1|set.id)+(sumTnTfBn.abs-1|set.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.norandom",
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|plot.id)+Tf+logD+sumBn+sumTfBn+sumTnBn+sumTnTfBn.abs+(1+logD|species.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.r.biomes",
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTfBn+sumTnBn+sumTnTfBn.abs+(1+logD|species.id)+(Tf-1|biomes.id)+(sumBn-1|biomes.id)+(sumTfBn-1|biomes.id)+(sumTnBn-1|biomes.id)+(sumTnTfBn.abs-1|biomes.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.AD.Tf.r.species",
var.BLUP = 'species.id',
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|plot.id)+Tf+logD+sumBn+sumTfBn+sumTnBn+sumTnTfBn.abs+(1|species.id)+(logD-1|species.id)+(sumBn-1|species.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.ER.Tf.r.biomes.species",
var.BLUP = 'biomes.id',
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTfBn+sumTnBn+(1|species.id) +(logD-1|species.id) +(sumBn-1|species.id)+(Tf-1|biomes.id)+(sumBn-1|biomes.id)+(sumTfBn-1|biomes.id)+(sumTnBn-1|biomes.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.Tf.r.biomes.species",
var.BLUP = 'biomes.id',
lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+(1|species.id) +(logD-1|species.id) +(sumBn-1|species.id)+(Tf-1|biomes.id)+(sumBn-1|biomes.id)"))
}
load.model <- function () {
list(name="lmer.LOGLIN.nocomp.Tf.MAT.MAP",
lmer.formula.tree.id=formula("logG~1+(1|plot.id)+Tf+Tf:MAT+Tf:MAP+MAT+MAP+logD+(1+logD|species.id)+(1+Tf|set.id)"))
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment