Commit e045633e authored by Georges Kunstler's avatar Georges Kunstler
Browse files

fix problem in removing outlier in data.table closes #89 #86 #87

parent ab4eb10e
......@@ -4,6 +4,7 @@ trait.workshop
ignore
data/raw*
figs
*.DS_Store
*.xls
*.xlsx
.Rhistory
......
#!/usr/bin/env Rscript
rm(list = ls())
source("R/analysis/lmer.output-fun.R")
source("R/utils/plot.R")
## load results
DF.results <- readRDS('output/glmer.global.std.DF.rds')
table(DF.results$conv)
DF.results$id <- paste(DF.results$set, DF.results$ecocode, sep = ".")
## climate
site.clim.all <- read.csv( file.path("output/processed", "all.sites.clim.csv"),
stringsAsFactors = FALSE)
site.clim.all$id <- paste(site.clim.all$set, site.clim.all$ecocode, sep = ".")
mean.MAT <- tapply(site.clim.all$MAT, INDEX = site.clim.all$id,
FUN = mean, na.rm = TRUE)
mean.MAP <- tapply(site.clim.all$MAP, INDEX = site.clim.all$id,
FUN = mean, na.rm = TRUE)
unique.set <- tapply(site.clim.all$set, INDEX = site.clim.all$id,
FUN = unique, na.rm = TRUE)
data.clim.ecocode <- data.frame(id = names(mean.MAT),
MAT = mean.MAT,
MAP = mean.MAP,
set = unique.set, stringsAsFactors = FALSE)
biomes.vec <- fun.overly.plot.on.biomes(MAP = data.clim.ecocode$MAP/10,
MAT = data.clim.ecocode$MAT,
names.vec = 1:nrow(data.clim.ecocode))
data.clim.ecocode$biomes <- as.character(unlist(biomes.vec))
### merge climate and lmer results
DF.results <- merge(DF.results,
data.clim.ecocode[, c('id','MAT','MAP','biomes')] ,
by = "id")
DF.results$id2 <- paste(DF.results$id, DF.results$trait, DF.results$filling)
## add value for NVS
DF.results$biomes[DF.results$ecocode == 'MixedCool'] <- 'temperate_rainforest'
## ADD percentage traits
site.perc.all <- read.csv('output/processed/all.sites.perc.traits.csv',
stringsAsFactors = FALSE)
site.perc.all$id <- paste(site.perc.all$set, site.perc.all$ecocode, sep = ".")
##
DF.results <- merge(DF.results,
subset(site.perc.all,
select = c('id', 'perc.gymno',
'perc.ev', 'BATOT.m')),
by = "id")
### Analysis of the results
DF.R2m.diff <- do.call("rbind", lapply(1:nrow(DF.results),
fun.compute.criteria.diff,
DF.results, "R2m"))
DF.R2c.diff <- do.call("rbind", lapply(1:nrow(DF.results),
fun.compute.criteria.diff,
DF.results, "R2c"))
DF.AIC.diff <- do.call("rbind", lapply(1:nrow(DF.results),
fun.compute.criteria.diff,
DF.results, "AIC"))
DF.delta.AIC <- do.call("rbind", lapply(1:nrow(DF.results),
fun.compute.delta.AIC, DF.results))
DF.var.fixed <- fun.ratio.var.fixed.effect(DF.results)
DF.results <- cbind(DF.results, DF.R2m.diff, DF.R2c.diff,
DF.AIC.diff, DF.delta.AIC, DF.var.fixed)
## best model
DF.best.and.all.AIC <- do.call('rbind', lapply(unique(DF.results$id2),
FUN = fun.AIC, DF.results))
table(DF.best.and.all.AIC$best.model, DF.best.and.all.AIC$trait)
# global AIC
global.AIC.list <- fun.global.aic(DF.results)
### plots
models <- c('glmer.LOGLIN.ER.Tf', 'glmer.LOGLIN.ER.Tf')
names(models) <- c('Effect/response effect', 'Effect/response response')
list.params <- list(c(Response = 'sumTnBn'),
c(Effect = 'sumTfBn'))
## pdf('figs/parameters.glmer.MAP.ER.all.nolog.pdf', width = 9, height = 7)
fun.plot.panel.lmer.parameters.c(models = models,
traits = c('Wood.density', 'SLA',
'Leaf.N', 'Max.height'),
DF.results, var.x = 'MAP',
list.params = list.params,
small.bar = 0.2, threshold.delta.AIC = 10000000,
ylim = c(-0.2, 0.2), ylim.same = TRUE,
log = 'x', lwd = 1.5)
## dev.off()
models <- c('glmer.LOGLIN.AD.Tf')
names(models) <- c('Absolut distance effect')
list.params <- list(c(Response = 'sumTnTfBn.abs'))
## pdf('figs/parameters.glmer.MAP.ER.all.nolog.pdf', width = 9, height = 7)
fun.plot.panel.lmer.parameters.c(models = models,
traits = c('Wood.density', 'SLA',
'Leaf.N', 'Max.height'),
DF.results, var.x = 'MAP',
list.params = list.params,
small.bar = 0.2, threshold.delta.AIC = 10000000,
ylim = c(-0.005, 0.2), ylim.same = FALSE,
log = 'x', lwd = 1.5)
## dev.off()
models <- c('glmer.LOGLIN.ER.Tf')
names(models) <- c('ER Tf')
list.params <- list(c(Response = 'Tf'))
fun.plot.panel.lmer.parameters.c(models = models,
traits = c('Wood.density', 'SLA',
'Leaf.N', 'Max.height'),
DF.results, var.x = 'MAP',
list.params = list.params,
small.bar = 0.2, threshold.delta.AIC = 10000000,
ylim = c(-1, 1), ylim.same = FALSE, log = 'x')
models <- c('glmer.LOGLIN.ER.Tf')
names(models) <- c('ER Tf')
list.params <- list(c(Response = 'logD'))
fun.plot.panel.lmer.parameters.c(models = models,
traits = c('Wood.density', 'SLA',
'Leaf.N', 'Max.height'),
DF.results, var.x = 'MAP',
list.params = list.params,
small.bar = 0.2, threshold.delta.AIC = 10000000,
ylim = c(-1, 1), ylim.same = FALSE, log = 'x')
#########################
## Functions to Run JAGS
source('R/analysis/lmer.run.R')
source('R/analysis/stan.run.R')
### FUNCTION TO GENERATE THE GOOD FORMAT OF DATA
model.files.jags.Tf.1 <-
c("R/analysis/model.jags/model.LOGLIN.ER.AD.Tf.r.biomes.R")
fun.call.jags.and.save <- function(jags.model,
list.jags,
path.out,
var.sample,
iter = 100,
warmup = 50,
chains = 3,
thin = 5){
start <- Sys.time()
model.file <-
cat(jags.model$bug, file = file.path(path.out,"model.file.bug")
, sep=" ", fill = FALSE, labels = NULL, append = FALSE)
### SEND to jags
jags.output <- jags(data=list.jags,
model.file = file.path(path.out,"model.file.bug"),
parameters.to.save = jags.model$pars,
n.chains = chains ,
DIC = FALSE,
n.iter = iter,
n.burnin = warmup,
n.thin = thin)
end <- Sys.time()
print(end -start)
print(jags.output)
saveRDS(jags.output, file = file.path(path.out, paste(var.sample,
"results.jags.rds", sep = '.')))
}
run.jags <- function (model.file, trait, init.TF,
min.obs = 10, sample.size = NA,
type.filling='species', cat.TF,
fname = 'data.all.no.log.rds',
var.sample = 'ecocode.id',
...) {
require(R2jags)
source(model.file, local = TRUE)
model <- load.model()
#= Path for output
if(fname == 'data.all.no.log.rds') dir <- 'all.no.log'
if(fname == 'data.all.log.rds') dir <- 'all.log'
path.out <- output.dir(type = 'jags', model = model$name,
trait = trait, set = dir,
type.filling = type.filling)
print(path.out)
dir.create(path.out, recursive = TRUE, showWarnings = FALSE)
cat("run jags for model", model.file, " for trait",
trait, "\n")
if (init.TF) {
jags.list <- fun.load.stan(trait,fname = fname, cat.TF = cat.TF,
sample.size = sample.size,
var.sample = var.sample,
sample.vec.TF = FALSE)
cat("Ok data with Nobs", jags.list$N_indiv,
"\n")
}
if (!init.TF) {
jags.list <- fun.load.stan(trait,fname = fname, cat.TF = cat.TF,
sample.size = sample.size,
var.sample = var.sample,
sample.vec.TF = TRUE)
cat("Ok data with Nobs", jags.list$N_indiv, 'run real',
"\n")
fun.call.jags.and.save(jags.model = model,
list.jags = jags.list,
path.out = path.out,
var.sample = var.sample,
...)
}
gc()
}
#!/usr/bin/env Rscript
source("R/analysis/lmer.output-fun.R")
source("R/analysis/lmer.run.R")
source("R/analysis/plot.residual.R")
## LOOP OVER FILES AND NOT SCAN ALL FILES
traits <- c("SLA", "Wood.density","Max.height")
type.filling <- 'species'
files <- c()
for (trait in traits){
for(model in c(model.files.lmer.Tf.3)){
source(model, local = TRUE)
model.obj <- load.model()
pathout <- output.dir('lmer', model.obj$name, trait, 'all.no.log',
type.filling=type.filling)
files <- c(files,file.path(pathout,"ecocode.id.results.nolog.all.rds"))
}
}
lapply(files, read.and.plot.resid )
......@@ -129,7 +129,8 @@ load.data.for.lmer <- function(trait, cat.TF,
base.dir = "output/processed",
sample.size. ,
var.sample. = 'ecocode.id',
select.biome. = NA){
select.biome. = NA,
sample.vec.TF. = FALSE){
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
if (cat.TF) cat <- 'cat'
......@@ -141,11 +142,21 @@ if(!is.na(select.biome.)) {
df <- df[df$biomes.id == select.biome., ]
}
if(!is.na(sample.size.)){
if(sample.size. >nrow(df)){ sample.size. <- nrow(df)}
df <- add.sampling.prob.by.var.sample(df, var.sample = var.sample.)
df <- df[sample(1:nrow(df), size = sample.size.,
prob = df$prob.sample), ]
print(paste('sub-sampled by',var.sample.))
if(!sample.vec.TF.){
if(sample.size. >nrow(df)){ sample.size. <- nrow(df)}
df <- add.sampling.prob.by.var.sample(df, var.sample = var.sample.)
sample.vec <- sample(1:nrow(df), size = sample.size.,
prob = df$prob.sample)
df <- df[sample.vec, ]
saveRDS(sample.vec,file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
print(paste('sub-sampled by',var.sample.))
}else{
sample.vec <- readRDS(file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
df <- df[sample.vec, ]
print(paste('sub-sampled by',var.sample., 'by loading sample.vec'))
}
}
return( df)
}
......
load.model <- function(){
list(
name = "LOGLIN.ER.AD.Tf",
## inits function
initial.values = function(){
list("intercept.trait"=runif(1,min=-1,max=1),
"slope.trait.e"=runif(1,min=-2,max=2),
"slope.trait.r"=runif(1,min=-2,max=2),
"mean.B"=runif(1,min=-0.5,max=1),
"sigma.B"=runif(1,min=0.0001,max=1.5),
"sigma"=runif(1,min=0.0001,max=4),
"sigma.alpha"=runif(1,min=0.0001,max=4),
"sigma.plot"=runif(1,min=0.0001,max=4))},
## parameters to save
parameters.to.save =c("alpha","B1",
"slope.trait.e","slope.trait.r","intercept.trait",
"sigma","sigma.plot","mean.B","sigma.B",
"sigma.alpha"),
bug =
"#######################################################
######################## growth model with JAGS #######
model {
############ Likelihood ###################
for (i in 1:N.indiv) {
logG[i] ~ dnorm(theo.g[i],tau)
theo.g[i] <- alpha[species.id[i]] + B1[species.id[i]]*logD[i] +B2[set.id[i]]*sumBn +random.plot[plot.species.id[i]]
}
################################################
########### Hierarchical parameters ########
### species random param
for (n in 1:N.species)
{
B1[n] ~ dnorm(mean.B[1],tau.B[1])
alpha[n] ~ dnorm(0,tau.alpha)
for ( j in 1:Ncompet.sp)
{
param.compet[n,j] <- slope.trait.e*trait.compet[j] +slope.trait.r*trait.target[n] +intercept.trait
}
}
## plot effect
for (j in 1:N.plot.species)
{
random.plot[j] ~ dnorm(0,tau.plot)
}
###############################################
########### Non-hierarchical parameters ########
# constants for prior
tau0 <- 1.0E-4
# variance error
tau <- pow(sigma,-2)
sigma ~ dunif(0.00001,5)
tau.plot <- pow(sigma.plot,-2)
sigma.plot ~ dunif(0.00001,5)
## traits parameters
slope.trait.e ~ dnorm(0,tau0)
slope.trait.r ~ dnorm(0,tau0)
intercept.trait ~ dnorm(0,tau0)
## prior for hyper parameter mean and sd
for (l in 1:1)
{
mean.B[l]~ dnorm(0,tau0)
tau.B[l] <- pow(sigma.B[1],-2)
sigma.B[l] ~ dunif(0.000001,5)
}
tau.alpha <- pow(sigma.alpha,-2)
sigma.alpha ~ dunif(0.000001,5)
} # End of the jags model
")
}
load.model <- function(){
list(
name = "LOGLIN.RE",
## inits function
initial.values = function(){
list("intercept.trait"=runif(1,min=-1,max=1),
"slope.trait.e"=runif(1,min=-2,max=2),
"slope.trait.r"=runif(1,min=-2,max=2),
"mean.B"=runif(1,min=-0.5,max=1),
"sigma.B"=runif(1,min=0.0001,max=1.5),
"sigma"=runif(1,min=0.0001,max=4),
"sigma.alpha"=runif(1,min=0.0001,max=4),
"sigma.plot"=runif(1,min=0.0001,max=4))},
## parameters to save
parameters.to.save =c("alpha","B1",
"slope.trait.e","slope.trait.r","intercept.trait",
"sigma","sigma.plot","mean.B","sigma.B",
"sigma.alpha"),
bug = "################################################################################
######################## growth model with JAGS #############################
model {
############ Likelihood ###################
for (i in 1:N.indiv) {
logG[i] ~ dnorm(theo_g[i],tau)
theo_g[i] <- intercept + intercept_sp[species_id[i]]
+ intercept_plot[plot_id[i]]
+ intercept_ecocode[ecocode_id[i]]
+ param_logD_sp[species_id[i]]*logD[i]
+ param_Tf_ecocode[ecocode_id[i]]*Tf[i]
+ param_sumBn_ecocode[ecocode_id[i]]*sumBn[i]
+ param_sumTfBn_ecocode[ecocode_id[i]]*sumTfBn[i]
+ param_sumTnBn_ecocode[ecocode_id[i]]*sumTnBn[i]
}
################################################
########### Hierarchical parameters ########
### species random param
for (n in 1:N_species)
{
intercept_sp[n] ~ dnorm(0,tau_intercept_sp)
param_logD_sp[n] ~ dnorm(0,tau_logD_sp)
}
## plot effect
for (j in 1:N_plot.species)
{
intercept_plot[j] ~ dnorm(0,tau_intercept_plot)
}
## ecocode effect
for (n in 1:N_ecocode)
{
intercept_ecocode[n] ~ dnorm(0,tau_intercept_ecocode)
param_Tf_ecocode[n] ~ dnorm(0,tau_Tf_ecocode)
param_sumBn_ecocode[n] ~ dnorm(0,tau_sumBn_ecocode)
param_sumTfBn_ecocode[n] ~ dnorm(0,tau_sumTfBn_ecocode)
param_sumTnBn_ecocode[n] ~ dnorm(0,tau_sumTnBn_ecocode)
}
###############################################
########### Non-hierarchical parameters ########
##### TODO NEED TO CHANGE FROM HERE
# constants for prior
tau0 <- 1.0E-4
# variance error
tau <- pow(sigma,-2)
sigma ~ dunif(0.00001,5)
tau.plot <- pow(sigma.plot,-2)
sigma.plot ~ dunif(0.00001,5)
## traits parameters
slope.trait.e ~ dnorm(0,tau0)
slope.trait.r ~ dnorm(0,tau0)
intercept.trait ~ dnorm(0,tau0)
## prior for hyper parameter mean and sd
for (l in 1:1)
{
mean.B[l]~ dnorm(0,tau0)
tau.B[l] <- pow(sigma.B[1],-2)
sigma.B[l] ~ dunif(0.000001,5)
}
tau.alpha <- pow(sigma.alpha,-2)
sigma.alpha ~ dunif(0.000001,5)
} # End of the jags model
")
}
load.model <- function(){
list(
name = "LOGLIN.RE",
## parameters to save
parameters.to.save <- c('intercept','mean_logD','mean_Tf','mean_sumBn',
'mean_sumTfBn','mean_sumTnBn','sigma_inter_sp',
'sigma_inter_ecocode','sigma_inter_plot','sigma_logD_sp',
'sigma_Tf_ecocode','sigma_sumBn_ecocode',
'sigma_sumTfBn_ecocode','sigma_sumTnBn_ecocode',
'sigma',
'param_Tf_ecocode',
'param_sumBn_ecocode','param_sumTfBn_ecocode',
'param_sumTnBn_ecocode','intercept_ecocode')
,
stan = "
data {
int<lower=0> N_indiv;
int<lower=0> N_species;
int<lower=0> N_plot;
int<lower=0> N_ecocode;
int species_id[N_indiv];
int plot_id[N_indiv];
int ecocode_id[N_indiv];
real logG[N_indiv];
real logD[N_indiv];
real sumBn[N_indiv];
real Tf[N_indiv];
real sumTfBn[N_indiv];
real sumTnBn[N_indiv];
}
parameters{
real<lower=-1,upper=1> intercept;
real<lower=-10,upper=10> mean_logD;
real<lower=-10,upper=10> mean_Tf;
real<lower=-10,upper=10> mean_sumBn;
real<lower=-10,upper=10> mean_sumTfBn;
real<lower=-10,upper=10> mean_sumTnBn;
real<lower=0.0001,upper=5> sigma_inter_sp;
real<lower=0.0001,upper=5> sigma_inter_ecocode;
real<lower=0.0001,upper=5> sigma_inter_plot;
real<lower=0.0001,upper=5> sigma_logD_sp;
real<lower=0.0001,upper=5> sigma_Tf_ecocode;
real<lower=0.0001,upper=5> sigma_sumBn_ecocode;
real<lower=0.0001,upper=5> sigma_sumTfBn_ecocode;
real<lower=0.0001,upper=5> sigma_sumTnBn_ecocode;
real<lower=0.0001,upper=5> sigma;
vector[N_species] param_logD_sp;
vector[N_species] intercept_sp;
vector[N_plot] intercept_plot;
vector[N_ecocode] param_Tf_ecocode;
vector[N_ecocode] param_sumBn_ecocode;
vector[N_ecocode] param_sumTfBn_ecocode;
vector[N_ecocode] param_sumTnBn_ecocode;
vector[N_ecocode] intercept_ecocode;
}
transformed parameters {
# vector for prediction
vector[N_indiv] theo_g;
for (i in 1:N_indiv){
theo_g[i] <- intercept + intercept_sp[species_id[i]]
+ intercept_plot[plot_id[i]]
+ intercept_ecocode[ecocode_id[i]]
+ param_logD_sp[species_id[i]]*logD[i]
+ param_Tf_ecocode[ecocode_id[i]]*Tf[i]
+ param_sumBn_ecocode[ecocode_id[i]]*sumBn[i]
+ param_sumTfBn_ecocode[ecocode_id[i]]*sumTfBn[i]
+ param_sumTnBn_ecocode[ecocode_id[i]]*sumTnBn[i]
;
}
}
model {
# constants for prior
real sigma0;
################################################################################
######################## growth model with STAN #############################
###############################################
########### Hierarchical parameters ########
### species random param
param_logD_sp ~ normal(mean_logD,sigma_logD_sp);
intercept_sp ~ normal(0,sigma_inter_sp);
### plot random param
intercept_plot ~ normal(0,sigma_inter_plot);
### ecocode random param
param_Tf_ecocode ~ normal(mean_Tf,sigma_Tf_ecocode);
param_sumBn_ecocode ~ normal(mean_sumBn,sigma_sumBn_ecocode);
param_sumTfBn_ecocode ~ normal(mean_sumTfBn,sigma_sumTfBn_ecocode);
param_sumTnBn_ecocode ~ normal(mean_sumTnBn,sigma_sumTnBn_ecocode);
intercept_ecocode ~ normal(0,sigma_inter_ecocode);
###############################################
########### Non-Hierarchical parameters ########
# constant for prior
sigma0 <- 10;
# slope and intercept
intercept ~ normal(0,sigma0);
mean_logD ~ normal(0,sigma0);
mean_Tf ~ normal(0,sigma0);
mean_sumBn ~ normal(0,sigma0);
mean_sumTfBn ~ normal(0,sigma0);
mean_sumTnBn ~ normal(0,sigma0);
# sigma
sigma_inter_sp~ uniform(0,6);
sigma_inter_ecocode~ uniform(0,6);
sigma_inter_plot~ uniform(0,6);