Commit af84e86f authored by Georges Kunstler's avatar Georges Kunstler
Browse files

fixed problem in initial value

parent a4806faa
......@@ -43,12 +43,12 @@ $(D2)/TRY/data.TRY.std.rds:
GLOBAL: $(D3)/Done.txt
$(D3)/Done.txt: scripts/process.data/remove.out.R scripts/process.data/summarise.data.R $(D3)/Done.t.txt
Rscript $<
Rscript scripts/process.data/summarise.data.R
Rscript scripts/process.data/plot.data.all.R
R CMD BATCH $<
R CMD BATCH scripts/process.data/summarise.data.R
R CMD BATCH scripts/process.data/plot.data.all.R
$(D3)/Done.t.txt: scripts/process.data/merge.all.processed.data.R
Rscript $<
R CMD BATCH $<
#-------------------------------------------------------
TEST.TREE: $(D4)/Done.txt tree.all.sites
......
......@@ -12,38 +12,38 @@ load.model <- function(){
init.fun = function(chain_id= 1, list){
step <- (chain_id-1)/10
init <-
list(intercept = 0+step,
intercept_species = rnorm(list$N_species, 0, 0.5+step),
param_sumBn_species = rnorm(list$N_species, 0, 0.5+step),
param_logD_species = rnorm(list$N_species, 0, 0.5+step),
intercept_plot = rnorm(list$N_plot, 0, 0.5 +step),
intercept_set = rnorm(list$N_set, 0, 0.5 +step),
mean_logD = 2/3+step,
param_Tf = 2/3+step,
param_sumBn = 2/3+step,
param_sumTfBn = 2/3+step,
param_sumTnBn = 2/3+step,
param_sumTnTfBn_abs= 2/3+step,
sigma_inter_species = 0.5 +step,
sigma_inter_plot = 0.5 + +step,
sigma_inter_set = 0.5 + +step,
list(intercept = 0 + step,
intercept_species = rnorm(list$N_species, 0, 0.3 + step),
param_sumBn_species = rnorm(list$N_species, 0, 0.1 + step),
param_logD_species = rnorm(list$N_species, 0, 0.3 + step),
intercept_plot = rnorm(list$N_plot, 0, 0.3 + step),
intercept_set = rnorm(list$N_set, 0, 0.1 + step),
mean_logD = 2/3 + step,
param_Tf = -0.1 + step,
param_sumBn = -0.1 + step,
param_sumTfBn = -0.1 + step,
param_sumTnBn = -0.1 + step,
param_sumTnTfBn_abs= -0.1 + step,
sigma_inter_species = 0.5 + step,
sigma_inter_plot = 0.3 + step,
sigma_inter_set = 0.1 + step,
sigma = 0.5 +step,
sigma_sumBn_species = 0.5 +step,
sigma_logD_species = 0.5 +step)
sigma_sumBn_species = 0.1 +step,
sigma_logD_species = 0.3 +step)
return(init)
},
stan = "
data {
int<lower=0> N_indiv;
int<lower=0> N_species;
int<lower=0> N_plot;
int<lower=0> N_set;
int species_id[N_indiv];
int plot_id[N_indiv];
int set_id[N_indiv];
real logG[N_indiv];
real logD[N_indiv];
real sumBn[N_indiv];
......@@ -93,27 +93,31 @@ transformed parameters {
model {
# constants for prior
## real sigma0;
################################################################################
######################## growth model with STAN #############################
real sigma0;
sigma0 <- 10;
###############################################
########### Hierarchical parameters ########
### species random param
param_logD_species ~ normal(0,sigma_logD_species);
param_sumBn_species ~ normal(0,sigma_sumBn_species);
intercept_species ~ normal(0, sigma_inter_species);
param_logD_species ~ normal(0,sigma_logD_species);
param_sumBn_species ~ normal(0,sigma_sumBn_species);
intercept_species ~ normal(0, sigma_inter_species);
### plot random param
intercept_plot ~ normal(0,sigma_inter_plot);
intercept_plot ~ normal(0,sigma_inter_plot);
### set random effect
intercept_set ~ normal(0, sigma_inter_set);
### biomes fixed param
param_Tf ~ normal(0, sigma0);
param_sumBn ~ normal(0,sigma0);
param_sumTfBn ~ normal(0 ,sigma0);
param_sumTnBn ~ normal(0 ,sigma0);
param_sumTnTfBn_abs ~ normal(0 , sigma0);
intercept_set ~ normal(0, sigma_inter_set);
###############################################
########### Non-Hierarchical parameters ########
# slope and intercept
intercept ~ normal(0,sigma0);
mean_logD ~ normal(0,sigma0);
### biomes fixed param
param_Tf ~ normal(0, sigma0);
param_sumBn ~ normal(0,sigma0);
param_sumTfBn ~ normal(0 ,sigma0);
param_sumTnBn ~ normal(0 ,sigma0);
param_sumTnTfBn_abs ~ normal(0 , sigma0);
###############################################
......
......@@ -13,26 +13,26 @@ load.model <- function(){
step <- (chain_id-1)/10
init <-
list(intercept = 0+step,
intercept_species = rnorm(list$N_species, 0, 0.5+step),
param_sumBn_species = rnorm(list$N_species, 0, 0.5+step),
param_logD_species = rnorm(list$N_species, 0, 0.5+step),
intercept_plot = rnorm(list$N_plot, 0, 0.5 +step),
intercept_set = rnorm(list$N_set, 0, 0.5 +step),
mean_logD = 2/3+step,
param_Tf_biomes= rep(2/3+step, list$N_biomes),
param_sumBn_biomes= rep(2/3+step, list$N_biomes),
param_sumTfBn_biomes= rep(2/3+step, list$N_biomes),
param_sumTnBn_biomes= rep(2/3+step, list$N_biomes),
param_sumTnTfBn_abs_biomes= rep(2/3+step, list$N_biomes),
sigma_inter_species = 0.5 +step,
sigma_inter_plot = 0.5 + +step,
sigma_inter_set = 0.5 + +step,
intercept_species = rnorm(list$N_species, 0, 0.3+step),
param_sumBn_species = rnorm(list$N_species, 0, 0.1+step),
param_logD_species = rnorm(list$N_species, 0, 0.3+step),
intercept_plot = rnorm(list$N_plot, 0, 0.3 +step),
intercept_set = rnorm(list$N_set, 0, 0.1 +step),
mean_logD = (2/3 - 0.1) +step,
param_Tf_biomes= rep(-0.1+step, list$N_biomes),
param_sumBn_biomes= rep(-0.1+step, list$N_biomes),
param_sumTfBn_biomes= rep(-0.1+step, list$N_biomes),
param_sumTnBn_biomes= rep(-0.1+step, list$N_biomes),
param_sumTnTfBn_abs_biomes= rep(-0.1+step, list$N_biomes),
sigma_inter_species = 0.3 +step,
sigma_inter_plot = 0.3 + +step,
sigma_inter_set = 0.1 + +step,
sigma = 0.5 +step,
sigma_sumBn_species = 0.5 +step,
sigma_logD_species = 0.5 +step)
sigma_sumBn_species = 0.1 +step,
sigma_logD_species = 0.3 +step)
return(init)
},
stan = "
data {
int<lower=0> N_indiv;
......@@ -40,12 +40,12 @@ data {
int<lower=0> N_plot;
int<lower=0> N_biomes;
int<lower=0> N_set;
int species_id[N_indiv];
int plot_id[N_indiv];
int biomes_id[N_indiv];
int set_id[N_indiv];
real logG[N_indiv];
real logD[N_indiv];
real sumBn[N_indiv];
......@@ -80,12 +80,12 @@ transformed parameters {
vector[N_indiv] theo_g;
for (i in 1:N_indiv){
theo_g[i] <- intercept + intercept_species[species_id[i]]
theo_g[i] <- intercept + intercept_species[species_id[i]]
+ intercept_plot[plot_id[i]]
+ intercept_set[set_id[i]]
+ (mean_logD + param_logD_species[species_id[i]])*logD[i]
+ (param_Tf_biomes[biomes_id[i]])*Tf[i]
+ ( param_sumBn_biomes[biomes_id[i]] + param_sumBn_species[species_id[i]])*sumBn[i]
+ ( param_sumBn_biomes[biomes_id[i]] + param_sumBn_species[species_id[i]])*sumBn[i]
+ (param_sumTfBn_biomes[biomes_id[i]])*sumTfBn[i]
+ (param_sumTnBn_biomes[biomes_id[i]])*sumTnBn[i]
+ (param_sumTnTfBn_abs_biomes[biomes_id[i]])*sumTnTfBn_abs[i]
......@@ -94,48 +94,36 @@ transformed parameters {
}
model {
# constants for prior
## real sigma0;
################################################################################
######################## growth model with STAN #############################
# constants for prior
real sigma0;
sigma0 <- 10;
###############################################
########### Hierarchical parameters ########
### species random param
###############################################
########### Hierarchical parameters ########
### species random param
param_logD_species ~ normal(0,sigma_logD_species);
param_sumBn_species ~ normal(0,sigma_sumBn_species);
intercept_species ~ normal(0, sigma_inter_species);
### plot random param
### plot random param
intercept_plot ~ normal(0,sigma_inter_plot);
### set random effect
intercept_set ~ normal(0, sigma_inter_set);
### biomes fixed param
## param_Tf_biomes ~ normal(0, sigma0);
## param_sumBn_biomes ~ normal(0,sigma0);
## param_sumTfBn_biomes ~ normal(0 ,sigma0);
## param_sumTnBn_biomes ~ normal(0 ,sigma0);
## param_sumTnTfBn_abs_biomes ~ normal(0 , sigma0);
### set random effect
intercept_set ~ normal(0, sigma_inter_set);
###############################################
########### Non-Hierarchical parameters ########
# constant for prior
## sigma0 <- 10;
# slope and intercept
## intercept ~ normal(0,sigma0);
## mean_logD ~ normal(0,sigma0);
## # sigma
## sigma_inter_species~ uniform(0,6);
## sigma_inter_set~ uniform(0,6);
## sigma_inter_plot~ uniform(0,6);
## sigma_inter_tree~ uniform(0,6);
## sigma_logD_species~ uniform(0,6);
## sigma_sumBn_species~ uniform(0,6);
## sigma~ uniform(0,6);
###############################################
########### Non-Hierarchical parameters ########
# slope and intercept
intercept ~ normal(0,sigma0);
mean_logD ~ normal(0,sigma0);
### biomes fixed param
param_Tf_biomes ~ normal(0, sigma0);
param_sumBn_biomes ~ normal(0,sigma0);
param_sumTfBn_biomes ~ normal(0 ,sigma0);
param_sumTnBn_biomes ~ normal(0 ,sigma0);
param_sumTnTfBn_abs_biomes ~ normal(0 , sigma0);
###############################################
############ Likelihood ###################
logG ~ normal(theo_g,sigma);
###############################################
############ Likelihood ###################
logG ~ normal(theo_g,sigma);
}
")
}
......
......@@ -37,10 +37,10 @@ data {
int<lower=0> N_indiv;
int<lower=0> N_species;
int<lower=0> N_plot;
int species_id[N_indiv];
int plot_id[N_indiv];
real logG[N_indiv];
real logD[N_indiv];
real sumBn[N_indiv];
......@@ -90,28 +90,27 @@ model {
real sigma0;
sigma0 <- 10;
################################################################################
######################## growth model with STAN #############################
###############################################
########### Hierarchical parameters ########
### species random param
###############################################
########### Hierarchical parameters ########
### species random param
param_logD_species ~ normal(0,sigma_logD_species);
param_sumBn_species ~ normal(0,sigma_sumBn_species);
intercept_species ~ normal(0, sigma_inter_species);
### plot random param
### plot random param
intercept_plot ~ normal(0,sigma_inter_plot);
### biomes fixed param
### fixed param
intercept ~ normal(0,sigma0);
mean_logD ~ normal(0,sigma0);
### biomes fixed param
param_Tf ~ normal(0, sigma0);
param_sumBn ~ normal(0,sigma0);
param_sumTfBn ~ normal(0 ,sigma0);
param_sumTnBn ~ normal(0 ,sigma0);
param_sumTnTfBn_abs ~ normal(0 , sigma0);
###############################################
############ Likelihood ###################
logG ~ normal(theo_g,sigma);
###############################################
############ Likelihood ###################
logG ~ normal(theo_g,sigma);
}
")
}
......
......@@ -26,16 +26,16 @@ fun.call.stan.parallel.and.save <- function(stan.model, list.stan, path.out,
var.sample){
start <- Sys.time()
set_cppo(mode = "fast")
inits <- stan.model$init.fun(chain_id,
list.stan)
inits <- list(stan.model$init.fun(chains,
list.stan))
stan.output <- stan(model_code = stan.model$stan,
data = list.stan,
pars = stan.model$pars,
init = 'random',
init = inits,
iter = iter,
warmup = warmup,
chains = chains,
chain_id = chain_id,
## chain_id = chain_id,
thin = thin,
verbose = FALSE)
end <- Sys.time()
......@@ -57,7 +57,7 @@ fun.call.stan.and.save <- function(stan.model, list.stan, path.out,
stan.output <- stan(model_code = stan.model$stan,
data = list.stan,
pars = stan.model$pars,
init = 'random',
init = inits,
iter = iter,
warmup = warmup,
chains = chains,
......
### test stan
## I changed the coding of the random effect with norm(0,10*sigma it seems more slow!
## I changed the coding of the random effect
## with norm(0,10*sigma it seems more slow!
## PROBLEM WITH STANDARDIZED VARIABLE DO I NEED TO INCLUDE AN INTERCEPT??
## VERY BAD CONVERGENCE ???
......@@ -12,11 +13,12 @@ source('R/analysis/stan.run.R')
### TEST simple model on France only
df.lmer <- load.data.for.lmer(trait = 'SLA',fname = 'data.all.no.log.rds', cat.TF = FALSE,
sample.size = NA,
var.sample = NA,
sample.vec.TF. = FALSE,
select.set. = 'France')
df.lmer <- load.data.for.lmer(trait = 'SLA',
fname = 'data.all.no.log.rds',
cat.TF = FALSE,
sample.size = 500,
var.sample = 'ecocode',
sample.vec.TF. = FALSE)
stan.list <- fun.turn.in.list.for.jags.stan(df.lmer,
cat.TF = FALSE)
......@@ -26,8 +28,12 @@ source('R/analysis/model.stan/model.stan.LOGLIN.size.fixed.R', local = TRUE)
fun.init.stan <- function(chain_id= 1, stan.list){
init <- list(intercept = 0+(chain_id-1)/10,
intercept_species = rnorm(stan.list$N_species, 0, 0.5+(chain_id-1)/10),
intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +(chain_id-1)/10),
intercept_species = rnorm(stan.list$N_species,
0,
0.5+(chain_id-1)/10),
intercept_plot = rnorm(stan.list$N_plot,
0,
0.5 +(chain_id-1)/10),
mean_logD = 2/3+(chain_id-1)/10,
sigma_inter_species = 0.5 +(chain_id-1)/10,
sigma_inter_plot = 0.5 + +(chain_id-1)/10,
......@@ -48,33 +54,12 @@ system.time( stan.output <- stan(model_code = model$stan,
chains = 3,
verbose = FALSE))
source('R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R', local = TRUE)
source('R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R', local = TRUE)
model <- load.model()
fun.init.stan <- function(chain_id= 1, stan.list){
step <- (chain_id-1)/10
init <- list(intercept = 0+step,
intercept_species = rnorm(stan.list$N_species, 0, 0.5+step),
param_sumBn_species = rnorm(stan.list$N_species, 0, 0.5+step),
param_logD_species = rnorm(stan.list$N_species, 0, 0.5+step),
intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +step),
mean_logD = 2/3+step,
param_Tf = 2/3+step,
param_sumBn = 2/3+step,
param_sumTfBn = 2/3+step,
param_sumTnBn = 2/3+step,
param_sumTnTfBn_abs= 2/3+step,
sigma_inter_species = 0.5 +step,
sigma_inter_plot = 0.5 + +step,
sigma = 0.5 +step,
sigma_sumBn_species = 0.5 +step,
sigma_logD_species = 0.5 +step
)
}
inits <- list(fun.init.stan(1, stan.list),
fun.init.stan(2, stan.list),
fun.init.stan(3, stan.list))
inits <- list(model$init.fun(1, stan.list),
model$init.fun(2, stan.list),
model$init.fun(3, stan.list))
library(rstan)
......@@ -96,8 +81,35 @@ pairs(stan.output)
library(ggmcmc)
S <- ggs(stan.output)
extract(stan.output, permuted = FALSE, inc_warmup=FALSE))
S <- ggs(stan.output2)
ggs_crosscorrelation(S)
ggs_crosscorrelation(S, absolute_scale = FALSE)
ggs_density(S)
ggs_traceplot(S)
ggs_running(S)
ggs_autocorrelation(S)
ggs_Rhat(S)
ggs_geweke(S)
ggs_caterpillar(S)
#######
## read results of simul on cluster
source("R/analysis/stan.output-fun.R")
stan.out.clust <- fun.merge.chain(
path.out = "output/stan/all.no.log/SLA/species/LOGLIN.ER.AD.Tf.fixed.biomes/",
var.sample = 'ecocode', chains.vec = 1:3)
stan.out.clust
plot(stan.out.clust)
pairs(stan.out.clust)
traceplot(stan.out.clust, ask = TRUE)
## base on traceplot the initial value seems to be
## a big problem for the lack of convergence
library(ggmcmc)
S <- ggs(stan.out.clust)
ggs_crosscorrelation(S)
ggs_crosscorrelation(S, absolute_scale = FALSE)
ggs_density(S)
ggs_traceplot(S)
......@@ -108,3 +120,23 @@ ggs_geweke(S)
ggs_caterpillar(S)
run.multiple.model.for.set.one.trait(model.files.stan.Tf.1, run.stan, 'SLA',
type.filling='species', sample.size = 10000, var.sample = 'ecocode',
iter = 1000, warmup = 500, chains = 1, chain_id = 1,
init.TF = FALSE)
run.multiple.model.for.set.one.trait(model.files.stan.Tf.1, run.stan, 'SLA',
type.filling='species', sample.size = 1000, var.sample = 'ecocode',
iter = 1000, warmup = 500, chains = 3, parallel.TF = FALSE,
init.TF = FALSE)
out.stan3 <- readRDS('output/stan/all.no.log/SLA/species/LOGLIN.ER.AD.Tf.fixed.biomes/ecocode.results.stan.rds')
traceplot(out.stan3, ask = TRUE)
plot(out.stan3)
pairs(out.stan3)
library(ggmcmc)
S <- ggs(out.stan3)
ggs_crosscorrelation(S)
ggs_crosscorrelation(S, absolute_scale = FALSE)
ggs_density(S, family = c('param_sumBn_biomes'))
......@@ -8,7 +8,9 @@ load.processed.data <- function(path, file.name = "data.tree.tot.no.std.csv"){
require(data.table)
fname <- file.path(path, file.name )
if(file.exists(fname)){
cat('loading file', path, file.name)
data <- fread(fname, stringsAsFactors = FALSE)
print(warnings())
return(data)
}else{return(NULL)}
}
......
......@@ -6,11 +6,11 @@ mkdir -p trait.workshop
for trait in "'SLA'" "'Leaf.N'" "'Wood.density'" "'Max.height'" "'Seed.mass'"; do
echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait);print('done')\"" > trait.workshop/data1$trait.sh
echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait);print('done')\"" > trait.workshop/data1$trait.sh
qsub trait.workshop/data1$trait.sh -l nodes=1:ppn=1,mem=8gb -N "data1$trait" -q opt32G -j oe
echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait, fname = 'data.all.log.rds');print('done')\"" > trait.workshop/data3$trait.sh
echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait, fname = 'data.all.log.rds');print('done')\"" > trait.workshop/data3$trait.sh
qsub trait.workshop/data3$trait.sh -l nodes=1:ppn=1,mem=8gb -N "data3$trait" -q opt32G -j oe
done
......
#!/bin/bash
# Georges Kunstler 14/05/2014
# read one variable
export LD_LIBRARY_PATH=/usr/lib64/R/library
mkdir -p trait.workshop
iter=50000
warmup=5000
thin=50
# test parameter
nbargs=$#
echo "number of arguments="$nbargs
if [ $nbargs -ne 1 ]
then
echo "need one and only one argument"
echo " usage :"
echo " ./launch_all_lmer.sh sample.size"
exit 100
fi
samplesize=$1
#
for trait in "'SLA'" "'Wood.density'" "'Max.height'" "'Seed.mass'" "'Leaf.N'"; do
echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/jags.run.R'); run.multiple.model.for.set.one.trait(c(model.files.jags.Tf.1), run.jags,$trait,type.filling='species', sample.size = $samplesize, var.sample = 'ecocode.id',iter = $iter, warmup = $warmup, chains = 3, thin = $thin, init.TF = TRUE);print('done')\"" > trait.workshop/speciesjags${trait}.sh
qsub trait.workshop/speciesjags${trait}.sh -l nodes=1:ppn=1,mem=8gb -N "jags${trait}" -q opt32G -j oe
done
......@@ -5,8 +5,8 @@ export LD_LIBRARY_PATH=/usr/lib64/R/library
mkdir -p trait.workshop
iter=1000
warmup=500
iter=2000
warmup=1000
# test parameter
nbargs=$#
echo "number of arguments="$nbargs
......
......@@ -5,18 +5,19 @@
rm(list = ls())
source("R/format.data/format-fun.R")
dir.create("output/formatted/BCI", recursive = TRUE, showWarnings = FALSE)
library(reshape, quietly = TRUE)
### READ DATA read individuals tree data Requires careful formatting of 7 census
## datasets The raw data is such that, once a tree dies in census X, then it no
## longer exists in census X+1, X+2 etc...
data.bci1 <- read.csv("data/raw/BCI/stem.data/BCI.all.plots.census.allstems.1.csv",
header = TRUE,
stringsAsFactors = FALSE)
stringsAsFactors = FALSE)
print(table(data.bci1$Stem))
print(sum(is.na(data.bci1$Stem)))
print(table(data.bci1$Stem))
print(sum(is.na(data.bci1$Stem)))
data.bci1$Date1 <- data.bci1$Date; data.bci1$Date <- NULL
data.bci1$DBH1 <- data.bci1$DBH; data.bci1$DBH <- NULL
data.bci1$HOM1 <- data.bci1$HOM; data.bci1$HOM <- NULL
......