From af84e86f83dff3e502c7817d578165c95f2ef62d Mon Sep 17 00:00:00 2001 From: Georges Kunstler <Georges.Kunstler@gmail.com> Date: Wed, 10 Sep 2014 17:25:37 +0200 Subject: [PATCH] fixed problem in initial value --- Makefile | 8 +- .../model.stan.LOGLIN.ER.AD.Tf.fixed.R | 74 +++++++------ .../model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R | 100 ++++++++--------- .../model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R | 27 +++-- R/analysis/stan.run.R | 10 +- R/analysis/test.stan.R | 102 ++++++++++++------ R/process.data/test.tree.CWM-fun.R | 2 + launch.cluster/launch_all_data.lmer.bash | 4 +- launch.cluster/launch_all_jags.init.bash | 33 ++++++ launch.cluster/launch_all_stan.bash | 4 +- scripts/format.data/BCI.R | 7 +- scripts/format.data/Sweden.R | 46 ++++++-- 12 files changed, 255 insertions(+), 162 deletions(-) create mode 100644 launch.cluster/launch_all_jags.init.bash diff --git a/Makefile b/Makefile index 59d6b08..d62261a 100644 --- a/Makefile +++ b/Makefile @@ -43,12 +43,12 @@ $(D2)/TRY/data.TRY.std.rds: GLOBAL: $(D3)/Done.txt $(D3)/Done.txt: scripts/process.data/remove.out.R scripts/process.data/summarise.data.R $(D3)/Done.t.txt - Rscript $< - Rscript scripts/process.data/summarise.data.R - Rscript scripts/process.data/plot.data.all.R + R CMD BATCH $< + R CMD BATCH scripts/process.data/summarise.data.R + R CMD BATCH scripts/process.data/plot.data.all.R $(D3)/Done.t.txt: scripts/process.data/merge.all.processed.data.R - Rscript $< + R CMD BATCH $< #------------------------------------------------------- TEST.TREE: $(D4)/Done.txt tree.all.sites diff --git a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.R b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.R index a102c5f..cc53ecb 100644 --- a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.R +++ b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.R @@ -12,38 +12,38 @@ load.model <- function(){ init.fun = function(chain_id= 1, list){ step <- (chain_id-1)/10 init <- - list(intercept = 0+step, - intercept_species = rnorm(list$N_species, 0, 0.5+step), - param_sumBn_species = rnorm(list$N_species, 0, 0.5+step), - param_logD_species = rnorm(list$N_species, 0, 0.5+step), - intercept_plot = rnorm(list$N_plot, 0, 0.5 +step), - intercept_set = rnorm(list$N_set, 0, 0.5 +step), - mean_logD = 2/3+step, - param_Tf = 2/3+step, - param_sumBn = 2/3+step, - param_sumTfBn = 2/3+step, - param_sumTnBn = 2/3+step, - param_sumTnTfBn_abs= 2/3+step, - sigma_inter_species = 0.5 +step, - sigma_inter_plot = 0.5 + +step, - sigma_inter_set = 0.5 + +step, + list(intercept = 0 + step, + intercept_species = rnorm(list$N_species, 0, 0.3 + step), + param_sumBn_species = rnorm(list$N_species, 0, 0.1 + step), + param_logD_species = rnorm(list$N_species, 0, 0.3 + step), + intercept_plot = rnorm(list$N_plot, 0, 0.3 + step), + intercept_set = rnorm(list$N_set, 0, 0.1 + step), + mean_logD = 2/3 + step, + param_Tf = -0.1 + step, + param_sumBn = -0.1 + step, + param_sumTfBn = -0.1 + step, + param_sumTnBn = -0.1 + step, + param_sumTnTfBn_abs= -0.1 + step, + sigma_inter_species = 0.5 + step, + sigma_inter_plot = 0.3 + step, + sigma_inter_set = 0.1 + step, sigma = 0.5 +step, - sigma_sumBn_species = 0.5 +step, - sigma_logD_species = 0.5 +step) + sigma_sumBn_species = 0.1 +step, + sigma_logD_species = 0.3 +step) return(init) }, - + stan = " data { int<lower=0> N_indiv; int<lower=0> N_species; int<lower=0> N_plot; int<lower=0> N_set; - + int species_id[N_indiv]; int plot_id[N_indiv]; int set_id[N_indiv]; - + real logG[N_indiv]; real logD[N_indiv]; real sumBn[N_indiv]; @@ -93,27 +93,31 @@ transformed parameters { model { # constants for prior - ## real sigma0; - -################################################################################ -######################## growth model with STAN ############################# + real sigma0; + sigma0 <- 10; ############################################### ########### Hierarchical parameters ######## ### species random param - param_logD_species ~ normal(0,sigma_logD_species); - param_sumBn_species ~ normal(0,sigma_sumBn_species); - intercept_species ~ normal(0, sigma_inter_species); + param_logD_species ~ normal(0,sigma_logD_species); + param_sumBn_species ~ normal(0,sigma_sumBn_species); + intercept_species ~ normal(0, sigma_inter_species); ### plot random param - intercept_plot ~ normal(0,sigma_inter_plot); + intercept_plot ~ normal(0,sigma_inter_plot); ### set random effect - intercept_set ~ normal(0, sigma_inter_set); - ### biomes fixed param - param_Tf ~ normal(0, sigma0); - param_sumBn ~ normal(0,sigma0); - param_sumTfBn ~ normal(0 ,sigma0); - param_sumTnBn ~ normal(0 ,sigma0); - param_sumTnTfBn_abs ~ normal(0 , sigma0); + intercept_set ~ normal(0, sigma_inter_set); + + ############################################### + ########### Non-Hierarchical parameters ######## + # slope and intercept + intercept ~ normal(0,sigma0); + mean_logD ~ normal(0,sigma0); + ### biomes fixed param + param_Tf ~ normal(0, sigma0); + param_sumBn ~ normal(0,sigma0); + param_sumTfBn ~ normal(0 ,sigma0); + param_sumTnBn ~ normal(0 ,sigma0); + param_sumTnTfBn_abs ~ normal(0 , sigma0); ############################################### diff --git a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R index 314170a..5ee3d28 100644 --- a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R +++ b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R @@ -13,26 +13,26 @@ load.model <- function(){ step <- (chain_id-1)/10 init <- list(intercept = 0+step, - intercept_species = rnorm(list$N_species, 0, 0.5+step), - param_sumBn_species = rnorm(list$N_species, 0, 0.5+step), - param_logD_species = rnorm(list$N_species, 0, 0.5+step), - intercept_plot = rnorm(list$N_plot, 0, 0.5 +step), - intercept_set = rnorm(list$N_set, 0, 0.5 +step), - mean_logD = 2/3+step, - param_Tf_biomes= rep(2/3+step, list$N_biomes), - param_sumBn_biomes= rep(2/3+step, list$N_biomes), - param_sumTfBn_biomes= rep(2/3+step, list$N_biomes), - param_sumTnBn_biomes= rep(2/3+step, list$N_biomes), - param_sumTnTfBn_abs_biomes= rep(2/3+step, list$N_biomes), - sigma_inter_species = 0.5 +step, - sigma_inter_plot = 0.5 + +step, - sigma_inter_set = 0.5 + +step, + intercept_species = rnorm(list$N_species, 0, 0.3+step), + param_sumBn_species = rnorm(list$N_species, 0, 0.1+step), + param_logD_species = rnorm(list$N_species, 0, 0.3+step), + intercept_plot = rnorm(list$N_plot, 0, 0.3 +step), + intercept_set = rnorm(list$N_set, 0, 0.1 +step), + mean_logD = (2/3 - 0.1) +step, + param_Tf_biomes= rep(-0.1+step, list$N_biomes), + param_sumBn_biomes= rep(-0.1+step, list$N_biomes), + param_sumTfBn_biomes= rep(-0.1+step, list$N_biomes), + param_sumTnBn_biomes= rep(-0.1+step, list$N_biomes), + param_sumTnTfBn_abs_biomes= rep(-0.1+step, list$N_biomes), + sigma_inter_species = 0.3 +step, + sigma_inter_plot = 0.3 + +step, + sigma_inter_set = 0.1 + +step, sigma = 0.5 +step, - sigma_sumBn_species = 0.5 +step, - sigma_logD_species = 0.5 +step) + sigma_sumBn_species = 0.1 +step, + sigma_logD_species = 0.3 +step) return(init) }, - + stan = " data { int<lower=0> N_indiv; @@ -40,12 +40,12 @@ data { int<lower=0> N_plot; int<lower=0> N_biomes; int<lower=0> N_set; - + int species_id[N_indiv]; int plot_id[N_indiv]; int biomes_id[N_indiv]; int set_id[N_indiv]; - + real logG[N_indiv]; real logD[N_indiv]; real sumBn[N_indiv]; @@ -80,12 +80,12 @@ transformed parameters { vector[N_indiv] theo_g; for (i in 1:N_indiv){ - theo_g[i] <- intercept + intercept_species[species_id[i]] + theo_g[i] <- intercept + intercept_species[species_id[i]] + intercept_plot[plot_id[i]] + intercept_set[set_id[i]] + (mean_logD + param_logD_species[species_id[i]])*logD[i] + (param_Tf_biomes[biomes_id[i]])*Tf[i] - + ( param_sumBn_biomes[biomes_id[i]] + param_sumBn_species[species_id[i]])*sumBn[i] + + ( param_sumBn_biomes[biomes_id[i]] + param_sumBn_species[species_id[i]])*sumBn[i] + (param_sumTfBn_biomes[biomes_id[i]])*sumTfBn[i] + (param_sumTnBn_biomes[biomes_id[i]])*sumTnBn[i] + (param_sumTnTfBn_abs_biomes[biomes_id[i]])*sumTnTfBn_abs[i] @@ -94,48 +94,36 @@ transformed parameters { } model { - # constants for prior - ## real sigma0; - -################################################################################ -######################## growth model with STAN ############################# + # constants for prior + real sigma0; + sigma0 <- 10; - ############################################### - ########### Hierarchical parameters ######## - ### species random param + ############################################### + ########### Hierarchical parameters ######## + ### species random param param_logD_species ~ normal(0,sigma_logD_species); param_sumBn_species ~ normal(0,sigma_sumBn_species); intercept_species ~ normal(0, sigma_inter_species); - ### plot random param + ### plot random param intercept_plot ~ normal(0,sigma_inter_plot); - ### set random effect - intercept_set ~ normal(0, sigma_inter_set); - ### biomes fixed param - ## param_Tf_biomes ~ normal(0, sigma0); - ## param_sumBn_biomes ~ normal(0,sigma0); - ## param_sumTfBn_biomes ~ normal(0 ,sigma0); - ## param_sumTnBn_biomes ~ normal(0 ,sigma0); - ## param_sumTnTfBn_abs_biomes ~ normal(0 , sigma0); + ### set random effect + intercept_set ~ normal(0, sigma_inter_set); - ############################################### - ########### Non-Hierarchical parameters ######## - # constant for prior - ## sigma0 <- 10; - # slope and intercept - ## intercept ~ normal(0,sigma0); - ## mean_logD ~ normal(0,sigma0); - ## # sigma - ## sigma_inter_species~ uniform(0,6); - ## sigma_inter_set~ uniform(0,6); - ## sigma_inter_plot~ uniform(0,6); - ## sigma_inter_tree~ uniform(0,6); - ## sigma_logD_species~ uniform(0,6); - ## sigma_sumBn_species~ uniform(0,6); - ## sigma~ uniform(0,6); + ############################################### + ########### Non-Hierarchical parameters ######## + # slope and intercept + intercept ~ normal(0,sigma0); + mean_logD ~ normal(0,sigma0); + ### biomes fixed param + param_Tf_biomes ~ normal(0, sigma0); + param_sumBn_biomes ~ normal(0,sigma0); + param_sumTfBn_biomes ~ normal(0 ,sigma0); + param_sumTnBn_biomes ~ normal(0 ,sigma0); + param_sumTnTfBn_abs_biomes ~ normal(0 , sigma0); - ############################################### - ############ Likelihood ################### - logG ~ normal(theo_g,sigma); + ############################################### + ############ Likelihood ################### + logG ~ normal(theo_g,sigma); } ") } diff --git a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R index 49856e8..8d20862 100644 --- a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R +++ b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R @@ -37,10 +37,10 @@ data { int<lower=0> N_indiv; int<lower=0> N_species; int<lower=0> N_plot; - + int species_id[N_indiv]; int plot_id[N_indiv]; - + real logG[N_indiv]; real logD[N_indiv]; real sumBn[N_indiv]; @@ -90,28 +90,27 @@ model { real sigma0; sigma0 <- 10; -################################################################################ -######################## growth model with STAN ############################# - - ############################################### - ########### Hierarchical parameters ######## - ### species random param + ############################################### + ########### Hierarchical parameters ######## + ### species random param param_logD_species ~ normal(0,sigma_logD_species); param_sumBn_species ~ normal(0,sigma_sumBn_species); intercept_species ~ normal(0, sigma_inter_species); - ### plot random param + ### plot random param intercept_plot ~ normal(0,sigma_inter_plot); - ### biomes fixed param + ### fixed param + intercept ~ normal(0,sigma0); + mean_logD ~ normal(0,sigma0); + ### biomes fixed param param_Tf ~ normal(0, sigma0); param_sumBn ~ normal(0,sigma0); param_sumTfBn ~ normal(0 ,sigma0); param_sumTnBn ~ normal(0 ,sigma0); param_sumTnTfBn_abs ~ normal(0 , sigma0); - - ############################################### - ############ Likelihood ################### - logG ~ normal(theo_g,sigma); + ############################################### + ############ Likelihood ################### + logG ~ normal(theo_g,sigma); } ") } diff --git a/R/analysis/stan.run.R b/R/analysis/stan.run.R index d11c616..9028b42 100644 --- a/R/analysis/stan.run.R +++ b/R/analysis/stan.run.R @@ -26,16 +26,16 @@ fun.call.stan.parallel.and.save <- function(stan.model, list.stan, path.out, var.sample){ start <- Sys.time() set_cppo(mode = "fast") - inits <- stan.model$init.fun(chain_id, - list.stan) + inits <- list(stan.model$init.fun(chains, + list.stan)) stan.output <- stan(model_code = stan.model$stan, data = list.stan, pars = stan.model$pars, - init = 'random', + init = inits, iter = iter, warmup = warmup, chains = chains, - chain_id = chain_id, + ## chain_id = chain_id, thin = thin, verbose = FALSE) end <- Sys.time() @@ -57,7 +57,7 @@ fun.call.stan.and.save <- function(stan.model, list.stan, path.out, stan.output <- stan(model_code = stan.model$stan, data = list.stan, pars = stan.model$pars, - init = 'random', + init = inits, iter = iter, warmup = warmup, chains = chains, diff --git a/R/analysis/test.stan.R b/R/analysis/test.stan.R index 515a17d..3431465 100644 --- a/R/analysis/test.stan.R +++ b/R/analysis/test.stan.R @@ -1,6 +1,7 @@ ### test stan -## I changed the coding of the random effect with norm(0,10*sigma it seems more slow! +## I changed the coding of the random effect +## with norm(0,10*sigma it seems more slow! ## PROBLEM WITH STANDARDIZED VARIABLE DO I NEED TO INCLUDE AN INTERCEPT?? ## VERY BAD CONVERGENCE ??? @@ -12,11 +13,12 @@ source('R/analysis/stan.run.R') ### TEST simple model on France only - df.lmer <- load.data.for.lmer(trait = 'SLA',fname = 'data.all.no.log.rds', cat.TF = FALSE, - sample.size = NA, - var.sample = NA, - sample.vec.TF. = FALSE, - select.set. = 'France') + df.lmer <- load.data.for.lmer(trait = 'SLA', + fname = 'data.all.no.log.rds', + cat.TF = FALSE, + sample.size = 500, + var.sample = 'ecocode', + sample.vec.TF. = FALSE) stan.list <- fun.turn.in.list.for.jags.stan(df.lmer, cat.TF = FALSE) @@ -26,8 +28,12 @@ source('R/analysis/model.stan/model.stan.LOGLIN.size.fixed.R', local = TRUE) fun.init.stan <- function(chain_id= 1, stan.list){ init <- list(intercept = 0+(chain_id-1)/10, - intercept_species = rnorm(stan.list$N_species, 0, 0.5+(chain_id-1)/10), - intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +(chain_id-1)/10), + intercept_species = rnorm(stan.list$N_species, + 0, + 0.5+(chain_id-1)/10), + intercept_plot = rnorm(stan.list$N_plot, + 0, + 0.5 +(chain_id-1)/10), mean_logD = 2/3+(chain_id-1)/10, sigma_inter_species = 0.5 +(chain_id-1)/10, sigma_inter_plot = 0.5 + +(chain_id-1)/10, @@ -48,33 +54,12 @@ system.time( stan.output <- stan(model_code = model$stan, chains = 3, verbose = FALSE)) -source('R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R', local = TRUE) +source('R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R', local = TRUE) model <- load.model() -fun.init.stan <- function(chain_id= 1, stan.list){ - step <- (chain_id-1)/10 -init <- list(intercept = 0+step, - intercept_species = rnorm(stan.list$N_species, 0, 0.5+step), - param_sumBn_species = rnorm(stan.list$N_species, 0, 0.5+step), - param_logD_species = rnorm(stan.list$N_species, 0, 0.5+step), - intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +step), - mean_logD = 2/3+step, - param_Tf = 2/3+step, - param_sumBn = 2/3+step, - param_sumTfBn = 2/3+step, - param_sumTnBn = 2/3+step, - param_sumTnTfBn_abs= 2/3+step, - sigma_inter_species = 0.5 +step, - sigma_inter_plot = 0.5 + +step, - sigma = 0.5 +step, - sigma_sumBn_species = 0.5 +step, - sigma_logD_species = 0.5 +step - ) -} - -inits <- list(fun.init.stan(1, stan.list), - fun.init.stan(2, stan.list), - fun.init.stan(3, stan.list)) +inits <- list(model$init.fun(1, stan.list), + model$init.fun(2, stan.list), + model$init.fun(3, stan.list)) library(rstan) @@ -96,8 +81,35 @@ pairs(stan.output) library(ggmcmc) -S <- ggs(stan.output) -extract(stan.output, permuted = FALSE, inc_warmup=FALSE)) +S <- ggs(stan.output2) +ggs_crosscorrelation(S) +ggs_crosscorrelation(S, absolute_scale = FALSE) +ggs_density(S) +ggs_traceplot(S) +ggs_running(S) +ggs_autocorrelation(S) +ggs_Rhat(S) +ggs_geweke(S) +ggs_caterpillar(S) + +####### +## read results of simul on cluster +source("R/analysis/stan.output-fun.R") + +stan.out.clust <- fun.merge.chain( + path.out = "output/stan/all.no.log/SLA/species/LOGLIN.ER.AD.Tf.fixed.biomes/", + var.sample = 'ecocode', chains.vec = 1:3) + +stan.out.clust +plot(stan.out.clust) +pairs(stan.out.clust) +traceplot(stan.out.clust, ask = TRUE) + +## base on traceplot the initial value seems to be +## a big problem for the lack of convergence +library(ggmcmc) +S <- ggs(stan.out.clust) +ggs_crosscorrelation(S) ggs_crosscorrelation(S, absolute_scale = FALSE) ggs_density(S) ggs_traceplot(S) @@ -108,3 +120,23 @@ ggs_geweke(S) ggs_caterpillar(S) +run.multiple.model.for.set.one.trait(model.files.stan.Tf.1, run.stan, 'SLA', + type.filling='species', sample.size = 10000, var.sample = 'ecocode', + iter = 1000, warmup = 500, chains = 1, chain_id = 1, + init.TF = FALSE) + + +run.multiple.model.for.set.one.trait(model.files.stan.Tf.1, run.stan, 'SLA', + type.filling='species', sample.size = 1000, var.sample = 'ecocode', + iter = 1000, warmup = 500, chains = 3, parallel.TF = FALSE, + init.TF = FALSE) +out.stan3 <- readRDS('output/stan/all.no.log/SLA/species/LOGLIN.ER.AD.Tf.fixed.biomes/ecocode.results.stan.rds') +traceplot(out.stan3, ask = TRUE) +plot(out.stan3) +pairs(out.stan3) + +library(ggmcmc) +S <- ggs(out.stan3) +ggs_crosscorrelation(S) +ggs_crosscorrelation(S, absolute_scale = FALSE) +ggs_density(S, family = c('param_sumBn_biomes')) diff --git a/R/process.data/test.tree.CWM-fun.R b/R/process.data/test.tree.CWM-fun.R index bc3d5e6..826141d 100644 --- a/R/process.data/test.tree.CWM-fun.R +++ b/R/process.data/test.tree.CWM-fun.R @@ -8,7 +8,9 @@ load.processed.data <- function(path, file.name = "data.tree.tot.no.std.csv"){ require(data.table) fname <- file.path(path, file.name ) if(file.exists(fname)){ + cat('loading file', path, file.name) data <- fread(fname, stringsAsFactors = FALSE) + print(warnings()) return(data) }else{return(NULL)} } diff --git a/launch.cluster/launch_all_data.lmer.bash b/launch.cluster/launch_all_data.lmer.bash index 0b216d1..5acf3d3 100644 --- a/launch.cluster/launch_all_data.lmer.bash +++ b/launch.cluster/launch_all_data.lmer.bash @@ -6,11 +6,11 @@ mkdir -p trait.workshop for trait in "'SLA'" "'Leaf.N'" "'Wood.density'" "'Max.height'" "'Seed.mass'"; do - echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait);print('done')\"" > trait.workshop/data1$trait.sh + echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait);print('done')\"" > trait.workshop/data1$trait.sh qsub trait.workshop/data1$trait.sh -l nodes=1:ppn=1,mem=8gb -N "data1$trait" -q opt32G -j oe - echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait, fname = 'data.all.log.rds');print('done')\"" > trait.workshop/data3$trait.sh + echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait, fname = 'data.all.log.rds');print('done')\"" > trait.workshop/data3$trait.sh qsub trait.workshop/data3$trait.sh -l nodes=1:ppn=1,mem=8gb -N "data3$trait" -q opt32G -j oe done diff --git a/launch.cluster/launch_all_jags.init.bash b/launch.cluster/launch_all_jags.init.bash new file mode 100644 index 0000000..fc0cb6f --- /dev/null +++ b/launch.cluster/launch_all_jags.init.bash @@ -0,0 +1,33 @@ +#!/bin/bash +# Georges Kunstler 14/05/2014 +# read one variable +export LD_LIBRARY_PATH=/usr/lib64/R/library + +mkdir -p trait.workshop + +iter=50000 +warmup=5000 +thin=50 +# test parameter +nbargs=$# +echo "number of arguments="$nbargs +if [ $nbargs -ne 1 ] +then + echo "need one and only one argument" + echo " usage :" + echo " ./launch_all_lmer.sh sample.size" + exit 100 +fi + + +samplesize=$1 +# +for trait in "'SLA'" "'Wood.density'" "'Max.height'" "'Seed.mass'" "'Leaf.N'"; do + + echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/jags.run.R'); run.multiple.model.for.set.one.trait(c(model.files.jags.Tf.1), run.jags,$trait,type.filling='species', sample.size = $samplesize, var.sample = 'ecocode.id',iter = $iter, warmup = $warmup, chains = 3, thin = $thin, init.TF = TRUE);print('done')\"" > trait.workshop/speciesjags${trait}.sh + qsub trait.workshop/speciesjags${trait}.sh -l nodes=1:ppn=1,mem=8gb -N "jags${trait}" -q opt32G -j oe + + + +done + diff --git a/launch.cluster/launch_all_stan.bash b/launch.cluster/launch_all_stan.bash index 3dcbe60..f1beb43 100644 --- a/launch.cluster/launch_all_stan.bash +++ b/launch.cluster/launch_all_stan.bash @@ -5,8 +5,8 @@ export LD_LIBRARY_PATH=/usr/lib64/R/library mkdir -p trait.workshop -iter=1000 -warmup=500 +iter=2000 +warmup=1000 # test parameter nbargs=$# echo "number of arguments="$nbargs diff --git a/scripts/format.data/BCI.R b/scripts/format.data/BCI.R index 05aa543..ce9f65e 100644 --- a/scripts/format.data/BCI.R +++ b/scripts/format.data/BCI.R @@ -5,18 +5,19 @@ rm(list = ls()) source("R/format.data/format-fun.R") dir.create("output/formatted/BCI", recursive = TRUE, showWarnings = FALSE) + library(reshape, quietly = TRUE) ### READ DATA read individuals tree data Requires careful formatting of 7 census ## datasets The raw data is such that, once a tree dies in census X, then it no ## longer exists in census X+1, X+2 etc... data.bci1 <- read.csv("data/raw/BCI/stem.data/BCI.all.plots.census.allstems.1.csv", header = TRUE, - stringsAsFactors = FALSE) + stringsAsFactors = FALSE) - print(table(data.bci1$Stem)) - print(sum(is.na(data.bci1$Stem))) +print(table(data.bci1$Stem)) +print(sum(is.na(data.bci1$Stem))) data.bci1$Date1 <- data.bci1$Date; data.bci1$Date <- NULL data.bci1$DBH1 <- data.bci1$DBH; data.bci1$DBH <- NULL data.bci1$HOM1 <- data.bci1$HOM; data.bci1$HOM <- NULL diff --git a/scripts/format.data/Sweden.R b/scripts/format.data/Sweden.R index d51f4cf..31dc30f 100644 --- a/scripts/format.data/Sweden.R +++ b/scripts/format.data/Sweden.R @@ -1,17 +1,22 @@ #!/usr/bin/env Rscript ### MERGE sweden DATA -source("R/format.data/format-fun.R"); -library(reshape, quietly = TRUE);d -ir.create("output/formatted/Sweden", recursive = TRUE, showWarnings = FALSE) +source("R/format.data/format-fun.R") +library(reshape, quietly = TRUE) +dir.create("output/formatted/Sweden", recursive = TRUE, showWarnings = FALSE) ######################### READ DATA read individuals tree data data.swe <- read.table("data/raw/Sweden/Swe_NFI_all.txt", header = T, stringsAsFactors = F, sep = "\t") +## library(gnumeric) +## df1<-read.gnumeric.sheet( "data/raw/Sweden/Swe_NFI_1.ods",sheet.name='Data' , +## quiet=FALSE , head = TRUE) +## df2<-read.gnumeric.sheet( "data/raw/Sweden/Swe_NFI_2a.xlsx",sheet.name='Data' , +## quiet=FALSE , head = TRUE) +## df3<-read.gnumeric.sheet( "data/raw/Sweden/Swe_NFI_3.xlsx",sheet.name='Data' , +## quiet=FALSE , head = TRUE) +## head(df) -# understand year for survival -table(data.swe$Year, data.swe$DeadYear) -table(data.swe$Year,is.na(data.swe$dia_t3)) data.swe$tree.id <- apply(cbind(data.swe[["Year"]], data.swe[["TractNr"]], data.swe[["PlotNr"]], @@ -20,6 +25,35 @@ data.swe$tree.id <- apply(cbind(data.swe[["Year"]], data.swe[["TractNr"]], data.swe$plot <- apply(cbind(data.swe[["Year"]], data.swe[["TractNr"]], data.swe[["PlotNr"]]), 1, paste, collapse = "_") +## ## Need to check if some year of census have more census than possible ? + +## # understand year for survival +## # need to compute per plot +## plot.df <- data.frame(plot = unique(data.swe$plot), +## year = data.swe$Year[!duplicated(data.swe$plot)], +## mesure_t1 = tapply(!is.na(data.swe$dia_t1), +## data.swe$plot, sum)>0, +## mesure_t2 = tapply(!is.na(data.swe$dia_t2), +## data.swe$plot, sum)>0, +## mesure_t3 = tapply(!is.na(data.swe$dia_t3), +## data.swe$plot, sum)>0) +## table(plot.df$year, plot.df$mesure_t1) +## table(plot.df$year, plot.df$mesure_t3) +## table(plot.df$mesure_t2 +## , plot.df$mesure_t3) +## sum(plot.df$mesure_t2 == FALSE & plot.df$mesure_t1 == TRUE & plot.df$mesure_t3 == TRUE) + +## mean.year <- tapply(data.swe$Year, data.swe$plot, mean) +## names(mean.year)[(!mean.year %in% c(2003:2012))] + + +## tapply(!is.na(data.swe$dia_t3), data.swe$plot, sum) + +## table(data.swe$Year, data.swe$DeadYear) +## table(data.swe$Year,is.na(data.swe$dia_t3) & is.na(data.swe$dia_t2)) +## table(data.swe$Year,is.na(data.swe$dia_t1) & is.na(data.swe$dia_t2)) +## table(data.swe$Year,is.na(data.swe$dia_t2)) +## table(data.swe$Year,is.na(data.swe$dia_t1)) ## Format to desired form -- GitLab