diff --git a/Makefile b/Makefile index 59d6b08df0d9241f4861508719426e4afbb00633..d62261ab5ae26e29083684d42cb1b7d16fa80c0d 100644 --- a/Makefile +++ b/Makefile @@ -43,12 +43,12 @@ $(D2)/TRY/data.TRY.std.rds: GLOBAL: $(D3)/Done.txt $(D3)/Done.txt: scripts/process.data/remove.out.R scripts/process.data/summarise.data.R $(D3)/Done.t.txt - Rscript $< - Rscript scripts/process.data/summarise.data.R - Rscript scripts/process.data/plot.data.all.R + R CMD BATCH $< + R CMD BATCH scripts/process.data/summarise.data.R + R CMD BATCH scripts/process.data/plot.data.all.R $(D3)/Done.t.txt: scripts/process.data/merge.all.processed.data.R - Rscript $< + R CMD BATCH $< #------------------------------------------------------- TEST.TREE: $(D4)/Done.txt tree.all.sites diff --git a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.R b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.R index a102c5fe049b8f2d0d2509fbf7a94224f5f11fb3..cc53ecbc40babb572e301141b654c3d9e6a1528b 100644 --- a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.R +++ b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.R @@ -12,38 +12,38 @@ load.model <- function(){ init.fun = function(chain_id= 1, list){ step <- (chain_id-1)/10 init <- - list(intercept = 0+step, - intercept_species = rnorm(list$N_species, 0, 0.5+step), - param_sumBn_species = rnorm(list$N_species, 0, 0.5+step), - param_logD_species = rnorm(list$N_species, 0, 0.5+step), - intercept_plot = rnorm(list$N_plot, 0, 0.5 +step), - intercept_set = rnorm(list$N_set, 0, 0.5 +step), - mean_logD = 2/3+step, - param_Tf = 2/3+step, - param_sumBn = 2/3+step, - param_sumTfBn = 2/3+step, - param_sumTnBn = 2/3+step, - param_sumTnTfBn_abs= 2/3+step, - sigma_inter_species = 0.5 +step, - sigma_inter_plot = 0.5 + +step, - sigma_inter_set = 0.5 + +step, + list(intercept = 0 + step, + intercept_species = rnorm(list$N_species, 0, 0.3 + step), + param_sumBn_species = rnorm(list$N_species, 0, 0.1 + step), + param_logD_species = rnorm(list$N_species, 0, 0.3 + step), + intercept_plot = rnorm(list$N_plot, 0, 0.3 + step), + intercept_set = rnorm(list$N_set, 0, 0.1 + step), + mean_logD = 2/3 + step, + param_Tf = -0.1 + step, + param_sumBn = -0.1 + step, + param_sumTfBn = -0.1 + step, + param_sumTnBn = -0.1 + step, + param_sumTnTfBn_abs= -0.1 + step, + sigma_inter_species = 0.5 + step, + sigma_inter_plot = 0.3 + step, + sigma_inter_set = 0.1 + step, sigma = 0.5 +step, - sigma_sumBn_species = 0.5 +step, - sigma_logD_species = 0.5 +step) + sigma_sumBn_species = 0.1 +step, + sigma_logD_species = 0.3 +step) return(init) }, - + stan = " data { int<lower=0> N_indiv; int<lower=0> N_species; int<lower=0> N_plot; int<lower=0> N_set; - + int species_id[N_indiv]; int plot_id[N_indiv]; int set_id[N_indiv]; - + real logG[N_indiv]; real logD[N_indiv]; real sumBn[N_indiv]; @@ -93,27 +93,31 @@ transformed parameters { model { # constants for prior - ## real sigma0; - -################################################################################ -######################## growth model with STAN ############################# + real sigma0; + sigma0 <- 10; ############################################### ########### Hierarchical parameters ######## ### species random param - param_logD_species ~ normal(0,sigma_logD_species); - param_sumBn_species ~ normal(0,sigma_sumBn_species); - intercept_species ~ normal(0, sigma_inter_species); + param_logD_species ~ normal(0,sigma_logD_species); + param_sumBn_species ~ normal(0,sigma_sumBn_species); + intercept_species ~ normal(0, sigma_inter_species); ### plot random param - intercept_plot ~ normal(0,sigma_inter_plot); + intercept_plot ~ normal(0,sigma_inter_plot); ### set random effect - intercept_set ~ normal(0, sigma_inter_set); - ### biomes fixed param - param_Tf ~ normal(0, sigma0); - param_sumBn ~ normal(0,sigma0); - param_sumTfBn ~ normal(0 ,sigma0); - param_sumTnBn ~ normal(0 ,sigma0); - param_sumTnTfBn_abs ~ normal(0 , sigma0); + intercept_set ~ normal(0, sigma_inter_set); + + ############################################### + ########### Non-Hierarchical parameters ######## + # slope and intercept + intercept ~ normal(0,sigma0); + mean_logD ~ normal(0,sigma0); + ### biomes fixed param + param_Tf ~ normal(0, sigma0); + param_sumBn ~ normal(0,sigma0); + param_sumTfBn ~ normal(0 ,sigma0); + param_sumTnBn ~ normal(0 ,sigma0); + param_sumTnTfBn_abs ~ normal(0 , sigma0); ############################################### diff --git a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R index 314170a183800e3494d8921633875f6a601b1701..5ee3d284421f73bee0119cf8b2b801f673bd0129 100644 --- a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R +++ b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R @@ -13,26 +13,26 @@ load.model <- function(){ step <- (chain_id-1)/10 init <- list(intercept = 0+step, - intercept_species = rnorm(list$N_species, 0, 0.5+step), - param_sumBn_species = rnorm(list$N_species, 0, 0.5+step), - param_logD_species = rnorm(list$N_species, 0, 0.5+step), - intercept_plot = rnorm(list$N_plot, 0, 0.5 +step), - intercept_set = rnorm(list$N_set, 0, 0.5 +step), - mean_logD = 2/3+step, - param_Tf_biomes= rep(2/3+step, list$N_biomes), - param_sumBn_biomes= rep(2/3+step, list$N_biomes), - param_sumTfBn_biomes= rep(2/3+step, list$N_biomes), - param_sumTnBn_biomes= rep(2/3+step, list$N_biomes), - param_sumTnTfBn_abs_biomes= rep(2/3+step, list$N_biomes), - sigma_inter_species = 0.5 +step, - sigma_inter_plot = 0.5 + +step, - sigma_inter_set = 0.5 + +step, + intercept_species = rnorm(list$N_species, 0, 0.3+step), + param_sumBn_species = rnorm(list$N_species, 0, 0.1+step), + param_logD_species = rnorm(list$N_species, 0, 0.3+step), + intercept_plot = rnorm(list$N_plot, 0, 0.3 +step), + intercept_set = rnorm(list$N_set, 0, 0.1 +step), + mean_logD = (2/3 - 0.1) +step, + param_Tf_biomes= rep(-0.1+step, list$N_biomes), + param_sumBn_biomes= rep(-0.1+step, list$N_biomes), + param_sumTfBn_biomes= rep(-0.1+step, list$N_biomes), + param_sumTnBn_biomes= rep(-0.1+step, list$N_biomes), + param_sumTnTfBn_abs_biomes= rep(-0.1+step, list$N_biomes), + sigma_inter_species = 0.3 +step, + sigma_inter_plot = 0.3 + +step, + sigma_inter_set = 0.1 + +step, sigma = 0.5 +step, - sigma_sumBn_species = 0.5 +step, - sigma_logD_species = 0.5 +step) + sigma_sumBn_species = 0.1 +step, + sigma_logD_species = 0.3 +step) return(init) }, - + stan = " data { int<lower=0> N_indiv; @@ -40,12 +40,12 @@ data { int<lower=0> N_plot; int<lower=0> N_biomes; int<lower=0> N_set; - + int species_id[N_indiv]; int plot_id[N_indiv]; int biomes_id[N_indiv]; int set_id[N_indiv]; - + real logG[N_indiv]; real logD[N_indiv]; real sumBn[N_indiv]; @@ -80,12 +80,12 @@ transformed parameters { vector[N_indiv] theo_g; for (i in 1:N_indiv){ - theo_g[i] <- intercept + intercept_species[species_id[i]] + theo_g[i] <- intercept + intercept_species[species_id[i]] + intercept_plot[plot_id[i]] + intercept_set[set_id[i]] + (mean_logD + param_logD_species[species_id[i]])*logD[i] + (param_Tf_biomes[biomes_id[i]])*Tf[i] - + ( param_sumBn_biomes[biomes_id[i]] + param_sumBn_species[species_id[i]])*sumBn[i] + + ( param_sumBn_biomes[biomes_id[i]] + param_sumBn_species[species_id[i]])*sumBn[i] + (param_sumTfBn_biomes[biomes_id[i]])*sumTfBn[i] + (param_sumTnBn_biomes[biomes_id[i]])*sumTnBn[i] + (param_sumTnTfBn_abs_biomes[biomes_id[i]])*sumTnTfBn_abs[i] @@ -94,48 +94,36 @@ transformed parameters { } model { - # constants for prior - ## real sigma0; - -################################################################################ -######################## growth model with STAN ############################# + # constants for prior + real sigma0; + sigma0 <- 10; - ############################################### - ########### Hierarchical parameters ######## - ### species random param + ############################################### + ########### Hierarchical parameters ######## + ### species random param param_logD_species ~ normal(0,sigma_logD_species); param_sumBn_species ~ normal(0,sigma_sumBn_species); intercept_species ~ normal(0, sigma_inter_species); - ### plot random param + ### plot random param intercept_plot ~ normal(0,sigma_inter_plot); - ### set random effect - intercept_set ~ normal(0, sigma_inter_set); - ### biomes fixed param - ## param_Tf_biomes ~ normal(0, sigma0); - ## param_sumBn_biomes ~ normal(0,sigma0); - ## param_sumTfBn_biomes ~ normal(0 ,sigma0); - ## param_sumTnBn_biomes ~ normal(0 ,sigma0); - ## param_sumTnTfBn_abs_biomes ~ normal(0 , sigma0); + ### set random effect + intercept_set ~ normal(0, sigma_inter_set); - ############################################### - ########### Non-Hierarchical parameters ######## - # constant for prior - ## sigma0 <- 10; - # slope and intercept - ## intercept ~ normal(0,sigma0); - ## mean_logD ~ normal(0,sigma0); - ## # sigma - ## sigma_inter_species~ uniform(0,6); - ## sigma_inter_set~ uniform(0,6); - ## sigma_inter_plot~ uniform(0,6); - ## sigma_inter_tree~ uniform(0,6); - ## sigma_logD_species~ uniform(0,6); - ## sigma_sumBn_species~ uniform(0,6); - ## sigma~ uniform(0,6); + ############################################### + ########### Non-Hierarchical parameters ######## + # slope and intercept + intercept ~ normal(0,sigma0); + mean_logD ~ normal(0,sigma0); + ### biomes fixed param + param_Tf_biomes ~ normal(0, sigma0); + param_sumBn_biomes ~ normal(0,sigma0); + param_sumTfBn_biomes ~ normal(0 ,sigma0); + param_sumTnBn_biomes ~ normal(0 ,sigma0); + param_sumTnTfBn_abs_biomes ~ normal(0 , sigma0); - ############################################### - ############ Likelihood ################### - logG ~ normal(theo_g,sigma); + ############################################### + ############ Likelihood ################### + logG ~ normal(theo_g,sigma); } ") } diff --git a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R index 49856e8ca1dd9c7611302da38d0e6900ae8642a6..8d208628817ae9dbbfcf3aca34ae4b9426425fb5 100644 --- a/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R +++ b/R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R @@ -37,10 +37,10 @@ data { int<lower=0> N_indiv; int<lower=0> N_species; int<lower=0> N_plot; - + int species_id[N_indiv]; int plot_id[N_indiv]; - + real logG[N_indiv]; real logD[N_indiv]; real sumBn[N_indiv]; @@ -90,28 +90,27 @@ model { real sigma0; sigma0 <- 10; -################################################################################ -######################## growth model with STAN ############################# - - ############################################### - ########### Hierarchical parameters ######## - ### species random param + ############################################### + ########### Hierarchical parameters ######## + ### species random param param_logD_species ~ normal(0,sigma_logD_species); param_sumBn_species ~ normal(0,sigma_sumBn_species); intercept_species ~ normal(0, sigma_inter_species); - ### plot random param + ### plot random param intercept_plot ~ normal(0,sigma_inter_plot); - ### biomes fixed param + ### fixed param + intercept ~ normal(0,sigma0); + mean_logD ~ normal(0,sigma0); + ### biomes fixed param param_Tf ~ normal(0, sigma0); param_sumBn ~ normal(0,sigma0); param_sumTfBn ~ normal(0 ,sigma0); param_sumTnBn ~ normal(0 ,sigma0); param_sumTnTfBn_abs ~ normal(0 , sigma0); - - ############################################### - ############ Likelihood ################### - logG ~ normal(theo_g,sigma); + ############################################### + ############ Likelihood ################### + logG ~ normal(theo_g,sigma); } ") } diff --git a/R/analysis/stan.run.R b/R/analysis/stan.run.R index d11c61615718644c4fb754730ae0460bbdec521e..9028b425f0468c2072c0bdd6aced36557072225c 100644 --- a/R/analysis/stan.run.R +++ b/R/analysis/stan.run.R @@ -26,16 +26,16 @@ fun.call.stan.parallel.and.save <- function(stan.model, list.stan, path.out, var.sample){ start <- Sys.time() set_cppo(mode = "fast") - inits <- stan.model$init.fun(chain_id, - list.stan) + inits <- list(stan.model$init.fun(chains, + list.stan)) stan.output <- stan(model_code = stan.model$stan, data = list.stan, pars = stan.model$pars, - init = 'random', + init = inits, iter = iter, warmup = warmup, chains = chains, - chain_id = chain_id, + ## chain_id = chain_id, thin = thin, verbose = FALSE) end <- Sys.time() @@ -57,7 +57,7 @@ fun.call.stan.and.save <- function(stan.model, list.stan, path.out, stan.output <- stan(model_code = stan.model$stan, data = list.stan, pars = stan.model$pars, - init = 'random', + init = inits, iter = iter, warmup = warmup, chains = chains, diff --git a/R/analysis/test.stan.R b/R/analysis/test.stan.R index 515a17d0fd624b4db360240d768bf955b7d048ee..3431465fdb344692282a1e0002a8211a3f4a7b27 100644 --- a/R/analysis/test.stan.R +++ b/R/analysis/test.stan.R @@ -1,6 +1,7 @@ ### test stan -## I changed the coding of the random effect with norm(0,10*sigma it seems more slow! +## I changed the coding of the random effect +## with norm(0,10*sigma it seems more slow! ## PROBLEM WITH STANDARDIZED VARIABLE DO I NEED TO INCLUDE AN INTERCEPT?? ## VERY BAD CONVERGENCE ??? @@ -12,11 +13,12 @@ source('R/analysis/stan.run.R') ### TEST simple model on France only - df.lmer <- load.data.for.lmer(trait = 'SLA',fname = 'data.all.no.log.rds', cat.TF = FALSE, - sample.size = NA, - var.sample = NA, - sample.vec.TF. = FALSE, - select.set. = 'France') + df.lmer <- load.data.for.lmer(trait = 'SLA', + fname = 'data.all.no.log.rds', + cat.TF = FALSE, + sample.size = 500, + var.sample = 'ecocode', + sample.vec.TF. = FALSE) stan.list <- fun.turn.in.list.for.jags.stan(df.lmer, cat.TF = FALSE) @@ -26,8 +28,12 @@ source('R/analysis/model.stan/model.stan.LOGLIN.size.fixed.R', local = TRUE) fun.init.stan <- function(chain_id= 1, stan.list){ init <- list(intercept = 0+(chain_id-1)/10, - intercept_species = rnorm(stan.list$N_species, 0, 0.5+(chain_id-1)/10), - intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +(chain_id-1)/10), + intercept_species = rnorm(stan.list$N_species, + 0, + 0.5+(chain_id-1)/10), + intercept_plot = rnorm(stan.list$N_plot, + 0, + 0.5 +(chain_id-1)/10), mean_logD = 2/3+(chain_id-1)/10, sigma_inter_species = 0.5 +(chain_id-1)/10, sigma_inter_plot = 0.5 + +(chain_id-1)/10, @@ -48,33 +54,12 @@ system.time( stan.output <- stan(model_code = model$stan, chains = 3, verbose = FALSE)) -source('R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R', local = TRUE) +source('R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R', local = TRUE) model <- load.model() -fun.init.stan <- function(chain_id= 1, stan.list){ - step <- (chain_id-1)/10 -init <- list(intercept = 0+step, - intercept_species = rnorm(stan.list$N_species, 0, 0.5+step), - param_sumBn_species = rnorm(stan.list$N_species, 0, 0.5+step), - param_logD_species = rnorm(stan.list$N_species, 0, 0.5+step), - intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +step), - mean_logD = 2/3+step, - param_Tf = 2/3+step, - param_sumBn = 2/3+step, - param_sumTfBn = 2/3+step, - param_sumTnBn = 2/3+step, - param_sumTnTfBn_abs= 2/3+step, - sigma_inter_species = 0.5 +step, - sigma_inter_plot = 0.5 + +step, - sigma = 0.5 +step, - sigma_sumBn_species = 0.5 +step, - sigma_logD_species = 0.5 +step - ) -} - -inits <- list(fun.init.stan(1, stan.list), - fun.init.stan(2, stan.list), - fun.init.stan(3, stan.list)) +inits <- list(model$init.fun(1, stan.list), + model$init.fun(2, stan.list), + model$init.fun(3, stan.list)) library(rstan) @@ -96,8 +81,35 @@ pairs(stan.output) library(ggmcmc) -S <- ggs(stan.output) -extract(stan.output, permuted = FALSE, inc_warmup=FALSE)) +S <- ggs(stan.output2) +ggs_crosscorrelation(S) +ggs_crosscorrelation(S, absolute_scale = FALSE) +ggs_density(S) +ggs_traceplot(S) +ggs_running(S) +ggs_autocorrelation(S) +ggs_Rhat(S) +ggs_geweke(S) +ggs_caterpillar(S) + +####### +## read results of simul on cluster +source("R/analysis/stan.output-fun.R") + +stan.out.clust <- fun.merge.chain( + path.out = "output/stan/all.no.log/SLA/species/LOGLIN.ER.AD.Tf.fixed.biomes/", + var.sample = 'ecocode', chains.vec = 1:3) + +stan.out.clust +plot(stan.out.clust) +pairs(stan.out.clust) +traceplot(stan.out.clust, ask = TRUE) + +## base on traceplot the initial value seems to be +## a big problem for the lack of convergence +library(ggmcmc) +S <- ggs(stan.out.clust) +ggs_crosscorrelation(S) ggs_crosscorrelation(S, absolute_scale = FALSE) ggs_density(S) ggs_traceplot(S) @@ -108,3 +120,23 @@ ggs_geweke(S) ggs_caterpillar(S) +run.multiple.model.for.set.one.trait(model.files.stan.Tf.1, run.stan, 'SLA', + type.filling='species', sample.size = 10000, var.sample = 'ecocode', + iter = 1000, warmup = 500, chains = 1, chain_id = 1, + init.TF = FALSE) + + +run.multiple.model.for.set.one.trait(model.files.stan.Tf.1, run.stan, 'SLA', + type.filling='species', sample.size = 1000, var.sample = 'ecocode', + iter = 1000, warmup = 500, chains = 3, parallel.TF = FALSE, + init.TF = FALSE) +out.stan3 <- readRDS('output/stan/all.no.log/SLA/species/LOGLIN.ER.AD.Tf.fixed.biomes/ecocode.results.stan.rds') +traceplot(out.stan3, ask = TRUE) +plot(out.stan3) +pairs(out.stan3) + +library(ggmcmc) +S <- ggs(out.stan3) +ggs_crosscorrelation(S) +ggs_crosscorrelation(S, absolute_scale = FALSE) +ggs_density(S, family = c('param_sumBn_biomes')) diff --git a/R/process.data/test.tree.CWM-fun.R b/R/process.data/test.tree.CWM-fun.R index bc3d5e65d4d5ad608a1419b2e8fc78b5fc361111..826141de6a32c1e50e19629c925cfe8ed0a5ddad 100644 --- a/R/process.data/test.tree.CWM-fun.R +++ b/R/process.data/test.tree.CWM-fun.R @@ -8,7 +8,9 @@ load.processed.data <- function(path, file.name = "data.tree.tot.no.std.csv"){ require(data.table) fname <- file.path(path, file.name ) if(file.exists(fname)){ + cat('loading file', path, file.name) data <- fread(fname, stringsAsFactors = FALSE) + print(warnings()) return(data) }else{return(NULL)} } diff --git a/launch.cluster/launch_all_data.lmer.bash b/launch.cluster/launch_all_data.lmer.bash index 0b216d11395cd0b097a1f2d3f800bcc34e2ba256..5acf3d31a17d0bb1fad01bf01bd38db6d97a7b7d 100644 --- a/launch.cluster/launch_all_data.lmer.bash +++ b/launch.cluster/launch_all_data.lmer.bash @@ -6,11 +6,11 @@ mkdir -p trait.workshop for trait in "'SLA'" "'Leaf.N'" "'Wood.density'" "'Max.height'" "'Seed.mass'"; do - echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait);print('done')\"" > trait.workshop/data1$trait.sh + echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait);print('done')\"" > trait.workshop/data1$trait.sh qsub trait.workshop/data1$trait.sh -l nodes=1:ppn=1,mem=8gb -N "data1$trait" -q opt32G -j oe - echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait, fname = 'data.all.log.rds');print('done')\"" > trait.workshop/data3$trait.sh + echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait, fname = 'data.all.log.rds');print('done')\"" > trait.workshop/data3$trait.sh qsub trait.workshop/data3$trait.sh -l nodes=1:ppn=1,mem=8gb -N "data3$trait" -q opt32G -j oe done diff --git a/launch.cluster/launch_all_jags.init.bash b/launch.cluster/launch_all_jags.init.bash new file mode 100644 index 0000000000000000000000000000000000000000..fc0cb6fa5a630bbc394e6110b95b2b259c5fc5a7 --- /dev/null +++ b/launch.cluster/launch_all_jags.init.bash @@ -0,0 +1,33 @@ +#!/bin/bash +# Georges Kunstler 14/05/2014 +# read one variable +export LD_LIBRARY_PATH=/usr/lib64/R/library + +mkdir -p trait.workshop + +iter=50000 +warmup=5000 +thin=50 +# test parameter +nbargs=$# +echo "number of arguments="$nbargs +if [ $nbargs -ne 1 ] +then + echo "need one and only one argument" + echo " usage :" + echo " ./launch_all_lmer.sh sample.size" + exit 100 +fi + + +samplesize=$1 +# +for trait in "'SLA'" "'Wood.density'" "'Max.height'" "'Seed.mass'" "'Leaf.N'"; do + + echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/jags.run.R'); run.multiple.model.for.set.one.trait(c(model.files.jags.Tf.1), run.jags,$trait,type.filling='species', sample.size = $samplesize, var.sample = 'ecocode.id',iter = $iter, warmup = $warmup, chains = 3, thin = $thin, init.TF = TRUE);print('done')\"" > trait.workshop/speciesjags${trait}.sh + qsub trait.workshop/speciesjags${trait}.sh -l nodes=1:ppn=1,mem=8gb -N "jags${trait}" -q opt32G -j oe + + + +done + diff --git a/launch.cluster/launch_all_stan.bash b/launch.cluster/launch_all_stan.bash index 3dcbe604d92819bdcff130328363be9ff8b32cf1..f1beb43338521e6133a2206e992fa00a22a4fa97 100644 --- a/launch.cluster/launch_all_stan.bash +++ b/launch.cluster/launch_all_stan.bash @@ -5,8 +5,8 @@ export LD_LIBRARY_PATH=/usr/lib64/R/library mkdir -p trait.workshop -iter=1000 -warmup=500 +iter=2000 +warmup=1000 # test parameter nbargs=$# echo "number of arguments="$nbargs diff --git a/scripts/format.data/BCI.R b/scripts/format.data/BCI.R index 05aa54392d13b5b596efbbf3b0f3db37d97d5aab..ce9f65e4c055bfa7a7115728f6481594c4018c87 100644 --- a/scripts/format.data/BCI.R +++ b/scripts/format.data/BCI.R @@ -5,18 +5,19 @@ rm(list = ls()) source("R/format.data/format-fun.R") dir.create("output/formatted/BCI", recursive = TRUE, showWarnings = FALSE) + library(reshape, quietly = TRUE) ### READ DATA read individuals tree data Requires careful formatting of 7 census ## datasets The raw data is such that, once a tree dies in census X, then it no ## longer exists in census X+1, X+2 etc... data.bci1 <- read.csv("data/raw/BCI/stem.data/BCI.all.plots.census.allstems.1.csv", header = TRUE, - stringsAsFactors = FALSE) + stringsAsFactors = FALSE) - print(table(data.bci1$Stem)) - print(sum(is.na(data.bci1$Stem))) +print(table(data.bci1$Stem)) +print(sum(is.na(data.bci1$Stem))) data.bci1$Date1 <- data.bci1$Date; data.bci1$Date <- NULL data.bci1$DBH1 <- data.bci1$DBH; data.bci1$DBH <- NULL data.bci1$HOM1 <- data.bci1$HOM; data.bci1$HOM <- NULL diff --git a/scripts/format.data/Sweden.R b/scripts/format.data/Sweden.R index d51f4cf4e190c0cdb2f67c9607c2ac3df2553604..31dc30fd18b4c5a3af7469dcedf500895d6d1bc6 100644 --- a/scripts/format.data/Sweden.R +++ b/scripts/format.data/Sweden.R @@ -1,17 +1,22 @@ #!/usr/bin/env Rscript ### MERGE sweden DATA -source("R/format.data/format-fun.R"); -library(reshape, quietly = TRUE);d -ir.create("output/formatted/Sweden", recursive = TRUE, showWarnings = FALSE) +source("R/format.data/format-fun.R") +library(reshape, quietly = TRUE) +dir.create("output/formatted/Sweden", recursive = TRUE, showWarnings = FALSE) ######################### READ DATA read individuals tree data data.swe <- read.table("data/raw/Sweden/Swe_NFI_all.txt", header = T, stringsAsFactors = F, sep = "\t") +## library(gnumeric) +## df1<-read.gnumeric.sheet( "data/raw/Sweden/Swe_NFI_1.ods",sheet.name='Data' , +## quiet=FALSE , head = TRUE) +## df2<-read.gnumeric.sheet( "data/raw/Sweden/Swe_NFI_2a.xlsx",sheet.name='Data' , +## quiet=FALSE , head = TRUE) +## df3<-read.gnumeric.sheet( "data/raw/Sweden/Swe_NFI_3.xlsx",sheet.name='Data' , +## quiet=FALSE , head = TRUE) +## head(df) -# understand year for survival -table(data.swe$Year, data.swe$DeadYear) -table(data.swe$Year,is.na(data.swe$dia_t3)) data.swe$tree.id <- apply(cbind(data.swe[["Year"]], data.swe[["TractNr"]], data.swe[["PlotNr"]], @@ -20,6 +25,35 @@ data.swe$tree.id <- apply(cbind(data.swe[["Year"]], data.swe[["TractNr"]], data.swe$plot <- apply(cbind(data.swe[["Year"]], data.swe[["TractNr"]], data.swe[["PlotNr"]]), 1, paste, collapse = "_") +## ## Need to check if some year of census have more census than possible ? + +## # understand year for survival +## # need to compute per plot +## plot.df <- data.frame(plot = unique(data.swe$plot), +## year = data.swe$Year[!duplicated(data.swe$plot)], +## mesure_t1 = tapply(!is.na(data.swe$dia_t1), +## data.swe$plot, sum)>0, +## mesure_t2 = tapply(!is.na(data.swe$dia_t2), +## data.swe$plot, sum)>0, +## mesure_t3 = tapply(!is.na(data.swe$dia_t3), +## data.swe$plot, sum)>0) +## table(plot.df$year, plot.df$mesure_t1) +## table(plot.df$year, plot.df$mesure_t3) +## table(plot.df$mesure_t2 +## , plot.df$mesure_t3) +## sum(plot.df$mesure_t2 == FALSE & plot.df$mesure_t1 == TRUE & plot.df$mesure_t3 == TRUE) + +## mean.year <- tapply(data.swe$Year, data.swe$plot, mean) +## names(mean.year)[(!mean.year %in% c(2003:2012))] + + +## tapply(!is.na(data.swe$dia_t3), data.swe$plot, sum) + +## table(data.swe$Year, data.swe$DeadYear) +## table(data.swe$Year,is.na(data.swe$dia_t3) & is.na(data.swe$dia_t2)) +## table(data.swe$Year,is.na(data.swe$dia_t1) & is.na(data.swe$dia_t2)) +## table(data.swe$Year,is.na(data.swe$dia_t2)) +## table(data.swe$Year,is.na(data.swe$dia_t1)) ## Format to desired form