Commit af84e86f authored by Georges Kunstler's avatar Georges Kunstler
Browse files

fixed problem in initial value

parent a4806faa
......@@ -43,12 +43,12 @@ $(D2)/TRY/data.TRY.std.rds:
GLOBAL: $(D3)/Done.txt
$(D3)/Done.txt: scripts/process.data/remove.out.R scripts/process.data/summarise.data.R $(D3)/Done.t.txt
Rscript $<
Rscript scripts/process.data/summarise.data.R
Rscript scripts/process.data/plot.data.all.R
R CMD BATCH $<
R CMD BATCH scripts/process.data/summarise.data.R
R CMD BATCH scripts/process.data/plot.data.all.R
$(D3)/Done.t.txt: scripts/process.data/merge.all.processed.data.R
Rscript $<
R CMD BATCH $<
#-------------------------------------------------------
TEST.TREE: $(D4)/Done.txt tree.all.sites
......
......@@ -12,24 +12,24 @@ load.model <- function(){
init.fun = function(chain_id= 1, list){
step <- (chain_id-1)/10
init <-
list(intercept = 0+step,
intercept_species = rnorm(list$N_species, 0, 0.5+step),
param_sumBn_species = rnorm(list$N_species, 0, 0.5+step),
param_logD_species = rnorm(list$N_species, 0, 0.5+step),
intercept_plot = rnorm(list$N_plot, 0, 0.5 +step),
intercept_set = rnorm(list$N_set, 0, 0.5 +step),
mean_logD = 2/3+step,
param_Tf = 2/3+step,
param_sumBn = 2/3+step,
param_sumTfBn = 2/3+step,
param_sumTnBn = 2/3+step,
param_sumTnTfBn_abs= 2/3+step,
sigma_inter_species = 0.5 +step,
sigma_inter_plot = 0.5 + +step,
sigma_inter_set = 0.5 + +step,
list(intercept = 0 + step,
intercept_species = rnorm(list$N_species, 0, 0.3 + step),
param_sumBn_species = rnorm(list$N_species, 0, 0.1 + step),
param_logD_species = rnorm(list$N_species, 0, 0.3 + step),
intercept_plot = rnorm(list$N_plot, 0, 0.3 + step),
intercept_set = rnorm(list$N_set, 0, 0.1 + step),
mean_logD = 2/3 + step,
param_Tf = -0.1 + step,
param_sumBn = -0.1 + step,
param_sumTfBn = -0.1 + step,
param_sumTnBn = -0.1 + step,
param_sumTnTfBn_abs= -0.1 + step,
sigma_inter_species = 0.5 + step,
sigma_inter_plot = 0.3 + step,
sigma_inter_set = 0.1 + step,
sigma = 0.5 +step,
sigma_sumBn_species = 0.5 +step,
sigma_logD_species = 0.5 +step)
sigma_sumBn_species = 0.1 +step,
sigma_logD_species = 0.3 +step)
return(init)
},
......@@ -93,10 +93,8 @@ transformed parameters {
model {
# constants for prior
## real sigma0;
################################################################################
######################## growth model with STAN #############################
real sigma0;
sigma0 <- 10;
###############################################
########### Hierarchical parameters ########
......@@ -108,6 +106,12 @@ model {
intercept_plot ~ normal(0,sigma_inter_plot);
### set random effect
intercept_set ~ normal(0, sigma_inter_set);
###############################################
########### Non-Hierarchical parameters ########
# slope and intercept
intercept ~ normal(0,sigma0);
mean_logD ~ normal(0,sigma0);
### biomes fixed param
param_Tf ~ normal(0, sigma0);
param_sumBn ~ normal(0,sigma0);
......
......@@ -13,23 +13,23 @@ load.model <- function(){
step <- (chain_id-1)/10
init <-
list(intercept = 0+step,
intercept_species = rnorm(list$N_species, 0, 0.5+step),
param_sumBn_species = rnorm(list$N_species, 0, 0.5+step),
param_logD_species = rnorm(list$N_species, 0, 0.5+step),
intercept_plot = rnorm(list$N_plot, 0, 0.5 +step),
intercept_set = rnorm(list$N_set, 0, 0.5 +step),
mean_logD = 2/3+step,
param_Tf_biomes= rep(2/3+step, list$N_biomes),
param_sumBn_biomes= rep(2/3+step, list$N_biomes),
param_sumTfBn_biomes= rep(2/3+step, list$N_biomes),
param_sumTnBn_biomes= rep(2/3+step, list$N_biomes),
param_sumTnTfBn_abs_biomes= rep(2/3+step, list$N_biomes),
sigma_inter_species = 0.5 +step,
sigma_inter_plot = 0.5 + +step,
sigma_inter_set = 0.5 + +step,
intercept_species = rnorm(list$N_species, 0, 0.3+step),
param_sumBn_species = rnorm(list$N_species, 0, 0.1+step),
param_logD_species = rnorm(list$N_species, 0, 0.3+step),
intercept_plot = rnorm(list$N_plot, 0, 0.3 +step),
intercept_set = rnorm(list$N_set, 0, 0.1 +step),
mean_logD = (2/3 - 0.1) +step,
param_Tf_biomes= rep(-0.1+step, list$N_biomes),
param_sumBn_biomes= rep(-0.1+step, list$N_biomes),
param_sumTfBn_biomes= rep(-0.1+step, list$N_biomes),
param_sumTnBn_biomes= rep(-0.1+step, list$N_biomes),
param_sumTnTfBn_abs_biomes= rep(-0.1+step, list$N_biomes),
sigma_inter_species = 0.3 +step,
sigma_inter_plot = 0.3 + +step,
sigma_inter_set = 0.1 + +step,
sigma = 0.5 +step,
sigma_sumBn_species = 0.5 +step,
sigma_logD_species = 0.5 +step)
sigma_sumBn_species = 0.1 +step,
sigma_logD_species = 0.3 +step)
return(init)
},
......@@ -95,10 +95,8 @@ transformed parameters {
model {
# constants for prior
## real sigma0;
################################################################################
######################## growth model with STAN #############################
real sigma0;
sigma0 <- 10;
###############################################
########### Hierarchical parameters ########
......@@ -110,28 +108,18 @@ model {
intercept_plot ~ normal(0,sigma_inter_plot);
### set random effect
intercept_set ~ normal(0, sigma_inter_set);
### biomes fixed param
## param_Tf_biomes ~ normal(0, sigma0);
## param_sumBn_biomes ~ normal(0,sigma0);
## param_sumTfBn_biomes ~ normal(0 ,sigma0);
## param_sumTnBn_biomes ~ normal(0 ,sigma0);
## param_sumTnTfBn_abs_biomes ~ normal(0 , sigma0);
###############################################
########### Non-Hierarchical parameters ########
# constant for prior
## sigma0 <- 10;
# slope and intercept
## intercept ~ normal(0,sigma0);
## mean_logD ~ normal(0,sigma0);
## # sigma
## sigma_inter_species~ uniform(0,6);
## sigma_inter_set~ uniform(0,6);
## sigma_inter_plot~ uniform(0,6);
## sigma_inter_tree~ uniform(0,6);
## sigma_logD_species~ uniform(0,6);
## sigma_sumBn_species~ uniform(0,6);
## sigma~ uniform(0,6);
intercept ~ normal(0,sigma0);
mean_logD ~ normal(0,sigma0);
### biomes fixed param
param_Tf_biomes ~ normal(0, sigma0);
param_sumBn_biomes ~ normal(0,sigma0);
param_sumTfBn_biomes ~ normal(0 ,sigma0);
param_sumTnBn_biomes ~ normal(0 ,sigma0);
param_sumTnTfBn_abs_biomes ~ normal(0 , sigma0);
###############################################
############ Likelihood ###################
......
......@@ -90,9 +90,6 @@ model {
real sigma0;
sigma0 <- 10;
################################################################################
######################## growth model with STAN #############################
###############################################
########### Hierarchical parameters ########
### species random param
......@@ -101,6 +98,9 @@ model {
intercept_species ~ normal(0, sigma_inter_species);
### plot random param
intercept_plot ~ normal(0,sigma_inter_plot);
### fixed param
intercept ~ normal(0,sigma0);
mean_logD ~ normal(0,sigma0);
### biomes fixed param
param_Tf ~ normal(0, sigma0);
param_sumBn ~ normal(0,sigma0);
......@@ -108,7 +108,6 @@ model {
param_sumTnBn ~ normal(0 ,sigma0);
param_sumTnTfBn_abs ~ normal(0 , sigma0);
###############################################
############ Likelihood ###################
logG ~ normal(theo_g,sigma);
......
......@@ -26,16 +26,16 @@ fun.call.stan.parallel.and.save <- function(stan.model, list.stan, path.out,
var.sample){
start <- Sys.time()
set_cppo(mode = "fast")
inits <- stan.model$init.fun(chain_id,
list.stan)
inits <- list(stan.model$init.fun(chains,
list.stan))
stan.output <- stan(model_code = stan.model$stan,
data = list.stan,
pars = stan.model$pars,
init = 'random',
init = inits,
iter = iter,
warmup = warmup,
chains = chains,
chain_id = chain_id,
## chain_id = chain_id,
thin = thin,
verbose = FALSE)
end <- Sys.time()
......@@ -57,7 +57,7 @@ fun.call.stan.and.save <- function(stan.model, list.stan, path.out,
stan.output <- stan(model_code = stan.model$stan,
data = list.stan,
pars = stan.model$pars,
init = 'random',
init = inits,
iter = iter,
warmup = warmup,
chains = chains,
......
### test stan
## I changed the coding of the random effect with norm(0,10*sigma it seems more slow!
## I changed the coding of the random effect
## with norm(0,10*sigma it seems more slow!
## PROBLEM WITH STANDARDIZED VARIABLE DO I NEED TO INCLUDE AN INTERCEPT??
## VERY BAD CONVERGENCE ???
......@@ -12,11 +13,12 @@ source('R/analysis/stan.run.R')
### TEST simple model on France only
df.lmer <- load.data.for.lmer(trait = 'SLA',fname = 'data.all.no.log.rds', cat.TF = FALSE,
sample.size = NA,
var.sample = NA,
sample.vec.TF. = FALSE,
select.set. = 'France')
df.lmer <- load.data.for.lmer(trait = 'SLA',
fname = 'data.all.no.log.rds',
cat.TF = FALSE,
sample.size = 500,
var.sample = 'ecocode',
sample.vec.TF. = FALSE)
stan.list <- fun.turn.in.list.for.jags.stan(df.lmer,
cat.TF = FALSE)
......@@ -26,8 +28,12 @@ source('R/analysis/model.stan/model.stan.LOGLIN.size.fixed.R', local = TRUE)
fun.init.stan <- function(chain_id= 1, stan.list){
init <- list(intercept = 0+(chain_id-1)/10,
intercept_species = rnorm(stan.list$N_species, 0, 0.5+(chain_id-1)/10),
intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +(chain_id-1)/10),
intercept_species = rnorm(stan.list$N_species,
0,
0.5+(chain_id-1)/10),
intercept_plot = rnorm(stan.list$N_plot,
0,
0.5 +(chain_id-1)/10),
mean_logD = 2/3+(chain_id-1)/10,
sigma_inter_species = 0.5 +(chain_id-1)/10,
sigma_inter_plot = 0.5 + +(chain_id-1)/10,
......@@ -48,33 +54,12 @@ system.time( stan.output <- stan(model_code = model$stan,
chains = 3,
verbose = FALSE))
source('R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.oneset.R', local = TRUE)
source('R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.fixed.biomes.R', local = TRUE)
model <- load.model()
fun.init.stan <- function(chain_id= 1, stan.list){
step <- (chain_id-1)/10
init <- list(intercept = 0+step,
intercept_species = rnorm(stan.list$N_species, 0, 0.5+step),
param_sumBn_species = rnorm(stan.list$N_species, 0, 0.5+step),
param_logD_species = rnorm(stan.list$N_species, 0, 0.5+step),
intercept_plot = rnorm(stan.list$N_plot, 0, 0.5 +step),
mean_logD = 2/3+step,
param_Tf = 2/3+step,
param_sumBn = 2/3+step,
param_sumTfBn = 2/3+step,
param_sumTnBn = 2/3+step,
param_sumTnTfBn_abs= 2/3+step,
sigma_inter_species = 0.5 +step,
sigma_inter_plot = 0.5 + +step,
sigma = 0.5 +step,
sigma_sumBn_species = 0.5 +step,
sigma_logD_species = 0.5 +step
)
}
inits <- list(fun.init.stan(1, stan.list),
fun.init.stan(2, stan.list),
fun.init.stan(3, stan.list))
inits <- list(model$init.fun(1, stan.list),
model$init.fun(2, stan.list),
model$init.fun(3, stan.list))
library(rstan)
......@@ -96,8 +81,35 @@ pairs(stan.output)
library(ggmcmc)
S <- ggs(stan.output)
extract(stan.output, permuted = FALSE, inc_warmup=FALSE))
S <- ggs(stan.output2)
ggs_crosscorrelation(S)
ggs_crosscorrelation(S, absolute_scale = FALSE)
ggs_density(S)
ggs_traceplot(S)
ggs_running(S)
ggs_autocorrelation(S)
ggs_Rhat(S)
ggs_geweke(S)
ggs_caterpillar(S)
#######
## read results of simul on cluster
source("R/analysis/stan.output-fun.R")
stan.out.clust <- fun.merge.chain(
path.out = "output/stan/all.no.log/SLA/species/LOGLIN.ER.AD.Tf.fixed.biomes/",
var.sample = 'ecocode', chains.vec = 1:3)
stan.out.clust
plot(stan.out.clust)
pairs(stan.out.clust)
traceplot(stan.out.clust, ask = TRUE)
## base on traceplot the initial value seems to be
## a big problem for the lack of convergence
library(ggmcmc)
S <- ggs(stan.out.clust)
ggs_crosscorrelation(S)
ggs_crosscorrelation(S, absolute_scale = FALSE)
ggs_density(S)
ggs_traceplot(S)
......@@ -108,3 +120,23 @@ ggs_geweke(S)
ggs_caterpillar(S)
run.multiple.model.for.set.one.trait(model.files.stan.Tf.1, run.stan, 'SLA',
type.filling='species', sample.size = 10000, var.sample = 'ecocode',
iter = 1000, warmup = 500, chains = 1, chain_id = 1,
init.TF = FALSE)
run.multiple.model.for.set.one.trait(model.files.stan.Tf.1, run.stan, 'SLA',
type.filling='species', sample.size = 1000, var.sample = 'ecocode',
iter = 1000, warmup = 500, chains = 3, parallel.TF = FALSE,
init.TF = FALSE)
out.stan3 <- readRDS('output/stan/all.no.log/SLA/species/LOGLIN.ER.AD.Tf.fixed.biomes/ecocode.results.stan.rds')
traceplot(out.stan3, ask = TRUE)
plot(out.stan3)
pairs(out.stan3)
library(ggmcmc)
S <- ggs(out.stan3)
ggs_crosscorrelation(S)
ggs_crosscorrelation(S, absolute_scale = FALSE)
ggs_density(S, family = c('param_sumBn_biomes'))
......@@ -8,7 +8,9 @@ load.processed.data <- function(path, file.name = "data.tree.tot.no.std.csv"){
require(data.table)
fname <- file.path(path, file.name )
if(file.exists(fname)){
cat('loading file', path, file.name)
data <- fread(fname, stringsAsFactors = FALSE)
print(warnings())
return(data)
}else{return(NULL)}
}
......
......@@ -6,11 +6,11 @@ mkdir -p trait.workshop
for trait in "'SLA'" "'Leaf.N'" "'Wood.density'" "'Max.height'" "'Seed.mass'"; do
echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait);print('done')\"" > trait.workshop/data1$trait.sh
echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait);print('done')\"" > trait.workshop/data1$trait.sh
qsub trait.workshop/data1$trait.sh -l nodes=1:ppn=1,mem=8gb -N "data1$trait" -q opt32G -j oe
echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait, fname = 'data.all.log.rds');print('done')\"" > trait.workshop/data3$trait.sh
echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/lmer.run.R'); load.and.save.data.for.lmer($trait, fname = 'data.all.log.rds');print('done')\"" > trait.workshop/data3$trait.sh
qsub trait.workshop/data3$trait.sh -l nodes=1:ppn=1,mem=8gb -N "data3$trait" -q opt32G -j oe
done
......
#!/bin/bash
# Georges Kunstler 14/05/2014
# read one variable
export LD_LIBRARY_PATH=/usr/lib64/R/library
mkdir -p trait.workshop
iter=50000
warmup=5000
thin=50
# test parameter
nbargs=$#
echo "number of arguments="$nbargs
if [ $nbargs -ne 1 ]
then
echo "need one and only one argument"
echo " usage :"
echo " ./launch_all_lmer.sh sample.size"
exit 100
fi
samplesize=$1
#
for trait in "'SLA'" "'Wood.density'" "'Max.height'" "'Seed.mass'" "'Leaf.N'"; do
echo "/usr/local/R/R-3.1.1/bin/Rscript -e \"source('R/analysis/jags.run.R'); run.multiple.model.for.set.one.trait(c(model.files.jags.Tf.1), run.jags,$trait,type.filling='species', sample.size = $samplesize, var.sample = 'ecocode.id',iter = $iter, warmup = $warmup, chains = 3, thin = $thin, init.TF = TRUE);print('done')\"" > trait.workshop/speciesjags${trait}.sh
qsub trait.workshop/speciesjags${trait}.sh -l nodes=1:ppn=1,mem=8gb -N "jags${trait}" -q opt32G -j oe
done
......@@ -5,8 +5,8 @@ export LD_LIBRARY_PATH=/usr/lib64/R/library
mkdir -p trait.workshop
iter=1000
warmup=500
iter=2000
warmup=1000
# test parameter
nbargs=$#
echo "number of arguments="$nbargs
......
......@@ -5,6 +5,7 @@
rm(list = ls())
source("R/format.data/format-fun.R")
dir.create("output/formatted/BCI", recursive = TRUE, showWarnings = FALSE)
library(reshape, quietly = TRUE)
### READ DATA read individuals tree data Requires careful formatting of 7 census
## datasets The raw data is such that, once a tree dies in census X, then it no
......@@ -15,8 +16,8 @@ data.bci1 <- read.csv("data/raw/BCI/stem.data/BCI.all.plots.census.allstems.1.cs
print(table(data.bci1$Stem))
print(sum(is.na(data.bci1$Stem)))
print(table(data.bci1$Stem))
print(sum(is.na(data.bci1$Stem)))
data.bci1$Date1 <- data.bci1$Date; data.bci1$Date <- NULL
data.bci1$DBH1 <- data.bci1$DBH; data.bci1$DBH <- NULL
data.bci1$HOM1 <- data.bci1$HOM; data.bci1$HOM <- NULL
......
#!/usr/bin/env Rscript
### MERGE sweden DATA
source("R/format.data/format-fun.R");
library(reshape, quietly = TRUE);d
ir.create("output/formatted/Sweden", recursive = TRUE, showWarnings = FALSE)
source("R/format.data/format-fun.R")
library(reshape, quietly = TRUE)
dir.create("output/formatted/Sweden", recursive = TRUE, showWarnings = FALSE)
######################### READ DATA read individuals tree data
data.swe <- read.table("data/raw/Sweden/Swe_NFI_all.txt", header = T,
stringsAsFactors = F, sep = "\t")
## library(gnumeric)
## df1<-read.gnumeric.sheet( "data/raw/Sweden/Swe_NFI_1.ods",sheet.name='Data' ,
## quiet=FALSE , head = TRUE)
## df2<-read.gnumeric.sheet( "data/raw/Sweden/Swe_NFI_2a.xlsx",sheet.name='Data' ,
## quiet=FALSE , head = TRUE)
## df3<-read.gnumeric.sheet( "data/raw/Sweden/Swe_NFI_3.xlsx",sheet.name='Data' ,
## quiet=FALSE , head = TRUE)
## head(df)
# understand year for survival
table(data.swe$Year, data.swe$DeadYear)
table(data.swe$Year,is.na(data.swe$dia_t3))
data.swe$tree.id <- apply(cbind(data.swe[["Year"]], data.swe[["TractNr"]],
data.swe[["PlotNr"]],
......@@ -20,6 +25,35 @@ data.swe$tree.id <- apply(cbind(data.swe[["Year"]], data.swe[["TractNr"]],
data.swe$plot <- apply(cbind(data.swe[["Year"]], data.swe[["TractNr"]],
data.swe[["PlotNr"]]),
1, paste, collapse = "_")
## ## Need to check if some year of census have more census than possible ?
## # understand year for survival
## # need to compute per plot
## plot.df <- data.frame(plot = unique(data.swe$plot),
## year = data.swe$Year[!duplicated(data.swe$plot)],
## mesure_t1 = tapply(!is.na(data.swe$dia_t1),
## data.swe$plot, sum)>0,
## mesure_t2 = tapply(!is.na(data.swe$dia_t2),
## data.swe$plot, sum)>0,
## mesure_t3 = tapply(!is.na(data.swe$dia_t3),
## data.swe$plot, sum)>0)
## table(plot.df$year, plot.df$mesure_t1)
## table(plot.df$year, plot.df$mesure_t3)
## table(plot.df$mesure_t2
## , plot.df$mesure_t3)
## sum(plot.df$mesure_t2 == FALSE & plot.df$mesure_t1 == TRUE & plot.df$mesure_t3 == TRUE)
## mean.year <- tapply(data.swe$Year, data.swe$plot, mean)
## names(mean.year)[(!mean.year %in% c(2003:2012))]
## tapply(!is.na(data.swe$dia_t3), data.swe$plot, sum)
## table(data.swe$Year, data.swe$DeadYear)
## table(data.swe$Year,is.na(data.swe$dia_t3) & is.na(data.swe$dia_t2))
## table(data.swe$Year,is.na(data.swe$dia_t1) & is.na(data.swe$dia_t2))
## table(data.swe$Year,is.na(data.swe$dia_t2))
## table(data.swe$Year,is.na(data.swe$dia_t1))
## Format to desired form
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment