Commit 1ae052a9 authored by Georges Kunstler

Solve issue in data resampling and Stan chain merging

parent e1f33265
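Context for the chain-merging part of this fix, not part of the commit itself: a minimal sketch of how separately run chains can be combined with rstan's sflist2stanfit() and checked for convergence, mirroring fun.merge.chain() and fun.print.conv() in the diff below. The per-chain file names used here are hypothetical placeholders.

## Sketch only: per-chain result files assumed to be named <var.sample>.results.stan.<chain>.rds
library(rstan)
chain.files <- paste('ecocode', 'results.stan', 1:3, 'rds', sep = '.')
sflist <- lapply(chain.files, readRDS)        # one stanfit object per chain
res.merged <- sflist2stanfit(sflist)          # merge the chains into a single fit
## Rhat and effective sample size per parameter, warmup included
conv <- monitor(extract(res.merged, permuted = FALSE, inc_warmup = TRUE))
print(conv)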
@@ -69,7 +69,7 @@ fun.call.lmer.and.save <- function(formula, df.lmer, path.out,
run.lmer <- function (model.file, trait,
min.obs = 10, sample.size = NA,
type.filling, cat.TF, fname = 'data.all.no.log.rds',
var.sample = 'ecocode.id',
var.sample = 'ecocode',
select.biome = NA) {
require(lme4)
source(model.file, local = TRUE)
@@ -128,18 +128,18 @@ load.data.for.lmer <- function(trait, cat.TF,
fname = 'data.all.no.log.rds',
base.dir = "output/processed",
sample.size. ,
var.sample. = 'ecocode.id',
var.sample. = 'ecocode',
select.biome. = NA,
sample.vec.TF. = FALSE){
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
if (cat.TF) cat <- 'cat'
if (!cat.TF) cat <- 'no.cat'
df <- readRDS(file = file.path(base.dir,paste('data', trait, type, cat,'rds',
df <- readRDS(file = file.path(base.dir,paste('data', trait, type,'rds',
sep = '.')))
df$species.id[is.na(df$species.id)] <- 'missing.sp'
df$sp.name[is.na(df$sp.name)] <- 'missing.sp'
if(!is.na(select.biome.)) {
df <- df[df$biomes.id == select.biome., ]
df <- df[df$biomes == select.biome., ]
}
if(!is.na(sample.size.)){
if(!sample.vec.TF.){
@@ -151,24 +151,24 @@ if(!is.na(sample.size.)){
saveRDS(sample.vec,file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
print(paste('sub-sampled by',var.sample.))
df <- fun.format.data.for.lmer(data.tree.tot, trait,
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
}else{
sample.vec <- readRDS(file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
df <- df[sample.vec, ]
df <- fun.format.data.for.lmer(data.tree.tot, trait,
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
print(paste('sub-sampled by',var.sample., 'by loading sample.vec'))
}
}else{
df <- fun.format.data.for.lmer(data.tree.tot, trait,
res <- fun.format.data.for.lmer(df, trait,
type.filling = type.filling,
cat.TF = cat.TF)
}
return( df)
}
return( res)
}
load.and.save.data.for.lmer <- function(trait,
@@ -179,8 +179,7 @@ load.and.save.data.for.lmer <- function(trait,
data.table.TF = FALSE){
data.tree.tot <- readRDS(file.path(base.dir, fname))
df <- fun.select.data.for.lmer(data.tree.tot, trait,
type.filling = type.filling,
cat.TF = cat.TF)
type.filling = type.filling)
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
saveRDS(df,file = file.path(base.dir,paste('data', trait, type, 'rds',
......
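The resampling part of the fix subsamples the data by the grouping variable in var.sample (now 'ecocode') and caches the drawn row index in a sample.vec file so that later runs and chains reuse exactly the same subset. The committed sampling code itself sits in the collapsed lines of the hunk above; the sketch below is only one plausible shape of that step, with a hypothetical function name and arguments.

## Sketch only: hypothetical helper, not the committed code.
subsample.rows <- function(df, var.sample = 'ecocode', sample.size = 1000,
                           vec.file = 'sample.vec.rds', load.existing = FALSE) {
  if (!load.existing) {
    ## draw up to sample.size rows within each level of var.sample
    sample.vec <- unlist(lapply(split(seq_len(nrow(df)), df[[var.sample]]),
                                function(i) i[sample.int(length(i),
                                                         min(sample.size, length(i)))]))
    saveRDS(sample.vec, vec.file)   # cache the index so later runs reuse it
  } else {
    sample.vec <- readRDS(vec.file) # reuse exactly the same subsample
  }
  df[sample.vec, ]
}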
@@ -68,15 +68,15 @@ parameters{
vector[N_biomes] param_sumTnBn_biomes;
vector[N_biomes] param_sumTnTfBn_abs_biomes;
vector[N_species] param_logD_species;
vector[N_species] intercept_species;
vector[N_plot] intercept_plot;
vector[N_set] intercept_set;
vector[N_tree] intercept_tree;
}
transformed parameters {
# vector for prediction
vector[N_indiv] theo_g;
vector[N_species] intercept_species;
vector[N_plot] intercept_plot;
vector[N_set] intercept_set;
vector[N_tree] intercept_tree;
for (i in 1:N_indiv){
theo_g[i] <- intercept + intercept_species[species_id[i]]
+ intercept_tree[tree_id[i]]*tree_01[i]
......
@@ -12,53 +12,20 @@ for (trait in traits){
for(model in c(model.files.stan.Tf.1[3])){
source(model, local = TRUE)
model.obj <- load.model()
## TODO
path.out <- output.dir(type = 'stan', model = model.obj$name,
trait = trait, set = 'all.no.log',
type.filling = 'species')
paths <- c(files,path.out)
paths <- c(paths,path.out)
}
}
## TODO
paste(var.sample,
"results.stan", chains.id, "rds", sep = '.')
out <- lapply(files, summarise.lmer.output.all.list)
names(out) <- lapply(lapply(files,files.details.all),
function(x) paste(as.vector(x[names(x) != 'file']),
collapse="_"))
### remove missing
out <- out[!unlist(lapply(out,FUN=function(x) is.null(x$lmer.summary)))]
saveRDS(out,file='output/list.lmer.out.all.ecocode.id.no.log.rds')
## LOOP OVER FILES AND NOT SCAN ALL FILES
traits <- c("SLA", "Wood.density","Max.height")
type.filling <- 'species'
files <- c()
for (trait in traits){
for(model in c(model.files.lmer.Tf.1,
model.files.lmer.Tf.2,
model.files.lmer.Tf.3,
model.files.lmer.Tf.5,
model.files.lmer.Tf.CAT.1,
model.files.lmer.Tf.CAT.2)){
source(model, local = TRUE)
model.obj <- load.model()
pathout <- output.dir('lmer', model.obj$name, trait, 'all.no.log',
type.filling=type.filling)
files <- c(files,file.path(pathout,"species.id.results.nolog.all.rds"))
}
}
out <- lapply(files, summarise.lmer.output.all.list)
names(out) <- lapply(lapply(files,files.details.all),
function(x) paste(as.vector(x[names(x) != 'file']),
collapse="_"))
### remove missing
out <- out[!unlist(lapply(out,FUN=function(x) is.null(x$lmer.summary)))]
saveRDS(out,file='output/list.lmer.out.all.species.id.no.log.rds')
out <- lapply(1:1, fun.load.print.conv,
paths,
models = c(model.files.stan.Tf.1[3]),
var.sample = 'ecocode.id',
chains.vec = 1:3)
@@ -13,6 +13,25 @@ res.merged <- sflist2stanfit(sflist)
return(res.merged)
}
fun.print.conv <- function(res.stan, pars){
res <- monitor(extract(res.stan, pars,
permuted = FALSE,
inc_warmup = TRUE))
return(res)
}
## function to load and print convergence
fun.load.print.conv <- function(i,paths, models, var.sample, chains.vec){
source(models[i], local = TRUE)
model.obj <- load.model()
res.stan <- fun.merge.chain(paths[i], var.sample, chains.vec)
res <- fun.print.conv(res.stan, model.obj$pars)
print(res)
return(res)
}
## path.out <- 'output/stan/all.no.log/Wood.density/species/LOGLIN.ER.AD.Tf.r.biomes'
## res <- fun.merge.chain(path.out, var.sample = 'ecocode.id', chains.vec = 1:3)
### test stan
source('R/analysis/lmer.run.R')
source('R/analysis/stan.run.R')
run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 1000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 2, chain_id = '1',
init.TF = TRUE)
system.time(res <- run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 1000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 2, chain_id = '1',
init.TF = FALSE))
run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 10000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 2, chain_id = '1',
init.TF = TRUE)
system.time(res <- run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 10000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 2, chain_id = '1',
init.TF = FALSE))
run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 20000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 2, chain_id = '1',
init.TF = TRUE)
system.time(res <- run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 20000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 2, chain_id = '1',
init.TF = FALSE))
run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 5000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 2, chain_id = '1',
init.TF = TRUE)
system.time(res <- run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 5000,
var.sample = 'ecocode.id',
iter = 200, warmup = 20, chains = 2, chain_id = '1',
init.TF = FALSE))
run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 5000,
var.sample = 'ecocode.id',
iter = 2000, warmup = 200, chains = 2, chain_id = '1',
init.TF = TRUE)
system.time(res <- run.multiple.model.for.set.one.trait(model.files.stan.Tf.1[3], run.stan,
trait = 'SLA',type.filling='species', sample.size = 5000,
var.sample = 'ecocode.id',
iter = 2000, warmup = 200, chains = 2, chain_id = '1',
init.TF = FALSE))
res <- run.stan(model.file = model.files.stan.Tf.1[4], trait = 'SLA',
min.obs = 10, sample.size = 1000,
type.filling = 'species', cat.TF = FALSE,
fname = 'data.all.no.log.csv')
saveRDS(res, 'stan.res.rds')
res <- readRDS('output/stan/all.no.log/SLA/species/LOGLIN.ER.AD.norandom/results.stan.rds')
res
@@ -696,48 +696,70 @@ return(data.all)
## function to reformat data with traits centred and standardized
fun.reformat.traits <- function(data, traits, mean.traits, sd.traits){
for (trait in traits){
trait.f <- paste(trait, 'focal', sep ='.')
data[[trait.f]] <- (data[[trait.f]]- mean.traits[trait])/sd.traits[trait]
trait.CWM <- paste(trait, 'CWM.fill', sep = '.')
data[[trait.CWM]] <- (data[[trait.CWM]]- mean.traits[trait])/sd.traits[trait]
trait.abs <- paste(trait, 'abs.CWM.fill', sep = '.')
data[[trait.abs]] <- (data[[trait.CWM]])/sd.traits[trait]
}
fun.reformat.trait <- function(data,trait){
##
trait.CWM.sd.n <- paste(trait, 'CWM.fill', sep = '.')
##
trait.CWM.sd.e <- (parse(text =
paste('(',trait,'.CWM.fill',
' - mean(', trait,
'.focal, na.rm =TRUE))/sd(',trait,
'.focal, na.rm =TRUE)' ,
sep = '')))
data[, c(trait.CWM.sd.n) := eval(trait.CWM.sd.e) ]
##
trait.abs.CWM.sd.n <- paste(trait, 'abs.CWM.fill', sep = '.')
##
trait.abs.CWM.sd.e <- (parse(text =
paste('(',trait,'.abs.CWM.fill',
')/sd(',trait,
'.focal, na.rm =TRUE)' ,
sep = '')))
data[, c(trait.abs.CWM.sd.n) := eval(trait.abs.CWM.sd.e) ]
return(data)
}
fun.reformat.traits.CAT <- function(data, traits, mean.traits, sd.traits){
for (trait in traits){
for(cat in c('A_EV', 'A_D', 'C')){
trait.CWM <- paste(trait, 'CWM.fill', cat, sep = '.')
data[[trait.CWM]] <- (data[[trait.CWM]]- mean.traits[trait])/sd.traits[trait]
trait.abs <- paste(trait, 'abs.CWM.fill', cat, sep = '.')
data[[trait.abs]] <- (data[[trait.CWM]])/sd.traits[trait]
}
}
fun.reformat.trait.CAT <- function(data, trait, cat){
##
trait.CWM.sd.n <- paste(trait, 'CWM.fill', cat, sep = '.')
##
trait.CWM.sd.e <- (parse(text =
paste('(',trait,'.CWM.fill.', cat,
' - mean(', trait,
'.focal, na.rm =TRUE))/sd(',trait,
'.focal, na.rm =TRUE)' ,
sep = '')))
data[, c(trait.CWM.sd.n) := eval(trait.CWM.sd.e) ]
##
trait.abs.CWM.sd.n <- paste(trait, 'abs.CWM.fill', cat, sep = '.')
##
trait.abs.CWM.sd.e <- (parse(text =
paste('(',trait,'.abs.CWM.fill.', cat,
')/sd(',trait,
'.focal, na.rm =TRUE)' ,
sep = '')))
data[, c(trait.abs.CWM.sd.n) := eval(trait.abs.CWM.sd.e) ]
return(data)
}
fun.standardized.traits <- function(data,
fun.standardized.traits <- function(data,
traits = c('SLA', 'Leaf.N',
'Wood.density', 'Max.height',
'Seed.mass')){
## Compute Std deviation of each traits
mean.traits <- unlist(lapply(paste(traits, 'focal', sep = '.'),
function(var, data) mean(data[[var]], na.rm = TRUE),
data))
sd.traits <- unlist(lapply(paste(traits, 'focal', sep = '.'),
function(var, data) sd(data[[var]], na.rm = TRUE),
data))
names(mean.traits) <- names(sd.traits) <- traits
## Reformat to account for traits std
data <- fun.reformat.traits(data, traits, mean.traits, sd.traits)
data <- fun.reformat.traits.CAT(data, traits, mean.traits, sd.traits)
'Seed.mass'),
cat.vec = c('A_EV', 'A_D', 'C')){
for(trait in traits){
data <- fun.reformat.trait(data, trait)
for (cat in cat.vec){
data <- fun.reformat.trait.CAT(data, trait, cat)
}
print(data)
}
return(data)
}
}
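For readability: the data.table expressions built with parse()/eval() in fun.reformat.trait above centre each CWM column by the mean of the focal trait and divide by its standard deviation, while the absolute CWM is only divided by the standard deviation. A minimal sketch of the same computation written without parse()/eval(), assuming a data.table with columns such as SLA.focal, SLA.CWM.fill and SLA.abs.CWM.fill (the function name here is hypothetical):

## Sketch only: equivalent computation without parse()/eval().
library(data.table)
standardize.trait.cols <- function(dt, trait) {
  focal <- dt[[paste(trait, 'focal', sep = '.')]]
  m <- mean(focal, na.rm = TRUE)
  s <- sd(focal, na.rm = TRUE)
  cwm     <- paste(trait, 'CWM.fill', sep = '.')
  abs.cwm <- paste(trait, 'abs.CWM.fill', sep = '.')
  dt[, (cwm)     := (get(cwm) - m) / s]   # centre and scale the CWM
  dt[, (abs.cwm) := get(abs.cwm) / s]     # scale the absolute CWM only
  dt
}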
@@ -762,8 +784,8 @@ fun.reform.data.and.remove.outlier <- function(data.all,
data.all[ , cluster := paste(ecocode, cluster)]
data.all[ , tree.id := paste(ecocode, tree.id)]
data.all[ , obs.id := paste(ecocode, obs.id)]
data.all <- as.data.frame(data.all)
if(std.traits.TF) fun.standardized.traits(data.all)
data.all <- as.data.frame(data.all)
return(data.all)
}
......
TARGETS = $(subst md,pdf,$(shell ls *.md))
all: outline.pdf
outline.pdf: outline.md include.tex
pandoc $< --csl=journal-of-applied-ecology.csl --filter pandoc-citeproc --bibliography=refs.bib --template=include.tex --variable mainfont="Times New Roman" --variable sansfont=Arial --variable fontsize=12pt --latex-engine=xelatex -o $@
clean:
rm -f *.pdf
#
% THIS FILE TAKEN FROM PANDOC DEMOS PAGE:http://johnmacfarlane.net/pandoc/demos.html,http://johnmacfarlane.net/pandoc/demo/mytemplate.tex
\documentclass[$if(fontsize)$$fontsize$,$endif$$if(lang)$$lang$,$endif$, a4paper]{$documentclass$}
\RequirePackage[hmargin=2cm,vmargin=2cm]{geometry}
\usepackage[T1]{fontenc}
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\usepackage{fixltx2e} % provides \textsubscript
% use microtype if available
\IfFileExists{microtype.sty}{\usepackage{microtype}}{}
% use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[utf8]{inputenc}
$if(euro)$
\usepackage{eurosym}
$endif$
\else % if luatex or xelatex
\usepackage{fontspec}
\ifxetex
\usepackage{xltxtra,xunicode}
\fi
\defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
\newcommand{\euro}{}
$if(mainfont)$
\setmainfont{$mainfont$}
$endif$
$if(sansfont)$
\setsansfont{$sansfont$}
$endif$
$if(monofont)$
\setmonofont{$monofont$}
$endif$
$if(mathfont)$
\setmathfont{$mathfont$}
$endif$
\fi
% Fancy HEADER
\usepackage{fancyhdr}
\pagestyle{fancy}
\pagenumbering{arabic}
\lhead{\itshape{\nouppercase{\leftmark}}}
\chead{}
\rhead{\thepage}
%\lfoot{v $version$}
\cfoot{}
%\rfoot{\thepage}
$if(geometry)$
\usepackage[$for(geometry)$$geometry$$sep$,$endfor$]{geometry}
$endif$
$if(natbib)$
\usepackage{natbib}
\bibliographystyle{plainnat}
$endif$
$if(biblatex)$
\usepackage{biblatex}
$if(biblio-files)$
\bibliography{$biblio-files$}
$endif$
$endif$
$if(listings)$
\usepackage{listings}
$endif$
$if(lhs)$
\lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{}
$endif$
$if(highlighting-macros)$
$highlighting-macros$
$endif$
$if(verbatim-in-note)$
\usepackage{fancyvrb}
$endif$
$if(tables)$
\usepackage{longtable}
$endif$
$if(graphics)$
\usepackage{graphicx}
% We will generate all images so they have a width \maxwidth. This means
% that they will get their normal width if they fit onto the page, but
% are scaled down if they would overflow the margins.
\makeatletter
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth
\else\Gin@nat@width\fi}
\makeatother
\let\Oldincludegraphics\includegraphics
\renewcommand{\includegraphics}[1]{\Oldincludegraphics[width=\maxwidth]{#1}}
$endif$
\ifxetex
\usepackage[setpagesize=false, % page size defined by xetex
unicode=false, % unicode breaks when used with xetex
xetex]{hyperref}
\else
\usepackage[unicode=true]{hyperref}
\fi
\hypersetup{breaklinks=true,
bookmarks=true,
pdfauthor={$author-meta$},
pdftitle={$title-meta$},
colorlinks=true,
urlcolor=$if(urlcolor)$$urlcolor$$else$blue$endif$,
linkcolor=$if(linkcolor)$$linkcolor$$else$magenta$endif$,
pdfborder={0 0 0}}
$if(links-as-notes)$
% Make links footnotes instead of hotlinks:
\renewcommand{\href}[2]{#2\footnote{\url{#1}}}
$endif$
$if(strikeout)$
\usepackage[normalem]{ulem}
% avoid problems with \sout in headers with hyperref:
\pdfstringdefDisableCommands{\renewcommand{\sout}{}}
$endif$
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
\setlength{\emergencystretch}{3em} % prevent overfull lines
$if(numbersections)$
$else$
\setcounter{secnumdepth}{0}
$endif$
$if(verbatim-in-note)$
\VerbatimFootnotes % allows verbatim text in footnotes
$endif$
$if(lang)$
\ifxetex
\usepackage{polyglossia}
\setmainlanguage{$mainlang$}
\else
\usepackage[$lang$]{babel}
\fi
$endif$
$for(header-includes)$
$header-includes$
$endfor$
$if(title)$
\title{$title$}
$endif$
\author{$for(author)$$author$$sep$ \and $endfor$}
\date{$date$}
\begin{document}
$if(title)$
\maketitle
$endif$
$for(include-before)$
$include-before$
$endfor$
$if(toc)$
{
\hypersetup{linkcolor=black}
\setcounter{tocdepth}{$toc-depth$}
\tableofcontents
}
$endif$
$body$
$if(natbib)$
$if(biblio-files)$
$if(biblio-title)$
$if(book-class)$
\renewcommand\bibname{$biblio-title$}
$else$
\renewcommand\refname{$biblio-title$}
$endif$
$endif$
\bibliography{$biblio-files$}
$endif$
$endif$
$if(biblatex)$
\printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$
$endif$
$for(include-after)$
$include-after$
$endfor$
\end{document}
<?xml version="1.0" encoding="utf-8"?>
<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only" default-locale="en-GB">
<info>
<title>Journal of Applied Ecology</title>
<id>http://www.zotero.org/styles/journal-of-applied-ecology</id>
<link href="http://www.zotero.org/styles/journal-of-applied-ecology" rel="self"/>
<link href="http://www.journalofappliedecology.org/view/0/authorGuideline.html" rel="documentation"/>
<author>
<name>Johan Asplund</name>
<email>asplundj@gmail.com</email>
</author>
<contributor>
<name>Sebastian Karcher</name>
</contributor>
<category citation-format="author-date"/>
<category field="biology"/>
<issn>0021-8901</issn>
<eissn>1365-2664</eissn>
<updated>2013-09-08T00:04:27+00:00</updated>
<rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
</info>