Commit 803219a5 authored by Georges Kunstler's avatar Georges Kunstler
Browse files

progress on talk in beamer

parent 6b39326a
......@@ -4,7 +4,7 @@ D3 := output/processed
D4 := figs/test.format.tree
D5 := figs/test.traits
D6 := figs/test.CWM
sites:= US Fushan Paracou BCI Mbaiki Luquillo Japan Spain Sweden Canada France Swiss NSW NVS
sites:= Fushan NSW Paracou BCI Mbaiki Luquillo Japan Spain Sweden Canada France Swiss NVS US
D3Done := $(addsuffix /Done.no.txt,$(addprefix $(D3)/, $(sites) ))
D2traits := $(addsuffix /traits.csv,$(addprefix $(D2)/, $(sites) ))
D2tree := $(addsuffix /tree.csv,$(addprefix $(D2)/, $(sites) ))
......@@ -44,7 +44,8 @@ GLOBAL: $(D3)/Done.txt
$(D3)/Done.txt: scripts/process.data/remove.out.R scripts/process.data/summarise.data.R $(D3)/Done.t.txt
Rscript $<
Rscript scripts/process.data/summarise.data.R
Rscript scripts/process.data/summarise.data.R
Rscript scripts/process.data/plot.data.all.R
$(D3)/Done.t.txt: scripts/process.data/merge.all.processed.data.R
Rscript $<
......@@ -65,7 +66,7 @@ $(D5)/Done.txt: scripts/find.trait/test.traits.R $(D3traits)
TEST.CWM: $(D6)/Done.txt
$(D6)/Done.txt: scripts/process.data/test.tree.CWM.R $(D3Done)
Rscript $<
Rscript $<
#-------------------------------------------------------
......
#!/usr/bin/env Rscript
source("R/analysis/glmer.output-fun.R")
source("R/analysis/glmer.run.all.R")
source("R/analysis/glmer.run.R")
## LOOP OVER FILES AND NOT SCAN ALL FILES
traits <- c("SLA", "Wood.density","Max.height","Leaf.N","Seed.mass")
......
......@@ -148,6 +148,7 @@ df <- readRDS(file = file.path(base.dir,
cat,
'rds',
sep = '.')))
df$species.id[is.na(df$species.id)] <- 'missing.sp'
if(!is.na(sample.size)){
df$species.id[is.na(df$species.id)] <- 'missing.sp'
......@@ -176,6 +177,7 @@ data.tree.tot <- readRDS(file.path(base.dir, fname))
df <- fun.data.for.glmer(data.tree.tot, trait,
type.filling = type.filling,
cat.TF = cat.TF)
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
if (cat.TF) cat <- 'cat'
......
......@@ -855,27 +855,94 @@ for (i in traits){
}
}
}
fun.axis.one.by.one <- function(x, side, labels, cols.vec ){
axis(side, x, labels = bquote(.(labels[x])), las = 1, cex.axis = 2,
mgp = c(1.5,0.55,0),col.axis = cols.vec[x])
}
plot.param.BLUP <- function(list.res,
model = 'lmer.LOGLIN.ER.AD.Tf',
traits = c('Wood.density', 'SLA', 'Max.height'),
param.vec = c("Tf","sumBn", "sumTnBn",
"sumTfBn", "sumTnTfBn.abs"),
param.names = c('Direct trait',
'Compet',
'Compet effect x trait',
'Compet response x trait',
'Compet x trait dissimilarity'),
param.print = 1:5,
col.names = c('#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00') ,
col.vec,
pch.vec,
names.bio){
m <- matrix(c(1:4), 1, 4)
layout(m, widths=c(5.3, 2, 2 ,2)/10,
heights=c(4)/10)
for (i in traits){
list.temp <- list.res[[paste("all.no.log_", i ,
"_species_", model,
sep = '')]]$lmer.summary
param.mean <- list.temp$fixed.coeff.E[param.vec]
param.std <- list.temp$fixed.coeff.Std.Error
names(param.std) <- names(list.temp$fixed.coeff.E)
param.std <- param.std[param.vec]
param.BLUP <- list.temp$set.BLUP
if(i == traits[1]) {
par(mar=c(6,22,3,0))
}else{
par(mar=c(6,0,3,0))
}
plot(param.mean[param.print], (1:length(param.vec))[param.print],
yaxt = 'n', xlab = paste('Effect size '), ylab = NA,
xlim = c(-0.45, 0.25) , pch = 16 , cex = 2, cex.lab = 1.5,
ylim = range(1:length(param.vec)))
mtext(i, side=3, cex =1.2)
box(lwd= 2)
abline(v = 0)
if(i == traits[1]) {lapply(1:length(param.vec),
fun.axis.one.by.one,
side = 2,
labels = param.names,
cols.vec = col.names)
}
fun.plot.error.bar.horiz(param.mean[param.print],
(1:length(param.vec))[param.print],
param.std[param.print])
for (j in 1:length(param.vec)){
points(param.BLUP[,param.vec[j]], rep(j,nrow(param.BLUP)),
pch = pch.vec[rownames(param.BLUP)],
col = col.vec[rownames(param.BLUP)],
cex= 1)
}
}
par(mar=c(0, 0, 0, 0))
plot(1,1,type="n", axes=F, xlab="", ylab="")
legend("center",legend = names.bio, col = col.vec, pch = pch.vec,
bty = 'n', cex = 1)
}
plot.param <- function(list.res,
model = 'lmer.LOGLIN.ER.AD.Tf',
traits = c('Wood.density', 'SLA', 'Max.height'),
param.vec = c("logD", "Tf","sumBn", "sumTnBn",
"sumTfBn", "sumTnTfBn.abs"),
param.names = c('Size', 'Direct trait',
'Compet', 'Compet effect x trait',
'Compet response x trait',
'Compet x trait dissimilarity'),
col.vec,
pch.vec,
names.bio){
model = 'lmer.LOGLIN.ER.AD.Tf',
traits = c('Wood.density', 'SLA', 'Max.height'),
param.vec = c("Tf","sumBn", "sumTnBn",
"sumTfBn", "sumTnTfBn.abs"),
param.names = c('Direct trait',
'Compet',
'Compet effect x trait',
'Compet response x trait',
'Compet x trait dissimilarity'),
param.print = 1:5,
col.names = c('#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00') ,
col.vec,
pch.vec,
names.bio){
m <- matrix(c(1:3), 1, 3)
layout(m, widths=c(7, 4, 4 )/10,
layout(m, widths=c(5.3, 2.39, 2.39 )/10,
heights=c(4)/10)
for (i in traits){
list.temp <- list.res[[paste("all.no.log_", i ,
......@@ -887,27 +954,32 @@ for (i in traits){
param.std <- param.std[param.vec]
param.BLUP <- list.temp$set.BLUP
if(i == traits[1]) {
par(mar=c(6,13,3,0))
par(mar=c(6,22,3,0))
}else{
par(mar=c(6,0,3,0))
}
plot(param.mean, 1:length(param.vec),
plot(param.mean[param.print], (1:length(param.vec))[param.print],
yaxt = 'n', xlab = paste('Effect size '), ylab = NA,
xlim = c(-0.45, 0.25) , pch = 16 , cex = 2, cex.lab = 1.5)
xlim = c(-0.45, 0.25) , pch = 16 , cex = 2, cex.lab = 1.5,
ylim = range(1:length(param.vec)))
mtext(i, side=3, cex =1.2)
box(lwd= 2)
abline(v = 0)
if(i == traits[1]) axis(2, 1:length(param.vec), labels = param.names, las = 1, cex.axis = 2,
mgp = c(1.5,0.55,0))
fun.plot.error.bar.horiz(param.mean,
1:length(param.vec),
param.std)
if(i == traits[1]) {lapply(1:length(param.vec),
fun.axis.one.by.one,
side = 2,
labels = param.names,
cols.vec = col.names)
}
fun.plot.error.bar.horiz(param.mean[param.print],
(1:length(param.vec))[param.print],
param.std[param.print])
}
}
plot.param.BLUP2 <- function(list.res, model = 'lmer.LOGLIN.ER.AD.Tf',
traits = c('Wood.density', 'SLA', 'Max.height'), param.vec = c("logD",
"Tf","sumBn", "sumTnBn", "sumTfBn", "sumTnTfBn.abs"), col.vec,
......
......@@ -4,6 +4,7 @@
library(lme4)
run.multiple.model.for.set.one.trait <- function(model.files, fun.model, trait,
type.filling, cat.TF = FALSE,
fname = 'data.all.no.log.rds',
......@@ -50,18 +51,26 @@ model.files.lmer.Tf.CAT.3 <-
c("R/analysis/model.lmer/model.lmer.LOGLIN.ER.AD.Tf.CAT.MAT.MAP.r.species.R")
fun.call.lmer.and.save <- function(formula, df.lmer, path.out,
var.sample){
var.sample, select.biome){
lmer.output <- lmer(formula = formula, data = df.lmer, REML = FALSE,
control = lmerControl(optCtrl = list(maxfun = 40000) ) )
print(summary(lmer.output))
saveRDS(lmer.output, file = file.path(path.out, paste(var.sample,
"results.nolog.all.rds", sep ='.')))
if(!is.na(select.biome)){
name.file <- paste(var.sample,
"results.nolog.all.rds", sep ='.')
}
if(is.na(select.biome)){
name.file <- paste(var.sample,select.biome,
"results.nolog.all.rds", sep ='.')
}
saveRDS(lmer.output, file = file.path(path.out, name.file))
}
run.lmer <- function (model.file, trait,
min.obs = 10, sample.size = NA,
type.filling, cat.TF, fname = 'data.all.no.log.rds',
var.sample = 'ecocode.id') {
var.sample = 'ecocode.id',
select.biome = NA) {
require(lme4)
source(model.file, local = TRUE)
model <- load.model()
......@@ -76,8 +85,9 @@ run.lmer <- function (model.file, trait,
trait, "\n")
df.lmer <- load.data.for.lmer(trait,fname = fname,
cat.TF = cat.TF,
sample.size = sample.size,
var.sample = var.sample)
sample.size. = sample.size,
var.sample. = var.sample,
select.biome. = select.biome)
# return a DF
cat("Ok data with Nobs", nrow(df.lmer),
......@@ -85,7 +95,8 @@ run.lmer <- function (model.file, trait,
#= Run model
fun.call.lmer.and.save(formula = model$lmer.formula.tree.id,
df.lmer, path.out = path.out,
var.sample = var.sample)
var.sample = var.sample,
select.biome = select.biome)
rm(df.lmer)
gc()
}
......@@ -116,21 +127,24 @@ add.sampling.prob.by.var.sample <- function(df, var.sample){
load.data.for.lmer <- function(trait, cat.TF,
fname = 'data.all.no.log.rds',
base.dir = "output/processed",
sample.size ,
var.sample = 'ecocode.id'){
sample.size. ,
var.sample. = 'ecocode.id',
select.biome. = NA){
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
if (cat.TF) cat <- 'cat'
if (!cat.TF) cat <- 'no.cat'
df <- readRDS(file = file.path(base.dir,paste('data', trait, type, cat,'rds',
sep = '.')))
if(!is.na(sample.size)){
df$species.id[is.na(df$species.id)] <- 'missing.sp'
df <- add.sampling.prob.by.var.sample(df, var.sample = var.sample)
df <- df[sample(1:nrow(df), size = sample.size,
df$species.id[is.na(df$species.id)] <- 'missing.sp'
if(!is.na(select.biome.)) {
df <- df[df$biomes.id == select.biome., ]
}
if(!is.na(sample.size.)){
df <- add.sampling.prob.by.var.sample(df, var.sample = var.sample.)
df <- df[sample(1:nrow(df), size = sample.size.,
prob = df$prob.sample), ]
print(paste('sub-sampled by',var.sample))
print(paste('sub-sampled by',var.sample.))
}
return( df)
}
......
load.model <- function(){
list(name = "LOGLIN.ER.AD.Tf.r.biomes",
## parameters to save
pars = c('intercept' , 'mean_logD', 'mean_Tf',
'mean_sumBn', 'mean_sumTfBn', 'mean_sumTnBn',
'mean_sumTnTfBn_abs', 'sigma_inter_species',
'sigma_inter_set', 'sigma_inter_plot',
'sigma_inter_tree', 'sigma_logD_species',
'sigma_Tf_biomes', 'sigma_sumBn_biomes',
'sigma_sumBn_species', 'sigma_sumTfBn_biomes',
'sigma_sumTnBn_biomes', 'sigma_sumTnTfBn_abs_biomes',
'sigma', 'param_Tf_biomes',
'param_sumBn_biomes', 'param_sumTfBn_biomes',
'param_sumTnBn_biomes', 'param_sumTnTfBn_abs_biomes'),## parameters to save
stan = "
data {
int<lower=0> N_indiv;
......
### fiunction to merge several chain
load.stan.res <- function(chain, path.out, var.sample){
res <- readRDS(file.path(path.out, paste(var.sample, 'results.stan', chain, 'rds', sep = '.')))
return(res)
}
fun.merge.chain <- function(path.out, var.sample, chains.vec){
require(rstan)
sflist <- lapply(chains.vec, load.stan.res, path.out, var.sample)
res.merged <- sflist2stanfit(sflist)
return(res.merged)
}
## path.out <- 'output/stan/all.no.log/Wood.density/species/LOGLIN.ER.AD.Tf.r.biomes'
## res <- fun.merge.chain(path.out, var.sample = 'ecocode.id', chains.vec = 1:3)
......@@ -12,28 +12,48 @@ model.files.stan.Tf.1 <-
"R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.r.biomes.R",
"R/analysis/model.stan/model.stan.LOGLIN.ER.AD.Tf.r.ecocode.R")
fun.call.stan.and.save <- function(stan.model, list.stan, path.out,
fun.call.stan.and.save.init <- function(stan.model, list.stan, path.out,
var.sample){
set_cppo(mode = "fast")
stan.output <- stan(model_code = stan.model$stan,
data = list.stan,
pars = stan.model$pars,
chains = 0)
print(stan.output)
saveRDS(list(model.stan = stan.output, data.stan = list.stan),
file = file.path(path.out,
paste(var.sample,
"results.stan", 'init',
"rds", sep = '.')))
}
fun.call.stan.and.save <- function(stan.model, path.out,
chain_id,
iter = 100, warmup = 50,
chains = 1, thin = 1,
var.sample){
set_cppo(mode = "fast")
list.init.stan <- readRDS( file = file.path(path.out, paste(var.sample,
"results.stan", 'init', "rds", sep = '.')))
rng_seed <- 123
stan.output <- stan(model_code = stan.model$stan,
data = list.stan,
pars = NA,#stan.model$param.to.save,
data = list.init.stan$data.stan,
pars = stan.model$pars,
init = 'random',
iter = iter,
warmup = warmup,
chains = chains,
chain_id = chain_id,
thin = thin,
verbose = FALSE)
verbose = FALSE,
seed = rng_seed)
print(stan.output)
saveRDS(stan.output, file = file.path(path.out, paste(var.sample,
"results.stan.", chain_id, ".rds", sep = '.')))
"results.stan", chain_id, "rds", sep = '.')))
}
run.stan <- function (model.file, trait,
run.stan <- function (model.file, trait, init.TF,
min.obs = 10, sample.size = NA,
type.filling='species', cat.TF,
fname = 'data.all.no.log.rds',
......@@ -53,19 +73,27 @@ run.stan <- function (model.file, trait,
dir.create(path.out, recursive = TRUE, showWarnings = FALSE)
cat("run stan for model", model.file, " for trait",
trait, "\n")
if (init.TF) {
stan.list <- fun.load.stan(trait,fname = fname, cat.TF = cat.TF,
sample.size = sample.size,
var.sample = var.sample)
cat("Ok data with Nobs", stan.list$N_indiv,
"\n")
#= Run model
fun.call.stan.and.save(stan.model = model,
fun.call.stan.and.save.init(stan.model = model,
list.stan = stan.list,
path.out = path.out,
var.sample = var.sample)
rm(stan.list)
}
if (!init.TF) {
fun.call.stan.and.save(stan.model = model,
path.out = path.out,
var.sample = var.sample,
chain_id = chain_id,
...)
rm(stan.list)
}
gc()
}
......
......@@ -2,7 +2,6 @@
## FUNCTION TO TEST AND PLOTS TRAITS
source("R/utils/plot.R")
## Fun to test that the range of traits is ok
## Leaf N mg g-1 between 2 and 50 Kattge et al 2011
......@@ -23,14 +22,14 @@ fun.test.one.trait.data <- function(i, data, trait.name, t.min, t.max) {
}
fun.test.range.traits <- function(data.traits, set) {
trait.name <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean",
trait.name <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean",
"Wood.density.mean", "Max.height.mean")
t.min <- c(2, 0.0001, 1, 0.1, 1)
t.max <- c(60, 600000, 100, 1.5, 100)
vec.test <- sapply(1:5, fun.test.one.trait.data,
vec.test <- sapply(1:5, fun.test.one.trait.data,
data.traits, trait.name, t.min, t.max)
if(any(vec.test))
stop(paste("Trait", paste(trait.name[vec.test], collapse = " "),
stop(paste("Trait", paste(trait.name[vec.test], collapse = " "),
"out of range for set", set))
else { print("All traits within suitable range") }
}
......@@ -48,15 +47,15 @@ fun.test.one.trait.data <- function(i, data, trait.name, t.min, t.max) {
}
fun.test.range.traits <- function(data.traits, set) {
trait.name <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean",
trait.name <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean",
"Wood.density.mean", "Max.height.mean")
t.min <- c(2, 0.00001, 1, 0.1, 1)
t.max <- c(60, 600000, 140, 1.5, 100)
vec.test <- sapply(1:5, fun.test.one.trait.data, data.traits,
vec.test <- sapply(1:5, fun.test.one.trait.data, data.traits,
trait.name, t.min, t.max)
if(any(vec.test))
stop(paste("Trait",
paste(trait.name[vec.test], collapse = " ") ,
paste(trait.name[vec.test], collapse = " ") ,
"out of range for set", set))
}
......@@ -64,9 +63,9 @@ fun.test.range.traits <- function(data.traits, set) {
fun.test.type.data <- function(t.name, data, type.t, set) {
t <- data[[t.name]]
if(sum(!is.na(t))) {
if(!(mode(t)==type.t)) stop(paste("In set", set, "variable", t,
if(!(mode(t)==type.t)) stop(paste("In set", set, "variable", t,
"is not", type.t, sep = " ")) }
}
}
fun.test.data.type.traits <- function(data.traits, set) {
trait.n <- c("Leaf.N", "Seed.mass", "SLA", "Wood.density", "Max.height")
......@@ -92,7 +91,7 @@ fun.plot.xy.traits <- function(get.traits, k) {
for (j in (i+1):5) {
if (sum(!is.na(get.traits[[trait.name[i]]] ) &
!is.na(get.traits[[trait.name[j]]] ))>0) {
plot(get.traits[[trait.name[i]]] , get.traits[[trait.name[j]]],
plot(get.traits[[trait.name[i]]] , get.traits[[trait.name[j]]],
xlab = trait.name[i], ylab = trait.name[j],
log = "xy", main = k)
}
......@@ -105,7 +104,7 @@ fun.plot.xy.traits <- function(get.traits, k) {
## fun to run all test
fun.test.set <- function(set, filedir) {
get.traits <- read.csv(file.path(filedir, set, "traits.csv"),
get.traits <- read.csv(file.path(filedir, set, "traits.csv"),
header = T, stringsAsFactors =FALSE)
trait.n <- c("Leaf.N", "Seed.mass", "SLA", "Wood.density", "Max.height")
character.var <- c("sp", "Latin_name", 'Phylo.group', 'Pheno.T')
......
......@@ -94,6 +94,11 @@ fun.test.range.vars.q.B <- function(data.tree, set) {
"quantile 0.1 or 0.9 out of range for set", set))
}
fun.test.dead <- function(x, set){
perc.dead <- sum(x[!is.na(x)] ==1)/length(x[!is.na(x)])
if(perc.dead >0.2 | perc.dead < 0.0001) stop('problem with dead tree variable in set ', set)
}
###############
##### FUN FOR ALL
# Inventory
......@@ -104,6 +109,7 @@ fun.test.set.I <- function(set, filedir) {
fun.test.data.names.I(set, data.tree)
fun.test.range.vars.q.I(data.tree, set)
fun.test.data.type.tree.I(set, data.tree)
fun.test.dead(data.tree$dead, set)
fun.plot.XY.hist(data.tree, set, var.1 = "D", var.2 = "G")
fun.plot.XY.hist(data.tree, set, var.1 = "D", var.2 = "BA.G")
cat(set, "OK \n")
......@@ -116,6 +122,7 @@ fun.test.set.B <- function(set, filedir) {
fun.test.data.names.B(set, data.tree)
fun.test.range.vars.q.B(data.tree, set)
fun.test.data.type.tree.B(set, data.tree)
fun.test.dead(data.tree$dead, set)
fun.plot.XY.hist(data.tree, set, var.1 = "D", var.2 = "G")
fun.plot.XY.hist(data.tree, set, var.1 = "D", var.2 = "BA.G")
cat(set, "OK \n")
......
################################
### RUN test tree
rm(list = ls())
source("R/format.data/test.tree-fun.R")
##################
## do the test
## run in GK_competition/data.format.workshop
sets.I <- c("Canada", "France", "NSW", "NVS",
"Sweden", "Swiss", "Spain", "US")
sets.B <- c("BCI", "Fushan", "Paracou", "Mbaiki", "Japan", "Luquillo")
filedir <- "output/formatted"
dir.create("figs/test.format.tree", recursive = TRUE, showWarnings = FALSE)
## test set
I <- lapply(sets.I, fun.test.set.I, filedir)
B <- lapply(sets.B, fun.test.set.B, filedir)
## compute summary for vars and plots
array.summary <- function.summary.table.sets(c(sets.I, sets.B),
vars = c('D', 'G',
'BA.G', 'weights'),
filedir)
pdf("figs/test.format.tree/range.D.G.BAG.pdf")
lapply(c('D', 'G', 'BA.G', 'weights'), fun.plot.m.q, array.summary)
dev.off()
lapply(c('MAT', 'MAP'), fun.hist.var.all.set, sets = sets.I, filedir)
fun.plot.xy.all.sets(sets = c(sets.B, sets.I), x = 'D', y = 'G',
filedir, xlim = c(10, 300), ylim = c(-100, 200))
fun.plot.xy.all.sets(sets = c(sets.B, sets.I), x = 'D', y = 'BA.G',
filedir, xlim = c(10, 300), ylim = c(-200, 500))
print("check figures in figs/test.format.tree")
cat("finished", file = file.path("figs/test.format.tree", "Done.txt"))
##################### function to process data install all unstallled packages
git.root <- function() {
system("git rev-parse --show-toplevel", intern=TRUE)
}
source.root <- function(filename) {
source(file.path(git.root(), filename), chdir=TRUE)
}
path.root <- git.root()
source.root("R/packages.R")