Commit e1f33265 authored by Georges Kunstler's avatar Georges Kunstler
Browse files

fixe issue of traits standarization

parent e045633e
......@@ -151,32 +151,39 @@ if(!is.na(sample.size.)){
saveRDS(sample.vec,file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
print(paste('sub-sampled by',var.sample.))
}else{
df <- fun.format.data.for.lmer(data.tree.tot, trait,
type.filling = type.filling,
cat.TF = cat.TF)
}else{
sample.vec <- readRDS(file = file.path(base.dir,paste('sample.vec', trait, type, cat,'rds',
sep = '.')))
df <- df[sample.vec, ]
df <- fun.format.data.for.lmer(data.tree.tot, trait,
type.filling = type.filling,
cat.TF = cat.TF)
print(paste('sub-sampled by',var.sample., 'by loading sample.vec'))
}
}
}
}else{
df <- fun.format.data.for.lmer(data.tree.tot, trait,
type.filling = type.filling,
cat.TF = cat.TF)
}
return( df)
}
load.and.save.data.for.lmer <- function(trait,
min.obs= 10,
type.filling = species,
cat.TF= FALSE,
fname = 'data.all.no.log.rds',
base.dir = "output/processed",
data.table.TF = FALSE){
data.tree.tot <- readRDS(file.path(base.dir, fname))
df <- fun.data.for.lmer(data.tree.tot, trait,
df <- fun.select.data.for.lmer(data.tree.tot, trait,
type.filling = type.filling,
cat.TF = cat.TF)
if (fname == 'data.all.no.log.rds') type <- 'no.log'
if (fname == 'data.all.log.rds') type <- 'log'
if (cat.TF) cat <- 'cat'
if (!cat.TF) cat <- 'no.cat'
saveRDS(df,file = file.path(base.dir,paste('data', trait, type, cat,'rds',
saveRDS(df,file = file.path(base.dir,paste('data', trait, type, 'rds',
sep = '.')))
}
......@@ -328,8 +335,8 @@ data.frame(logG = logG,
# Function to prepare data for lmer with new CWM data
# that will be used in a simple linear model
#============================================================
fun.data.for.lmer <- function(data.tree, trait, min.obs = 10,
type.filling = 'species', cat.TF) {
fun.select.data.for.lmer <- function(data.tree, trait, min.obs = 10,
type.filling = 'species') {
if(! trait %in% c("SLA", "Leaf.N", "Seed.mass",
"SLA", "Wood.density", "Max.height"))
stop("need trait to be in SLA Leaf.N Seed.mass
......@@ -347,6 +354,25 @@ data.tree <- fun.select.data.for.analysis(data.tree,
perc.neigh.gs,
BATOT,
min.obs)
return(data.tree)
}
#============================================================
# Function to prepare data for lmer with new CWM data
# that will be used in a simple linear model
#============================================================
fun.format.data.for.lmer <- function(data.tree, trait, min.obs = 10,
type.filling = 'species', cat.TF) {
if(! trait %in% c("SLA", "Leaf.N", "Seed.mass",
"SLA", "Wood.density", "Max.height"))
stop("need trait to be in SLA Leaf.N Seed.mass
SLA Wood.density or Max.height ")
# get vars names
CWM.tn <- paste(trait, "CWM", 'fill', sep = ".")
abs.CWM.tntf <- paste(trait, "abs.CWM", 'fill', sep = ".")
tf <- paste(trait, "focal", sep = ".")
BATOT <- "BATOT"
#= DATA LIST FOR LMER
if(!cat.TF) {
print('no cat')
......
#!/usr/bin/env Rscript
source("R/analysis/stan.output-fun.R")
source("R/analysis/stan.run.R")
## LOOP OVER FILES AND NOT SCAN ALL FILES
traits <- c("SLA")
type.filling <- 'species'
paths <- c()
for (trait in traits){
for(model in c(model.files.stan.Tf.1[3])){
source(model, local = TRUE)
model.obj <- load.model()
## TODO
path.out <- output.dir(type = 'stan', model = model.obj$name,
trait = trait, set = 'all.no.log',
type.filling = 'species')
paths <- c(files,path.out)
}
}
## TODO
paste(var.sample,
"results.stan", chains.id, "rds", sep = '.')
out <- lapply(files, summarise.lmer.output.all.list)
names(out) <- lapply(lapply(files,files.details.all),
function(x) paste(as.vector(x[names(x) != 'file']),
collapse="_"))
### remove missing
out <- out[!unlist(lapply(out,FUN=function(x) is.null(x$lmer.summary)))]
saveRDS(out,file='output/list.lmer.out.all.ecocode.id.no.log.rds')
## LOOP OVER FILES AND NOT SCAN ALL FILES
traits <- c("SLA", "Wood.density","Max.height")
type.filling <- 'species'
files <- c()
for (trait in traits){
for(model in c(model.files.lmer.Tf.1,
model.files.lmer.Tf.2,
model.files.lmer.Tf.3,
model.files.lmer.Tf.5,
model.files.lmer.Tf.CAT.1,
model.files.lmer.Tf.CAT.2)){
source(model, local = TRUE)
model.obj <- load.model()
pathout <- output.dir('lmer', model.obj$name, trait, 'all.no.log',
type.filling=type.filling)
files <- c(files,file.path(pathout,"species.id.results.nolog.all.rds"))
}
}
out <- lapply(files, summarise.lmer.output.all.list)
names(out) <- lapply(lapply(files,files.details.all),
function(x) paste(as.vector(x[names(x) != 'file']),
collapse="_"))
### remove missing
out <- out[!unlist(lapply(out,FUN=function(x) is.null(x$lmer.summary)))]
saveRDS(out,file='output/list.lmer.out.all.species.id.no.log.rds')
......@@ -693,14 +693,58 @@ return(data.all)
}
## remove growth outliers
## function to reformat data with traits centred and standardized
fun.reformat.traits <- function(data, traits, mean.traits, sd.traits){
for (trait in traits){
trait.f <- paste(trait, 'focal', sep ='.')
data[[trait.f]] <- (data[[trait.f]]- mean.traits[trait])/sd.traits[trait]
trait.CWM <- paste(trait, 'CWM.fill', sep = '.')
data[[trait.CWM]] <- (data[[trait.CWM]]- mean.traits[trait])/sd.traits[trait]
trait.abs <- paste(trait, 'abs.CWM.fill', sep = '.')
data[[trait.abs]] <- (data[[trait.CWM]])/sd.traits[trait]
}
return(data)
}
fun.reformat.traits.CAT <- function(data, traits, mean.traits, sd.traits){
for (trait in traits){
for(cat in c('A_EV', 'A_D', 'C')){
trait.CWM <- paste(trait, 'CWM.fill', cat, sep = '.')
data[[trait.CWM]] <- (data[[trait.CWM]]- mean.traits[trait])/sd.traits[trait]
trait.abs <- paste(trait, 'abs.CWM.fill', cat, sep = '.')
data[[trait.abs]] <- (data[[trait.CWM]])/sd.traits[trait]
}
}
return(data)
}
fun.standardized.traits <- function(data,
traits = c('SLA', 'Leaf.N',
'Wood.density', 'Max.height',
'Seed.mass')){
## Compute Std deviation of each traits
mean.traits <- unlist(lapply(paste(traits, 'focal', sep = '.'),
function(var, data) mean(data[[var]], na.rm = TRUE),
data))
sd.traits <- unlist(lapply(paste(traits, 'focal', sep = '.'),
function(var, data) sd(data[[var]], na.rm = TRUE),
data))
names(mean.traits) <- names(sd.traits) <- traits
## Reformat to account for traits std
data <- fun.reformat.traits(data, traits, mean.traits, sd.traits)
data <- fun.reformat.traits.CAT(data, traits, mean.traits, sd.traits)
return(data)
}
#### function to reformat data for global data set
fun.reform.data.and.remove.outlier <- function(data.all){
fun.reform.data.and.remove.outlier <- function(data.all,
std.traits.TF = TRUE){
## remove outlier following Condit approach
require(data.table)
data.all <- as.data.table(data.all)
......@@ -719,13 +763,12 @@ fun.reform.data.and.remove.outlier <- function(data.all){
data.all[ , tree.id := paste(ecocode, tree.id)]
data.all[ , obs.id := paste(ecocode, obs.id)]
data.all <- as.data.frame(data.all)
if(std.traits.TF) fun.standardized.traits(data.all)
return(data.all)
}
## write big file to csv by chunk to save RAM
fun.write.big.csv <- function(dt, file){
dt <- as.data.frame(dt)
N.chunk <- 100000
......
% Points to discuss during meeting with other
- Re-run in JAGS or STAN => several weeks (3 weeks) !
- Is this new analysis ok or should we include CAT Angiosperm_EV ANgiosperm_D Conifer. continuous variables MAT MAP.
- Survival important or not ? I'm really hoping to do that
- Traits effect make seens
- **Diff between biomes.**
- So far no big pattern emerging.
- This is may be the surprising results that there is not more differences
- Effect of range of traits
Explain the effect of different range of traits
- Implication for coexistence ? Up to where do we want to go. Based on my discussion with Mark so far not too far! Rather a context.
- If we discuss we are probably more interested to traits range that coexist than the numer of species. The diff are not so big for traits than for species.
## Minor Points
- Ghislain Max height Paracou
- Lourens Bolivian data?
# Plan to progress on the publication
- timeline
mid july to mid-august I will be not available
- run analysis on the cluster while not working
- end of september haev results and first draft.
- survival
- **Nature** data policy
A condition of publication in a Nature journal is that authors are required to make materials, data and associated protocols promptly available to others without undue qualifications.
Data sets must be made freely available to readers from the date of publication, and must be provided to editors and peer-reviewers at submission, for the purposes of evaluating the manuscript.
For the following types of data set, submission to a community-endorsed, public repository is mandatory. Accession numbers must be provided in the paper. Examples of appropriate public repositories are listed below.
- Other datasets
In addition to the above-mentioned mandatory requirements for data submission to community-endorsed public databases, Nature journals strongly recommend deposition of other types of data sets into appropriate public repositories that are at an earlier stage of development. Examples of such repositories that facilitate sharing large data sets, some of which can offer the option of anonymous referee access to data before publication, include:
**DAVID COOMES RECENT PAPER IN NATURE DATA ?** NO DATA RIGHT ?
#############
## MARK no nead to go up to coexistence
He is right !!!!
############3
## GHISLAIN
- not biomes as a random effect
- Why so different BATOT between traits
- model with only BATOT
#####
## Lourens
- R2
- model with std at world scale or at global scale
######
## DAVID IDEAS
- EVERYONE SEND AN EMAIL WITH WHAT IS THE BIGGEST POINT COMMING OUT OF THE RESULTS
in coming two weeks
- Need better figure to convince the message
- species effect random plot
- prediction of growth in function of basal area for diff traits value.
#####
## Daniel Laughlin
- add a Figure like the LAsky one to explain the hypothesis
- test fit with all data what is the differences ?
- Seed mass
- what to do with CI intercepting zero not significant at all or discuss with both (Mark comment explained with biomes effect and with variability in the model).
- justify resampling
############
## Mark Vanderwel
- wwf ecoregion compare
######
## Rob Kooyman
- send result with MAT MAP
......@@ -357,6 +357,13 @@ legend("center",legend = names.biomes.t,
bty = 'n', cex = 1.2)
## ![Range of traits in each biomes](../../figs/range_traits_per_biomes.pdf)
## ![Species diversity (shannon) vs. Traits Std. Dev.](../../figs/biomes_shannon_sdtraits.pdf)
## \newpage
......
......@@ -48,7 +48,7 @@ $\log{G_{max}} = \beta_0 + \beta_1 \times t_f$
where:
- $\beta_0$ is mean Gmax, including a plot/quadrat $p$ random effect, a random focal species $f$ effect, and a random set effect $s$ to account for the difference between data set^[Note that a random individual $i$ effect when multiple census are present is missing so far (because this is not easy to have a random effect for only a part of the data set in lme4 but I have that included in *stan*).].
- $\beta_0$ is mean Gmax, including a plot/quadrat $p$ random effect, a random focal species $f$ effect, and a random set effect $s$ to account for the difference between data set (Note that a random individual $i$ effect when multiple census are present is missing so far - because this is not easy to have a random effect for only a part of the data set in lme4 but I have that included in *stan*).
- $\beta_1$ is the slope for the link between $G_{max}$ and the trait $t_f$ with a random biomes effect.
......@@ -192,7 +192,7 @@ pandoc.table(mat.param, caption = "Full effect size")
The stronger trait effect is the direct effect of wood density. There is a consistent limiting similarity effect but generally of small magnitude.
For Wood density and SLA, the *response* and *effect* are of opposite sign, so species that are tolerant to competition also has strong competitive effect^[This match the competitive hierarchy I was using in Ecol. Let. which was assuming $c_r = - c_e$.]. This is not the case for Max height, shorter tree species are more tolerant to competition than taller species, but taller species have stronger competitive effect.
For Wood density and SLA, the *response* and *effect* are of opposite sign, so species that are tolerant to competition also has strong competitive effect (This match the competitive hierarchy I was using in Ecol. Let. which was assuming $c_r = - c_e$). This is not the case for Max height, shorter tree species are more tolerant to competition than taller species, but taller species have stronger competitive effect.
......@@ -380,6 +380,13 @@ legend("center",legend = names.biomes.t,
```
![Range of traits in each biomes](../../figs/range_traits_per_biomes.pdf)
![Species diversity (shannon) vs. Traits Std. Dev.](../../figs/biomes_shannon_sdtraits.pdf)
\newpage
......
......@@ -34,7 +34,7 @@ $\log{G_{max}} = \beta_0 + \beta_1 \times t_f$
where:
- $\beta_0$ is mean Gmax, including a plot/quadrat $p$ random effect, a random focal species $f$ effect, and a random set effect $s$ to account for the difference between data set^[Note that a random individual $i$ effect when multiple census are present is missing so far (because this is not easy to have a random effect for only a part of the data set in lme4 but I have that included in *stan*).].
- $\beta_0$ is mean Gmax, including a plot/quadrat $p$ random effect, a random focal species $f$ effect, and a random set effect $s$ to account for the difference between data set (Note that a random individual $i$ effect when multiple census are present is missing so far - because this is not easy to have a random effect for only a part of the data set in lme4 but I have that included in *stan*).
- $\beta_1$ is the slope for the link between $G_{max}$ and the trait $t_f$ with a random biomes effect.
......@@ -158,7 +158,7 @@ Table: Full effect size
The stronger trait effect is the direct effect of wood density. There is a consistent limiting similarity effect but generally of small magnitude.
For Wood density and SLA, the *response* and *effect* are of opposite sign, so species that are tolerant to competition also has strong competitive effect^[This match the competitive hierarchy I was using in Ecol. Let. which was assuming $c_r = - c_e$.]. This is not the case for Max height, shorter tree species are more tolerant to competition than taller species, but taller species have stronger competitive effect.
For Wood density and SLA, the *response* and *effect* are of opposite sign, so species that are tolerant to competition also has strong competitive effect (This match the competitive hierarchy I was using in Ecol. Let. which was assuming $c_r = - c_e$). This is not the case for Max height, shorter tree species are more tolerant to competition than taller species, but taller species have stronger competitive effect.
......@@ -218,6 +218,13 @@ The figure 7 shows the difference of species number (on a plot of ~20x20m - but
![**Species and traits diversity per biomes** (Tundra and deserts removed).](figure/Fig7.pdf)
![Range of traits in each biomes](../../figs/range_traits_per_biomes.pdf)
![Species diversity (shannon) vs. Traits Std. Dev.](../../figs/biomes_shannon_sdtraits.pdf)
\newpage
......
......@@ -12,6 +12,7 @@ filedir <- "output/processed"
library(data.table)
data.all <- readRDS(file.path(filedir, "data.all.no.log.rds"))
### compute mean BATOT, number of species, traits and VAR OF TRAITS per cluster
system.time( data.summarise <- fun.compute.all.var.cluster( data.all ) )
......@@ -21,7 +22,8 @@ test.data.all <- table(data.all$biomes, data.all$set) == 0
test.data.summarise <- table(data.summarise$biomes, data.summarise$set) == 0
if(!identical(test.data.summarise[, colnames(test.data.all)], test.data.all)) stop('error in summarise data')
if(!identical(test.data.summarise[, colnames(test.data.all)], test.data.all))
stop('error in summarise data')
saveRDS(data.summarise, 'output/data.summarise.rds')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment