Commit 2672624f authored by Georges Kunstler's avatar Georges Kunstler
Browse files

dplyr final version

parent 065497c6
......@@ -45,7 +45,7 @@ GLOBAL: $(D3)/Done.txt
$(D3)/Done.txt: scripts/process.data/remove.out.R scripts/process.data/summarise.data.R $(D3)/Done.t.txt
Rscript $<
Rscript scripts/process.data/summarise.data.R
Rscript scripts/process.data/plot.data.all.R
Rscript scripts/process.data/plot.data.all.R; convert figs/test.processed/*.p* figs/test.processed/all.CWM.pdf
$(D3)/Done.t.txt: scripts/process.data/merge.all.processed.data.R $(sites)
Rscript $<
......@@ -66,7 +66,7 @@ $(D5)/Done.txt: scripts/find.trait/test.traits.R $(D3traits)
TEST.CWM: $(D6)/Done.txt
$(D6)/Done.txt: scripts/process.data/test.tree.CWM.R $(D3Done)
Rscript $< ; convert figs/test.processed/*.p* figs/test.processed/all.CWM.pdf
Rscript $<
#-------------------------------------------------------
......
......@@ -36,12 +36,15 @@ summarise.lmer.all.output <- function(x, random.name ){
}
summarise.lmer.output.list <- function(f ){
output.lmer <- read.lmer.output(f)
list.output.lmer <- read.lmer.output(f)
output.lmer <- list.output.lmer[['output']]
relgrad <- list.output.lmer[['relgrad']]
if(!is.null(output.lmer)){
res <- list(files.details = files.details(f),
lmer.summary = summarise.lmer.output( output.lmer),
terms = terms(output.lmer),
vcov = vcov(output.lmer))
vcov = vcov(output.lmer),
relgrad = relgrad)
}else{
res <- NULL
}
......@@ -50,7 +53,9 @@ summarise.lmer.output.list <- function(f ){
summarise.lmer.output.all.list <- function(f ){
output.lmer <- read.lmer.output(f)
list.output.lmer <- read.lmer.output(f)
output.lmer <- list.output.lmer[['output']]
relgrad <- list.output.lmer[['relgrad']]
if(!is.null(output.lmer)){
details <- files.details.all(f)
......@@ -63,7 +68,8 @@ summarise.lmer.output.all.list <- function(f ){
random.name =
model$var.BLUP),
terms = terms(output.lmer),
vcov = vcov(output.lmer))
vcov = vcov(output.lmer),
relgrad = relgrad)
}else{
res <- NULL
}
......
......@@ -41,7 +41,7 @@ fun.call.lmer.and.save <- function(formula, df.lmer, path.out,
var.sample, select.biome){
lmer.output <- lmer(formula = formula, data = df.lmer, REML = FALSE,
control = lmerControl(optCtrl = list(maxfun = 40000) ) )
print(warnigs())
print(warnings())
print(summary(lmer.output))
relgrad <- with(lmer.output@optinfo$derivs,solve(Hessian,gradient))
print('test convergence of relative gradient of hessian')
......@@ -54,7 +54,8 @@ fun.call.lmer.and.save <- function(formula, df.lmer, path.out,
name.file <- paste(var.sample, gsub(' ', '_',select.biome),
"results.nolog.all.rds", sep ='.')
}
saveRDS(lmer.output, file = file.path(path.out, name.file))
saveRDS(list(output = lmer.output, relgrad = relgrad),
file = file.path(path.out, name.file))
}
run.lmer <- function (model.file, trait,
......
......@@ -762,42 +762,14 @@ fun.reformat.trait <- function(data,trait){
return(data)
}
fun.reformat.trait.CAT <- function(data, trait, cat){
##
trait.CWM.sd.n <- paste(trait, 'CWM.fill', cat, sep = '.')
##
trait.CWM.sd.e <- (parse(text =
paste('(',trait,'.CWM.fill.', cat,
' - mean(', trait,
'.focal, na.rm =TRUE))/sd(',trait,
'.focal, na.rm =TRUE)' ,
sep = '')))
data[, c(trait.CWM.sd.n) := eval(trait.CWM.sd.e) ]
##
trait.abs.CWM.sd.n <- paste(trait, 'abs.CWM.fill', cat, sep = '.')
##
trait.abs.CWM.sd.e <- (parse(text =
paste('(',trait,'.abs.CWM.fill.', cat,
')/sd(',trait,
'.focal, na.rm =TRUE)' ,
sep = '')))
data[, c(trait.abs.CWM.sd.n) := eval(trait.abs.CWM.sd.e) ]
return(data)
}
fun.standardized.traits <- function(data,
traits = c('SLA', 'Leaf.N',
'Wood.density', 'Max.height',
'Seed.mass'),
cat.vec = c('A_EV', 'A_D', 'C')){
'Seed.mass')){
for(trait in traits){
data <- fun.reformat.trait(data, trait)
for (cat in cat.vec){
data <- fun.reformat.trait.CAT(data, trait, cat)
}
print(data)
print(str(data))
}
return(data)
}
......@@ -806,16 +778,26 @@ return(data)
#### function to reformat data for global data set
fun.reform.data.and.remove.outlier <- function(data.all,
std.traits.TF = TRUE){
fun.reform.data.and.remove.outlier <- function(data.all, err.limit = 4,
std.traits.TF = TRUE,
select.one.census.rand = TRUE){
## remove outlier following Condit approach
require(data.table)
require(dplyr)
data.all <- as.data.table(data.all)
q.l <- quantile(data.all$BA.G, probs = 0.00005, na.rm = TRUE)
q.h <- quantile(data.all$BA.G, probs = 0.99995, na.rm = TRUE)
data.all[!is.na(BA.G) &
BA.G > q.h,
c('G','BA.G') := NA]
data.all[!is.na(BA.G) &
BA.G < q.l,
c('G','BA.G') := NA]
data.all[!is.na(G) &
G > 75,
c('G','BA.G') := NA]
data.all[!is.na(G) &
((D*10+year*G) <= (D*10 -5*(0.006214*D*10 + 0.9036))),
((D*10+year*G) <= (D*10 -err.limit*(0.006214*D*10 + 0.9036))),
c('G','BA.G') := NA]
# reformat factors
......@@ -826,15 +808,15 @@ fun.reform.data.and.remove.outlier <- function(data.all,
data.all[ , tree.id := paste(ecocode, tree.id)]
data.all[ , obs.id := paste(ecocode, obs.id)]
if(std.traits.TF) fun.standardized.traits(data.all)
data.all[ , plot.c := paste(plot, census)]
data.all <- as.data.frame(data.all)
plots.select <- drop(as.matrix(group_by(data.all, plot) %>%
summarise(select = sample(plot.c, 1)) %>%
select(select)))
data.all <- filter(data.all, plot %in% plots.select)
# remove tree with multiple obs
### TODO NEED TO CHANGE THAT TO SELECT RANDOMLY ONE CENSUS PER PLOT
if(select.one.census.rand){
data.all[ , plot.c := paste(plot, census)]
data.all <- as.data.frame(data.all)
plots.select <- drop(as.matrix(group_by(data.all, plot) %>%
dplyr::summarise(select = sample(plot.c, 1)) %>%
dplyr::select(select)))
data.all <- filter(data.all, plot.c %in% plots.select)
}
# remove tree with multiple obs
return(data.all)
}
......
#######################################
#######################################
###### EXPLORE DATA SET BEFORE ANALYSIS
rm(list = ls())
source("R/process.data/process-fun.R")
source("R/process.data/test.tree.CWM-fun.R")
source("R/utils/plot.R")
source("R/utils/mem.R")
source("R/analysis/lmer.run.R")
filedir <- "output/processed"
mat.perc <- read.csv(file = file.path(filedir, "all.sites.perc.traits.csv"),
stringsAsFactors = FALSE)
### read all data
library(data.table)
system.time(data.all <- fread(file.path(filedir, "data.all.log.csv"),
stringsAsFactors = FALSE))
## system.time(data.all <- read.csv(file.path(filedir, "data.all.csv"),
## stringsAsFactors = FALSE))
system.time(data.all.no.log <- fread(file.path(filedir, "data.all.no.log.csv"),
stringsAsFactors = FALSE))
df.lmer <- fun.data.for.lmer(data.all, 'Max.height',
type.filling = 'species', cat.TF = FALSE)
if(dim(data.all)[1] != sum(mat.perc[['num.obs']]))
stop('error not same dimension per ecoregion and total')
data.all$set <- factor(data.all$set)
##############
fun.table.set.more.cut <- function(cut, data, var){
return(table(data[['set']][data[[var]]> cut & !is.na(data[[var]])]) /
table(data[['set']][ !is.na(data[[var]])]))
}
### function to look at number of observation per data set in function of min.perc
pdf('figs/test.processed/perc.vs.cut.pdf')
for (t in c('Wood.density','SLA', 'Leaf.N', 'Max.height')){
perc.genus <- paste(t, 'perc.genus', sep = '.')
perc.species <- paste(t, 'perc.species', sep = '.')
cut.perc <- seq(0.7, 0.99, length = 30)
set.sp <- do.call('rbind', lapply(cut.perc,
fun.table.set.more.cut,
data = data.all,
perc.species))
set.gs <- do.call('rbind', lapply(cut.perc,
fun.table.set.more.cut,
data = data.all,
perc.genus))
par(mfrow=c(1, 2))
matplot(cut.perc, set.sp, type = 'l', col = col.vec[colnames(set.sp)], lwd = 2,
main = t, lty = 1)
matplot(cut.perc, set.gs, , type = 'l', col = col.vec[colnames(set.sp)],
lwd = 2, lty =1)
legend(0.7, 0.8, colnames(set.sp), col = col.vec[colnames(set.sp)],
lty = 1, lwd = 2)
}
dev.off()
## Look at number of species with enough occurence
sp.mat <- table(data.all$ecocode, data.all$sp)
sum.sp <- apply(sp.mat, MARGIN = 1, sum, na.rm = TRUE)
sp.mat.perc <- sp.mat/sum.sp
sort(apply(sp.mat.perc > 0.1, MARGIN = 1, sum))
sort(apply(sp.mat > 200, MARGIN = 1, sum))
## sd of trait focal and shannon
fun.sd.x.per.y <- function(x, y = 'ecocode', data = data.all){
res <- tapply(data[[x]], data[[y]], sd , na.rm = TRUE)
df.res <- data.frame(res)
names(df.res) <- x
return(df.res)
}
trait <- c('Leaf.N','SLA','Wood.density','Max.height')
# per ecocode
sd.ecocode <- do.call( 'cbind', lapply(paste(trait, 'focal', sep = '.'),
fun.sd.x.per.y,
data = data.all))
set.per.ecocode <- tapply(data.all$set, data.all$ecocode, unique)
div.per.ecocode <- tapply(data.all$sp, data.all$ecocode, fun_div)
nsp.per.ecocode <- tapply(data.all$sp, data.all$ecocode,
function(x) nlevels(factor(x)))
nsp.per.plot <- tapply(data.all$sp, data.all$plot,
function(x) nlevels(factor(x)))
sd.ecocode <- data.frame(set = set.per.ecocode, ecocode = rownames(sd.ecocode),
sd.ecocode, do.call('rbind', div.per.ecocode),nsp = nsp.per.ecocode)
# per set
sd.set <- do.call( 'cbind', lapply(paste(trait, 'focal', sep = '.'),
fun.sd.x.per.y, y = 'set',
data = data.all))
div.per.set <- do.call('rbind', tapply(data.all$sp, data.all$set, fun_div))
nsp.per.set <- tapply(data.all$sp, data.all$set,
function(x) nlevels(factor(x)))
sd.set <- data.frame(set = rownames(sd.set),
sd.set, div.per.set, nsp = nsp.per.set)
## plots
pdf("figs/test.processed/boxplot.traits.sd.tot.ecocode.pdf",
width = 12, height = 12)
par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
for (i in paste(trait,'focal',sep='.')){
barplot(sd.ecocode[[i]], las = 3,
main=i, ylab = 'sd', names.arg = sd.ecocode$ecocode,
col = col.vec[sd.ecocode$set])
}
dev.off()
pdf("figs/test.processed/boxplot.traits.div.tot.ecocode.pdf",
width = 12, height = 12)
par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1))
for (i in c('shannon')){
barplot(exp(sd.ecocode[[i]]), las = 3,
main=i, names.arg = sd.ecocode$ecocode,
col = col.vec[sd.ecocode$set], log ='y',
ylim = range(1, exp(sd.set[[i]])),
ylab = 'exp(shannon)')
}
for (i in c('nsp')){
barplot(sd.ecocode[[i]], las = 3,
main=i, names.arg = sd.ecocode$ecocode,
col = col.vec[sd.ecocode$set], log ='y',
ylim = range(0.1, (sd.set[[i]])),
ylab = 'N species')
}
dev.off()
pdf("figs/test.processed/boxplot.traits.sd.tot.set.pdf",
width = 12, height = 12)
par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
for (i in paste(trait,'focal',sep='.')){
barplot(sd.set[[i]], las = 3,
main = i, ylab = 'sd', names.arg = sd.set$set,
col = col.vec[sd.set$set])
}
dev.off()
pdf("figs/test.processed/boxplot.traits.div.tot.SET.pdf",
width = 12, height = 12)
par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1))
for (i in c('shannon')){
barplot(exp(sd.set[[i]]), las = 3,
main=i, names.arg = sd.set$set,
col = col.vec[sd.set$set], log = 'y',
ylim = range(1, exp(sd.set[[i]])),
ylab = 'exp(shannon)')
abline(h = 5)
}
for (i in c('nsp')){
barplot((sd.set[[i]]), las = 3,
main=i, names.arg = sd.set$set,
col = col.vec[sd.set$set], log ='y',
ylim = range(0.1, (sd.set[[i]])),
ylab = 'N species')
}
dev.off()
########
### TODO
#- look at BATOT ALONG MAP AND MAT (log scale)
#- how to compute FD on plot with diferent size
#- pattern of CWM
#- pattern ED angio/conif
## look at BATOT per ecocode
MAP.ECOCODE <- tapply(data.all$MAP, INDEX = data.all$ecocode, FUN = mean)
BATOT.ECOCODE <- tapply(data.all$BATOT, INDEX = data.all$ecocode, FUN = mean)
SET.ECOCODE <- tapply(data.all$set, INDEX = data.all$ecocode, FUN = unique)
pdf("figs/test.processed/BATOT.vs.MAP.pdf")
par(mar=c(8.1,4.1,4.1,2.1))
plot(log(MAP.ECOCODE), BATOT.ECOCODE, pch = '', ylim = c(0,180) ,
xlab = 'Mean annual precipitation (log)',
ylab = 'Total basal area per plot (m^2/ha)')
boxplot(data.all$BATOT ~ data.all$ecocode, las = 3,
at = log(MAP.ECOCODE),names = NA,
boxwex = 0.05, outline = FALSE,
col = col.vec[SET.ECOCODE], xlim = range(log(MAP.ECOCODE)), add = TRUE)
legend('topleft', legend = names(col.vec),fill=col.vec)
dev.off()
pdf("figs/test.processed/boxplot.BATOT.D.G.BA.G.pdf", width = 20 , height = 10)
par(mfrow=c(2,2))
for (i in c('BATOT', 'D', 'G', 'BA.G'))
boxplot((data.all[[i]]+1000) ~ data.all$ecocode, las = 3, ylab = i,
log = 'y' , outline = FALSE)
dev.off()
trait <- c('Leaf.N','SLA','Wood.density','Max.height')
pdf("figs/test.processed/boxplot.traits.focal.pdf",
width = 12, height = 7)
par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
for (i in paste(trait,'focal',sep='.')){
set.vec <- sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])),
function(x) gsub(paste(" ",
gsub("^([a-zA-Z]* )", "",x),
sep = ""),
"", x, fixed = TRUE))
boxplot(data.all[[i]] ~ data.all$ecocode, las = 3,
main=i,
col = col.vec[set.vec], outline = FALSE)
}
dev.off()
pdf("figs/test.processed/boxplot.traits.CWM.fill.pdf",
width = 12, height = 7)
par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
for (i in paste(trait,'CWM.fill',sep='.')){
set.vec <- sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])),
function(x) gsub(paste(" ",
gsub("^([a-zA-Z]* )", "",x),
sep = ""),
"", x, fixed = TRUE))
boxplot(data.all[[i]] ~ data.all$ecocode, las = 3,
main=i,
col = col.vec[set.vec], outline = FALSE)
}
dev.off()
pdf("figs/test.processed/boxplot.traits.abs.CWM.fill.pdf",
width = 12, height = 7)
par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
for (i in paste(trait,'abs.CWM.fill',sep='.')){
set.vec <- sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])),
function(x) gsub(paste(" ",
gsub("^([a-zA-Z]* )", "",x),
sep = ""),
"", x, fixed = TRUE))
boxplot(data.all[[i]] ~ data.all$ecocode, las = 3,
main=i,
col = col.vec[set.vec], outline = FALSE)
}
dev.off()
### compute mean BATOT, number of species, traits and VAR OF TRAITS per cluster
system.time( data.summarise <- fun.compute.all.var.cluster( data.all ) )
pdf("figs/test.processed/perc.angio.deciduous.pdf")
par(mfrow = c(2, 1))
## angio
per.angio.ecocode.mean <- tapply(data.summarise$per.angio,
data.summarise$ecocode,mean,
na.rm = TRUE)
names.ecocode <- tapply(data.summarise$ecocode, data.summarise$ecocode,unique)
set.ecocode <- tapply(data.summarise$set, data.summarise$ecocode,unique)
names(per.angio.ecocode.mean) <- names.ecocode
par(mar=c(8.1,4.1,4.1,2.1))
mp <- barplot(per.angio.ecocode.mean, las = 3,
col= col.vec[set.ecocode], ylab = 'perc angio')
## deciduous
per.deciduous.ecocode.mean <- tapply(data.summarise$per.deciduous,
data.summarise$ecocode, mean ,
na.rm = TRUE)
names.ecocode <- tapply(data.summarise$ecocode, data.summarise$ecocode, unique)
set.ecocode <- tapply(data.summarise$set, data.summarise$ecocode,unique)
names(per.deciduous.ecocode.mean) <- names.ecocode
par(mar=c(8.1,4.1,4.1,2.1))
mp <- barplot(per.deciduous.ecocode.mean, las = 3,
col = col.vec[set.ecocode], ylab = 'perc deciduous')
dev.off()
plot(log(data.summarise$MAP), data.summarise$BATOT,
col = col.vec[data.summarise$set], cex = 0.1, ylim = c(0, 190))
fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$BATOT,
Nclass = 15, add = TRUE)
par(mfrow = c(1,2))
plot(log(data.summarise$MAP),data.summarise$sd.SLA,
,col=col.vec[data.summarise$set], cex = 0.1)
fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$sd.SLA,
Nclass = 15, add = TRUE)
plot(log(data.summarise$MAP),data.summarise$mean.SLA,
,col=col.vec[data.summarise$set], cex = 0.1,)
fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$mean.SLA,
Nclass = 15, add = TRUE)
par(mfrow = c(1,2))
plot(log(data.summarise$MAP),data.summarise$sd.Wood.density,
,col=col.vec[data.summarise$set], cex = 0.1)
fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$sd.Wood.density,
Nclass = 15, add = TRUE)
plot(log(data.summarise$MAP),data.summarise$mean.Wood.density,
,col=col.vec[data.summarise$set], cex = 0.1,)
fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$mean.Wood.density,
Nclass = 15, add = TRUE)
pch.vec <- 1:14
names(pch.vec) <- names(col.vec)
plot(data.summarise$n_sp,data.summarise$sd.Wood.density,
,col=col.vec[data.summarise$set],pch=pch.vec[data.summarise$set])
legend("topright",legend=names(col.vec),col=col.vec,pch=pch.vec)
pdf("figs/test.processed/boxplot.traits.sd.pdf",
width = 12, height = 7)
par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
for (i in paste("sd",trait,sep='.')){
set.vec <- sapply(levels(factor(data.summarise$ecocode[
!is.na(data.summarise[[i]])])),
function(x) gsub(paste(" ",
gsub("^([a-zA-Z]* )", "",x),
sep = ""),
"", x, fixed = TRUE))
boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3,
main=i,
col = col.vec[set.vec])
}
dev.off()
pdf("figs/test.processed/boxplot.traits.mean.pdf",
width = 12, height = 7)
par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
for (i in paste("mean",trait,sep='.')){
set.vec <- sapply(levels(factor(data.summarise$ecocode[
!is.na(data.summarise[[i]])])),
function(x) gsub(paste(" ",
gsub("^([a-zA-Z]* )", "",x),
sep = ""),
"", x, fixed = TRUE))
boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3,
main=i,
col = col.vec[set.vec])
}
dev.off()
pdf("figs/test.processed/boxplot.div.pdf",
width = 12, height = 7)
par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1))
for (i in c('shannon', 'simpson')){
set.vec <- sapply(levels(factor(data.summarise$ecocode[
!is.na(data.summarise[[i]])])),
function(x) gsub(paste(" ",
gsub("^([a-zA-Z]* )", "",x),
sep = ""),
"", x, fixed = TRUE))
boxplot(exp(data.summarise[[i]]) ~ data.summarise$ecocode, las = 3,
main=i,
col = col.vec[set.vec])
}
dev.off()
pdf("figs/test.processed/boxplot.nsp.pdf",
width = 12, height = 7)
par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1))
for (i in c('n_sp', 'count')){
set.vec <- sapply(levels(factor(data.summarise$ecocode[
!is.na(data.summarise[[i]])])),
function(x) gsub(paste(" ",
gsub("^([a-zA-Z]* )", "",x),
sep = ""),
"", x, fixed = TRUE))
boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3,
main = i,
col = col.vec[set.vec])
}
dev.off()
fun.boxplot.data <- function(data, x, y, ...){
fun.boxplot.breaks(data[[x]]*data[['BATOT']], data[[y]],
Nclass = 15, add = FALSE,
main = unique(data$ecocode), ...)
}