Commit f12d15b5 authored by Georges Kunstler's avatar Georges Kunstler
Browse files

start to get angio-conif D EV

parent 143a6ccf
#### function to analyse lmer output
library(lme4)
read.lmer.output <- function(file.name){
tryCatch(readRDS(file.name), error=function(cond)return(NULL)) # Choose a return value in case of error
}
summarise.lmer.output <- function(x){
list( nobs = nobs(x),
R2m =Rsquared.glmm.lmer(x)$Marginal,
R2c =Rsquared.glmm.lmer(x)$Conditional,
AIC = AIC(x),
deviance = deviance(x),
conv=x@optinfo$conv,
effect.response.var=variance.fixed.glmm.lmer.effect.and.response(x),
fixed.coeff.E=fixef(x),
fixed.coeff.Std.Error=sqrt(diag(vcov(x))),
fixed.var=variance.fixed.glmm.lmer(x))
}
summarise.lmer.output.list <- function(f ){
output.lmer <- read.lmer.output(f)
if(!is.null(output.lmer)){
res <- list(files.details=files.details(f),
lmer.summary=summarise.lmer.output( output.lmer))
}else{
res <- NULL
}
return(res)
}
files.details <- function(x){
s <- data.frame(t(strsplit(x, "/", fixed=TRUE)[[1]]), stringsAsFactors= FALSE)
names(s) <- c("d1", "d2", "set", "ecocode", "trait", "filling", "model", "file" )
s[-(1:2)]
}
#### R squred functions
# Function rsquared.glmm requires models to be input as a list (can include fixed-
# effects only models,but not a good idea to mix models of class "mer" with models
# of class "lme") FROM http://jslefche.wordpress.com/2013/03/13/r2-for-linear-mixed-effects-models/
Rsquared.glmm.lmer <- function(i){
# Get variance of fixed effects by multiplying coefficients by design matrix
VarF <- var(as.vector(fixef(i) %*% t(i@pp$X)))
# Get variance of random effects by extracting variance components
VarRand <- colSums(do.call(rbind,lapply(VarCorr(i),function(j) j[1])))
# Get residual variance
VarResid <- attr(VarCorr(i),"sc")^2
# Calculate marginal R-squared (fixed effects/total variance)
Rm <- VarF/(VarF+VarRand+VarResid)
# Calculate conditional R-squared (fixed effects+random effects/total variance)
Rc <- (VarF+VarRand)/(VarF+VarRand+VarResid)
# Bind R^2s into a matrix and return with AIC values
Rsquared.mat <- data.frame(Class=class(i),Family="Gaussian",Marginal=Rm,
Conditional=Rc,AIC=AIC(i))
return(Rsquared.mat)
}
variance.fixed.glmm.lmer <- function(i){
# Get variance of for each fixed effects by multiplying coefficients by design matrix
var.vec <- apply(fixef(i) * t(i@pp$X),MARGIN=1,var)
# Get variance of fixed effects by multiplying coefficients by design matrix
VarF <- var(as.vector(fixef(i) %*% t(i@pp$X)))
# Get variance of random effects by extracting variance components
VarRand <- colSums(do.call(rbind,lapply(VarCorr(i),function(j) j[1])))
# Get residual variance
VarResid <- attr(VarCorr(i),"sc")^2
var.vec <- var.vec/(VarF+VarRand+VarResid)
names(var.vec) <- paste(names(var.vec),"VAR",sep=".")
return(var.vec)
}
variance.fixed.glmm.lmer.effect.and.response <- function(i){
if(sum(c("sumTfBn","sumTnBn") %in% names(fixef(i)))==2){
# Get variance of for each fixed effects by multiplying coefficients by design matrix
var.vec <- var(as.vector(fixef(i)[c("sumTfBn","sumTnBn")] %*% t(i@pp$X[,c("sumTfBn","sumTnBn")])))
# Get variance of fixed effects by multiplying coefficients by design matrix
VarF <- var(as.vector(fixef(i) %*% t(i@pp$X)))
# Get variance of random effects by extracting variance components
VarRand <- colSums(do.call(rbind,lapply(VarCorr(i),function(j) j[1])))
# Get residual variance
VarResid <- attr(VarCorr(i),"sc")^2
var.vec <- var.vec/(VarF+VarRand+VarResid)
}else{
var.vec <- NA
}
names(var.vec) <- paste("effect.response","VAR",sep=".")
return(var.vec)
}
## function to turn lmer output from list to DF
fun.format.in.data.frame <- function(list.res,names.param){
dat.t <- data.frame(t(rep(NA,3*length(names.param))))
names(dat.t) <- c(names.param,paste(names.param,"Std.Error",sep=".")
,paste(names.param,"VAR",sep="."))
dat.t[,match(names(list.res$lmer.summary$fixed.coeff.E),names(dat.t))] <-
list.res$lmer.summary$fixed.coeff.E
dat.t[,length(names.param)+match(names(list.res$lmer.summary$fixed.coeff.E),names(dat.t))] <-
list.res$lmer.summary$fixed.coeff.Std.Error
dat.t[,match(names(list.res$lmer.summary$fixed.var),names(dat.t))] <-
list.res$lmer.summary$fixed.var
res <- data.frame(list.res$files.details,list.res$lmer.summary[1:7],dat.t)
return(res)
}
#!/usr/bin/env Rscript
source("R/analysis/lmer.nolog.output-fun.R")
source("R/analysis/lmer.nolog.run.R")
source("R/analysis/lmer.output-fun.R")
source("R/analysis/lmer.run.nolog.R")
## TODO NEED TO CHANGE THAT TO LOOP OVER FILES AND NOT SCAN ALL FILES
sets <- c('BCI','Canada','France','Fushan','NVS','Paracou',
......@@ -13,12 +13,12 @@ for (set in sets){
ecoregions <- get.ecoregions.for.set(set)
for (ecoregion in ecoregions){
for (trait in traits){
for (model in c(model.files.lmer.Tf.1,model.files.lmer.Tf.2)){
for (model in c(model.files.lmer.Tf.1)){
source(model, local = TRUE)
model.obj <- load.model()
pathout <- output.dir.lmer(model=model.obj$name, trait, set, ecoregion,type.filling)
files <- c(files,file.path(pathout,"results.no.std.nolog.rds"))
files <- c(files,file.path(pathout,"results.nolog.rds"))
}
}
}
......@@ -27,8 +27,10 @@ ecoregions <- get.ecoregions.for.set(set)
out <- lapply(files, summarise.lmer.output.list)
names(out) <- lapply(lapply(files,files.details),function(x) paste(as.vector(x[-6]),collapse="_"))
saveRDS(out,file='output/lmer.list.out.rds')
### remove missing
out <- out[!unlist(lapply(out,FUN=function(x) is.null(x$lmer.summary)))]
saveRDS(out,file='output/lmer.list.out.nolog.rds')
names.param <- unique(unlist(lapply(out,function(list.res) names(list.res$lmer.summary$fixed.coeff.E))))
DF.results <- do.call("rbind",lapply(out,fun.format.in.data.frame,names.param=names.param))
DF.results$id <- paste(DF.results$set,DF.results$ecocode,sep=".")
saveRDS(DF.results,file='output/lmer.DF.rds')
saveRDS(DF.results,file='output/lmer.nolog.DF.rds')
......@@ -283,10 +283,13 @@ segments( df.selected[[var.x]]-small.bar,df.selected[[param]] + 1.96*df.selected
df.selected[[var.x]]+small.bar,df.selected[[param]] + 1.96*df.selected[[paste(param,"Std.Error",sep=".")]],col=col.vec)
}
fun.plot.all.param.x.y.c <- function(model.selected,trait.name,DF.results,var.x,params,small.bar,threshold.delta.AIC,col.vec,col.set=TRUE,...){
fun.plot.all.param.x.y.c <- function(model.selected,trait.name,DF.results,var.x,params,
small.bar,threshold.delta.AIC,col.vec,col.set=TRUE,...){
df.selected <- fun.select.ecoregions.trait(DF.results,trait.name=trait.name,model.selected=model.selected,threshold.delta.AIC=threshold.delta.AIC)
if(col.set) {col.vec2 <- col.vec[unclass(df.selected[['set']])]}else{col.vec2 <- 1}
plot(df.selected[[var.x]],df.selected[[params[1]]],col=col.vec2,...)
ylim <- range(c(df.selected[[params[1]]] - 1.96*df.selected[[paste(params[1],"Std.Error",sep=".")]],
df.selected[[params[1]]] + 1.96*df.selected[[paste(params[1],"Std.Error",sep=".")]]),na.rm=TRUE)
plot(df.selected[[var.x]],df.selected[[params[1]]],col=col.vec2,ylim=ylim,...)
fun.plot.param.error.bar(df.selected,var.x,param=params[1],small.bar,col=col.vec2)
}
......
......@@ -27,6 +27,8 @@ ecoregions <- get.ecoregions.for.set(set)
out <- lapply(files, summarise.lmer.output.list)
names(out) <- lapply(lapply(files,files.details),function(x) paste(as.vector(x[-6]),collapse="_"))
### remove missing
out <- out[!unlist(lapply(out,FUN=function(x) is.null(x$lmer.summary)))]
saveRDS(out,file='output/lmer.list.out.rds')
names.param <- unique(unlist(lapply(out,function(list.res) names(list.res$lmer.summary$fixed.coeff.E))))
DF.results <- do.call("rbind",lapply(out,fun.format.in.data.frame,names.param=names.param))
......
......@@ -15,6 +15,16 @@ run.models.for.set.all.traits('Paracou',model.files.lmer.Tf.1, run.lmer,type.fil
run.models.for.set.all.traits('Mbaiki',model.files.lmer.Tf.1, run.lmer,type.filling='species',std=TRUE)
run.models.for.set.all.traits('US',model.files.lmer.Tf.1, run.lmer,type.filling='species',std=TRUE)
sets <- c('France','Spain','Sweden','Swiss','BCI','Fushan','Luquillo','NVS','Japan','Canada','Paracou','Mbaiki','US')
library(doParallel)
cl <- makeCluster(7)
registerDoParallel(cl)
foreach(set=sets) %dopar% run.models.for.set.all.traits(set,model.files.lmer.Tf.1, run.lmer,type.filling='species',std=FALSE)
foreach(set=sets) %dopar% run.models.for.set.all.traits(set,model.files.lmer.Tf.2, run.lmer,type.filling='species',std=FALSE)
foreach(set=sets) %dopar% run.models.for.set.all.traits(set,model.files.lmer.Tf.2, run.lmer,type.filling='species',std=TRUE)
......
......@@ -14,20 +14,53 @@ species.clean$Latin_name_syn<- (gsub("_", " ", species.clean$Latin_name_syn))
species.clean <- subset(species.clean,subset=!is.na(species.clean$Latin_name_syn))
species.clean$sp <- paste("sp",species.clean$code,sep=".")
species.clean$code <- NULL
## remove duplicated sp
species.clean <- species.clean[!duplicated(species.clean$sp),]
## read in data
data.TRY.std <- readRDS("output/formatted/TRY/data.TRY.std.rds")
max.height <- read.csv(file="output/formatted/France/max.height.csv", stringsAsFactors = FALSE)
max.height$Max.height.genus <- FALSE
### extract and add height
data.traits <- fun.extract.format.sp.traits.TRY(sp=species.clean[["sp"]],sp.syno.table=species.clean,data=data.TRY.std)
data.traits <- fun.extract.format.sp.traits.TRY(sp=species.clean[["sp"]],
sp.syno.table=species.clean,
data=data.TRY.std)
data.traits <- merge(data.traits,
subset(max.height,select=c("sp","Max.height.mean","Max.height.sd","Max.height.genus")),
by="sp",all.x=TRUE,all.y=FALSE)
height.genus.DF <- do.call("rbind",lapply(data.traits$Latin_name,fun.compute.mean.genus,data.traits,"Max.height.mean"))
height.genus.DF <- do.call("rbind",lapply(data.traits$Latin_name,
fun.compute.mean.genus,data.traits,"Max.height.mean"))
data.traits[is.na(data.traits[["Max.height.mean"]]),
c("Max.height.mean","Max.height.sd","Max.height.genus")] <- height.genus.DF[is.na(data.traits[["Max.height.mean"]]),]
c("Max.height.mean","Max.height.sd","Max.height.genus")] <-
height.genus.DF[is.na(data.traits[["Max.height.mean"]]),]
#### TODO GET THE ANGIO/CONIF AND EVERGREEN/DECIDUOUS
# read try categrocial data
try.cat <- read.csv("data/raw/TRY/TRY_Categorical_Traits_Lookup_Table_2012_03_17_TestRelease.csv",
stringsAsFactors=FALSE,na.strings = "")
Pheno.Zanne <- read.csv("data/raw/ZanneNature/GlobalLeafPhenologyDatabase.csv",
stringsAsFactors=FALSE)
data.cat.extract <- do.call("rbind",lapply(data.traits$sp ,fun.get.cat.var.from.try,
data.traits,try.cat,Pheno.Zanne))
data.cat.extract[data.cat.extract$Pheno.T=='deciduous'
& !is.na(data.cat.extract$Pheno.T),'Pheno.T'] <- 'D'
data.cat.extract[data.cat.extract$Pheno.T=='evergreen'
& !is.na(data.cat.extract$Pheno.T),'Pheno.T'] <- 'EV'
data.cat.extract[data.cat.extract$Pheno.T=='deciduous/evergreen'
& !is.na(data.cat.extract$Pheno.T),'Pheno.T'] <- 'D_EV'
data.traits <- merge(data.traits,data.cat.extract,by="sp")
## check if leaf Phenology agree with Zanne
sum(Pheno.Zanne$Binomial %in% try.cat$AccSpeciesName)
merge.pheno <- merge(Pheno.Zanne,try.cat,by.x="Binomial",by.y="AccSpeciesName",all=FALSE)
table(merge.pheno$LeafPhenology,merge.pheno$Phenology)
###
write.csv(data.traits,file="output/formatted/France/traits.csv",row.names = FALSE)
......
......@@ -278,3 +278,50 @@ extract.species.traits <- rbind.fill(lapply(Latin_name,FUN=fun.spe.traits.notry
return(data.frame.TRAITS)
}
## FUNCTION TO GET angio /confi genus is enough FROM TRY and Zanne
fun.get.cat.var.from.try <- function(sp,data.traits,try.cat,Pheno.Zanne){
if(sum(data.traits[data.traits$sp==sp,"Latin_name"] == try.cat$AccSpeciesName)>0){
if(sum(data.traits[data.traits$sp==sp,"Latin_name"] ==
Pheno.Zanne$Binomial)>0){
data.res <- data.frame(sp=sp,
Latin_name=data.traits[data.traits$sp==sp,"Latin_name"],
Phylo.group=try.cat[data.traits[data.traits$sp==sp,"Latin_name"] ==
try.cat$AccSpeciesName,"PhylogeneticGroup"][1],
Pheno.T=try.cat[data.traits[data.traits$sp==sp,"Latin_name"] ==
try.cat$AccSpeciesName,"LeafPhenology"][1],
Pheno.Z=Pheno.Zanne[data.traits[data.traits$sp==sp,"Latin_name"] ==
Pheno.Zanne$Binomial,'Phenology'][1] ,
stringsAsFactors=FALSE)
}else{
data.res <- data.frame(sp=sp,
Latin_name=data.traits[data.traits$sp==sp,"Latin_name"],
Phylo.group=try.cat[data.traits[data.traits$sp==sp,"Latin_name"] ==
try.cat$AccSpeciesName,"PhylogeneticGroup"][1],
Pheno.T=try.cat[data.traits[data.traits$sp==sp,"Latin_name"] ==
try.cat$AccSpeciesName,"LeafPhenology"][1],
Pheno.Z=NA ,
stringsAsFactors=FALSE)
}
}else{
if(sum(data.traits[data.traits$sp==sp,"Latin_name"] ==
Pheno.Zanne$Binomial)>0){
data.res <- data.frame(sp=sp,
Latin_name=data.traits[data.traits$sp==sp,"Latin_name"],
Phylo.group=try.cat[fun.get.genus(data.traits[data.traits$sp==sp,"Latin_name"]) ==
sapply(try.cat$AccSpeciesName,fun.get.genus),"PhylogeneticGroup"][1],
Pheno.T=NA,
Pheno.Z=Pheno.Zanne[data.traits[data.traits$sp==sp,"Latin_name"] ==
Pheno.Zanne$Binomial,'Phenology'][1] ,
stringsAsFactors=FALSE)
}else{
data.res <- data.frame(sp=sp,
Latin_name=data.traits[data.traits$sp==sp,"Latin_name"],
Phylo.group=try.cat[fun.get.genus(data.traits[data.traits$sp==sp,"Latin_name"]) ==
sapply(try.cat$AccSpeciesName,fun.get.genus),"PhylogeneticGroup"][1],
Pheno.T=NA,
Pheno.Z=NA ,
stringsAsFactors=FALSE)
}
}
return(data.res)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment