diff --git a/R/analysis/analysis.R b/R/analysis/analysis.R deleted file mode 100644 index 9ea9044af7129c377b9c25f33721f5c5244ea376..0000000000000000000000000000000000000000 --- a/R/analysis/analysis.R +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env Rscript -############################################# MERGE FRENCH DATA -path.formatted <- "output/formatted" -path.processed <- "output/processed" -library(plyr) - -source("R/analysis/analysis-fun.R") -source("R/utils/plot.R") -source("R/utils/maps.R") -source("R/utils/climate.R") - -## load all sites -data <- load_all_formatted_data(path.formatted) - - -get_unique_plot_coords <- function(set, data){ - i <- !duplicated(paste(data[[set]]$tree$Lon, data[[set]]$tree$Lat)) #find duplicates - data.frame(set = set, Lon = data[[set]]$tree$Lon[i], Lat = data[[set]]$tree$Lat[i], ecocode = data[[set]]$tree$ecocode[i], stringsAsFactors=FALSE) -} - -x <- rbind.fill(lapply(names(data), get_unique_plot_coords, data=data)) -write.csv(x, file.path(path.processed, "all.sites.coords.csv"), row.names=FALSE) -x <- read.csv(file.path(path.processed, "all.sites.coords.csv"), stringsAsFactors = FALSE) - - -## GET climate with Mark V function -all.sites.coords <- read.csv(file.path(path.processed, "all.sites.coords.csv"), stringsAsFactors = FALSE) -clim.all <- GetClimate(lats=all.sites.coords$Lat,lons=all.sites.coords$Lon) -site.clim.all <- cbind(all.sites.coords,clim.all$MAT,clim.all$MAP) -write.csv(site.clim.all, file.path(path.processed, "all.sites.clim.csv"), row.names=FALSE) - -## change ecocode of Japan to tropical to have big point - -x[x$set=='Japan','ecocode'] <- 'tropical' -# Map for all sites -to.dev(world.map.all.sites(x, add.legend =TRUE), - png,file=file.path("figs", "world_map_all.png"), height =500, width = 1000) - - -# make summary table -data.summary <- lapply(data, summarise.data) - -tab.1 <- rbind.fill(lapply(data.summary,summary.table.1)) - -write.csv(tab.1, file.path(path.processed, "summary_tab.csv"), row.names = names(data.summary)) diff --git a/R/analysis/glmer.global.std.output.R b/R/analysis/glmer.global.std.output.R new file mode 100644 index 0000000000000000000000000000000000000000..962e93b6248c155c20d0f2ddec4f67ed555889c9 --- /dev/null +++ b/R/analysis/glmer.global.std.output.R @@ -0,0 +1,11 @@ +#!/usr/bin/env Rscript + +source("R/analysis/glmer.output-fun.R") +files <- list.files("output/glmer", recursive=TRUE, full.names =TRUE, pattern = "glmer.results.global.rds") +out <- lapply(files, summarise.glmer.output.list) +names(out) <- lapply(lapply(files,files.details),function(x) paste(as.vector(x[-6]),collapse="_")) +saveRDS(out,file='output/glmer.global.std.list.out.rds') +names.param <- unique(unlist(lapply(out,function(list.res) names(list.res$glmer.summary$fixed.coeff.E)))) +DF.results <- do.call("rbind",lapply(out,fun.format.in.data.frame,names.param=names.param)) +DF.results$id <- paste(DF.results$set,DF.results$ecocode,sep=".") +saveRDS(DF.results,file='output/glmer.global.std.DF.rds') diff --git a/R/analysis/glmer.output.R b/R/analysis/glmer.output.R index df8af0b104eaaef9b5b31d2cf8095d7b5f06fdae..b23459dcb41cf201099b471eef91afba52352548 100644 --- a/R/analysis/glmer.output.R +++ b/R/analysis/glmer.output.R @@ -1,7 +1,7 @@ #!/usr/bin/env Rscript source("R/analysis/glmer.output-fun.R") -files <- list.files("output/glmer", recursive=TRUE, full.names =TRUE, pattern = "rds") +files <- list.files("output/glmer", recursive=TRUE, full.names =TRUE, pattern = "glmer.results.rds") out <- lapply(files, summarise.glmer.output.list) names(out) <- lapply(lapply(files,files.details),function(x) paste(as.vector(x[-6]),collapse="_")) saveRDS(out,file='output/glmer.list.out.rds') diff --git a/R/analysis/glmer.run.R b/R/analysis/glmer.run.R index 5f6777bce37e47518f8b2e410e11ba6f2ddfdabe..64b18f609deba38ab38fee6b8f9d1315929dd564 100644 --- a/R/analysis/glmer.run.R +++ b/R/analysis/glmer.run.R @@ -56,7 +56,7 @@ fun.call.glmer.and.save <- function(formula,df.lmer,path.out){ end <- Sys.time() print(end - Start) print(summary(glmer.output)) - saveRDS(glmer.output,file=file.path(path.out, "glmer.results.no.std.rds")) + saveRDS(glmer.output,file=file.path(path.out, "glmer.results.global.rds")) } run.glmer <- function (model.file, trait, set, ecoregion, @@ -92,10 +92,10 @@ output.dir.glmer <- function (model, trait, set, ecoregion,type.filling) { # Function to prepare data for lmer #============================================================ load.and.prepare.data.for.glmer <- function(trait, set, ecoregion, - min.obs, sample.size, type.filling, + min.obs, sample.size, type.filling, base.dir = "output/processed/"){ ### load data - data.tree.tot <- read.csv(file.path(base.dir, set,ecoregion,"data.tree.tot.no.std.csv"), stringsAsFactors = FALSE) + data.tree.tot <- read.csv(file.path(base.dir, set,ecoregion,"data.tree.tot.global.csv"), stringsAsFactors = FALSE) fun.data.for.glmer(data.tree.tot,trait,type.filling=type.filling) } @@ -105,14 +105,14 @@ fun.select.data.for.analysis <- function(data.tree,abs.CWM.tntf,perc.neigh,BATOT data.tree <- subset(data.tree,subset=(!is.na(data.tree[["dead"]])) & (!is.na(data.tree[["D"]])) ) ## remove tree with zero -data.tree <- subset(data.tree,subset=data.tree[["D"]]>9.9) -## select species with missing traits +data.tree <- subset(data.tree,subset=data.tree[["D"]]>9.9) +## select species with missing traits data.tree <- data.tree[(!is.na(data.tree[[abs.CWM.tntf]]) & !is.na(data.tree[[BATOT]])),] -# select species with minimum obs +# select species with minimum obs data.tree <- subset(data.tree,subset=data.tree[["sp"]] %in% names(table(factor(data.tree[["sp"]])))[table(factor(data.tree[["sp"]]))>min.obs]) -# select obs abov min perc.neigh +# select obs abov min perc.neigh data.tree <- subset(data.tree,subset=data.tree[[perc.neigh]] > min.perc.neigh & !is.na(data.tree[[perc.neigh]])) return(data.tree) } @@ -159,6 +159,7 @@ sumTnBn <- data.tree[[CWM.tn]]*data.tree[[BATOT]] sumTfBn <- data.tree[[tf]]*data.tree[[BATOT]] sumTnTfBn.diff <- sumTnBn-sumTfBn sumBn <- data.tree[[BATOT]] + return(data.frame(dead=dead, logD=logD, species.id=species.id, @@ -176,12 +177,14 @@ return(data.frame(dead=dead, # that will be used in a simple linear model #============================================================ fun.data.for.glmer <- function(data.tree,trait,min.obs=10,type.filling='species') { -if(! trait %in% c("SLA", "Leaf.N","Seed.mass","SLA","Wood.density","Max.height")) stop("need trait to be in SLA Leaf.N Seed.mass SLA Wood.density or Max.height ") +if(! trait %in% c("SLA", "Leaf.N","Seed.mass","SLA","Wood.density","Max.height")) + stop("need trait to be in SLA Leaf.N Seed.mass SLA Wood.density or Max.height ") + # get vars names -CWM.tn <- paste(trait,"CWM",'fill',"log",sep=".") -abs.CWM.tntf <- paste(trait,"abs.CWM",'fill',"log",sep=".") +CWM.tn <- paste(trait,"CWM",'fill',sep=".") +abs.CWM.tntf <- paste(trait,"abs.CWM",'fill',sep=".") tf <- paste(trait,"focal",sep=".") -BATOT <- "BATOT.log" +BATOT <- "BATOT" perc.neigh <- paste(trait,"perc",type.filling,sep=".") data.tree <- fun.select.data.for.analysis(data.tree,abs.CWM.tntf,perc.neigh,BATOT,min.obs) #= DATA LIST FOR JAGS diff --git a/R/analysis/lmer.global.std.nolog.output.R b/R/analysis/lmer.global.std.nolog.output.R new file mode 100644 index 0000000000000000000000000000000000000000..7b05880ed4503be0a71207a0431e2dc9119d24be --- /dev/null +++ b/R/analysis/lmer.global.std.nolog.output.R @@ -0,0 +1,42 @@ +#!/usr/bin/env Rscript + +source("R/analysis/lmer.output-fun.R") +source("R/analysis/lmer.run.nolog.R") + +## LOOP OVER FILES AND NOT SCAN ALL FILES +sets <- c('BCI','Canada','France','Fushan','NVS','Paracou', + 'Spain','US','Swiss','Sweden','NSW','Mbaiki','Luquillo','Japan') +traits <- c("SLA", "Wood.density","Max.height","Leaf.N","Seed.mass") +type.filling <- 'species' +files <- c() +for (set in sets){ +ecoregions <- get.ecoregions.for.set(set) + for (ecoregion in ecoregions){ + for (trait in traits){ + for (model in c(model.files.lmer.Tf.1,model.files.lmer.Tf.2)){ + source(model, local = TRUE) + model.obj <- load.model() + + pathout <- output.dir.lmer(model=model.obj$name, trait, + set, ecoregion,type.filling) + files <- c(files,file.path(pathout,"results.global.nolog.rds")) + } + } + } +} + + +out <- lapply(files, summarise.lmer.output.list) +names(out) <- lapply(lapply(files,files.details), + function(x) paste(as.vector(x[names(x) != 'file']), + collapse="_")) + +### remove missing +out <- out[!unlist(lapply(out,FUN=function(x) is.null(x$lmer.summary)))] +saveRDS(out,file='output/lmer.list.out.global.std.nolog.rds') +names.param <- unique(unlist(lapply(out, + function(list.res) names(list.res$lmer.summary$fixed.coeff.E)))) +DF.results <- do.call("rbind",lapply(out, fun.format.in.data.frame, + names.param = names.param)) +DF.results$id <- paste(DF.results$set, DF.results$ecocode, sep=".") +saveRDS(DF.results,file='output/lmer.global.std.nolog.DF.rds') diff --git a/R/analysis/lmer.nolog.all.output.R b/R/analysis/lmer.nolog.all.output.R index cc7140324eba52f58a54032ba53c1ba869599ee1..e2b8ec3b59a4fb1b2f06e2b59de5a791cb731eac 100644 --- a/R/analysis/lmer.nolog.all.output.R +++ b/R/analysis/lmer.nolog.all.output.R @@ -7,7 +7,7 @@ source("R/analysis/lmer.run.nolog.all.R") traits <- c("SLA", "Wood.density","Max.height","Leaf.N","Seed.mass") type.filling <- 'species' files <- c() -for (trait in trait){ +for (trait in traits){ for(model in c(model.files.lmer.Tf.1,model.files.lmer.Tf.2)){ source(model, local = TRUE) model.obj <- load.model() diff --git a/R/analysis/lmer.nolog.output.figs.R b/R/analysis/lmer.nolog.output.figs.R index dc7c50362cb2cc4712c5fa4da642be7780bf66c5..8f1d6b61dcc3cc041455bd14834f08e9dee6a3dd 100644 --- a/R/analysis/lmer.nolog.output.figs.R +++ b/R/analysis/lmer.nolog.output.figs.R @@ -1,67 +1,157 @@ #!/usr/bin/env Rscript -rm(list=ls()) +rm(list = ls()) source("R/analysis/lmer.output-fun.R") source("R/utils/plot.R") ## load results DF.results <- readRDS('output/lmer.nolog.DF.rds') +## load results all +list.all.results <- readRDS('output/list.lmer.out.nolog.all.rds') +DF.global <- do.call( 'rbind', lapply(list.all.results,fun.merge.results.global)) + +fun.get.genus <- function(x) gsub(paste(" ", gsub("^([a-zA-Z]* )", "", x), sep = ""), + "", x, fixed = TRUE) +DF.global$set <- unlist(lapply(DF.global$ecocode, fun.get.genus)) + +DF.global$id <- gsub(" ", ".", DF.global$ecocode) ## load climatic data -site.clim.all <- read.csv( file.path("output/processed", "all.sites.clim.csv"), stringsAsFactors = FALSE) -site.clim.all$id <- paste(site.clim.all$set,site.clim.all$ecocode,sep=".") -## par(mfrow=c(1,2),mar=c(5, 9, 4, 1) +0.1) -## boxplot(site.clim.all$clim.all.MAT~site.clim.all$id,horizontal=TRUE,las=2,cex.axis=0.8) -## boxplot(site.clim.all$clim.all.MAP~site.clim.all$id,horizontal=TRUE,las=2,cex.axis=0.8) -mean.MAT <- tapply(site.clim.all$clim.all.MAT,INDEX=site.clim.all$id,FUN=mean,na.rm=TRUE) -mean.MAP <- tapply(site.clim.all$clim.all.MAP,INDEX=site.clim.all$id,FUN=mean,na.rm=TRUE) -data.clim.ecocode <- data.frame(id=names(mean.MAT),MAT=mean.MAT,MAP=mean.MAP) +site.clim.all <- read.csv( file.path("output/processed", "all.sites.clim.csv"), + stringsAsFactors = FALSE) +site.clim.all$id <- paste(site.clim.all$set, site.clim.all$ecocode, sep = ".") +mean.MAT <- tapply(site.clim.all$MAT, INDEX = site.clim.all$id, + FUN = mean, na.rm = TRUE) +mean.MAP <- tapply(site.clim.all$MAP, INDEX = site.clim.all$id, + FUN = mean, na.rm = TRUE) +unique.set <- tapply(site.clim.all$set, INDEX = site.clim.all$id, + FUN = unique, na.rm = TRUE) + +data.clim.ecocode <- data.frame(id = names(mean.MAT), + MAT = mean.MAT, + MAP = mean.MAP, + set = unique.set, stringsAsFactors = FALSE) + +biomes.vec <- fun.overly.plot.on.biomes(MAP = data.clim.ecocode$MAP/10, + MAT = data.clim.ecocode$MAT, + names.vec = 1:nrow(data.clim.ecocode)) +data.clim.ecocode$biomes <- as.character(unlist(biomes.vec)) + + ### merge climate and lmer results -DF.results <- merge(DF.results,data.clim.ecocode,by="id") +DF.results <- merge(DF.results, + data.clim.ecocode[, c('id','MAT','MAP','biomes')] , + by = "id") +DF.results$id2 <- paste(DF.results$id, DF.results$trait, DF.results$filling) + +DF.global <- merge(DF.global, + data.clim.ecocode[, c('id','MAT','MAP','biomes')] , + by = "id") +DF.global$id2 <- paste(DF.global$id, DF.global$trait, DF.global$filling) + +## add value for NVS +DF.results$biomes[DF.results$ecocode == 'MixedCool'] <- 'temperate_rainforest' +DF.global$biomes[DF.global$ecocode == 'MixedCool'] <- 'temperate_rainforest' + +## ADD percentage traits +site.perc.all <- read.csv('output/processed/all.sites.perc.traits.csv', + stringsAsFactors = FALSE) +site.perc.all$id <- paste(site.perc.all$set, site.perc.all$ecocode, sep = ".") +DF.results <- merge(DF.results, + subset(site.perc.all, + select = c('id', 'perc.gymno', + 'perc.ev', 'BATOT.m')), + by = "id") + +DF.global <- merge(DF.global, + subset(site.perc.all, + select = c('id', 'perc.gymno', + 'perc.ev', 'BATOT.m')), + by = "id") + -DF.results$id2 <- paste(DF.results$id,DF.results$trait,DF.results$filling) ### save -saveRDS(DF.results,file='output/lmer.nolog.DF.results.merged.rds') +saveRDS(DF.results, file = 'output/lmer.nolog.DF.results.merged.rds') +saveRDS(DF.global, file = 'output/lmer.nolog.DF.results.merged.global.rds') + +par(mfrow = c(2, 4)) +for (param in c('sumTnBn', 'sumTfBn')){ + for (t in c('Wood.density', 'SLA', 'Leaf.N', 'Max.height')){ + plot(DF.global[DF.global$model == 'lmer.LOGLIN.ER.Tf' & + DF.global$trait == t, 'MAP'], + DF.global[DF.global$model == 'lmer.LOGLIN.ER.Tf' & + DF.global$trait == t, param], + col = col.vec[DF.global[DF.global$model == 'lmer.LOGLIN.ER.Tf' & + DF.global$trait == t, 'set']], + xlab = 'MAP (mm)', ylab = param, log = 'x') + abline( h = 0) + } +} + + +DF.results <- readRDS(file = 'output/lmer.nolog.DF.results.merged.rds') -DF.results <- readRDS(file='output/lmer.nolog.DF.results.merged.rds') ### Analysis of the results -DF.R2m.diff <- do.call("rbind",lapply(1:nrow(DF.results),fun.compute.criteria.diff,DF.results,"R2m")) -DF.R2c.diff <- do.call("rbind",lapply(1:nrow(DF.results),fun.compute.criteria.diff,DF.results,"R2c")) -DF.AIC.diff <- do.call("rbind",lapply(1:nrow(DF.results),fun.compute.criteria.diff,DF.results,"AIC")) -DF.delta.AIC <- do.call("rbind",lapply(1:nrow(DF.results),fun.compute.delta.AIC,DF.results)) +DF.R2m.diff <- do.call("rbind", lapply(1:nrow(DF.results), + fun.compute.criteria.diff, + DF.results, "R2m")) +DF.R2c.diff <- do.call("rbind", lapply(1:nrow(DF.results), + fun.compute.criteria.diff, + DF.results, "R2c")) +DF.AIC.diff <- do.call("rbind", lapply(1:nrow(DF.results), + fun.compute.criteria.diff, + DF.results, "AIC")) +DF.delta.AIC <- do.call("rbind", lapply(1:nrow(DF.results), + fun.compute.delta.AIC, DF.results)) DF.var.fixed <- fun.ratio.var.fixed.effect(DF.results) -DF.results <- cbind(DF.results,DF.R2m.diff,DF.R2c.diff,DF.AIC.diff,DF.delta.AIC,DF.var.fixed) +DF.results <- cbind(DF.results, DF.R2m.diff, DF.R2c.diff, + DF.AIC.diff, DF.delta.AIC, DF.var.fixed) ### report best model based on AIC -DF.best.and.all.AIC <- do.call('rbind',lapply(unique(DF.results$id2),FUN=fun.AIC,DF.results)) -DF.best.and.all.AICc <- do.call('rbind',lapply(unique(DF.results$id2),FUN=fun.AICc,DF.results)) +DF.best.and.all.AIC <- do.call('rbind', lapply(unique(DF.results$id2), + FUN = fun.AIC, DF.results)) +DF.best.and.all.AICc <- do.call('rbind', lapply(unique(DF.results$id2), + FUN = fun.AICc, DF.results)) + +DF2 <- DF.results[DF.results$model %in% + c('lmer.LOGLIN.AD.Tf', + 'lmer.LOGLIN.HD.Tf', + 'lmer.LOGLIN.nocomp.Tf', + 'lmer.LOGLIN.simplecomp.Tf'), ] +DF.best.and.all.AIC2 <- do.call('rbind', lapply(unique(DF2$id2), + FUN = fun.AIC,DF2 )) -table(DF.best.and.all.AIC[DF.best.and.all.AIC$filling=='species',]$best.model, - DF.best.and.all.AIC[DF.best.and.all.AIC$filling=='species',]$trait, - DF.best.and.all.AIC[DF.best.and.all.AIC$filling=='species',]$set) -t(table(DF.best.and.all.AIC[DF.best.and.all.AIC$filling=='species',]$best.model, - DF.best.and.all.AIC[DF.best.and.all.AIC$filling=='species',]$set))/(apply(table(DF.best.and.all.AIC[ - DF.best.and.all.AIC$filling=='species',]$best.model, - DF.best.and.all.AIC[DF.best.and.all.AIC$filling=='species',]$set), - MARGIN=2,sum)) +(table(DF.best.and.all.AIC[DF.best.and.all.AIC$filling == 'species', ]$best.model, + DF.best.and.all.AIC[DF.best.and.all.AIC$filling == 'species', ]$trait, + DF.best.and.all.AIC[DF.best.and.all.AIC$filling == 'species', ]$set)) + +t(table(DF.best.and.all.AIC[DF.best.and.all.AIC$filling == 'species', ]$best.model, + DF.best.and.all.AIC[DF.best.and.all.AIC$filling == 'species', ]$set))/ + (apply(table(DF.best.and.all.AIC[DF.best.and.all.AIC$filling == 'species', + ]$best.model, + DF.best.and.all.AIC[DF.best.and.all.AIC$filling == 'species', ]$set), + MARGIN=2, sum)) ## AIC weights -AIC.weights <- do.call('rbind', - lapply(1:nrow(DF.best.and.all.AICc), - FUN=function(i,DF) exp((min(DF[i,])-DF[i,])/2)/sum(exp((min(DF[i,])-DF[i,])/2)), - DF.best.and.all.AIC[,9:15])) -DF.AIC.weights <- data.frame(DF.best.and.all.AICc[,1],AIC.weights) -names(DF.AIC.weights) <- c('id2',paste('AIC.weight',names(DF.AIC.weights)[-1],sep='.')) -DF.best.and.all.AIC <- merge(DF.best.and.all.AIC,DF.AIC.weights,by='id2') +AIC.weights <- do.call('rbind', + lapply(1:nrow(DF.best.and.all.AICc), + FUN=function(i, DF) exp((min(DF[i, ])-DF[i, ])/2)/ + sum(exp((min(DF[i, ])-DF[i, ])/2)), + DF.best.and.all.AIC[, 9:15])) +DF.AIC.weights <- data.frame(DF.best.and.all.AICc[, 1], AIC.weights) +names(DF.AIC.weights) <- c('id2', paste('AIC.weight', + names(DF.AIC.weights)[-1], + sep = '.')) +DF.best.and.all.AIC <- merge(DF.best.and.all.AIC, DF.AIC.weights, by = 'id2') #### compute percentage of variance explained by var @@ -70,19 +160,249 @@ DF.results$R.perc.var <- DF.results$sumTfBn.VAR/DF.results$sumBn.VAR DF.results$E.perc.var <- DF.results$sumTnBn.VAR/DF.results$sumBn.VAR DF.results$ER.perc.var <- DF.results$effect.response.var/DF.results$sumBn.VAR +## compute a global AIC +fun.global.aic <- function(DF.results){ +DF.results <- DF.results[DF.results$nobs>1000, ] +# select set ecocode with more than 1000 obs +DF.results <- DF.results[DF.results$id2 %in% names(table(DF.results$id2))[ + table(DF.results$id2) == 7], ] +# select set ecocode with 7 model tested +## species +DF.results.sp <- DF.results[DF.results$filling == 'species', ] + # select set ecocode with more than 1000 obs +test.same.n.model.ecoregion <- apply(table(DF.results.sp$trait, + DF.results.sp$model), + MARGIN = 1, + function(x) all(x == x[1])) +if(!all(test.same.n.model.ecoregion)) + stop(paste('error not the same number of ecoregion for traits', + names(test.same.n.model.ecoregion)[!test.same.n.model.ecoregion])) +list.sp <- list(AIC.tot = tapply(DF.results.sp$AIC, + INDEX = list(DF.results.sp$trait, DF.results.sp$model), + FUN = sum), +N.ecoregion = tapply(DF.results.sp$AIC, INDEX = list(DF.results.sp$trait, + DF.results.sp$model), FUN = length)) +## genus +DF.results.ge <- DF.results[DF.results$filling == 'genus', ] + # select set ecocode with more than 1000 obs +test.same.n.model.ecoregion <- apply(table(DF.results.ge$trait, + DF.results.ge$model), + MARGIN = 1, + function(x) all(x == x[1])) +if(!all(test.same.n.model.ecoregion)) + stop(paste('error not the same number of ecoregion for traits', + names(test.same.n.model.ecoregion)[!test.same.n.model.ecoregion])) +list.ge <- list(AIC.tot = tapply(DF.results.ge$AIC, + INDEX = list(DF.results.ge$trait, + DF.results.ge$model), + FUN = sum), +N.ecoregion = tapply(DF.results.ge$AIC, + INDEX = list(DF.results.ge$trait, DF.results.ge$model), + FUN = length)) +return(list(sp = list.sp, ge = list.ge)) +} + +global.AIC.list <- fun.global.aic(DF.results) +global.AIC.list$sp$AIC.tot - apply(global.AIC.list$sp$AIC.tot, MARGIN = 1, min) + ############################################# ############################################# ### DO THE PLOT -models <- c('lmer.LOGLIN.ER.Tf','lmer.LOGLIN.ER.Tf') -names(models) <- c('Effect/response effect','Effect/response response') -list.params <- list(c(Response='sumTnBn'), - c(Effect='sumTfBn')) -pdf('figs/parameters.MAP.ER.all.pdf',width=9,height=7) -fun.plot.panel.lmer.parameters.c(models=models, - traits = c('Wood.density','SLA','Leaf.N','Max.height'), - DF.results,var.x='MAP', - list.params=list.params,small.bar=0.02,threshold.delta.AIC=10000) + +models <- c('lmer.LOGLIN.ER.Tf', 'lmer.LOGLIN.AD.Tf') +names(models) <- c('Effect/response', 'Absolute distance') +var.y.l <- list('R2m.simplecomp', 'R2m.simplecomp') + +pdf('figs/R2m.MAP.pdf', width = 12, height = 8) +fun.plot.panel.lmer.res.x.y(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, + var.x = 'MAP', var.y.l = var.y.l, + ylim = c(-0.015, 0.06)) +dev.off() + +models <- c('lmer.LOGLIN.ER.Tf', 'lmer.LOGLIN.AD.Tf') +names(models) <- c('Effect/response', 'Absolute distance') +var.y.l <- list('effect.response.var', 'sumTnTfBn.abs.VAR') +pdf('figs/perc.var.MAP.pdf', width = 12, height = 8) +fun.plot.panel.lmer.res.x.y(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, + var.x = 'MAP', var.y.l = var.y.l, + ylim = c(-0.015, 0.17)) +dev.off() + + +models <- c('lmer.LOGLIN.ER.Tf', 'lmer.LOGLIN.ER.Tf') +names(models) <- c('Effect/response effect', 'Effect/response response') +list.params <- list(c(Response = 'sumTnBn'), + c(Effect = 'sumTfBn')) + +pdf('figs/parameters.MAP.ER.all.nolog.pdf', width = 9, height = 7) +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, var.x = 'MAP', + list.params = list.params, + small.bar = 0.2, threshold.delta.AIC = 10000000, + ylim = c(-0.025, 0.025), ylim.same = TRUE, + log = 'x', lwd = 1.5) +dev.off() + + +par(mfrow = c(2, 4), mar=c(9,4,3,2)) +for (i in c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height')) {boxplot(sumTnBn ~ biomes , + data = fun.select.ecoregions.trait(DF.results, i, + 'lmer.LOGLIN.ER.Tf', nobs.min = 1000, + filling.selected="species", + threshold.delta.AIC=100000), las = 3) +abline(h = 0, lty = 2) +} +for (i in c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height')) {boxplot(sumTfBn ~ biomes , + data = fun.select.ecoregions.trait(DF.results, i, + 'lmer.LOGLIN.ER.Tf', nobs.min = 1000, + filling.selected="species", + threshold.delta.AIC=100000), las = 3) +abline(h = 0, lty = 2) +} + +models <- c('lmer.LOGLIN.ER.Tf') +names(models) <- c('ER effect') +list.params <- list(c(Response = 'sumTnBn')) + +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, var.x = 'sumTfBn', + list.params = list.params, + small.bar = 0.00002, threshold.delta.AIC = 10000000, + ylim = c(-0.025, 0.025), ylim.same = TRUE, + lwd = 1.5) + + + +models <- c('lmer.LOGLIN.HD.Tf') +names(models) <- c('HD effect') +list.params <- list(c(Response = 'sumTnTfBn.diff')) + +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, var.x = 'MAP', + list.params = list.params, + small.bar = 0.2, threshold.delta.AIC = 10000000, + ylim = c(-0.025, 0.025), ylim.same = TRUE, + log = 'x', lwd = 1.5) + +par(mfrow = c(1,2)) +fun.plot.all.param.x.y.c('lmer.LOGLIN.ER.Tf', 'Wood.density', DF.results, + var.x = 'MAP', params = 'sumTnBn', + add.name = TRUE, + small.bar = 0.002, threshold.delta.AIC = 10000000, + col.vec = col.vec, col.set = TRUE, + log = 'x', ylim.same = FALSE) +abline(h = 0, lty = 2) + + + fun.plot.all.param.x.y.c('lmer.LOGLIN.ER.Tf', 'Wood.density', DF.results, + var.x = 'MAP', params = 'sumTnBn', + add.name = TRUE, + small.bar = 0.002, threshold.delta.AIC = 10000000, + col.vec = col.vec, col.set = TRUE, + log = 'x', ylim.same = FALSE) +abline(h = 0, lty = 2) + +x11() +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, var.x = 'MAT', + list.params = list.params, + small.bar = 0.2, threshold.delta.AIC = 10000000, + ylim = c(-0.03, 0.03), ylim.same = TRUE) + + +x11() +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, var.x = 'perc.gymno', + list.params = list.params, + small.bar = 0.002, + threshold.delta.AIC = 10000000, + ylim = c(-0.03, 0.03), ylim.same = TRUE) + +x11() +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, var.x = 'perc.ev', + list.params = list.params, + small.bar = 0.002, + threshold.delta.AIC = 10000000, + ylim = c(-0.03, 0.03), ylim.same = TRUE) + + + + +models <- c('lmer.LOGLIN.ER.Tf', 'lmer.LOGLIN.AD.Tf') +names(models) <- c('ER Tf', 'AD Tf') +list.params <- list(c(Response = 'Tf'), + c(Effect = 'Tf')) +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, var.x = 'MAP', + list.params = list.params, + small.bar = 0.2, threshold.delta.AIC = 10000000, + ylim = c(-1, 1), ylim.same = TRUE) + +models <- c('lmer.LOGLIN.ER.Tf', 'lmer.LOGLIN.AD.Tf', 'lmer.LOGLIN.simplecomp.Tf') +names(models) <- c('ER Tf', 'AD Tf', 'Simple compet Tf') +list.params <- list(c(TF = 'Tf'), + c(TF = 'Tf'), + c(TF = 'Tf')) +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, var.x = 'perc.ev', + list.params = list.params, + small.bar = 0.002, + threshold.delta.AIC = 10000000, + ylim = c(-1, 1), ylim.same = FALSE) + +models <- c('lmer.LOGLIN.ER.Tf') +names(models) <- c('Effect/response BATOT') +list.params <- list(c(Response = 'sumBn')) + +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results, var.x = 'MAP', + list.params = list.params, + small.bar = 0.2, threshold.delta.AIC = 10000000, + ylim = c(-0.06, 0.01), ylim.same = TRUE, + log = 'x', lwd = 1.5) + + +pdf('figs/parm.MAT.ER.per.set.pdf') +for (i in sets) { +fun.plot.panel.lmer.parameters.c(models = models, + traits = c('Wood.density', 'SLA', + 'Leaf.N', 'Max.height'), + DF.results[DF.results$set = = i, ], + var.x = 'MAP', + list.params = list.params, + small.bar = 0.2, + threshold.delta.AIC = 10000000, + ylim = c(-0.03, 0.03), xlim = c(400, 3000) , + ylim.same = FALSE, log = 'x', main = i) +} dev.off() diff --git a/R/analysis/lmer.output-fun.R b/R/analysis/lmer.output-fun.R index 4e6b2f3a9c0a2841a5677983542faa1f16dd9827..2b405a13940ad56e12e950583ac5dbc2a19b1f56 100644 --- a/R/analysis/lmer.output-fun.R +++ b/R/analysis/lmer.output-fun.R @@ -191,7 +191,8 @@ return(df.res) ## function to compute ratio of variance explained by a trait variable over the variance explained by the BATOT fun.ratio.var.fixed.effect <- function(DF.results){ -mat.ratio <- DF.results[,c('sumTnTfBn.abs.VAR','sumTfBn.VAR','sumTnBn.VAR','effect.response.var')]/ +mat.ratio <- DF.results[,c('sumTnTfBn.abs.VAR','sumTfBn.VAR','sumTnBn.VAR', + 'effect.response.var')]/ DF.results[,'sumBn.VAR'] names(mat.ratio) <- c('abs.dist','Response','Effect','Effect.Response') return(mat.ratio) @@ -199,19 +200,26 @@ return(mat.ratio) ### FUNCTION TO REPORT BEST MODEL PER ECOREGION AND TRAITS fun.AIC <- function(id2.one,DF.results){ - models <- c('lmer.LOGLIN.nocomp.Tf', 'lmer.LOGLIN.simplecomp.Tf','lmer.LOGLIN.HD.Tf', - 'lmer.LOGLIN.E.Tf','lmer.LOGLIN.R.Tf','lmer.LOGLIN.ER.Tf','lmer.LOGLIN.AD.Tf') - best <- as.vector(DF.results[DF.results$id2==id2.one,c('id2','trait','set','ecocode','filling','MAT','MAP','model')])[which.min(DF.results$AIC[DF.results$id2==id2.one]),] + models <- c('lmer.LOGLIN.nocomp.Tf', 'lmer.LOGLIN.simplecomp.Tf', + 'lmer.LOGLIN.HD.Tf', + 'lmer.LOGLIN.E.Tf','lmer.LOGLIN.R.Tf','lmer.LOGLIN.ER.Tf', + 'lmer.LOGLIN.AD.Tf') + best <- as.vector(DF.results[DF.results$id2==id2.one,c('id2','trait','set', + 'ecocode','filling','MAT','MAP','model')])[ + which.min(DF.results$AIC[DF.results$id2==id2.one]),] AIC.all <- as.vector(DF.results[DF.results$id2==id2.one,c('AIC')]) names(AIC.all) <- as.vector(DF.results[DF.results$id2==id2.one,c('model')]) AIC.all <- AIC.all[models]-min(AIC.all) res <- data.frame((best),t(AIC.all)) - names(res) <- c('id2','trait','set','ecocode','filling','MAT','MAP','best.model',paste('AIC',models,sep='.')) + names(res) <- c('id2','trait','set','ecocode','filling','MAT','MAP', + 'best.model',paste('AIC',models,sep='.')) return(res) } fun.AICc <- function(id2.one,DF.results){ - models <- c('lmer.LOGLIN.nocomp.Tf', 'lmer.LOGLIN.simplecomp.Tf','lmer.LOGLIN.HD.Tf','lmer.LOGLIN.E.Tf','lmer.LOGLIN.R.Tf','lmer.LOGLIN.ER.Tf','lmer.LOGLIN.AD.Tf') + models <- c('lmer.LOGLIN.nocomp.Tf', 'lmer.LOGLIN.simplecomp.Tf', + 'lmer.LOGLIN.HD.Tf','lmer.LOGLIN.E.Tf','lmer.LOGLIN.R.Tf', + 'lmer.LOGLIN.ER.Tf','lmer.LOGLIN.AD.Tf') Deviance.all <- DF.results[DF.results$id2==id2.one,'deviance'] names(Deviance.all) <- DF.results[DF.results$id2==id2.one,'model'] Deviance.all <- Deviance.all[models] @@ -293,43 +301,104 @@ print(tapply(df.selected[['AIC.simplecomp']]<0, fun.plot.param.error.bar <- function(df.selected,var.x,param,small.bar,col.vec){ -segments( df.selected[[var.x]],df.selected[[param]] - 1.96*df.selected[[paste(param,"Std.Error",sep=".")]], - df.selected[[var.x]],df.selected[[param]] + 1.96*df.selected[[paste(param,"Std.Error",sep=".")]],col=col.vec) -segments( df.selected[[var.x]]-small.bar,df.selected[[param]] - 1.96*df.selected[[paste(param,"Std.Error",sep=".")]], - df.selected[[var.x]]+small.bar,df.selected[[param]] - 1.96*df.selected[[paste(param,"Std.Error",sep=".")]],col=col.vec) -segments( df.selected[[var.x]]-small.bar,df.selected[[param]] + 1.96*df.selected[[paste(param,"Std.Error",sep=".")]], - df.selected[[var.x]]+small.bar,df.selected[[param]] + 1.96*df.selected[[paste(param,"Std.Error",sep=".")]],col=col.vec) -} - -fun.plot.all.param.x.y.c <- function(model.selected,trait.name,DF.results,var.x,params, - small.bar,threshold.delta.AIC,col.vec,col.set=TRUE,...){ -df.selected <- fun.select.ecoregions.trait(DF.results,trait.name=trait.name,model.selected=model.selected,threshold.delta.AIC=threshold.delta.AIC) -if(col.set) {col.vec2 <- col.vec[unclass(df.selected[['set']])]}else{col.vec2 <- 1} -ylim <- range(c(df.selected[[params[1]]] - 1.96*df.selected[[paste(params[1],"Std.Error",sep=".")]], - df.selected[[params[1]]] + 1.96*df.selected[[paste(params[1],"Std.Error",sep=".")]]),na.rm=TRUE) +segments( df.selected[[var.x]],df.selected[[param]] - 1.96*df.selected[[paste( + param,"Std.Error",sep=".")]], + df.selected[[var.x]],df.selected[[param]] + 1.96*df.selected[[paste( + param,"Std.Error",sep=".")]],col=col.vec) +segments( df.selected[[var.x]]-small.bar,df.selected[[param]] - 1.96* + df.selected[[paste(param,"Std.Error",sep=".")]], + df.selected[[var.x]]+small.bar,df.selected[[param]] - 1.96* + df.selected[[paste(param,"Std.Error",sep=".")]],col=col.vec) +segments( df.selected[[var.x]]-small.bar,df.selected[[param]] + 1.96* + df.selected[[paste(param,"Std.Error",sep=".")]], + df.selected[[var.x]]+small.bar,df.selected[[param]] + 1.96* + df.selected[[paste(param,"Std.Error",sep=".")]],col=col.vec) +} + +fun.plot.all.param.x.y.c <- function(model.selected,trait.name,DF.results,var.x, + params,small.bar,threshold.delta.AIC, + col.vec,col.set=TRUE,ylim=NA, + ylim.same=FALSE,add.name = FALSE, ...){ +df.selected <- fun.select.ecoregions.trait(DF.results,trait.name=trait.name, + model.selected=model.selected, + threshold.delta.AIC=threshold.delta.AIC) +if(col.set) {col.vec2 <- col.vec[unclass(df.selected[['set']])]}else{ + col.vec2 <- 1} +if(!ylim.same) {ylim <- range(c(df.selected[[params[1]]] - 1.96* + df.selected[[paste(params[1],"Std.Error",sep=".")]], + df.selected[[params[1]]] + 1.96* + df.selected[[paste(params[1],"Std.Error",sep=".")]]),na.rm=TRUE)} plot(df.selected[[var.x]],df.selected[[params[1]]],col=col.vec2,ylim=ylim,...) -fun.plot.param.error.bar(df.selected,var.x,param=params[1],small.bar,col=col.vec2) -} - - -fun.plot.all.param.boxplot <- function(model.selected,trait.name,DF.results,params,small.bar,...){ -df.selected <- fun.select.ecoregions.trait(DF.results,trait.name=trait.name,model.selected=model.selected) +if (add.name) text(df.selected[[var.x]], df.selected[[params[1]]], + labels = paste(df.selected[['set']], + df.selected[['ecocode']]), cex = 0.5, pos = 2) +fun.plot.param.error.bar(df.selected,var.x,param=params[1], + small.bar,col=col.vec2) +} + + +fun.plot.param.error.bar.std <- function(df.selected,var.x,param,small.bar,col.vec){ +segments( df.selected[[var.x]],df.selected[[param]] + df.selected[['sumBn']] - 1.96*df.selected[[paste( + param,"Std.Error",sep=".")]], + df.selected[[var.x]],df.selected[[param]] + df.selected[['sumBn']] + 1.96*df.selected[[paste( + param,"Std.Error",sep=".")]],col=col.vec) +segments( df.selected[[var.x]]-small.bar,df.selected[[param]] + df.selected[['sumBn']] - 1.96* + df.selected[[paste(param,"Std.Error",sep=".")]], + df.selected[[var.x]]+small.bar,df.selected[[param]] + df.selected[['sumBn']] - 1.96* + df.selected[[paste(param,"Std.Error",sep=".")]],col=col.vec) +segments( df.selected[[var.x]]-small.bar,df.selected[[param]] + df.selected[['sumBn']] + 1.96* + df.selected[[paste(param,"Std.Error",sep=".")]], + df.selected[[var.x]]+small.bar,df.selected[[param]] + df.selected[['sumBn']] + 1.96* + df.selected[[paste(param,"Std.Error",sep=".")]],col=col.vec) +} + +fun.plot.all.param.x.y.std <- function(model.selected,trait.name, + DF.results,var.x, + params,small.bar,threshold.delta.AIC, + col.vec,col.set=TRUE,ylim=NA, + ylim.same=FALSE,add.name = FALSE, ...){ +df.selected <- fun.select.ecoregions.trait(DF.results,trait.name=trait.name, + model.selected=model.selected, + threshold.delta.AIC=threshold.delta.AIC) +if(col.set) {col.vec2 <- col.vec[unclass(df.selected[['set']])]}else{ + col.vec2 <- 1} +plot(df.selected[[var.x]],df.selected[[params[1]]] + df.selected[['sumBn']], + col = col.vec2, ...) +if (add.name) text(df.selected[[var.x]], + df.selected[[params[1]]] + df.selected[['sumBn']], + labels = paste(df.selected[['set']], + df.selected[['ecocode']]), cex = 0.5, pos = 2) +fun.plot.param.error.bar.std(df.selected,var.x,param=params[1], + small.bar,col=col.vec2) + +} + + + +fun.plot.all.param.boxplot <- function(model.selected,trait.name,DF.results, + params,small.bar,...){ +df.selected <- fun.select.ecoregions.trait(DF.results,trait.name=trait.name, + model.selected=model.selected) if(length(params)>1){ -DF.t <- data.frame(param=rep(names(params),each=nrow(df.selected)),value=c(df.selected[[params[1]]],df.selected[[params[2]]])) +DF.t <- data.frame(param=rep(names(params),each=nrow(df.selected)),value=c( + df.selected[[params[1]]],df.selected[[params[2]]])) boxplot(DF.t[['value']]~DF.t[['param']],...) }else{ boxplot(df.selected[[params[1]]],...) } } -fun.plot.all.param.er.diff.MAP <- function(model.selected,trait.name,DF.results,...){ -df.selected <- fun.select.ecoregions.trait(DF.results,trait.name=trait.name,model.selected=model.selected) +fun.plot.all.param.er.diff.MAP <- function(model.selected,trait.name, + DF.results,...){ +df.selected <- fun.select.ecoregions.trait(DF.results,trait.name=trait.name, + model.selected=model.selected) plot(df.selected[['MAP']],df.selected[['sumTnBn']]-df.selected[['sumTfBn']],...) } -fun.plot.panel.lmer.res.x.y <- function(models,traits,DF.results,var.x,var.y.l,express,...){ +fun.plot.panel.lmer.res.x.y <- function(models,traits,DF.results,var.x,var.y.l, + express,...){ ncols = length(traits) nrows = length(models) list.models <- as.list(names(models)) @@ -347,7 +416,8 @@ fun.plot.panel.lmer.res.x.y <- function(models,traits,DF.results,var.x,var.y.l,e if(i==nrows) mtext(var.x, side=1, line =4) if(j==1) - mtext(paste('Effect size',list.models[i]), side=2, line =4,cex=0.9) + mtext(paste('Effect size',list.models[i]), side=2, line =4, + cex=0.9) if(i==nrows & j==ncols) legend('topright',legend=levels(DF.results$set),pch=16, col=col.vec,bty='n',ncol=2) @@ -355,7 +425,8 @@ fun.plot.panel.lmer.res.x.y <- function(models,traits,DF.results,var.x,var.y.l,e } -fun.plot.panel.lmer.res.boxplot <- function(models,traits,DF.results,var.y,express,...){ +fun.plot.panel.lmer.res.boxplot <- function(models,traits,DF.results,var.y, + express,...){ ncols = length(traits) nrows = length(models) list.models <- as.list(names(models)) @@ -369,12 +440,14 @@ fun.plot.panel.lmer.res.boxplot <- function(models,traits,DF.results,var.y,expre if(i==1 ) mtext(traits[j], side=3, line =1) if(j==1) - mtext(paste("Effect size", list.models[i],sep=" "), side=2, line =4,cex=0.9) + mtext(paste("Effect size", list.models[i],sep=" "), side=2, + line =4,cex=0.9) } } -fun.plot.panel.lmer.res.boxplot.compare <- function(models,traits,DF.results,var.y,express,...){ +fun.plot.panel.lmer.res.boxplot.compare <- function(models,traits,DF.results, + var.y,express,...){ ncols = length(traits) list.models <- as.list(names(models)) names(list.models) <- rep('model',length(list.models)) @@ -389,7 +462,10 @@ fun.plot.panel.lmer.res.boxplot.compare <- function(models,traits,DF.results,var } } -fun.plot.panel.lmer.parameters.c <- function(models,traits,DF.results,var.x,list.params,threshold.delta.AIC,small.bar=10,...){ +fun.plot.panel.lmer.parameters.c <- function(models,traits,DF.results,var.x, + list.params,threshold.delta.AIC, + small.bar=10,ylim=NA, + ylim.same=FALSE,...){ ncols = length(traits) nrows = length(models) par(mfrow = c(nrows, ncols), mar = c(2,2,1,1), oma = c(4,4,4,1) ) @@ -400,10 +476,13 @@ fun.plot.panel.lmer.parameters.c <- function(models,traits,DF.results,var.x,list ## DF.results$sumTnTfBn.abs <- DF.results$sumTnTfBn.abs/2 for(i in 1:nrows) for(j in 1:ncols){ - fun.plot.all.param.x.y.c(models[i],traits[j],DF.results,var.x,params=list.params[[i]], + try(fun.plot.all.param.x.y.c(models[i],traits[j],DF.results,var.x, + params=list.params[[i]], small.bar=small.bar, - threshold.delta.AIC=threshold.delta.AIC,col.vec=col.vec,col.set=TRUE,...) - abline(h=0,lty=3) + threshold.delta.AIC=threshold.delta.AIC, + col.vec=col.vec,col.set=TRUE, + ylim=ylim,ylim.same=ylim.same,...)) + try(abline(h=0,lty=3)) if(length(list.params[[i]])>1) legend("topright",names(list.params[[i]]), pch=rep(1,length(list.params[[i]])), @@ -414,9 +493,9 @@ fun.plot.panel.lmer.parameters.c <- function(models,traits,DF.results,var.x,list mtext(var.x, side=1, line =4) if(j==1) mtext(paste('param',names(models)[i]), side=2, line =4,cex=1) - ## if(i==nrows & j==ncols) - ## legend('topright',legend=levels(DF.results$set),pch=16, - ## col=col.vec,bty='n',ncol=2) + if(i==nrows & j==ncols) + legend('bottomright',legend=levels(DF.results$set),pch=16, + col=col.vec,bty='n',ncol=2) } } @@ -439,3 +518,25 @@ fun.plot.panel.lmer.parameters.boxplot <- function(models,traits,DF.results,list } } + + +## create a data base from the global data +fun.merge.results.global <- function(list.res, + names.param = c( "(Intercept)", "Tf", + "logD", "sumBn", "sumTnBn", + "sumTfBn", "sumTnTfBn.abs", + "sumTnTfBn.diff")){ + df.details <- data.frame(list.res$files.details[1:4], + list.res$lmer.summary[1:6]) + fun.rep.df <- function(x, df = df.details) df + n_rep <- nrow(list.res$lmer.summary$ecocode.BLUP) + dat.t <- data.frame(matrix(rep(NA,n_rep * length(names.param)), nrow = n_rep, + ncol = length(names.param))) + names(dat.t) <- c(names.param) + dat.t[, match(names(list.res$lmer.summary$ecocode.BLUP), names(dat.t))] <- + list.res$lmer.summary$ecocode.BLUP + df.res <- data.frame(ecocode = rownames(list.res$lmer.summary$ecocode.BLUP), + do.call( "rbind", lapply(1:n_rep, fun.rep.df)), + dat.t) +return(df.res) +} diff --git a/R/analysis/lmer.output.all-fun.R b/R/analysis/lmer.output.all-fun.R new file mode 100644 index 0000000000000000000000000000000000000000..e74537a88dd88fbe861cbc899c7443a91e86459a --- /dev/null +++ b/R/analysis/lmer.output.all-fun.R @@ -0,0 +1,112 @@ +#### function to analyse lmer output for GLOBAL ANALYSIS + + +library(lme4) + + +read.lmer.output <- function(file.name){ + tryCatch(readRDS(file.name), error=function(cond)return(NULL)) # Choose a return value in case of error +} + + + +summarise.lmer.output <- function(x){ + list( nobs = nobs(x), + R2m =Rsquared.glmm.lmer(x)$Marginal, + R2c =Rsquared.glmm.lmer(x)$Conditional, + AIC = AIC(x), + deviance = deviance(x), + conv=x@optinfo$conv, + effect.response.var=variance.fixed.glmm.lmer.effect.and.response(x), + fixed.coeff.E=fixef(x), + fixed.coeff.Std.Error=sqrt(diag(vcov(x))), + fixed.var=variance.fixed.glmm.lmer(x), + ecocode.BLUP=coef(x)$ecocode.i) +} + + + +summarise.lmer.output.all.list <- function(f ){ + output.lmer <- read.lmer.output(f) + if(!is.null(output.lmer)){ + res <- list(files.details=files.details.all(f), + lmer.summary=summarise.lmer.output( output.lmer)) + }else{ + res <- NULL + } + return(res) +} + + + + +files.details.all <- function(x){ + s <- data.frame(t(strsplit(x, "/", fixed=TRUE)[[1]]), stringsAsFactors= FALSE) + names(s) <- c("d1", "d2", "set", "trait", "filling", "model", "file" ) + s[-(1:2)] +} + + +#### R squared functions + +# Function rsquared.glmm requires models to be input as a list (can include fixed- +# effects only models,but not a good idea to mix models of class "mer" with models +# of class "lme") FROM http://jslefche.wordpress.com/2013/03/13/r2-for-linear-mixed-effects-models/ + +Rsquared.glmm.lmer <- function(i){ +# Get variance of fixed effects by multiplying coefficients by design matrix + VarF <- var(as.vector(fixef(i) %*% t(i@pp$X))) + # Get variance of random effects by extracting variance components + VarRand <- colSums(do.call(rbind,lapply(VarCorr(i),function(j) j[1]))) + # Get residual variance + VarResid <- attr(VarCorr(i),"sc")^2 + # Calculate marginal R-squared (fixed effects/total variance) + Rm <- VarF/(VarF+VarRand+VarResid) + # Calculate conditional R-squared (fixed effects+random effects/total variance) + Rc <- (VarF+VarRand)/(VarF+VarRand+VarResid) + # Bind R^2s into a matrix and return with AIC values + Rsquared.mat <- data.frame(Class=class(i),Family="Gaussian",Marginal=Rm, + Conditional=Rc,AIC=AIC(i)) + return(Rsquared.mat) +} + + +variance.fixed.glmm.lmer <- function(i){ + +# Get variance of for each fixed effects by multiplying coefficients by design matrix +var.vec <- apply(fixef(i) * t(i@pp$X),MARGIN=1,var) +# Get variance of fixed effects by multiplying coefficients by design matrix +VarF <- var(as.vector(fixef(i) %*% t(i@pp$X))) +# Get variance of random effects by extracting variance components +VarRand <- colSums(do.call(rbind,lapply(VarCorr(i),function(j) j[1]))) +# Get residual variance +VarResid <- attr(VarCorr(i),"sc")^2 +var.vec <- var.vec/(VarF+VarRand+VarResid) +names(var.vec) <- paste(names(var.vec),"VAR",sep=".") +return(var.vec) +} + +variance.fixed.glmm.lmer.effect.and.response <- function(i){ +if(sum(c("sumTfBn","sumTnBn") %in% names(fixef(i)))==2){ +# Get variance of for each fixed effects by multiplying coefficients by design matrix +var.vec <- var(as.vector(fixef(i)[c("sumTfBn","sumTnBn")] %*% t(i@pp$X[,c("sumTfBn","sumTnBn")]))) +# Get variance of fixed effects by multiplying coefficients by design matrix +VarF <- var(as.vector(fixef(i) %*% t(i@pp$X))) +# Get variance of random effects by extracting variance components +VarRand <- colSums(do.call(rbind,lapply(VarCorr(i),function(j) j[1]))) +# Get residual variance +VarResid <- attr(VarCorr(i),"sc")^2 +var.vec <- var.vec/(VarF+VarRand+VarResid) +}else{ +var.vec <- NA +} +names(var.vec) <- paste("effect.response","VAR",sep=".") +return(var.vec) +} + + + + + +coef(.)$ecocode.i +ranef(.,whichel='ecocode.id',condVar=TRUE) diff --git a/R/analysis/lmer.output.figs.R b/R/analysis/lmer.output.figs.R index 014f9fe5763b8eab3f374ea6069f1bfd13cd4de0..985cd04fbc99a12f05aa08f4ad766efad2dbae0b 100644 --- a/R/analysis/lmer.output.figs.R +++ b/R/analysis/lmer.output.figs.R @@ -222,7 +222,7 @@ fun.plot.panel.lmer.parameters.c(models=models, traits = c('Wood.density','SLA','Leaf.N','Max.height'), DF.results,var.x='MAP', list.params=list.params,small.bar=0.02, - threshold.delta.AIC=10000) + threshold.delta.AIC=10000,ylim=c(-0.2,0.2),ylim.same=TRUE) dev.off() models <- c('lmer.LOGLIN.ER.Tf','lmer.LOGLIN.ER.Tf') @@ -258,7 +258,7 @@ pdf('figs/parameters.MAP.E.pdf',width=9,height=5) fun.plot.panel.lmer.parameters.c(models=models, traits = c('Wood.density','SLA','Leaf.N','Max.height'), DF.results,var.x='MAP', - list.params=list.params,small.bar=0.02,ylim=c(-.15,.3),threshold.delta.AIC=1000) + list.params=list.params,small.bar=0.02,threshold.delta.AIC=1000) dev.off() @@ -270,7 +270,7 @@ pdf('figs/parameters.MAP.R.pdf',width=9,height=5) fun.plot.panel.lmer.parameters.c(models=models, traits = c('Wood.density','SLA','Leaf.N','Max.height'), DF.results,var.x='MAP', - list.params=list.params,small.bar=0.02,ylim=c(-.15,.3),threshold.delta.AIC=100000) + list.params=list.params,small.bar=0.02,threshold.delta.AIC=100000) dev.off() @@ -284,7 +284,7 @@ pdf('figs/parameters.MAP.2models.pdf',width=12,height=10) fun.plot.panel.lmer.parameters.c(models=models, traits = c('Wood.density','SLA','Leaf.N','Max.height'), DF.results,var.x='MAP', - list.params=list.params,small.bar=0.02,ylim=c(-.15,.3),threshold.delta.AIC=10000) + list.params=list.params,small.bar=0.02,threshold.delta.AIC=10000) dev.off() diff --git a/R/analysis/lmer.run.nolog.R b/R/analysis/lmer.run.nolog.R index 88a7ab00303ac2051aecb521456af7c68449c529..d4903bbfca880d27ea5dd740861cf9ac96f71e40 100644 --- a/R/analysis/lmer.run.nolog.R +++ b/R/analysis/lmer.run.nolog.R @@ -59,7 +59,7 @@ fun.call.lmer <- function(formula,df.lmer){ fun.call.lmer.and.save <- function(formula,df.lmer,path.out,std){ lmer.output <- lmer(formula=formula,data=df.lmer,REML = FALSE) print(summary(lmer.output)) -if(std) { saveRDS(lmer.output,file=file.path(path.out, "results.no.std.nolog.rds")) +if(std) { saveRDS(lmer.output,file=file.path(path.out, "results.global.nolog.rds")) }else{saveRDS(lmer.output,file=file.path(path.out, "results.nolog.rds")) } } @@ -98,7 +98,7 @@ run.lmer <- function (model.file, trait, set, ecoregion, ## require(boot) ## require(multicore) ## bb <- boot(data=df.lmer, statistic=boot.fun,R= nsim,formula=formula) -## bci <- lapply(seq_along(bb$t0), boot.out = bb, boot::boot.ci, +## bci <- lapply(seq_along(bb$t0), boot.out = bb, boot::boot.ci, ## type = "perc", conf = level) ## citab <- t(sapply(bci, function(x) x[["percent"]][4:5])) ## a <- (1 - level)/2 @@ -127,10 +127,10 @@ output.dir.lmer <- function (model, trait, set, ecoregion,type.filling) { # Function to prepare data for lmer #============================================================ load.and.prepare.data.for.lmer <- function(trait, set, ecoregion, - min.obs, sample.size, type.filling, + min.obs, sample.size, type.filling, base.dir = "output/processed/",std){ ### load data -if(std) { data.tree.tot <- read.csv(file.path(base.dir, set,ecoregion,"data.tree.tot.no.std.csv"), +if(std) { data.tree.tot <- read.csv(file.path(base.dir, set,ecoregion,"data.tree.tot.global.csv"), stringsAsFactors = FALSE)}else{ data.tree.tot <- read.csv(file.path(base.dir, set,ecoregion,"data.tree.tot.csv"), stringsAsFactors = FALSE)} @@ -167,14 +167,14 @@ data.tree <- fun.remove.outlier.Condit(data.tree) ## remove tree with zero data.tree <- subset(data.tree,subset=data.tree[["BA.G"]]>min.BA.G & data.tree[["BA.G"]]<max.BA.G & - data.tree[["D"]]>9.9) -## select species with no missing traits + data.tree[["D"]]>9.9) +## select species with no missing traits data.tree <- data.tree[(!is.na(data.tree[[abs.CWM.tntf]]) & !is.na(data.tree[[BATOT]])),] -# select species with minimum obs +# select species with minimum obs data.tree <- subset(data.tree,subset=data.tree[["sp"]] %in% names(table(factor(data.tree[["sp"]])))[table(factor(data.tree[["sp"]]))>min.obs]) -# select obs abov min perc.neigh +# select obs abov min perc.neigh data.tree <- subset(data.tree,subset=data.tree[[perc.neigh]] > min.perc.neigh & !is.na(data.tree[[perc.neigh]])) return(data.tree) } @@ -267,8 +267,8 @@ lmer.data <- fun.get.the.variables.for.lmer.no.tree.id(data.tree,BATOT,CWM.tn,ab ##' @param dbh1 in mm ##' @param dbh2 in mm ##' @param slope not to be changed -##' @param intercept -##' @param err.limit +##' @param intercept +##' @param err.limit ##' @return a vector TRUE FALSE with FALSE outlier to be removed ##' @author Kunstler trim.negative.growth <- function(dbh1,dbh2,slope=0.006214, @@ -286,7 +286,7 @@ return(accept) ##' .. taken from trim.growth in Condit CTFS R package .. ##' @title trim.positive.growth ##' @param growth in mm -##' @param maxgrow in mm +##' @param maxgrow in mm ##' @return TRUE FALSE vector with FALSE outlier ##' @author Kunstler trim.positive.growth <- function(growth,maxgrow=75){ @@ -295,5 +295,5 @@ accept <- rep(TRUE,length(growth)) accept[bad.grow] <- FALSE accept[is.na(growth)] <- FALSE return(accept) -} +} diff --git a/R/analysis/lmer.run.nolog.all.R b/R/analysis/lmer.run.nolog.all.R index 56c7495cdd3e692887f70d01965a4f394a2e15d8..f25dd16f45a864926b3a7dd2fa71bb121f5e7981 100644 --- a/R/analysis/lmer.run.nolog.all.R +++ b/R/analysis/lmer.run.nolog.all.R @@ -35,6 +35,21 @@ model.files.lmer.Tf.2 <- c("R/analysis/model.lmer.all/model.lmer.LOGLIN.nocomp.T "R/analysis/model.lmer.all/model.lmer.LOGLIN.HD.Tf.R", "R/analysis/model.lmer.all/model.lmer.LOGLIN.simplecomp.Tf.R") +model.files.lmer.Tf.3 <- c("R/analysis/model.lmer.all/model.lmer.LOGLIN.E.Tf.norandom.R", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.R.Tf.norandom.R", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.ER.Tf.norandom.R") +model.files.lmer.Tf.4 <- c("R/analysis/model.lmer.all/model.lmer.LOGLIN.nocomp.Tf.norandom.R", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.AD.Tf.norandom.R", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.HD.Tf.norandom.R", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.simplecomp.Tf.norandom.R") + +model.files.lmer.Tf.5 <- c("R/analysis/model.lmer.all/model.lmer.LOGLIN.E.Tf.randomsetR", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.R.Tf.randomset.R", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.ER.Tf.randomset.R") +model.files.lmer.Tf.6 <- c("R/analysis/model.lmer.all/model.lmer.LOGLIN.nocomp.Tf.randomset.R", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.AD.Tf.randomset.R", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.HD.Tf.randomset.R", + "R/analysis/model.lmer.all/model.lmer.LOGLIN.simplecomp.Tf.randomset.R") fun.call.lmer.and.save <- function(formula,df.lmer,path.out){ diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.AD.Tf.norandom.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.AD.Tf.norandom.R new file mode 100644 index 0000000000000000000000000000000000000000..57247f8926b4f5236697dd9463e5764dd4fbacb5 --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.AD.Tf.norandom.R @@ -0,0 +1,4 @@ +load.model <- function () { + list(name="lmer.LOGLIN.AD.Tf.norandom", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTnTfBn.abs+(logD|species.id)")) +} diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.AD.Tf.randomset.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.AD.Tf.randomset.R new file mode 100644 index 0000000000000000000000000000000000000000..023ea2a89ddf72a472c7701a7dee9e8c85e9a425 --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.AD.Tf.randomset.R @@ -0,0 +1,7 @@ +load.model <- function () { + list(name="lmer.LOGLIN.AD.Tf.randomset", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTnTfBn.abs+(logD-1|species.id)+(Tf-1|set.id)+(sumBn-1|set.id)+(sumTnTfBn.abs-1|set.id)")) +} + + + diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.E.Tf.norandom.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.E.Tf.norandom.R new file mode 100644 index 0000000000000000000000000000000000000000..fd4a55a8c95a6ad2f59dde75cc1daa094476c8b8 --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.E.Tf.norandom.R @@ -0,0 +1,4 @@ +load.model <- function () { + list(name="lmer.LOGLIN.E.Tf.norandom", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTnBn+(logD|species.id)") ) +} diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.E.Tf.randomset.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.E.Tf.randomset.R new file mode 100644 index 0000000000000000000000000000000000000000..6ca2fe4390228f6f9ff2ac5e9c1c2518eb933af7 --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.E.Tf.randomset.R @@ -0,0 +1,7 @@ +load.model <- function () { + list(name="lmer.LOGLIN.E.Tf.randomset", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTnBn+(logD-1|species.id)+(Tf-1|set.id)+(sumBn-1|set.id)+(sumTnBn-1|set.id)") ) +} + + + diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.ER.Tf.norandom.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.ER.Tf.norandom.R new file mode 100644 index 0000000000000000000000000000000000000000..fc40aa02e0a025ef3f5c37c320cbdc3a4bf6aee6 --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.ER.Tf.norandom.R @@ -0,0 +1,4 @@ +load.model <- function () { + list(name="lmer.LOGLIN.ER.Tf.norandom", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumTfBn+sumBn+sumTnBn+(logD|species.id)")) +} diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.ER.Tf.randomset.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.ER.Tf.randomset.R new file mode 100644 index 0000000000000000000000000000000000000000..db5d7933448c1085dae7755d5c8d5b24960ee42a --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.ER.Tf.randomset.R @@ -0,0 +1,7 @@ +load.model <- function () { + list(name="lmer.LOGLIN.ER.Tf.randomset", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumTfBn+sumBn+sumTnBn+(logD-1|species.id)+(Tf-1|set.id)+(sumTfBn-1|set.id)+(sumBn-1|set.id)+(sumTnBn-1|set.id)")) +} + + + diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.HD.Tf.norandom.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.HD.Tf.norandom.R new file mode 100644 index 0000000000000000000000000000000000000000..8b2892cdbd44313047ada3b7750145c42355ad14 --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.HD.Tf.norandom.R @@ -0,0 +1,4 @@ +load.model <- function () { + list(name="lmer.LOGLIN.HD.Tf.norandom", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTnTfBn.diff+(logD|species.id)")) +} diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.HD.Tf.randomset.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.HD.Tf.randomset.R new file mode 100644 index 0000000000000000000000000000000000000000..f27866a841c06a1b8b1de4ccfe10a4262480f8fa --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.HD.Tf.randomset.R @@ -0,0 +1,7 @@ +load.model <- function () { + list(name="lmer.LOGLIN.HD.Tf.randomset", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTnTfBn.diff+(logD-1|species.id)+(Tf-1|set.id)+(sumBn-1|set.id)+(sumTnTfBn.diff-1|set.id)")) +} + + + diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.R.Tf.norandom.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.R.Tf.norandom.R new file mode 100644 index 0000000000000000000000000000000000000000..97f38127c1ddca984191aea32001d169150d531b --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.R.Tf.norandom.R @@ -0,0 +1,5 @@ +load.model <- function () { + list(name="lmer.LOGLIN.R.Tf.norandom", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTfBn+(logD|species.id)")) +} + diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.R.Tf.randomset.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.R.Tf.randomset.R new file mode 100644 index 0000000000000000000000000000000000000000..659ce9276998f5d524dfec14193d778b41d2c4fa --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.R.Tf.randomset.R @@ -0,0 +1,7 @@ +load.model <- function () { + list(name="lmer.LOGLIN.R.Tf.randomset", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+sumTfBn+(logD-1|species.id)+(Tf-1|set.id)+(sumBn-1|set.id)+(sumTfBn-1|set.id)")) +} + + + diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.nocomp.Tf.norandom.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.nocomp.Tf.norandom.R new file mode 100644 index 0000000000000000000000000000000000000000..8f40e4a07a949f555ea1de3d72fa9941c06f7fed --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.nocomp.Tf.norandom.R @@ -0,0 +1,4 @@ +load.model <- function () { + list(name="lmer.LOGLIN.nocomp.Tf.norandom", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+(logD|species.id)")) +} diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.nocomp.Tf.randomset.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.nocomp.Tf.randomset.R new file mode 100644 index 0000000000000000000000000000000000000000..e00eab9ceb04c4f69e20448a2696a13a1fda322b --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.nocomp.Tf.randomset.R @@ -0,0 +1,7 @@ +load.model <- function () { + list(name="lmer.LOGLIN.nocomp.Tf.randomset", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+(logD-1|species.id)+(Tf-1|set.id)")) +} + + + diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.simplecomp.Tf.norandom.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.simplecomp.Tf.norandom.R new file mode 100644 index 0000000000000000000000000000000000000000..bcbe378c3a832f24e95aed45797acf2537071bb0 --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.simplecomp.Tf.norandom.R @@ -0,0 +1,4 @@ +load.model <- function () { + list(name="lmer.LOGLIN.simplecomp.Tf.norandom", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+(logD|species.id)")) +} diff --git a/R/analysis/model.lmer.all/model.lmer.LOGLIN.simplecomp.Tf.randomset.R b/R/analysis/model.lmer.all/model.lmer.LOGLIN.simplecomp.Tf.randomset.R new file mode 100644 index 0000000000000000000000000000000000000000..61fa5e1cb8cdab18cf791170d349de696ec0ae7c --- /dev/null +++ b/R/analysis/model.lmer.all/model.lmer.LOGLIN.simplecomp.Tf.randomset.R @@ -0,0 +1,7 @@ +load.model <- function () { + list(name="lmer.LOGLIN.simplecomp.Tf.randomset", + lmer.formula.tree.id=formula("logG~1+(1|set.id)+(1|species.id)+(1|plot.id)+Tf+logD+sumBn+(logD-1|species.id)+(Tf-1|set.id)+(sumBn-1|set.id)")) +} + + + diff --git a/R/analysis/analysis-fun.R b/R/analysis/summarize-fun.R similarity index 88% rename from R/analysis/analysis-fun.R rename to R/analysis/summarize-fun.R index 6a211152219ece017509f61b0a2d866f954cda06..1f359c7cdd6cbbb1c160c5b354bd840e148ffd88 100644 --- a/R/analysis/analysis-fun.R +++ b/R/analysis/summarize-fun.R @@ -13,7 +13,7 @@ load.formatted.data <- function(path){ } load_all_formatted_data <- function(path){ - sites <- dir(path.formatted) + sites <- list.dirs(path.formatted, full.names = FALSE)[-1] sites <- sites[sites != "TRY"] data <- list() for(site in sites) @@ -57,23 +57,21 @@ summarise.data <- function(data.site){ summarise.tree <- function(data.tree){ out <- list() - expected <- read.csv("docs/workflow/cols-tree.csv", stringsAsFactors = FALSE) - + if(!is.null(data.tree)){ out[["n.species"]] <- length(unique(data.tree[["sp.name"]])) out[["n.indivs"]] <- table(data.tree[["sp.name"]]) out[["n.eco"]] <- table(data.tree[["ecocode"]]) out[["dbh.range"]] <- range(data.tree[["D"]], na.rm= TRUE) - out[["abio.vars"]] <- names(data.tree)[!(names(data.tree) %in% expected$var)] } out } summarise.traits <- function(data.traits){ - expected <- read.csv("docs/workflow/cols-traits.csv", stringsAsFactors = FALSE) # filter to mean, numeric - var <- expected$var[expected$numeric==1] + var <- c('SLA.mean','Leaf.N.mean','Wood.density.mean', + 'Max.height.mean','Seed.mass.mean') out <- list() if(!is.null(data.traits)){ diff --git a/R/analysis/summarize.R b/R/analysis/summarize.R new file mode 100644 index 0000000000000000000000000000000000000000..3a355ab5735095f58ec59f7a19cff0bec67e315b --- /dev/null +++ b/R/analysis/summarize.R @@ -0,0 +1,58 @@ +#!/usr/bin/env Rscript +############################################# Summarize the data +path.formatted <- "output/formatted" +path.processed <- "output/processed" +library(plyr) + +source("R/analysis/summarize-fun.R") +source("R/utils/plot.R") +source("R/utils/maps.R") +source("R/utils/climate.R") + +## load all sites +data <- load_all_formatted_data(path.formatted) + + +get_unique_plot_coords <- function(set, data){ + i <- !duplicated(paste(data[[set]]$tree$Lon, data[[set]]$tree$Lat)) + #find duplicates + data.frame(set = set, Lon = data[[set]]$tree$Lon[i], + Lat = data[[set]]$tree$Lat[i], + ecocode = data[[set]]$tree$ecocode[i], + stringsAsFactors = FALSE) +} + +x <- rbind.fill(lapply(names(data), get_unique_plot_coords, data = data)) +write.csv(x, file.path(path.processed, "all.sites.coords.csv"), + row.names = FALSE) + + +## GET climate with Mark V function +all.sites.coords <- read.csv(file.path(path.processed, "all.sites.coords.csv"), + stringsAsFactors = FALSE) +clim.all <- GetClimate(lats = all.sites.coords$Lat, lons = all.sites.coords$Lon) +site.clim.all <- cbind(all.sites.coords, MAT = clim.all$MAT, MAP = clim.all$MAP) +biomes.vec <- fun.overly.plot.on.biomes(MAP = site.clim.all$MAP/10, + MAT = site.clim.all$MAT, + names.vec = 1:nrow(site.clim.all)) +site.clim.all$biomes <- unlist(biomes.vec) +write.csv(site.clim.all, file.path(path.processed, "all.sites.clim.csv"), + row.names = FALSE) + + +## change ecocode of Japan to tropical to have big point + +x[x$set=='Japan', 'ecocode'] <- 'tropical' +# Map for all sites +to.dev(world.map.all.sites(x, add.legend = TRUE), + png, file = file.path("figs", "world_map_all.png"), + height = 500, width = 1000) + + +# make summary table +data.summary <- lapply(data, summarise.data) + +tab.1 <- rbind.fill(lapply(data.summary, summary.table.1)) + +write.csv(tab.1, file.path(path.processed, "summary_tab.csv"), + row.names = names(data.summary)) diff --git a/R/find.trait/Spain.R b/R/find.trait/Spain.R index 82aaac445ba62487bbca7508f73431e5944d2368..92a0cafe21661719ccdf7606f8484cd68ce1b126 100644 --- a/R/find.trait/Spain.R +++ b/R/find.trait/Spain.R @@ -51,14 +51,20 @@ data.cat.extract <- fun.change.factor.angio.try(data.cat.extract) data.cat.extract <- fun.fill.pheno.try.with.zanne(data.cat.extract) ## fix pheno for species with issue -data.cat.extract[data.cat.extract$Latin_name %in% c('Crataegus spp.','Crataegus laciniata', - 'Pyrus spp.','Morus spp.','Salix spp.', - 'Betula spp.','Salix elaegnos', - 'Tilia spp.','Sorbus spp.', - 'Prunus spp.','Larix spp.'),'Pheno.T'] <- 'D' +data.cat.extract[data.cat.extract$Latin_name %in% c('Crataegus spp.', + 'Crataegus laciniata', + 'Pyrus spp.','Morus spp.', + 'Salix spp.', + 'Betula spp.', + 'Salix elaegnos', + 'Tilia spp.', + 'Sorbus spp.', + 'Prunus spp.','Larix spp.'), + 'Pheno.T'] <- 'D' data.cat.extract[data.cat.extract$Latin_name %in% c('Juniperus turbinata', - 'Otros pinos'),'Pheno.T']<- 'EV' + 'Otros pinos'), + 'Pheno.T']<- 'EV' diff --git a/R/find.trait/test.traits.R b/R/find.trait/test.traits.R index ecfd403239a2eefafcdbe41d9edc5dbfbdab4281..b24218c7e0ca75fa2f889a1aaf2e60903545bbe1 100644 --- a/R/find.trait/test.traits.R +++ b/R/find.trait/test.traits.R @@ -115,7 +115,7 @@ dev.off() pdf("figs/test.traits/traits.boxplot.cat.pdf") par(mfrow=c(2,2)) for (i in 1:4) { - boxplot(all.traits[[trait.name[i]]]~ cat,las=3) + boxplot(all.traits[[trait.name[i]]]~ cat,las=3,ylab=trait.name[i]) } dev.off() @@ -126,6 +126,19 @@ for (i in 1:4) hist(log10(all.traits[[trait.name[i]]]),main=trait.name[i]) ## compute global mean and sd for each traits trait.name <- c("Leaf.N.mean","SLA.mean","Wood.density.mean","Max.height.mean","Seed.mass.mean") +for (i in trait.name){ +x11() +par(mfrow=c(2,2)) +hist((all.traits[[i]]),main=i) +boxplot((log(all.traits[[i]]) - mean(log(all.traits[[i]]), na.rm = TRUE))/ + sd(log(all.traits[[i]]), na.rm = TRUE)~all.traits[['set']], las = 3) +boxplot(((all.traits[[i]]) )/ + sd((all.traits[[i]]), na.rm = TRUE)~all.traits[['set']], las = 3) +boxplot(all.traits[[i]]~all.traits[['set']], las = 3) + +} + + list.mean.log10 <- lapply(trait.name, FUN=function(trait,data) mean(log10(data[[trait]]), na.rm=TRUE),data=all.traits) diff --git a/R/format.data/BCI.R b/R/format.data/BCI.R index 91020213e413103c7ac6f5d58a3453bf93beb9b3..5f30cf9002f7ea53854e9efaa044838fe54cb17b 100644 --- a/R/format.data/BCI.R +++ b/R/format.data/BCI.R @@ -108,7 +108,7 @@ data.bci$G[data.bci$Latin %in% sp.palm.fern] <- NA ## remove fern and palm data.bci$BA.G[data.bci$Latin %in% sp.palm.fern] <- NA ## remove fern and palm data.bci$dead[data.bci$Status1=="missing" | data.bci$Status2=="missing"] <- NA data.bci$D <- data.bci[["DBH1"]]/10 ## diameter in cm -data.bci$plot <- data.bci[["Quadrat"]] +data.bci$plot <- paste(data.bci[['cluster']], data.bci[["Quadrat"]]) data.bci$htot <- rep(NA,nrow(data.bci)) ## height of tree in m data.bci$sp.name <- data.bci$Latin data.bci$Latin <- NULL diff --git a/R/format.data/France.R b/R/format.data/France.R index f0f8b50b8405fb0e5c33a4a2128706d1266bf1d2..fee9986c75b6d570ecddb0408233be03527ab452 100644 --- a/R/format.data/France.R +++ b/R/format.data/France.R @@ -1,110 +1,159 @@ #!/usr/bin/env Rscript ############################################# MERGE FRENCH DATA -rm(list = ls()); +rm(list = ls()); source("R/format.data/format-fun.R") -library(reshape,quietly=TRUE) -dir.create("output/formatted/France", recursive=TRUE,showWarnings=FALSE) +library(reshape, quietly = TRUE) +dir.create("output/formatted/France", recursive = TRUE, showWarnings = FALSE) ################################ READ DATA -data.france <- read.csv("data/raw/France/dataIFN.FRANCE.csv", stringsAsFactors = FALSE) +data.france <- read.csv("data/raw/France/dataIFN.FRANCE.csv", + stringsAsFactors = FALSE) ### read IFN species names and clean -species.clean <- fun.clean.species.tab(read.csv("data/raw/France/species.csv", stringsAsFactors = FALSE)) -species.clean$sp <- paste("sp",species.clean$sp,sep=".") -data.france$espar <- paste("sp", data.france[["espar"]],sep=".") -######## MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed -######## height to add variables to the traits data base Because we have two heights, -######## then take the max of the two heights and then bootstrap -res.quant.boot <- t(sapply(levels(factor(data.france[["espar"]])), FUN = f.quantile.boot, - R = 1000, x = data.france[["htot"]], fac = factor(data.france[["espar"]]))) +species.clean <- fun.clean.species.tab(read.csv("data/raw/France/species.csv", + stringsAsFactors = FALSE)) +species.clean$sp <- paste("sp", species.clean$sp, sep = ".") +data.france$espar <- paste("sp", data.france[["espar"]], sep = ".") +### MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed +### height to add variables to the traits data base Because we have two heights, +### then take the max of the two heights and then bootstrap +res.quant.boot <- t(sapply(levels(factor(data.france[["espar"]])), + FUN = f.quantile.boot, + R = 1000, + x = data.france[["htot"]], + fac = factor(data.france[["espar"]]))) ## create data base -data.max.height <- data.frame(sp = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, - 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3]) +data.max.height <- data.frame(sp = rownames(res.quant.boot), + Max.height.mean = res.quant.boot[, 1], + Max.height.sd = res.quant.boot[, 2], + Max.height.nobs = res.quant.boot[, 3]) rm(res.quant.boot) -write.csv(data.max.height,file='output/formatted/France/max.height.csv',row.names = FALSE) +write.csv(data.max.height, file = 'output/formatted/France/max.height.csv', + row.names = FALSE) rm(data.max.height) -########################################## FORMAT INDIVIDUAL TREE DATA change unit and names of variables to be the same -########################################## in all data for the tree +### FORMAT INDIVIDUAL TREE DATA change unit and names of variables to be the same +### in all data for the tree data.france$G <- data.france[["ir5"]]/5 * 2 ## diameter growth in mm per year -data.france$BA.G <- (pi*(data.france[["c13"]]/pi/2)^2 -pi* (data.france[["c13"]]/pi/2 - data.france[["ir5"]]/10)^2)/5 ## BA growth in cm2 per year +data.france$BA.G <- (pi*(data.france[["c13"]]/pi/2)^2 - + pi* (data.france[["c13"]]/pi/2 - + data.france[["ir5"]]/10)^2)/5 +## BA growth in cm2 per year data.france$BA.G[(data.france[["c13"]]/pi/2 - data.france[["ir5"]]/10)<0 & !is.na(data.france[["c13"]]/pi/2 - data.france[["ir5"]]/10)] <- 0 -data.france$year <- rep(5, nrow(data.france)) ## number of year between measurement -data.france$D <- data.france[["c13"]]/pi ## diameter in cm -data.france$sp <- as.character(data.france[["espar"]]); data.france$espar <- NULL ## species code -data.france$sp.name <- species.clean[match(data.france$sp,species.clean$sp),"Latin_name"] +data.france$year <- rep(5, nrow(data.france)) +## number of year between measurement +data.france$D <- data.france[["c13"]]/pi ## diameter in cm +data.france$sp <- as.character(data.france[["espar"]]) +data.france$espar <- NULL ## species code +data.france$sp.name <- species.clean[match(data.france$sp, species.clean$sp), + "Latin_name"] data.france$plot <- (data.france[["idp"]]); data.france$idp <- NULL ## plot code -data.france$cluster <- rep(NA,nrow(data.france)) -data.france$tree.id <- paste(data.france[["plot"]], data.france[["a"]], sep = "_") ## tree unique id +data.france$cluster <- rep(NA, nrow(data.france)) +data.france$tree.id <- paste(data.france[["plot"]], + data.france[["a"]], sep = "_") ## tree unique id data.france$weights <- data.france[["w"]]/10000 -data.france$obs.id <- 1:nrow(data.france) ## There is only obs per tree.id, so this is superfluous -data.france$census <- rep(1,nrow(data.france)) ## only one census -######################## change coordinates system of x y to be in lat long WGS84 +data.france$obs.id <- 1:nrow(data.france) +## There is only obs per tree.id, so this is superfluous +data.france$census <- rep(1, nrow(data.france)) ## only one census +####### change coordinates system of x y to be in lat long WGS84 library(sp) library(dismo) library(rgdal) -data.sp <- data.france[, c("tree.id", "xl93", "yl93")] -coordinates(data.sp) <- c("xl93", "yl93") ## EPSG CODE 2154 +data.sp <- data.france[, c("tree.id", "xl93", "yl93")] +coordinates(data.sp) <- c("xl93", "yl93") ## EPSG CODE 2154 proj4string(data.sp) <- CRS("+init=epsg:2154") # define projection system of our data ## EPSG CODE 2154 summary(data.sp) -data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon -data.france$Lon <- coordinates(data.sp2)[, "xl93"] -data.france$Lat <- coordinates(data.sp2)[, "yl93"] -rm(data.sp, data.sp2) -## ## plot on world map library(rworldmap) newmap <- getMap(resolution = 'coarse') -## # different resolutions available plot(newmap) -## points(data.sp2,cex=0.2,col='red') -###################### ECOREGION - merge greco to have no ecoregion with low number of observation +data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon +data.france$Lon <- coordinates(data.sp2)[, "xl93"] +data.france$Lat <- coordinates(data.sp2)[, "yl93"] +rm(data.sp, data.sp2) + +## plot on world map library(rworldmap) newmap <- getMap(resolution = 'coarse') +## different resolutions available plot(newmap) +## points(data.sp2, cex = 0.2, col = 'red') +#### ECOREGION - merge greco to have no ecoregion with low number of observation + +######################## ECOREGION +## One ecoregion for France NEED TO TEST AND ADD BIOMES TODO +source("R/utils/ecoregions.R") +data.france$biomes <- GetBiomes(data.france$Lon, data.france$Lat) +div.biome <- tapply(data.us$sp, INDEX = data.us$biome, fun_div) +nsp.biome <- unlist(lapply(by(data.us, data.us$biome, + fun.sp.in.ecoregion), length)) +data.france$biomes[data.france$biomes == 'Temp_Conif'] <- 'Temp_Broadleaf_Mix' + +#ok + + ## merge A and B Grand Ouest cristallin and oceanique and Center semi-oceanique ## merge G D E Vosges Jura massif cemtral (low mountain) merge H and I Alpes and ## Pyrenees Merge J and K Corse and Mediteraneen -GRECO.temp <- substr(data.france[["SER"]], 1, 1) ## get GRECO from SER (smaller division by keeping only the first letter) -GRECO.temp <- sub("[AB]", "AB", GRECO.temp) -GRECO.temp <- sub("[GDE]", "GDE", GRECO.temp) -GRECO.temp <- sub("[HI]", "HI", GRECO.temp) -GRECO.temp <- sub("[JK]", "JK", GRECO.temp) -## plot(data.france[['xl93']],data.france[['yl93']],col=unclass(factor(GRECO.temp))) +GRECO.temp <- substr(data.france[["SER"]], 1, 1) +## get GRECO from SER (smaller division by keeping only the first letter) +GRECO.temp <- sub("[AB]", "AB", GRECO.temp) +GRECO.temp <- sub("[GDE]", "GDE", GRECO.temp) +GRECO.temp <- sub("[HI]", "HI", GRECO.temp) +GRECO.temp <- sub("[JK]", "JK", GRECO.temp) +## plot(data.france[['xl93']], data.france[['yl93']], +## col = unclass(factor(GRECO.temp))) data.france$ecocode <- GRECO.temp ## a single code for each ecoregion -###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is -###################### missing compute numer of dead per plot to remove plot with disturbance -perc.dead <- tapply(data.france[["dead"]], INDEX = data.france[["plot"]], FUN = function.perc.dead2) -data.france <- merge(data.france, data.frame(plot = as.numeric(names(perc.dead)), - perc.dead = perc.dead),by="plot", sort = FALSE) -########################################################################################### PLOT SELECTION FOR THE ANALYSIS -## data.france <- subset(data.france,subset= data.france[['YEAR']] != 2005) ## year 2005 bad data according to IFN -data.france <- subset(data.france, subset = data.france[["plisi"]] == 0) ## no plot on forest edge -data.france <- subset(data.france, subset = data.france[["dc"]] == 0) ## no harvesting -data.france <- subset(data.france, subset = data.france[["tplant"]] == 0) ## no plantation -data.france <- subset(data.france, subset = !is.na(data.france[["SER"]])) ## missing SER - - -### get wc climate + +div.ecocode <- tapply(data.us$sp, INDEX = data.us$ecocode, fun_div) +nsp.ecocode <- unlist(lapply(by(data.us, + data.us$ecocode, fun.sp.in.ecoregion), length)) + +###### PERCENT DEAD variable percent dead/cannot do with since dead variable is +###### missing compute numer of dead per plot to remove plot with disturbance +perc.dead <- tapply(data.france[["dead"]], INDEX = data.france[["plot"]], + FUN = function.perc.dead2) +data.france <- merge(data.france, + data.frame(plot = as.numeric(names(perc.dead)), + perc.dead = perc.dead), by = "plot", sort = FALSE) +########### PLOT SELECTION FOR THE ANALYSIS +data.france <- subset(data.france, subset = data.france[['YEAR']] != 2005) +## year 2005 bad data according to IFN +data.france <- subset(data.france, subset = data.france[["plisi"]] == 0) +## no plot on forest edge +data.france <- subset(data.france, subset = data.france[["dc"]] == 0) +## no harvesting +data.france <- subset(data.france, subset = data.france[["tplant"]] == 0) +## no plantation +data.france <- subset(data.france, subset = !is.na(data.france[["SER"]])) +## missing SER + + +### get wc climate data.france$MAT <- NULL source("R/utils/climate.R") -clim <- GetClimate(data.france$Lat,data.france$Lon) +clim <- GetClimate(data.france$Lat, data.france$Lon) data.france$MAT <- clim$MAT data.france$MAP <- clim$MAP ## SELECT GOOD COLUMNS ## names of variables for abiotic conditions -vec.abio.var.names <- c("MAT", "MAP") +vec.abio.var.names <- c("MAT", "MAP") ## other var -vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G","BA.G","year", "dead", - "Lon", "Lat","weights","census") -data.tree <- subset(data.france, select = c(vec.basic.var, vec.abio.var.names)) +vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot", + "ecocode", "D", "G", "BA.G", "year", "dead", + "Lon", "Lat", "weights", "census") +data.tree <- subset(data.france, + select = c(vec.basic.var, vec.abio.var.names)) ## select only tree above 10cm DBH -data.tree <- subset(data.tree, subset =data.tree$D>10 | is.na(data.tree$D)) +data.tree <- subset(data.tree, subset = data.tree$D>10 | is.na(data.tree$D)) ## convert var factor in character or numeric data.tree <- fun.convert.type.I(data.tree) -write.csv(data.tree,file="output/formatted/France/tree.csv",row.names = FALSE) +write.csv(data.tree, file = "output/formatted/France/tree.csv", + row.names = FALSE) ### PLOTS DATA -### write data plot with variables only at the plot level. I should delete them from the tree table but so far I keep them to not distroy later code -vec.abio.var.names <- c("MAT", "SAP") -vec.basic.var.p <- c( "cluster", "plot", "ecocode", "Lon", "Lat", "perc.dead") -data.plot <- subset(data.france, subset=!duplicated(data.france$plot),select = c(vec.basic.var.p, vec.abio.var.names)) -write.csv(data.plot,file="output/formatted/France/plot.csv",row.names = FALSE) +### write data plot with variables only at the plot level. +vec.abio.var.names <- c("MAT", "SAP") +vec.basic.var.p <- c( "cluster", "plot", "ecocode", + "Lon", "Lat", "perc.dead") +data.plot <- subset(data.france, subset=!duplicated(data.france$plot), + select = c(vec.basic.var.p, vec.abio.var.names)) +write.csv(data.plot, file="output/formatted/France/plot.csv", row.names = FALSE) diff --git a/R/format.data/Mbaiki.R b/R/format.data/Mbaiki.R index 075f54bd724ec65d90997291bfc827b3bc1eda99..460d35b20fa9bfe5b528bc36640b5bf9aa669544 100644 --- a/R/format.data/Mbaiki.R +++ b/R/format.data/Mbaiki.R @@ -2,9 +2,6 @@ ### MERGE Mbaiki DATA -data.mbaiki$D <- data.mbaiki[["dbh1"]]; -data.mbaiki$D[data.mbaiki$D == 0] <- NA; - rm(list = ls()) # setwd("/home/ghislain/Documents/Ghislain-CIRAD/Traits_Competition_Georges/trait.competition.workshop") @@ -134,7 +131,7 @@ data.mbaiki$BA.G <- (pi*(data.mbaiki$dbh2/2)^2-pi*(data.mbaiki$dbh1/2)^2)/ data.mbaiki$BA.G[data.mbaiki$code1 %in% c(1,9)] <- NA ## indivs with code indicating problem in dbh measurment at dbh1 data.mbaiki$BA.G[data.mbaiki$code2 %in% c(1,9)] <- NA -B## indivs with code indicating problem in dbh measurment at dbh2 +## indivs with code indicating problem in dbh measurment at dbh2 data.mbaiki$census <- data.mbaiki$yr1 data.mbaiki$D <- data.mbaiki[["dbh1"]]; data.mbaiki$D[data.mbaiki$D == 0] <- NA ;## diameter in cm diff --git a/R/format.data/Sweden.R b/R/format.data/Sweden.R index 44aa470b3615a389b64fd85f910a88d23e6bd0e1..385a91caa1ed793efe125637ac7d8b76e3180ce9 100644 --- a/R/format.data/Sweden.R +++ b/R/format.data/Sweden.R @@ -99,11 +99,17 @@ data.swe$weights[data.swe$D>=4 & data.swe$D<10] <- data.swe$PlotArea4099[data.sw data.swe$weights[data.swe$D>=10] <- data.swe$PlotArea100[data.swe$D>=10] data.swe$weights <- 1/data.swe$weights data.swe$sp <- paste("sp",data.swe$sp,sep=".") +## ######################## ECOREGION +## source("R/utils/ecoregions.R") +## data.swe$ecocode <- GetEcoregions(data.swe$Lon,data.swe$Lat) +## data.swe$ecocode <- as.character(data.swe$ecocode) + ######################## ECOREGION -## One ecoregion for Sweden only? +## One ecoregion for France NEED TO TEST AND ADD BIOMES TODO source("R/utils/ecoregions.R") -data.swe$ecocode <- GetEcoregions(data.swe$Lon,data.swe$Lat) -data.swe$ecocode <- as.character(data.swe$ecocode) +data.swe$biomes <- GetBiomes(data.swe$Lon, data.swe$Lat) +data.swe$ecocode <- as.character(data.swe$biomes) + ### NEED TO VERIFY THAT ECOREGIONS ARE ASSIGNED PROPERLY table(as.character(data.swe$ecocode)) # ok diff --git a/R/format.data/US.R b/R/format.data/US.R index a87a78dffb9e5fcace4ea862ee867fb1ef760a78..677af2aba0e8bd93ff894e07cd07235084ef7af1 100644 --- a/R/format.data/US.R +++ b/R/format.data/US.R @@ -105,8 +105,8 @@ data.us$DIVISION[data.us$DIVISION %in% data.us$DIVISION[data.us$DIVISION %in% c('Temperate Steppe Division','Temperate Steppe Mountains', 'Tropical/Subtropical Steppe Division' - ,'Tropical/Subtropical Steppe Mountains')] <- - 'Steppe' + ,'Tropical/Subtropical Steppe Mountains')] <- 'Steppe' + data.us$DIVISION[data.us$DIVISION %in% c('Savanna Division','Savanna Mountains')] <-'Savanna' @@ -119,6 +119,24 @@ data.us$ecocode <- unlist(lapply(lapply(strsplit(data.us[["eco_codemerged"]], " "), FUN = substr, 1, 2), FUN = paste, collapse = ".")) +data.us$ecocode[data.us$ecocode=='De'] <- 'St' + +div.ecocode <- tapply(data.us$sp, INDEX = data.us$ecocode, fun_div) +nsp.ecocode <- unlist(lapply(by(data.us,data.us$ecocode,fun.sp.in.ecoregion),length)) + + +######################## ECOREGION +## One ecoregion for France NEED TO TEST AND ADD BIOMES TODO +source("R/utils/ecoregions.R") +data.us$biomes <- GetBiomes(data.us$Lon, data.us$Lat) +table(data.us$biome) +data.us$biome[ data.us$biome == 'Temp_Sav' ] <- 'Temp_Broadleaf_Mix' + +data.us$biome[grep('Trop_',data.us$biome)] <- 'Trop' + +div.biome <- tapply(data.us$sp, INDEX = data.us$biome, fun_div) +nsp.biome <- unlist(lapply(by(data.us,data.us$biome,fun.sp.in.ecoregion),length)) + ## ## ## plot map to check coordinates syste ## library(RColorBrewer) diff --git a/R/format.data/format-fun.R b/R/format.data/format-fun.R index 209701a2b38a10a71afc3fcfe48bc98b3ca4ddfe..e64cec6d327ca5efb73d56ab8116c2527b8c5cdf 100644 --- a/R/format.data/format-fun.R +++ b/R/format.data/format-fun.R @@ -5,7 +5,8 @@ ##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP #### G. Kunstler 11/09/2013 source("R/packages.R") -check_packages(c("reshape", "boot","RColorBrewer","sp","dismo","rgdal","foreign","rworldmap")) +check_packages(c("reshape", "boot", "RColorBrewer", "sp", "dismo", + "rgdal", "foreign", "rworldmap")) ############################ ## FUNCTION remove trailing white space @@ -15,16 +16,17 @@ trim.trailing <- function (x) sub("\\s+$", "", x) ############################################# ## FUN to clean species name for FRENCH NFI fun.clean.species.tab <- function(species.tab){ -species.tab2 <- species.tab[!is.na(species.tab$Latin_name),] +species.tab2 <- species.tab[!is.na(species.tab$Latin_name), ] ### species IFN reformat names ## clean species names and synonyme names species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn)) ## remove trailing white space species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) -species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), - c("code","Latin_name","Exotic_Native_cultivated")] -names(species.clean) <- c("sp","Latin_name","Exotic_Native_cultivated") +species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), + c("code", "Latin_name", + "Exotic_Native_cultivated")] +names(species.clean) <- c("sp", "Latin_name", "Exotic_Native_cultivated") return(species.clean) } @@ -43,19 +45,21 @@ return(species.clean) ##' @param probs probability of quantile to compute ##' @return a matrix with # col and 1 row with mean sd adn nobs ##' @author Kunstler -f.quantile.boot <- function(i,x,fac,R,probs=0.99){ -require(boot,quietly=TRUE) - if(length(na.exclude(x[fac==i]))>0){ - quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs) - return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1)) +f.quantile.boot <- function(i, x, fac, R, probs = 0.99){ +require(boot,quietly = TRUE) + if(length( na.exclude(x[fac == i])) > 0 ){ + quant.boot <- boot(x[fac == i], f.quantile, R = R, probs = probs) + return(as.matrix(c(mean = mean(quant.boot$t), sd = sd(quant.boot$t), + nobs = length(na.exclude(x[fac == i]))), + ncol = 3, nrow = 1)) }else{ - return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1)) + return(as.matrix(c(mean = NA, sd = NA, nobs = NA), ncol = 3, nrow = 1)) } } -f.quantile <- function (x,ind,probs){ - require(boot,quietly=TRUE) - quantile(x[ind],probs=probs,na.rm=TRUE) +f.quantile <- function (x, ind, probs){ + require(boot, quietly = TRUE) + quantile(x[ind], probs = probs, na.rm = TRUE) } ####################### @@ -65,28 +69,31 @@ function.perc.dead <- function(dead){ ## function to deal with missing value function.perc.dead2 <- function(dead) { - out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]) + out <- sum(dead, na.rm = T)/length(dead[!is.na(dead)]) if(!is.finite(out)) out <- NA return(out) } ## function to convert var factor in character or numeric fun.convert.type.I <- function(data.tree){ -character.var <- c("sp","sp.name","ecocode") -numeric.var <- c("D", "G","BA.G", "dead", - "Lon", "Lat","weights","MAT","MAP") -factor.to.convert.var <- c("obs.id","tree.id","cluster", "plot","census") -for (i in factor.to.convert.var) data.tree[[i]] <- as.integer(factor(data.tree[[i]])) -for (i in character.var) data.tree[[i]] <- as.character(data.tree[[i]]) -for (i in numeric.var) data.tree[[i]] <- as.numeric(data.tree[[i]]) +character.var <- c("sp", "sp.name", "ecocode") +numeric.var <- c("D", "G", "BA.G", "dead", + "Lon", "Lat", "weights", "MAT", "MAP") +factor.to.convert.var <- c("obs.id", "tree.id", "cluster", "plot", "census") +for (i in factor.to.convert.var) + data.tree[[i]] <- as.integer(factor(data.tree[[i]])) +for (i in character.var) + data.tree[[i]] <- as.character(data.tree[[i]]) +for (i in numeric.var) + data.tree[[i]] <- as.numeric(data.tree[[i]]) return(data.tree) } fun.convert.type.B <- function(data.tree){ -character.var <- c("sp","sp.name","ecocode") -numeric.var <- c("D", "G","BA.G", "dead", +character.var <- c("sp", "sp.name", "ecocode") +numeric.var <- c("D", "G", "BA.G", "dead", "x", "y") -factor.to.convert.var <- c("obs.id","tree.id","cluster", "plot","census") +factor.to.convert.var <- c("obs.id", "tree.id", "cluster", "plot", "census") for (i in factor.to.convert.var) data.tree[[i]] <- as.integer(factor(data.tree[[i]])) for (i in character.var) data.tree[[i]] <- as.character(data.tree[[i]]) for (i in numeric.var) data.tree[[i]] <- as.numeric(data.tree[[i]]) @@ -112,17 +119,19 @@ return(data.tree) ##' @param ... ##' @return ##' @author Kunstler -fun.circles.plot <- function(plot.select,x,y,plot,D,inches,fg.l,...){ -x.t <- x[plot==plot.select] -y.t <- y[plot==plot.select] -D.t <- D[plot==plot.select] -fg <- fg.l[plot==plot.select] +fun.circles.plot <- function(plot.select, x, y, plot, D, inches, fg.l, ...){ +x.t <- x[plot == plot.select] +y.t <- y[plot == plot.select] +D.t <- D[plot == plot.select] +fg <- fg.l[plot == plot.select] D.t[is.na(D.t)] <- 0 -symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches,fg=fg,...) +symbols(x.t, y.t, circles = D.t , main = plot.select, inches = inches, + fg = fg, ...) } -##' .. remove too negative growth based on Condit R package with param fitted to BCI .. +##' .. remove too negative growth based on Condit R package +##' with param fitted to BCI .. ##' ##' .. taken from trim.growth function in CTFS.R .. ##' @title trim.negative.growth @@ -133,11 +142,11 @@ symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches,fg=fg,...) ##' @param err.limit ##' @return a vector TRUE FALSE with FALSE outlier to be removed ##' @author Kunstler -trim.negative.growth <- function(dbh1,dbh2,slope=0.006214, - intercept=.9036,err.limit=5){ -stdev.dbh1 <- slope*dbh1+intercept +trim.negative.growth <- function(dbh1, dbh2, slope = 0.006214, + intercept = .9036, err.limit = 5){ +stdev.dbh1 <- slope*dbh1 + intercept bad.grow <- which(dbh2<=(dbh1-err.limit*stdev.dbh1)) -accept <- rep(TRUE,length(dbh1)) +accept <- rep(TRUE, length(dbh1)) accept[bad.grow] <- FALSE accept[is.na(dbh1) | is.na(dbh2) | dbh2<=0 | dbh1<=0] <- FALSE return(accept) @@ -151,9 +160,9 @@ return(accept) ##' @param maxgrow in mm ##' @return TRUE FALSE vector with FALSE outlier ##' @author Kunstler -trim.positive.growth <- function(growth,maxgrow=75){ +trim.positive.growth <- function(growth, maxgrow = 75){ bad.grow <- which(growth>maxgrow) -accept <- rep(TRUE,length(growth)) +accept <- rep(TRUE, length(growth)) accept[bad.grow] <- FALSE accept[is.na(growth)] <- FALSE return(accept) @@ -161,27 +170,41 @@ return(accept) ##################### CREATE PLOT BASED ON square.sxsquare.s m cell from X Y -fun.quadrat <- function(x,y,square.s) { +fun.quadrat <- function(x, y, square.s) { if(sum(!is.na(x))>10){ - vec.x <- seq(0,max(x,na.rm=T)+20,by=square.s) - vec.y <- seq(0,max(y,na.rm=T)+20,by=square.s) - x.cut <- cut(x,breaks=vec.x,include.lowest=TRUE) - y.cut <- cut(y,breaks=vec.y,include.lowest=TRUE) - out <- apply(cbind(x.cut,y.cut),1,paste,collapse=".") + vec.x <- seq(0, max(x, na.rm = T) + 20, by = square.s) + vec.y <- seq(0, max(y, na.rm = T) + 20, by = square.s) + x.cut <- cut(x, breaks = vec.x, include.lowest = TRUE) + y.cut <- cut(y, breaks = vec.y, include.lowest = TRUE) + out <- apply(cbind(x.cut, y.cut), 1, paste, collapse = ".") out[is.na(x.cut)] <- NA out[is.na(y.cut)] <- NA return(unclass(out)) }else{ - return(rep(NA,length(x))) + return(rep(NA, length(x))) } } ## function to apply per cluster -fun.quadrat.cluster <- function(cluster.id,cluster,tree.id,x,y,square.s){ -temp <- fun.quadrat(x[cluster==cluster.id], y[cluster==cluster.id],square.s=square.s) -temp2 <- paste((cluster[cluster==cluster.id]),temp,sep=".") +fun.quadrat.cluster <- function(cluster.id, cluster, tree.id, x, y, square.s){ +temp <- fun.quadrat(x[cluster == cluster.id], y[cluster == cluster.id], + square.s = square.s) +temp2 <- paste((cluster[cluster == cluster.id]), temp, sep = ".") temp2[is.na(temp)] <- NA -return(data.frame(make.quad=temp2,tree.id=tree.id[cluster==cluster.id])) +return(data.frame(make.quad = temp2, tree.id = tree.id[cluster == cluster.id])) +} + + +fun_div <- function(sp){ +p_i <- table(factor(sp))/sum(table(factor(sp))) +shannon <- -sum(p_i*log(p_i)) +simpson <- 1-sum(p_i^2) +return(data.frame(shannon=shannon,simpson=simpson)) +} + + +fun.sp.in.ecoregion <- function(data, thres.prop=0.05){ + names(table(data[["sp"]]))[table(data[["sp"]])/length(data[["sp"]]) > thres.prop] } diff --git a/R/process.data/explore.processed.data.R b/R/process.data/explore.processed.data.R index f8ba8dbde9413a735e3f07399e76b0def07b7c42..a9ccbbacb1ec3f08ab919472295c6daadb56fe1c 100644 --- a/R/process.data/explore.processed.data.R +++ b/R/process.data/explore.processed.data.R @@ -9,17 +9,112 @@ source("R/utils/plot.R") filedir <- "output/processed" -mat.perc <- read.csv(file=file.path(filedir, "all.sites.perc.traits.csv"), - stringsAsFactors=FALSE) +mat.perc <- read.csv(file = file.path(filedir, "all.sites.perc.traits.csv"), + stringsAsFactors = FALSE) ### read all data library(data.table) system.time(data.all <- fread(file.path(filedir, "data.all.csv"), - stringsAsFactors=FALSE)) + stringsAsFactors = FALSE)) if(dim(data.all)[1] != sum(mat.perc[['num.obs']])) stop('error not same dimension per ecoregion and total') +## Look at number of species with enough occurence +sp.mat <- table(data.all$ecocode, data.all$sp) +sum.sp <- apply(sp.mat, MARGIN = 1, sum, na.rm = TRUE) +sp.mat.perc <- sp.mat/sum.sp +sort(apply(sp.mat.perc > 0.1, MARGIN = 1, sum)) +sort(apply(sp.mat > 200, MARGIN = 1, sum)) + +## sd of trait focal and shannon +fun.sd.x.per.y <- function(x, y = 'ecocode', data = data.all){ + res <- tapply(data[[x]], data[[y]], sd , na.rm = TRUE) + df.res <- data.frame(res) + names(df.res) <- x + return(df.res) +} + +trait <- c('Leaf.N','SLA','Wood.density','Max.height') +# per ecocode +sd.ecocode <- do.call( 'cbind', lapply(paste(trait, 'focal', sep = '.'), + fun.sd.x.per.y, + data = data.all)) +set.per.ecocode <- tapply(data.all$set, data.all$ecocode, unique) +div.per.ecocode <- tapply(data.all$sp, data.all$ecocode, fun_div) +nsp.per.ecocode <- tapply(data.all$sp, data.all$ecocode, function(x) nlevels(factor(x))) +sd.ecocode <- data.frame(set = set.per.ecocode, ecocode = rownames(sd.ecocode), + sd.ecocode, do.call('rbind', div.per.ecocode),nsp = nsp.per.ecocode) +# per set +sd.set <- do.call( 'cbind', lapply(paste(trait, 'focal', sep = '.'), + fun.sd.x.per.y, y = 'set', + data = data.all)) +div.per.set <- do.call('rbind', tapply(data.all$sp, data.all$set, fun_div)) +nsp.per.set <- tapply(data.all$sp, data.all$set, function(x) nlevels(factor(x))) +sd.set <- data.frame(set = rownames(sd.set), + sd.set, div.per.set, nsp = nsp.per.set) +## plots + +pdf("figs/test.processed/boxplot.traits.sd.tot.ecocode.pdf", + width = 12, height = 12) +par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) +for (i in paste(trait,'focal',sep='.')){ + barplot(sd.ecocode[[i]], las = 3, + main=i, ylab = 'sd', names.arg = sd.ecocode$ecocode, + col = col.vec[sd.ecocode$set]) +} +dev.off() + +pdf("figs/test.processed/boxplot.traits.div.tot.ecocode.pdf", + width = 12, height = 12) +par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1)) +for (i in c('shannon')){ + barplot(exp(sd.ecocode[[i]]), las = 3, + main=i, names.arg = sd.ecocode$ecocode, + col = col.vec[sd.ecocode$set], log ='y', + ylim = range(1, exp(sd.set[[i]])), + ylab = 'exp(shannon)') +} +for (i in c('nsp')){ + barplot(sd.ecocode[[i]], las = 3, + main=i, names.arg = sd.ecocode$ecocode, + col = col.vec[sd.ecocode$set], log ='y', + ylim = range(0.1, (sd.set[[i]])), + ylab = 'N species') +} +dev.off() + + +pdf("figs/test.processed/boxplot.traits.sd.tot.set.pdf", + width = 12, height = 12) +par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) +for (i in paste(trait,'focal',sep='.')){ + barplot(sd.set[[i]], las = 3, + main = i, ylab = 'sd', names.arg = sd.set$set, + col = col.vec[sd.set$set]) +} +dev.off() + +pdf("figs/test.processed/boxplot.traits.div.tot.SET.pdf", + width = 12, height = 12) +par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1)) +for (i in c('shannon')){ + barplot(exp(sd.set[[i]]), las = 3, + main=i, names.arg = sd.set$set, + col = col.vec[sd.set$set], log = 'y', + ylim = range(1, exp(sd.set[[i]])), + ylab = 'exp(shannon)') + abline(h = 5) +} +for (i in c('nsp')){ + barplot((sd.set[[i]]), las = 3, + main=i, names.arg = sd.set$set, + col = col.vec[sd.set$set], log ='y', + ylim = range(0.1, (sd.set[[i]])), + ylab = 'N species') +} +dev.off() + ######## ### TODO @@ -29,105 +124,124 @@ if(dim(data.all)[1] != sum(mat.perc[['num.obs']])) #- pattern ED angio/conif ## look at BATOT per ecocode -MAP.ECOCODE <- tapply(data.all$MAP,INDEX=data.all$ecocode,FUN=mean) -SET.ECOCODE <- tapply(data.all$set,INDEX=data.all$ecocode,FUN=unique) +MAP.ECOCODE <- tapply(data.all$MAP, INDEX = data.all$ecocode, FUN = mean) +BATOT.ECOCODE <- tapply(data.all$BATOT, INDEX = data.all$ecocode, FUN = mean) +SET.ECOCODE <- tapply(data.all$set, INDEX = data.all$ecocode, FUN = unique) +pdf("figs/test.processed/BATOT.vs.MAP.pdf") +par(mar=c(8.1,4.1,4.1,2.1)) +plot(log(MAP.ECOCODE), BATOT.ECOCODE, pch = '', ylim = c(0,180) , + xlab = 'Mean annual precipitation (log)', + ylab = 'Total basal area per plot (m^2/ha)') boxplot(data.all$BATOT ~ data.all$ecocode, las = 3, - at = log(MAP.ECOCODE), boxwex = 0.05, outline = FALSE, - col=col.vec[SET.ECOCODE]) + at = log(MAP.ECOCODE),names = NA, + boxwex = 0.05, outline = FALSE, + col = col.vec[SET.ECOCODE], xlim = range(log(MAP.ECOCODE)), add = TRUE) +legend('topleft', legend = names(col.vec),fill=col.vec) +dev.off() pdf("figs/test.processed/boxplot.BATOT.D.G.BA.G.pdf") -par(mfrow=c(2,3)) -boxplot(data.all$BATOT ~ data.all$set, las = 3) -boxplot(data.all$D ~ data.all$set, las = 3) -boxplot(data.all$G ~ data.all$set, las = 3) -boxplot(data.all$BA.G ~ data.all$set, las = 3) +par(mfrow=c(2,2)) +for (i in c('BATOT', 'D', 'G', 'BA.G')) + boxplot((data.all[[i]]+1000) ~ data.all$set, las = 3, ylab = i, log = 'y') dev.off() -pdf("figs/test.processed/boxplot.traits.pdf") -par(mfrow=c(2,3)) -boxplot(data.all$Leaf.N.focal ~ data.all$set, las = 3,main="Leaf.N") -boxplot(data.all$SLA.focal ~ data.all$set, las = 3,main="SLA") -boxplot(data.all$Wood.density.focal ~ data.all$set, las = 3,main="Wood.density") -boxplot(data.all$Seed.mass.focal ~ data.all$set, las = 3,main="Seed.mass") -boxplot(data.all$Max.height.focal ~ data.all$set, las = 3,main="Max.height") +trait <- c('Leaf.N','SLA','Wood.density','Max.height') + +pdf("figs/test.processed/boxplot.traits.focal.pdf", + width = 12, height = 7) +par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) +for (i in paste(trait,'focal',sep='.')){ + set.vec <- sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])), + function(x) gsub(paste(" ", + gsub("^([a-zA-Z]* )", "",x), + sep = ""), + "", x, fixed = TRUE)) + boxplot(data.all[[i]] ~ data.all$ecocode, las = 3, + main=i, + col = col.vec[set.vec], outline = FALSE) +} + dev.off() -par(mfrow=c(2,2)) -boxplot(data.all$Leaf.N.perc.species ~ data.all$set, las = 3,main="Leaf.N") -boxplot(data.all$SLA.perc.species ~ data.all$set, las = 3,main="SLA") -boxplot(data.all$Wood.density.perc.species ~ data.all$set, las = 3,main="Wood.density") -boxplot(data.all$Seed.mass.perc.species ~ data.all$set, las = 3,main="Seed.mass") +pdf("figs/test.processed/boxplot.traits.CWM.fill.pdf", + width = 12, height = 7) +par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) +for (i in paste(trait,'CWM.fill',sep='.')){ + set.vec <- sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])), + function(x) gsub(paste(" ", + gsub("^([a-zA-Z]* )", "",x), + sep = ""), + "", x, fixed = TRUE)) + boxplot(data.all[[i]] ~ data.all$ecocode, las = 3, + main=i, + col = col.vec[set.vec], outline = FALSE) +} -par(mfrow=c(2,2)) -boxplot(data.all$Leaf.N.perc.genus ~ data.all$set, las = 3,main="Leaf.N") -boxplot(data.all$SLA.perc.genus ~ data.all$set, las = 3,main="SLA") -boxplot(data.all$Wood.density.perc.genus ~ data.all$set, las = 3,main="Wood.density") -boxplot(data.all$Seed.mass.perc.genus ~ data.all$set, las = 3,main="Seed.mass") +dev.off() -### compute percentage of evergreen and conif per plot +pdf("figs/test.processed/boxplot.traits.abs.CWM.fill.pdf", + width = 12, height = 7) +par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) +for (i in paste(trait,'abs.CWM.fill',sep='.')){ + set.vec <- sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])), + function(x) gsub(paste(" ", + gsub("^([a-zA-Z]* )", "",x), + sep = ""), + "", x, fixed = TRUE)) + boxplot(data.all[[i]] ~ data.all$ecocode, las = 3, + main=i, + col = col.vec[set.vec], outline = FALSE) +} +dev.off() ### compute mean BATOT, number of species, traits and VAR OF TRAITS per cluster +system.time( data.summarise <- fun.compute.all.var.cluster( data.all ) ) -system.time(data.summarise <- fun.compute.all.var.cluster(data.all)) - -### NEED TO CHECK WHY JAPAN REACH 300 of BATOT - +pdf("figs/test.processed/perc.angio.deciduous.pdf") +par(mfrow = c(2, 1)) ## angio -boxplot(data.summarise$per.angio~data.summarise$set,las=3) - -per.angio.ecocode.mean <- tapply(data.summarise$per.angio,data.summarise$ecocode,mean,na.rm=TRUE) -per.angio.ecocode.quant <- do.call('rbind',tapply(data.summarise$per.angio,data.summarise$ecocode,quantile, - probs=c(0.1,0.8),na.rm=TRUE)) -names.ecocode <- tapply(data.summarise$ecocode,data.summarise$ecocode,unique) +per.angio.ecocode.mean <- tapply(data.summarise$per.angio, + data.summarise$ecocode,mean, + na.rm = TRUE) +names.ecocode <- tapply(data.summarise$ecocode, data.summarise$ecocode,unique) +set.ecocode <- tapply(data.summarise$set, data.summarise$ecocode,unique) names(per.angio.ecocode.mean) <- names.ecocode par(mar=c(8.1,4.1,4.1,2.1)) -mp <- barplot(per.angio.ecocode.mean,las=3) -segments(mp,per.angio.ecocode.quant[,1], - mp,per.angio.ecocode.quant[,2]) +mp <- barplot(per.angio.ecocode.mean, las = 3, + col= col.vec[set.ecocode], ylab = 'perc angio') ## deciduous -boxplot(data.summarise$per.deciduous~data.summarise$set,las=3) - -per.deciduous.ecocode.mean <- tapply(data.summarise$per.deciduous,data.summarise$ecocode,mean,na.rm=TRUE) -per.deciduous.ecocode.quant <- do.call('rbind',tapply(data.summarise$per.deciduous,data.summarise$ecocode,quantile, - probs=c(0.1,0.8),na.rm=TRUE)) -names.ecocode <- tapply(data.summarise$ecocode,data.summarise$ecocode,unique) +per.deciduous.ecocode.mean <- tapply(data.summarise$per.deciduous, + data.summarise$ecocode, mean , + na.rm = TRUE) +names.ecocode <- tapply(data.summarise$ecocode, data.summarise$ecocode, unique) +set.ecocode <- tapply(data.summarise$set, data.summarise$ecocode,unique) names(per.deciduous.ecocode.mean) <- names.ecocode par(mar=c(8.1,4.1,4.1,2.1)) -mp <- barplot(per.deciduous.ecocode.mean,las=3) -segments(mp,per.deciduous.ecocode.quant[,1], - mp,per.deciduous.ecocode.quant[,2]) - +mp <- barplot(per.deciduous.ecocode.mean, las = 3, + col = col.vec[set.ecocode], ylab = 'perc deciduous') +dev.off() -par(mfrow=c(1,2)) -plot(data.summarise$MAP,data.summarise$BATOT, - ,col=col.vec[data.summarise$set],cex=0.1) -fun.boxplot.breaks((data.summarise$MAP),data.summarise$BATOT,Nclass=15,add=TRUE) -legend("topright",legend=names(col.vec),col=col.vec,pch=1) -plot(log(data.summarise$MAP),data.summarise$BATOT, - ,col=col.vec[data.summarise$set],cex=0.1) -fun.boxplot.breaks(log(data.summarise$MAP),data.summarise$BATOT, - Nclass=15,add=TRUE) +plot(log(data.summarise$MAP), data.summarise$BATOT, + col = col.vec[data.summarise$set], cex = 0.1, ylim = c(0, 190)) +fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$BATOT, + Nclass = 15, add = TRUE) -by(data.summarise,INDICES=data.summarise$set, - function(data,col.vec) {x11();plot(data$MAP,data$BATOT, - main=unique(data$set), - col=col.vec[data$set])},col.vec) plot(data.summarise$MAP,data.summarise$sd.SLA, - ,col=col.vec[data.summarise$set]) + ,col=col.vec[data.summarise$set], log = 'x', cex = 0.5) plot(data.summarise$MAP,data.summarise$mean.SLA, - ,col=col.vec[data.summarise$set]) + ,col=col.vec[data.summarise$set], cex = 0.5, log = 'x') pch.vec <- 1:14 @@ -138,24 +252,102 @@ legend("topright",legend=names(col.vec),col=col.vec,pch=pch.vec) +pdf("figs/test.processed/boxplot.traits.sd.pdf", + width = 12, height = 7) +par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) +for (i in paste("sd",trait,sep='.')){ + set.vec <- sapply(levels(factor(data.summarise$ecocode[ + !is.na(data.summarise[[i]])])), + function(x) gsub(paste(" ", + gsub("^([a-zA-Z]* )", "",x), + sep = ""), + "", x, fixed = TRUE)) + boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3, + main=i, + col = col.vec[set.vec]) +} +dev.off() -# -library(ggplot2) -qplot(data=data.summarise,x=set,y=BATOT,geom="boxplot",las=3) -ggplot(data.summarise[,], aes( x=n_sp, y=sd.Wood.density) ) +facet_wrap(~set)+ - geom_point(size=1 ) + geom_density2d() - - -ggplot(data.all[,], aes( x=BATOT, y=Wood.density.CWM.fill) ) +facet_wrap(~set)+ - geom_point(size=1 ) + geom_density2d() - - - - - +pdf("figs/test.processed/boxplot.traits.mean.pdf", + width = 12, height = 7) +par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) +for (i in paste("mean",trait,sep='.')){ + set.vec <- sapply(levels(factor(data.summarise$ecocode[ + !is.na(data.summarise[[i]])])), + function(x) gsub(paste(" ", + gsub("^([a-zA-Z]* )", "",x), + sep = ""), + "", x, fixed = TRUE)) + boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3, + main=i, + col = col.vec[set.vec]) +} +dev.off() +pdf("figs/test.processed/boxplot.div.pdf", + width = 12, height = 7) +par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1)) +for (i in c('shannon', 'simpson')){ + set.vec <- sapply(levels(factor(data.summarise$ecocode[ + !is.na(data.summarise[[i]])])), + function(x) gsub(paste(" ", + gsub("^([a-zA-Z]* )", "",x), + sep = ""), + "", x, fixed = TRUE)) + boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3, + main=i, + col = col.vec[set.vec]) +} +dev.off() +pdf("figs/test.processed/boxplot.nsp.pdf", + width = 12, height = 7) +par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1)) +for (i in c('n_sp', 'count')){ + set.vec <- sapply(levels(factor(data.summarise$ecocode[ + !is.na(data.summarise[[i]])])), + function(x) gsub(paste(" ", + gsub("^([a-zA-Z]* )", "",x), + sep = ""), + "", x, fixed = TRUE)) + boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3, + main=i, + col = col.vec[set.vec]) +} +dev.off() +fun.boxplot.data <- function(data, x, y, ...){ + fun.boxplot.breaks(data[[x]]*data[['BATOT']], data[[y]], + Nclass = 15, add = FALSE, + main = unique(data$ecocode), ...) +} + +pdf("figs/test.processed/Wood.density.CWM.fill.vs.BA.G.pdf") +for (i in sets){ + data.tmp <- data.all[data.all$set == i,] + if(nlevels(factor(data.tmp$ecocode)) > 1) + par(mfrow = c(2, ceiling( nlevels(factor(data.tmp$ecocode))/2))) + by(data.tmp, data.tmp$ecocode, fun.boxplot.data, + x = 'Wood.density.CWM.fill', + y = 'BA.G' ) +} +dev.off() +## plot hist of species abundance +pdf("figs/test.processed/speciesrank.pdf") +for (i in sets){ + data.tmp <- data.all[data.all$set == i,] + if(nlevels(factor(data.tmp$ecocode)) > 1) + par(mfrow = c(2, ceiling( nlevels(factor(data.tmp$ecocode))/2))) + if(nlevels(factor(data.tmp$ecocode)) == 1) + par(mfrow = c(1,1)) + by(data.tmp, data.tmp$ecocode, + function(data) hist(table(data[['sp']])/length(data[['sp']]), + xlab = 'n obs', + ylab = ' n species', + main=unique(data[['ecocode']]) + )) +} +dev.off() diff --git a/R/process.data/process-fun.R b/R/process.data/process-fun.R index e3a0931fe86c47d10b6a5b0ca3e55d6acc4f9c4b..2aafe9a275d886b8ca034aa4dd5153d34def00a5 100644 --- a/R/process.data/process-fun.R +++ b/R/process.data/process-fun.R @@ -1,7 +1,7 @@ #################################################### function to process data install all unstallled packages source("R/packages.R") check_packages(c("reshape", "data.table", "doParallel", "data.table","Rcpp")) - + ######################### ##' .. Compute the basal area per area of competitor in a plot.. @@ -17,33 +17,33 @@ BA.fun <- function(diam, weights) { } - + ############################################## ############################################## #### FUNCTION TO FORMAT TRAITS ## std of trait fun.std.trait <- function(trait) { - (trait - mean(trait,na.rm=TRUE))/sd(trait,na.rm=TRUE) + (trait - mean(trait, na.rm=TRUE))/sd(trait, na.rm=TRUE) } ## std of trait with global mean and sd NEED TO PROVIDES MEAN AND SD IN log10 -fun.std.trait.global <- function(trait,mean.global,sd.global) { +fun.std.trait.global <- function(trait, mean.global, sd.global) { (trait - mean.global)/sd.global } ## function to standardized all traits in data and remove duplicated sp fun.std.data <- function(data.TRAITS) { - data.TRAITS <- subset(data.TRAITS,subset=!duplicated(data.TRAITS[["sp"]])) - traits.mean <- c("Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.density.mean","Max.height.mean") + data.TRAITS <- subset(data.TRAITS, subset=!duplicated(data.TRAITS[["sp"]])) + traits.mean <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean", "Wood.density.mean", "Max.height.mean") for (i in traits.mean) { data.TRAITS[[i]] <- fun.std.trait(log10(data.TRAITS[[i]])) } return(data.TRAITS) } ## function to standardized all traits in data and remove duplicated sp with GLOBAL MEAN -fun.std.data.global <- function(data.TRAITS,mean.global,sd.global) { - data.TRAITS <- subset(data.TRAITS,subset=!duplicated(data.TRAITS[["sp"]])) - traits.mean <- c("Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.density.mean","Max.height.mean") +fun.std.data.global <- function(data.TRAITS, mean.global, sd.global) { + data.TRAITS <- subset(data.TRAITS, subset=!duplicated(data.TRAITS[["sp"]])) + traits.mean <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean", "Wood.density.mean", "Max.height.mean") for (i in traits.mean) { data.TRAITS[[i]] <- fun.std.trait.global(log10(data.TRAITS[[i]]), mean.global[i], @@ -51,42 +51,53 @@ fun.std.data.global <- function(data.TRAITS,mean.global,sd.global) { return(data.TRAITS) } -##### extract traits for the species in vec.sp -fun.extract.trait.add.missing.sp.NA <- function(vec.sp,traits.data,trait.name) { +##### extract traits for the species in vec.sp +fun.extract.trait.add.missing.sp.NA <- function(vec.sp, traits.data, + trait.name) { # get value - trait.t <- traits.data[traits.data[["sp"]] %in% vec.sp,trait.name] + trait.t <- traits.data[traits.data[["sp"]] %in% vec.sp, trait.name] ## add NA for missing sp - trait.t <- c(trait.t,rep(NA,sum(! (vec.sp %in% traits.data[["sp"]])))) + trait.t <- c(trait.t, rep(NA, + sum(! (vec.sp %in% traits.data[["sp"]])))) ## reorder - names(trait.t) <- c(as.character(traits.data[traits.data[["sp"]] %in% vec.sp,"sp"]), + names(trait.t) <- c(as.character(traits.data[traits.data[["sp"]] %in% + vec.sp, "sp"]), as.character(vec.sp[! (vec.sp %in% traits.data[["sp"]])])) - trait <- (trait.t)[match(vec.sp,names(trait.t))] + trait <- (trait.t)[match(vec.sp, names(trait.t))] return(trait) } -# FUNCTION TO EXTRACT a trait with 3 different option to fill missing variable: 1=NO FILLING 2-GENUS mean 3= GENUS mean and if not community mean +# FUNCTION TO EXTRACT a trait with 3 different option to fill missing variable: +# 1=NO FILLING 2-GENUS mean 3= GENUS mean and if not community mean # using data frame with sp mean and genus T/F -fun.trait.format <- function(trait,traits.data,vec.sp) { +fun.trait.format <- function(trait, traits.data, vec.sp) { ## extract trait - genus.name <- paste(trait,"genus",sep=".") - trait.name <- paste(trait,"mean",sep=".") - trait.genus <-fun.extract.trait.add.missing.sp.NA(vec.sp,traits.data,trait.name) + genus.name <- paste(trait, "genus", sep=".") + trait.name <- paste(trait, "mean", sep=".") + trait.genus <-fun.extract.trait.add.missing.sp.NA(vec.sp, traits.data, + trait.name) # extract genus - genus <- fun.extract.trait.add.missing.sp.NA(vec.sp,traits.data,genus.name) + genus <- fun.extract.trait.add.missing.sp.NA(vec.sp, traits.data, + genus.name) ## fill with mean or keep only species value trait.fill <- trait.species <-trait.genus - trait.fill[is.na(trait.fill)] <- mean( traits.data[,trait.name],na.rm=TRUE) - trait.species[genus==TRUE & !is.na(genus)] <- NA - return(cbind(trait.species,trait.genus,trait.fill)) + trait.fill[is.na(trait.fill)] <- mean( traits.data[, trait.name], + na.rm = TRUE) + trait.species[genus == TRUE & !is.na(genus)] <- NA + res <- cbind(trait.species, trait.genus, trait.fill) + dimnames(res) <- list(vec.sp, c('species','genus','fill')) + return(res) } ## function that generate a list of traits data for all traits -format.traits.list <- function(sp.vec,data.TRAITS) { - traits <- c("Leaf.N","Seed.mass","SLA","Wood.density","Max.height") +format.traits.list <- function(sp.vec, data.TRAITS) { + traits <- c("Leaf.N", "Seed.mass", "SLA", "Wood.density", "Max.height") sp.name <- levels(factor(sp.vec)) - res.l <- lapply(traits,FUN=fun.trait.format,traits.data=data.TRAITS,vec.sp=sp.name) + res.l <- lapply(traits, FUN = fun.trait.format, + traits.data = data.TRAITS, + vec.sp = sp.name) names(res.l) <- traits return(res.l) } @@ -96,37 +107,42 @@ format.traits.list <- function(sp.vec,data.TRAITS) { ### CWM function ## compute the weight mean from traits matrix with 3 column for different missing data filling option and a BA vector -fun.weighted.mean.trait <- function(BA,T,select) { - if(sum(select)>1) { - return(as.vector(BA[select]%*%T[select,])/sum(BA[select])) } - if(sum(select)==1) { - return(as.vector(T[select,])) } +fun.weighted.mean.trait <- function(BA, T, select) { + if(length(select)>1) { + return(as.vector(sum(BA[select] * T[select, 3])) / sum(BA[select])) } + if(length(select) == 1) { + return(as.vector(T[select, 3])) } } ## compute the weight mean from traits matrix and BA vector for Tn with BA and BAlog and abs(Tn-Tf) with BA or BAlog -fun.weighted.mean.4.types <- function(BA,trait.list,trait.sp.focal) { - select.BA.no0 <- BA>0 - CWM.tn <- fun.weighted.mean.trait(BA,trait.list,select.BA.no0) - ## CWM.tn.log <- fun.weighted.mean.trait(log(BA+1),trait.list,select.BA.no0) - #abs dist - CWM.abs.tdist <- fun.weighted.mean.trait(BA,abs(trait.sp.focal-trait.list),select.BA.no0) - ## CWM.abs.tdist.log <- fun.weighted.mean.trait(log(BA+1),abs(trait.sp.focal-trait.list),select.BA.no0) - - # compute percentage of traits obs - perc.genus <- sum(!is.na(trait.list[select.BA.no0,2]))/sum(select.BA.no0) - perc.species <- sum(!is.na(trait.list[select.BA.no0,1]))/sum(select.BA.no0) - return(c(CWM.tn[3],CWM.abs.tdist[3],perc.genus,perc.species))#CWM.tn.log,CWM.abs.tdist.log, +fun.weighted.mean.4.types <- function(BA, trait.list, trait.sp.focal) { + select.BA.no0 <- names(BA)[BA > 0] + CWM.tn <- fun.weighted.mean.trait(BA, trait.list, select.BA.no0) + #abs dist + CWM.abs.tdist <- fun.weighted.mean.trait(BA, + abs(trait.sp.focal-trait.list), + select.BA.no0) + + # compute percentage of traits obs + perc.genus <- sum(!is.na(trait.list[select.BA.no0, 2]))/ + length(select.BA.no0) + perc.species <- sum(!is.na(trait.list[select.BA.no0, 1]))/ + length(select.BA.no0) + + ## to do add percentage EV and angio adn compute per type + return(c(CWM.tn, CWM.abs.tdist, perc.genus, perc.species)) } ## function compute the 4 CWM and tf for one traits -format.one.trait.CWM <- function(trait.list,sp.num,BA) { - trait.sp.focal <- trait.list[sp.num,1] +format.one.trait.CWM <- function(trait.list, sp.num, BA) { + trait.sp.focal <- trait.list[sp.num, 1] if(!is.na(trait.sp.focal)) { if(sum(BA>0)>0) { - res.vec <- fun.weighted.mean.4.types(BA,trait.list,trait.sp.focal) } - else { res.vec <- rep(0,4) } + res.vec <- fun.weighted.mean.4.types(BA, trait.list, + trait.sp.focal) } + else { res.vec <- rep(0, 4) } } - else { res.vec <- rep(NA,4) } - return(c(trait.sp.focal,res.vec)) + else { res.vec <- rep(NA, 4) } + return(c(trait.sp.focal, res.vec)) } ############################################## @@ -134,68 +150,78 @@ format.one.trait.CWM <- function(trait.list,sp.num,BA) { ##### FUNCTIONS FOR PLOT DATA ## function to compute BA of neighbour on the plot -fun.plot.BA.neigh <- function(i,BA.a,sp.fac,obs.id,plot) { - select <- obs.id!=i & plot==plot[obs.id==i ] +fun.plot.BA.neigh <- function(i, BA.a, sp.fac, obs.id, plot) { + select <- obs.id!=i & plot == plot[obs.id == i ] if(sum(select)>0) { - BA.a.n <- tapply(BA.a[select],INDEX=sp.fac[select],FUN=sum,na.rm=TRUE) + BA.a.n <- tapply(BA.a[select], INDEX = sp.fac[select], + FUN = sum, na.rm = TRUE) BA.a.n[is.na(BA.a.n)] <- 0 return(BA.a.n) } else{ return(0) } } ## function to compute CWM from local BA on the plot for one individual -fun.CWM.traits.plot <- function(i,obs.id,traits.list,sp.fac,plot,BA.a) { +fun.CWM.traits.plot <- function(i, obs.id, traits.list, sp.fac, plot, BA.a) { ## compute local BA - BA.a.n <- fun.plot.BA.neigh(i,BA.a,sp.fac,obs.id,plot) + BA.a.n <- fun.plot.BA.neigh(i, BA.a, sp.fac, obs.id, plot) BATOT <- sum(BA.a.n) ## BATOT.log <- sum(log(BA.a.n+1)) ## compute community weighted mean for each traits - res.list <- lapply(traits.list,FUN=format.one.trait.CWM,sp.num=unclass(sp.fac)[obs.id==i],BA.a.n) + res.list <- lapply(traits.list, FUN=format.one.trait.CWM, sp.num=(sp.fac)[obs.id == i], BA.a.n) ## create matrix of data - res.vec <- c(i,unlist(res.list),BATOT)#,BATOT.log + res.vec <- c(i, unlist(res.list), BATOT)#, BATOT.log return(res.vec) } ##---------------------------------------- ## function to compute CWM from local BA on the plot for all individuals -fun.CWM.traits.all.plot <- function(obs.id, plot, diam, sp,data.traits, weight,parallel) { -name.var <- c("focal","CWM.fill", - "abs.CWM.fill", - "perc.genus","perc.species") -list.traits <- format.traits.list(sp,data.traits) -cat("Start compute traits data for n observation = ", length(diam),"\n") -sp.fac <- factor(sp) -BA.a <- BA.fun(diam, weights =weight) - if (parallel) { - require(doParallel,quietly=TRUE) - registerDoParallel() - ans <- foreach(i = obs.id, .combine = rbind) %dopar% { - fun.CWM.traits.plot(i,obs.id,traits.list=list.traits,sp.fac,plot,BA.a) } +fun.CWM.traits.all.plot <- function(obs.id, plot, diam, sp, data.traits, weight, parallel) { +name.var <- c("focal", "CWM.fill", + "abs.CWM.fill", + "perc.genus", "perc.species") +list.traits <- format.traits.list(sp, data.traits) +cat("Start compute traits data for n observation = ", length(diam), "\n") +sp.fac <- as.character(sp) +BA.a <- BA.fun(diam, weights =weight) + if (parallel) { + require(doParallel, quietly=TRUE) + registerDoParallel() + ans <- foreach(i = obs.id, .combine = rbind) %dopar% { + fun.CWM.traits.plot(i, obs.id, traits.list = list.traits, + sp.fac, + plot, BA.a) } }else{ - ans <- t(sapply(obs.id,fun.CWM.traits.plot,obs.id,traits.list=list.traits,sp.fac,plot,BA.a)) + ans <- t(sapply(obs.id, fun.CWM.traits.plot, obs.id, + traits.list = list.traits, sp.fac, plot, BA.a)) } colnames(ans) <- c("obs.id", unlist(lapply(names(list.traits), - FUN=function(x,y) paste(x,y,sep="."), + FUN=function(x, y) paste(x, y, sep = "."), y=name.var)), "BATOT") return(ans) } ## to be lapply over census -fun.CWM.traits.all.plot.l <- function(census.id,census,obs.id, plot, diam, sp,data.TRAITS, weight,parallel) { - res <- fun.CWM.traits.all.plot(obs.id=obs.id[census == census.id], plot=plot[census == census.id], - diam=diam[census == census.id], sp=(sp[census == census.id]), - data.traits=data.TRAITS, weight=weight[census == census.id],parallel=parallel) +fun.CWM.traits.all.plot.l <- function(census.id, census, obs.id, plot, diam, + sp, data.TRAITS, weight, parallel) { + res <- fun.CWM.traits.all.plot(obs.id = obs.id[census == census.id], + plot = plot[census == census.id], + diam = diam[census == census.id], + sp = (sp[census == census.id]), + data.traits = data.TRAITS, + weight = weight[census == census.id], + parallel = parallel) return(res) } ## function that lapply over census -fun.CWM.traits.all.plot.census <- function(census,obs.id, plot, diam, sp,data.TRAITS, weight,parallel) { +fun.CWM.traits.all.plot.census <- function(census, obs.id, plot, diam, sp, data.TRAITS, weight, parallel) { unique.census <- unique(census) - res.l <-lapply(unique.census,FUN=fun.CWM.traits.all.plot.l,census=census, - obs.id=obs.id, plot=plot, diam=diam, sp=sp,data.TRAITS=data.TRAITS, weight=weight,parallel=parallel) - res <- do.call("rbind",res.l) - res <- res[match(obs.id, res[,"obs.id"]), ] + res.l <-lapply(unique.census, FUN=fun.CWM.traits.all.plot.l, census=census, + obs.id=obs.id, plot=plot, diam=diam, sp=sp, + data.TRAITS=data.TRAITS, weight=weight, parallel=parallel) + res <- do.call("rbind", res.l) + res <- res[match(obs.id, res[, "obs.id"]), ] return(res) } @@ -210,75 +236,76 @@ fun.CWM.traits.all.plot.census <- function(census,obs.id, plot, diam, sp,data.TR library(Rcpp) sourceCpp("R/process.data/georges.cpp") ## function to compute local BA based on Xy and compute CWM for each traits -fun.CWM.traits.xy <- function(i,obs.id,traits.list,sp.num,sp.length,xy.table,BA.a,Rlim){ - require(Rcpp,quietly=TRUE) +fun.CWM.traits.xy <- function(i, obs.id, traits.list, sp.num, sp.names, sp.length, xy.table, BA.a, Rlim){ + require(Rcpp, quietly=TRUE) ## compute local BA -i.t <- seq_len(length(obs.id))[obs.id==i] +i.t <- seq_len(length(obs.id))[obs.id == i] BA.n <- areas_by_species_within_neighbourhood(idx=i.t - 1L, - x=xy.table[,"x"], y=xy.table[,"y"], r=Rlim, d=BA.a, type=as.vector(sp.num) - 1L, n_types=sp.length) + x=xy.table[, "x"], y=xy.table[, "y"], r=Rlim, d=BA.a, type=as.vector(sp.num) - 1L, n_types=sp.length) BATOT <- sum(BA.n) -## BATOT.log <- sum(log(BA.n+1)) +names(BA.n) <- sp.names ## compute community weighted mean for each traits -res.list <- lapply(traits.list,FUN=format.one.trait.CWM,sp.num[obs.id==i],BA.n) -res.vec <- c(i,unlist(res.list),BATOT) +res.list <- lapply(traits.list, FUN=format.one.trait.CWM, sp.names[sp.num][obs.id == i], BA.n) +res.vec <- c(i, unlist(res.list), BATOT) return(res.vec) } - +####### TODO NEED TO CHECK WHY NEGQTIVE LOG ## -fun.CWM.traits.all.XY <- function(obs.id, xy.table, diam, sp,data.TRAITS, Rlim ){ - require(Rcpp,quietly=TRUE) - require(plyr,quietly=TRUE) -name.var <- c("focal","CWM.fill", +fun.CWM.traits.all.XY <- function(obs.id, xy.table, diam, sp, data.TRAITS, Rlim ){ + require(Rcpp, quietly=TRUE) + require(plyr, quietly=TRUE) +name.var <- c("focal", "CWM.fill", "abs.CWM.fill", - "perc.genus","perc.species") -cat("Start process data for one census of one cluster with number of observation ", nrow(xy.table),"\n") -BA.a <- BA.fun(diam, weights = 1/(pi * Rlim^2)) + "perc.genus", "perc.species") +cat("Start process data for one census of one cluster with number of observation ", nrow(xy.table), "\n") +BA.a <- BA.fun(diam, weights = 1/(pi * Rlim^2)) sp.num <- unclass(factor(sp)) -## format traits list -list.traits <- format.traits.list(sp,data.TRAITS) +sp.names <- as.character(levels(factor(sp))) + ## format traits list +list.traits <- format.traits.list(sp, data.TRAITS) # lapply function ans <- t(sapply(obs.id, - fun.CWM.traits.xy,obs.id=obs.id, - traits.list=list.traits,sp.num=sp.num, + fun.CWM.traits.xy, obs.id=obs.id, + traits.list=list.traits, sp.num=sp.num, sp.names = sp.names, sp.length=length(levels(sp.num)), - xy.table=xy.table,BA.a=BA.a,Rlim=Rlim)) -colnames(ans) <- c("obs.id",unlist(lapply(names(list.traits), - FUN=function(x,y) paste(x,y,sep="."), + xy.table=xy.table, BA.a=BA.a, Rlim=Rlim)) +colnames(ans) <- c("obs.id", unlist(lapply(names(list.traits), + FUN=function(x, y) paste(x, y, sep="."), y=name.var)), "BATOT") return(ans) } ## to be loop over census -fun.CWM.traits.all.XY.l <- function(census.id,census,obs.id, xy.table, diam, sp,data.TRAITS, Rlim){ -res <- fun.CWM.traits.all.XY(obs.id[census == census.id], xy.table[census == census.id,], - diam[census == census.id], sp[census == census.id], - data.TRAITS, Rlim) +fun.CWM.traits.all.XY.l <- function(census.id, census, obs.id, xy.table, diam, sp, data.TRAITS, Rlim){ +res <- fun.CWM.traits.all.XY(obs.id[census == census.id], xy.table[census == census.id, ], + diam[census == census.id], sp[census == census.id], + data.TRAITS, Rlim) return(res) } ## lapply over census -fun.CWM.traits.all.XY.census <- function(census,obs.id, xy.table, diam, sp,data.TRAITS, Rlim){ +fun.CWM.traits.all.XY.census <- function(census, obs.id, xy.table, diam, sp, data.TRAITS, Rlim){ unique.census <- unique(census) -res.l <-lapply(unique.census,FUN=fun.CWM.traits.all.XY.l,census=census, - obs.id=obs.id,xy.table= xy.table, - diam=diam, sp=sp,data.TRAITS=data.TRAITS,Rlim= Rlim) -res <- do.call("rbind",res.l) -res <- res[match(obs.id, res[,"obs.id"]), ] +res.l <-lapply(unique.census, FUN=fun.CWM.traits.all.XY.l, census=census, + obs.id=obs.id, xy.table= xy.table, + diam=diam, sp=sp, data.TRAITS=data.TRAITS, Rlim= Rlim) +res <- do.call("rbind", res.l) +res <- res[match(obs.id, res[, "obs.id"]), ] return(res) } ### to be lapply over cluster -fun.CWM.traits.all.XY.per.cluster <- function(i, data.tree,data.TRAITS, Rlim, xy.name = c("x", "y")){ - data.tree.s <- subset(data.tree, subset = data.tree[["cluster"]] == i) - CWM.temp <- fun.CWM.traits.all.XY.census(census=data.tree.s[["census"]],obs.id=data.tree.s[["obs.id"]], - xy.table=data.tree.s[, xy.name], diam=data.tree.s[["D"]], - sp=data.tree.s[["sp"]],data.TRAITS, Rlim) +fun.CWM.traits.all.XY.per.cluster <- function(i, data.tree, data.TRAITS, Rlim, xy.name = c("x", "y")){ + data.tree.s <- subset(data.tree, subset = data.tree[["cluster"]] == i) + CWM.temp <- fun.CWM.traits.all.XY.census(census=data.tree.s[["census"]], obs.id=data.tree.s[["obs.id"]], + xy.table=data.tree.s[, xy.name], diam=data.tree.s[["D"]], + sp=data.tree.s[["sp"]], data.TRAITS, Rlim) ### TEST GOOD ORDER - if (sum(!(CWM.temp[,"obs.id"] == data.tree.s[["obs.id"]])) > 0) + if (sum(!(CWM.temp[, "obs.id"] == data.tree.s[["obs.id"]])) > 0) stop("rows not in the good order") return(CWM.temp) } @@ -290,68 +317,89 @@ fun.CWM.traits.all.XY.per.cluster <- function(i, data.tree,data.TRAITS, Rlim, xy ########################################### #### FUNCTION TO FORMAT PLOT TYPE DATA -### function to test enough species diversity in ecoregion -fun.test.enough.sp.in.ecoregion <- function(data,min.sp=2,thres.prop=0.05){ -sum((table(data[["sp"]])/length(data[["sp"]]))>thres.prop)>min.sp +### function to test enough species diversity in ecoregion TODO NEED TO DO THAT BEFORE TO MERGE ECOREGION +fun.test.enough.sp.in.ecoregion <- function(data, min.sp=5, thres.prop=0.05){ +sum((table(data[["sp"]])/length(data[["sp"]]))>thres.prop)>min.sp } ## function to merge with data.table conversion -fun.merged.DT <- function(data.1,data.2,by.var){ +fun.merged.DT <- function(data.1, data.2, by.var){ DT.2 <- data.table(data.2) - setkeyv(DT.2, by.var) + setkeyv(DT.2, by.var) DT.1 <- data.table(data.1) - setkeyv(DT.1, by.var) - data.merged <- merge(DT.1, DT.2) + setkeyv(DT.1, by.var) + data.merged <- merge(DT.1, DT.2) return(data.merged) } - + ##### function to generate data in good format per ecoregion -fun.data.per.ecoregion <- function(ecoregion, data.tot, site.name, - data.TRAITS , out.dir = "output/processed/",parallel=TRUE,std.traits) { +fun.data.per.ecoregion <- function(ecoregion, data.tot, site.name, + data.TRAITS , out.dir = "output/processed/", parallel=TRUE, std.traits, + mean.global, sd.global) { data.tot <- data.table(data.tot) - data <- data.tot[ecocode == ecoregion, ] + data <- data.tot[ecocode == ecoregion, ] rm(data.tot) if(fun.test.enough.sp.in.ecoregion(data)){ - cat(paste("number of obs in ecoregion", ecoregion, " = ", nrow(data)),"\n") - path <- file.path(out.dir, site.name, ecoregion) - dir.create(path, recursive = TRUE, showWarnings = FALSE) - data.CWM <-fun.CWM.traits.all.plot.census(census= data[["census"]],obs.id=data[["obs.id"]], - plot=data[["plot"]], diam=data[["D"]], - sp=data[["sp"]],data.TRAITS=data.TRAITS, - weight=data[["weights"]],parallel=parallel) + cat(paste("number of obs in ecoregion", + ecoregion, " = ", nrow(data)), "\n") + if(!(std.traits %in% c('no', 'local', 'global'))) stop('std.traits need to be no local or global') + # std traits data + if(std.traits == "local") data.TRAITS <- fun.std.data(data.TRAITS[data.TRAITS$sp %in% unique(data$sp), ]) + if(std.traits == "global") data.TRAITS <- fun.std.data.global(data.TRAITS[data.TRAITS$sp %in% + unique(data$sp), ], + mean.global, sd.global) + + path <- file.path(out.dir, site.name, ecoregion) + dir.create(path, recursive = TRUE, showWarnings = FALSE) + data.CWM <-fun.CWM.traits.all.plot.census(census= data[["census"]], + obs.id=data[["obs.id"]], + plot=data[["plot"]], + diam=data[["D"]], + sp=data[["sp"]], + data.TRAITS=data.TRAITS, + weight=data[["weights"]], + parallel=parallel) ### create data frame and merge - data.merged <- fun.merged.DT(data,data.CWM,"obs.id") - cat('merge data and CWM done',"\n") + data.merged <- fun.merged.DT(data, data.CWM, "obs.id") + cat('merge data and CWM done', "\n") ## add Phylo.group and Pheno.T to the data - data.merged <- merge(data.merged,data.TRAITS[,c("sp","Phylo.group","Pheno.T",'LeafType.T')],by="sp") + data.merged <- merge(data.merged, data.TRAITS[, c("sp", + "Phylo.group", + "Pheno.T", + 'LeafType.T')], + by="sp") ## write data - if(std.traits=='local'){ - write.csv(data.merged, file = file.path(path, "data.tree.tot.csv"), row.names = FALSE) + if(std.traits == 'local'){ + write.csv(data.merged, file = file.path(path, + "data.tree.tot.csv"), + row.names = FALSE) } - if(std.traits=='no'){ - write.csv(data.merged, file = file.path(path, "data.tree.tot.no.std.csv"), row.names = FALSE) + if(std.traits == 'no'){ + write.csv(data.merged, file = file.path(path, + "data.tree.tot.no.std.csv"), + row.names = FALSE) } - if(std.traits=='global'){ - write.csv(data.merged, file = file.path(path, "data.tree.tot.global.csv"), row.names = FALSE) + if(std.traits == 'global'){ + write.csv(data.merged, file = file.path(path, "data.tree.tot.global.csv"), row.names = FALSE) } }else{ - cat(paste("Not enough species in ecoregion", ecoregion),"\n") + cat(paste("Not enough species in ecoregion", ecoregion), "\n") } } ########################################################################### ####### FUNCTION TO FORMAT DATA FOR BIG TROPICAL PLOT fun.remove.tree.in.buffer.cluster <- function(data){ -obs.id.no.buffer <- as.vector(unlist(lapply(unique(data[["cluster"]]),FUN=fun.remove.tree.in.buffer.zone,data))) -data <- subset(data,subset=data[["obs.id"]] %in% obs.id.no.buffer) +obs.id.no.buffer <- as.vector(unlist(lapply(unique(data[["cluster"]]), FUN=fun.remove.tree.in.buffer.zone, data))) +data <- subset(data, subset=data[["obs.id"]] %in% obs.id.no.buffer) return(data) } -fun.remove.tree.in.buffer.zone <-function(i,data){ -data.t <- subset(data,subset=data[["cluster"]]==i) -data.no.buffer <- subset(data.t, subset = ((data.t[["x"]] - 15) > 0 & +fun.remove.tree.in.buffer.zone <-function(i, data){ +data.t <- subset(data, subset=data[["cluster"]] == i) +data.no.buffer <- subset(data.t, subset = ((data.t[["x"]] - 15) > 0 & (data.t[["x"]] + 15) < max(data.t[["x"]])) & ((data.t[["y"]] - 15) > 0 & (data.t[["y"]] + 15) < max(data.t[["y"]]))) @@ -362,94 +410,103 @@ return( (as.vector(data.no.buffer[["obs.id"]]))) #### BIG FUNCTION TO RUN ALL OVER -fun.data.per.bigplot <- function(data, site.name, data.TRAITS = NA, Rlim = 15, xy.name, - sp.code = "sp", sp.code2 = "sp", - out.dir = "output/processed/",std.traits) { - require(data.table,quietly=TRUE) +fun.data.per.bigplot <- function(data, site.name, data.TRAITS = NA, + Rlim = 15, xy.name, + sp.code = "sp", sp.code2 = "sp", + out.dir = "output/processed/", std.traits, + mean.global, sd.global) { +require(data.table, quietly=TRUE) for (i in unique(data$ecocode)){ - cat("ecoregion :", i,"\n") - path <- file.path(out.dir, site.name,i) - dir.create(path, recursive = TRUE, showWarnings = FALSE) - data.t <- data[data$ecocode==i,] - cat(paste("clusters :", paste(unique(data.t[["cluster"]]),collapse=" ")),"\n") + cat("ecoregion :", i, "\n") + path <- file.path(out.dir, site.name, i) + dir.create(path, recursive = TRUE, showWarnings = FALSE) + data.t <- data[data$ecocode == i, ] + cat(paste("clusters :", paste(unique(data.t[["cluster"]]), + collapse=" ")), "\n") + if(!(std.traits %in% c('no', 'local', 'global'))) + stop('std.traits need to be no local or global') + # std traits data + if(std.traits == "local") data.TRAITS <- fun.std.data(data.TRAITS[data.TRAITS$sp %in% unique(data$sp), ]) + if(std.traits == "global") data.TRAITS <- fun.std.data.global(data.TRAITS[data.TRAITS$sp %in% + unique(data$sp), ], + mean.global, sd.global) + ## compute CWM matrix - list.CWM.data <- lapply(unique(data.t[["cluster"]]), FUN = fun.CWM.traits.all.XY.per.cluster, - data.tree = data.t,data.TRAITS=data.TRAITS, Rlim = Rlim, xy.name = xy.name) - data.CWM <- do.call(rbind,list.CWM.data) + list.CWM.data <- lapply(unique(data.t[["cluster"]]), FUN = fun.CWM.traits.all.XY.per.cluster, + data.tree = data.t, data.TRAITS=data.TRAITS, Rlim = Rlim, xy.name = xy.name) + data.CWM <- do.call(rbind, list.CWM.data) ### create data frame and merge - data.merged <- fun.merged.DT(data.t,data.CWM,"obs.id") + data.merged <- fun.merged.DT(data.t, data.CWM, "obs.id") ## REMOVE TREE IN BUFFER ZONE data.merged <- fun.remove.tree.in.buffer.cluster(data.merged) - cat("dim after buffer tree removed",dim(data.merged),'vs ',dim(data.t),"\n") + cat("dim after buffer tree removed", dim(data.merged), 'vs ', dim(data.t), "\n") ## add Phylo.group and Pheno.T to the data - data.merged <- merge(data.merged,data.TRAITS[,c("sp","Phylo.group","Pheno.T",'LeafType.T')],by="sp") + data.merged <- merge(data.merged, data.TRAITS[, c("sp", "Phylo.group", "Pheno.T", 'LeafType.T')], by="sp") - if(std.traits=='local'){ - write.csv(data.merged, file = file.path(path, "data.tree.tot.csv"), row.names = FALSE) + if(std.traits == 'local'){ + write.csv(data.merged, file = file.path(path, "data.tree.tot.csv"), row.names = FALSE) } - if(std.traits=='no'){ - write.csv(data.merged, file = file.path(path, "data.tree.tot.no.std.csv"), row.names = FALSE) + if(std.traits == 'no'){ + write.csv(data.merged, file = file.path(path, "data.tree.tot.no.std.csv"), row.names = FALSE) } - if(std.traits=='global'){ - write.csv(data.merged, file = file.path(path, "data.tree.tot.global.csv"), row.names = FALSE) + if(std.traits == 'global'){ + write.csv(data.merged, file = file.path(path, "data.tree.tot.global.csv"), row.names = FALSE) } - } + } } ######################################## -process_inventory_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed",std.traits=TRUE){ - process_dataset(set, path.formatted = path.formatted, path.processed = path.processed, type = "inventory",std.traits=std.traits) +process_inventory_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed", std.traits){ + process_dataset(set, path.formatted = path.formatted, path.processed = path.processed, type = "inventory", std.traits=std.traits) } -process_bigplot_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed", Rlim = 15,std.traits=TRUE){ - process_dataset(set, path.formatted = path.formatted, path.processed = path.processed, type = "bigplot", Rlim = Rlim,std.traits=std.traits) +process_bigplot_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed", Rlim = 15, std.traits){ + process_dataset(set, path.formatted = path.formatted, path.processed = path.processed, type = "bigplot", Rlim = Rlim, std.traits=std.traits) } ### FUNCTION TO TEST GOOD VAR function.test.vars.inventory.data <- function(data.tot){ -vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G","BA.G", "dead", - "Lon", "Lat", "weights","census") +vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot", "ecocode", "D", "G", "BA.G", "dead", + "Lon", "Lat", "weights", "census") if(sum(! vec.basic.var %in% names(data.tot))>0) stop(paste("missing variable", - vec.basic.var[! vec.basic.var %in% names(data.tot)],sep=" ")) + vec.basic.var[! vec.basic.var %in% names(data.tot)], sep=" ")) } function.test.vars.bigplot.data <- function(data.tot){ -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","plot", "cluster", "D", "G","BA.G", "dead", - "x", "y", "census") +vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "plot", "cluster", "D", "G", "BA.G", "dead", + "x", "y", "census") if(sum(! vec.basic.var %in% names(data.tot))>0) stop(paste("missing variable", - vec.basic.var[! vec.basic.var %in% names(data.tot)],sep=" ")) + vec.basic.var[! vec.basic.var %in% names(data.tot)], sep=" ")) } -process_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed", - type = "inventory", Rlim = 15,std.traits) { +process_dataset <- function(set, path.formatted = "output/formatted", path.processed = "output/processed", + type = "inventory", Rlim = 15, std.traits) { # load data - data.tree <- read.csv(file.path(path.formatted, set, "tree.csv"), stringsAsFactors = FALSE) - data.traits <- read.csv(file.path(path.formatted, set, "traits.csv"), stringsAsFactors = FALSE) - data.traits.std.global <- read.csv("output/formatted/traits.std.global.csv", stringsAsFactors = FALSE) + data.tree <- read.csv(file.path(path.formatted, set, "tree.csv"), stringsAsFactors = FALSE) + data.traits <- read.csv(file.path(path.formatted, set, "traits.csv"), stringsAsFactors = FALSE) + data.traits.std.global <- read.csv("output/formatted/traits.std.global.csv", stringsAsFactors = FALSE) mean.global <- data.traits.std.global[['mean.log10']] sd.global <- data.traits.std.global[['sd.log10']] names(mean.global) <- names(sd.global) <- data.traits.std.global[['trait']] - if(!(std.traits %in% c('no','local','global'))) stop('std.traits need to be no local or global') - # std traits data - if(std.traits=="local") data.traits <- fun.std.data(data.traits) - if(std.traits=="global") data.traits <- fun.std.data.global(data.traits,mean.global,sd.global) - + # remove nas - data.tree <- subset(data.tree, subset = !is.na(data.tree[["D"]])) - if(type=="inventory"){ + data.tree <- subset(data.tree, subset = !is.na(data.tree[["D"]])) + if(type == "inventory"){ # split into ecoregions function.test.vars.inventory.data(data.tree) - tmp <- lapply(unique(data.tree[["ecocode"]]), FUN = fun.data.per.ecoregion, data.tot = data.tree, - site.name = set, data.TRAITS = data.traits, out.dir = path.processed,std.traits=std.traits) + tmp <- lapply(unique(data.tree[["ecocode"]]), FUN = fun.data.per.ecoregion, data.tot = data.tree, + site.name = set, data.TRAITS = data.traits, out.dir = path.processed, std.traits=std.traits, + mean.global=mean.global, sd.global=sd.global) } else { function.test.vars.bigplot.data(data.tree) - fun.data.per.bigplot(data=data.tree,site.name=set,data.TRAITS=data.traits,Rlim=Rlim, - xy.name=c("x","y"),std.traits=std.traits) + fun.data.per.bigplot(data=data.tree, site.name=set, data.TRAITS=data.traits, Rlim=Rlim, + xy.name=c("x", "y"), std.traits=std.traits, + mean.global=mean.global, sd.global=sd.global) } - cat("finished", file = file.path(path.processed, set, "Done.txt")) + cat("finished", file = file.path(path.processed, set, "Done.txt")) } diff --git a/R/process.data/test.tree.CWM-fun.R b/R/process.data/test.tree.CWM-fun.R index e5579b9efcf224a9f62378fc57d0639f7173d1b3..b64c054f8d937e253955ff06a5fd7ba4bedd93c6 100644 --- a/R/process.data/test.tree.CWM-fun.R +++ b/R/process.data/test.tree.CWM-fun.R @@ -203,10 +203,10 @@ if(!is.na(data.TRAITS[data.TRAITS[["sp"]]==samp.sp,"Wood.density.mean"]) & fun.test.value.one.ecoregion.I <- function(data.CWM,set,ecocode.select,path.formatted = "output/formatted" ){ data.tree <- read.csv(file.path(path.formatted, set, "tree.csv"), stringsAsFactors = FALSE) data.traits <- read.csv(file.path(path.formatted, set, "traits.csv"), stringsAsFactors = FALSE) - data.traits <- fun.std.data(data.traits) # remove nas data.tree <- subset(data.tree, subset = !is.na(data.tree[["D"]])) data.tree <- subset(data.tree,subset=data.tree$ecocode==ecocode.select) + data.traits <- fun.std.data(data.traits[data.traits$sp %in% data.tree$sp, ]) # test if same dim if (nrow(data.CWM) !=length(data.tree[["obs.id"]])) stop("data.CWM not good dim") ### test CWM for Wood.density @@ -219,10 +219,10 @@ fun.test.value.one.ecoregion.I <- function(data.CWM,set,ecocode.select,path.form fun.test.value.one.ecoregion.B <- function(data.CWM,set,ecocode.select,path.formatted = "output/formatted" ){ data.tree <- read.csv(file.path(path.formatted, set, "tree.csv"), stringsAsFactors = FALSE) data.traits <- read.csv(file.path(path.formatted, set, "traits.csv"), stringsAsFactors = FALSE) - data.traits <- fun.std.data(data.traits) # remove nas data.tree <- subset(data.tree, subset = !is.na(data.tree[["D"]])) data.tree <- subset(data.tree,subset=data.tree$ecocode==ecocode.select) + data.traits <- fun.std.data(data.traits[data.traits$sp %in% data.tree$sp, ]) ### test CWM for Wood.density for (i in 1:10){ test.res <- fun.test.CWM.XY.r(data.CWM,data.TRAITS=data.traits,data.tree=data.tree,Rlim=15) if (!test.res) stop(paste("CWM index for Wood.density WRONG for",set)) @@ -239,12 +239,21 @@ return( sum(data[[var.names.perc[i]]]>treshold & } +fun_div <- function(sp){ +p_i <- table(factor(sp))/sum(table(factor(sp))) +shannon <- -sum(p_i*log(p_i)) +simpson <- 1-sum(p_i^2) +return(data.frame(shannon=shannon,simpson=simpson)) +} + fun.compute.percentage.species.genus <- function(data,treshold=0.8){ perc.gymno <- sum( data[['Phylo.group']]=='Gymnosperm' & !is.na(data[['Phylo.group']]))/sum(!is.na(data[['Phylo.group']])) perc.ev <- sum(data[['Pheno.T']]=='EV' &!is.na(data[['Pheno.T']]))/sum(!is.na(data[['Pheno.T']])) traits.name <- c("Leaf.N","Seed.mass","SLA","Wood.density","Max.height") +traits.focal <- paste(c("Leaf.N","Seed.mass","SLA","Wood.density","Max.height"),"focal",sep=".") + var.name.fill <- paste(traits.name,"abs.CWM.fill",sep=".") var.name.perc.genus <- paste(traits.name,"perc.genus",sep=".") var.name.perc.species <- paste(traits.name,"perc.species",sep=".") @@ -252,14 +261,25 @@ perc.T.non.missing.sp <- lapply(1:length(var.name.fill),fun.perc.sup.treshold.sp var.name.perc.species,data,treshold=treshold) perc.T.non.missing.genus <- lapply(1:length(var.name.fill),fun.perc.sup.treshold.sp,var.name.fill, var.name.perc.genus,data,treshold=treshold) +mean.trait <- lapply(traits.focal,function(x,data) c(x=mean(data[[x]],na.rm=TRUE)), + data) + perc.genus <- lapply(var.name.perc.genus,function(i,data) mean(data[[i]],na.rm=TRUE),data) perc.species <- lapply(var.name.perc.species,function(i,data) mean(data[[i]],na.rm=TRUE),data) -vec.res <- c(nrow(data),perc.gymno,perc.ev,percT.non.missing.sp, - percT.non.missing.genus, - perc.genus,perc.species) -names(vec.res) <- c('num.obs','perc.gymno','perc.ev',paste(traits.name,"perc.above",threshold,"sp",sep="."), +BATOT.m <- mean(data$BATOT,na.rm=TRUE) +## compute the shannon & simpson index + +div.vec <- apply(do.call("rbind",tapply(data$sp,INDEX=data$plot,fun_div)), + MARGIN=2,FUN=mean,na.rm=TRUE) + +vec.res <- c(nrow(data),BATOT.m,div.vec,perc.gymno,perc.ev,perc.T.non.missing.sp, + perc.T.non.missing.genus, + perc.genus,perc.species, + mean.trait) +names(vec.res) <- c('num.obs','BATOT.m','shannon','simpson','perc.gymno','perc.ev', + paste(traits.name,"perc.above",treshold,"sp",sep="."), paste(traits.name,"perc.above",treshold,"genus",sep="."), - var.name.perc.genus,var.name.perc.species) + var.name.perc.genus,var.name.perc.species,traits.focal) return(vec.res) } @@ -467,15 +487,24 @@ DT[,eval(e),by=cluster.id] } +fun_div1 <- function(sp.id){ + fun_div(sp.id)[1,1] +} +fun_div2 <- function(sp.id){ + fun_div(sp.id)[1,2] +} + fun.compute.all.var.cluster <- function(data){ require(dplyr) -data$cluster.id <- paste(data[['set']],data[['ecocode']],data[['cluster']],data[['plot']]) +data$cluster.id <- paste(data[['ecocode']],data[['cluster']],data[['plot']]) data$sp.id <- paste(data[['set']],data[['sp']]) DF <- tbl_df(data) by_cluster<- group_by(DF, cluster.id) summarise_by_cluster<- summarise(by_cluster, - count = n(), + count = length(sp.id), n_sp=n_distinct(sp.id), + shannon=fun_div1(sp.id), + simpson=fun_div2(sp.id), set=unique(set), ecocode=unique(ecocode), BATOT = mean(BATOT, na.rm = TRUE), diff --git a/R/process.data/test.tree.CWM.R b/R/process.data/test.tree.CWM.R index 4e36fd1b5c95e2aa7f02e2bae371973a376f30b4..9d6f1a0ce7640f5d6ce1da0d1049fa06773d71da 100644 --- a/R/process.data/test.tree.CWM.R +++ b/R/process.data/test.tree.CWM.R @@ -6,7 +6,6 @@ source("R/process.data/process-fun.R") source("R/process.data/test.tree.CWM-fun.R") source("R/utils/plot.R") - #### RUN filedir <- "output/processed" sets.I <- c("Sweden","NVS","US","Canada","NSW","France","Swiss", "Spain") @@ -33,16 +32,16 @@ mat.perc <- data.frame(lapply(mat.perc, function(x) (unlist(x)))) write.csv(mat.perc,file=file.path(filedir, "all.sites.perc.traits.csv"), row.names=FALSE) -pdf("figs/test.processed/perc.tree.more.0.9.in.neigh.with.sp.pdf") +pdf("figs/test.processed/perc.tree.more.0.8.in.neigh.with.sp.pdf") par(mfrow=c(2,3)) -for (i in paste(trait.name,"perc.above0.9.sp",sep=".")){ +for (i in paste(trait.name,"perc.above.0.8.sp",sep=".")){ boxplot(mat.perc[[i]]~mat.perc$set,las=3,main=i) } dev.off() -pdf("figs/test.processed/perc.tree.more.0.9.in.neigh.with.genus.pdf") +pdf("figs/test.processed/perc.tree.more.0.8.in.neigh.with.genus.pdf") par(mfrow=c(2,3)) -for (i in paste(trait.name,"perc.above0.9.genus",sep=".")){ +for (i in paste(trait.name,"perc.above.0.8.genus",sep=".")){ boxplot(mat.perc[[i]]~mat.perc$set,las=3,main=i) } dev.off() diff --git a/R/utils/biomemap.R b/R/utils/biomemap.R index dfd39f3d6b08c4c5e2701f6ebb95ba8027467d0e..771314a9093c7286d09c24e67a99d67a93ffa678 100644 --- a/R/utils/biomemap.R +++ b/R/utils/biomemap.R @@ -1,96 +1,15 @@ source("R/utils/plot.R") -plot.biome.map <- function(){ - # MAKE EMPTY PLOT - plot(0,0, type = 'n', xlim = c(0,450), ylim= c(30,-15),xlab="Mean annual precipitation (cm)", - ylab="Mean annual temperature (deg C)") - # MAKE POLYGONS - #polygon(x = c(0,450,0,0), y = c(30,30,-15,30), col = "gray", border = NA) #whole plot - polygon(x = c(260,450,350,250,260), y = c(30,30,20,20,30), col = make.transparent("darkgreen", 0.5), border = NA) #tropical rainforest - polygon(x = c(250,350,200,160,250), y = c(20,20,5,5,20), col = make.transparent("darkolivegreen", 0.5), border = NA) #temperate rainforest - polygon(x = c(160,200,150,120,160), y = c(5,5,0,0,5), col = make.transparent("darkolivegreen4", 0.5), border = NA) #tropical montane forest - polygon(x = c(120,150,100,80,120), y = c(0,0,-5,-5,0), col = make.transparent("gray", 0.5), border = NA) #alpine shrubland - polygon(x = c(140,260,250,130,140), y = c(30,30,20,20,30), col = make.transparent("darkolivegreen4", 0.5), border = NA) #tropical deciduous forest - polygon(x = c(130,250,160,60,130), y = c(20,20,5,3,20), col = make.transparent("darkolivegreen3", 0.5), border = NA) #temperate forest - polygon(x = c(60,160,80,50,60), y = c(3,5,-5,-5,3), col = make.transparent("darkorange", 0.5), border = NA) #boreal - polygon(x = c(50,140,130,37,50), y = c(30,30,20,20,30), col = make.transparent("seagreen4", 0.5), border = NA) #tropical woodland - polygon(x = c(37,130,60,50,0,37), y = c(20,20,3,-5,-5,20), col = make.transparent("darkgoldenrod1", 0.5), border = NA) #temperate woodland - polygon(x = c(0,23,0,0), y = c(10,10,-5,10), col = make.transparent("gray", 0.5), border = NA) #cold desert - polygon(x = c(0,50,23,0,0), y = c(30,30,10,10,30), col = make.transparent("gray", 0.5), border = NA) #hot desert - polygon(x = c(0,100,0,0), y = c(-5,-5,-15,-5), col = make.transparent("gray", 0.5), border = NA) #tundra -} - - - -plot.points.on.biome.map.diff.tropical <- function(MAP, MAT, set,ecocode, cols = niceColors(), add.legend=TRUE){ - plot.biome.map() - col <- c() - pch <- c() - cex <- c() - sets <- unique(set) - for(i in 1:length(sets)){ - j <- (set == sets[i]) - if(unique(ecocode[j])[1]=='tropical') { - col <- c(col,make.transparent(cols[i],0.75)) - pch <- c(pch,16) - cex <- c(cex,1.25) - points(MAP[j], MAT[j], pch = pch[i], col = col[i], cex=cex[i]) - }else{ - col <- c(col,make.transparent(cols[i],0.25)) - pch <- c(pch,16) - cex <- c(cex,.25) - points(MAP[j], MAT[j], pch = pch[i], col = col[i], cex=cex[i]) - } - } - if(add.legend) - legend("topright", pch = pch, legend = sets, bty = "n", col = col, cex = 1) -} - -plot.points.on.biome.map.ecocode <- function(MAP, MAT, MAP.sd, MAT.sd, set,ecocode, cols = niceColors(), add.legend=TRUE){ - plot.biome.map() - col <- c() - pch <- c() - cex <- c() - sets <- unique(set) - for(i in 1:length(sets)){ - j <- (set == sets[i]) - col <- c(col,make.transparent(cols[i],0.99)) - pch <- c(pch,16) - cex <- c(cex,1.25) - points(MAP[j], MAT[j], pch = pch[i], col = col[i], cex=cex[i]) - segments(MAP[j], MAT[j]-MAT.sd[j],MAP[j], MAT[j]+MAT.sd[j], col = col[i]) - segments(MAP[j]-MAP.sd[j], MAT[j],MAP[j]+MAP.sd[j], MAT[j], col = col[i]) - - } - if(add.legend) - legend("topright", pch = pch, legend = sets, bty = "n", col = col, cex = 1) -} - - - - -plot.points.on.biome.map.panel <- function(MAP, MAT, set, ecocode, cols = niceColors()[-c(1)]){ - sets <- unique(set) - nsets <- length(sets) - - par(mfrow=c(ceiling(nsets/2),2), mar=c(4,3,3,2)) - - for(i in 1:nsets){ - j <- (set == sets[i]) - plot.points.on.biome.map(MAP = MAP[j], MAT = MAT[j], set = ecocode[j], cols=cols) - title(sets[i]) - } -} - #read.data -clim <- read.csv("output/processed/all.sites.clim.csv", header=TRUE,stringsAsFactors=FALSE) +clim <- read.csv("output/processed/all.sites.clim.csv", header = TRUE, + stringsAsFactors = FALSE) -## to make Paracou and BCI visible change slightly the mean T at Paracou -clim[clim$set%in% c('Paracou'),'clim.all.MAT'] <- 26.5 +## to make Paracou and BCI not visible change slightly the mean T at Paracou +clim[clim$set %in% c('Paracou'), 'clim.all.MAT'] <- 26.5 ## change NVS to NZ -clim[clim$set%in% c('NVS'),'set'] <- 'NZ' +clim[clim$set %in% c('NVS'), 'set'] <- 'NZ' @@ -98,30 +17,39 @@ clim[clim$set%in% c('NVS'),'set'] <- 'NZ' clim$id <- paste(clim$set,clim$ecocode) ##mean per id -MAT <- tapply(clim$clim.all.MAT,INDEX=clim$id,FUN=mean,na.rm=TRUE) -MAP <- tapply(clim$clim.all.MAP/10,INDEX=clim$id,FUN=mean,na.rm=TRUE) -MAT.sd <- tapply(clim$clim.all.MAT,INDEX=clim$id,FUN=sd,na.rm=TRUE) -MAP.sd <- tapply(clim$clim.all.MAP/10,INDEX=clim$id,FUN=sd,na.rm=TRUE) - -data.ecocode <- data.frame(id=names(MAT),MAT=MAT,MAP=MAP,MAT.sd=MAT.sd,MAP.sd=MAP.sd,stringsAsFactors=FALSE) -data.ecocode <- merge(data.ecocode,clim[!duplicated(clim$id),],by='id',all.x=TRUE,all.y=FALSE) +MAT <- tapply(clim$MAT, INDEX = clim$id, FUN = mean, na.rm = TRUE) +MAP <- tapply(clim$MAP/10, INDEX = clim$id, FUN = mean, na.rm = TRUE) +MAT.sd <- tapply(clim$MAT, INDEX = clim$id, FUN = sd, na.rm = TRUE) +MAP.sd <- tapply(clim$MAP/10, INDEX = clim$id, FUN = sd, na.rm = TRUE) + +data.ecocode <- data.frame(id = names(MAT), MAT = MAT, + MAP = MAP, MAT.sd = MAT.sd, + MAP.sd = MAP.sd, stringsAsFactors = FALSE) +data.ecocode <- merge(data.ecocode, clim[!duplicated(clim$id), + c('set', 'ecocode', 'id')], by = 'id', + all.x = TRUE, all.y = FALSE) ## rename NVS to NZ -data.ecocode[data.ecocode$set=="NVS",'set'] <- 'NZ' +data.ecocode[data.ecocode$set=="NVS", 'set'] <- 'NZ' -plot.biome.map() to.pdf(plot.biome.map(), file = "figs/biome.pdf", width = 6, height = 6) -to.pdf(plot.points.on.biome.map.panel(MAP = clim$clim.all.MAP/10, MAT = clim$clim.all.MAT, +to.pdf(plot.points.on.biome.map.panel(MAP = clim$MAP/10, + MAT = clim$MAT, set = clim$set, ecocode = clim$ecocode), file = "figs/biome-panel.pdf", width = 12, height = 12) -to.pdf(plot.points.on.biome.map(MAP = clim$clim.all.MAP/10, MAT = clim$clim.all.MAT, set = clim$set, ecocode = clim$ecocode), +to.pdf(plot.points.on.biome.map(MAP = clim$MAP/10, + MAT = clim$MAT, + set = clim$set), file = "figs/biome.xy.pdf", width = 6, height = 6) -to.pdf(plot.points.on.biome.map.ecocode(MAP = data.ecocode$MAP, MAT = data.ecocode$MAT, - MAP.sd = data.ecocode$MAP.sd, MAT.sd = data.ecocode$MAT.sd, - set = data.ecocode$set, ecocode = data.ecocode$ecocode), +to.pdf(plot.points.on.biome.map.ecocode(MAP = data.ecocode$MAP, + MAT = data.ecocode$MAT, + MAP.sd = data.ecocode$MAP.sd, + MAT.sd = data.ecocode$MAT.sd, + set = data.ecocode$set, + ecocode = data.ecocode$ecocode), file = "figs/biome.ecocode.xy.pdf", width = 6, height = 6) diff --git a/R/utils/biomes.csv b/R/utils/biomes.csv new file mode 100644 index 0000000000000000000000000000000000000000..8d1f30f3728982c57dbb6b5f5836eb6342f1dc13 --- /dev/null +++ b/R/utils/biomes.csv @@ -0,0 +1,107 @@ +x,y,biome,merged_biome +29.3386276623,0.2134483009, 'Subtropical desert',Subtropical +13.9710112345,0.2301115512, 'Subtropical desert',Subtropical +15.3708565043,1.7459383327, 'Subtropical desert',Subtropical +17.5098359463,5.3507548056, 'Subtropical desert',Subtropical +24.1305684681,7.0292466492, 'Subtropical desert',Subtropical +27.0744093473,8.4786849257, 'Subtropical desert',Subtropical +28.9153017569,9.9241557617, 'Subtropical desert',Subtropical +29.2009574756,5.3208667535, 'Subtropical desert',Subtropical +29.3386276623,0.2134483009, 'Subtropical desert',Subtropical +13.9710112345,0.2301115512, 'Temperate grassland desert',Temperate +-9.7057481601,0.0730009059, 'Temperate grassland desert',Temperate +-7.5719263908,0.8720434302, 'Temperate grassland desert',Temperate +4.491076565,3.1456513546, 'Temperate grassland desert',Temperate +17.5098359463,5.3507548056, 'Temperate grassland desert',Temperate +15.3708565043,1.7459383327, 'Temperate grassland desert',Temperate +13.9710112345,0.2301115512, 'Temperate grassland desert',Temperate +17.5098359463,5.3507548056, 'Woodland shrubland',Mediterranean +4.491076565,3.1456513546, 'Woodland shrubland',Mediterranean +-7.5719263908,0.8720434302, 'Woodland shrubland',Mediterranean +-9.7057481601,0.0730009059, 'Woodland shrubland',Mediterranean +-6.6874516468,2.0263041308, 'Woodland shrubland',Mediterranean +-0.9486811566,3.9174507872, 'Woodland shrubland',Mediterranean +3.0979759441,5.2989135825, 'Woodland shrubland',Mediterranean +7.1467490131,7.8314631259, 'Woodland shrubland',Mediterranean +10.1646487823,9.5689375855, 'Woodland shrubland',Mediterranean +13.9175830352,11.165171162, 'Woodland shrubland',Mediterranean +18.6262737137,12.6929002652, 'Woodland shrubland',Mediterranean +18.2498958547,7.9433449491, 'Woodland shrubland',Mediterranean +17.5098359463,5.3507548056, 'Woodland shrubland',Mediterranean +18.6262737137,12.6929002652, 'Temperate forest',Temperate +13.9175830352,11.165171162, 'Temperate forest',Temperate +10.1646487823,9.5689375855, 'Temperate forest',Temperate +7.1467490131,7.8314631259, 'Temperate forest',Temperate +3.0979759441,5.2989135825, 'Temperate forest',Temperate +-0.9486811566,3.9174507872, 'Temperate forest',Temperate +1.0388743049,5.14762185, 'Temperate forest',Temperate +1.9976724349,6.7338045771, 'Temperate forest',Temperate +2.5178038894,9.6853158413, 'Temperate forest',Temperate +3.1182098908,16.3061806111, 'Temperate forest',Temperate +4.4455832468,18.3972862707, 'Temperate forest',Temperate +7.757999352,20.3516474797, 'Temperate forest',Temperate +12.6144110665,22.2396201837, 'Temperate forest',Temperate +18.7197730624,23.5565459463, 'Temperate forest',Temperate +18.6367213071,18.3763910838, 'Temperate forest',Temperate +18.6262737137,12.6929002652, 'Temperate forest',Temperate +-0.8016213607,3.9179797793, 'Boreal forest',Boreal +-6.6874516468,2.0263041308, 'Boreal forest',Boreal +-4.3948000079,9.2287956834, 'Boreal forest',Boreal +-4.0979032077,10.7406550244, 'Boreal forest',Boreal +-1.5918032678,14.0590222904, 'Boreal forest',Boreal +0.91442892,17.4493324781, 'Boreal forest',Boreal +4.1546376074,20.1228584087, 'Boreal forest',Boreal +3.0446799929,16.3059161151, 'Boreal forest',Boreal +2.4442739914,9.6850513453, 'Boreal forest',Boreal +1.9976724349,6.7338045771, 'Boreal forest',Boreal +1.0388743049,5.14762185, 'Boreal forest',Boreal +-0.8016213607,3.9179797793, 'Boreal forest',Boreal +18.7197730624,23.5565459463, 'Temperate rain forest',Temperate +12.6145433145,22.3115631054, 'Temperate rain forest',Temperate +7.757867104,20.2797045579, 'Temperate rain forest',Temperate +4.4457154948,18.4692291924, 'Temperate rain forest',Temperate +3.1183421388,16.3781235329, 'Temperate rain forest',Temperate +4.1547698554,20.1948013304, 'Temperate rain forest',Temperate +15.715627087,29.3011353492, 'Temperate rain forest',Temperate +20.1356203424,33.7774662602, 'Temperate rain forest',Temperate +19.3918574895,29.1704743075, 'Temperate rain forest',Temperate +18.7197730624,23.5565459463, 'Temperate rain forest',Temperate +18.7197730624,23.5565459463, 'Tropical rain forest',Tropical +19.3919897376,29.2424172293, 'Tropical rain forest',Tropical +20.1356203424,33.7774662602, 'Tropical rain forest',Tropical +22.2775092408,38.9650270117, 'Tropical rain forest',Tropical +23.7563065774,43.4307780812, 'Tropical rain forest',Tropical +24.1988084454,44.151794275, 'Tropical rain forest',Tropical +24.7137822272,44.2975315907, 'Tropical rain forest',Tropical +25.6668936924,42.7901686823, 'Tropical rain forest',Tropical +26.1050313758,41.1370684582, 'Tropical rain forest',Tropical +27.4144190014,33.443936759, 'Tropical rain forest',Tropical +27.7718853939,27.905654264, 'Tropical rain forest',Tropical +25.7827429561,25.8121681401, 'Tropical rain forest',Tropical +21.7355568633,24.1429336578, 'Tropical rain forest',Tropical +18.7197730624,23.5565459463, 'Tropical rain forest',Tropical +17.5098359463,5.3507548056, 'Tropical forest savanna',Tropical +18.1763659567,7.9430804531, 'Tropical forest savanna',Tropical +18.5530083118,12.8365216126, 'Tropical forest savanna',Tropical +18.4899260072,18.5197479353, 'Tropical forest savanna',Tropical +18.5727132665,23.5560169542, 'Tropical forest savanna',Tropical +21.7355568633,24.1429336578, 'Tropical forest savanna',Tropical +25.7090808102,25.7399607223, 'Tropical forest savanna',Tropical +27.8454152918,27.90591876, 'Tropical forest savanna',Tropical +28.4176524654,19.2029411959, 'Tropical forest savanna',Tropical +28.9153017569,9.9241557617, 'Tropical forest savanna',Tropical +27.0744093473,8.4786849257, 'Tropical forest savanna',Tropical +24.1305684681,7.0292466492, 'Tropical forest savanna',Tropical +17.5098359463,5.3507548056, 'Tropical forest savanna',Tropical +-6.6873193988,2.0982470525, 'Tundra',Tundra +-8.8961257943,0.5075678928, 'Tundra',Tundra +-9.7057481601,0.0730009059, 'Tundra',Tundra +-13.3817140666,0.3475477911, 'Tundra',Tundra +-15.3658310796,0.987892694, 'Tundra',Tundra +-15.2174488035,1.7078509036, 'Tundra',Tundra +-8.3725558913,5.3295951227, 'Tundra',Tundra +-4.0979032077,10.7406550244, 'Tundra',Tundra +-1.5918032678,14.0590222904, 'Tundra',Tundra +-4.0979032077,10.7406550244, 'Tundra',Tundra +-4.3948000079,9.2287956834, 'Tundra',Tundra +-6.6873193988,2.0982470525, 'Tundra',Tundra diff --git a/R/utils/ecoregions.R b/R/utils/ecoregions.R index c049fbf470ef798c6411b7461cb9c745ae8bd69e..c3c35753d1583ffd3a69d87001c59a7f9c0b62bd 100644 --- a/R/utils/ecoregions.R +++ b/R/utils/ecoregions.R @@ -4,19 +4,19 @@ check_packages(c("maptools", "rgdal", "rgeos", "raster")) ### function to get ecoregion from wwf ecoregion map -GetEcoregions <- function(lons,lats) { +GetEcoregions <- function(lons,lats,var.extract = 'eco_code') { #read in ecoregions and transform shapefile to lat/lon - wwf.ecoregions <- readOGR(dsn="data/raw/wwf_ecoregion", layer="wwf_terr_ecos") + wwf.ecoregions <- readOGR(dsn = "data/raw/wwf_ecoregion", layer = "wwf_terr_ecos") geo.proj <- proj4string(wwf.ecoregions) #create SpatialPoints object for plots - pts <- SpatialPoints(cbind(lons,lats), proj4string=CRS(geo.proj)) + pts <- SpatialPoints(cbind(lons,lats), proj4string = CRS(geo.proj)) #creates object that assigns each plot index to an ecoregion - plot.ecocodes <- over(pts, wwf.ecoregions)[["eco_code"]] + plot.ecocodes <- over(pts, wwf.ecoregions)[[var.extract]] #now need to lookup closest ecoregion for NAs and Lake @@ -24,19 +24,49 @@ GetEcoregions <- function(lons,lats) { ec.list <- unique(plot.ecocodes) #get the subset of ecoregions that have been matched to these plots - ec.subset <- wwf.ecoregions[wwf.ecoregions$eco_code %in% ec.list,] + ec.subset <- wwf.ecoregions[wwf.ecoregions[[var.extract]] %in% ec.list,] - ec.na <- which(plot.ecocodes == "Lake" | is.na(plot.ecocodes)) #get all NAs and Lake regions - gdis <- gDistance(pts[ec.na,],ec.subset,byid=T) #calculate distance from na points to each polygon - gdis[which(ec.subset$eco_code == "Lake"),] <- 99999 #exclude lakes by making them seem very far away - na.ind <- apply(gdis,2,FUN=which.min) #find index of minimum distance for each point - plot.ecocodes[ec.na] <- ec.subset$eco_code[na.ind] #assign back to ecocodes + ec.na <- which(plot.ecocodes == "Lake" | plot.ecocodes == 98 | is.na(plot.ecocodes)) + #get all NAs and Lake regions + gdis <- gDistance(pts[ec.na,],ec.subset,byid = T) + #calculate distance from na points to each polygon + gdis[which(ec.subset[[var.extract]] == "Lake"),] <- 99999 + #calculate distance from na points to each polygon + gdis[which(ec.subset[[var.extract]] == 98),] <- 99999 + #exclude lakes by making them seem very far away + na.ind <- apply(gdis,2,FUN = which.min) + #find index of minimum distance for each point + plot.ecocodes[ec.na] <- ec.subset[[var.extract]][na.ind] + #assign back to ecocodes return(plot.ecocodes) } #GetEcoregions # -# plots <- read.csv("C:\\Users\\Mark\\Documents\\analyses\\FIA traits analysis\\all.sites.coords.csv",header=T) -# plots <- subset(plots,subset=plots$set=="Sweden") -# -# er <- GetEcoregions(plots$Lon,plots$Lat) + +## read csv table of code of biome +## 1 Tropical and Subtropical Moist Broadleaf forests +## 2 Tropical and Subtropical Dry Broadleaf forests +## 3 Tropical and Subtropical Coniferous forests +## 4 Temperate Broadleaf and Mixed forests +## 5 Temperate Coniferous forests +## 6 Boreal forests/Taiga +## 7 Tropical and Subtropical Grasslands, Savannas, and Shrublands +## 8 Temperate Grasslands, Savannas, and Shrublands +## 9 Flooded Grasslands and Savannas +## 10 Montane Grasslands and Savannas +## 11 Tundra +## 12 Mediterranean Forests, Woodlands, and Scrub +## 13 Deserts and Weric Shrublands +## 14 Mangroves +## 98 Lake +## 99 Rock and Ice + +code.data <- read.csv("R/utils/biomes.WWF.code.csv", stringsAsFactors = FALSE) + +GetBiomes <- function(Lon, Lat, data = code.data){ +er <- GetEcoregions(Lon, Lat, var.extract = 'BIOME') +merge.biomes <- merge(data.frame( code = er), data, by = 'code', + all.x = TRUE, all.y = FALSE) +return(merge.biomes$short_name) +} diff --git a/R/utils/plot.R b/R/utils/plot.R index 4bc1af5a100b802c1be6dc64604322e5da83f9c4..4da93e5bba5a162b95d888e0612fdb843a94caff 100644 --- a/R/utils/plot.R +++ b/R/utils/plot.R @@ -144,3 +144,155 @@ boxplot(y~x.cut,at=mid.points,outline=FALSE,names=round(mid.points,0), } } + + +### PLOTS function for biomes + +library(sp) + +biomes.data <- read.csv('R/utils/biomes.csv', stringsAsFactors = FALSE) + + list.coord <- lapply(unique(biomes.data$biome), + function(biome.id,data) cbind(data$y[data$biome == biome.id] * 10, + data$x[data$biome == biome.id]), + data=biomes.data) +names(list.coord) <- unique(biomes.data$biome) +list.Polygon <- lapply(list.coord, Polygon) +list.Polygons <- lapply(1:length(list.Polygon), + function(i, x) {Polygons(list(x[[i]]), names(x)[i])}, + x = list.Polygon) + +poly.biomes <- SpatialPolygons(list.Polygons) +DF <- data.frame(biomes= names(poly.biomes), row.names = names(poly.biomes)) +poly.DF <- SpatialPolygonsDataFrame(poly.biomes, DF) + +## creat object for polygon of whitaker +list.poly <- list(tropical_rainforest = cbind(x = c(260,450,350,250,260), + y = c(30,30,20,20,30)), + temperate_rainforest = cbind(x = c(250,350,200,160,250), + y = c(20,20,5,5,20)), + tropical_montane_forest = cbind(x = c(160,200,150,120,160), + y = c(5,5,0,0,5)), + alpine_shrubland = cbind(x = c(120,150,100,80,120), + y = c(0,0,-5,-5,0)), + tropical_deciduous_forest = cbind(x = c(140,260,250,130,140), + y = c(30,30,20,20,30)), + temperate_forest = cbind(x = c(130,250,160,60,130), + y = c(20,20,5,3,20)), + boreal = cbind(x = c(60,160,80,50,60), + y = c(3,5,-5,-5,3) ), + tropical_woodland = cbind(x = c(50,140,130,37,50), + y = c(30,30,20,20,30) ), + temperate_woodland = cbind(x = c(37,130,60,50,0,37), + y = c(20,20,3,-5,-5,20)), + cold_desert = cbind(x = c(0,23,0,0), + y = c(10,10,-5,10)), + hot_desert = cbind(x = c(0,50,23,0,0), + y = c(30,30,10,10,30)), + tundra = cbind(x = c(0,100,0,0), + y = c(-5,-5,-15,-5))) + +list.Polygon <- lapply(list.poly, Polygon) +list.Polygons <- lapply(1:length(list.Polygon), + function(i, x) {Polygons(list(x[[i]]), names(x)[i])}, + x = list.Polygon) + +poly.biomes <- SpatialPolygons(list.Polygons) +DF <- data.frame(biomes= names(poly.biomes), row.names = names(poly.biomes)) +poly.DF2 <- SpatialPolygonsDataFrame(poly.biomes, DF) + + +## function to overlay points on biomes +fun.overly.plot.on.biomes <- function(MAP,MAT,names.vec){ + xy = cbind(MAP,MAT) + dimnames(xy)[[1]] = names.vec + pts = SpatialPoints(xy) + return(over(pts,poly.DF)) +} + +c("yellow4","yellow","darkorange","darkolivegreen3", + "seagreen4", "darkolivegreen4", + "darkgreen","darkgreen2","gray") + +plot.biome.map <- function(poly = poly.DF){ +require(sp) + # MAKE EMPTY PLOT + plot(0,0, type = 'n', xlim = c(0, 450), ylim = c(30, -15), + xlab = "Mean annual precipitation (cm)", + ylab = "Mean annual temperature (deg C)") + # MAKE POLYGONS + plot(poly,col = make.transparent(c("burlywood2","darkgoldenrod1","firebrick1","darkolivegreen3", + "seagreen4", "darkolivegreen4", + "darkgreen","darkkhaki","gray"), 0.5), + border = NA, + add = TRUE) +} + + + +plot.points.on.biome.map.diff.tropical <- function(MAP, MAT, set,ecocode, cols = niceColors(), add.legend=TRUE){ + plot.biome.map() + col <- c() + pch <- c() + cex <- c() + sets <- unique(set) + for(i in 1:length(sets)){ + j <- (set == sets[i]) + if(unique(ecocode[j])[1]=='tropical') { + col <- c(col,make.transparent(cols[i],0.75)) + pch <- c(pch,16) + cex <- c(cex,1.25) + points(MAP[j], MAT[j], pch = pch[i], col = col[i], cex=cex[i]) + }else{ + col <- c(col,make.transparent(cols[i],0.25)) + pch <- c(pch,16) + cex <- c(cex,.25) + points(MAP[j], MAT[j], pch = pch[i], col = col[i], cex=cex[i]) + } + } + if(add.legend) + legend("topright", pch = pch, legend = sets, bty = "n", col = col, cex = 1) +} + +plot.points.on.biome.map.ecocode <- function(MAP, MAT, MAP.sd, MAT.sd, set, + ecocode, cols = niceColors(), + add.legend = TRUE){ + plot.biome.map() + col <- c() + pch <- c() + cex <- c() + sets <- unique(set) + for(i in 1:length(sets)){ + j <- (set == sets[i]) + col <- c(col,make.transparent(cols[i],0.99)) + pch <- c(pch,16) + cex <- c(cex,1.25) + points(MAP[j], MAT[j], pch = pch[i], col = col[i], cex=cex[i]) + segments(MAP[j], MAT[j]-MAT.sd[j],MAP[j], MAT[j]+MAT.sd[j], col = col[i]) + segments(MAP[j]-MAP.sd[j], MAT[j],MAP[j]+MAP.sd[j], MAT[j], col = col[i]) + + } + if(add.legend) + legend("topright", pch = pch, legend = sets, bty = "n", col = col, cex = 1) +} + +plot.points.on.biome.map <- function(MAP, MAT, set, cols = col.vec){ + plot.biome.map() + points(MAP, MAT, col = cols[set]) + } + +plot.points.on.biome.map.panel <- function(MAP, MAT, set, ecocode, + cols = niceColors()[-c(1)]){ + sets <- unique(set) + nsets <- length(sets) + + par(mfrow=c(ceiling(nsets/2),2), mar = c(4,3,3,2)) + + for(i in 1:nsets){ + j <- (set == sets[i]) +plot.points.on.biome.map(MAP[j], MAT[j], set[j]) + + title(sets[i]) + } +} + diff --git a/launch_all_lmer.nolog.bash b/launch_all_lmer.nolog.bash index 7c2b08d97903752cd510fd56d3b77d2e4e57a55c..581af61da0052a7eddd40018534ca2643f607fe4 100644 --- a/launch_all_lmer.nolog.bash +++ b/launch_all_lmer.nolog.bash @@ -4,7 +4,7 @@ export LD_LIBRARY_PATH=/usr/lib64/R/library mkdir -p trait.workshop -for site in "'BCI'" "'Canada'" "'France'" "'Fushan'" "'NVS'" "'Paracou'" "'Spain'" "'US'" "'Swiss'" "'Sweden'" "'NSW'" "'Mbaiki'" "'Luquillo'" "'Japan'"; do +for site in "'BCI'" "'Canada'" "'France'" "'Fushan'" "'NVS'" "'Paracou'" "'Spain'" "'US'" "'Swiss'" "'Sweden'" "'NSW'" "'Mbaiki'" "'Luquillo'" "'Japan'"; do echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.nolog.R');run.models.for.set.all.traits($site,model.files.lmer.Tf.1, run.lmer,type.filling='species',std=FALSE);print('done')\"" > trait.workshop/species1$site.sh qsub trait.workshop/species1$site.sh -l nodes=1:ppn=1 -N "lmerspecies1$site" -q opt32G -j oe diff --git a/launch_all_lmer.nolog.global.norandom.bash b/launch_all_lmer.nolog.global.norandom.bash new file mode 100644 index 0000000000000000000000000000000000000000..f6a9113032143bd36cd2488fe6f27cade76bc57e --- /dev/null +++ b/launch_all_lmer.nolog.global.norandom.bash @@ -0,0 +1,18 @@ +#!/bin/bash + +export LD_LIBRARY_PATH=/usr/lib64/R/library + +mkdir -p trait.workshop + +for trait in "'SLA'" "'Leaf.N'" "'Wood.density'" "'Max.height'" "'Seed.mass'"; do + + echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.nolog.all.R');run.multiple.model.for.set.one.trait(model.files.lmer.Tf.3, run.lmer,$trait,type.filling='species');print('done')\"" > trait.workshop/species1$trait.sh + qsub trait.workshop/species1$trait.sh -l nodes=1:ppn=1 -N "lmerall1$trait" -q opt32G -j oe + + echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.nolog.all.R');run.multiple.model.for.set.one.trait(model.files.lmer.Tf.4, run.lmer,$trait,type.filling='species');print('done')\"" > trait.workshop/species2$trait.sh + qsub trait.workshop/species2$trait.sh -l nodes=1:ppn=1 -N "lmerall2$trait" -q opt32G -j oe + + + +done + diff --git a/launch_all_lmer.nolog.global.randomset.bash b/launch_all_lmer.nolog.global.randomset.bash new file mode 100644 index 0000000000000000000000000000000000000000..54cc4977cc3d03879bff4b70c72d90b4c821c719 --- /dev/null +++ b/launch_all_lmer.nolog.global.randomset.bash @@ -0,0 +1,18 @@ +#!/bin/bash + +export LD_LIBRARY_PATH=/usr/lib64/R/library + +mkdir -p trait.workshop + +for trait in "'SLA'" "'Leaf.N'" "'Wood.density'" "'Max.height'" "'Seed.mass'"; do + + echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.nolog.all.R');run.multiple.model.for.set.one.trait(model.files.lmer.Tf.5, run.lmer,$trait,type.filling='species');print('done')\"" > trait.workshop/species1$trait.sh + qsub trait.workshop/species1$trait.sh -l nodes=1:ppn=1 -N "lmerall1$trait" -q opt32G -j oe + + echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.nolog.all.R');run.multiple.model.for.set.one.trait(model.files.lmer.Tf.6, run.lmer,$trait,type.filling='species');print('done')\"" > trait.workshop/species2$trait.sh + qsub trait.workshop/species2$trait.sh -l nodes=1:ppn=1 -N "lmerall2$trait" -q opt32G -j oe + + + +done + diff --git a/launch_all_lmer.nolog.global.std.bash b/launch_all_lmer.nolog.global.std.bash new file mode 100644 index 0000000000000000000000000000000000000000..fa3d5b4c54351686a9567d35bf242d1e96a7b7ea --- /dev/null +++ b/launch_all_lmer.nolog.global.std.bash @@ -0,0 +1,24 @@ +#!/bin/bash + +export LD_LIBRARY_PATH=/usr/lib64/R/library + +mkdir -p trait.workshop + +for site in "'BCI'" "'Canada'" "'France'" "'Fushan'" "'NVS'" "'Paracou'" "'Spain'" "'US'" "'Swiss'" "'Sweden'" "'NSW'" "'Mbaiki'" "'Luquillo'" "'Japan'"; do + + echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.nolog.R');run.models.for.set.all.traits($site,model.files.lmer.Tf.1, run.lmer,type.filling='species',std=TRUE);print('done')\"" > trait.workshop/species1$site.sh + qsub trait.workshop/species1$site.sh -l nodes=1:ppn=1 -N "lmerspecies1$site" -q opt32G -j oe + + echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.nolog.R');run.models.for.set.all.traits($site,model.files.lmer.Tf.2, run.lmer,type.filling='species',std=TRUE);print('done')\"" > trait.workshop/species2$site.sh + qsub trait.workshop/species2$site.sh -l nodes=1:ppn=1 -N "lmerspecies2$site" -q opt32G -j oe + + echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.nolog.R');run.models.for.set.all.traits($site,model.files.lmer.Tf.1, run.lmer,type.filling='genus',std=TRUE);print('done')\"" > trait.workshop/genus1$site.sh + qsub trait.workshop/genus1$site.sh -l nodes=1:ppn=1 -N "lmergenus1$site" -q opt32G -j oe + + echo "/usr/local/R/R-3.0.1/bin/Rscript -e \"source('R/analysis/lmer.run.nolog.R');run.models.for.set.all.traits($site,model.files.lmer.Tf.2, run.lmer,type.filling='genus',std=TRUE);print('done')\"" > trait.workshop/genus2$site.sh + qsub trait.workshop/genus2$site.sh -l nodes=1:ppn=1 -N "lmergenus2$site" -q opt32G -j oe + + + +done +