diff --git a/Makefile b/Makefile index 70f736322a7c405b5c070ea81b7999bbe6c3027e..614c9dff8647a1c78c13f72c54107a07f0e9f921 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ GLOBAL: $(D3)/Done.txt $(D3)/Done.txt: scripts/process.data/remove.out.R scripts/process.data/summarise.data.R $(D3)/Done.t.txt Rscript $< Rscript scripts/process.data/summarise.data.R - Rscript scripts/process.data/plot.data.all.R + Rscript scripts/process.data/plot.data.all.R; convert figs/test.processed/*.p* figs/test.processed/all.CWM.pdf $(D3)/Done.t.txt: scripts/process.data/merge.all.processed.data.R $(sites) Rscript $< @@ -66,7 +66,7 @@ $(D5)/Done.txt: scripts/find.trait/test.traits.R $(D3traits) TEST.CWM: $(D6)/Done.txt $(D6)/Done.txt: scripts/process.data/test.tree.CWM.R $(D3Done) - Rscript $< ; convert figs/test.processed/*.p* figs/test.processed/all.CWM.pdf + Rscript $< #------------------------------------------------------- diff --git a/R/analysis/lmer.output-fun.R b/R/analysis/lmer.output-fun.R index 0b222b7b5f20344f2b8b129ec844ff30cd834b8f..cab2c0d5e7854111f968788c3330e134942c38cd 100644 --- a/R/analysis/lmer.output-fun.R +++ b/R/analysis/lmer.output-fun.R @@ -36,12 +36,15 @@ summarise.lmer.all.output <- function(x, random.name ){ } summarise.lmer.output.list <- function(f ){ - output.lmer <- read.lmer.output(f) + list.output.lmer <- read.lmer.output(f) + output.lmer <- list.output.lmer[['output']] + relgrad <- list.output.lmer[['relgrad']] if(!is.null(output.lmer)){ res <- list(files.details = files.details(f), lmer.summary = summarise.lmer.output( output.lmer), terms = terms(output.lmer), - vcov = vcov(output.lmer)) + vcov = vcov(output.lmer), + relgrad = relgrad) }else{ res <- NULL } @@ -50,7 +53,9 @@ summarise.lmer.output.list <- function(f ){ summarise.lmer.output.all.list <- function(f ){ - output.lmer <- read.lmer.output(f) + list.output.lmer <- read.lmer.output(f) + output.lmer <- list.output.lmer[['output']] + relgrad <- list.output.lmer[['relgrad']] if(!is.null(output.lmer)){ details <- files.details.all(f) @@ -63,7 +68,8 @@ summarise.lmer.output.all.list <- function(f ){ random.name = model$var.BLUP), terms = terms(output.lmer), - vcov = vcov(output.lmer)) + vcov = vcov(output.lmer), + relgrad = relgrad) }else{ res <- NULL } diff --git a/R/analysis/lmer.run.R b/R/analysis/lmer.run.R index bb4757c034cef4c98ebc95600fa4a24b3c27d477..49609e57ddf5d1fe20cb0400b5b993d78ccd5c36 100644 --- a/R/analysis/lmer.run.R +++ b/R/analysis/lmer.run.R @@ -41,7 +41,7 @@ fun.call.lmer.and.save <- function(formula, df.lmer, path.out, var.sample, select.biome){ lmer.output <- lmer(formula = formula, data = df.lmer, REML = FALSE, control = lmerControl(optCtrl = list(maxfun = 40000) ) ) - print(warnigs()) + print(warnings()) print(summary(lmer.output)) relgrad <- with(lmer.output@optinfo$derivs,solve(Hessian,gradient)) print('test convergence of relative gradient of hessian') @@ -54,7 +54,8 @@ fun.call.lmer.and.save <- function(formula, df.lmer, path.out, name.file <- paste(var.sample, gsub(' ', '_',select.biome), "results.nolog.all.rds", sep ='.') } - saveRDS(lmer.output, file = file.path(path.out, name.file)) + saveRDS(list(output = lmer.output, relgrad = relgrad), + file = file.path(path.out, name.file)) } run.lmer <- function (model.file, trait, diff --git a/R/process.data/process-fun.R b/R/process.data/process-fun.R index 0bac9b56b4e274f5e15dfa6721c722480a45f304..c7ccff49e627de68f30720dfdd5dab0aaa49854b 100644 --- a/R/process.data/process-fun.R +++ b/R/process.data/process-fun.R @@ -762,42 +762,14 @@ fun.reformat.trait <- function(data,trait){ return(data) } -fun.reformat.trait.CAT <- function(data, trait, cat){ -## - trait.CWM.sd.n <- paste(trait, 'CWM.fill', cat, sep = '.') -## - trait.CWM.sd.e <- (parse(text = - paste('(',trait,'.CWM.fill.', cat, - ' - mean(', trait, - '.focal, na.rm =TRUE))/sd(',trait, - '.focal, na.rm =TRUE)' , - sep = ''))) - data[, c(trait.CWM.sd.n) := eval(trait.CWM.sd.e) ] - - ## - trait.abs.CWM.sd.n <- paste(trait, 'abs.CWM.fill', cat, sep = '.') -## - trait.abs.CWM.sd.e <- (parse(text = - paste('(',trait,'.abs.CWM.fill.', cat, - ')/sd(',trait, - '.focal, na.rm =TRUE)' , - sep = ''))) - data[, c(trait.abs.CWM.sd.n) := eval(trait.abs.CWM.sd.e) ] -return(data) -} - fun.standardized.traits <- function(data, traits = c('SLA', 'Leaf.N', 'Wood.density', 'Max.height', - 'Seed.mass'), - cat.vec = c('A_EV', 'A_D', 'C')){ + 'Seed.mass')){ for(trait in traits){ data <- fun.reformat.trait(data, trait) - for (cat in cat.vec){ - data <- fun.reformat.trait.CAT(data, trait, cat) - } - print(data) + print(str(data)) } return(data) } @@ -806,16 +778,26 @@ return(data) #### function to reformat data for global data set -fun.reform.data.and.remove.outlier <- function(data.all, - std.traits.TF = TRUE){ +fun.reform.data.and.remove.outlier <- function(data.all, err.limit = 4, + std.traits.TF = TRUE, + select.one.census.rand = TRUE){ ## remove outlier following Condit approach require(data.table) + require(dplyr) data.all <- as.data.table(data.all) + q.l <- quantile(data.all$BA.G, probs = 0.00005, na.rm = TRUE) + q.h <- quantile(data.all$BA.G, probs = 0.99995, na.rm = TRUE) + data.all[!is.na(BA.G) & + BA.G > q.h, + c('G','BA.G') := NA] + data.all[!is.na(BA.G) & + BA.G < q.l, + c('G','BA.G') := NA] data.all[!is.na(G) & G > 75, c('G','BA.G') := NA] data.all[!is.na(G) & - ((D*10+year*G) <= (D*10 -5*(0.006214*D*10 + 0.9036))), + ((D*10+year*G) <= (D*10 -err.limit*(0.006214*D*10 + 0.9036))), c('G','BA.G') := NA] # reformat factors @@ -826,15 +808,15 @@ fun.reform.data.and.remove.outlier <- function(data.all, data.all[ , tree.id := paste(ecocode, tree.id)] data.all[ , obs.id := paste(ecocode, obs.id)] if(std.traits.TF) fun.standardized.traits(data.all) - data.all[ , plot.c := paste(plot, census)] - data.all <- as.data.frame(data.all) - plots.select <- drop(as.matrix(group_by(data.all, plot) %>% - summarise(select = sample(plot.c, 1)) %>% - select(select))) - - data.all <- filter(data.all, plot %in% plots.select) - # remove tree with multiple obs - ### TODO NEED TO CHANGE THAT TO SELECT RANDOMLY ONE CENSUS PER PLOT + if(select.one.census.rand){ + data.all[ , plot.c := paste(plot, census)] + data.all <- as.data.frame(data.all) + plots.select <- drop(as.matrix(group_by(data.all, plot) %>% + dplyr::summarise(select = sample(plot.c, 1)) %>% + dplyr::select(select))) + data.all <- filter(data.all, plot.c %in% plots.select) + } + # remove tree with multiple obs return(data.all) } diff --git a/scripts/process.data/explore.processed.data.R b/scripts/process.data/explore.processed.data.R deleted file mode 100644 index deda9bbe2c8c84f1e2568f5a0e8e5056065a9b80..0000000000000000000000000000000000000000 --- a/scripts/process.data/explore.processed.data.R +++ /dev/null @@ -1,416 +0,0 @@ -####################################### -####################################### -###### EXPLORE DATA SET BEFORE ANALYSIS -rm(list = ls()) - -source("R/process.data/process-fun.R") -source("R/process.data/test.tree.CWM-fun.R") -source("R/utils/plot.R") -source("R/utils/mem.R") -source("R/analysis/lmer.run.R") - -filedir <- "output/processed" - -mat.perc <- read.csv(file = file.path(filedir, "all.sites.perc.traits.csv"), - stringsAsFactors = FALSE) - -### read all data -library(data.table) -system.time(data.all <- fread(file.path(filedir, "data.all.log.csv"), - stringsAsFactors = FALSE)) -## system.time(data.all <- read.csv(file.path(filedir, "data.all.csv"), -## stringsAsFactors = FALSE)) - -system.time(data.all.no.log <- fread(file.path(filedir, "data.all.no.log.csv"), - stringsAsFactors = FALSE)) - -df.lmer <- fun.data.for.lmer(data.all, 'Max.height', - type.filling = 'species', cat.TF = FALSE) - -if(dim(data.all)[1] != sum(mat.perc[['num.obs']])) - stop('error not same dimension per ecoregion and total') -data.all$set <- factor(data.all$set) - -############## -fun.table.set.more.cut <- function(cut, data, var){ -return(table(data[['set']][data[[var]]> cut & !is.na(data[[var]])]) / - table(data[['set']][ !is.na(data[[var]])])) -} - -### function to look at number of observation per data set in function of min.perc -pdf('figs/test.processed/perc.vs.cut.pdf') -for (t in c('Wood.density','SLA', 'Leaf.N', 'Max.height')){ -perc.genus <- paste(t, 'perc.genus', sep = '.') -perc.species <- paste(t, 'perc.species', sep = '.') -cut.perc <- seq(0.7, 0.99, length = 30) - -set.sp <- do.call('rbind', lapply(cut.perc, - fun.table.set.more.cut, - data = data.all, - perc.species)) -set.gs <- do.call('rbind', lapply(cut.perc, - fun.table.set.more.cut, - data = data.all, - perc.genus)) - - -par(mfrow=c(1, 2)) -matplot(cut.perc, set.sp, type = 'l', col = col.vec[colnames(set.sp)], lwd = 2, - main = t, lty = 1) -matplot(cut.perc, set.gs, , type = 'l', col = col.vec[colnames(set.sp)], - lwd = 2, lty =1) -legend(0.7, 0.8, colnames(set.sp), col = col.vec[colnames(set.sp)], - lty = 1, lwd = 2) -} -dev.off() - -## Look at number of species with enough occurence -sp.mat <- table(data.all$ecocode, data.all$sp) -sum.sp <- apply(sp.mat, MARGIN = 1, sum, na.rm = TRUE) -sp.mat.perc <- sp.mat/sum.sp -sort(apply(sp.mat.perc > 0.1, MARGIN = 1, sum)) -sort(apply(sp.mat > 200, MARGIN = 1, sum)) - -## sd of trait focal and shannon -fun.sd.x.per.y <- function(x, y = 'ecocode', data = data.all){ - res <- tapply(data[[x]], data[[y]], sd , na.rm = TRUE) - df.res <- data.frame(res) - names(df.res) <- x - return(df.res) -} - -trait <- c('Leaf.N','SLA','Wood.density','Max.height') -# per ecocode -sd.ecocode <- do.call( 'cbind', lapply(paste(trait, 'focal', sep = '.'), - fun.sd.x.per.y, - data = data.all)) -set.per.ecocode <- tapply(data.all$set, data.all$ecocode, unique) -div.per.ecocode <- tapply(data.all$sp, data.all$ecocode, fun_div) -nsp.per.ecocode <- tapply(data.all$sp, data.all$ecocode, - function(x) nlevels(factor(x))) -nsp.per.plot <- tapply(data.all$sp, data.all$plot, - function(x) nlevels(factor(x))) -sd.ecocode <- data.frame(set = set.per.ecocode, ecocode = rownames(sd.ecocode), - sd.ecocode, do.call('rbind', div.per.ecocode),nsp = nsp.per.ecocode) -# per set -sd.set <- do.call( 'cbind', lapply(paste(trait, 'focal', sep = '.'), - fun.sd.x.per.y, y = 'set', - data = data.all)) -div.per.set <- do.call('rbind', tapply(data.all$sp, data.all$set, fun_div)) -nsp.per.set <- tapply(data.all$sp, data.all$set, - function(x) nlevels(factor(x))) -sd.set <- data.frame(set = rownames(sd.set), - sd.set, div.per.set, nsp = nsp.per.set) -## plots - -pdf("figs/test.processed/boxplot.traits.sd.tot.ecocode.pdf", - width = 12, height = 12) -par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) -for (i in paste(trait,'focal',sep='.')){ - barplot(sd.ecocode[[i]], las = 3, - main=i, ylab = 'sd', names.arg = sd.ecocode$ecocode, - col = col.vec[sd.ecocode$set]) -} -dev.off() - -pdf("figs/test.processed/boxplot.traits.div.tot.ecocode.pdf", - width = 12, height = 12) -par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1)) -for (i in c('shannon')){ - barplot(exp(sd.ecocode[[i]]), las = 3, - main=i, names.arg = sd.ecocode$ecocode, - col = col.vec[sd.ecocode$set], log ='y', - ylim = range(1, exp(sd.set[[i]])), - ylab = 'exp(shannon)') -} -for (i in c('nsp')){ - barplot(sd.ecocode[[i]], las = 3, - main=i, names.arg = sd.ecocode$ecocode, - col = col.vec[sd.ecocode$set], log ='y', - ylim = range(0.1, (sd.set[[i]])), - ylab = 'N species') -} -dev.off() - - -pdf("figs/test.processed/boxplot.traits.sd.tot.set.pdf", - width = 12, height = 12) -par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) -for (i in paste(trait,'focal',sep='.')){ - barplot(sd.set[[i]], las = 3, - main = i, ylab = 'sd', names.arg = sd.set$set, - col = col.vec[sd.set$set]) -} -dev.off() - -pdf("figs/test.processed/boxplot.traits.div.tot.SET.pdf", - width = 12, height = 12) -par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1)) -for (i in c('shannon')){ - barplot(exp(sd.set[[i]]), las = 3, - main=i, names.arg = sd.set$set, - col = col.vec[sd.set$set], log = 'y', - ylim = range(1, exp(sd.set[[i]])), - ylab = 'exp(shannon)') - abline(h = 5) -} -for (i in c('nsp')){ - barplot((sd.set[[i]]), las = 3, - main=i, names.arg = sd.set$set, - col = col.vec[sd.set$set], log ='y', - ylim = range(0.1, (sd.set[[i]])), - ylab = 'N species') -} -dev.off() - - -######## -### TODO -#- look at BATOT ALONG MAP AND MAT (log scale) -#- how to compute FD on plot with diferent size -#- pattern of CWM -#- pattern ED angio/conif - -## look at BATOT per ecocode -MAP.ECOCODE <- tapply(data.all$MAP, INDEX = data.all$ecocode, FUN = mean) -BATOT.ECOCODE <- tapply(data.all$BATOT, INDEX = data.all$ecocode, FUN = mean) -SET.ECOCODE <- tapply(data.all$set, INDEX = data.all$ecocode, FUN = unique) - -pdf("figs/test.processed/BATOT.vs.MAP.pdf") -par(mar=c(8.1,4.1,4.1,2.1)) -plot(log(MAP.ECOCODE), BATOT.ECOCODE, pch = '', ylim = c(0,180) , - xlab = 'Mean annual precipitation (log)', - ylab = 'Total basal area per plot (m^2/ha)') -boxplot(data.all$BATOT ~ data.all$ecocode, las = 3, - at = log(MAP.ECOCODE),names = NA, - boxwex = 0.05, outline = FALSE, - col = col.vec[SET.ECOCODE], xlim = range(log(MAP.ECOCODE)), add = TRUE) -legend('topleft', legend = names(col.vec),fill=col.vec) -dev.off() - -pdf("figs/test.processed/boxplot.BATOT.D.G.BA.G.pdf", width = 20 , height = 10) -par(mfrow=c(2,2)) -for (i in c('BATOT', 'D', 'G', 'BA.G')) - boxplot((data.all[[i]]+1000) ~ data.all$ecocode, las = 3, ylab = i, - log = 'y' , outline = FALSE) -dev.off() - -trait <- c('Leaf.N','SLA','Wood.density','Max.height') - -pdf("figs/test.processed/boxplot.traits.focal.pdf", - width = 12, height = 7) -par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) -for (i in paste(trait,'focal',sep='.')){ - set.vec <- sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])), - function(x) gsub(paste(" ", - gsub("^([a-zA-Z]* )", "",x), - sep = ""), - "", x, fixed = TRUE)) - boxplot(data.all[[i]] ~ data.all$ecocode, las = 3, - main=i, - col = col.vec[set.vec], outline = FALSE) -} - -dev.off() - - -pdf("figs/test.processed/boxplot.traits.CWM.fill.pdf", - width = 12, height = 7) -par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) -for (i in paste(trait,'CWM.fill',sep='.')){ - set.vec <- sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])), - function(x) gsub(paste(" ", - gsub("^([a-zA-Z]* )", "",x), - sep = ""), - "", x, fixed = TRUE)) - boxplot(data.all[[i]] ~ data.all$ecocode, las = 3, - main=i, - col = col.vec[set.vec], outline = FALSE) -} - -dev.off() - - - -pdf("figs/test.processed/boxplot.traits.abs.CWM.fill.pdf", - width = 12, height = 7) -par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) -for (i in paste(trait,'abs.CWM.fill',sep='.')){ - set.vec <- sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])), - function(x) gsub(paste(" ", - gsub("^([a-zA-Z]* )", "",x), - sep = ""), - "", x, fixed = TRUE)) - boxplot(data.all[[i]] ~ data.all$ecocode, las = 3, - main=i, - col = col.vec[set.vec], outline = FALSE) -} - -dev.off() - -### compute mean BATOT, number of species, traits and VAR OF TRAITS per cluster -system.time( data.summarise <- fun.compute.all.var.cluster( data.all ) ) - -pdf("figs/test.processed/perc.angio.deciduous.pdf") -par(mfrow = c(2, 1)) -## angio -per.angio.ecocode.mean <- tapply(data.summarise$per.angio, - data.summarise$ecocode,mean, - na.rm = TRUE) -names.ecocode <- tapply(data.summarise$ecocode, data.summarise$ecocode,unique) -set.ecocode <- tapply(data.summarise$set, data.summarise$ecocode,unique) -names(per.angio.ecocode.mean) <- names.ecocode -par(mar=c(8.1,4.1,4.1,2.1)) -mp <- barplot(per.angio.ecocode.mean, las = 3, - col= col.vec[set.ecocode], ylab = 'perc angio') - -## deciduous -per.deciduous.ecocode.mean <- tapply(data.summarise$per.deciduous, - data.summarise$ecocode, mean , - na.rm = TRUE) -names.ecocode <- tapply(data.summarise$ecocode, data.summarise$ecocode, unique) -set.ecocode <- tapply(data.summarise$set, data.summarise$ecocode,unique) -names(per.deciduous.ecocode.mean) <- names.ecocode -par(mar=c(8.1,4.1,4.1,2.1)) -mp <- barplot(per.deciduous.ecocode.mean, las = 3, - col = col.vec[set.ecocode], ylab = 'perc deciduous') -dev.off() - - - - - -plot(log(data.summarise$MAP), data.summarise$BATOT, - col = col.vec[data.summarise$set], cex = 0.1, ylim = c(0, 190)) -fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$BATOT, - Nclass = 15, add = TRUE) - -par(mfrow = c(1,2)) -plot(log(data.summarise$MAP),data.summarise$sd.SLA, - ,col=col.vec[data.summarise$set], cex = 0.1) -fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$sd.SLA, - Nclass = 15, add = TRUE) - -plot(log(data.summarise$MAP),data.summarise$mean.SLA, - ,col=col.vec[data.summarise$set], cex = 0.1,) -fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$mean.SLA, - Nclass = 15, add = TRUE) - -par(mfrow = c(1,2)) -plot(log(data.summarise$MAP),data.summarise$sd.Wood.density, - ,col=col.vec[data.summarise$set], cex = 0.1) -fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$sd.Wood.density, - Nclass = 15, add = TRUE) - -plot(log(data.summarise$MAP),data.summarise$mean.Wood.density, - ,col=col.vec[data.summarise$set], cex = 0.1,) -fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$mean.Wood.density, - Nclass = 15, add = TRUE) - -pch.vec <- 1:14 -names(pch.vec) <- names(col.vec) -plot(data.summarise$n_sp,data.summarise$sd.Wood.density, - ,col=col.vec[data.summarise$set],pch=pch.vec[data.summarise$set]) -legend("topright",legend=names(col.vec),col=col.vec,pch=pch.vec) - - - -pdf("figs/test.processed/boxplot.traits.sd.pdf", - width = 12, height = 7) -par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) -for (i in paste("sd",trait,sep='.')){ - set.vec <- sapply(levels(factor(data.summarise$ecocode[ - !is.na(data.summarise[[i]])])), - function(x) gsub(paste(" ", - gsub("^([a-zA-Z]* )", "",x), - sep = ""), - "", x, fixed = TRUE)) - boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3, - main=i, - col = col.vec[set.vec]) -} -dev.off() - -pdf("figs/test.processed/boxplot.traits.mean.pdf", - width = 12, height = 7) -par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1)) -for (i in paste("mean",trait,sep='.')){ - set.vec <- sapply(levels(factor(data.summarise$ecocode[ - !is.na(data.summarise[[i]])])), - function(x) gsub(paste(" ", - gsub("^([a-zA-Z]* )", "",x), - sep = ""), - "", x, fixed = TRUE)) - boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3, - main=i, - col = col.vec[set.vec]) -} -dev.off() - -pdf("figs/test.processed/boxplot.div.pdf", - width = 12, height = 7) -par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1)) -for (i in c('shannon', 'simpson')){ - set.vec <- sapply(levels(factor(data.summarise$ecocode[ - !is.na(data.summarise[[i]])])), - function(x) gsub(paste(" ", - gsub("^([a-zA-Z]* )", "",x), - sep = ""), - "", x, fixed = TRUE)) - boxplot(exp(data.summarise[[i]]) ~ data.summarise$ecocode, las = 3, - main=i, - col = col.vec[set.vec]) -} -dev.off() - -pdf("figs/test.processed/boxplot.nsp.pdf", - width = 12, height = 7) -par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1)) -for (i in c('n_sp', 'count')){ - set.vec <- sapply(levels(factor(data.summarise$ecocode[ - !is.na(data.summarise[[i]])])), - function(x) gsub(paste(" ", - gsub("^([a-zA-Z]* )", "",x), - sep = ""), - "", x, fixed = TRUE)) - boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3, - main = i, - col = col.vec[set.vec]) -} -dev.off() - - -fun.boxplot.data <- function(data, x, y, ...){ - fun.boxplot.breaks(data[[x]]*data[['BATOT']], data[[y]], - Nclass = 15, add = FALSE, - main = unique(data$ecocode), ...) -} - -pdf("figs/test.processed/Wood.density.CWM.fill.vs.BA.G.pdf") -for (i in sets){ - data.tmp <- data.all[data.all$set == i,] - if(nlevels(factor(data.tmp$ecocode)) > 1) - par(mfrow = c(2, ceiling( nlevels(factor(data.tmp$ecocode))/2))) - by(data.tmp, data.tmp$ecocode, fun.boxplot.data, - x = 'Wood.density.CWM.fill', - y = 'BA.G' ) -} -dev.off() - -## plot hist of species abundance -pdf("figs/test.processed/speciesrank.pdf") -for (i in sets){ - data.tmp <- data.all[data.all$set == i,] - if(nlevels(factor(data.tmp$ecocode)) > 1) - par(mfrow = c(2, ceiling( nlevels(factor(data.tmp$ecocode))/2))) - if(nlevels(factor(data.tmp$ecocode)) == 1) - par(mfrow = c(1,1)) - by(data.tmp, data.tmp$ecocode, - function(data) hist(table(data[['sp']])/length(data[['sp']]), - xlab = 'n obs', - ylab = ' n species', - main=unique(data[['ecocode']]) - )) -} -dev.off() - diff --git a/scripts/process.data/explore.processed.data.report.R b/scripts/process.data/explore.processed.data.report.R index dd0a1dd027ab3448d37dae88cb7cda4211e05c57..2704261f32aafd6a757d81bb073c4c37327d914a 100644 --- a/scripts/process.data/explore.processed.data.report.R +++ b/scripts/process.data/explore.processed.data.report.R @@ -65,33 +65,17 @@ rm(data.all.t) ## plot of correlation between BA.G and D the best is log(BA.G) vs log(D) library(ggplot2) - fun.hexbin.with.smooth.ggplot(data.all[['D']], - data.all[['G']], - 'D', - 'G') - x11() - fun.hexbin.with.smooth.ggplot(data.all[['D']], - data.all[['BA.G']], - 'D', - 'BA.G')+geom_abline(aes(intercept = c(-60, 500), slope = c(0,0)))+geom_abline(aes(intercept = c(500), slope = c(0)))+geom_abline(aes(intercept = c(-100), slope = c(0))) - - x11() - fun.hexbin.with.smooth.ggplot(pi*data.all[['D']]^2/4, - data.all[['BA.G']], - 'D2', - 'BA.G') - x11() +x11() fun.hexbin.with.smooth.ggplot(log(data.all[['D']]), log(data.all[['BA.G']]-min(data.all[['BA.G']]+1, na.rm =TRUE)), 'Dlog', 'BA.Glog') -fun.boxplot.breaks(log(data.all[['D']]), - log(data.all[['BA.G']]-min(data.all[['BA.G']], na.rm =TRUE)+1), add = FALSE) - - -fun.boxplot.breaks((data.all[['D']]), - (data.all[['BA.G']]), add = FALSE) + x11() + fun.hexbin.with.smooth.ggplot(data.all[['BATOT']], + log(data.all[['BA.G']]-min(data.all[['BA.G']]+1, na.rm =TRUE)), + 'BATOT', + 'BA.Glog') @@ -157,10 +141,10 @@ names.biomes[8] <- 'Trop forest' names.biomes[8] <- 'Trop rain forest' ## REMOVE BIOMES Tundra and Subtropical desert because to few observation - -data.temp <- data.summarise[!data.summarise$biomes %in% c('Tundra', +library(dplyr) +data.temp <- dplyr::filter(data.summarise, !biomes %in% c('Tundra', 'Temperate grassland desert', - 'Subtropical desert'), ] + 'Subtropical desert')) data.temp$biomes <- factor(data.temp$biomes) ## shorter name for biomes names.biomes.t <- levels(factor(data.temp$biomes)) diff --git a/scripts/process.data/remove.out.R b/scripts/process.data/remove.out.R index 5af04047003a181dad9b398e1b7a036b7e4e9490..6331f0e5ea4812a29885a9458ceac4bd526dbc85 100644 --- a/scripts/process.data/remove.out.R +++ b/scripts/process.data/remove.out.R @@ -7,7 +7,11 @@ filedir <- "output/processed" ## remove outlier following Condit approach and reformat data data.all <- readRDS( 'output/processed/data.all.log.t.rds') -data.all <- fun.reform.data.and.remove.outlier(data.all) + +data.all <- fun.reform.data.and.remove.outlier(data.all, err.limit = 4, select.one.census.rand = TRUE) + + + gc() saveRDS(data.all, 'output/processed/data.all.log.rds') ## fun.write.big.csv(data.all,