diff --git a/Makefile b/Makefile
index 70f736322a7c405b5c070ea81b7999bbe6c3027e..614c9dff8647a1c78c13f72c54107a07f0e9f921 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,7 @@ GLOBAL: $(D3)/Done.txt
 $(D3)/Done.txt: scripts/process.data/remove.out.R scripts/process.data/summarise.data.R $(D3)/Done.t.txt
 	Rscript $< 
 	Rscript scripts/process.data/summarise.data.R
-	Rscript scripts/process.data/plot.data.all.R
+	Rscript scripts/process.data/plot.data.all.R; convert figs/test.processed/*.p* figs/test.processed/all.CWM.pdf
 
 $(D3)/Done.t.txt: scripts/process.data/merge.all.processed.data.R $(sites)
 	Rscript $<
@@ -66,7 +66,7 @@ $(D5)/Done.txt:	scripts/find.trait/test.traits.R $(D3traits)
 TEST.CWM: $(D6)/Done.txt 
 
 $(D6)/Done.txt:	scripts/process.data/test.tree.CWM.R  $(D3Done)
-	Rscript $<  ; convert figs/test.processed/*.p* figs/test.processed/all.CWM.pdf
+	Rscript $<  
 
 #-------------------------------------------------------
 
diff --git a/R/analysis/lmer.output-fun.R b/R/analysis/lmer.output-fun.R
index 0b222b7b5f20344f2b8b129ec844ff30cd834b8f..cab2c0d5e7854111f968788c3330e134942c38cd 100644
--- a/R/analysis/lmer.output-fun.R
+++ b/R/analysis/lmer.output-fun.R
@@ -36,12 +36,15 @@ summarise.lmer.all.output <- function(x, random.name ){
 }
 
 summarise.lmer.output.list <- function(f ){
-    output.lmer <- read.lmer.output(f)
+    list.output.lmer <- read.lmer.output(f)
+    output.lmer <- list.output.lmer[['output']]
+    relgrad <- list.output.lmer[['relgrad']]
     if(!is.null(output.lmer)){
 	res <- list(files.details = files.details(f),
 		    lmer.summary = summarise.lmer.output( output.lmer),
                     terms = terms(output.lmer),
-                    vcov = vcov(output.lmer))
+                    vcov = vcov(output.lmer),
+                    relgrad = relgrad)
     }else{
         res <- NULL
     }
@@ -50,7 +53,9 @@ summarise.lmer.output.list <- function(f ){
 
 
 summarise.lmer.output.all.list <- function(f ){
-    output.lmer <- read.lmer.output(f)
+    list.output.lmer <- read.lmer.output(f)
+    output.lmer <- list.output.lmer[['output']]
+    relgrad <- list.output.lmer[['relgrad']]
     if(!is.null(output.lmer)){
         details <-  files.details.all(f)
 
@@ -63,7 +68,8 @@ summarise.lmer.output.all.list <- function(f ){
                                                               random.name =
                                                               model$var.BLUP),
                     terms = terms(output.lmer),
-                    vcov = vcov(output.lmer))
+                    vcov = vcov(output.lmer),
+                    relgrad = relgrad)
     }else{
         res <- NULL
     }
diff --git a/R/analysis/lmer.run.R b/R/analysis/lmer.run.R
index bb4757c034cef4c98ebc95600fa4a24b3c27d477..49609e57ddf5d1fe20cb0400b5b993d78ccd5c36 100644
--- a/R/analysis/lmer.run.R
+++ b/R/analysis/lmer.run.R
@@ -41,7 +41,7 @@ fun.call.lmer.and.save <- function(formula, df.lmer, path.out,
                                    var.sample, select.biome){
     lmer.output <- lmer(formula = formula, data = df.lmer, REML = FALSE,
                        control = lmerControl(optCtrl = list(maxfun = 40000) ) )
-    print(warnigs())
+    print(warnings())
     print(summary(lmer.output))
     relgrad <- with(lmer.output@optinfo$derivs,solve(Hessian,gradient))
     print('test convergence of relative gradient of hessian')
@@ -54,7 +54,8 @@ fun.call.lmer.and.save <- function(formula, df.lmer, path.out,
        name.file <- paste(var.sample, gsub(' ', '_',select.biome),
           "results.nolog.all.rds", sep ='.')
        }
-  saveRDS(lmer.output, file = file.path(path.out, name.file))
+  saveRDS(list(output = lmer.output, relgrad = relgrad),
+          file = file.path(path.out, name.file))
 }
 
 run.lmer <- function (model.file, trait,
diff --git a/R/process.data/process-fun.R b/R/process.data/process-fun.R
index 0bac9b56b4e274f5e15dfa6721c722480a45f304..c7ccff49e627de68f30720dfdd5dab0aaa49854b 100644
--- a/R/process.data/process-fun.R
+++ b/R/process.data/process-fun.R
@@ -762,42 +762,14 @@ fun.reformat.trait <- function(data,trait){
 return(data)
 }
 
-fun.reformat.trait.CAT <- function(data, trait, cat){
-##
-  trait.CWM.sd.n <-  paste(trait, 'CWM.fill', cat, sep = '.')
-##
-  trait.CWM.sd.e <- (parse(text =
-                           paste('(',trait,'.CWM.fill.', cat,
-                                 ' - mean(', trait,
-                                 '.focal, na.rm =TRUE))/sd(',trait,
-                                 '.focal, na.rm =TRUE)' ,
-                                 sep = '')))
-  data[, c(trait.CWM.sd.n) := eval(trait.CWM.sd.e) ]
-
-  ##
-  trait.abs.CWM.sd.n <-  paste(trait, 'abs.CWM.fill', cat, sep = '.')
-##
-  trait.abs.CWM.sd.e <- (parse(text =
-                           paste('(',trait,'.abs.CWM.fill.', cat,
-                                 ')/sd(',trait,
-                                 '.focal, na.rm =TRUE)' ,
-                                 sep = '')))
-  data[, c(trait.abs.CWM.sd.n) := eval(trait.abs.CWM.sd.e) ]
-return(data)
-}
-
 fun.standardized.traits <-  function(data,
                                      traits = c('SLA', 'Leaf.N',
                                           'Wood.density', 'Max.height',
-                                          'Seed.mass'),
-                                     cat.vec = c('A_EV', 'A_D', 'C')){
+                                          'Seed.mass')){
 
 for(trait in traits){
    data <- fun.reformat.trait(data, trait)
-   for (cat in cat.vec){
-   data <- fun.reformat.trait.CAT(data, trait, cat)
-     }
-   print(data)
+   print(str(data))
    }
 return(data)
 }
@@ -806,16 +778,26 @@ return(data)
 
 #### function to reformat data for global data set
 
-fun.reform.data.and.remove.outlier <- function(data.all,
-                                               std.traits.TF = TRUE){
+fun.reform.data.and.remove.outlier <- function(data.all, err.limit = 4,
+                                               std.traits.TF = TRUE,
+                                               select.one.census.rand = TRUE){
     ## remove outlier following Condit approach
     require(data.table)
+    require(dplyr)
     data.all <-  as.data.table(data.all)
+    q.l <- quantile(data.all$BA.G, probs = 0.00005, na.rm = TRUE)
+    q.h <- quantile(data.all$BA.G, probs = 0.99995, na.rm = TRUE)
+    data.all[!is.na(BA.G) &
+                 BA.G > q.h,
+                 c('G','BA.G') := NA]
+    data.all[!is.na(BA.G) &
+                 BA.G < q.l,
+                 c('G','BA.G') := NA]
     data.all[!is.na(G) &
                  G > 75,
                  c('G','BA.G') := NA]
     data.all[!is.na(G) &
-                 ((D*10+year*G) <= (D*10 -5*(0.006214*D*10 + 0.9036))),
+                 ((D*10+year*G) <= (D*10 -err.limit*(0.006214*D*10 + 0.9036))),
                  c('G','BA.G') := NA]
 
     # reformat factors
@@ -826,15 +808,15 @@ fun.reform.data.and.remove.outlier <- function(data.all,
     data.all[ , tree.id := paste(ecocode, tree.id)]
     data.all[ , obs.id := paste(ecocode, obs.id)]
     if(std.traits.TF) fun.standardized.traits(data.all)
-    data.all[ , plot.c := paste(plot, census)]
-    data.all <-  as.data.frame(data.all)
-    plots.select <- drop(as.matrix(group_by(data.all, plot) %>%
-                                   summarise(select = sample(plot.c, 1)) %>%
-                                   select(select)))
-
-    data.all <-  filter(data.all, plot %in% plots.select)
-                       # remove tree with multiple obs
-    ### TODO NEED TO CHANGE THAT TO SELECT RANDOMLY ONE CENSUS PER PLOT
+    if(select.one.census.rand){
+      data.all[ , plot.c := paste(plot, census)]
+      data.all <-  as.data.frame(data.all)
+      plots.select <- drop(as.matrix(group_by(data.all, plot) %>%
+                                     dplyr::summarise(select = sample(plot.c, 1)) %>%
+                                     dplyr::select(select)))
+      data.all <-  filter(data.all, plot.c %in% plots.select)
+    }
+    # remove tree with multiple obs
     return(data.all)
 }
 
diff --git a/scripts/process.data/explore.processed.data.R b/scripts/process.data/explore.processed.data.R
deleted file mode 100644
index deda9bbe2c8c84f1e2568f5a0e8e5056065a9b80..0000000000000000000000000000000000000000
--- a/scripts/process.data/explore.processed.data.R
+++ /dev/null
@@ -1,416 +0,0 @@
-#######################################
-#######################################
-###### EXPLORE DATA SET BEFORE ANALYSIS
-rm(list = ls())
-
-source("R/process.data/process-fun.R")
-source("R/process.data/test.tree.CWM-fun.R")
-source("R/utils/plot.R")
-source("R/utils/mem.R")
-source("R/analysis/lmer.run.R")
-
-filedir <- "output/processed"
-
-mat.perc <- read.csv(file = file.path(filedir, "all.sites.perc.traits.csv"),
-                     stringsAsFactors = FALSE)
-
-### read all data
-library(data.table)
-system.time(data.all <-  fread(file.path(filedir, "data.all.log.csv"),
-                     stringsAsFactors = FALSE))
-## system.time(data.all <-  read.csv(file.path(filedir, "data.all.csv"),
-##                      stringsAsFactors = FALSE))
-
-system.time(data.all.no.log <-  fread(file.path(filedir, "data.all.no.log.csv"),
-                     stringsAsFactors = FALSE))
-
-df.lmer <- fun.data.for.lmer(data.all, 'Max.height',
-                   type.filling = 'species', cat.TF = FALSE)
-
-if(dim(data.all)[1] != sum(mat.perc[['num.obs']]))
-    stop('error not same dimension per ecoregion and total')
-data.all$set <- factor(data.all$set)
-
-##############
-fun.table.set.more.cut <- function(cut, data, var){
-return(table(data[['set']][data[[var]]> cut & !is.na(data[[var]])]) /
-   table(data[['set']][ !is.na(data[[var]])]))
-}
-
-### function to look at number of observation per data set in function of min.perc
-pdf('figs/test.processed/perc.vs.cut.pdf')
-for (t in c('Wood.density','SLA', 'Leaf.N', 'Max.height')){
-perc.genus <- paste(t, 'perc.genus', sep = '.')
-perc.species <- paste(t, 'perc.species', sep = '.')
-cut.perc <- seq(0.7, 0.99, length = 30)
-
-set.sp <- do.call('rbind', lapply(cut.perc,
-                  fun.table.set.more.cut,
-                 data = data.all,
-                 perc.species))
-set.gs <- do.call('rbind', lapply(cut.perc,
-                 fun.table.set.more.cut,
-                 data = data.all,
-                 perc.genus))
-
-
-par(mfrow=c(1, 2))
-matplot(cut.perc, set.sp, type = 'l', col = col.vec[colnames(set.sp)], lwd = 2,
-        main = t,  lty = 1)
-matplot(cut.perc, set.gs, , type = 'l', col = col.vec[colnames(set.sp)],
-        lwd = 2, lty =1)
-legend(0.7, 0.8, colnames(set.sp), col = col.vec[colnames(set.sp)],
-       lty = 1, lwd = 2)
-}
-dev.off()
-
-## Look at number of species with enough occurence
-sp.mat <- table(data.all$ecocode, data.all$sp)
-sum.sp <- apply(sp.mat, MARGIN = 1, sum, na.rm = TRUE)
-sp.mat.perc <- sp.mat/sum.sp
-sort(apply(sp.mat.perc > 0.1, MARGIN = 1, sum))
-sort(apply(sp.mat > 200, MARGIN = 1, sum))
-
-## sd of trait focal and shannon
-fun.sd.x.per.y <- function(x, y = 'ecocode', data = data.all){
-   res <- tapply(data[[x]], data[[y]], sd , na.rm = TRUE)
-   df.res <- data.frame(res)
-   names(df.res) <-  x
-   return(df.res)
-}
-
-trait <- c('Leaf.N','SLA','Wood.density','Max.height')
-# per ecocode
-sd.ecocode <- do.call( 'cbind', lapply(paste(trait, 'focal', sep = '.'),
-       fun.sd.x.per.y,
-       data = data.all))
-set.per.ecocode <- tapply(data.all$set, data.all$ecocode, unique)
-div.per.ecocode <- tapply(data.all$sp, data.all$ecocode, fun_div)
-nsp.per.ecocode <- tapply(data.all$sp, data.all$ecocode,
-                          function(x) nlevels(factor(x))) 
-nsp.per.plot <- tapply(data.all$sp, data.all$plot,
-                          function(x) nlevels(factor(x))) 
-sd.ecocode <- data.frame(set = set.per.ecocode, ecocode = rownames(sd.ecocode),
-           sd.ecocode, do.call('rbind', div.per.ecocode),nsp = nsp.per.ecocode)
-# per set
-sd.set <- do.call( 'cbind', lapply(paste(trait, 'focal', sep = '.'),
-       fun.sd.x.per.y, y = 'set',
-       data = data.all))
-div.per.set <- do.call('rbind', tapply(data.all$sp, data.all$set, fun_div))
-nsp.per.set  <- tapply(data.all$sp, data.all$set,
-                       function(x) nlevels(factor(x))) 
-sd.set <- data.frame(set = rownames(sd.set),
-           sd.set, div.per.set, nsp = nsp.per.set)
-## plots
-
-pdf("figs/test.processed/boxplot.traits.sd.tot.ecocode.pdf",
-    width = 12, height = 12)
-par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
-for (i in paste(trait,'focal',sep='.')){
-   barplot(sd.ecocode[[i]], las = 3,
-        main=i, ylab = 'sd', names.arg = sd.ecocode$ecocode,
-        col  = col.vec[sd.ecocode$set])
-}
-dev.off()
-
-pdf("figs/test.processed/boxplot.traits.div.tot.ecocode.pdf",
-    width = 12, height = 12)
-par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1))
-for (i in c('shannon')){
-   barplot(exp(sd.ecocode[[i]]), las = 3,
-        main=i, names.arg = sd.ecocode$ecocode,
-        col  = col.vec[sd.ecocode$set], log ='y',
-           ylim = range(1, exp(sd.set[[i]])),
-           ylab = 'exp(shannon)')
-}
-for (i in c('nsp')){
-   barplot(sd.ecocode[[i]], las = 3,
-        main=i, names.arg = sd.ecocode$ecocode,
-        col  = col.vec[sd.ecocode$set], log ='y',
-           ylim = range(0.1, (sd.set[[i]])),
-           ylab = 'N species')
-}
-dev.off()
-
-
-pdf("figs/test.processed/boxplot.traits.sd.tot.set.pdf",
-    width = 12, height = 12)
-par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
-for (i in paste(trait,'focal',sep='.')){
-   barplot(sd.set[[i]], las = 3,
-        main = i, ylab = 'sd', names.arg = sd.set$set,
-        col  = col.vec[sd.set$set])
-}
-dev.off()
-
-pdf("figs/test.processed/boxplot.traits.div.tot.SET.pdf",
-    width = 12, height = 12)
-par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1))
-for (i in c('shannon')){
-   barplot(exp(sd.set[[i]]), las = 3,
-        main=i, names.arg = sd.set$set,
-        col  = col.vec[sd.set$set], log = 'y',
-           ylim = range(1, exp(sd.set[[i]])),
-           ylab = 'exp(shannon)')
-   abline(h = 5)
-}
-for (i in c('nsp')){
-   barplot((sd.set[[i]]), las = 3,
-        main=i, names.arg = sd.set$set,
-        col  = col.vec[sd.set$set], log ='y',
-           ylim = range(0.1, (sd.set[[i]])),
-           ylab = 'N species')
-}
-dev.off()
-
-
-########
-### TODO
-#- look at BATOT ALONG MAP AND MAT (log scale)
-#- how to compute FD on plot with diferent size
-#- pattern of CWM
-#- pattern ED angio/conif
-
-## look at BATOT per ecocode
-MAP.ECOCODE <- tapply(data.all$MAP, INDEX = data.all$ecocode, FUN = mean)
-BATOT.ECOCODE <- tapply(data.all$BATOT, INDEX = data.all$ecocode, FUN = mean)
-SET.ECOCODE <- tapply(data.all$set, INDEX = data.all$ecocode, FUN = unique)
-
-pdf("figs/test.processed/BATOT.vs.MAP.pdf")
-par(mar=c(8.1,4.1,4.1,2.1))
-plot(log(MAP.ECOCODE), BATOT.ECOCODE, pch = '', ylim = c(0,180) ,
-     xlab = 'Mean annual precipitation (log)',
-     ylab = 'Total basal area per plot (m^2/ha)')
-boxplot(data.all$BATOT ~ data.all$ecocode, las = 3,
-        at = log(MAP.ECOCODE),names = NA,
-        boxwex = 0.05, outline = FALSE,
-        col = col.vec[SET.ECOCODE], xlim = range(log(MAP.ECOCODE)), add = TRUE)
-legend('topleft', legend = names(col.vec),fill=col.vec)
-dev.off()
-
-pdf("figs/test.processed/boxplot.BATOT.D.G.BA.G.pdf", width = 20 , height = 10)
-par(mfrow=c(2,2))
-for (i in c('BATOT', 'D', 'G', 'BA.G'))
-    boxplot((data.all[[i]]+1000) ~ data.all$ecocode, las = 3, ylab = i,
-            log = 'y' , outline = FALSE)
-dev.off()
-
-trait <- c('Leaf.N','SLA','Wood.density','Max.height')
-
-pdf("figs/test.processed/boxplot.traits.focal.pdf",
-    width = 12, height = 7)
-par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
-for (i in paste(trait,'focal',sep='.')){
-   set.vec <-  sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])),
-                             function(x) gsub(paste(" ",
-                                          gsub("^([a-zA-Z]* )", "",x),
-                                          sep = ""), 
-                             "", x, fixed = TRUE))
-   boxplot(data.all[[i]] ~ data.all$ecocode, las = 3,
-        main=i,
-        col  = col.vec[set.vec], outline = FALSE)
-}
-
-dev.off()
-
-
-pdf("figs/test.processed/boxplot.traits.CWM.fill.pdf",
-    width = 12, height = 7)
-par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
-for (i in paste(trait,'CWM.fill',sep='.')){
-   set.vec <-  sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])),
-                             function(x) gsub(paste(" ",
-                                          gsub("^([a-zA-Z]* )", "",x),
-                                          sep = ""), 
-                             "", x, fixed = TRUE))
-   boxplot(data.all[[i]] ~ data.all$ecocode, las = 3,
-        main=i,
-        col  = col.vec[set.vec], outline = FALSE)
-}
-
-dev.off()
-
-
-
-pdf("figs/test.processed/boxplot.traits.abs.CWM.fill.pdf",
-    width = 12, height = 7)
-par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
-for (i in paste(trait,'abs.CWM.fill',sep='.')){
-   set.vec <-  sapply(levels(factor(data.all$ecocode[!is.na(data.all[[i]])])),
-                             function(x) gsub(paste(" ",
-                                          gsub("^([a-zA-Z]* )", "",x),
-                                          sep = ""), 
-                             "", x, fixed = TRUE))
-   boxplot(data.all[[i]] ~ data.all$ecocode, las = 3,
-        main=i,
-        col  = col.vec[set.vec], outline = FALSE)
-}
-
-dev.off()
-
-### compute mean BATOT, number of species, traits and VAR OF TRAITS per cluster
-system.time( data.summarise <- fun.compute.all.var.cluster( data.all ) )
-
-pdf("figs/test.processed/perc.angio.deciduous.pdf")
-par(mfrow = c(2, 1))
-## angio
-per.angio.ecocode.mean <- tapply(data.summarise$per.angio,
-                                 data.summarise$ecocode,mean,
-                                 na.rm = TRUE)
-names.ecocode <- tapply(data.summarise$ecocode, data.summarise$ecocode,unique)
-set.ecocode <- tapply(data.summarise$set, data.summarise$ecocode,unique)
-names(per.angio.ecocode.mean) <- names.ecocode
-par(mar=c(8.1,4.1,4.1,2.1))
-mp <- barplot(per.angio.ecocode.mean, las = 3,
-              col= col.vec[set.ecocode], ylab = 'perc angio')
-
-## deciduous
-per.deciduous.ecocode.mean <- tapply(data.summarise$per.deciduous,
-                                     data.summarise$ecocode, mean ,
-                                     na.rm = TRUE)
-names.ecocode <- tapply(data.summarise$ecocode, data.summarise$ecocode, unique)
-set.ecocode <- tapply(data.summarise$set, data.summarise$ecocode,unique)
-names(per.deciduous.ecocode.mean) <- names.ecocode
-par(mar=c(8.1,4.1,4.1,2.1))
-mp <- barplot(per.deciduous.ecocode.mean, las = 3,
-              col = col.vec[set.ecocode], ylab = 'perc deciduous')
-dev.off()
-
-
-
-
-
-plot(log(data.summarise$MAP), data.summarise$BATOT,
-      col = col.vec[data.summarise$set], cex = 0.1, ylim = c(0, 190))
-fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$BATOT,
-                   Nclass = 15, add = TRUE)
-
-par(mfrow = c(1,2))
-plot(log(data.summarise$MAP),data.summarise$sd.SLA,
-     ,col=col.vec[data.summarise$set],  cex = 0.1)
-fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$sd.SLA,
-                   Nclass = 15, add = TRUE)
-
-plot(log(data.summarise$MAP),data.summarise$mean.SLA,
-      ,col=col.vec[data.summarise$set], cex = 0.1,)
-fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$mean.SLA,
-                   Nclass = 15, add = TRUE)
-
-par(mfrow = c(1,2))
-plot(log(data.summarise$MAP),data.summarise$sd.Wood.density,
-     ,col=col.vec[data.summarise$set],  cex = 0.1)
-fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$sd.Wood.density,
-                   Nclass = 15, add = TRUE)
-
-plot(log(data.summarise$MAP),data.summarise$mean.Wood.density,
-      ,col=col.vec[data.summarise$set], cex = 0.1,)
-fun.boxplot.breaks(log(data.summarise$MAP), data.summarise$mean.Wood.density,
-                   Nclass = 15, add = TRUE)
-
-pch.vec <- 1:14
-names(pch.vec) <- names(col.vec)
-plot(data.summarise$n_sp,data.summarise$sd.Wood.density,
-     ,col=col.vec[data.summarise$set],pch=pch.vec[data.summarise$set])
-legend("topright",legend=names(col.vec),col=col.vec,pch=pch.vec)
-
-
-
-pdf("figs/test.processed/boxplot.traits.sd.pdf",
-    width = 12, height = 7)
-par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
-for (i in paste("sd",trait,sep='.')){
-   set.vec <-  sapply(levels(factor(data.summarise$ecocode[
-                                            !is.na(data.summarise[[i]])])),
-                             function(x) gsub(paste(" ",
-                                          gsub("^([a-zA-Z]* )", "",x),
-                                          sep = ""), 
-                             "", x, fixed = TRUE))
-   boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3,
-        main=i,
-        col  = col.vec[set.vec])
-}
-dev.off()
-
-pdf("figs/test.processed/boxplot.traits.mean.pdf",
-    width = 12, height = 7)
-par(mfrow = c(2,2), mar = c(8.1,4.1,4.1,2.1))
-for (i in paste("mean",trait,sep='.')){
-   set.vec <-  sapply(levels(factor(data.summarise$ecocode[
-                                            !is.na(data.summarise[[i]])])),
-                             function(x) gsub(paste(" ",
-                                          gsub("^([a-zA-Z]* )", "",x),
-                                          sep = ""), 
-                             "", x, fixed = TRUE))
-   boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3,
-        main=i,
-        col  = col.vec[set.vec])
-}
-dev.off()
-
-pdf("figs/test.processed/boxplot.div.pdf",
-    width = 12, height = 7)
-par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1))
-for (i in c('shannon', 'simpson')){
-   set.vec <-  sapply(levels(factor(data.summarise$ecocode[
-                                            !is.na(data.summarise[[i]])])),
-                             function(x) gsub(paste(" ",
-                                          gsub("^([a-zA-Z]* )", "",x),
-                                          sep = ""), 
-                             "", x, fixed = TRUE))
-   boxplot(exp(data.summarise[[i]]) ~ data.summarise$ecocode, las = 3,
-        main=i,
-        col  = col.vec[set.vec])
-}
-dev.off()
-
-pdf("figs/test.processed/boxplot.nsp.pdf",
-    width = 12, height = 7)
-par(mfrow = c(2,1), mar = c(8.1,4.1,4.1,2.1))
-for (i in c('n_sp', 'count')){
-   set.vec <-  sapply(levels(factor(data.summarise$ecocode[
-                                            !is.na(data.summarise[[i]])])),
-                             function(x) gsub(paste(" ",
-                                          gsub("^([a-zA-Z]* )", "",x),
-                                          sep = ""), 
-                             "", x, fixed = TRUE))
-   boxplot(data.summarise[[i]] ~ data.summarise$ecocode, las = 3,
-        main = i,
-        col  = col.vec[set.vec])
-}
-dev.off()
-
-
-fun.boxplot.data <-  function(data, x, y, ...){
-    fun.boxplot.breaks(data[[x]]*data[['BATOT']], data[[y]],
-                       Nclass = 15, add = FALSE,
-                       main = unique(data$ecocode), ...)
-}
-
-pdf("figs/test.processed/Wood.density.CWM.fill.vs.BA.G.pdf")
-for (i in sets){
-    data.tmp <- data.all[data.all$set == i,]
-    if(nlevels(factor(data.tmp$ecocode)) > 1) 
-       par(mfrow = c(2, ceiling( nlevels(factor(data.tmp$ecocode))/2)))
-    by(data.tmp, data.tmp$ecocode, fun.boxplot.data,
-       x = 'Wood.density.CWM.fill',
-       y = 'BA.G' )
-}
-dev.off()
-
-## plot hist of species abundance
-pdf("figs/test.processed/speciesrank.pdf")
-for (i in sets){
-    data.tmp <- data.all[data.all$set == i,]
-    if(nlevels(factor(data.tmp$ecocode)) > 1) 
-       par(mfrow = c(2, ceiling( nlevels(factor(data.tmp$ecocode))/2)))
-    if(nlevels(factor(data.tmp$ecocode)) == 1)
-       par(mfrow = c(1,1))
-       by(data.tmp, data.tmp$ecocode,
-          function(data) hist(table(data[['sp']])/length(data[['sp']]),
-                              xlab = 'n obs',
-                              ylab = ' n species',
-                              main=unique(data[['ecocode']])
-                                     ))
-}
-dev.off()
-
diff --git a/scripts/process.data/explore.processed.data.report.R b/scripts/process.data/explore.processed.data.report.R
index dd0a1dd027ab3448d37dae88cb7cda4211e05c57..2704261f32aafd6a757d81bb073c4c37327d914a 100644
--- a/scripts/process.data/explore.processed.data.report.R
+++ b/scripts/process.data/explore.processed.data.report.R
@@ -65,33 +65,17 @@ rm(data.all.t)
 
 ## plot of correlation between BA.G and D the best is log(BA.G) vs log(D)
 library(ggplot2)
- fun.hexbin.with.smooth.ggplot(data.all[['D']],
-                                   data.all[['G']],
-                                   'D',
-                                   'G')
 
- x11()
- fun.hexbin.with.smooth.ggplot(data.all[['D']],
-                                   data.all[['BA.G']],
-                                   'D',
-                                   'BA.G')+geom_abline(aes(intercept = c(-60, 500), slope = c(0,0)))+geom_abline(aes(intercept = c(500), slope = c(0)))+geom_abline(aes(intercept = c(-100), slope = c(0)))
-
- x11()
- fun.hexbin.with.smooth.ggplot(pi*data.all[['D']]^2/4,
-                                   data.all[['BA.G']],
-                                   'D2',
-                                   'BA.G')
- x11()
+x11()
  fun.hexbin.with.smooth.ggplot(log(data.all[['D']]),
                                    log(data.all[['BA.G']]-min(data.all[['BA.G']]+1, na.rm =TRUE)),
                                    'Dlog',
                                    'BA.Glog')
-fun.boxplot.breaks(log(data.all[['D']]),
-                   log(data.all[['BA.G']]-min(data.all[['BA.G']], na.rm =TRUE)+1), add = FALSE)
-
-
-fun.boxplot.breaks((data.all[['D']]),
-                   (data.all[['BA.G']]), add = FALSE)
+ x11()
+ fun.hexbin.with.smooth.ggplot(data.all[['BATOT']],
+                                   log(data.all[['BA.G']]-min(data.all[['BA.G']]+1, na.rm =TRUE)),
+                                   'BATOT',
+                                   'BA.Glog')
 
 
 
@@ -157,10 +141,10 @@ names.biomes[8] <-  'Trop forest'
 names.biomes[8] <-  'Trop rain forest'
 
 ##  REMOVE BIOMES Tundra and Subtropical desert because to few observation
-
-data.temp <-  data.summarise[!data.summarise$biomes %in% c('Tundra',
+library(dplyr)
+data.temp <-  dplyr::filter(data.summarise, !biomes %in% c('Tundra',
                                            'Temperate grassland desert',
-                                           'Subtropical desert'), ]
+                                           'Subtropical desert'))
 data.temp$biomes <-  factor(data.temp$biomes)
 ## shorter name for biomes
 names.biomes.t <- levels(factor(data.temp$biomes))
diff --git a/scripts/process.data/remove.out.R b/scripts/process.data/remove.out.R
index 5af04047003a181dad9b398e1b7a036b7e4e9490..6331f0e5ea4812a29885a9458ceac4bd526dbc85 100644
--- a/scripts/process.data/remove.out.R
+++ b/scripts/process.data/remove.out.R
@@ -7,7 +7,11 @@ filedir <- "output/processed"
 
 ## remove outlier following Condit approach and reformat data
 data.all <- readRDS( 'output/processed/data.all.log.t.rds')
-data.all <- fun.reform.data.and.remove.outlier(data.all)
+
+data.all <- fun.reform.data.and.remove.outlier(data.all, err.limit = 4, select.one.census.rand = TRUE)
+
+
+
 gc()
 saveRDS(data.all, 'output/processed/data.all.log.rds')
 ## fun.write.big.csv(data.all,