diff --git a/merge.data.PARACOU.R b/merge.data.PARACOU.R
index ea53409455e4e5efc73db5cea21e1ad1b708ffbf..62a38ca4a04a5ac1390a5bc07cf85e1968f2d792 100644
--- a/merge.data.PARACOU.R
+++ b/merge.data.PARACOU.R
@@ -109,8 +109,8 @@ data.tree <- subset(data.paracou,select=c(vec.basic.var)) #,vec.abio.var.names
 data.tree[['sp']] <- factor(data.tree[['sp']])
 Rlim <- 15 # set size of neighborhood for competition index
 
-system.time(test <- fun.compute.BA.SP.XY.per.plot(1,data.tree=data.tree,Rlim=15,parallel=TRUE,rpuDist=FALSE))
-
+## system.time(test <- fun.compute.BA.SP.XY.per.plot(1,data.tree=data.tree,Rlim=Rlim,parallel=TRUE,rpuDist=FALSE))
+library(doParallel)
 list.BA.SP.data <- mclapply(unique(data.tree[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.tree,Rlim=Rlim,mc.cores=4)
 data.BA.sp <- rbind.fill(list.BA.SP.data)
 dim(data.BA.SP)
@@ -121,6 +121,7 @@ if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) st
 ## test same order as data.tree
 if(sum(!data.BA.SP[["obs.id"]] == data.tree[["obs.id"]]) >0) stop("competition index not in the same order than data.tree")
 
+################################################
 ## REMOVE TREE IN BUFFER ZONE BUFFER ZONE
 not.in.buffer.zone <- (data.tree[['x']]<(250-Rlim) &
 data.tree[['x']]>(0+Rlim) &
@@ -131,12 +132,6 @@ data.tree[['y']]>(0+Rlim))
 data.tree <- subset(data.tree,subset=not.in.buffer.zone)
 data.BA.sp <- subset(data.BA.sp,subset=not.in.buffer.zone)
 
-## plot each plot
-pdf("./figs/plots.tree.pdf")
-lapply(unique(data.tree[["plot"]]),FUN=fun.circles.plot,data.tree[['x']],data.tree[['y']],data.tree[["plot"]],data.tree[["D"]],inches=0.2,xlim=c(0,250),ylim=c(0,250))
-dev.off()
-
-
 
 
 ########################
@@ -146,28 +141,25 @@ dev.off()
 ### read species names
 species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";")
 species.clean$sp <- species.clean[["idTaxon"]]
+species.clean$Latin_name <-  paste(species.clean[["Genre"]],species.clean[["Espece"]],sep=" ")
+## keep only one row per idTaxon
+species.clean <- subset(species.clean,subset=!duplicated(species.clean[["sp"]]),select=c("sp","Latin_name","Genre","Espece","Famille"))
 
-## select species in paracou
-species.paracou <- data.frame(sp=species.clean[as.character(species.clean[["sp"]]) %in% as.vector(na.exclude(unique(data.tree[["sp"]]))),c("sp")],
-           Latin_name=apply((species.clean[as.character(species.clean[["sp"]]) %in% as.vector(na.exclude(unique(data.tree[["sp"]])))
-                                           ,c("Genre","Espece")]),MARGIN=1,FUN=paste,collapse=" "))
-
-count.sp.paracou <- data.frame(sp=names(table(data.tree[["sp"]])),n.indiv=as.vector(table(data.tree[["sp"]])))
-species.paracou <- merge(species.paracou,count.sp.paracou,by="sp")
-
-tapply(species.paracou[["n.indiv"]],INDEX=species.paracou[["Latin_name"]],FUN=sum)
-
-length(grep("Indet",species.paracou[["Latin_name"]]))/nrow(species.paracou)
+## keep only the species present in the tree data (data.tree)
+species.clean <-  subset(species.clean,subset=species.clean[["sp"]] %in% data.tree[["sp"]])
+## proportion of species with no taxonomic identification ("Indet" in Latin_name)
+length(grep("Indet",species.clean[["Latin_name"]]))/nrow(species.clean) ## 25%
 
 ### need to read the different traits data based and merge .....
 bridge <- read.csv("./data/raw/DataParacou/BridgeDATA.g.csv",stringsAsFactors=FALSE, header = T, sep = ";")
 bridge$Latin_name <- paste(bridge[["Genus"]],bridge[["species"]],sep=" ")
-
 dataWD <- read.csv("./data/raw/DataParacou/WD-Species-Paracou-Ervan_GV.csv",stringsAsFactors=FALSE, header = T,sep=" ")
 seed.traits <- read.csv("./data/raw/DataParacou/Autour de Paracou - Releves par trait et taxon.txt",stringsAsFactors=FALSE, header = T, sep = "\t")
 
-## SPECIES CODE COME FROM idTaxon in paracou_taxonomie and taxonid in paracou_1984_2012 to match the traits data we need to use the "Genus species"
-## we better work not work with vernacular because this doesn't match necesseraly the Genus species taxonomie
+### check which species Latin names occur in the BRIDGE traits table, and at which row (NA if absent)
+species.clean[["Latin_name"]] %in% bridge[["Latin_name"]]
+match(species.clean[["Latin_name"]],bridge[["Latin_name"]])
+
 
 
 
diff --git a/ms/data.format.md b/ms/data.format.md
index f1c48499fc338f2f6b3db47244b57ba05374527b..8f42907d96548c288cd65b812d8c495f7a44aada 100644
--- a/ms/data.format.md
+++ b/ms/data.format.md
@@ -10,7 +10,7 @@ This document describes the data structure and the main R functions available so
  
 # Structure of data for analysis
 
-For the analysis we need for each country a list with three elements.
+For the analysis we need, for each ecoregion or country (or big tropical plot), a list with three elements.
 
 * First element is a  data.frame for individual tree data with columns
 
@@ -38,13 +38,14 @@ For the analysis we need for each country a list with three elements.
 * Third element is a data.frame for the species traits data with columns
 
 	- $sp$ the species code as in previous table
-	- $Latin_name$ the latin name of the species
+	- $Latin\_name$ the latin name of the species
     - $Leaf.N.mean$ Leaf Nitrogen per mass in TRY mg/g
 	- $Seed.mass.mean$ dry mass in TRY mg
 	- $SLA.mean$ in TRY mm2 mg-1
 	- $Wood.density.mean$ in TRY mg/mm3
 	- $Max.height.mean$ from NFI data I compute the 99% quantile in m
 	- and the same columns with $sd$ instead of $mean$ with either the mean sd within species if species mean or the mean sd with genus if genus mean because no species data
+	- a dummy variable (true/false) indicating whether the value is a genus mean rather than a species mean
 
 # Competition index
 
@@ -77,4 +78,5 @@ The objective is to have a table with the species mean of the traits or the genu
 
 * Need to write a function to compute mean per species for each traits and decide if we use the same species sd for these data sets.
 
-
+# Table of data sets, formatting progress and remaining work (TODO)
+See table.data.progress.ods.
diff --git a/ms/table.data.progress.ods b/ms/table.data.progress.ods
new file mode 100644
index 0000000000000000000000000000000000000000..68bea2a36535ac1b09a0092518bfdb1e7801c0a7
Binary files /dev/null and b/ms/table.data.progress.ods differ