diff --git a/merge.data.PARACOU.R b/merge.data.PARACOU.R index ea53409455e4e5efc73db5cea21e1ad1b708ffbf..62a38ca4a04a5ac1390a5bc07cf85e1968f2d792 100644 --- a/merge.data.PARACOU.R +++ b/merge.data.PARACOU.R @@ -109,8 +109,8 @@ data.tree <- subset(data.paracou,select=c(vec.basic.var)) #,vec.abio.var.names data.tree[['sp']] <- factor(data.tree[['sp']]) Rlim <- 15 # set size of neighborhood for competition index -system.time(test <- fun.compute.BA.SP.XY.per.plot(1,data.tree=data.tree,Rlim=15,parallel=TRUE,rpuDist=FALSE)) - +## system.time(test <- fun.compute.BA.SP.XY.per.plot(1,data.tree=data.tree,Rlim=Rlim,parallel=TRUE,rpuDist=FALSE)) +library(doParallel) list.BA.SP.data <- mclapply(unique(data.tree[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.tree,Rlim=Rlim,mc.cores=4) data.BA.sp <- rbind.fill(list.BA.SP.data) dim(data.BA.SP) @@ -121,6 +121,7 @@ if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) st ## test same order as data.tree if(sum(!data.BA.SP[["obs.id"]] == data.tree[["obs.id"]]) >0) stop("competition index not in the same order than data.tree") +################################################ ## REMOVE TREE IN BUFFER ZONE BUFFER ZONE not.in.buffer.zone <- (data.tree[['x']]<(250-Rlim) & data.tree[['x']]>(0+Rlim) & @@ -131,12 +132,6 @@ data.tree[['y']]>(0+Rlim)) data.tree <- subset(data.tree,subset=not.in.buffer.zone) data.BA.sp <- subset(data.BA.sp,subset=not.in.buffer.zone) -## plot each plot -pdf("./figs/plots.tree.pdf") -lapply(unique(data.tree[["plot"]]),FUN=fun.circles.plot,data.tree[['x']],data.tree[['y']],data.tree[["plot"]],data.tree[["D"]],inches=0.2,xlim=c(0,250),ylim=c(0,250)) -dev.off() - - ######################## @@ -146,28 +141,25 @@ dev.off() ### read species names species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";") species.clean$sp <- species.clean[["idTaxon"]] +species.clean$Latin_name <- 
paste(species.clean[["Genre"]],species.clean[["Espece"]],sep=" ") +## keep only one row pers idTaxon +species.clean <- subset(species.clean,subset=!duplicated(species.clean[["sp"]]),select=c("sp","Latin_name","Genre","Espece","Famille")) -## select species in paracou -species.paracou <- data.frame(sp=species.clean[as.character(species.clean[["sp"]]) %in% as.vector(na.exclude(unique(data.tree[["sp"]]))),c("sp")], - Latin_name=apply((species.clean[as.character(species.clean[["sp"]]) %in% as.vector(na.exclude(unique(data.tree[["sp"]]))) - ,c("Genre","Espece")]),MARGIN=1,FUN=paste,collapse=" ")) - -count.sp.paracou <- data.frame(sp=names(table(data.tree[["sp"]])),n.indiv=as.vector(table(data.tree[["sp"]]))) -species.paracou <- merge(species.paracou,count.sp.paracou,by="sp") - -tapply(species.paracou[["n.indiv"]],INDEX=species.paracou[["Latin_name"]],FUN=sum) - -length(grep("Indet",species.paracou[["Latin_name"]]))/nrow(species.paracou) +## select only species present in data base +species.clean <- subset(species.clean,subset=species.clean[["sp"]] %in% data.tree[["sp"]]) +## percentage of species with no taxonomic identification +length(grep("Indet",species.clean[["Latin_name"]]))/nrow(species.clean) ## 25% ### need to read the different traits data based and merge ..... 
bridge <- read.csv("./data/raw/DataParacou/BridgeDATA.g.csv",stringsAsFactors=FALSE, header = T, sep = ";") bridge$Latin_name <- paste(bridge[["Genus"]],bridge[["species"]],sep=" ") - dataWD <- read.csv("./data/raw/DataParacou/WD-Species-Paracou-Ervan_GV.csv",stringsAsFactors=FALSE, header = T,sep=" ") seed.traits <- read.csv("./data/raw/DataParacou/Autour de Paracou - Releves par trait et taxon.txt",stringsAsFactors=FALSE, header = T, sep = "\t") -## SPECIES CODE COME FROM idTaxon in paracou_taxonomie and taxonid in paracou_1984_2012 to match the traits data we need to use the "Genus species" -## we better work not work with vernacular because this doesn't match necesseraly the Genus species taxonomie +### +species.clean[["Latin_name"]] %in% bridge[["Latin_name"]] +match( + diff --git a/ms/data.format.md b/ms/data.format.md index f1c48499fc338f2f6b3db47244b57ba05374527b..8f42907d96548c288cd65b812d8c495f7a44aada 100644 --- a/ms/data.format.md +++ b/ms/data.format.md @@ -10,7 +10,7 @@ This document describes the data structure and the main R functions available so # Structure of data for analysis -For the analysis we need for each country a list with three elements. +For the analysis we need for each ecoregion country (or big tropical plot) a list with three elements. * First element is a data.frame for individual tree data with columns @@ -38,13 +38,14 @@ For the analysis we need for each country a list with three elements. 
* Third element is a data.frame for the species traits data with columns - $sp$ the species code as in previous table - - $Latin_name$ the latin name of the species + - $Latin\_name$ the latin name of the species - $Leaf.N.mean$ Leaf Nitrogen per mass in TRY mg/g - $Seed.mass.mean$ dry mass in TRY mg - $SLA.mean$ in TRY mm2 mg-1 - $Wood.density.mean$ in TRY mg/mm3 - $Max.height.mean$ from NFI data I compute the 99% quantile in m - and the same columns with $sd$ instead of $mean$ with either the mean sd within species if species mean or the mean sd with genus if genus mean because no species data + - a dummy variable with true or false if genus mean # Competition index @@ -77,4 +78,5 @@ The objective is to have a table with the species mean of the traits or the genu * Need to write a function to compute mean per species for each traits and decide if we use the same species sd for these data sets. - +# table with data and progress in formatting and work TODO +see table.data.progress.ods diff --git a/ms/table.data.progress.ods b/ms/table.data.progress.ods new file mode 100644 index 0000000000000000000000000000000000000000..68bea2a36535ac1b09a0092518bfdb1e7801c0a7 Binary files /dev/null and b/ms/table.data.progress.ods differ