Commit 98e94e8a authored by fhui28's avatar fhui28
Browse files

traits formatting for PARACOU; lots to be done here

No related merge requests found
Showing with 71 additions and 63 deletions
+71 -63
### MERGE paracou DATA ### MERGE paracou DATA
### Edited by FH
rm(list = ls()) rm(list = ls())
source("./R/format.function.R") source("./R/format.function.R")
library(reshape) library(reshape)
######################### ############################ read individuals tree data
## READ DATA
####################
### read individuals tree data
data.paracou <- read.table("./data/raw/DataParacou/20130717_paracou_1984_2012.csv", data.paracou <- read.table("./data/raw/DataParacou/20130717_paracou_1984_2012.csv",
header=TRUE,stringsAsFactors=FALSE,sep = ";", na.strings = "NULL") header=TRUE,stringsAsFactors=FALSE,sep = ";", na.strings = "NULL")
#barplot(apply(!is.na(data.paracou[,paste("circ_",1984:2012,sep="")]),MARGIN=2,FUN=sum),las=3) #barplot(apply(!is.na(data.paracou[,paste("circ_",1984:2012,sep="")]),MARGIN=2,FUN=sum),las=3)
...@@ -26,7 +22,8 @@ for(k in numeric.col.name){ ...@@ -26,7 +22,8 @@ for(k in numeric.col.name){
data.paracou[,k] <- gsub(",",".",data.paracou[,k]); data.paracou[,k] <- as.numeric(data.paracou[,k]) data.paracou[,k] <- gsub(",",".",data.paracou[,k]); data.paracou[,k] <- as.numeric(data.paracou[,k])
} ## Replace all , in decimals with . } ## Replace all , in decimals with .
data.paracou$treeid <- apply(data.paracou[,c("plot","subplot","tree")],1,paste,collapse="."); ## Create a tree id data.paracou$tree.id <- apply(data.paracou[,c("plot","subplot","tree")],1,paste,collapse="_");
data.paracou$sp <- data.paracou[["taxonid"]]
data.paracou <- data.paracou[,c(ncol(data.paracou),1:(ncol(data.paracou)-1))] data.paracou <- data.paracou[,c(ncol(data.paracou),1:(ncol(data.paracou)-1))]
## ## plot each plot ## ## plot each plot
...@@ -34,8 +31,7 @@ data.paracou <- data.paracou[,c(ncol(data.paracou),1:(ncol(data.paracou)-1))] ...@@ -34,8 +31,7 @@ data.paracou <- data.paracou[,c(ncol(data.paracou),1:(ncol(data.paracou)-1))]
## lapply(unique(data.paracou[["plot"]]),FUN=fun.circles.plot,data.paracou[['x']],data.paracou[['y']],data.paracou[["plot"]],data.paracou[["circum2009"]],inches=0.2) ## lapply(unique(data.paracou[["plot"]]),FUN=fun.circles.plot,data.paracou[['x']],data.paracou[['y']],data.paracou[["plot"]],data.paracou[["circum2009"]],inches=0.2)
## dev.off() ## dev.off()
####################### ############################# SELECT OBSERVATION WITHOUT PROBLEMS
###### SELECT OBSERVATION WITHOUT PROBLEMS
## REMOVE ALL TREES WITH X OR Y >250 m ## REMOVE ALL TREES WITH X OR Y >250 m
data.paracou <- subset(data.paracou,subset=(!is.na(data.paracou[["x"]])) & data.paracou[["x"]]<251 & data.paracou[["y"]]<251) data.paracou <- subset(data.paracou,subset=(!is.na(data.paracou[["x"]])) & data.paracou[["x"]]<251 & data.paracou[["y"]]<251)
#### REMOVE PLOTs 16 17 18 ACCORDING TO GHSILAIN #### REMOVE PLOTs 16 17 18 ACCORDING TO GHSILAIN
...@@ -44,16 +40,68 @@ data.paracou <- subset(data.paracou,subset=! data.paracou[["plot"]] %in% 16:18) ...@@ -44,16 +40,68 @@ data.paracou <- subset(data.paracou,subset=! data.paracou[["plot"]] %in% 16:18)
data.paracou <- subset(data.paracou,subset=!(as.numeric(data.paracou[["yeardied"]])<=2001 & !is.na(data.paracou[["yeardied"]]))) data.paracou <- subset(data.paracou,subset=!(as.numeric(data.paracou[["yeardied"]])<=2001 & !is.na(data.paracou[["yeardied"]])))
###################################### ######################################## MASSAGE TRAIT DATA
## MASSAGE TRAIT DATA
############################ ### read species names
species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";")
species.clean$sp <- species.clean[["idTaxon"]]
species.clean$Latin_name <- paste(species.clean[["Genre"]],species.clean[["Espece"]],sep=" ")
## keep only one row pers idTaxon
species.clean <- subset(species.clean,subset=!duplicated(species.clean[["sp"]]),select=c("sp","Latin_name","Genre","Espece","Famille","idCIRAD"))
## select only species present in data base
species.clean <- subset(species.clean,subset=species.clean[["sp"]] %in% data.paracou[["sp"]])
## percentage of species with no taxonomic identification
length(grep("Indet",species.clean[["Latin_name"]]))/nrow(species.clean) ## 25%
dataWD <- read.csv("./data/raw/DataParacou/WD-Species-Paracou-Ervan_GV.csv",stringsAsFactors=FALSE, header = T,sep=" ")
#dataWD <- merge(dataWD, species.clean, by = "idCIRAD", sort = F)
length(unique(species.clean$idCIRAD)) != dim(species.clean)
## dataWD uses idCIRAD as identifier, but this is not a unique identifier in species.clean!
## But wood density seems to also be available from seed.traits
### need to read the different traits data based and merge .....
bridge <- read.csv("./data/raw/DataParacou/BridgeDATA.g.csv",stringsAsFactors=FALSE, header = T, sep = ";")
bridge$Latin_name <- paste(bridge[["Genus"]],bridge[["species"]],sep=" ")
### check % of match of the bridg data
sum(species.clean[["Latin_name"]] %in% bridge[["Latin_name"]])/length(species.clean[["Latin_name"]])
## only 307 species /775 are in teh traits data ....
seed.traits <- read.csv("./data/raw/DataParacou/Autour-de-Paracou-Releves-par-trait-et-taxon.txt",stringsAsFactors=FALSE, header = T, sep = "\t")
## Reformat seed.traits to one row per species, with each trait as a column
spp.means <- (cast(seed.traits, LIB_TAXON ~ METHO_LIB, value = "MEASURE", fun = mean))
colnames(spp.means)[-1] <- paste(colnames(spp.means)[-1],".mean",sep="")
spp.sds <- (cast(seed.traits, LIB_TAXON ~ METHO_LIB, value = "MEASURE", fun = sd))
colnames(spp.sds) <- paste(colnames(spp.sds),".sd",sep="")
seed.traits2 <- cbind(spp.means,spp.sds[,-1])[,c("LIB_TAXON","Leaf nitrogen concentration (standard).mean","Leaf nitrogen concentration (standard).sd",
"Specific leaf area (standard).mean","Specific leaf area (standard).sd", "Wood density.mean","Wood density.sd")]
colnames(seed.traits2)[1] <- c("Latin_name")
seed.traits2 <- seed.traits2[order(seed.traits2$Latin_name),]
for(k in 2:ncol(seed.traits2)) seed.traits2[,k][!is.finite(seed.traits2[,k])] <- NA
seed.traits2$sp <- species.clean$sp[match(seed.traits2$Latin_name,species.clean$Latin_name)]
colnames(seed.traits2) <- c("Latin_name","Leaf.N.mean","Leaf.N.sd","SLA.mean","SLA.sd","Wood.density.mean","Wood.density.sd","sp")
seed.traits2$SLA.mean <- seed.traits2$SLA.mean/1 ## Conversion from m2/kg to mm2/mg
seed.traits2$Wood.density.mean <- seed.traits2$Wood.density.mean/1 ## conversion from g/cm3 to mg/mm3
# - $sp$ the species code as in previous table
# - $Latin\_name$ the latin name of the species
# - $Leaf.N.mean$ Leaf Nitrogen per mass in TRY mg/g
# - $Seed.mass.mean$ dry mass in TRY mg
# - $SLA.mean$ in TRY mm2 mg-1
# - $Wood.density.mean$ in TRY mg/mm3
# - $Max.height.mean$ from NFI data I compute the 99% quantile in m
# - and the same columns with $sd$ instead of $mean$ with either the mean sd within species if species mean or the mean sd with genus if genus mean because no species data
# - a dummy variable with true or false if genus mean
## still to be completed
############################################ FORMAT INDIVIDUAL TREE DATA
##########################################
## FORMAT INDIVIDUAL TREE DATA
#############
data.paracou2 <- data.paracou[rep(1:nrow(data.paracou),each=2),c(1:10,(ncol(data.paracou)-2):ncol(data.paracou))] data.paracou2 <- data.paracou[rep(1:nrow(data.paracou),each=2),c(1:10,(ncol(data.paracou)-2):ncol(data.paracou))]
rownames(data.paracou2) <- 1:nrow(data.paracou2); data.paracou2 <- as.data.frame(data.paracou2) rownames(data.paracou2) <- 1:nrow(data.paracou2); data.paracou2 <- as.data.frame(data.paracou2)
data.paracou2$census <- rep(c(2001,2001+4),nrow(data.paracou)); data.paracou2$yr2 <- rep(c(2005,2005+4),nrow(data.paracou)) data.paracou2$yr1 <- rep(c(2001,2001+4),nrow(data.paracou)); data.paracou2$yr2 <- rep(c(2005,2005+4),nrow(data.paracou))
data.paracou2$year <- rep(c(4,4),nrow(data.paracou)) data.paracou2$year <- rep(c(4,4),nrow(data.paracou))
data.paracou2$dbh1 <- c(rbind(data.paracou$circum2001/pi,data.paracou$circum2005/pi)) data.paracou2$dbh1 <- c(rbind(data.paracou$circum2001/pi,data.paracou$circum2005/pi))
data.paracou2$dbh2 <- c(rbind(data.paracou$circum2005/pi,data.paracou$circum2009/pi)) data.paracou2$dbh2 <- c(rbind(data.paracou$circum2005/pi,data.paracou$circum2009/pi))
...@@ -62,21 +110,19 @@ data.paracou2$code2 <- c(as.numeric(rbind(data.paracou$code2005,data.paracou$cod ...@@ -62,21 +110,19 @@ data.paracou2$code2 <- c(as.numeric(rbind(data.paracou$code2005,data.paracou$cod
data.paracou2$dead <- rep(0,nrow(data.paracou)*2) data.paracou2$dead <- rep(0,nrow(data.paracou)*2)
data.paracou2$dead[c(as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]])), data.paracou2$dead[c(as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]])),
as.numeric(data.paracou[["yeardied"]]) %in% 2006:2009 & (!is.na(data.paracou[["yeardied"]])))] <- 1 as.numeric(data.paracou[["yeardied"]]) %in% 2006:2009 & (!is.na(data.paracou[["yeardied"]])))] <- 1
data.paracou2$sp <- data.paracou[["taxonid"]]
## remove tree dead at first census for both date (census 2001-2005 2005-2009) ## remove tree dead at first census for both date (census 2001-2005 2005-2009)
data.paracou <- subset(data.paracou2,subset=!(data.paracou2[['yr1']] ==2005 & (as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]]))))) data.paracou <- subset(data.paracou2,subset=!(data.paracou2[['yr1']] ==2005 & (as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]])))))
## change unit and names of variables to be the same in all data for the tree ## change unit and names of variables to be the same in all data for the tree
data.paracou$G <- 10*(data.paracou$dbh2-data.paracou$dbh1)/data.paracou$year ## diameter growth in mm per year data.paracou$G <- 10*(data.paracou$dbh2-data.paracou$dbh1)/data.paracou$year ## diameter growth in mm per year
data.paracou$G[data.paracou$code1>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh1 data.paracou$G[data.paracou$code1>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh1
data.paracou$G[data.paracou$code2>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh2 data.paracou$G[data.paracou$code2>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh2
data.paracou2$cenus <- data.paracou$yr1
data.paracou[which(data.paracou$G < -50),] ## THERE SEEMS TO BE SOME PROBLEMS WITH THE DBH DATA ## much less issue after removing diam problem
data.paracou$D <- data.paracou[["dbh1"]]; data.paracou$D[data.paracou$D == 0] <- NA ;## diameter in cm data.paracou$D <- data.paracou[["dbh1"]]; data.paracou$D[data.paracou$D == 0] <- NA ;## diameter in cm
data.paracou$plot <- data.paracou$plot#apply(data.paracou[,c("forest","plot","subplot")],1,paste,collapse=".") ## plot code data.paracou$plot <- data.paracou$plot#apply(data.paracou[,c("forest","plot","subplot")],1,paste,collapse=".") ## plot code
data.paracou$htot <- rep(NA,length(data.paracou[["G"]])) ## height of tree in m - MISSING data.paracou$htot <- rep(NA,length(data.paracou[["G"]])) ## height of tree in m
data.paracou$obs.id <- 1:nrow(data.paracou) data.paracou$obs.id <- 1:nrow(data.paracou)
### delete recruit in 2001 or 2005 for first census ### delete recruit in 2001 or 2005 for first census
...@@ -84,35 +130,25 @@ data.paracou <- subset(data.paracou,subset=!is.na(data.paracou$D)) ...@@ -84,35 +130,25 @@ data.paracou <- subset(data.paracou,subset=!is.na(data.paracou$D))
## minimum circumfer 30 delete all tree with a dbh <30/pi, ## minimum circumfer 30 delete all tree with a dbh <30/pi,
data.paracou <- subset(data.paracou,subset= data.paracou[["D"]]>(30/pi)) data.paracou <- subset(data.paracou,subset= data.paracou[["D"]]>(30/pi))
###################### ######################## ECOREGION
## ECOREGION
###################
## paracou has only 1 eco-region YES NO ECOREGION
###################### ## paracou has only 1 eco-region
## PERCENT DEAD
################### ######################## PERCENT DEAD - compute numer of dead per plot to remove plot with disturbance
## variable percent dead
## compute numer of dead per plot to remove plot with disturbance
## THERE ARE LOTS OF NAs - DID YOU WANT TO REMOVE THEM OR TREAT THEM AS ALIVE
perc.dead <- tapply(data.paracou[["dead"]],INDEX=data.paracou[["plot"]],FUN=function.perc.dead2) perc.dead <- tapply(data.paracou[["dead"]],INDEX=data.paracou[["plot"]],FUN=function.perc.dead2)
data.paracou <- merge(data.paracou,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) data.paracou <- merge(data.paracou,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE)
########################################################### ################################################## VARIABLES SELECTION FOR THE ANALYSIS
### VARIABLES SELECTION FOR THE ANALYSIS
###################
#vec.abio.var.names <- c("MAT","MAP") ## MISSING NEED OTHER BASED ON TOPOGRAPHY ASK BRUNO #vec.abio.var.names <- c("MAT","MAP") ## MISSING NEED OTHER BASED ON TOPOGRAPHY ASK BRUNO
vec.basic.var <- c("obs.id","treeid","sp","plot","D","G","dead","census","year","htot","x","y","perc.dead") vec.basic.var <- c("obs.id","tree.id","sp","plot","D","G","dead","census","year","htot","x","y","perc.dead")
data.tree <- subset(data.paracou,select=c(vec.basic.var)) #,vec.abio.var.names data.tree <- subset(data.paracou,select=c(vec.basic.var)) #,vec.abio.var.names
############################################## ##############################################
## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species ## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species
###########################
## NEED TO COMPUTE BASED ON RADIUS AROUND TARGET TREE ## NEED TO COMPUTE BASED ON RADIUS AROUND TARGET TREE
### species as factor because number ### species as factor because number
data.tree[['sp']] <- factor(data.tree[['sp']]) data.tree[['sp']] <- factor(data.tree[['sp']])
Rlim <- 15 # set size of neighborhood for competition index Rlim <- 15 # set size of neighborhood for competition index
...@@ -156,34 +192,6 @@ data.BA.sp <- subset(data.BA.sp,subset=not.in.buffer.zone) ...@@ -156,34 +192,6 @@ data.BA.sp <- subset(data.BA.sp,subset=not.in.buffer.zone)
########################
#########################
##### TRAITS
### read species names
species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";")
species.clean$sp <- species.clean[["idTaxon"]]
species.clean$Latin_name <- paste(species.clean[["Genre"]],species.clean[["Espece"]],sep=" ")
## keep only one row pers idTaxon
species.clean <- subset(species.clean,subset=!duplicated(species.clean[["sp"]]),select=c("sp","Latin_name","Genre","Espece","Famille"))
## select only species present in data base
species.clean <- subset(species.clean,subset=species.clean[["sp"]] %in% data.tree[["sp"]])
## percentage of species with no taxonomic identification
length(grep("Indet",species.clean[["Latin_name"]]))/nrow(species.clean) ## 25%
### need to read the different traits data based and merge .....
bridge <- read.csv("./data/raw/DataParacou/BridgeDATA.g.csv",stringsAsFactors=FALSE, header = T, sep = ";")
bridge$Latin_name <- paste(bridge[["Genus"]],bridge[["species"]],sep=" ")
dataWD <- read.csv("./data/raw/DataParacou/WD-Species-Paracou-Ervan_GV.csv",stringsAsFactors=FALSE, header = T,sep=" ")
seed.traits <- read.csv("./data/raw/DataParacou/Autour de Paracou - Releves par trait et taxon.txt",stringsAsFactors=FALSE, header = T, sep = "\t")
### check % of match of the bridg data
sum(species.clean[["Latin_name"]] %in% bridge[["Latin_name"]])/length(species.clean[["Latin_name"]])
## only 307 species /775 are in teh traits data ....
## ## save everything as a list ## ## save everything as a list
## list.paracou <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) ## list.paracou <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits)
## save(list.spain,file="./data/process/list.paracou.Rdata") ## save(list.spain,file="./data/process/list.paracou.Rdata")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment