diff --git a/merge.data.SPAIN-fhv1.R b/merge.data.SPAIN-fhv1.R new file mode 100644 index 0000000000000000000000000000000000000000..9151e77fb32fc33e327c054ce096eb6e7f9bd29e --- /dev/null +++ b/merge.data.SPAIN-fhv1.R @@ -0,0 +1,158 @@ +### MERGE spain DATA +### Edited by FH +rm(list = ls()); source("./R/format.function.R"); library(reshape) + +######################### +## READ DATA +#################### +### read individuals tree data +data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI.txt',header=TRUE,stringsAsFactors=FALSE,sep = "\t") + +# ### read species names and clean +# species.clean <- fun.clean.species.tab(read.csv("./data/species.list/species.csv",sep="\t",stringsAsFactors=FALSE)) +# +# ### read TRY data +# data.TRY.sd.update <- readRDS("./data/process/data.TRY.sd.update.rds") +# data.frame.TRY <- data.frame(Latin_name=rownames(data.TRY.sd.update),data.TRY.sd.update) +# rm(data.TRY.sd.update) +# ### merge with code and species name +# merge.TRY <- merge(species.clean,data.frame.TRY,by="Latin_name") +# rm(species.clean,data.frame.TRY) + +###################################### +## MASSAGE TRAIT DATA +############################ +## Compute maximum height per species plus sd from observed height to add variables to the traits data base +## Because we have two heights, then take the max of the two heights and then bootstrap +res.quant.boot <- t(sapply(levels(factor(data.spain[["SP_code"]])),FUN=f.quantile.boot,R=1000,x=log10(apply(data.spain[,c("ht1","ht2")],1,max,na.rm=T)),fac=factor(data.spain[["SP_code"]]))) +#max.heights <- read.csv("/media/fhui/Lexar/Career & Work/GKunstler_competition/data/raw/DataSpain/MaximumHeigth.csv", header = T) +# +# ## create data base +data.max.height <- data.frame(code=rownames(res.quant.boot),Max.height.mean=res.quant.boot[,1],Max.height.sd=res.quant.boot[,2],Max.height.nobs=res.quant.boot[,3]) +rm(res.quant.boot) +write.csv(data.max.height,file="./data/raw/DataSpain/data.max.height.csv") +# +# ## merge TRY with max height +# merge.TRY <- merge(merge.TRY,data.max.height,by="code") +# rm(data.max.height) +# ## use mean sd of max tree height over all species +# merge.TRY$Max.height.sd.1 <- rep(mean(merge.TRY[["Max.height.sd"]],na.rm=TRUE),length=nrow(merge.TRY)) +# +# ### keep only variables needed in traits data +# names.traits.data <- c("code","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean", +# "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd.1","Seed.mass.sd.1","SLA.sd.1", "Wood.Density.sd.1", +# "Leaf.Lifespan.sd.1","Max.height.sd.1") +# +# data.traits <- merge.TRY[,names.traits.data] +# names(data.traits) <- c("sp","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean", +# "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd","Seed.mass.sd","SLA.sd", "Wood.Density.sd", +# "Leaf.Lifespan.sd","Max.height.sd") ## rename to have standard variables name +# rm(merge.TRY,names.traits.data) + +################################################################ +## FORMAT INDIVIDUAL TREE DATA +############# +data.max.height <- read.csv(file = "./data/raw/DataSpain/data.max.height.csv", header = T) + +## change unit and names of variables to be the same in all data for the tree +data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year +data.spain$year <- rep(NA,length(data.spain[["adbh"]])) ## number of year between measurement/missing! +data.spain$D <- data.spain[["dbh1"]]/10 ## diameter in mm +data.spain$dead <- rep(NA,length(data.spain[["adbh"]])) ## dummy variable for dead tree 0 alive 1 dead/missing! +data.spain$sp <- as.character(data.spain[["SP_code"]]) ## species code +data.spain$plot <- (data.spain[["Plot_ID_SFI"]]) ## plot code +tmp.htot <- rep(NA, length(data.spain[["adbh"]])); ## height of tree in m +for(i in 1:length(data.max.height$code)) { + sel.sp <- which(data.spain$sp == data.max.height$code[i]); tmp.htot[sel.sp] <- data.max.height$Max.height.mean[i] } +data.spain$htot <- 10^tmp.htot +data.spain$tree.id <- paste(sapply(data.spain[,"Tree_ID_SFI"],substr,1,6),".",sapply(data.spain[,"Tree_ID_SFI"],substr,7,10),sep="") ## tree unique id + +#### change coordinates system of x y to be in lat long WGS84/don't know how to do this +library(sp); library(dismo); library(rgdal); +data.sp <- data.spain[,c("idp","xl93","yl93")] +coordinates(data.sp) <- c("xl93", "yl93") ## EPSG CODE 2154 +proj4string(data.sp) <- CRS("+init=epsg:2154") # define projection system of our data ## EPSG CODE 2154 +summary(data.sp) + +detach(package:rgdal) +data.sp2 <- spTransform(data.sp,CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon +dataIFN.spain$Lon <- coordinates(data.sp2)[,"xl93"] +dataIFN.spain$Lat <- coordinates(data.sp2)[,"yl93"] +data.spain$Lon <- NA; data.spain$Lat <- NA +rm(data.sp,data.sp2) +## ## plot on world map +## library(rworldmap) +## newmap <- getMap(resolution = "coarse") # different resolutions available +## plot(newmap) +## points(data.sp2,cex=0.2,col="red") + +save(data.spain, file = "./data/raw/DataSpain/datspain.RData") +############################ +## merge greco to have no ecoregion with low number of observation +greco <- read.csv(file = "./data/raw/DataSpain/R_Ecoregion.csv", header = T) +greco <- greco[,c("Plot_ID_SFI","BIOME","eco_code")] +greco2 <- greco[!duplicated(greco$Plot),]; +rm(greco) + +data.spain <- merge(data.spain, greco2, by = "Plot_ID_SFI") +rm(greco2) + +table(data.spain$eco_code) +## There's an eco-region with no code, and one with 55 sites + +library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); +ecoreg <- unclass(data.spain$eco_code); +plot(data.spain[["CX"]][order(ecoreg)],data.spain[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg)))); +legend("bottomright", col = mycols, legend = levels(data.spain$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2) +points(data.spain[["CX"]][ecoreg == 9],data.spain[["CY"]][ecoreg == 9],pty=".",cex=.2, col = "black"); ## Highlight the region with 55 sites +## PA1219 looks to be similar to PA1209; merge them together +data.spain$eco_codemerged <- combine_factor(data.spain$eco_code, c(1:8,6,9)) + +####################### +## variable percent dead/cannot do with since dead variable is missing +# ###compute numer of dead per plot to remove plot with disturbance +# perc.dead <- tapply(data.spain[["dead"]],INDEX=data.spain[["idp"]],FUN=function.perc.dead) +# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) +# data.spain <- merge(data.spain,data.frame(idp=as.numeric(names(perc.dead)),perc.dead=perc.dead),sort=FALSE) +data.spain$perc.dead <- NA + +########################################################### +### PLOT SELECTION FOR THE ANALYSIS +################### +## Remove data with mortality == 1 or 2 +table(data.spain$Mortality_Cut) +data.spain <- subset(data.spain,subset= (data.spain[["Mortality_Cut"]] == 0 | data.spain[["Mortality_Cut"]] == "")) + +colnames(data.spain)[18:20] <- c("MAT","PP","PET"); colnames(data.spain)[c(1,8,32)] <- c("plot","sp.name","ecocode") +vec.abio.var.names <- c("MAT","PP","PET") +vec.basic.var <- c("tree.id","sp","sp.name","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead") +data.tree <- subset(data.spain,select=c(vec.basic.var,vec.abio.var.names)) +save(data.spain, file = "./data/raw/DataSpain/datspain.RData") + +############################################## +## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species +########################### +data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.spain[["tree.id"]]), diam=as.vector(data.spain[["D"]]), + sp=as.vector(data.spain[["sp"]]), id.plot=as.vector(data.spain[["plot"]]), + weights=as.vector(data.spain[["w"]])/10000, weight.full.plot=1/(pi*(c(15))^2)) + +## change NA and <0 data for 0 +data.BA.SP[which(is.na(data.BA.SP),arr.ind=TRUE)] <- 0 +data.BA.SP[,-1][which(data.BA.SP[,-1]<0,arr.ind=TRUE)] <- 0 + +### CHECK IF sp and sp name for column are the same +if(sum(! (names(data.BA.SP)[-1] %in% unique(dataIFN.spain[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") + +#### compute BA tot for all competitors +BATOT.COMPET <- apply(data.BA.SP[,-1],MARGIN=1,FUN=sum,na.rm=TRUE) +data.BA.SP$BATOT.COMPET <- BATOT.COMPET +### create data frame +names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) +data.BA.sp <- merge(data.frame(tree.id=dataIFN.spain[["tree.id"]],ecocode=dataIFN.spain[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) +## test +if( sum(! data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") + +## save everything as a list +list.spain <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) +save(list.spain,file="./data/process/list.spain.Rdata") +