### MERGE spain DATA ### Edited by FH rm(list = ls()); source("./R/format.function.R"); library(reshape) ######################### ## READ DATA #################### ### read individuals tree data #data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI.txt',header=TRUE,stringsAsFactors=FALSE,sep = "\t") data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI_aug13_alldata.txt',header=TRUE,stringsAsFactors=FALSE,sep = "\t") ###################################### ## MASSAGE TRAIT DATA ############################ ## Compute maximum height per species plus sd from observed height to add variables to the traits data base ## Because we have two heights, then take the max of the two heights and then bootstrap res.quant.boot <- t(sapply(levels(factor(data.spain[["SP_code"]])),FUN=f.quantile.boot,R=1000,x=log10(apply(data.spain[,c("ht1","ht2")],1,max,na.rm=T)),fac=factor(data.spain[["SP_code"]]))) ## create data base data.max.height <- data.frame(code=rownames(res.quant.boot),Max.height.mean=res.quant.boot[,1],Max.height.sd=res.quant.boot[,2],Max.height.nobs=res.quant.boot[,3]) rm(res.quant.boot) write.csv(data.max.height,file="./data/process/data.max.height.spain.csv") # I was planning to save processed data in that folder # # ## merge TRY with max height # merge.TRY <- merge(merge.TRY,data.max.height,by="code") # rm(data.max.height) # ## use mean sd of max tree height over all species # merge.TRY$Max.height.sd.1 <- rep(mean(merge.TRY[["Max.height.sd"]],na.rm=TRUE),length=nrow(merge.TRY)) # # ### keep only variables needed in traits data # names.traits.data <- c("code","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean", # "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd.1","Seed.mass.sd.1","SLA.sd.1", "Wood.Density.sd.1", # "Leaf.Lifespan.sd.1","Max.height.sd.1") # # data.traits <- merge.TRY[,names.traits.data] # names(data.traits) <- c("sp","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean", # "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd","Seed.mass.sd","SLA.sd", "Wood.Density.sd", # "Leaf.Lifespan.sd","Max.height.sd") ## rename to have standard variables name # rm(merge.TRY,names.traits.data) ################################################################ ## FORMAT INDIVIDUAL TREE DATA ############# ## change unit and names of variables to be the same in all data for the tree data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year data.spain$year <- rep(NA,length(data.spain[["adbh"]])) ## number of year between measurement - MISSING data.spain$D <- data.spain[["dbh1"]]/10 ## diameter in mm convert to cm data.spain$dead <- as.numeric(data.spain[["Life_status"]] == "dead") ## dummy variable for dead tree 0 alive 1 dead - MIGHT WANT TO CHANGE THIS TO BE BASED ON MORTALITY_CUT data.spain$sp <- as.character(data.spain[["SP_code"]]) ## species code data.spain$plot <- (data.spain[["Plot_ID_SFI"]]) ## plot code data.spain$htot <- data.spain[["ht1"]]## height of tree in m data.spain$tree.id <- data.spain$Tree_ID_SFI ## tree unique id #### change coordinates system of x y to be in lat long WGS84/don't know how to do this library(sp); library(dismo); library(rgdal); data.sp <- data.spain[,c("Tree_ID_SFI","CX","CY")] coordinates(data.sp) <- c("CX", "CY") # define x y proj4string(data.sp) <- CRS("+init=epsg:23030") # define projection system of our data ## EPSG CODE 23030 ED50 / UTM zone 30N summary(data.sp) detach(package:rgdal) data.sp2 <- spTransform(data.sp,CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon data.spain$Lon <- coordinates(data.sp2)[,"CX"] data.spain$Lat <- coordinates(data.sp2)[,"CY"] ## ## plot on world map ## library(rworldmap) ## newmap <- getMap(resolution = "coarse") # different resolutions available ## plot(newmap) ## points(data.sp2,cex=0.2,col="red") rm(data.sp,data.sp2) ###################### ## ECOREGION ################### ## merge greco to have no ecoregion with low number of observation greco <- read.csv(file = "./data/raw/DataSpain/R_Ecoregion.csv", header = T) greco <- greco[,c("Plot_ID_SFI","BIOME","eco_code")] greco2 <- greco[!duplicated(greco$Plot),]; rm(greco) data.spain <- merge(data.spain, greco2, by = "Plot_ID_SFI") rm(greco2) table(data.spain$eco_code) ## There's an eco-region with no code, and one with < 1000 sites ## The former we could drop as they were on the border of Spain library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); ecoreg <- unclass(data.spain$eco_code); plot(data.spain[["CX"]][order(ecoreg)],data.spain[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg)))); legend("topleft", col = mycols, legend = levels(data.spain$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2) points(data.spain[["CX"]][ecoreg == 9],data.spain[["CY"]][ecoreg == 9],pty=".",cex=.5, col = "black"); ## Highlight the "rare" ecoregions points(data.spain[["CX"]][ecoreg == 1],data.spain[["CY"]][ecoreg == 1],pty=".",cex=.5, col = "black"); ## Highlight the "rare" ecoregions ## PA1219 looks to be similar to PA1209; merge them together data.spain$eco_codemerged <- combine_factor(data.spain$eco_code, c(1:8,6,9)) data.spain <- data.spain[-which(data.spain$eco_codemerged == ""),] ###################### ## PERCENT DEAD ################### ## variable percent dead/cannot do with since dead variable is missing ###compute numer of dead per plot to remove plot with disturbance perc.dead <- tapply(data.spain[["dead"]],INDEX=data.spain[["plot"]],FUN=function.perc.dead) table(data.spain$dead) ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) data.spain <- merge(data.spain,data.frame(plot=as.numeric(names(perc.dead)),perc.dead=perc.dead),sort=FALSE, by = "plot") ########################################################### ### PLOT SELECTION FOR THE ANALYSIS ################### ## Remove data with mortality == 1 or 2 table(data.spain$Mortality_Cut) data.spain <- subset(data.spain,subset= (data.spain[["Mortality_Cut"]] == 0 | data.spain[["Mortality_Cut"]] == "")) colnames(data.spain)[colnames(data.spain) %in% c("mat","pp","PET")] <- c("MAT","PP","PET") colnames(data.spain)[names(data.spain) =="eco_codemerged"] <- c("ecocode") vec.abio.var.names <- c("MAT","PP","PET") vec.basic.var <- c("tree.id","sp","sp.name","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead") data.tree <- subset(data.spain,select=c(vec.basic.var,vec.abio.var.names)) ############################################## ## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species ########################### data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.spain[["tree.id"]]), diam=as.vector(data.spain[["D"]]), sp=as.vector(data.spain[["sp"]]), id.plot=as.vector(data.spain[["plot"]]), weights=as.vector(1/(pi*(data.spain[["R1"]])^2)), weight.full.plot=1/(pi*(25)^2)) ## change NA and <0 data for 0 data.BA.SP[which(is.na(data.BA.SP),arr.ind=TRUE)] <- 0 data.BA.SP[,-1][which(data.BA.SP[,-1]<0,arr.ind=TRUE)] <- 0 ### CHECK IF sp and sp name for column are the same if(sum(!(names(data.BA.SP)[-1] %in% unique(data.spain[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors BATOT.COMPET <- apply(data.BA.SP[,-1],MARGIN=1,FUN=sum,na.rm=TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET ### create data frame names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) data.BA.sp <- merge(data.frame(tree.id=dataIFN.spain[["tree.id"]],ecocode=dataIFN.spain[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) ## test if( sum(! data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") ## save everything as a list list.spain <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) save(list.spain,file="./data/process/list.spain.Rdata")