From b2cca4c4bbd802a04b6dfde34ff0ec9701633a52 Mon Sep 17 00:00:00 2001
From: fhui28 <fhui28@gmail.com>
Date: Thu, 15 Aug 2013 20:27:22 +1000
Subject: [PATCH] added merging.spain

---
 merge.data.SPAIN-fhv1.R | 158 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 merge.data.SPAIN-fhv1.R

diff --git a/merge.data.SPAIN-fhv1.R b/merge.data.SPAIN-fhv1.R
new file mode 100644
index 0000000..9151e77
--- /dev/null
+++ b/merge.data.SPAIN-fhv1.R
@@ -0,0 +1,158 @@
+### MERGE spain DATA
+### Edited by FH
+rm(list = ls()); source("./R/format.function.R"); library(reshape)
+
+#########################
+## READ DATA
+####################
+### read individuals tree data
+data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI.txt',header=TRUE,stringsAsFactors=FALSE,sep = "\t")
+
+# ### read species names and clean
+# species.clean <- fun.clean.species.tab(read.csv("./data/species.list/species.csv",sep="\t",stringsAsFactors=FALSE)) 
+# 
+# ### read TRY data
+# data.TRY.sd.update <- readRDS("./data/process/data.TRY.sd.update.rds")
+# data.frame.TRY <- data.frame(Latin_name=rownames(data.TRY.sd.update),data.TRY.sd.update)
+# rm(data.TRY.sd.update)
+# ### merge with code and species name
+# merge.TRY <- merge(species.clean,data.frame.TRY,by="Latin_name")
+# rm(species.clean,data.frame.TRY)
+
+######################################
+## MASSAGE TRAIT DATA
+############################
+## Compute maximum height per species plus sd from observed height to add variables to the traits data base
+## Because we have two heights, then take the max of the two heights and then bootstrap 
+res.quant.boot <- t(sapply(levels(factor(data.spain[["SP_code"]])),FUN=f.quantile.boot,R=1000,x=log10(apply(data.spain[,c("ht1","ht2")],1,max,na.rm=T)),fac=factor(data.spain[["SP_code"]])))
+#max.heights <- read.csv("/media/fhui/Lexar/Career & Work/GKunstler_competition/data/raw/DataSpain/MaximumHeigth.csv", header = T)
+# 
+# ## create data base
+data.max.height <- data.frame(code=rownames(res.quant.boot),Max.height.mean=res.quant.boot[,1],Max.height.sd=res.quant.boot[,2],Max.height.nobs=res.quant.boot[,3])
+rm(res.quant.boot)
+write.csv(data.max.height,file="./data/raw/DataSpain/data.max.height.csv")
+# 
+# ## merge TRY with max height
+# merge.TRY <- merge(merge.TRY,data.max.height,by="code")
+# rm(data.max.height)
+# ## use mean sd of max tree height over all species
+# merge.TRY$Max.height.sd.1 <- rep(mean(merge.TRY[["Max.height.sd"]],na.rm=TRUE),length=nrow(merge.TRY))
+# 
+# ### keep only variables needed in traits data
+# names.traits.data <- c("code","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean",
+#   "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd.1","Seed.mass.sd.1","SLA.sd.1", "Wood.Density.sd.1",
+#   "Leaf.Lifespan.sd.1","Max.height.sd.1")
+# 
+# data.traits <- merge.TRY[,names.traits.data]
+# names(data.traits) <- c("sp","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean",
+#   "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd","Seed.mass.sd","SLA.sd", "Wood.Density.sd",
+#   "Leaf.Lifespan.sd","Max.height.sd") ## rename to have standard variables name
+# rm(merge.TRY,names.traits.data)
+
+################################################################
+## FORMAT INDIVIDUAL TREE DATA
+#############
+data.max.height <- read.csv(file = "./data/raw/DataSpain/data.max.height.csv", header = T)
+
+## change unit and names of variables to be the same in all data for the tree 
+data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year
+data.spain$year <- rep(NA,length(data.spain[["adbh"]])) ## number of year between measurement/missing!
+data.spain$D <- data.spain[["dbh1"]]/10 ## diameter in mm
+data.spain$dead <- rep(NA,length(data.spain[["adbh"]])) ## dummy variable for dead tree 0 alive 1 dead/missing!
+data.spain$sp <- as.character(data.spain[["SP_code"]]) ## species code
+data.spain$plot <- (data.spain[["Plot_ID_SFI"]]) ## plot code
+tmp.htot <- rep(NA, length(data.spain[["adbh"]])); ## height of tree in m
+for(i in 1:length(data.max.height$code)) { 
+	sel.sp <- which(data.spain$sp == data.max.height$code[i]); tmp.htot[sel.sp] <- data.max.height$Max.height.mean[i] }
+data.spain$htot <- 10^tmp.htot
+data.spain$tree.id <- paste(sapply(data.spain[,"Tree_ID_SFI"],substr,1,6),".",sapply(data.spain[,"Tree_ID_SFI"],substr,7,10),sep="") ## tree unique id
+
+#### change coordinates system of x y to be in lat long WGS84/don't know how to do this
+library(sp); library(dismo); library(rgdal); 
+data.sp <-  data.spain[,c("idp","xl93","yl93")]
+coordinates(data.sp) <- c("xl93", "yl93") ## EPSG CODE 2154
+proj4string(data.sp) <- CRS("+init=epsg:2154")  # define projection system of our data ## EPSG CODE 2154
+summary(data.sp)
+
+detach(package:rgdal)
+data.sp2 <- spTransform(data.sp,CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon
+dataIFN.spain$Lon <- coordinates(data.sp2)[,"xl93"]
+dataIFN.spain$Lat <- coordinates(data.sp2)[,"yl93"]
+data.spain$Lon <- NA; data.spain$Lat <- NA
+rm(data.sp,data.sp2)
+## ## plot on world map
+## library(rworldmap)
+## newmap <- getMap(resolution = "coarse")  # different resolutions available
+## plot(newmap)
+## points(data.sp2,cex=0.2,col="red")
+
+save(data.spain, file = "./data/raw/DataSpain/datspain.RData")
+############################
+## merge greco to have no ecoregion with low number of observation
+greco <- read.csv(file = "./data/raw/DataSpain/R_Ecoregion.csv", header = T) 
+greco <- greco[,c("Plot_ID_SFI","BIOME","eco_code")]
+greco2 <- greco[!duplicated(greco$Plot),]; 
+rm(greco)
+
+data.spain <- merge(data.spain, greco2, by = "Plot_ID_SFI")
+rm(greco2)
+
+table(data.spain$eco_code)
+## There's an eco-region with no code, and one with 55 sites
+
+library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); 
+ecoreg <- unclass(data.spain$eco_code); 
+plot(data.spain[["CX"]][order(ecoreg)],data.spain[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg)))); 
+legend("bottomright", col = mycols, legend = levels(data.spain$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2)
+points(data.spain[["CX"]][ecoreg == 9],data.spain[["CY"]][ecoreg == 9],pty=".",cex=.2, col = "black"); ## Highlight the region with 55 sites
+## PA1219 looks to be similar to PA1209; merge them together
+data.spain$eco_codemerged <- combine_factor(data.spain$eco_code, c(1:8,6,9))
+
+#######################
+## variable percent dead/cannot do with since dead variable is missing
+# ###compute numer of dead per plot to remove plot with disturbance
+# perc.dead <- tapply(data.spain[["dead"]],INDEX=data.spain[["idp"]],FUN=function.perc.dead)
+# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record)
+# data.spain <- merge(data.spain,data.frame(idp=as.numeric(names(perc.dead)),perc.dead=perc.dead),sort=FALSE)
+data.spain$perc.dead <- NA
+
+###########################################################
+### PLOT SELECTION FOR THE ANALYSIS
+###################
+## Remove data with mortality == 1 or 2
+table(data.spain$Mortality_Cut)
+data.spain <- subset(data.spain,subset= (data.spain[["Mortality_Cut"]] == 0 |  data.spain[["Mortality_Cut"]] == ""))
+
+colnames(data.spain)[18:20] <- c("MAT","PP","PET"); colnames(data.spain)[c(1,8,32)] <- c("plot","sp.name","ecocode")
+vec.abio.var.names <-  c("MAT","PP","PET")
+vec.basic.var <-  c("tree.id","sp","sp.name","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead")
+data.tree <- subset(data.spain,select=c(vec.basic.var,vec.abio.var.names))
+save(data.spain, file = "./data/raw/DataSpain/datspain.RData")
+
+##############################################
+## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species
+###########################
+data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.spain[["tree.id"]]), diam=as.vector(data.spain[["D"]]),
+	sp=as.vector(data.spain[["sp"]]), id.plot=as.vector(data.spain[["plot"]]),
+	weights=as.vector(data.spain[["w"]])/10000, weight.full.plot=1/(pi*(c(15))^2))
+
+## change NA and <0 data for 0
+data.BA.SP[which(is.na(data.BA.SP),arr.ind=TRUE)] <- 0
+data.BA.SP[,-1][which(data.BA.SP[,-1]<0,arr.ind=TRUE)] <- 0
+
+### CHECK IF sp and sp name for column are the same
+if(sum(! (names(data.BA.SP)[-1] %in% unique(dataIFN.spain[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree")
+
+#### compute BA tot for all competitors
+BATOT.COMPET <- apply(data.BA.SP[,-1],MARGIN=1,FUN=sum,na.rm=TRUE)
+data.BA.SP$BATOT.COMPET <- BATOT.COMPET
+### create data frame
+names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1])
+data.BA.sp <- merge(data.frame(tree.id=dataIFN.spain[["tree.id"]],ecocode=dataIFN.spain[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE)
+## test
+if( sum(! data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree")
+
+## save everything as a list
+list.spain <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits)
+save(list.spain,file="./data/process/list.spain.Rdata")
+
-- 
GitLab