### merge.data.SPAIN.R

### MERGE spain DATA
### Edited by FH
## Script set-up: clear the workspace, load the project helper functions, attach reshape.
## NOTE(review): rm(list = ls()) removes every (non-hidden) object from the workspace of
## whoever sources this script -- confirm that this is intended.
## NOTE(review): f.quantile.boot is used further down but is not defined in this file;
## presumably it comes from ./R/format.function.R -- verify.
rm(list = ls()); source("./R/format.function.R"); library(reshape)

#########################
## READ DATA
####################
### read the individual tree data (tab-separated text file with a header row);
### character columns are kept as character, not converted to factors
data.spain <- read.table(
  file = "./data/raw/DataSpain/Tree_data_SFI.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)


######################################
## MASSAGE TRAIT DATA
############################
## Compute maximum height per species plus sd from observed height, to add these
## variables to the traits data base.
## Each tree has two height columns (ht1, ht2): take the larger of the two for every
## tree, log10-transform it, then bootstrap within each species code.
## pmax(..., na.rm = TRUE) is used instead of apply(..., 1, max, na.rm = TRUE): a tree
## with both heights missing now gives NA rather than -Inf (which log10 turned into NaN,
## with warnings), and the two columns are no longer coerced to a matrix row by row.
spain.sp.factor <- factor(data.spain[["SP_code"]])
spain.log.max.ht <- log10(pmax(data.spain[["ht1"]], data.spain[["ht2"]], na.rm = TRUE))
## f.quantile.boot is called once per species level (the level is its first argument);
## after transposing there is one row per species, with the species code as row name.
res.quant.boot <- t(sapply(levels(spain.sp.factor),
                           FUN = f.quantile.boot,
                           R = 1000,
                           x = spain.log.max.ht,
                           fac = spain.sp.factor))
#max.heights <- read.csv("/media/fhui/Lexar/Career & Work/GKunstler_competition/data/raw/DataSpain/MaximumHeigth.csv", header = T)
#
# ## create data base
## columns 1 to 3 of the bootstrap result are stored as mean, sd and number of observations
data.max.height <- data.frame(code = rownames(res.quant.boot),
                              Max.height.mean = res.quant.boot[, 1],
                              Max.height.sd = res.quant.boot[, 2],
                              Max.height.nobs = res.quant.boot[, 3])
rm(res.quant.boot, spain.sp.factor, spain.log.max.ht) # drop the temporaries created above
write.csv(data.max.height,file="./data/process/data.max.height.spain.csv") # I was planning to save processed data in that folder
#
# ## merge TRY with max height
# merge.TRY <- merge(merge.TRY,data.max.height,by="code")
# rm(data.max.height)
# ## use mean sd of max tree height over all species
# merge.TRY$Max.height.sd.1 <- rep(mean(merge.TRY[["Max.height.sd"]],na.rm=TRUE),length=nrow(merge.TRY))
#
# ### keep only variables needed in traits data
# names.traits.data <- c("code","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean",
#   "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd.1","Seed.mass.sd.1","SLA.sd.1", "Wood.Density.sd.1",
#   "Leaf.Lifespan.sd.1","Max.height.sd.1")
#
# data.traits <- merge.TRY[,names.traits.data]
# names(data.traits) <- c("sp","Latin_name","Leaf.N.mean","Seed.mass.mean","SLA.mean","Wood.Density.mean",
#   "Leaf.Lifespan.mean","Max.height.mean","Leaf.N.sd","Seed.mass.sd","SLA.sd", "Wood.Density.sd",
#   "Leaf.Lifespan.sd","Max.height.sd") ## rename to have standard variables name
# rm(merge.TRY,names.traits.data)

################################################################
## FORMAT INDIVIDUAL TREE DATA
#############

## change unit and names of variables to be the same in all data for the tree
data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year
data.spain$year <- rep(NA, nrow(data.spain)) ## number of year between measurement/missing!
data.spain$D <- data.spain[["dbh1"]] / 10 ## diameter in mm convert to cm
data.spain$dead <- rep(NA, nrow(data.spain)) ## dummy variable for dead tree 0 alive 1 dead/missing!
data.spain$sp <- as.character(data.spain[["SP_code"]]) ## species code
data.spain$plot <- data.spain[["Plot_ID_SFI"]] ## plot code
data.spain$htot <- data.spain[["ht1"]] ## height of tree in m / here I want to keep the actual height not the max
## tree unique id: characters 1-6 of Tree_ID_SFI, a dot, then characters 7-10.
## substr() is already vectorised, so it is applied to the whole column at once
## instead of once per row through sapply(); the resulting ids are the same.
data.spain$tree.id <- paste0(substr(data.spain[["Tree_ID_SFI"]], 1, 6), ".",
                             substr(data.spain[["Tree_ID_SFI"]], 7, 10))

#### change coordinates system of x y (CX, CY) to be in lat long WGS84
## Builds a spatial points object from the tree id and CX/CY columns, declares its source
## projection, reprojects it, and stores the reprojected coordinates back on data.spain.
library(sp); library(dismo); library(rgdal);
data.sp <-  data.spain[,c("Tree_ID_SFI","CX","CY")]
coordinates(data.sp) <- c("CX", "CY") # define x y: CX and CY become the coordinate columns
proj4string(data.sp) <- CRS("+init=epsg:23030")  # define projection system of our data ## EPSG CODE 23030 ED50 / UTM zone 30N
summary(data.sp)

## NOTE(review): rgdal is detached from the search path before spTransform() is called;
## the reason is not visible in this file -- confirm it is intentional and that the
## reprojection still works with the package detached.
detach(package:rgdal)
data.sp2 <- spTransform(data.sp,CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon
## the coordinate columns keep their original names, so CX is read as longitude and CY as latitude
data.spain$Lon <- coordinates(data.sp2)[,"CX"]
data.spain$Lat <- coordinates(data.sp2)[,"CY"]
## ## plot on world map
## library(rworldmap)
## newmap <- getMap(resolution = "coarse")  # different resolutions available