# merge.data.FRANCE.R

############################################# MERGE FRENCH DATA
## Start from an empty workspace: removes every visible object of the current
## environment before anything is loaded.
rm(list = ls())
source("./R/format.function.R")
library(reshape)

################################ READ DATA
## Individual tree records of the French inventory; strings are kept as character.
data.france <- read.csv(
    file = "./data/raw/DataFrance/dataIFN.FRANCE.csv",
    stringsAsFactors = FALSE
)

### IFN species names: read the raw table, then clean it with fun.clean.species.tab()
species.clean <- read.csv(
    file = "./data/raw/DataFrance/species.csv",
    stringsAsFactors = FALSE
)
species.clean <- fun.clean.species.tab(species.clean)

### TRY data: the row names are copied into an explicit Latin_name column so that
### they can be used as a merge key
data.TRY.sd.update <- readRDS("./data/process/data.TRY.sd.update.rds")
data.frame.TRY <- data.frame(
    Latin_name = rownames(data.TRY.sd.update),
    data.TRY.sd.update
)
rm(data.TRY.sd.update)

### attach species code and species name to the traits (rows matched on Latin_name)
merge.TRY <- merge(x = species.clean, y = data.frame.TRY, by = "Latin_name")
rm(species.clean, data.frame.TRY)


###################################### MASSAGE TRAIT DATA
## Per-species maximum height summary from the observed heights, added to the traits.
## f.quantile.boot() is called once per species code (levels of espar) with R = 1000,
## the log10 of htot as values and espar as grouping factor. The first three values it
## returns are used below as mean, sd and number of observations.
## NOTE(review): an earlier version of this header spoke of taking the max of two
## heights before bootstrapping, but only htot is used here -- confirm.
espar.factor <- factor(data.france[["espar"]])
res.quant.boot <- sapply(
    levels(espar.factor),
    FUN = f.quantile.boot,
    R = 1000,
    x = log10(data.france[["htot"]]),
    fac = espar.factor
)
## sapply() puts one species per column; transpose to get one species per row
res.quant.boot <- t(res.quant.boot)
rm(espar.factor)

## create data base (species code = row names of the bootstrap result)
data.max.height <- data.frame(
    code = rownames(res.quant.boot),
    Max.height.mean = res.quant.boot[, 1],
    Max.height.sd = res.quant.boot[, 2],
    Max.height.nobs = res.quant.boot[, 3]
)
rm(res.quant.boot)
## write.csv(data.max.height,file='./data/process/data.max.height.csv')

## merge TRY with max height (rows matched on the species code)
merge.TRY <- merge(x = merge.TRY, y = data.max.height, by = "code")
rm(data.max.height)

## every row gets the same sd: the mean of Max.height.sd over the rows of merge.TRY
merge.TRY$Max.height.sd.1 <- rep(
    mean(merge.TRY[["Max.height.sd"]], na.rm = TRUE),
    length.out = nrow(merge.TRY)
)

### keep only the variables needed in the traits data and rename them on the way.
### In the mapping below the element name is the column in merge.TRY and the
### element value is the name that column gets in data.traits.
traits.rename <- c(
    code = "espar",
    Latin_name = "latin_name",
    Leaf.N.mean = "leafN.mean",
    Seed.mass.mean = "seedmass.mean",
    SLA.mean = "SLA.mean",
    Wood.Density.mean = "wooddensity.mean",
    Leaf.Lifespan.mean = "leaflifespan.mean",
    Max.height.mean = "maxheight.mean",
    Leaf.N.sd.1 = "leafN.sd",
    Seed.mass.sd.1 = "seedmass.sd",
    SLA.sd.1 = "SLA.sd",
    Wood.Density.sd.1 = "wooddensity.sd",
    Leaf.Lifespan.sd.1 = "leaflifespan.sd",
    Max.height.sd.1 = "maxheight.sd"
)

data.traits <- merge.TRY[, names(traits.rename)]
names(data.traits) <- unname(traits.rename)
rm(merge.TRY, traits.rename)


########################################## FORMAT INDIVIDUAL TREE DATA
## Change units and names of the tree variables so that they are the same in all
## data sets. Columns are added to (or dropped from) data.france in place.

data.france$G <- data.france[["ir5"]] / 5 * 2  ## diameter growth in mm per year
data.france$year <- rep(5, nrow(data.france))  ## number of years between measurements
data.france$D <- data.france[["c13"]] / pi  ## diameter in cm
## species code, stored as character under the common name `sp`; `espar` is dropped
data.france$sp <- as.character(data.france[["espar"]])
data.france$espar <- NULL
## plot code, moved to the common name `plot.id`; `idp` no longer exists after this
data.france$plot.id <- data.france[["idp"]]
data.france$idp <- NULL
data.france$tree.id <- paste(data.france[["plot.id"]], data.france[["a"]], sep = "_")  ## tree unique id
data.france$weights <- data.france[["w"]] / 10000
## There is only one observation per tree.id, so obs.id is superfluous.
## seq_len() (rather than 1:nrow) stays valid when data.france has zero rows.
data.france$obs.id <- seq_len(nrow(data.france))


######################## change coordinates system of x y to be in lat long WGS84
library(sp)
library(dismo)
library(rgdal)
## The plot code column `idp` was moved to `plot.id` and removed from data.france
## earlier in this script, so selecting "idp" here would fail with
## "undefined columns selected". Select `plot.id` instead and give it back the name
## `idp` on this copy only, so the spatial object keeps the column name it had.
data.sp <- data.france[, c("plot.id", "xl93", "yl93")]
names(data.sp)[1] <- "idp"
coordinates(data.sp) <- c("xl93", "yl93")  ## columns holding the coordinates, EPSG CODE 2154
proj4string(data.sp) <- CRS("+init=epsg:2154")  ## define projection system of our data, EPSG CODE 2154