merge.data.FRANCE.R 7.67 KiB
############################################# MERGE FRENCH DATA
rm(list = ls()); 
source("./R/format.function.R")
library(reshape)
################################ READ DATA
data.france <- read.csv("./data/raw/DataFrance/dataIFN.FRANCE.csv", stringsAsFactors = FALSE)
### read IFN species names and clean
species.clean <- fun.clean.species.tab(read.csv("./data/raw/DataFrance/species.csv", stringsAsFactors = FALSE))
### read TRY data
data.TRY.sd.update <- readRDS("./data/process/data.TRY.sd.update.rds")
data.frame.TRY <- data.frame(Latin_name = rownames(data.TRY.sd.update), data.TRY.sd.update)
rm(data.TRY.sd.update)
### merge with code and species name
merge.TRY <- merge(species.clean, data.frame.TRY, by = "Latin_name")
rm(species.clean, data.frame.TRY)
###################################### MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed
###################################### height to add variables to the traits data base Because we have two heights,
###################################### then take the max of the two heights and then bootstrap
res.quant.boot <- t(sapply(levels(factor(data.france[["espar"]])), FUN = f.quantile.boot, 
    R = 1000, x = log10(data.france[["htot"]]), fac = factor(data.france[["espar"]])))
## create data base
data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, 
    1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3])
rm(res.quant.boot)
## write.csv(data.max.height,file='./data/process/data.max.height.csv')
## merge TRY with max height
merge.TRY <- merge(merge.TRY, data.max.height, by = "code")
rm(data.max.height)
## use mean sd of max tree height over all species
merge.TRY$Max.height.sd.1 <- rep(mean(merge.TRY[["Max.height.sd"]], na.rm = TRUE), 
    length = nrow(merge.TRY))
### keep only variables needed in traits data
names.traits.data <- c("code", "Latin_name", "Leaf.N.mean", "Seed.mass.mean", "SLA.mean", 
    "Wood.Density.mean", "Leaf.Lifespan.mean", "Max.height.mean", "Leaf.N.sd.1", 
    "Seed.mass.sd.1", "SLA.sd.1", "Wood.Density.sd.1", "Leaf.Lifespan.sd.1", "Max.height.sd.1")
data.traits <- merge.TRY[, names.traits.data]
names(data.traits) <- c("espar", "latin_name", "leafN.mean", "seedmass.mean", "SLA.mean", 
    "wooddensity.mean", "leaflifespan.mean", "maxheight.mean", "leafN.sd", "seedmass.sd", 
    "SLA.sd", "wooddensity.sd", "leaflifespan.sd", "maxheight.sd") 
rm(merge.TRY, names.traits.data)
########################################## FORMAT INDIVIDUAL TREE DATA change unit and names of variables to be the same
########################################## in all data for the tree
data.france$G <- data.france[["ir5"]]/5 * 2  ## diameter growth in  mm per year
data.france$year <- rep(5, nrow(data.france))  ## number of year between measurement
data.france$D <- data.france[["c13"]]/pi  ## diameter in cm
data.france$sp <- as.character(data.france[["espar"]]); data.france$espar <- NULL ## species code
data.france$plot.id <- (data.france[["idp"]]); data.france$idp <- NULL ## plot code
data.france$tree.id <- paste(data.france[["plot.id"]], data.france[["a"]], sep = "_")  ## tree unique id
data.france$weights <- data.france[["w"]]/10000
data.france$obs.id <- 1:nrow(data.france) ## There is only obs per tree.id, so this is superfluous
######################## change coordinates system of x y to be in lat long WGS84
library(sp)
library(dismo)
library(rgdal)
data.sp <- data.france[, c("idp", "xl93", "yl93")]
coordinates(data.sp) <- c("xl93", "yl93")  ## EPSG CODE 2154
proj4string(data.sp) <- CRS("+init=epsg:2154")  # define projection system of our data ## EPSG CODE 2154
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
summary(data.sp) data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon data.france$Lon <- coordinates(data.sp2)[, "xl93"] data.france$Lat <- coordinates(data.sp2)[, "yl93"] rm(data.sp, data.sp2) ## ## plot on world map library(rworldmap) newmap <- getMap(resolution = 'coarse') ## # different resolutions available plot(newmap) ## points(data.sp2,cex=0.2,col='red') ###################### ECOREGION - merge greco to have no ecoregion with low number of observation ## merge A and B Grand Ouest cristallin and oceanique and Center semi-oceanique ## merge G D E Vosges Jura massif cemtral (low mountain) merge H and I Alpes and ## Pyrenees Merge J and K Corse and Mediteraneen GRECO.temp <- substr(data.france[["SER"]], 1, 1) ## get GRECO from SER (smaller division by keeping only the first letter) GRECO.temp <- sub("[AB]", "AB", GRECO.temp) GRECO.temp <- sub("[GDE]", "GDE", GRECO.temp) GRECO.temp <- sub("[HI]", "HI", GRECO.temp) GRECO.temp <- sub("[JK]", "JK", GRECO.temp) ## plot(data.france[['xl93']],data.france[['yl93']],col=unclass(factor(GRECO.temp))) data.france$ecocode <- GRECO.temp ## a single code for each ecoregion ###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is ###################### missing compute numer of dead per plot to remove plot with disturbance perc.dead <- tapply(data.france[["dead"]], INDEX = data.france[["plot.id"]], FUN = function.perc.dead2) data.france <- merge(data.france, data.frame(plot.id = as.numeric(names(perc.dead)), perc.dead = perc.dead), sort = FALSE) ########################################################################################### PLOT SELECTION FOR THE ANALYSIS ## data.france <- subset(data.france,subset= data.france[['YEAR']] != 2005) ## year 2005 bad data according to IFN data.france <- subset(data.france, subset = data.france[["plisi"]] == 0) ## no plot on forest edge data.france <- subset(data.france, subset = data.france[["dc"]] == 0) ## no harvesting data.france <- subset(data.france, subset = data.france[["tplant"]] == 0) ## no plantation data.france <- subset(data.france, subset = !is.na(data.france[["SER"]])) ## missing SER ## SELECT GOOD COLUMNS ## names of variables for abiotic conditions vec.abio.var.names <- c("MAT", "SAP", "sgdd", "WB.s", "WB.y", "WS.s", "WS.y") ## other var vec.basic.var <- c("tree.id", "sp", "plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead") data.tree <- subset(data.france, select = c(vec.basic.var, vec.abio.var.names)) ############################################## #################### GENERATE ONE OBJECT PER ECOREGION - COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in #################### m^2/ha without the target species data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.france[["tree.id"]]), diam = as.vector(data.france[["D"]]), sp = as.vector(data.france[["sp"]]), id.plot = as.vector(data.france[["idp"]]), weights = as.vector(data.france[["w"]])/10000, weight.full.plot = 1/(pi * (c(15))^2)) ## change NA and <0 data for 0 data.BA.SP[which(is.na(data.BA.SP), arr.ind = TRUE)] <- 0 data.BA.SP[, -1][which(data.BA.SP[, -1] < 0, arr.ind = TRUE)] <- 0 ### CHECK IF sp and sp name for column are the same if (sum(!(names(data.BA.SP)[-1] %in% unique(data.france[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors BATOT.COMPET <- apply(data.BA.SP[, -1], MARGIN = 1, FUN = sum, na.rm = TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET ### create data frame
141142143144145146147148149150151
names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) data.BA.sp <- merge(data.frame(tree.id = data.france[["tree.id"]], ecocode = data.france[["ecocode"]]), data.BA.SP, by = "tree.id", sort = FALSE) ## test if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") ## save everything as a list list.FRANCE <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) save(list.FRANCE, file = "./data/process/list.FRANCE.Rdata")