diff --git a/R/FUN.TRY.R b/R/FUN.TRY.R index d1012060200a60611330282a04ffc640afa31257..2252d66db90d57cc17eddc11529d9c5eabe3b91d 100644 --- a/R/FUN.TRY.R +++ b/R/FUN.TRY.R @@ -89,7 +89,7 @@ fun.extract.try <- function(ObservationID.t, data, Non.Trait.Data, Trait.Data) { names(TF.exp.data) <- "TF.exp.data" res.temp <- data.frame(ObservationID = ObservationID.t, AccSpeciesName = unique(data.temp$AccSpeciesName), t(Vec.Non.Trait.Data), TF.exp.data, t(Vec.Trait.Data.OrigValue), t(Vec.Trait.Data.OrigUnit), - t(Vec.Trait.Data.StdValue)) + t(Vec.Trait.Data.StdValue), stringsAsFactors =FALSE) return(res.temp) } @@ -227,8 +227,12 @@ fun.turn.list.in.DF <- function(sp, res.list) { data.genus <- t(sapply(sp, FUN = function(i, res.list) res.list[[i]]$genus, res.list = res.list)) data.nobs <- t(sapply(sp, FUN = function(i, res.list) res.list[[i]]$nobs, res.list = res.list)) ## create data.frame withh all observation - extract.species.try <- data.frame(data.mean, data.sd, data.exp, data.genus, data.nobs) - names(extract.species.try) <- c(paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "mean", sep = "."), paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "sd", sep = "."), paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "exp", sep = "."), paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "genus", sep = "."), paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "nobs", sep = ".")) + extract.species.try <- data.frame(data.mean, data.sd, data.exp, data.genus, data.nobs, stringsAsFactors =FALSE) + names(extract.species.try) <- c(paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "mean", sep = "."), + paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "sd", sep = "."), + paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "exp", sep = "."), + paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "genus", sep = "."), + paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "nobs", sep = ".")) return(extract.species.try) } @@ -252,7 +256,11 @@ 
fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) { ##### TRANSFORM LIST IN A TABLE extract.species.try <- fun.turn.list.in.DF(sp, res.list) - + + ##### TODO ADD A TEST OF GOOD EXTRACTION OF TRAITS + test.num <- sample(1:length(sp),1) + if( extract.species.try[test.num,"SLA.mean"] != fun.species.traits(sp[test.num], species.table = sp.syno.table, + traits = traits, data = data)$mean[grep("SLA",traits)]) stop('traits value not good for the species in extraction from TRY') ############### add mean sd of species or genus if we want to use that sd.vec.sp <- readRDS(file = "./data/process/sd.vec.sp.rds") sd.vec.genus <- readRDS(file = "./data/process/sd.vec.genus.rds") @@ -262,8 +270,8 @@ fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) { genus.names <- paste(c("Leaf.N", "Seed.mass", "SLA", "Wood.Density"), "genus", sep = ".") ### add columns - extract.species.try.2 <- data.frame(extract.species.try, extract.species.try[, - sd.names]) + extract.species.try.2 <- data.frame(extract.species.try, + extract.species.try[,sd.names], stringsAsFactors =FALSE) ## update value sd.names.1 <- paste(sd.names, 1, sep = ".") @@ -272,7 +280,7 @@ fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) { extract.species.try.2[[sd.names.1[i]]][extract.species.try.2[[genus.names[i]]]] <- sd.vec.genus[i] } data.frame.TRY <- data.frame(sp = sp, Latin_name = sp.syno.table[["Latin_name_syn"]], - extract.species.try.2) + extract.species.try.2, stringsAsFactors =FALSE) if (sum(!data.frame.TRY[["sp"]] == sp) > 0) stop("Wrong order of species code") return(data.frame.TRY) diff --git a/R/format.function.R b/R/format.function.R index 0526a8b9f7e2de314e78e1afea59e1f816604ba6..6b7fa86b627b3bfc4827cb11671239fe840cd26f 100644 --- a/R/format.function.R +++ b/R/format.function.R @@ -62,6 +62,7 @@ BA.fun <- function(diam,weights){ ##' @return data frame with obs.id and one column per species with basal area of the species (without the target tree) ##' 
@author Kunstler BA.SP.FUN <- function(obs.id,diam,sp,id.plot,weights,weight.full.plot){ +print(length(obs.id)) require(data.table) id.plot <- as.character(id.plot) obs.id <- as.character(obs.id) @@ -194,6 +195,7 @@ return(BA.SP.FUN.XY(obs.id[census==census.id],xy.table[census==census.id,],diam[ ### wrapping function to run BA.SP.FUN.XY per census BA.SP.FUN.XY.census <- function(census,obs.id,xy.table,diam,sp,Rlim,parallel=FALSE,rpuDist=FALSE){ unique.census <- unique(census) +print(unique.census) res.list <- lapply(unique.census,FUN=BA.SP.FUN.XY.l,obs.id,xy.table,diam,sp,Rlim,parallel,rpuDist) res.mat <- rbind.fill(res.list ) res.mat <- res.mat[match(obs.id,rownames(res.mat)),] @@ -264,6 +266,7 @@ if(is.null(data.tot[['weights']])) stop("Please create a weights vector, even if require(data.table) data.tot <- data.table(data.tot) data <- data.tot[ecocode==ecoregion,] +print(dim(data)) rm(data.tot) data.BA.SP <- BA.SP.FUN.census(census=data[['census']], obs.id=as.vector(data[['obs.id']]), @@ -296,17 +299,17 @@ setkeyv(data,"obs.id") if(sum(!data.BA.sp[["obs.id"]] == data[["obs.id"]]) >0) stop("competition index not in the same order than data") ##### ## ADD TRY DATA OR TRAITS IF NEEDED -if(!is.na(data.TRY)){ -sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]),] -data.traits <- fun.extract.format.sp.traits.TRY(sp=sp.extract[["sp"]],sp.syno.table=sp.extract,data.TRY) -### TO DO ADD OPTION TO INCLUE OTHER DATA on MAX HEIGHT -## save everything as a list -print(dim(data.traits)) -list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) +if(is.data.frame(data.TRY)){ + sp.extract <- species.lookup[species.lookup[["sp"]] %in% unique(data[["sp"]]),] + data.traits <- fun.extract.format.sp.traits.TRY(sp=sp.extract[["sp"]],sp.syno.table=sp.extract,data.TRY) + ### TO DO ADD OPTION TO INCLUE OTHER DATA on MAX HEIGHT + ## save everything as a 
list + print(dim(data.traits)) + list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=data.traits) + save(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) }else{ -list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA) -saveRDS(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) + list.temp <- list(data.tree=data,data.BA.SP=data.BA.sp,data.traits=NA) + saveRDS(list.temp,file=paste("./data/process/list",name.country,ecoregion,"Rdata",sep=".")) } } @@ -336,7 +339,7 @@ if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) st ### compute sum per row BATOT <- apply(BA.SP.temp,MARGIN=1,FUN=sum) -data.res <- data.frame(obs.id=data.tree.s[['obs.id']],BA.SP.temp,BATOT=BATOT) +data.res <- data.frame(obs.id=data.tree.s[['obs.id']],BA.SP.temp,BATOT=BATOT, stringsAsFactors =FALSE) return(data.res) } diff --git a/merge.data.CANADA.R b/merge.data.CANADA.R index 4abfd334f944b11a3cd53c99459d0724f743ab5f..8711afb850fdf8b5e004691c34623493ccc04035 100644 --- a/merge.data.CANADA.R +++ b/merge.data.CANADA.R @@ -1,11 +1,10 @@ ### MERGE canada DATA Edited by FH rm(list = ls()) source("./R/format.function.R") +source("./R/FUN.TRY.R") library(reshape) ######################### READ DATA read individuals tree data data.canada <- -######################### read.csv('./data/raw/DataCanada/Canada_Data2George_20130816.csv',header=TRUE,stringsAsFactors -######################### =FALSE) data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130818.csv", header = TRUE, stringsAsFactors = FALSE) data.canada <- data.canada[which(!is.na(data.canada$Species)), ] diff --git a/merge.data.SPAIN.R b/merge.data.SPAIN.R index 7da9b81263e3df199296789fd76a0b183ed7c62f..15eb6a94708f017bce4d67c014eb414b5045f83a 100644 --- a/merge.data.SPAIN.R +++ b/merge.data.SPAIN.R @@ -1,6 +1,7 @@ ### MERGE spain DATA Edited by FH rm(list = ls()) source("./R/format.function.R") 
+source("./R/FUN.TRY.R") library(reshape) ######################### READ DATA read individuals tree data data.spain <- @@ -18,22 +19,9 @@ res.quant.boot <- t(sapply(levels(factor(data.spain[["SP_code"]])), FUN = f.quan ## create data base data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, - 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3]) + 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3], stringsAsFactors =FALSE) rm(res.quant.boot) write.csv(data.max.height, file = "./data/process/data.max.height.spain.csv") # I was planning to save processed data in that folder -# ## merge TRY with max height merge.TRY <- -# merge(merge.TRY,data.max.height,by='code') rm(data.max.height) ## use mean sd -# of max tree height over all species merge.TRY$Max.height.sd.1 <- -# rep(mean(merge.TRY[['Max.height.sd']],na.rm=TRUE),length=nrow(merge.TRY)) ### -# keep only variables needed in traits data names.traits.data <- -# c('code','Latin_name','Leaf.N.mean','Seed.mass.mean','SLA.mean','Wood.Density.mean', -# 'Leaf.Lifespan.mean','Max.height.mean','Leaf.N.sd.1','Seed.mass.sd.1','SLA.sd.1', -# 'Wood.Density.sd.1', 'Leaf.Lifespan.sd.1','Max.height.sd.1') data.traits <- -# merge.TRY[,names.traits.data] names(data.traits) <- -# c('sp','Latin_name','Leaf.N.mean','Seed.mass.mean','SLA.mean','Wood.Density.mean', -# 'Leaf.Lifespan.mean','Max.height.mean','Leaf.N.sd','Seed.mass.sd','SLA.sd', -# 'Wood.Density.sd', 'Leaf.Lifespan.sd','Max.height.sd') ## rename to have -# standard variables name rm(merge.TRY,names.traits.data) ################################################################ FORMAT INDIVIDUAL TREE DATA @@ -42,11 +30,14 @@ data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year data.spain$year <- data.spain[["years"]] ## number of year between measurement - MISSING data.spain$D <- data.spain[["dbh1"]]/10 ## diameter in mm convert to cm data.spain$dead <- 
as.numeric(data.spain[["Life_status"]] == "dead") ## dummy variable for dead tree 0 alive 1 dead - MIGHT WANT TO CHANGE THIS TO BE BASED ON MORTALITY_CUT -data.spain$sp <- as.character(data.spain[["SP_code"]]) ## species code +data.spain$sp <- paste("sp",(data.spain[["SP_code"]]) ,sep=".") ## species code +data.spain$sp.name <- data.spain[["SP_name"]] data.spain$plot <- (data.spain[["Plot_ID_SFI"]]) ## plot code data.spain$htot <- data.spain[["ht1"]] ## height of tree in m data.spain$tree.id <- data.spain$Tree_ID_SFI ## tree unique id - +data.spain$obs.id <- as.character(1:nrow(data.spain)) ## obs uniquue id +data.spain$census <- rep(1,nrow(data.spain)) ## only one census in spain +data.spain$weights <- as.vector(1/(pi * (data.spain[["R1"]])^2)) ## weights in 1/m^2 #### change coordinates system of x y to be in lat long WGS84 library(sp) library(dismo) @@ -80,14 +71,14 @@ table(data.spain$eco_code) ## There's an eco-region with no code, and one with < 1000 sites The former we ## could drop as they were on the border of Spain +### PLOT ECOREGION library(RColorBrewer) mycols <- brewer.pal(10, "Set3") - ecoreg <- unclass(data.spain$eco_code) +pdf("./figs/ecoregion.spain.pdf") plot(data.spain[["CX"]][order(ecoreg)], data.spain[["CY"]][order(ecoreg)], pty = ".", cex = 0.2, col = rep(mycols, as.vector(table(ecoreg)))) - legend("topleft", col = mycols, legend = levels(data.spain$eco_code), pch = rep(19, length(levels(ecoreg))), cex = 2) points(data.spain[["CX"]][ecoreg == 9], data.spain[["CY"]][ecoreg == 9], pty = ".", @@ -97,56 +88,65 @@ points(data.spain[["CX"]][ecoreg == 1], data.spain[["CY"]][ecoreg == 1], pty = " cex = 0.5, col = "black") ## Highlight the 'rare' ecoregions PA1219 looks to be similar to PA1209, merge ## them together +dev.off() + +## merge data.spain$eco_codemerged <- combine_factor(data.spain$eco_code, c(1:8, 6, 9)) data.spain <- data.spain[-which(data.spain$eco_codemerged == ""), ] -###################### PERCENT DEAD variable percent dead/cannot do 
with since dead variable is -###################### missing compute numer of dead per plot to remove plot with disturbance +##### +## PLOT of MERGED ECOREGION +## mycols <- brewer.pal(9, "Set3") +## plot(data.spain[["CX"]][order(data.spain$eco_codemerged)], data.spain[["CY"]][order(data.spain$eco_codemerged)], pty = ".", +## cex = 0.2, col = rep(mycols, as.vector(table(data.spain$eco_codemerged)))) +## legend("topleft", col = mycols, legend = levels(data.spain$eco_codemerged), pch = rep(19, +## length(levels(data.spain$eco_codemerged))), cex = 2) + +###################### PERCENT DEAD +###################### compute numer of dead per plot to remove plot with disturbance perc.dead <- tapply(data.spain[["dead"]], INDEX = data.spain[["plot"]], FUN = function.perc.dead) table(data.spain$dead) ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF ## AVAILABLE (disturbance record) -data.spain <- merge(data.spain, data.frame(plot = as.numeric(names(perc.dead)), perc.dead = perc.dead), - sort = FALSE, by = "plot") +data.spain <- merge(data.spain, data.frame(plot = as.numeric(names(perc.dead)), perc.dead = perc.dead, stringsAsFactors =FALSE), + sort = FALSE, by = "plot") ########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with mortality == 1 or 2 -table(data.spain$Mortality_Cut) -data.spain <- subset(data.spain, subset = (data.spain[["Mortality_Cut"]] == 0 | data.spain[["Mortality_Cut"]] == - "")) +data.spain <- subset(data.spain, + subset = (data.spain[["Mortality_Cut"]] == 0 | + data.spain[["Mortality_Cut"]] == "")) colnames(data.spain)[colnames(data.spain) %in% c("mat", "pp", "PET")] <- c("MAT", "PP", "PET") colnames(data.spain)[names(data.spain) == "eco_codemerged"] <- c("ecocode") vec.abio.var.names <- c("MAT", "PP", "PET") -vec.basic.var <- c("tree.id", "sp", "sp.name", "plot", "ecocode", "D", "G", "dead", - "year", "htot", "Lon", "Lat", "perc.dead") -data.tree <- subset(data.spain, select = 
c(vec.basic.var, vec.abio.var.names)) -save(data.spain, file = "./data/process/datspain.RData") - -############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in -############################################## m^2/ha without the target species -data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.spain[["tree.id"]]), diam = as.vector(data.spain[["D"]]), - sp = as.vector(data.spain[["sp"]]), id.plot = as.vector(data.spain[["plot"]]), - weights = as.vector(1/(pi * (data.spain[["R1"]])^2)), weight.full.plot = 1/(pi * - (25)^2)) - -## change NA and <0 data for 0 -data.BA.SP[which(is.na(data.BA.SP), arr.ind = TRUE)] <- 0 -data.BA.SP[, -1][which(data.BA.SP[, -1] < 0, arr.ind = TRUE)] <- 0 - -### CHECK IF sp and sp name for column are the same -if (sum(!(names(data.BA.SP)[-1] %in% unique(data.spain[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") - -#### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[, -1], MARGIN = 1, FUN = sum, na.rm = TRUE) -data.BA.SP$BATOT.COMPET <- BATOT.COMPET -### create data frame -names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id = dataIFN.spain[["tree.id"]], ecocode = dataIFN.spain[["ecocode"]]), - data.BA.SP, by = "tree.id", sort = FALSE) -## test -if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") - -## save everything as a list -list.spain <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) -save(list.spain, file = "./data/process/list.spain.Rdata") +vec.basic.var <- c("tree.id","obs.id", "sp", "sp.name", "plot", "ecocode", "D", "G", "dead", + "year", "htot", "Lon", "Lat", "perc.dead","census","weights") +data.spain <- subset(data.spain, select = c(vec.basic.var, vec.abio.var.names)) + + + + +############################################## 
+############################################## +#################### GENERATE ONE OBJECT PER ECOREGION + +# vector of ecoregion name +ecoregion.unique <- unique(data.spain[["ecocode"]]) +sum(data.spain[["ecocode"]] == ecoregion.unique[1]) + +### read TRY data +TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds") + +## create lookup table for spain +species.lookup <- data.frame(sp=data.spain[!duplicated(data.spain[["sp"]]),"sp"], + Latin_name=data.spain[!duplicated(data.spain[["sp"]]),"sp.name"], + Latin_name_syn=data.spain[!duplicated(data.spain[["sp"]]),"sp.name"], + stringsAsFactors =FALSE) + + +#### lapply function to generate data per ecoregion +system.time(lapply(ecoregion.unique[1:2], FUN = fun.data.per.ecoregion, data.tot = data.spain, + plot.name = "plot", weight.full.plot = NA, name.country = "Spain", data.TRY = TRY.DATA.FORMATED, + species.lookup = species.lookup)) + diff --git a/merge.data.SWISS.R b/merge.data.SWISS.R index 2efc7a50e5677ac03ce63c399659460b1803c56a..58ed741ce5e1f216d1a9e7b5c13da9f6bef899b4 100644 --- a/merge.data.SWISS.R +++ b/merge.data.SWISS.R @@ -59,9 +59,9 @@ res.quant.boot <- t(sapply(levels(factor(data.swiss[["spcode"]])), FUN = f.quant ## create data base data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, - 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3]) + 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3], stringsAsFactors =FALSE) rm(res.quant.boot) -# write.csv(data.max.height,file='./data/process/data.max.height.swiss.csv') +write.csv(data.max.height,file='./data/process/data.max.height.swiss.csv') ########################################## FORMAT INDIVIDUAL TREE DATA change unit and names of variables to be the same ########################################## in all data for the tree @@ -83,7 +83,7 @@ data.swiss$ecocode <- rep("A", nrow(data.swiss)) perc.dead <- tapply(data.swiss[["dead"]], INDEX = 
data.swiss[["plot"]], FUN = function.perc.dead) # ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OTHER VARIABLES IF # AVAILABLE (disturbance record) -data.swiss <- merge(data.swiss, data.frame(plot = names(perc.dead), perc.dead = perc.dead), +data.swiss <- merge(data.swiss, data.frame(plot = names(perc.dead), perc.dead = perc.dead, stringsAsFactors =FALSE), by = "plot", sort = FALSE) ########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 @@ -94,15 +94,12 @@ data.climate <- data.climate[, c(1, 7, 15:19)] data.climate$MAP <- apply(data.climate[, 4:7], 1, sum) data.swiss <- merge(data.swiss, data.frame(siteid = data.climate$CLNR, swb = data.climate$swb_100, - MAT = data.climate$tave_68, MAP = data.climate$MAP), sort = F, all.x = T) + MAT = data.climate$tave_68, MAP = data.climate$MAP, stringsAsFactors =FALSE), sort = F, all.x = T) rm(data.climate) -############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in -############################################## m^2/ha without the target species -data.BA.SP <- BA.SP.FUN.census(census=data.swiss[["census"]],obs.id=data.swiss[["obs.id"]],diam=data.swiss[["D"]],sp=data.swiss[["sp"]],id.plot=data.swiss[["plot"]],weights=data.swiss[["weights"]],weight.full.plot=NA) - -## check if good order - sum(! 
(data.BA.SP[,"obs.id"]) == data.swiss[["obs.id"]]) +############################################## +############################################## +#################### GENERATE ONE OBJECT PER ECOREGION # vector of ecoregion name ecoregion.unique <- unique(data.swiss[["ecocode"]]) @@ -114,11 +111,11 @@ TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds") ## create lookup table for swiss species.lookup <- data.frame(sp=data.swiss[!duplicated(data.swiss[["sp"]]),"sp"], Latin_name=data.swiss[!duplicated(data.swiss[["sp"]]),"sp.name"], - Latin_name_syn=data.swiss[!duplicated(data.swiss[["sp"]]),"sp.name"]) + Latin_name_syn=data.swiss[!duplicated(data.swiss[["sp"]]),"sp.name"], stringsAsFactors =FALSE) -#### lapply function -(lapply(ecoregion.unique, FUN = fun.data.per.ecoregion, data.tot = data.swiss, +#### lapply function to generate data for each ecoregion +system.time(lapply(ecoregion.unique, FUN = fun.data.per.ecoregion, data.tot = data.swiss, plot.name = "plot", weight.full.plot = NA, name.country = "Swiss", data.TRY = TRY.DATA.FORMATED, species.lookup = species.lookup)) diff --git a/merge.data.US.R b/merge.data.US.R index 275c7c10b3b411573e04403500fd8ffcf12c2854..399f93134910a9c4b5418899fae602ea125f6bb7 100644 --- a/merge.data.US.R +++ b/merge.data.US.R @@ -41,7 +41,8 @@ data.us$htot <- rep(NA, length(data.us[["Species"]])) ## height of tree in m - data.us$tree.id <- as.character(data.us$TreeID) ## tree unique id data.us$sp.name <- NA - +## census is missing use as only one census and check with mark +data.us$census <- rep(1,nrow(data.us)) ### add plot weights for computation of competition index (in 1/m^2) data.us$weights <- 1/(10000 * data.us[["PlotSize"]]) @@ -68,7 +69,7 @@ for (i in 1:length(sel.small.div)) { perc.dead <- tapply(data.us[["dead"]], INDEX = data.us[["plot"]], FUN = function.perc.dead) # ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF # AVAILABLE (disturbance record) -data.us <- merge(data.us, 
data.frame(plot = names(perc.dead), perc.dead = perc.dead), +data.us <- merge(data.us, data.frame(plot = names(perc.dead), perc.dead = perc.dead, stringsAsFactors = FALSE), by = "plot", sort = FALSE) @@ -86,7 +87,7 @@ data.us[["sp"]] <- paste("sp", data.us[["sp"]], sep = ".") ## variables to keep vec.abio.var.names <- c("MAT", "MAP") vec.basic.var <- c("tree.id", "sp", "plot", "subplot", "ecocode", "D", "G", "dead", - "year", "htot", "Lon", "Lat", "perc.dead", "weights") + "year", "htot", "Lon", "Lat", "perc.dead", "weights","census") data.tree <- subset(data.us, select = c(vec.basic.var, vec.abio.var.names)) rm(data.us) ## creat row unique id @@ -103,6 +104,6 @@ ecoregion.unique <- unique(data.tree[["ecocode"]]) #### lapply function -system.time(lapply(ecoregion.unique, FUN = fun.data.per.ecoregion, data.tot = data.tree, +system.time(lapply(ecoregion.unique[3], FUN = fun.data.per.ecoregion, data.tot = data.tree, plot.name = "subplot", weight.full.plot = NA, name.country = "US", data.TRY = TRY.DATA.FORMATED, species.lookup = species.clean)) diff --git a/ms/data.format.md b/ms/data.format.md index 8f42907d96548c288cd65b812d8c495f7a44aada..1dd2df4503c8d51f5c49c20fbc76be26cbbbecda 100644 --- a/ms/data.format.md +++ b/ms/data.format.md @@ -14,6 +14,7 @@ For the analysis we need for each ecoregion country (or big tropical plot) a lis * First element is a data.frame for individual tree data with columns + - $obs.id$ a unique identifier of observation (if multiple observation for a same tree) - $tree.id$ a unique identifier of each tree - $sp$ the species code - $plot$ the plot code @@ -22,10 +23,12 @@ For the analysis we need for each ecoregion country (or big tropical plot) a lis - $G$ the diameter growth rate in mm / yr. 
- $dead$ a dummy variable 0 alive 1 dead - $year$ the number of year for the growth measurement + - $census$ the name of the year of the census 1 - $htot$ the height of the individual (m) for the data base for which it is availble to compute max height per species - $Lon$ Longitude of the plot in WGS84 - $Lat$ Latitude of teh plots in WGS84 - $perc.dead$ the percentage of dead computed on each plot to exlude plot with perturbation (equal 1 for plot with known perturbation) + - $weights$ the weights of the tree to have an estimation of basal area per m^2 (so 1/(m^2)) - then the potential abiotic variables that we can use in the analysis * Second element is a data.frame competition index with columns diff --git a/ms/table.data.progress.ods b/ms/table.data.progress.ods index 45ad7a04fb61990b230d2d804cdfdd98e5175b1e..066e23e01c71741e74cd25e4e18686f32392640e 100644 Binary files a/ms/table.data.progress.ods and b/ms/table.data.progress.ods differ