diff --git a/R/format.data/BCI.R b/R/format.data/BCI.R index a4a7cb7a30a91b580de0f53ab1841c489519d0b0..66c3682ae37bd7dfef2735a91988551a011a48dc 100644 --- a/R/format.data/BCI.R +++ b/R/format.data/BCI.R @@ -1,19 +1,17 @@ ### MERGE BCI DATA rm(list = ls()) -source("./R/format.function.R") -source("./R/FUN.TRY.R") +source("format.fun.R") library(reshape) - ############# READ DATA read individuals tree data Requires careful formatting of 7 census ############ datasets The raw data is such that, once a tree dies in census X, then it no ########## longer exists in census X+1, X+2 etc... -data.bci1 <- read.table("./data/raw/DataBCI/census1/PlotsDataReport.txt", header = TRUE, +data.bci1 <- read.table("../../data/raw/BCI/census1/PlotsDataReport.txt", header = TRUE, stringsAsFactors = FALSE, sep = "\t") data.bci1$Date1 <- data.bci1$Date; data.bci1$Date <- NULL data.bci1$DBH1 <- data.bci1$DBH; data.bci1$DBH <- NULL big.bci <- NULL for (k in 2:7) { - new.directory <- paste("./data/raw/DataBCI/census", k, "/PlotsDataReport.txt", + new.directory <- paste("../../data/raw/BCI/census", k, "/PlotsDataReport.txt", sep = "") data.bci2 <- read.table(new.directory, header = TRUE, stringsAsFactors = FALSE, sep = "\t"); @@ -38,47 +36,16 @@ rm(data.bci1, data.bci2, sub.bci) big.bci <- big.bci[order(big.bci$TreeID), ] data.bci <- big.bci rm(big.bci) - ### read species names -species.clean <- read.table("./data/raw/DataBCI/TaxonomyDataReport.txt", stringsAsFactors = FALSE, +species.clean <- read.table("../../data/raw/BCI/TaxonomyDataReport.txt", stringsAsFactors = FALSE, header = T, sep = "\t") species.clean$Latin_name <- paste(species.clean[["Genus"]], species.clean[["species"]],sep=" ") - ## ## Try to relate SpeciesID in species.clean species names in data.bci #unique(data.bci$Latin) %in% species.clean$Latin_name data.bci <- merge(data.bci, data.frame(Latin = species.clean$Latin_name, sp = species.clean$SpeciesID, genus = species.clean$Genus, stringsAsFactors = F), by = "Latin", sort = F) -###################################### MASSAGE TRAIT DATA Use HEIGHT_AVG, LMALAM_AVD, SEED_DRY -data.trait <- read.csv("./data/raw/DataBCI/BCITRAITS_20101220.csv", stringsAsFactors = FALSE, - header = T) -data.trait$Latin <- apply(data.trait[, 1:2], 1, paste, collapse = " ") -data.trait <- data.trait[,c("GENUS.","SP.","Latin","SEED_DRY","LMALAM_AVD","LMALAM_SED","LMALAM_ND","SG100C_AVG","SG100C_SEM","SG100C_N","HEIGHT_AVG","HEIGHT_SEM","HEIGHT_N")] -data.trait$sp <- data.trait[["SP."]] -data.trait[["SP."]] <- NULL -data.trait$Leaf.N.mean <- NA -data.trait$Leaf.N.sd <- NA -data.trait$Seed.mass.mean <- data.trait$SEED_DRY*1000 -data.trait$SEED_DRY <- NULL -data.trait$Seed.mass.sd <- NA -data.trait$SLA.mean <- 1/data.trait$LMALAM_AVD -data.trait$SLA.mean <- data.trait$SLA.mean*1000 ## Conversion from g m^-2 to mm2 mg^-1 -data.trait$SLA.sd <- 1/data.trait$LMALAM_SED -data.trait$SLA.sd <- data.trait$SLA.sd*1000 ## Conversion from g m^-2 to mm2 mg^-1 -data.trait$SLA.sd <- data.trait$SLA.sd*sqrt(data.trait$LMALAM_ND) ## conversion of SEM in SD -data.trait$LMALAM_AVD <- data.trait$LMALAM_SED <- data.trait$LMALAM_ND <- NULL -data.trait$Wood.density.mean <- data.trait$SG100C_AVG; -data.trait$Wood.density.sd <- data.trait$SG100C_SEM*sqrt(data.trait$SG100C_N) ## conversion of SEM in SD -data.trait$SG100C_AVG <- data.trait$SG100C_N <- data.trait$SG100C_SEM <- NULL -data.trait$Max.height.mean <- (data.trait$HEIGHT_AVG) -data.trait$Max.height.sd <- (data.trait$HEIGHT_SEM*sqrt(data.trait$HEIGHT_N)) -data.trait$HEIGHT_SEM <- data.trait$HEIGHT_N <- data.trait$HEIGHT_AVG <- NULL -data.trait$Latin_name <- sub(" ","_",data.trait$Latin) - -## THIS NEW FUNCTION IS WORKING -fun.extract.format.sp.traits.NOT.TRY(sp=unique(data.bci$Latin), Latin_name=unique(data.bci$Latin), data=data.trait,name.match.traits="Latin") - ########################################## FORMAT INDIVIDUAL TREE DATA data.bci <- data.bci[order(data.bci[["TreeID"]]),] @@ -91,7 +58,6 @@ data.bci$obs.id <- apply(data.bci[,c("TreeID","Census")],1,paste,collapse="_") data.bci$tree.id <- data.bci$TreeID; data.bci$TreeID <- NULL data.bci$x <- data.bci$gx; data.bci$gx <- NULL data.bci$y <- data.bci$gy; data.bci$gy <- NULL - data.bci$G <- 10 * (data.bci$DBH1 - data.bci$DBH1)/data.bci$year ## diameter growth in mm per year - BASED ON UNROUNDED YEARS data.bci$D <- data.bci[["DBH1"]]/10 ## diameter in cm data.bci$subplot <- data.bci[["Quadrat"]] @@ -102,37 +68,62 @@ data.bci$Latin <- NULL data.bci$census <- data.bci$Census data.bci$Census <- NULL data.bci$dead <- as.numeric(is.na(data.bci$DBH2)) ## Not entire sure if this works, given the way this data is designed - ###################### ECOREGION bci has only 1 eco-region - ###################### PERCENT DEAD perc.dead <- tapply(data.bci[["dead"]], INDEX = data.bci[["plot"]], FUN = function.perc.dead2) data.bci <- merge(data.bci, data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) - - ########################################################### PLOT SELECTION FOR THE ANALYSIS vec.abio.var.names <- c("MAT", "MAP") -vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", - "year", "htot", "Lon", "Lat", "perc.dead","weights","census") +vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","plot", "subplot", "D", "G", "dead", + "year", "htot", "x", "y", "census") data.tree <- subset(data.bci, select = c(vec.basic.var, vec.abio.var.names)) +write.csv(data.tree,file="../../output/process/data.tree.bci.csv") + + +###################################### MASSAGE TRAIT DATA Use HEIGHT_AVG, LMALAM_AVD, SEED_DRY +data.trait <- read.csv("../../data/raw/BCI/BCITRAITS_20101220.csv", stringsAsFactors = FALSE, + header = T) +data.trait$Latin <- apply(data.trait[, 1:2], 1, paste, collapse = " ") +data.trait <- data.trait[,c("GENUS.","SP.","Latin","SEED_DRY","LMALAM_AVD","LMALAM_SED","LMALAM_ND","SG100C_AVG","SG100C_SEM","SG100C_N","HEIGHT_AVG","HEIGHT_SEM","HEIGHT_N")] +data.trait$sp <- data.trait[["SP."]] +data.trait[["SP."]] <- NULL +data.trait$Leaf.N.mean <- NA +data.trait$Leaf.N.sd <- NA +data.trait$Seed.mass.mean <- data.trait$SEED_DRY*1000 +data.trait$SEED_DRY <- NULL +data.trait$Seed.mass.sd <- NA +data.trait$SLA.mean <- 1/data.trait$LMALAM_AVD +data.trait$SLA.mean <- data.trait$SLA.mean*1000 ## Conversion from g m^-2 to mm2 mg^-1 +data.trait$SLA.sd <- 1/data.trait$LMALAM_SED +data.trait$SLA.sd <- data.trait$SLA.sd*1000 ## Conversion from g m^-2 to mm2 mg^-1 +data.trait$SLA.sd <- data.trait$SLA.sd*sqrt(data.trait$LMALAM_ND) ## conversion of SEM in SD +data.trait$LMALAM_AVD <- data.trait$LMALAM_SED <- data.trait$LMALAM_ND <- NULL +data.trait$Wood.density.mean <- data.trait$SG100C_AVG; +data.trait$Wood.density.sd <- data.trait$SG100C_SEM*sqrt(data.trait$SG100C_N) ## conversion of SEM in SD +data.trait$SG100C_AVG <- data.trait$SG100C_N <- data.trait$SG100C_SEM <- NULL +data.trait$Max.height.mean <- (data.trait$HEIGHT_AVG) +data.trait$Max.height.sd <- (data.trait$HEIGHT_SEM*sqrt(data.trait$HEIGHT_N)) +data.trait$HEIGHT_SEM <- data.trait$HEIGHT_N <- data.trait$HEIGHT_AVG <- NULL +data.trait$Latin_name <- sub(" ","_",data.trait$Latin) + +###### TODO SAVE TRAITS IN STD FORMAT + + +## THIS NEW FUNCTION IS WORKING +## fun.extract.format.sp.traits.NOT.TRY(sp=unique(data.bci$Latin), Latin_name=unique(data.bci$Latin), data=data.trait,name.match.traits="Latin") + -write.csv(data.tree,file="./data/process/data.tree.bci.csv") ############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in ############################################## m^2/ha without the target species - data.tree <- subset(data.tree,subset=!is.na(data.tree[["D"]])) -data.tree <- subset(data.tree,subset=data.tree[["D"]]>5) ### select only tree below 5cm of dbh to start - +data.tree <- subset(data.tree,subset=data.tree[["D"]]>5) ### select only tree above 5cm of dbh to start ### species as factor because number data.tree[['sp']] <- factor(data.tree[['sp']]) Rlim <- 15 # set size of neighborhood for competition index - ## for each census compute competition index - ## FOR CENSUS 6 only to start data.tree1 <- data.tree[data.tree$census==6,] - fun.data.per.bigplot(data=data.tree1,name.site="BCI",data.TRAITS=data.trait,Rlim=15,xy.name=c("x","y")) diff --git a/R/format.data/CANADA.R b/R/format.data/CANADA.R index e211c41b728e40add8abda9076dcf38351c10635..7ab4efd1491ab5aa5794b78ad8441fdb141e0585 100644 --- a/R/format.data/CANADA.R +++ b/R/format.data/CANADA.R @@ -1,28 +1,20 @@ ### MERGE canada DATA rm(list = ls()) -source("./R/format.function.R") -source("./R/FUN.TRY.R") +source("format.fun.R") library(reshape) - ######################### READ DATA read individuals tree data -#data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130818.csv", -# header = TRUE, stringsAsFactors = FALSE) -data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130910.csv", +data.canada <- read.csv("../../data/raw/Canada/Canada_Data2George_20130910.csv", header = TRUE, stringsAsFactors = FALSE) sum(is.na(data.canada$species)) data.canada$sp = data.canada$species_FIACode data.canada$species_FIACode <- NULL - ### read species names and merge with data.canada -species.clean <- read.csv("./data/raw/DataCanada/FIA_REF_SPECIES.csv", stringsAsFactors = FALSE) +species.clean <- read.csv("../../data/raw/Canada/FIA_REF_SPECIES.csv", stringsAsFactors = FALSE) data.canada <- merge(data.canada, data.frame(sp = species.clean$SPCD, species.clean[,2:3], stringsAsFactors = F), by = "sp", all.x = T) data.canada$sp.name <- data.canada$COMMON_NAME; data.canada$COMMON_NAME <- NULL - ###################################### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER ###################################### WHEN WE ANALYZE THAT DATASET LATER ON - ########################################## FORMAT INDIVIDUAL TREE DATA - data.canada$FinalDBH[data.canada$FinalDBH < 0] <- NA data.canada$G <- 10 * (data.canada$FinalDBH - data.canada$InitDBH)/data.canada$Interval ## diameter growth in mm per year data.canada$G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA @@ -37,16 +29,13 @@ data.canada$census <- rep(1,nrow(data.canada)) data.canada$obs.id <- data.canada$tree.id data.canada$weights <- 1/(data.canada$SubPlotSize*10000) ### there is some strange small value in SubPlotSize 20 m^2 even for large tree NEED TO ASK Hongcheng data.canada$cluster <- as.character(data.us[["plot"]]) ## cluster code - ###################### ECOREGION merge to have no ecoregion with low number of observation -greco <- read.csv(file = "./data/raw/DataCanada/EcoregionCodes.csv", header = T, +greco <- read.csv(file = "../../data/raw/Canada/EcoregionCodes.csv", header = T, sep = "\t") - data.canada$ecocode <- data.canada$Ecocode; data.canada$Ecocode <- NULL table(data.canada$Ecocode) ## Some ecoregions still have small # of individuals, so either drop off for ## analysis later on or wait for Quebec data to come in - library(RColorBrewer) ecoreg <- factor(data.canada$Ecocode); levels(ecoreg) <- mycols <- brewer.pal(length(levels(ecoreg)), "Set1") plot(data.canada[['Lon']],data.canada[['Lat']],pty='.',cex=.2,col = as.character(ecoreg)) @@ -54,48 +43,16 @@ legend('bottomleft', col = mycols, legend = names(table(data.canada$Ecocode)), p points(data.canada[['Lon']][data.canada$Ecocode == "-331"], data.canada[['Lat']][data.canada$Ecocode == "-331"],pty=2,cex=1, col = 'black') points(data.canada[['Lon']][data.canada$Ecocode == "-251"], data.canada[['Lat']][data.canada$Ecocode == "-251"],pty=2,cex=1, col = 'red') #data.canada$eco_codemerged <- combine_factor(data.canada$eco_code,c(1:8,6,9)) - ###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is ###################### missing compute numer of dead per plot to remove plot with disturbance perc.dead <- tapply(data.canada[["dead"]], INDEX = data.canada[["plot"]], FUN = function.perc.dead2) data.canada <- merge(data.canada, data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) - ####################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 table(data.canada$dead) data.canada <- data.canada[data.canada$dead == 0,] - vec.abio.var.names <- c("MAT", "MAP") vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","weights","census") data.tree <- subset(data.canada, select = c(vec.basic.var, vec.abio.var.names)) - -write.csv(data.tree,file="./data/process/data.tree.canada.csv") - -############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in -############################################## m^2/ha without the target species -data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.canada[["tree.id"]]), diam = as.vector(data.canada[["D"]]), - sp = as.vector(data.canada[["sp"]]), id.plot = as.vector(data.canada[["plot"]]), - weights = 1/(10000 * data.canada[["SubPlot_Size"]]), weight.full.plot = NA) - -## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0 -data.BA.SP[, -1][data.BA.SP[, -1] < 0] <- 0 - -### CHECK IF sp and sp name for column are the same -if (sum(!(names(data.BA.SP)[-1] %in% unique(data.canada[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") - -#### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[, -1], 1, sum, na.rm = TRUE) -data.BA.SP$BATOT.COMPET <- BATOT.COMPET -rm(BATOT.COMPET) -### create data frame -names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id = data.canada[["tree.id"]], ecocode = data.canada[["ecocode"]]), - data.BA.SP, by = "tree.id", sort = FALSE) -## test -if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") - -## save everything as a list -list.canada <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) -save(list.canada, file = "./data/process/list.canada.Rdata") +write.csv(data.tree,file="../../output/process/data.tree.canada.csv") diff --git a/R/format.data/FRANCE.R b/R/format.data/FRANCE.R index 432e33a44c0a56b30e73df9218ee71280b4b1319..0630d4e9816c1349147da3eae88bc2e2726d03d2 100644 --- a/R/format.data/FRANCE.R +++ b/R/format.data/FRANCE.R @@ -1,49 +1,38 @@ ############################################# MERGE FRENCH DATA rm(list = ls()); -source("./R/format.function.R") +source("format.fun.R") library(reshape) - ################################ READ DATA -data.france <- read.csv("./data/raw/DataFrance/dataIFN.FRANCE.csv", stringsAsFactors = FALSE) +data.france <- read.csv("../../data/raw/France/dataIFN.FRANCE.csv", stringsAsFactors = FALSE) ### read IFN species names and clean -species.clean <- fun.clean.species.tab(read.csv("./data/raw/DataFrance/species.csv", stringsAsFactors = FALSE)) - - - +species.clean <- fun.clean.species.tab(read.csv("../../data/raw/France/species.csv", stringsAsFactors = FALSE)) ###################################### MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed ###################################### height to add variables to the traits data base Because we have two heights, ###################################### then take the max of the two heights and then bootstrap res.quant.boot <- t(sapply(levels(factor(data.france[["espar"]])), FUN = f.quantile.boot, R = 1000, x = log10(data.france[["htot"]]), fac = factor(data.france[["espar"]]))) - ## create data base data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3]) rm(res.quant.boot) -write.csv(data.max.height,file='./data/process/data.max.height.france.csv') - +write.csv(data.max.height,file='../../output/process/data.max.height.france.csv') ## merge TRY with max height merge.TRY <- merge(merge.TRY, data.max.height, by = "code") rm(data.max.height) ## use mean sd of max tree height over all species merge.TRY$Max.height.sd.1 <- rep(mean(merge.TRY[["Max.height.sd"]], na.rm = TRUE), length = nrow(merge.TRY)) - ### keep only variables needed in traits data names.traits.data <- c("code", "Latin_name", "Leaf.N.mean", "Seed.mass.mean", "SLA.mean", "Wood.Density.mean", "Leaf.Lifespan.mean", "Max.height.mean", "Leaf.N.sd.1", "Seed.mass.sd.1", "SLA.sd.1", "Wood.Density.sd.1", "Leaf.Lifespan.sd.1", "Max.height.sd.1") - data.traits <- merge.TRY[, names.traits.data] names(data.traits) <- c("espar", "latin_name", "leafN.mean", "seedmass.mean", "SLA.mean", "wooddensity.mean", "leaflifespan.mean", "maxheight.mean", "leafN.sd", "seedmass.sd", "SLA.sd", "wooddensity.sd", "leaflifespan.sd", "maxheight.sd") rm(merge.TRY, names.traits.data) - - ########################################## FORMAT INDIVIDUAL TREE DATA change unit and names of variables to be the same ########################################## in all data for the tree - data.france$G <- data.france[["ir5"]]/5 * 2 ## diameter growth in mm per year data.france$year <- rep(5, nrow(data.france)) ## number of year between measurement data.france$D <- data.france[["c13"]]/pi ## diameter in cm @@ -55,8 +44,6 @@ data.france$tree.id <- paste(data.france[["plot.id"]], data.france[["a"]], sep = data.france$weights <- data.france[["w"]]/10000 data.france$obs.id <- 1:nrow(data.france) ## There is only obs per tree.id, so this is superfluous data.france$census <- rep(1,nrow(data.france)) ## only one census - - ######################## change coordinates system of x y to be in lat long WGS84 library(sp) library(dismo) @@ -65,7 +52,6 @@ data.sp <- data.france[, c("idp", "xl93", "yl93")] coordinates(data.sp) <- c("xl93", "yl93") ## EPSG CODE 2154 proj4string(data.sp) <- CRS("+init=epsg:2154") # define projection system of our data ## EPSG CODE 2154 summary(data.sp) - data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon data.france$Lon <- coordinates(data.sp2)[, "xl93"] data.france$Lat <- coordinates(data.sp2)[, "yl93"] @@ -73,9 +59,7 @@ rm(data.sp, data.sp2) ## ## plot on world map library(rworldmap) newmap <- getMap(resolution = 'coarse') ## # different resolutions available plot(newmap) ## points(data.sp2,cex=0.2,col='red') - ###################### ECOREGION - merge greco to have no ecoregion with low number of observation - ## merge A and B Grand Ouest cristallin and oceanique and Center semi-oceanique ## merge G D E Vosges Jura massif cemtral (low mountain) merge H and I Alpes and ## Pyrenees Merge J and K Corse and Mediteraneen @@ -86,66 +70,25 @@ GRECO.temp <- sub("[HI]", "HI", GRECO.temp) GRECO.temp <- sub("[JK]", "JK", GRECO.temp) ## plot(data.france[['xl93']],data.france[['yl93']],col=unclass(factor(GRECO.temp))) data.france$ecocode <- GRECO.temp ## a single code for each ecoregion - - ###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is ###################### missing compute numer of dead per plot to remove plot with disturbance perc.dead <- tapply(data.france[["dead"]], INDEX = data.france[["plot.id"]], FUN = function.perc.dead2) - data.france <- merge(data.france, data.frame(plot.id = as.numeric(names(perc.dead)), perc.dead = perc.dead), sort = FALSE) - - ########################################################################################### PLOT SELECTION FOR THE ANALYSIS - ## data.france <- subset(data.france,subset= data.france[['YEAR']] != 2005) ## year 2005 bad data according to IFN data.france <- subset(data.france, subset = data.france[["plisi"]] == 0) ## no plot on forest edge data.france <- subset(data.france, subset = data.france[["dc"]] == 0) ## no harvesting data.france <- subset(data.france, subset = data.france[["tplant"]] == 0) ## no plantation data.france <- subset(data.france, subset = !is.na(data.france[["SER"]])) ## missing SER - ## SELECT GOOD COLUMNS ## names of variables for abiotic conditions vec.abio.var.names <- c("MAT", "SAP", "sgdd", "WB.s", "WB.y", "WS.s", "WS.y") ## other var vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead", "weights","census") - data.tree <- subset(data.france, select = c(vec.basic.var, vec.abio.var.names)) -write.csv(data.tree,file="./data/process/data.tree.france.csv") - - -############### -## TODO CHANGE FOR NEW FORMATING PER ECOREGION - -############################################## -#################### GENERATE ONE OBJECT PER ECOREGION - COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in -#################### m^2/ha without the target species - -data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.france[["tree.id"]]), diam = as.vector(data.france[["D"]]), - sp = as.vector(data.france[["sp"]]), id.plot = as.vector(data.france[["idp"]]), - weights = as.vector(data.france[["w"]])/10000, weight.full.plot = 1/(pi * - (c(15))^2)) - -## change NA and <0 data for 0 -data.BA.SP[which(is.na(data.BA.SP), arr.ind = TRUE)] <- 0 -data.BA.SP[, -1][which(data.BA.SP[, -1] < 0, arr.ind = TRUE)] <- 0 - -### CHECK IF sp and sp name for column are the same -if (sum(!(names(data.BA.SP)[-1] %in% unique(data.france[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") - +write.csv(data.tree,file="../../output/process/data.tree.france.csv") -#### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[, -1], MARGIN = 1, FUN = sum, na.rm = TRUE) -data.BA.SP$BATOT.COMPET <- BATOT.COMPET -### create data frame -names(data.BA.SP) <- c("tree.id", names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id = data.france[["tree.id"]], ecocode = data.france[["ecocode"]]), - data.BA.SP, by = "tree.id", sort = FALSE) -## test -if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") -## save everything as a list -list.FRANCE <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) -save(list.FRANCE, file = "./data/process/list.FRANCE.Rdata") diff --git a/R/format.data/FUSHAN.R b/R/format.data/FUSHAN.R index e20463be35fa561d2c57a51ac2965b0b184a4169..74d5f9ba32aa277e40ed6f871f1f4595fec37aca 100644 --- a/R/format.data/FUSHAN.R +++ b/R/format.data/FUSHAN.R @@ -1,19 +1,16 @@ ### MERGE Fushan DATA rm(list = ls()) -source("./R/format.function.R") +source("format.fun.R") library(reshape) - ######################### READ DATA read individuals tree data -load("./data/raw/DataFushan/fushan.rdata") +load("../../data/raw/Fushan/fushan.rdata") data.fushan <- data.frame(fushan) rm(fushan) - ### read species names -species.clean <- read.csv("./data/raw/DataFushan/Splist_Fushan_En.csv", stringsAsFactors = FALSE) - +species.clean <- read.csv("../../data/raw/Fushan/Splist_Fushan_En.csv", stringsAsFactors = FALSE) ###################################### MASSAGE TRAIT DATA Obtain maximum height per species from data.trait no sd ###################################### available as we have only one observation for species -data.trait <- read.table("./data/raw/DataFushan/fs_trait_Kunstler.txt", header = T, +data.trait <- read.table("../../data/raw/Fushan/fs_trait_Kunstler.txt", header = T, sep = "\t") colnames(data.trait) <- c("sp", "sla", "wd", "seedmassmg", "meanN", "maxheightm") data.trait <- merge(data.trait, data.frame(sp = species.clean$sp, Latin = apply(species.clean[,c("genus","epithet")],1,paste,collapse="_"), @@ -28,11 +25,9 @@ data.trait$Wood.density.mean <- data.trait$wd; data.trait$wd <- NULL data.trait$Wood.density.sd <- NA data.trait$Max.height.mean <- log10(data.trait$maxheightm); data.trait$Max.height.sd <- NA - data.max.height <- data.frame(sp = data.trait$sp, Max.height = log10(data.trait$maxheightm)) data.fushan <- merge(data.fushan, data.max.height, by = "sp") data.trait$maxheightm <- NULL - ########################################## FORMAT INDIVIDUAL TREE DATA data.fushan$year <- rep(5, nrow(data.fushan)) ## number of year between measurement data.fushan$G <- 10*(data.fushan$dbh2 - data.fushan$dbh1)/data.fushan$year ## diameter growth in mm per year @@ -52,41 +47,31 @@ data.fushan$weights <- 1/(pi*(0.5*data.fushan$D/100)^2) data.fushan$obs.id <- data.fushan$tag data.fushan$cluster <- rep(NA, nrow(data.fushan)) data.fushan$census <- rep(1,nrow(data.fushan)) - ########################################## CHANGE COORDINATE SYSTEM - ###################### 1 ECOREGION ONLY - ###################### PERCENT DEAD CANNOT DO THIS WITHOUT A PLOTID compute numer of dead per plot to ###################### remove plot with disturbance perc.dead <- tapply(data.fushan[["dead"]], INDEX = data.fushan[["plot"]], FUN = function.perc.dead2) data.fushan <- merge(data.fushan, data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) - ########################################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 table(data.fushan$dead) data.fushan <- data.fushan[data.fushan$dead == 0, ] - vec.abio.var.names <- c("MAT", "MAP") vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","weights","census") data.tree <- subset(data.fushan, select = c(vec.basic.var, vec.abio.var.names)) - -write.csv(data.tree,file="./data/process/data.tree.fushan.csv") - +write.csv(data.tree,file="../../output/process/data.tree.fushan.csv") ############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in ############################################## m^2/ha without the target species CANNOT DO WITHOUT A PLOT ID data.BA.SP <- BA.SP.FUN(id.tree = as.vector(data.fushan[["tree.id"]]), diam = as.vector(data.fushan[["D"]]), sp = as.vector(data.fushan[["sp"]]), id.plot = as.vector(data.fushan[["plot"]]), weights = data.fushan$weights, weight.full.plot = NA) - ## change NA and <0 data for 0 data.BA.SP[is.na(data.BA.SP)] <- 0 data.BA.SP[, -1][data.BA.SP[, -1] < 0] <- 0 - ### CHECK IF sp and sp name for column are the same if (sum(!(names(data.BA.SP)[-1] %in% unique(data.fushan[["sp"]]))) > 0) stop("competition index sp name not the same as in data.tree") - #### compute BA tot for all competitors BATOT.COMPET <- apply(data.BA.SP[, -1], 1, sum, na.rm = TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET @@ -97,7 +82,6 @@ data.BA.sp <- merge(data.frame(tree.id = data.fushan[["tree.id"]], ecocode = dat data.BA.SP, by = "tree.id", sort = FALSE) ## test if (sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) > 0) stop("competition index not in the same order than data.tree") - ## save everything as a list list.canada <- list(data.tree = data.tree, data.BA.SP = data.BA.sp, data.traits = data.traits) -save(list.spain, file = "./data/process/list.canada.Rdata") +save(list.spain, file = "../../output/process/list.canada.Rdata") diff --git a/R/format.data/NSW.R b/R/format.data/NSW.R index 59de20a8f735d4a49f18c75d7e1de781008f4ebf..ab3fde26beb57efc681981946924c1e85c4f4bd0 100644 --- a/R/format.data/NSW.R +++ b/R/format.data/NSW.R @@ -1,35 +1,31 @@ ### MERGE NSW DATA rm(list = ls()) -source("./R/format.function.R") +source("format.fun.R") library(reshape) - ######################### READ DATA read individuals tree data -data.nswbrc <- read.csv("./data/raw/DataNSW/NSW_data_BRcontrols.csv", header = TRUE, +data.nswbrc <- read.csv("../../data/raw/NSW/NSW_data_BRcontrols.csv", header = TRUE, stringsAsFactors = FALSE) data.nswbrc$Date.of.measure <- as.vector(sapply(data.nswbrc$Date.of.measure, function(x) { unlist(strsplit(x, "/"))[3] })) ## Extract the years only -data.nswbrt <- read.csv("./data/raw/DataNSW/NSW_data_BRtreatments.csv", header = TRUE, +data.nswbrt <- read.csv("../../data/raw/NSW/NSW_data_BRtreatments.csv", header = TRUE, stringsAsFactors = FALSE) data.nswbrt$Date.of.measure <- as.vector(sapply(data.nswbrt$Date.of.measure, function(x) { unlist(strsplit(x, "/"))[3] })) ## Extract the years only -data.nswbs1 <- read.csv("./data/raw/DataNSW/NSW_data_BS1.csv", header = TRUE, stringsAsFactors = FALSE) +data.nswbs1 <- read.csv("../../data/raw/NSW/NSW_data_BS1.csv", header = TRUE, stringsAsFactors = FALSE) data.nswbs1$Date.of.measure <- as.character(format(as.Date(data.nswbs1$Date.of.measure, format = "%d-%b-%y"), format = "%d/%b/%Y")) data.nswbs1$Date.of.measure <- as.vector(sapply(data.nswbs1$Date.of.measure, function(x) { unlist(strsplit(x, "/"))[3] })) ## Extract the years only - ## data.nswbs2 has a different format to the other datasets, so format to match ## the above -data.nswbs2 <- read.csv("./data/raw/DataNSW/NSW_data_BS2.csv", header = TRUE, stringsAsFactors = FALSE) +data.nswbs2 <- read.csv("../../data/raw/NSW/NSW_data_BS2.csv", header = TRUE, stringsAsFactors = FALSE) data.nswbs2$Plot <- apply(data.nswbs2[, 1:2], 1, paste, collapse = "") - data.nswbs2$Subplot <- NULL data.nswbs2$Family <- NULL colnames(data.nswbs2)[3] <- "species" - data.nswbs2$x <- data.nswbs2$y <- rep(NA, nrow(data.nswbs2)) data.nswbs22 <- data.nswbs2[rep(1:nrow(data.nswbs2), each = 2), ] data.nswbs22$Date.of.measure <- rep(c(1988, 2000), nrow(data.nswbs2)) @@ -37,18 +33,15 @@ data.nswbs22$Dbh <- c(rbind(data.nswbs2[["DBH.cm..1988."]], data.nswbs2[["DBH.cm data.nswbs22[["DBH.cm..1988."]] <- data.nswbs22[["DBH.cm..2000."]] <- NULL data.nswbs2 <- data.nswbs22[, c(1:5, 8:9, 7, 6)] rm(data.nswbs22) - -data.nswtnd <- read.csv("./data/raw/DataNSW/NSW_data_TND.csv", header = TRUE, stringsAsFactors = FALSE) +data.nswtnd <- read.csv("../../data/raw/NSW/NSW_data_TND.csv", header = TRUE, stringsAsFactors = FALSE) data.nswtnd$Date.of.measure <- as.character(format(as.Date(data.nswtnd$Date.of.measure, format = "%d-%b-%y"), format = "%d/%b/%Y")) data.nswtnd$Date.of.measure <- as.vector(sapply(data.nswtnd$Date.of.measure, function(x) { unlist(strsplit(x, "/"))[3] })) ## Extract the years only - data.nsw <- rbind(data.nswbrc, data.nswbrt, data.nswbs1, data.nswbs2, data.nswtnd) - ###################################### MASSAGE TRAIT DATA -data.trait <- read.csv("./data/raw/DataNSW/NSW_traits.csv", header = TRUE, stringsAsFactors = FALSE) +data.trait <- read.csv("../../data/raw/NSW/NSW_traits.csv", header = TRUE, stringsAsFactors = FALSE) data.trait$sp <- data.trait[["Species.all"]]; data.trait[["Species.all"]] <- NULL ## There is not sp.code in data.nsw; using spp name as code data.trait$Latin_name <- data.trait$sp data.trait$Leaf.N.mean <- NA @@ -61,7 +54,6 @@ data.trait$Wood.density.mean <- data.trait[["WD_basic_kg.m3"]]/1000; data.trait[ data.trait$Wood.density.sd <- NA data.trait$Max.height.mean <- data.trait$Log10_Hmax_m; data.trait$Log10_Hmax_m <- NULL data.trait$Max.height.sd <- NA - ########################################## FORMAT INDIVIDUAL TREE DATA Each tree has at most 3 observations (from prelim ########################################## checks of the data) data.nsw$tree.id <- apply(data.nsw[, 1:2], 1, paste, collapse = "_") @@ -88,7 +80,6 @@ data.nsw2 <- data.nsw2[-1, ] data.nsw2$Date.of.measure <- data.nsw2$Dbh <- NULL data.nsw <- data.nsw2 for (k in 9:12) data.nsw[, k] <- as.numeric(data.nsw[, k]) - ## change unit and names of variables to be the same in all data for the tree data.nsw$year <- (data.nsw$year2 - data.nsw$year1) ## number of year between measurements data.nsw$G <- 10 * (data.nsw$dbh2 - data.nsw$dbh1)/(data.nsw$year) ## diameter growth in mm per year @@ -108,58 +99,42 @@ data.nsw$weights[grep("BB", data.nsw$Plot)] <- 1/(20 * 80) data.nsw$weights[grep("CC", data.nsw$Plot)] <- 1/(20 * 80) data.nsw$weights[grep("DD", data.nsw$Plot)] <- 1/(20 * 80) data.nsw$weights[grep("BS", data.nsw$Plot)] <- 1/(25 * 30) - data.nsw$weights[grep("BR", data.nsw$Plot)] <- 1/(60.4 * 60.4) - data.nsw$weights[grep("END", data.nsw$Plot)] <- 1/(40 * 50) data.nsw$weights[grep("TND", data.nsw$Plot)] <- 1/(40 * 50) - data.nsw$cenus <- data.nsw$year1 data.nsw$obs.id <- 1:nrow(data.nsw) - ###################### ECOREGION nsw has only 1 eco-region - ###################### PERCENT DEAD NO DATA ON MORTALITY - perc.dead <- tapply(data.nsw[["dead"]], INDEX = data.nsw[["plot"]], FUN = function.perc.dead2) data.nsw <- merge(data.nsw, data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) - ########################################### VARIABLES SELECTION FOR THE ANALYSIS vec.abio.var.names <- c("MAT", "MAP") vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","weights","census") data.tree <- subset(data.nsw, select = c(vec.basic.var, vec.abio.var.names)) - -write.csv(data.tree,file="./data/process/data.tree.nsw.csv") - - +write.csv(data.tree,file="../../output/process/data.tree.nsw.csv") ############################################## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in ############################################## m^2/ha without the target species NEED TO COMPUTE BASED ON RADIUS AROUND TARGET ############################################## TREE species as factor because number data.nsw[["species"]] <- factor(data.nsw[["species"]]) data.nsw$spcode <- data.nsw[["species"]] levels(data.nsw$spcode) <- 1:length(levels(data.nsw$spcode)) - data.BA.SP <- BA.SP.FUN(obs.id = as.vector(data.nsw[["tree.id"]]), diam = as.vector(data.nsw[["D"]]), sp = as.vector(data.nsw[["spcode"]]), id.plot = as.vector(data.nsw[["Plot"]]), weights = data.nsw[["weights"]], weight.full.plot = NA) - ## change NA and <0 data for 0 data.BA.SP[is.na(data.BA.SP)] <- 0 - data.BA.SP2 <- data.frame(data.BA.SP) colnames(data.BA.SP2) <- colnames(data.BA.SP) - ### CHECK IF sp and sp name for column are the same if (sum(!(names(data.BA.SP2)[-1] %in% unique(data.nsw[["spcode"]]))) > 0) stop("competition index sp name not the same as in data.tree") - #### compute BA tot for all competitors BATOT.COMPET <- apply(data.BA.SP2[, -1], 1, sum, na.rm = TRUE) data.BA.SP2$BATOT.COMPET <- BATOT.COMPET rm(BATOT.COMPET) data.BA.SP <- data.BA.SP2 - # Rlim <- 15 # set size of neighborhood for competition index system.time(test <- # fun.compute.BA.SP.XY.per.plot(1,data.tree=data.nsw,Rlim=15,parallel=TRUE,rpuDist=FALSE)) # list.BA.SP.data <- @@ -178,7 +153,6 @@ data.BA.SP <- data.BA.SP2 # pdf('./figs/plots.tree.pdf') # lapply(unique(data.nsw[['plot']]),FUN=fun.circles.plot,data.nsw[['x']],data.nsw[['y']],data.nsw[['plot']],data.nsw[['D']],inches=0.2,xlim=c(0,250),ylim=c(0,250)) # dev.off() - ## save everything as a list list.nsw <- list(data.tree = data.nsw, data.BA.SP = data.BA.sp, data.traits = data.traits) -save(list.nsw, file = "./data/process/list.nsw.Rdata") +save(list.nsw, file = "../../output/process/list.nsw.Rdata") diff --git a/R/format.data/NZ.R b/R/format.data/NZ.R index e57051463707e1ef0b84410355d3e1d5cf572ecc..6a67fd66963468bbb4e59e71edb1d99606abf66f 100644 --- a/R/format.data/NZ.R +++ b/R/format.data/NZ.R @@ -1,13 +1,10 @@ ### MERGE NZ DATA rm(list = ls()) -source("./R/format.function.R") -source("./R/FUN.TRY.R") +source("format.fun.R") library(reshape) - - ###################################### MASSAGE TRAIT DATA Maximum height per species is already available from ###################################### data.trait (in m); so no sd's and only obs per spp -data.trait <- read.csv("./data/raw/DataNVS/nz_traits_130801.csv", , header = TRUE, +data.trait <- read.csv("../../data/raw/NVS/nz_traits_130801.csv", , header = TRUE, stringsAsFactors = FALSE) data.trait <- data.trait[, -1] colnames(data.trait)[1] <- "sp" @@ -22,31 +19,24 @@ data.trait$Wood.density.mean <- data.trait$wood; data.trait$wood <- NULL data.trait$Wood.density.sd <- NA data.trait$Max.height.mean <- log10(data.trait$height.m); data.trait$height.m <- NULL data.trait$Max.height.sd <- NA - - write.csv(data.trait,file='./data/process/data.trait.nz.csv') - - + write.csv(data.trait,file='../../output/process/data.trait.nz.csv') ######################### READ DATA read individuals tree and environmental data -data.nz <- read.csv("./data/raw/DataNVS/nz_treedata_growth_130801.csv", header = TRUE, +data.nz <- read.csv("../../data/raw/NVS/nz_treedata_growth_130801.csv", header = TRUE, stringsAsFactors = FALSE, skip = 9) data.nz <- data.nz[, -1] data.nz$plid <- gsub("__", "_", data.nz$plid) data.nz$plid <- gsub("_", ".", data.nz$plid) ## Replace all underscores with a single dot - -data.plot <- read.csv("./data/raw/DataNVS/nz_plotinfo_130801.csv", , header = TRUE, +data.plot <- read.csv("../../data/raw/NVS/nz_plotinfo_130801.csv", , header = TRUE, stringsAsFactors = FALSE) data.plot <- data.plot[, -1] data.plot$plid <- gsub("__", "_", data.plot$plid) data.plot$plid <- gsub("_", ".", data.plot$plid) ## Replace all underscores with a single dot - data.nz <- merge(data.nz, data.frame(plid = data.plot$plid, Easting = data.plot$Easting, Northing = data.plot$Northing, Locality = data.plot$Locality, MAP = data.plot$map, MAT = data.plot$mat, Broadclass = data.plot$BroadClass, Specificclass = data.plot$SpecificClass, stringsAsFactors = F), sort = T, by = "plid") rm(data.plot) - ################################################################ FORMAT INDIVIDUAL TREE DATA - ## change unit and names of variables to be the same in all data for the tree data.nz$G <- 10 * (data.nz[["D1"]] - data.nz[["D0"]])/(data.nz[["t1"]] - data.nz[["t0"]]) ## diameter growth in mm per year data.nz$sp.name <- data.nz$sp ## no latin name so use that as not needed to get traits @@ -61,7 +51,6 @@ head(subset(data.nz,subset=data.nz$tag %in% names(table(data.nz$tag))[table(data data.nz$obs.id <- as.character(1:nrow(data.nz)) data.nz$census <- rep(1,nrow(data.nz)) # only one census NEED TO CHECK WITH DANIEL AND DAVID add issue data.nz$weights <- 1/(20*20) ## need to check with david - ########################################## CHANGE COORDINATE SYSTEM ########################################## system of Easting Northing to be in lat long WGS84 library(sp) @@ -71,7 +60,6 @@ data.sp <- data.nz[, c("tree.id", "Easting", "Northing")] coordinates(data.sp) <- c("Easting", "Northing") # define x y proj4string(data.sp) <- CRS("+init=epsg:27200") # define projection system of our data ## EPSG CODE 27200 NZGD49 / New Zealand Map Grid summary(data.sp) - detach(package:rgdal) data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon data.nz$Lon <- coordinates(data.sp2)[, "Easting"] @@ -84,53 +72,25 @@ newmap <- getMap(resolution = 'coarse') plot(newmap,xlim=c(166,177),ylim=c(-34,-47)) points(data.sp2,cex=0.2,col='red') dev.off() - rm(data.sp, data.sp2) - ###################### ECOREGION merge to have no ecoregion with low number of observation table(data.nz$Broad) ## merging two ecoregion based on Daniel Laughlin advice. data.nz$Broad[data.nz$Broad=="Wetland"] <- "BeechHumid" data.nz$Broad[data.nz$Broad=="ConiferDry"] <- "BeechHumid" data.nz$ecocode <- data.nz$Broad - - ###################### PERCENT DEAD perc.dead <- tapply(data.nz[["dead"]], INDEX = data.nz[["plot"]], FUN = function.perc.dead2) data.nz <- merge(data.nz, data.frame(plot = names(perc.dead), perc.dead = perc.dead), by = "plot", sort = FALSE) - ########################################################### PLOT SELECTION FOR THE ANALYSIS - - ##################### ###### REMOVE DEAD TREE at first census data.nz <- subset(data.nz,subset=!is.na(data.nz[["D"]])) - - vec.abio.var.names <- c("MAT", "MAP") vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","weights","census") data.tree <- subset(data.nz, select = c(vec.basic.var, vec.abio.var.names)) - -write.csv(data.tree,file="./data/process/data.tree.nz.csv") - - -### TODO TEST BIG FUNCTION WITH GOOD OPTION - -#### GENERATE ONE OBJECT PER ECOREGION - -# vector of ecoregion name -ecoregion.unique <- unique(data.tree[["ecocode"]]) - -## create lookup table for NZ -species.lookup <- data.frame(sp=data.nz[!duplicated(data.nz[["sp"]]),"sp"], - Latin_name=data.nz[!duplicated(data.nz[["sp"]]),"sp"], - Latin_name_syn=data.nz[!duplicated(data.nz[["sp"]]),"sp"], stringsAsFactors =FALSE) - -#### lapply function to generate data per ecoregion -system.time(lapply(ecoregion.unique, FUN = fun.data.per.ecoregion, data.tot = data.tree, - plot.name = "plot", weight.full.plot = NA, name.country = "NZ", data.TRY = NA, - species.lookup = species.clean)) +write.csv(data.tree,file="../../output/process/data.tree.nz.csv") diff --git a/R/format.data/PARACOU.R b/R/format.data/PARACOU.R index 4f3bef5fdc7e6a4ccbffd12fefb067ea955be67f..dc6335334a855ad1c3d80ad8f40f8b20c8c3f7b8 100644 --- a/R/format.data/PARACOU.R +++ b/R/format.data/PARACOU.R @@ -1,13 +1,11 @@ ### MERGE paracou DATA rm(list = ls()) -source("./R/format.function.R") +source("format.fun.R") library(reshape) - ############################ read individuals tree data -data.paracou <- read.table("./data/raw/DataParacou/20130717_paracou_1984_2012.csv", +data.paracou <- read.table("../../data/raw/Paracou/20130717_paracou_1984_2012.csv", header=TRUE,stringsAsFactors=FALSE,sep = ";", na.strings = "NULL") #barplot(apply(!is.na(data.paracou[,paste("circ_",1984:2012,sep="")]),MARGIN=2,FUN=sum),las=3) - # select good columns data.paracou <- data.paracou[,c("foret","parcelle","carre","arbre","vernaculaire","idtaxon", "x","y","circ_2001","code_2001","circ_2005","code_2005", @@ -15,22 +13,18 @@ data.paracou <- data.paracou[,c("foret","parcelle","carre","arbre","vernaculaire colnames(data.paracou) <- c("forest","plot","subplot","tree","vernacular","taxonid","x","y", "circum2001","code2001","circum2005","code2005","circum2009", "code2009","yeardied","typedeath") - ### change numeric separator numeric.col.name <- c("x","y","circum2001","code2001","circum2005","code2005","circum2009","code2009") for(k in numeric.col.name){ data.paracou[,k] <- gsub(",",".",data.paracou[,k]); data.paracou[,k] <- as.numeric(data.paracou[,k]) } ## Replace all , in decimals with . - data.paracou$tree.id <- apply(data.paracou[,c("plot","subplot","tree")],1,paste,collapse="_"); data.paracou$sp <- data.paracou[["taxonid"]] data.paracou <- data.paracou[,c(ncol(data.paracou),1:(ncol(data.paracou)-1))] - ## ## plot each plot ## pdf("./figs/plots.paracou.pdf") ## lapply(unique(data.paracou[["plot"]]),FUN=fun.circles.plot,data.paracou[['x']],data.paracou[['y']],data.paracou[["plot"]],data.paracou[["circum2009"]],inches=0.2) ## dev.off() - ############################# SELECT OBSERVATION WITHOUT PROBLEMS ## REMOVE ALL TREES WITH X OR Y >250 m data.paracou <- subset(data.paracou,subset=(!is.na(data.paracou[["x"]])) & data.paracou[["x"]]<251 & data.paracou[["y"]]<251) @@ -38,37 +32,29 @@ data.paracou <- subset(data.paracou,subset=(!is.na(data.paracou[["x"]])) & data. data.paracou <- subset(data.paracou,subset=! data.paracou[["plot"]] %in% 16:18) ## keep only tree alive in 2001 data.paracou <- subset(data.paracou,subset=!(as.numeric(data.paracou[["yeardied"]])<=2001 & !is.na(data.paracou[["yeardied"]]))) - - ######################################## MASSAGE TRAIT DATA - ### read species names -species.clean <- read.csv("./data/raw/DataParacou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";") +species.clean <- read.csv("../../data/raw/Paracou/20130717_paracou_taxonomie.csv",stringsAsFactors=FALSE, header = T, sep = ";") species.clean$sp <- species.clean[["idTaxon"]] species.clean$Latin_name <- paste(species.clean[["Genre"]],species.clean[["Espece"]],sep=" ") ## keep only one row pers idTaxon species.clean <- subset(species.clean,subset=!duplicated(species.clean[["sp"]]),select=c("sp","Latin_name","Genre","Espece","Famille","idCIRAD")) - ## select only species present in data base species.clean <- subset(species.clean,subset=species.clean[["sp"]] %in% data.paracou[["sp"]]) ## percentage of species with no taxonomic identification length(grep("Indet",species.clean[["Latin_name"]]))/nrow(species.clean) ## 25% - -dataWD <- read.csv("./data/raw/DataParacou/WD-Species-Paracou-Ervan_GV.csv",stringsAsFactors=FALSE, header = T,sep=" ") +dataWD <- read.csv("../../data/raw/Paracou/WD-Species-Paracou-Ervan_GV.csv",stringsAsFactors=FALSE, header = T,sep=" ") #dataWD <- merge(dataWD, species.clean, by = "idCIRAD", sort = F) length(unique(species.clean$idCIRAD)) != dim(species.clean) ## dataWD uses idCIRAD as identifier, but this is not a unique identifier in species.clean! ## But wood density seems to also be available from seed.traits - ### need to read the different traits data based and merge ..... -bridge <- read.csv("./data/raw/DataParacou/BridgeDATA.g.csv",stringsAsFactors=FALSE, header = T, sep = ";") +bridge <- read.csv("../../data/raw/Paracou/BridgeDATA.g.csv",stringsAsFactors=FALSE, header = T, sep = ";") bridge$Latin_name <- paste(bridge[["Genus"]],bridge[["species"]],sep=" ") ### check % of match of the bridg data sum(species.clean[["Latin_name"]] %in% bridge[["Latin_name"]])/length(species.clean[["Latin_name"]]) ## only 307 species /775 are in teh traits data .... - -seed.traits <- read.csv("./data/raw/DataParacou/Autour-de-Paracou-Releves-par-trait-et-taxon.txt",stringsAsFactors=FALSE, header = T, sep = "\t") - +seed.traits <- read.csv("../../data/raw/Paracou/Autour-de-Paracou-Releves-par-trait-et-taxon.txt",stringsAsFactors=FALSE, header = T, sep = "\t") ## Reformat seed.traits to one row per species, with each trait as a column spp.means <- (cast(seed.traits, LIB_TAXON ~ METHO_LIB, value = "MEASURE", fun = mean)) colnames(spp.means)[-1] <- paste(colnames(spp.means)[-1],".mean",sep="") @@ -83,9 +69,6 @@ seed.traits2$sp <- species.clean$sp[match(seed.traits2$Latin_name,species.clean$ colnames(seed.traits2) <- c("Latin_name","Leaf.N.mean","Leaf.N.sd","SLA.mean","SLA.sd","Wood.density.mean","Wood.density.sd","sp") seed.traits2$SLA.mean <- seed.traits2$SLA.mean/1 ## Conversion from m2/kg to mm2/mg seed.traits2$Wood.density.mean <- seed.traits2$Wood.density.mean/1 ## conversion from g/cm3 to mg/mm3 - - - # - $sp$ the species code as in previous table # - $Latin\_name$ the latin name of the species # - $Leaf.N.mean$ Leaf Nitrogen per mass in TRY mg/g @@ -95,10 +78,8 @@ seed.traits2$Wood.density.mean <- seed.traits2$Wood.density.mean/1 ## conversion # - $Max.height.mean$ from NFI data I compute the 99% quantile in m # - and the same columns with $sd$ instead of $mean$ with either the mean sd within species if species mean or the mean sd with genus if genus mean because no species data # - a dummy variable with true or false if genus mean - ## still to be completed ############################################ FORMAT INDIVIDUAL TREE DATA - data.paracou2 <- data.paracou[rep(1:nrow(data.paracou),each=2),c(1:10,(ncol(data.paracou)-2):ncol(data.paracou))] rownames(data.paracou2) <- 1:nrow(data.paracou2); data.paracou2 <- as.data.frame(data.paracou2) data.paracou2$yr1 <- rep(c(2001,2001+4),nrow(data.paracou)); data.paracou2$yr2 <- rep(c(2005,2005+4),nrow(data.paracou)) @@ -110,89 +91,64 @@ data.paracou2$code2 <- c(as.numeric(rbind(data.paracou$code2005,data.paracou$cod data.paracou2$dead <- rep(0,nrow(data.paracou)*2) data.paracou2$dead[c(as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]])), as.numeric(data.paracou[["yeardied"]]) %in% 2006:2009 & (!is.na(data.paracou[["yeardied"]])))] <- 1 - ## remove tree dead at first census for both date (census 2001-2005 2005-2009) data.paracou <- subset(data.paracou2,subset=!(data.paracou2[['yr1']] ==2005 & (as.numeric(data.paracou[["yeardied"]]) %in% 2002:2005 & (!is.na(data.paracou[["yeardied"]]))))) - ## change unit and names of variables to be the same in all data for the tree data.paracou$G <- 10*(data.paracou$dbh2-data.paracou$dbh1)/data.paracou$year ## diameter growth in mm per year data.paracou$G[data.paracou$code1>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh1 data.paracou$G[data.paracou$code2>0] <- NA ## indivs with code indicating problem in dbh measurment at dbh2 data.paracou2$cenus <- data.paracou$yr1 - data.paracou$D <- data.paracou[["dbh1"]]; data.paracou$D[data.paracou$D == 0] <- NA ;## diameter in cm data.paracou$plot <- data.paracou$plot#apply(data.paracou[,c("forest","plot","subplot")],1,paste,collapse=".") ## plot code data.paracou$htot <- rep(NA,length(data.paracou[["G"]])) ## height of tree in m data.paracou$obs.id <- 1:nrow(data.paracou) - ### delete recruit in 2001 or 2005 for first census data.paracou <- subset(data.paracou,subset=!is.na(data.paracou$D)) ## minimum circumfer 30 delete all tree with a dbh <30/pi, data.paracou <- subset(data.paracou,subset= data.paracou[["D"]]>(30/pi)) - ######################## ECOREGION - ## paracou has only 1 eco-region - ######################## PERCENT DEAD - compute numer of dead per plot to remove plot with disturbance - perc.dead <- tapply(data.paracou[["dead"]],INDEX=data.paracou[["plot"]],FUN=function.perc.dead2) data.paracou <- merge(data.paracou,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) - ################################################## VARIABLES SELECTION FOR THE ANALYSIS - #vec.abio.var.names <- c("MAT","MAP") ## MISSING NEED OTHER BASED ON TOPOGRAPHY ASK BRUNO vec.basic.var <- c("obs.id","tree.id","sp","plot","D","G","dead","census","year","htot","x","y","perc.dead") data.tree <- subset(data.paracou,select=c(vec.basic.var)) #,vec.abio.var.names - ############################################## ## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species ## NEED TO COMPUTE BASED ON RADIUS AROUND TARGET TREE - ### species as factor because number data.tree[['sp']] <- factor(data.tree[['sp']]) Rlim <- 15 # set size of neighborhood for competition index - ## for each census compute competition index - ## FOR 2001 data.tree1 <- subset(data.tree,subset=data.tree[["census"]]==2001) - library(doParallel) list.BA.SP.data1 <- mclapply(unique(data.tree1[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.tree1,Rlim=Rlim,mc.cores=4) data.BA.sp1 <- rbind.fill(list.BA.SP.data1) - ## FOR 2005 data.tree2 <- subset(data.tree,subset=data.tree[["census"]]==2005) - library(doParallel) list.BA.SP.data2 <- mclapply(unique(data.tree2[['plot']]),FUN=fun.compute.BA.SP.XY.per.plot,data.tree=data.tree2,Rlim=Rlim,mc.cores=4) data.BA.sp2 <- rbind.fill(list.BA.SP.data2) - data.tree <- rbind(data.tree1,data.tree2) data.BA.sp <- rbind(data.BA.sp1,data.BA.sp2) rm(data.tree1,data.tree2,data.BA.sp1,data.BA.sp2) - ### TEST DATA FORMAT if(sum(! rownames(BA.SP.temp)==data.tree.s[['obs.id']]) >0) stop('rows not in the good order') if(sum(!colnames(BA.SP.temp)==as.character((levels(data.tree.s[['sp']]))))>0) stop('colnames does mot match species name') ## test same order as data.tree if(sum(!data.BA.SP[["obs.id"]] == data.tree[["obs.id"]]) >0) stop("competition index not in the same order than data.tree") - ################################################ ## REMOVE TREE IN BUFFER ZONE BUFFER ZONE not.in.buffer.zone <- (data.tree[['x']]<(250-Rlim) & data.tree[['x']]>(0+Rlim) & data.tree[['y']]<(250-Rlim) & data.tree[['y']]>(0+Rlim)) - # remove subset data.tree <- subset(data.tree,subset=not.in.buffer.zone) data.BA.sp <- subset(data.BA.sp,subset=not.in.buffer.zone) - - - ## ## save everything as a list ## list.paracou <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) -## save(list.spain,file="./data/process/list.paracou.Rdata") - +## save(list.spain,file="../../output/process/list.paracou.Rdata") diff --git a/R/format.data/SPAIN.R b/R/format.data/SPAIN.R index 1a51dfec967cb922bb1400c5c1e2b5b4f43f491a..73e8d178066e9d03e8fee4951ef70868d8c70baf 100644 --- a/R/format.data/SPAIN.R +++ b/R/format.data/SPAIN.R @@ -1,11 +1,12 @@ ### MERGE spain DATA Edited by FH rm(list = ls()) -source("./R/format.function.R") -source("./R/FUN.TRY.R") +source("format.fun.R") library(reshape) +dir.create("../../output/process/Spain") + ######################### READ DATA read individuals tree data -data.spain <- read.table("./data/raw/DataSpain/Tree_data_SFI_aug13_alldata.txt", +data.spain <- read.table("../../data/raw/Spain/Tree_data_SFI_aug13_alldata.txt", header = TRUE, stringsAsFactors = FALSE, sep = "\t") ###################################### MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed @@ -14,15 +15,14 @@ data.spain <- read.table("./data/raw/DataSpain/Tree_data_SFI_aug13_alldata.txt", res.quant.boot <- t(sapply(levels(factor(data.spain[["SP_code"]])), FUN = f.quantile.boot, R = 1000, x = log10(apply(data.spain[, c("ht1", "ht2")], 1, max, na.rm = T)), fac = factor(data.spain[["SP_code"]]))) - ## create data base data.max.height <- data.frame(sp = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3], stringsAsFactors =FALSE) rm(res.quant.boot) -write.csv(data.max.height, file = "./data/process/data.max.height.spain.csv") ## I was planning to save processed data in that folder +write.csv(data.max.height, file = "../../output/process/Spain/data.max.height.spain.csv") ## I was planning to save processed data in that folder -################################################################ FORMAT INDIVIDUAL TREE DATA +################################## FORMAT INDIVIDUAL TREE DATA data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year data.spain$year <- data.spain[["years"]]; data.spain$years <- NULL; ## number of year between measurement data.spain$D <- data.spain[["dbh1"]]/10 ## diameter in mm convert to cm @@ -36,41 +36,35 @@ data.spain$tree.id <- data.spain$Tree_ID_SFI; data.spain$Tree_ID_SFI <- NULL ## data.spain$obs.id <- as.character(1:nrow(data.spain)) ## obs uniquue id; could use tree unique id here since there is only one row per tree data.spain$census <- rep(1,nrow(data.spain)) ## only one census in spain data.spain$weights <- as.vector(1/(pi * (data.spain[["R1"]])^2)) ## weights in 1/m^2 - - ################# #### change coordinates system of x y to be in lat long WGS84 library(sp) library(dismo) library(rgdal) - -data.sp <- data.spain[, c("Tree_ID_SFI", "CX", "CY")] +data.sp <- data.spain[, c("tree.id", "CX", "CY")] coordinates(data.sp) <- c("CX", "CY") # define x y proj4string(data.sp) <- CRS("+init=epsg:23030") # define projection system of our data ## EPSG CODE 23030 ED50 / UTM zone 30N summary(data.sp) - detach(package:rgdal) data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon data.spain$Lon <- coordinates(data.sp2)[, "CX"] data.spain$Lat <- coordinates(data.sp2)[, "CY"] -## ## plot on world map library(rworldmap) newmap <- getMap(resolution = 'coarse') +## ## plot on world map +## library(rworldmap) +## newmap <- getMap(resolution = 'coarse') ## # different resolutions available plot(newmap) ## points(data.sp2,cex=0.2,col='red') rm(data.sp, data.sp2) - ###################### ECOREGION merge greco to have no ecoregion with low number of observation -greco <- read.csv(file = "./data/raw/DataSpain/R_Ecoregion.csv", header = T) +greco <- read.csv(file = "../../data/raw/Spain/R_Ecoregion.csv", header = T) greco <- greco[, c("Plot_ID_SFI", "BIOME", "eco_code")] greco2 <- greco[!duplicated(greco$Plot), ]; colnames(greco2) <- c("plot", "biome", "ecocode") rm(greco) - data.spain <- merge(data.spain, greco2, by = "plot") rm(greco2) - table(data.spain$ecocode) ## There's an eco-region with no code, and one with < 1000 sites The former we could drop as they were on the border of Spain - # ### PLOT ECOREGION # library(RColorBrewer) # mycols <- brewer.pal(10, "Set3") @@ -89,32 +83,27 @@ table(data.spain$ecocode) # ## Highlight the 'rare' ecoregions PA1219 looks to be similar to PA1209, merge # ## them together # dev.off() - ## merge data.spain$ecocode.merged <- combine_factor(data.spain$ecocode, c(1:8, 6, 9)) data.spain <- data.spain[-which(data.spain$ecocode.merged == ""), ] -data.spain$ecocode.merged <- as.character(factor(data.spain$ecocode.merged)) - +data.spain$ecocode.merge <- as.character(factor(data.spain$ecocode.merged)) # PLOT of MERGED ECOREGION # mycols <- brewer.pal(9, "Set3") # plot(data.spain[["CX"]][order(data.spain$eco_codemerged)], data.spain[["CY"]][order(data.spain$eco_codemerged)], pty = ".", # cex = 0.2, col = rep(mycols, as.vector(table(data.spain$eco_codemerged)))) # legend("topleft", col = mycols, legend = levels(data.spain$eco_codemerged), pch = rep(19, # length(levels(data.spain$eco_codemerged))), cex = 2) - ###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is ###################### missing compute numer of dead per plot to remove plot with disturbance perc.dead <- tapply(data.spain[["dead"]], INDEX = data.spain[["plot"]], FUN = function.perc.dead2) table(data.spain$dead) data.spain <- merge(data.spain, data.frame(plot = as.numeric(names(perc.dead)), perc.dead = perc.dead, stringsAsFactors =FALSE), sort = FALSE, by = "plot") - ########################################################### PLOT SELECTION FOR THE ANALYSIS ## Remove data with mortality == 1 or 2 data.spain <- subset(data.spain, subset = (data.spain[["Mortality_Cut"]] == 0 | data.spain[["Mortality_Cut"]] == "")) - colnames(data.spain)[colnames(data.spain) %in% c("mat", "pp", "PET")] <- c("MAT", "PP", "PET") data.spain$ecocode <- NULL @@ -123,30 +112,4 @@ vec.abio.var.names <- c("MAT", "PP", "PET") vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name", "cluster","plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","census","weights") data.tree <- subset(data.spain, select = c(vec.basic.var, vec.abio.var.names)) - -write.csv(data.tree,file="./data/process/data.tree.spain.csv") - - -############################################## -############################################## -#################### GENERATE ONE OBJECT PER ECOREGION - -# vector of ecoregion name -ecoregion.unique <- unique(data.tree[["ecocode"]]) -sum(data.tree[["ecocode"]] == ecoregion.unique[1]) - -### read TRY data -TRY.DATA.FORMATED <- readRDS("./data/process/data.TRY.std.rds") - -## create lookup table for spain -species.lookup <- data.frame(sp=data.tree[!duplicated(data.tree[["sp"]]),"sp"], - Latin_name=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"], - Latin_name_syn=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"], - stringsAsFactors =FALSE) - - -#### lapply function to generate data per ecoregion -system.time(lapply(ecoregion.unique[1:2], FUN = fun.data.per.ecoregion, data.tot = data.tree, - plot.name = "plot", weight.full.plot = NA, name.country = "Spain", data.TRY = TRY.DATA.FORMATED, - species.lookup = species.lookup)) - +write.csv(data.tree,file="../../output/process/Spain/data.tree.spain.csv") diff --git a/R/format.data/SWEDEN.R b/R/format.data/SWEDEN.R index 39456da618a5bfeae0401123df8b3b87b1a41db6..60470b0cab5cb88d1bc7c1be12031be0f5cad677 100644 --- a/R/format.data/SWEDEN.R +++ b/R/format.data/SWEDEN.R @@ -1,17 +1,14 @@ ### MERGE sweden DATA rm(list = ls()); -source("./R/format.function.R"); -library(reshape); +source("format.fun.R"); +library(reshape); +dir.create("../../output/process/Sweden") ######################### READ DATA read individuals tree data -#data.swe1 <- read.csv("./data/raw/DataSweden/Swe_NFI_1.csv",header=T,stringsAsFactors=F) -#data.swe2 <- read.csv("./data/raw/DataSweden/Swe_NFI_2a.csv",header=T,stringsAsFactors=F) -#data.swe3 <- read.csv("./data/raw/DataSweden/Swe_NFI_3.csv",header=T,stringsAsFactors=F) -data.swe <- read.table("./data/raw/DataSweden/Swe_NFI_all.txt",header=T,stringsAsFactors=F,sep="\t") +data.swe <- read.table("../../data/raw/Sweden/Swe_NFI_all.txt",header=T,stringsAsFactors=F,sep="\t") #head(data.swe) data.swe$tree.id <- apply(cbind(data.swe[["Year"]],data.swe[["TractNr"]],data.swe[["PlotNr"]],data.swe[["TreeNr"]]),1,paste,collapse="_") data.swe$plot <- apply(cbind(data.swe[["Year"]],data.swe[["TractNr"]],data.swe[["PlotNr"]]),1,paste,collapse="_") - dim(data.swe) #table(table(data.swe$TreeID)) #table(table(paste(data.swe$TractNr,data.swe$PlotNr,data.swe$TreeNr,data.swe$Year))) @@ -19,7 +16,6 @@ dim(data.swe) #rm(data.swe1, data.swe2, data.swe3) #data.swe <- data.swe[order(data.swe$TreeID,data.swe$PlotInvent),] ## Shows the TreeID = "" first #data.swe <- data.swe[order(paste(data.swe$TractNr,data.swe$PlotNr,data.swe$TreeNr)),] ## Shows the TreeID = "" first - ## Format to desired form data.swe$PlotInventID <- NULL; data.swe$TreeID <- NULL; data.swe2 <- data.swe[rep(1:nrow(data.swe),each=2),] @@ -33,92 +29,100 @@ data.swe2$dryw1 <- as.vector(rbind(data.swe$DryW_t1,data.swe$DryW_t2)) ## kg data.swe2$dryw2 <- as.vector(rbind(data.swe$DryW_t2,data.swe$DryW_t3)) ## kg data.swe2$DryW_t1 <- data.swe2$DryW_t2 <- data.swe2$DryW_t3 <- NULL data.swe2$census <- rep(1:2,nrow(data.swe2)/2) ## Using first census in the row - data.swe2$Diameter <- data.swe2$Volume <- data.swe2$BrhAge <- NULL data.swe2$TractNr <- data.swe2$PlotNr <- data.swe2$TreeNr <- NULL data.swe2$sp <- data.swe$TreeSpecies; data.swe$TreeSpecies <- NULL ## Species names are in the xlsx files if required data.swe <- data.swe2 rm(data.swe2) - -###################################### MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed -###################################### height to add variables to the traits data base Because we have two heights, -###################################### then take the max of the two heights and then bootstrap +############## MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed +############## height to add variables to the traits data base Because we have two heights, +############## then take the max of the two heights and then bootstrap ## Obtain heights in dataset - I assume that they use the volume of a cylinder V = pi*r^2*h data.swe$ht1 <- data.swe$vol1/(pi*(0.5*data.swe$dbh1/100)^2) ## m data.swe$ht2 <- data.swe$vol2/(pi*(0.5*data.swe$dbh2/100)^2) ## m - res.quant.boot <- t(sapply(levels(factor(data.swe[["sp"]])), FUN = f.quantile.boot, R = 1000, x = log10(apply(data.swe[, c("ht1", "ht2")], 1, max, na.rm = T)), fac = factor(data.swe[["sp"]]))) - ## create data base data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3], stringsAsFactors =FALSE) rm(res.quant.boot) -write.csv(data.max.height,file='./data/process/data.max.height.swe.csv') +write.csv(data.max.height,file='../../output/process/Sweden/data.max.height.swe.csv') ### GEORGES NOT SURE WE SHOULD USE THAT + +################# +##### change the projection of xy to wgs84 lat long. +#### change coordinates system of x y to be in lat long WGS84 +library(sp) +library(dismo) +library(rgdal) +data.sp <- data.swe[, c("tree.id", "ecoord", "ncoord")] +coordinates(data.sp) <- c("ecoord", "ncoord") # define x y +proj4string(data.sp) <- CRS("+init=epsg:3006") # define projection system of our data ## EPSG:3006 check with Bertil +summary(data.sp) +detach(package:rgdal) +data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon +data.swe$Lon <- coordinates(data.sp2)[, "ecoord"] +data.swe$Lat <- coordinates(data.sp2)[, "ncoord"] + +## ## ## ## plot on world map +## library(rworldmap) +## newmap <- getMap(resolution = 'coarse') +## ## # different resolutions available +## plot(newmap) +## points(data.sp2,cex=0.2,col='red') +rm(data.sp, data.sp2) + + + ########################################## FORMAT INDIVIDUAL TREE DATA data.swe2 <- subset(data.swe, dbh1 >= 4 | is.na(dbh1)) ## Minimum DBH should be 40mm so remove anything below that data.swe2 <- subset(data.swe2, dbh2 >= 4 | is.na(dbh2)) data.swe <- data.swe2 +data.swe <- subset(data.swe,subset=!is.na(data.swe$dbh1)) dim(data.swe) +data.swe$cluster <- rep(NA, nrow(data.swe)) ## no plot cluster + +## load species latin name +species.clean <- read.csv("../../data/raw/Sweden/species.list.csv",header=T,stringsAsFactors=F,sep=",") +data.swe$sp.name <- species.clean[match(data.swe$sp,species.clean$sp),"Latin_name"] ## GET LATIN NAME -data.swe$cluster <- rep(NA, nrow(data.swe)) ## no plot cluster data.swe$Year <- NULL; data.swe$year <- 5 ## number of year between measurement data.swe$G <- 10*(data.swe$dbh2-data.swe$dbh1)/data.swe$year ## diameter growth in mm per year data.swe$G[is.na(data.swe$dbh1)] <- NA sum(data.swe$G < 0, na.rm = T) - data.swe$D <- data.swe[["dbh1"]]; ## diameter in cm data.swe$dead <- as.numeric(is.na(data.swe$dbh2)) ## dummy variable for dead tree 0 alive 1 dead data.swe$htot <- rep(NA,nrow(data.swe)) ## height of tree in m data.swe$obs.id <- apply(cbind(data.swe$tree.id,data.swe$census.id),1,paste,collapse="_") -data.swe$weights <- ### WHAT DO WE BASED THEM ON? +data.swe$weights <- rep(NA,nrow(data.swe)) ### DONE +data.swe$weights[data.swe$D<4] <- data.swe$PlotArea0139[data.swe$D<4] +data.swe$weights[data.swe$D>=4 & data.swe$D<10] <- data.swe$PlotArea4099[data.swe$D>=4 & data.swe$D<10] +data.swe$weights[data.swe$D>=10] <- data.swe$PlotArea100[data.swe$D>=10] +data.swe$weights <- 1/data.swe$weights ######################## ECOREGION ## One ecoregion for Sweden only? +data.swe$ecocode <- rep("A",nrow(data.swe)) +## NEED TO ASK SWEDEN ######################## PERCENT DEAD -## variable percent dead/cannot do with since dead variable is missing +## variable percent dead ## compute numer of dead per plot to remove plot with disturbance perc.dead <- tapply(data.swe[["dead"]],INDEX=data.swe[["plot"]],FUN=function.perc.dead2) # ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record) data.swe <- merge(data.swe,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE) - ############################################################## PLOT SELECTION FOR THE ANALYSIS ## Remove data with dead == 1 -table(data.swe$dead) -data.swe <- subset(data.swe, dead == 0) +## table(data.swe$dead) +## data.swe <- subset(data.swe, dead == 0) -vec.abio.var.names <- c("MAT", "MAP") +## vec.abio.var.names <- c("MAT", "MAP") ## TODO NO MAT MAP NEED TO LOAD FROM WORLDCLIM vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead","weights","census") -data.tree <- subset(data.swe, select = c(vec.basic.var, vec.abio.var.names)) - -write.csv(data.tree,file="./data/process/data.tree.swe.csv") - -################################### COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species -data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.swe[["tree.id"]]), diam=as.vector(data.swe[["D"]]), - sp=as.vector(data.swe[["sp"]]), id.plot=as.vector(data.swe[["plot"]]), - weights=1/(10000*data.swe[["SubPlot_Size"]]), weight.full.plot=NA) - -## change NA and <0 data for 0 -data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 - -### CHECK IF sp and sp name for column are the same -if(sum(!(names(data.BA.SP)[-1] %in% unique(data.swe[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") +data.tree <- subset(data.swe, select = c(vec.basic.var)) +write.csv(data.tree,file="../../output/process/Sweden/data.tree.swe.csv") -#### compute BA tot for all competitors -BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE) -data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) -### create data frame -names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) -data.BA.sp <- merge(data.frame(tree.id=data.swe[["tree.id"]],ecocode=data.swe[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE) -## test -if(sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") -## save everything as a list -list.swe <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) -save(list.swe, file="./data/process/list.swe.Rdata") diff --git a/R/format.data/SWISS.R b/R/format.data/SWISS.R index 9ac248c244ae59f246754c5fc7f5bf13ae9fa914..899b98889e7fba9e0d5372bee018cf009b57af1e 100644 --- a/R/format.data/SWISS.R +++ b/R/format.data/SWISS.R @@ -1,77 +1,67 @@ ### MERGE Swiss DATA rm(list = ls()) -source("./R/format.function.R") -source("./R/FUN.TRY.R") +source("format.fun.R") library(reshape) library(foreign) +dir.create("../../output/process/Swiss") ######################### READ DATA read individuals tree data -data.swiss1 <- read.csv("./data/raw/DataSwiss/LFI12.csv", header = TRUE, stringsAsFactors = FALSE) -data.swiss2 <- read.csv("./data/raw/DataSwiss/LFI23.csv", header = TRUE, stringsAsFactors = FALSE) -data.swiss3 <- read.csv("./data/raw/DataSwiss/LFI34.csv", header = TRUE, stringsAsFactors = FALSE) +data.swiss1 <- read.csv("../../data/raw/Swiss/LFI12.csv", header = TRUE, stringsAsFactors = FALSE) +data.swiss2 <- read.csv("../../data/raw/Swiss/LFI23.csv", header = TRUE, stringsAsFactors = FALSE) +data.swiss3 <- read.csv("../../data/raw/Swiss/LFI34.csv", header = TRUE, stringsAsFactors = FALSE) data.swiss <- rbind(data.swiss1, data.swiss2, data.swiss3) rm(data.swiss1, data.swiss2, data.swiss3) data.swiss <- data.swiss[order(data.swiss$BANR), ] data.swiss <- data.swiss[, c("INVNR", "CLNR", "BANR", "X", "Y", "BAUMART", "TEXT", "VEGPER", "BHD1", "BHD2", "BA1", "BA2", "BAI", "BHD_DIFF", "RPSTZ1", "RPSTZ2", "HOEHE1", "HOEHE2")] - colnames(data.swiss) <- c("Inventid", "siteid", "tree.id", "x", "y", "sp", "sp.name", "year", "dbh1", "dbh2", "ba1", "ba2", "ba_diff", "dbh_diff", "repfactor1", "repfactor2", "ht1", "ht2") ################# ##### change the projection of xy to wgs84 lat long. - #### change coordinates system of x y to be in lat long WGS84 library(sp) library(dismo) library(rgdal) - data.sp <- data.swiss[, c("tree.id", "x", "y")] coordinates(data.sp) <- c("x", "y") # define x y proj4string(data.sp) <- CRS("+init=epsg:21781") # define projection system of our data ## EPSG:21781 CH1903 / LV03 CHECK WITH NICK STRANGE summary(data.sp) - library(maptools) -bioregion <- readShapePoly("./data/raw/DataSwiss/bio_regions/biogregions.shp") -plot(bioregion) -plot(data.sp,add=TRUE) -test <- over(data.sp,bioregion) ## need to check proj with nick - - +bioregion <- readShapePoly("../../data/raw/Swiss/bio_regions/biogregions.shp") +## plot(bioregion) +## plot(data.sp,add=TRUE) +## test <- over(data.sp,bioregion) ## need to check proj with nick detach(package:rgdal) data.sp2 <- spTransform(data.sp, CRS("+init=epsg:4326")) ## change projection in WGS84 lat lon data.swiss$Lon <- coordinates(data.sp2)[, "x"] data.swiss$Lat <- coordinates(data.sp2)[, "y"] ## ## ## plot on world map -library(rworldmap) -newmap <- getMap(resolution = 'coarse') -## # different resolutions available -plot(newmap,xlim=c(5,11),ylim=c(45,48)) -points(data.sp2,cex=0.2,col='red') -points(data.swiss$x,data.swiss$y) - - +## library(rworldmap) +## newmap <- getMap(resolution = 'coarse') +## ## # different resolutions available +## plot(newmap,xlim=c(5,11),ylim=c(45,48)) +## points(data.sp2,cex=0.2,col='red') +## points(data.swiss$x,data.swiss$y) rm(data.sp, data.sp2) - ## Do not need to read in spp list as it is already available in data.swiss + ###################################### MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed ###################################### height to add variables to the traits data base Because we have two heights, ###################################### then take the max of the two heights and then bootstrap res.quant.boot <- t(sapply(levels(factor(data.swiss[["sp"]])), FUN = f.quantile.boot, R = 1000, x = log10(apply(data.swiss[, c("ht1", "ht2")], 1, max, na.rm = T)), fac = factor(data.swiss[["spcode"]]))) - ## create data base data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3], stringsAsFactors =FALSE) rm(res.quant.boot) -write.csv(data.max.height,file='./data/process/data.max.height.swiss.csv') +write.csv(data.max.height,file='../../output/process/Swiss/data.max.height.swiss.csv') ########################################## FORMAT INDIVIDUAL TREE DATA - data.swiss$G <- 10 * (data.swiss$dbh_diff)/data.swiss$year ## diameter growth in mm per year data.swiss$D <- data.swiss[["dbh1"]] data.swiss$D[data.swiss$D == 0] <- NA ## diameter in cm @@ -83,52 +73,23 @@ data.swiss$census <- data.swiss$Inventid data.swiss$obs.id <- apply(data.swiss[,c("tree.id","Inventid")],1,paste,collapse="_") data.swiss$weights <- data.swiss$repfactor1/10000 data.swiss$sp <- paste("sp.",data.swiss$sp,sep="") - -###################### ECOREGION Ecoregion not available for swiss data +###################### ECOREGION Ecoregion not available for swiss data NEED NICK INFO ON EPSG CODE data.swiss$ecocode <- rep("A", nrow(data.swiss)) - ###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is ###################### missing compute numer of dead per plot to remove plot with disturbance perc.dead <- tapply(data.swiss[["dead"]], INDEX = data.swiss[["plot"]], FUN = function.perc.dead2) data.swiss <- merge(data.swiss, data.frame(plot = names(perc.dead), perc.dead = perc.dead, stringsAsFactors =FALSE), by = "plot", sort = FALSE) - ########################################################### PLOT SELECTION FOR THE ANALYSIS -data.climate <- read.dbf(file = "./data/raw/DataSwiss/LFI14_climate.dbf") +data.climate <- read.dbf(file = "../../data/raw/Swiss/LFI14_climate.dbf") data.climate <- data.climate[, c("CLNR","swb_100","tave_68","prec_122","prec_35","prec_68","prec_911")] data.climate$MAP <- apply(data.climate[, c("prec_122","prec_35","prec_68","prec_911")], 1, sum) - -data.swiss <- merge(data.swiss, data.frame(siteid = data.climate$CLNR, swb = data.climate$swb_100, - MAT = data.climate$tave_68, MAP = data.climate$MAP, stringsAsFactors =FALSE), sort = F, all.x = T) +data.swiss <- merge(data.swiss, data.frame(plot = data.climate$CLNR, swb = data.climate$swb_100, + MAT = data.climate$tave_68, MAP = data.climate$MAP, stringsAsFactors =FALSE),by.x="plot", sort = FALSE, all.x =TRUE) rm(data.climate) - - ### select good columns vec.abio.var.names <- c("MAT", "MAP", "swb") vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead", "weights","census") - data.tree <- subset(data.swiss, select = c(vec.basic.var, vec.abio.var.names)) -write.csv(data.tree,file="./data/process/data.tree.swiss.csv") - -############################################## -#################### GENERATE ONE OBJECT PER ECOREGION -# vector of ecoregion name -ecoregion.unique <- unique(data.swiss[["ecocode"]]) - - -### read TRY data -TRY.DATA.FORMATED <- readRDS("./data/process/data.TRY.std.rds") - -## create lookup table for swiss -species.lookup <- data.frame(sp=data.swiss[!duplicated(data.swiss[["sp"]]),"sp"], - Latin_name=data.swiss[!duplicated(data.swiss[["sp"]]),"sp.name"], - Latin_name_syn=data.swiss[!duplicated(data.swiss[["sp"]]),"sp.name"], stringsAsFactors =FALSE) - - -#### lapply function to generate data for each ecoregion -system.time(lapply(ecoregion.unique, FUN = fun.data.per.ecoregion, data.tot = data.swiss, - plot.name = "plot", weight.full.plot = NA, name.country = "Swiss", data.TRY = TRY.DATA.FORMATED, - species.lookup = species.lookup)) - - +write.csv(data.tree,file="../../output/process/Swiss/data.tree.swiss.csv") diff --git a/R/format.data/US.R b/R/format.data/US.R index 0131c68b2a0dab18dbafb1778c74e39c765ee96d..9380d4883079e57492fc4dcc2b9fd213d519481c 100644 --- a/R/format.data/US.R +++ b/R/format.data/US.R @@ -1,12 +1,13 @@ #!/usr/bin/env Rscript - ### MERGE us DATA Edited by FH library(reshape, quietly=TRUE) -source("./R/format.function.R") -source("./R/FUN.TRY.R") +source("format.fun.R") +dir.create("../../output/process/US") + + ### read species names -species.clean <- read.csv("./data/species.list/REF_SPECIES.CSV", stringsAsFactors = FALSE) +species.clean <- read.csv("../../data/raw/US/REF_SPECIES.CSV", stringsAsFactors = FALSE) ## select column to keep species.clean <- subset(species.clean, select = c("SPCD", "GENUS", "SPECIES", "VARIETY", "SUBSPECIES", "SPECIES_SYMBOL")) @@ -14,19 +15,21 @@ species.clean$Latin_name <- paste(species.clean[["GENUS"]], species.clean[["SPEC sep = " ") species.clean$Latin_name_syn <- paste(species.clean[["GENUS"]], species.clean[["SPECIES"]], sep = " ") - names(species.clean)[1] <- "sp" species.clean[["sp"]] <- paste("sp", species.clean[["sp"]], sep = ".") -#### READ DATA US -data.us <- read.csv("./data/raw/DataUS/FIA51_trees_w_supp.csv",header=TRUE,stringsAsFactors =FALSE) - -###################################### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER -###################################### WHEN WE ANALYZE THAT DATASET LATER ON - -##################################### FORMAT INDIVIDUAL TREE DATA +## LOAD QUANTILE OF HEIGHT AND FORMAT AS OTHER +data.max.height <- read.csv("../../data/raw/US/FiaSpMaxHt.csv", stringsAsFactors = FALSE) +data.max.height <- data.frame(sp=data.max.height$SpCd,Max.height.mean=data.max.height$Ht99, + Max.height.sd=rep(NA,nrow(data.max.height)), Max.height.nobs=rep(NA,nrow(data.max.height))) +write.csv(data.max.height, file = "../../output/process/US/data.max.height.us.csv") ## I was planning to save processed data in that folder +#### READ DATA US +data.us <- read.csv("../../data/raw/US/FIA51_trees_w_supp.csv",header=TRUE,stringsAsFactors =FALSE) +############## MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER +############## WHEN WE ANALYZE THAT DATASET LATER ON +############## FORMAT INDIVIDUAL TREE DATA ## change unit and names of variables to be the same in all data for the tree data.us$G <- 10 * (data.us$FinalDbh - data.us$InitDbh)/data.us$Interval ## diameter growth in mm per year data.us$G[which(data.us$InitDbh == 0 | data.us$FinalDbh == -999)] <- NA @@ -48,15 +51,12 @@ data.us$sp.name <- species.clean[match(data.us$sp,species.clean$sp),"Latin_name" data.us$census <- rep(1,nrow(data.us)) ### add plot weights for computation of competition index (in 1/m^2) data.us$weights <- 1/(10000 * data.us[["PlotSize"]]) - ###################### ECOREGION merge greco to have no ecoregion with low number of observation -greco <- read.csv(file = "./data/raw/DataUS/EcoregionCodes.csv", header = T) +greco <- read.csv(file = "../../data/raw/US/EcoregionCodes.csv", header = T) colnames(greco)[1] <- "Ecocode" - table(data.us$Ecocode) data.us <- merge(data.us, greco[, -4], by = "Ecocode") data.us$DIVISION <- factor(data.us$DIVISION) - ## Some ecoregions still have small # of individuals, so create a variable which ## does division if # ind < 10000 else it reads Domain data.us$eco_codemerged <- as.character(data.us$DIVISION) @@ -67,7 +67,6 @@ for (i in 1:length(sel.small.div)) { print(length(find.ind)) data.us$eco_codemerged[find.ind] <- as.character(data.us$DOMAIN)[find.ind] } - ###### PERCENT DEAD variable percent dead/cannot do with since dead variable is ###### missing compute numer of dead per plot to remove plot with disturbance perc.dead <- tapply(data.us[["dead"]], INDEX = data.us[["plot"]], FUN = function.perc.dead) @@ -75,39 +74,17 @@ perc.dead <- tapply(data.us[["dead"]], INDEX = data.us[["plot"]], FUN = function # AVAILABLE (disturbance record) data.us <- merge(data.us, data.frame(plot = names(perc.dead), perc.dead = perc.dead, stringsAsFactors = FALSE), by = "plot", sort = FALSE) - - ##### PLOT SELECTION FOR THE ANALYSIS - ## remove everything from memory not need before computation rm(greco, perc.dead, tab.small.div, sel.small.div) - #### create good data format to be run per ecoregion data.us$ecocode <- unlist(lapply(lapply(strsplit(data.us[["eco_codemerged"]], " "), FUN = substr, 1, 2), FUN = paste, collapse = ".")) - ## variables to keep vec.abio.var.names <- c("MAT", "MAP") vec.basic.var <- c("obs.id","tree.id", "sp","sp.name", "cluster", "plot", "ecocode", "D", "G", "dead", "year", "htot", "Lon", "Lat", "perc.dead", "weights","census") data.tree <- subset(data.us, select = c(vec.basic.var, vec.abio.var.names)) -write.csv(data.tree,file="./data/process/data.tree.us.csv") - +write.csv(data.tree,file="../../output/process/US/data.tree.us.csv") rm(data.us) gc() - -### read TRY data -TRY.DATA.FORMATED <- readRDS("./data/process/data.TRY.std.rds") - -#### GENERATE ONE OBJECT PER ECOREGION - -# vector of ecoregion name -ecoregion.unique <- unique(data.tree[["ecocode"]]) - - -#### lapply function - - -system.time(lapply(ecoregion.unique, FUN = fun.data.per.ecoregion, data.tot = data.tree, - plot.name = "plot", weight.full.plot = NA, name.country = "US", data.TRY = TRY.DATA.FORMATED, - species.lookup = species.clean)) diff --git a/R/format.data/change.script.R b/R/format.data/change.script.R new file mode 100644 index 0000000000000000000000000000000000000000..1162e5b2516bb8bd35a02d15761e1dc3a034a8d9 --- /dev/null +++ b/R/format.data/change.script.R @@ -0,0 +1,15 @@ +##### +## change path in all R scripts + +vec.files <- list.files() +vec.files <- vec.files[! vec.files %in% c("format.fun.R","change.script.R")] + +for (i in vec.files){ +TEMP <-scan(file =i,what="character",sep="\n") +TEMP <- sub(pattern='source(\"./R/format.function.R\")', replacement='source(\"format.fun.R\")', x =TEMP,fixed = TRUE) +TEMP <- sub(pattern='source(\"./R/FUN.TRY.R\")', replacement='', x =TEMP,fixed = TRUE) +TEMP <- sub(pattern='./data/raw/Data', replacement='../../data/raw/', x =TEMP,fixed = TRUE) +TEMP <- sub(pattern='./data/process/', replacement='../../output/process/', x =TEMP,fixed = TRUE) +cat(TEMP, file = i, sep="\n", fill = FALSE, labels = NULL, append = FALSE) +} + diff --git a/R/format.data/format.fun.R b/R/format.data/format.fun.R new file mode 100644 index 0000000000000000000000000000000000000000..820303bbbc42ce926274930dd7439736a2db20f7 --- /dev/null +++ b/R/format.data/format.fun.R @@ -0,0 +1,98 @@ + +################################################### +################################################### +################################################### +##### FUNCTION TO FORMAT DATA FOR THE WORKSHOP +#### G. Kunstler 11/09/2013 + +############################ +## FUNCTION remove trailing white space +trim.trailing <- function (x) sub("\\s+$", "", x) + + +############################################# +## FUN to clean species name for FRENCH NFI +fun.clean.species.tab <- function(species.tab){ +species.tab2 <- species.tab[!is.na(species.tab$Latin_name),] +### species IFN reformat names +## clean species names and synonyme names +species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) +species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn)) +## remove trailing white space +species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) +species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), + c("code","Latin_name","Exotic_Native_cultivated")] +names(species.clean) <- c("sp","Latin_name","Exotic_Native_cultivated") +return(species.clean) +} + + + +##################################################### +### compute quantile 99% and sd with a bootstrap +##' .. compute quantile 99% and sd with a bootstra.. +##' +##' .. content for \details{} .. +##' @title f.quantile.boot +##' @param i subset (species) for which to compute quantile +##' @param x vector of height +##' @param fac vector of subset +##' @param R number of resampling +##' @param probs probability of quantile to compute +##' @return a matrix with # col and 1 row with mean sd adn nobs +##' @author Kunstler +f.quantile.boot <- function(i,x,fac,R,probs=0.99){ +require(boot) + if(length(na.exclude(x[fac==i]))>0){ + quant.boot <- boot(x[fac==i],f.quantile,R=R,probs=probs) + return(as.matrix(c(mean=mean(quant.boot$t),sd=sd(quant.boot$t),nobs=length(na.exclude(x[fac==i]))),ncol=3,nrow=1)) + }else{ + return(as.matrix(c(mean=NA,sd=NA,nobs=NA),ncol=3,nrow=1)) + } +} + +f.quantile <- function (x,ind,probs){ + require(boot) + quantile(x[ind],probs=probs,na.rm=TRUE) +} + +####################### +### function to compute number of dead per plot +function.perc.dead <- function(dead){ + sum(dead)/length(dead)} + +## function to deal with missing value +function.perc.dead2 <- function(dead) { + out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]) + if(!is.finite(out)) out <- NA + return(out) +} + + + + +###### +###### +## FUNCTION TO PLOT MAP OF TREE with function of DBH +##' .. Function to plot map of tree with circle function of their dbh.. +##' +##' .. content for \details{} .. +##' @title +##' @param plot.select the plot for which draw the map +##' @param x +##' @param y +##' @param plot vectore of plot id for each tree +##' @param D diameter in cm +##' @param inches controling the circle size +##' @param ... +##' @return +##' @author Kunstler +fun.circles.plot <- function(plot.select,x,y,plot,D,inches,...){ +x.t <- x[plot==plot.select] +y.t <- y[plot==plot.select] +D.t <- D[plot==plot.select] +D.t[is.na(D.t)] <- 0 +symbols(x.t,y.t,circles=D.t ,main=plot.select,inches=inches,...) +} + + diff --git a/R/format.function.R b/R/format.function.R index 24fbe5261664ca1098a7356fe38ff8c2c3f8f738..f649b3275b9bcec143c985e06290cfb7a76c4716 100644 --- a/R/format.function.R +++ b/R/format.function.R @@ -10,21 +10,6 @@ trim.trailing <- function (x) sub("\\s+$", "", x) -############################################# -## FUN to clean species name for FRENCH NFI -fun.clean.species.tab <- function(species.tab){ -species.tab2 <- species.tab[!is.na(species.tab$Latin_name),] -### species IFN reformat names -## clean species names and synonyme names -species.tab2$Latin_name <- (gsub("_", " ", species.tab2$Latin_name)) -species.tab2$Latin_name_syn<- (gsub("_", " ", species.tab2$Latin_name_syn)) -## remove trailing white space -species.tab2$Latin_name_syn<- trim.trailing(species.tab2$Latin_name_syn) -species.clean <- species.tab2[!duplicated(species.tab2$Latin_name), - c("code","Latin_name","Exotic_Native_cultivated")] -names(species.clean) <- c("sp","Latin_name","Exotic_Native_cultivated") -return(species.clean) -} @@ -56,18 +41,6 @@ f.quantile <- function (x,ind,probs){ quantile(x[ind],probs=probs,na.rm=TRUE) } -####################### -### function to compute number of dead per plot -function.perc.dead <- function(dead){ - sum(dead)/length(dead)} - -## function to deal with missing value -function.perc.dead2 <- function(dead) { - out <- sum(dead,na.rm=T)/length(dead[!is.na(dead)]) - if(!is.finite(out)) out <- NA - return(out) -} - ######