Commit abe3fcf2 authored by Georges Kunstler's avatar Georges Kunstler
Browse files

try data extraction done

No related merge requests found
Showing with 196 additions and 85 deletions
+196 -85
##### FORMAT TRAIT FOR Canada
source("trait.fun.R")
### read species names
data.tree <- read.csv("../../output/formatted/Canada/tree.csv", stringsAsFactors = FALSE)
species.clean <- data.frame(sp=data.tree[!duplicated(data.tree[["sp"]]),"sp"],
Latin_name=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
Latin_name_syn=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
stringsAsFactors =FALSE)
## delete the sp code with no species
species.clean <- subset(species.clean,subset=!is.na(species.clean$Latin_name))
## read in data
data.TRY.std <- readRDS("../../output/formatted/TRY/data.TRY.std.rds")
## read us max height
max.height <- read.csv(file="../../output/formatted/US/max.height.csv", stringsAsFactors = FALSE)
max.height$sp <- paste("sp",max.height$sp,sep=".")
data.traits <- fun.extract.format.sp.traits.TRY(sp=species.clean[["sp"]],sp.syno.table=species.clean,data=data.TRY.std)
data.traits <- merge(data.traits,subset(max.height,select=c("sp","Max.height.mean","Max.height.sd")),by="sp",all.x=TRUE,all.y=FALSE)
### TODO ADD GENUS MEAN FOR HEIGHT IF SPECIES IS MISSING
write.csv(data.traits,file="../../output/formatted/Canada/traits.csv")
##### FORMAT TRAIT FOR NVS
source("trait.fun.R")
### read species names
data.tree <- read.csv("../../output/formatted/NVS/tree.csv", stringsAsFactors = FALSE)
species.clean <- data.frame(sp=data.tree[!duplicated(data.tree[["sp"]]),"sp"],
Latin_name=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
Latin_name_syn=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
stringsAsFactors =FALSE)
## delete the sp code with no species
species.clean <- subset(species.clean,subset=!is.na(species.clean$Latin_name))
## read in data
data.TRAITS.std <- read.csv("../../output/formatted/NVS/trait.csv", stringsAsFactors = FALSE)
## read us max height
data.traits <- fun.extract.format.sp.traits.NOT.TRY(sp=species.clean$sp, Latin_name=species.clean$Latin_name, data=data.TRAITS.std,name.match.traits="Latin_name")
data.traits <- merge(data.traits,subset(max.height,select=c("sp","Max.height.mean","Max.height.sd")),by="sp",all.x=TRUE,all.y=FALSE)
### TODO ADD GENUS MEAN FOR HEIGHT IF SPECIES IS MISSING
write.csv(data.traits,file="../../output/formatted/Canada/traits.csv")
##### FORMAT TRAIT FOR Sweden
source("trait.fun.R")
### read species names
data.tree <- read.csv("../../output/formatted/Sweden/tree.csv", stringsAsFactors = FALSE)
species.clean <- data.frame(sp=data.tree[!duplicated(data.tree[["sp"]]),"sp"],
Latin_name=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
Latin_name_syn=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
stringsAsFactors =FALSE)
## delete the sp code with no species
species.clean <- subset(species.clean,subset=!is.na(species.clean$Latin_name))
## select column to keep
## read in data
data.TRY.std <- readRDS("../../output/formatted/TRY/data.TRY.std.rds")
## read France max height
max.height <- read.csv(file="../../output/formatted/France/max.height.csv", stringsAsFactors = FALSE)
## load latin name France
data.tree <- read.csv("../../output/formatted/France/tree.csv", stringsAsFactors = FALSE)
species.clean2 <- data.frame(sp=data.tree[!duplicated(data.tree[["sp"]]),"sp"],
Latin_name=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
Latin_name_syn=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
stringsAsFactors =FALSE)
## delete the sp code with no species
species.clean2 <- subset(species.clean2,subset=!is.na(species.clean2$Latin_name))
max.height <- merge(max.height,species.clean2,by="sp") ## add latin name
## extract traits and height
data.traits <- fun.extract.format.sp.traits.TRY(sp=species.clean[["sp"]],sp.syno.table=species.clean,data=data.TRY.std)
data.traits <- merge(data.traits,subset(max.height,select=c("Latin_name","Max.height.mean","Max.height.sd")),by="Latin_name",all.x=TRUE,all.y=FALSE)
### TODO ADD GENUS MEAN FOR HEIGHT IF SPECIES IS MISSING
write.csv(data.traits,file="../../output/formatted/Sweden/traits.csv")
##### FORMAT TRAIT FOR Swiss
source("trait.fun.R")
### read species names
data.tree <- read.csv("../../output/formatted/Swiss/tree.csv", stringsAsFactors = FALSE)
species.clean <- data.frame(sp=data.tree[!duplicated(data.tree[["sp"]]),"sp"],
Latin_name=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
Latin_name_syn=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
stringsAsFactors =FALSE)
## select column to keep
## read in data
data.TRY.std <- readRDS("../../output/formatted/TRY/data.TRY.std.rds")
max.height <- read.csv(file="../../output/formatted/Swiss/max.height.csv", stringsAsFactors = FALSE)
max.height$sp <- paste("sp",max.height$code,sep=".")
## extract traits and height
data.traits <- fun.extract.format.sp.traits.TRY(sp=species.clean[["sp"]],sp.syno.table=species.clean,data=data.TRY.std)
data.traits <- merge(data.traits,subset(max.height,select=c("sp","Max.height.mean","Max.height.sd")),by="sp",all.x=TRUE,all.y=FALSE)
### TODO ADD GENUS MEAN FOR HEIGHT IF SPECIES IS MISSING
write.csv(data.traits,file="../../output/formatted/Swiss/traits.csv")
...@@ -2,16 +2,12 @@ ...@@ -2,16 +2,12 @@
source("trait.fun.R") source("trait.fun.R")
### read species names ### read species names
species.clean <- read.csv("../../data/raw/US/REF_SPECIES.CSV", stringsAsFactors = FALSE) data.tree <- read.csv("../../output/formatted/US/tree.csv", stringsAsFactors = FALSE)
## select column to keep species.clean <- data.frame(sp=data.tree[!duplicated(data.tree[["sp"]]),"sp"],
species.clean <- subset(species.clean, select = c("SPCD", "GENUS", "SPECIES", "VARIETY", Latin_name=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
"SUBSPECIES", "SPECIES_SYMBOL")) Latin_name_syn=data.tree[!duplicated(data.tree[["sp"]]),"sp.name"],
species.clean$Latin_name <- paste(species.clean[["GENUS"]], species.clean[["SPECIES"]], stringsAsFactors =FALSE)
sep = " ")
species.clean$Latin_name_syn <- paste(species.clean[["GENUS"]], species.clean[["SPECIES"]],
sep = " ")
names(species.clean)[1] <- "sp"
species.clean[["sp"]] <- paste("sp", species.clean[["sp"]], sep = ".")
## read in data ## read in data
data.TRY.std <- readRDS("../../output/formatted/TRY/data.TRY.std.rds") data.TRY.std <- readRDS("../../output/formatted/TRY/data.TRY.std.rds")
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
############################################ 14/06/2013 ############################################ 14/06/2013
### install all unstallled packages ### install all unstallled packages
list.of.packages <- c("MASS", "doParallel","mvoutlier") list.of.packages <- c("MASS", "doParallel","mvoutlier","plyr")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages) if(length(new.packages)) install.packages(new.packages)
...@@ -164,6 +164,8 @@ fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) { ...@@ -164,6 +164,8 @@ fun.extract.format.sp.traits.TRY <- function(sp, sp.syno.table, data) {
if (sum((sp.syno.table[["Latin_name_syn"]] %in% data[["Latin_name"]])) == if (sum((sp.syno.table[["Latin_name_syn"]] %in% data[["Latin_name"]])) ==
0) 0)
stop("not a single similar species name in sp and TRY") stop("not a single similar species name in sp and TRY")
sp <- as.character(sp)
sp.syno.table$sp <- as.character(sp.syno.table$sp)
## traits to extract ## traits to extract
traits <- c("Leaf.N", "Seed.mass", "SLA", "Wood.density") traits <- c("Leaf.N", "Seed.mass", "SLA", "Wood.density")
# lapply to extract # lapply to extract
...@@ -235,7 +237,7 @@ return(extract.species.traits) ...@@ -235,7 +237,7 @@ return(extract.species.traits)
### FUNCTION TO EXTRACT ALL SPECIES ### FUNCTION TO EXTRACT ALL SPECIES
fun.extract.format.sp.traits.NOT.TRY <- function(sp, Latin_name, data,name.match.traits="Latin_name") { fun.extract.format.sp.traits.NOT.TRY <- function(sp, Latin_name, data,name.match.traits="Latin_name") {
require(plyr)
### test data sp and sp.syno.table match ### test data sp and sp.syno.table match
if (sum((Latin_name %in% data[[name.match.traits]])) == if (sum((Latin_name %in% data[[name.match.traits]])) ==
0) 0)
...@@ -255,22 +257,8 @@ if (SD.TF) traits.sd <- paste(traits,"sd",sep=".") ...@@ -255,22 +257,8 @@ if (SD.TF) traits.sd <- paste(traits,"sd",sep=".")
## extract data ## extract data
extract.species.traits <- rbind.fill(lapply(Latin_name,FUN=fun.spe.traits.notry ,data,traits.mean,traits.sd,name.match.traits,SD.TF)) extract.species.traits <- rbind.fill(lapply(Latin_name,FUN=fun.spe.traits.notry ,data,traits.mean,traits.sd,name.match.traits,SD.TF))
############### add mean sd of species or genus if we want to use that data.frame.TRAITS <- data.frame(sp = sp, Latin_name=Latin_name ,
sd.vec.sp <- readRDS(file = "./data/process/sd.vec.sp.rds") extract.species.traits, stringsAsFactors =FALSE)
sd.vec.genus <- readRDS(file = "./data/process/sd.vec.genus.rds")
### add columns
extract.species.traits.2 <- data.frame(extract.species.traits,
extract.species.traits[,traits.sd], stringsAsFactors =FALSE)
## update value
traits.sd.1 <- paste(traits.sd, 1, sep = ".")
for (i in 1:length(traits.sd.1)) {
extract.species.traits.2[[traits.sd.1[i]]][!extract.species.traits.2[[traits.genus[i]]]] <- sd.vec.sp[i]
extract.species.traits.2[[traits.sd.1[i]]][extract.species.traits.2[[traits.genus[i]]]] <- sd.vec.genus[i]
}
data.frame.TRAITS <- data.frame(sp = sp, Latin_name ,
extract.species.traits.2, stringsAsFactors =FALSE)
if (sum(!data.frame.TRAITS[["sp"]] == sp) > 0) if (sum(!data.frame.TRAITS[["sp"]] == sp) > 0)
stop("Wrong order of species code") stop("Wrong order of species code")
return(data.frame.TRAITS) return(data.frame.TRAITS)
......
...@@ -13,11 +13,15 @@ data.canada$sp = data.canada$species_FIACode ...@@ -13,11 +13,15 @@ data.canada$sp = data.canada$species_FIACode
data.canada$species_FIACode <- NULL data.canada$species_FIACode <- NULL
### read species names and merge with data.canada ### read species names and merge with data.canada
species.clean <- read.csv("../../data/raw/Canada/FIA_REF_SPECIES.csv", stringsAsFactors = FALSE) species.clean <- read.csv("../../data/raw/Canada/FIA_REF_SPECIES.csv", stringsAsFactors = FALSE)
data.canada <- merge(data.canada, data.frame(sp = species.clean$SPCD, species.clean[,2:3], stringsAsFactors = F), by = "sp", all.x = T) species.clean$Latin_name <- paste(species.clean$GENUS,species.clean$SPECIES, sep=" ")
data.canada$sp.name <- data.canada$COMMON_NAME; data.canada$COMMON_NAME <- NULL species.clean$Latin_name_syn<- paste(species.clean$GENUS,species.clean$SPECIES, sep=" ")
###################################### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER data.canada <- merge(data.canada, data.frame(sp = species.clean$SPCD, Latin_name=species.clean[,"Latin_name"], stringsAsFactors = F), by = "sp", all.x = T)
###################################### WHEN WE ANALYZE THAT DATASET LATER ON data.canada$sp.name <- data.canada$Latin_name; data.canada$Latin_name<- NULL
########################################## FORMAT INDIVIDUAL TREE DATA
data.canada$sp <- paste("sp",data.canada$sp,sep=".")
######### MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER
######### WHEN WE ANALYZE THAT DATASET LATER ON
######### FORMAT INDIVIDUAL TREE DATA
data.canada$FinalDBH[data.canada$FinalDBH < 0] <- NA data.canada$FinalDBH[data.canada$FinalDBH < 0] <- NA
data.canada$G <- 10 * (data.canada$FinalDBH - data.canada$InitDBH)/data.canada$Interval ## diameter growth in mm per year data.canada$G <- 10 * (data.canada$FinalDBH - data.canada$InitDBH)/data.canada$Interval ## diameter growth in mm per year
data.canada$G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA data.canada$G[which(data.canada$InitDBH == 0 | data.canada$FinalDBH == -999)] <- NA
...@@ -26,12 +30,15 @@ data.canada$D <- data.canada[["InitDBH"]] ...@@ -26,12 +30,15 @@ data.canada$D <- data.canada[["InitDBH"]]
data.canada$D[data.canada$D == 0] <- NA ## diameter in cm data.canada$D[data.canada$D == 0] <- NA ## diameter in cm
data.canada$dead <- as.numeric(is.na(data.canada$FinalDBH)) ## dummy variable for dead tree 0 alive 1 dead data.canada$dead <- as.numeric(is.na(data.canada$FinalDBH)) ## dummy variable for dead tree 0 alive 1 dead
data.canada$htot <- rep(NA, nrow(data.canada)) ## height of tree in m data.canada$htot <- rep(NA, nrow(data.canada)) ## height of tree in m
data.canada$plot <- data.canada$PlotID_InitYear; data.canada$PlotID_InitYear <- NULL ## plot id data.canada$plot <- data.canada$PlotID_InitYear ## plot id
data.canada$plot[!is.na(data.canada$SUBPLOT_ID)] <- paste(data.canada$plot[!is.na(data.canada$SUBPLOT_ID)],
data.canada$SUBPLOT_ID[!is.na(data.canada$SUBPLOT_ID)],sep=".") ## when subplot include it
data.canada$cluster <-rep(NA,nrow(data.canada)) ## cluster code
data.canada$cluster[!is.na(data.canada$SUBPLOT_ID)] <- data.canada$PlotID_InitYear[!is.na(data.canada$SUBPLOT_ID)]## when subplot use plot as cluster
data.canada$tree.id <- data.canada[["PlotTree_ID"]]; data.canada[["PlotTree_ID"]] <- NULL ## tree unique id data.canada$tree.id <- data.canada[["PlotTree_ID"]]; data.canada[["PlotTree_ID"]] <- NULL ## tree unique id
data.canada$census <- rep(1,nrow(data.canada)) data.canada$census <- rep(1,nrow(data.canada))
data.canada$obs.id <- data.canada$tree.id data.canada$obs.id <- data.canada$tree.id
data.canada$weights <- 1/(data.canada$SubPlotSize*10000) ### there is some strange small value in SubPlotSize 20 m^2 even for large tree NEED TO ASK Hongcheng ## REPLY BETTER TO DElETE THIS PLOTS HE DON T KNOW WHY SO SMALLL data.canada$weights <- 1/(data.canada$SubPlotSize*10000) ### there is some strange small value in SubPlotSize 20 m^2 even for large tree NEED TO ASK Hongcheng ## REPLY BETTER TO DElETE THIS PLOTS HE DON T KNOW WHY SO SMALLL
data.canada$cluster <- as.character(data.canada[["plot"]]) ## cluster code
###################### ECOREGION merge to have no ecoregion with low number of observation ###################### ECOREGION merge to have no ecoregion with low number of observation
greco <- read.csv(file = "../../data/raw/Canada/EcoregionCodes.csv", header = T, greco <- read.csv(file = "../../data/raw/Canada/EcoregionCodes.csv", header = T,
sep = "\t") sep = "\t")
...@@ -63,7 +70,7 @@ data.canada <- merge(data.canada, data.frame(plot = names(perc.dead), perc.dead ...@@ -63,7 +70,7 @@ data.canada <- merge(data.canada, data.frame(plot = names(perc.dead), perc.dead
by = "plot", sort = FALSE) by = "plot", sort = FALSE)
####################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1 ####################################### PLOT SELECTION FOR THE ANALYSIS Remove data with dead == 1
table(data.canada$dead) table(data.canada$dead)
data.canada <- data.canada[data.canada$dead == 0,] ## data.canada <- data.canada[data.canada$dead == 0,]
vec.abio.var.names <- c("MAT", "MAP") vec.abio.var.names <- c("MAT", "MAP")
vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead", vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name","cluster","plot", "ecocode", "D", "G", "dead",
"year", "htot", "Lon", "Lat", "perc.dead","weights","census") "year", "htot", "Lon", "Lat", "perc.dead","weights","census")
......
...@@ -54,9 +54,10 @@ rm(data.sp, data.sp2) ...@@ -54,9 +54,10 @@ rm(data.sp, data.sp2)
################# MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed ################# MASSAGE TRAIT DATA Compute maximum height per species plus sd from observed
################# height to add variables to the traits data base Because we have two heights, ################# height to add variables to the traits data base Because we have two heights,
################# then take the max of the two heights and then bootstrap ################# then take the max of the two heights and then bootstrap
res.quant.boot <- suppressWarnings(t(sapply(levels(factor(data.swiss[["sp"]])), FUN = f.quantile.boot, data.swiss2 <- subset(data.swiss,subset=apply(is.na(data.swiss[, c("ht1", "ht2")]), 1, sum)<2)
R = 1000, x = log10(apply(data.swiss[, c("ht1", "ht2")], 1, max, na.rm = T)), res.quant.boot <- (t(sapply(levels(factor(data.swiss2[["sp"]])), FUN = f.quantile.boot,
fac = factor(data.swiss[["spcode"]])))) R = 1000, x = log10(apply(data.swiss2[, c("ht1", "ht2")], 1, max, na.rm = T)),
fac = factor(data.swiss2[["sp"]]))))
## create data base ## create data base
data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[, data.max.height <- data.frame(code = rownames(res.quant.boot), Max.height.mean = res.quant.boot[,
1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3], stringsAsFactors =FALSE) 1], Max.height.sd = res.quant.boot[, 2], Max.height.nobs = res.quant.boot[, 3], stringsAsFactors =FALSE)
......
...@@ -45,7 +45,7 @@ data.us$plot <- paste(as.character(data.us[["PlotID"]]), as.character(data.us[[" ...@@ -45,7 +45,7 @@ data.us$plot <- paste(as.character(data.us[["PlotID"]]), as.character(data.us[["
data.us$htot <- rep(NA, length(data.us[["Species"]])) ## height of tree in m - MISSING data.us$htot <- rep(NA, length(data.us[["Species"]])) ## height of tree in m - MISSING
data.us$tree.id <- as.character(data.us$TreeID) data.us$tree.id <- as.character(data.us$TreeID)
data.us$obs.id <- 1:nrow(data.us) ## tree unique id data.us$obs.id <- 1:nrow(data.us) ## tree unique id
data.us$sp.name <- species.clean[match(data.us$sp,species.clean$sp),"Latin_name"] ## TODO CHANGE THAT TO GET THE SPECIES LATIN NAME species.clean[match(data.us$sp,res.mat[["obs.id"]]),"Latin_name"]) data.us$sp.name <- species.clean[match(data.us$sp,species.clean$sp),"Latin_name"] ## GET THE SPECIES LATIN NAME species.clean[match(data.us$sp,res.mat[["obs.id"]]),"Latin_name"])
## census is missing use as only one census and check with mark ## census is missing use as only one census and check with mark
data.us$census <- rep(1,nrow(data.us)) data.us$census <- rep(1,nrow(data.us))
### add plot weights for computation of competition index (in 1/m^2) ### add plot weights for computation of competition index (in 1/m^2)
......
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment