merge.data.US.R 6.55 KiB
### MERGE us DATA
### Edited by FH
rm(list = ls()); source("./R/format.function.R"); library(reshape)
#########################
## READ DATA
####################
### read individuals tree data
data.us <- read.csv("./data/raw/DataUS/FIA51_trees_w_supp.csv",header=TRUE,stringsAsFactors =FALSE)
### read species names
species.clean <- read.csv("./data/species.list/REF_SPECIES.CSV",stringsAsFactors=FALSE)
######################################
## MASSAGE TRAIT DATA
############################
## HEIGHT DATA FOR TREE MISSING
## BRING US DATA FOR HEIGHT OVER WHEN WE ANALYZE THAT DATASET LATER ON
#####################################
## FORMAT INDIVIDUAL TREE DATA
#############
## change unit and names of variables to be the same in all data for the tree 
data.us$G <- 10*(data.us$FinalDbh-data.us$InitDbh)/data.us$Interval ## diameter growth in mm per year
data.us$G[which(data.us$InitDbh == 0 | data.us$FinalDbh == -999)] <- NA
data.us$year <- data.us$Interval ## number of year between measuremen
data.us$D <- data.us[["InitDbh"]]; data.us$D[data.us$D == 0] <- NA ;## diameter in cm
data.us$dead <- as.numeric(data.us$FinalDbh > 0) ## dummy variable for dead tree 0 alive 1 dead
data.us$sp <- as.character(data.us[["Species"]]) ## species code
data.us$plot <- as.character(data.us[["PlotID"]]) ## plot code
data.us$subplot <- paste(as.character(data.us[["PlotID"]]),as.character(data.us[["SubplotNumber"]]),sep=".") ## plot code
data.us$htot <- rep(NA,length(data.us[["Species"]])) ## height of tree in m - MISSING
data.us$tree.id <- as.character(data.us$TreeID); ## tree unique id
data.us$sp.name <- NA; 
## v <- species.clean$SPCD; for(i in 1:length(unique(data.us$sp))) { 
## 	data.us$sp.name[which(data.us$sp == unique(data.us$sp)[i])] <- species.clean$COMMON_NAME[which(v == unique(data.us$sp)[i])] }
######################
## ECOREGION
###################
## merge greco to have no ecoregion with low number of observation
greco <- read.csv(file = "./data/raw/DataUS/EcoregionCodes.csv", header = T); colnames(greco)[1] <- "Ecocode" 
table(data.us$Ecocode) 
data.us <- merge(data.us, greco[,-4], by = "Ecocode"); data.us$DIVISION <- factor(data.us$DIVISION)
## Some ecoregions still have small # of individuals, so create a variable which does division if # ind < 10000; else it reads Domain
data.us$eco_codemerged <- as.character(data.us$DIVISION);  tab.small.div <- table(data.us$eco_codemerged)
sel.small.div <- which(table(data.us$eco_codemerged) < 10000)
for(i in 1:length(sel.small.div)) { 
	find.ind <- which(data.us$eco_codemerged == names(tab.small.div)[sel.small.div[i]]); print(length(find.ind))
	data.us$eco_codemerged[find.ind] <- as.character(data.us$DOMAIN)[find.ind] 
data.us <- data.us[,-c(2,24,25)] ## Remove other ecocode related stuff to save space
######################
## PERCENT DEAD
###################
## variable percent dead/cannot do with since dead variable is missing
## compute numer of dead per plot to remove plot with disturbance
perc.dead <- tapply(data.us[["dead"]],INDEX=data.us[["plot"]],FUN=function.perc.dead)
# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record)
data.us <- merge(data.us,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE)
###########################################################
### PLOT SELECTION FOR THE ANALYSIS
###################
## Remove data with dead == 1
table(data.us$dead)
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
## data.us <- data.us[data.us$dead == 1,] vec.abio.var.names <- c("MAT","MAP") vec.basic.var <- c("tree.id","sp","sp.name","plot","eco_codemerged","D","G","dead","year","htot","Lon","Lat","perc.dead") ## data.tree <- subset(data.us,select=c(vec.basic.var,vec.abio.var.names)) ## remove everything from memory not ned before computation rm(greco,perc.dead,species.clean,tab.small.div,sel.small.div) library(data.table) data.us <- data.table(data.us) data.us[,Species:=NULL] data.us[,FinalDbh:=NULL] data.us[,InitDbh:=NULL] data.us[,IndWeight:=NULL] data.us[,SubplotNumber:=NULL] data.us[,IntervalYears:=NULL] gc() ############################################## ## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species ########################### ecoregion.name <- unique(data.us[["eco_codemerged"]]) subplot.1 <- unique(data.us[eco_codemerged %in% ecoregion.name[1:5],subplot]) subplot.2 <- unique(data.us[eco_codemerged %in% ecoregion.name[6:11],subplot]) system.time(data.BA.SP.1 <- BA.SP.FUN(id.tree=as.vector(data.us[subplot %in% subplot.1,tree.id]), diam=as.vector(data.us[subplot %in% subplot.1,D]), sp=as.vector(data.us[subplot %in% subplot.1,sp]), id.plot=as.vector(data.us[subplot %in% subplot.1,subplot]), weights=1/(10000*data.us[subplot %in% subplot.1,PlotSize]), weight.full.plot=NA, name="France",num=1)) system.time(data.BA.SP.2 <- BA.SP.FUN(id.tree=as.vector(data.us[subplot %in% subplot.2,tree.id]), diam=as.vector(data.us[subplot %in% subplot.2,D]), sp=as.vector(data.us[subplot %in% subplot.2,sp]), id.plot=as.vector(data.us[subplot %in% subplot.2,subplot]), weights=1/(10000*data.us[subplot %in% subplot.2,PlotSize]), weight.full.plot=NA, name="France",num=2)) save(data.BA.SP.1,file="./data/process/data.BA.SP.US.1.Rdata") save(data.BA.SP.2,file="./data/process/data.BA.SP.US.2.Rdata") Possibility to merge that ? # data.BA.SP <- rbind(data.BA.SP.1,data.BA.SP.2) ## change NA and <0 data for 0 data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0 ### CHECK IF sp and sp name for column are the same if(sum(!(names(data.BA.SP)[-1] %in% unique(data.us[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree") #### compute BA tot for all competitors BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE) data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET) ### create data frame names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1]) data.BA.sp <- merge(data.frame(tree.id=data.us[["tree.id"]],ecocode=data.us[["eco_codemerged"]]),data.BA.SP,by="tree.id",sort=FALSE) ## test if(sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree") ## save everything as a list list.us <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits) save(list.spain,file="./data/process/list.us.Rdata")