merge script Sweden; problems with missing tree ID

1051a527 · fhui28 · 2d24ec57 · 1051a527
Commit 1051a527 authored 11 years ago by fhui28
Hide whitespace changes
Inline Side-by-side

Showing

with 113 additions and 0 deletions
+113 -0
--- a/merge.data.SWEDEN.R
+++ b/merge.data.SWEDEN.R
+### MERGE sweden DATA
+rm(list = ls()); source("./R/format.function.R"); library(reshape); 
+
+#########################
+## READ DATA
+####################
+### read individuals tree data
+#data.swe1 <- read.csv("./data/raw/DataSweden/Swe_NFI_1.csv",header=T,stringsAsFactors=F)
+#data.swe2 <- read.csv("./data/raw/DataSweden/Swe_NFI_2a.csv",header=T,stringsAsFactors=F)
+#data.swe3 <- read.csv("./data/raw/DataSweden/Swe_NFI_3.csv",header=T,stringsAsFactors=F)
+data.swe <- read.table("./data/raw/DataSweden/Swe_NFI_all.txt",header=T,stringsAsFactors=F,sep="\t")
+
+### Species names are in the xlsx files if required (we already have sp codes)
+
+#data.swe <- rbind(data.swe1, data.swe2, data.swe3); 
+#rm(data.swe1, data.swe2, data.swe3)
+#data.swe$treeid <- apply(data.swe[,3:5],1,paste,collapse="_")
+#data.swe$plotid <- apply(data.swe[,3:4],1,paste,collapse="_")
+data.swe <- data.swe[order(data.swe$TreeID,data.swe$PlotInvent),] ## Shows the TreeID = "" first
+sum(data.swe$TreeID == "")
+dim(data.swe)
+
+### STOP HERE!!!
+
+######################################
+## MASSAGE TRAIT DATA
+############################
+## Mean height in dataset
+
+##########################################
+## FORMAT INDIVIDUAL TREE DATA
+#############
+
+## change unit and names of variables to be the same in all data for the tree 
+data.swe$G <- 10*(data.swe$FinalDBH-data.swe$InitDBH)/data.swe$Interval ## diameter growth in mm per year
+data.swe$G[which(data.swe$InitDBH == 0 | data.swe$FinalDBH == -999)] <- NA
+data.swe$year <- data.swe$Interval ## number of year between measuremen
+data.swe$D <- data.swe[["InitDBH"]]; data.swe$D[data.swe$D == 0] <- NA ;## diameter in cm
+data.swe$dead <- as.numeric(data.swe$FinalDBH > 0) ## dummy variable for dead tree 0 alive 1 dead
+data.swe$sp <- as.character(data.swe[["Species"]]) ## species code
+data.swe$plot <- (data.swe[["PLOT_ID"]]) ## plot code
+data.swe$htot <- rep(NA,length(data.swe[["Species"]])) ## height of tree in m - MISSING
+data.swe$tree.id <- gsub("_",".",data.swe$PLOTTREE); ## tree unique id
+data.swe$sp.name <- NA; 
+for(i in 1:length(unique(data.swe$sp))) { 
+	v <- species.clean$SPCD
+	data.swe$sp.name[which(data.swe$sp == unique(data.swe$sp)[i])] <- species.clean$COMMON_NAME[which(v == unique(data.swe$sp)[i])] }
+
+
+######################
+## ECOREGION
+###################
+## merge greco to have no ecoregion with low number of observation
+greco <- read.csv(file = "./data/raw/DataCanada/EcoregionCodes.csv", header = T, sep = "\t") 
+
+table(data.swe$Ecocode) 
+## Some ecoregions still have small # of individuals, so either drop off for analysis later on or wait for Quebec data to come in
+# 
+# library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); 
+# ecoreg <- unclass(data.swe$eco_code); 
+# plot(data.swe[["CX"]][order(ecoreg)],data.swe[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg)))); 
+# legend("bottomright", col = mycols, legend = levels(data.swe$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2)
+# points(data.swe[["CX"]][ecoreg == 9],data.swe[["CY"]][ecoreg == 9],pty=".",cex=.2, col = "black"); ## Highlight the region with 55 sites
+# ## PA1219 looks to be similar to PA1209; merge them together
+# data.swe$eco_codemerged <- combine_factor(data.swe$eco_code, c(1:8,6,9))
+
+######################
+## PERCENT DEAD
+###################
+## variable percent dead/cannot do with since dead variable is missing
+## compute numer of dead per plot to remove plot with disturbance
+perc.dead <- tapply(data.swe[["dead"]],INDEX=data.swe[["plot"]],FUN=function.perc.dead)
+# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record)
+data.swe <- merge(data.swe,data.frame(plot=names(perc.dead),perc.dead=perc.dead), by = "plot", sort=FALSE)
+
+###########################################################
+### PLOT SELECTION FOR THE ANALYSIS
+###################
+## Remove data with dead == 1
+table(data.swe$dead)
+## Nothing to remove
+
+colnames(data.swe)[c(3,1,11,13)] <- c("sp","plot","w","ecocode")
+vec.abio.var.names <-  c("MAT","MAP")
+vec.basic.var <-  c("tree.id","sp","sp.name","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead")
+data.tree <- subset(data.swe,select=c(vec.basic.var,vec.abio.var.names))
+
+##############################################
+## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species
+###########################
+data.BA.SP <- BA.SP.FUN(id.tree=as.vector(data.swe[["tree.id"]]), diam=as.vector(data.swe[["D"]]),
+	sp=as.vector(data.swe[["sp"]]), id.plot=as.vector(data.swe[["plot"]]),
+	weights=1/(10000*data.swe[["SubPlot_Size"]]), weight.full.plot=NA)
+
+## change NA and <0 data for 0
+data.BA.SP[is.na(data.BA.SP)] <- 0; data.BA.SP[,-1][data.BA.SP[,-1]<0] <- 0
+
+### CHECK IF sp and sp name for column are the same
+if(sum(!(names(data.BA.SP)[-1] %in% unique(data.swe[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree")
+
+#### compute BA tot for all competitors
+BATOT.COMPET <- apply(data.BA.SP[,-1],1,sum,na.rm=TRUE)
+data.BA.SP$BATOT.COMPET <- BATOT.COMPET; rm(BATOT.COMPET)
+### create data frame
+names(data.BA.SP) <- c("tree.id",names(data.BA.SP)[-1])
+data.BA.sp <- merge(data.frame(tree.id=data.swe[["tree.id"]],ecocode=data.swe[["ecocode"]]),data.BA.SP,by="tree.id",sort=FALSE)
+## test
+if(sum(!data.BA.sp[["tree.id"]] == data.tree[["tree.id"]]) >0) stop("competition index not in the same order than data.tree")
+
+## save everything as a list
+list.swe <- list(data.tree=data.tree,data.BA.SP=data.BA.sp,data.traits=data.traits)
+save(list.spain,file="./data/process/list.swe.Rdata")
+