edits to merge R script SPAIN 230813

36e25737 · fhui · 45480830 · 36e25737
Commit 36e25737 authored 11 years ago by fhui
Hide whitespace changes
Inline Side-by-side

Showing

with 25 additions and 25 deletions
+25 -25
--- a/merge.data.SPAIN.R
+++ b/merge.data.SPAIN.R
@@ -6,8 +6,8 @@ rm(list = ls()); source("./R/format.function.R"); library(reshape)
 ## READ DATA
 ####################
 ### read individuals tree data
-data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI.txt',header=TRUE,stringsAsFactors=FALSE,sep = "\t")
-
+#data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI.txt',header=TRUE,stringsAsFactors=FALSE,sep = "\t")
+data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI_aug13_alldata.txt',header=TRUE,stringsAsFactors=FALSE,sep = "\t")

 ######################################
 ## MASSAGE TRAIT DATA
@@ -15,9 +15,8 @@ data.spain <- read.table('./data/raw/DataSpain/Tree_data_SFI.txt',header=TRUE,st
 ## Compute maximum height per species plus sd from observed height to add variables to the traits data base
 ## Because we have two heights, then take the max of the two heights and then bootstrap 
 res.quant.boot <- t(sapply(levels(factor(data.spain[["SP_code"]])),FUN=f.quantile.boot,R=1000,x=log10(apply(data.spain[,c("ht1","ht2")],1,max,na.rm=T)),fac=factor(data.spain[["SP_code"]])))
-#max.heights <- read.csv("/media/fhui/Lexar/Career & Work/GKunstler_competition/data/raw/DataSpain/MaximumHeigth.csv", header = T)
-# 
-# ## create data base
+
+## create data base
 data.max.height <- data.frame(code=rownames(res.quant.boot),Max.height.mean=res.quant.boot[,1],Max.height.sd=res.quant.boot[,2],Max.height.nobs=res.quant.boot[,3])
 rm(res.quant.boot)
 write.csv(data.max.height,file="./data/process/data.max.height.spain.csv") # I was planning to save processed data in that folder
@@ -45,14 +44,13 @@ write.csv(data.max.height,file="./data/process/data.max.height.spain.csv") # I w

 ## change unit and names of variables to be the same in all data for the tree 
 data.spain$G <- data.spain[["adbh"]] ## diameter growth in mm per year
-data.spain$year <- rep(NA,length(data.spain[["adbh"]])) ## number of year between measurement/missing!
+data.spain$year <- rep(NA,length(data.spain[["adbh"]])) ## number of year between measurement - MISSING
 data.spain$D <- data.spain[["dbh1"]]/10 ## diameter in mm convert to cm
-data.spain$dead <- rep(NA,length(data.spain[["adbh"]])) ## dummy variable for dead tree 0 alive 1 dead/missing!
+data.spain$dead <- as.numeric(data.spain[["Life_status"]] == "dead") ## dummy variable for dead tree 0 alive 1 dead - MIGHT WANT TO CHANGE THIS TO BE BASED ON MORTALITY_CUT
 data.spain$sp <- as.character(data.spain[["SP_code"]]) ## species code
 data.spain$plot <- (data.spain[["Plot_ID_SFI"]]) ## plot code
-data.spain$htot <- data.spain[["ht1"]]## height of tree in m / here I want to keep the actual height not the max
-data.spain$tree.id <- paste(sapply(data.spain[,"Tree_ID_SFI"],substr,1,6),".",
-                            sapply(data.spain[,"Tree_ID_SFI"],substr,7,10),sep="") ## tree unique id
+data.spain$htot <- data.spain[["ht1"]]## height of tree in m
+data.spain$tree.id <- data.spain$Tree_ID_SFI ## tree unique id

 #### change coordinates system of x y to be in lat long WGS84/don't know how to do this
 library(sp); library(dismo); library(rgdal); 
@@ -72,9 +70,9 @@ data.spain$Lat <- coordinates(data.sp2)[,"CY"]
 ## points(data.sp2,cex=0.2,col="red")
 rm(data.sp,data.sp2)

-
-save(data.spain, file = "./data/process/datspain.RData")
-############################
+######################
+## ECOREGION
+###################
 ## merge greco to have no ecoregion with low number of observation
 greco <- read.csv(file = "./data/raw/DataSpain/R_Ecoregion.csv", header = T) 
 greco <- greco[,c("Plot_ID_SFI","BIOME","eco_code")]
@@ -85,25 +83,28 @@ data.spain <- merge(data.spain, greco2, by = "Plot_ID_SFI")
 rm(greco2)

 table(data.spain$eco_code)
-## There's an eco-region with no code, and one with 55 sites
+## There's an eco-region with no code, and one with < 1000 sites
 ## The former we could drop as they were on the border of Spain

 library(RColorBrewer); mycols <- brewer.pal(10,"Set3"); 
 ecoreg <- unclass(data.spain$eco_code); 
 plot(data.spain[["CX"]][order(ecoreg)],data.spain[["CY"]][order(ecoreg)],pty=".",cex=.2, col = rep(mycols,as.vector(table(ecoreg)))); 
-legend("bottomright", col = mycols, legend = levels(data.spain$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2)
-points(data.spain[["CX"]][ecoreg == 9],data.spain[["CY"]][ecoreg == 9],pty=".",cex=.2, col = "black"); ## Highlight the region with 55 sites
+legend("topleft", col = mycols, legend = levels(data.spain$eco_code), pch = rep(19,length(levels(ecoreg))),cex=2)
+points(data.spain[["CX"]][ecoreg == 9],data.spain[["CY"]][ecoreg == 9],pty=".",cex=.5, col = "black"); ## Highlight the "rare" ecoregions
+points(data.spain[["CX"]][ecoreg == 1],data.spain[["CY"]][ecoreg == 1],pty=".",cex=.5, col = "black"); ## Highlight the "rare" ecoregions
 ## PA1219 looks to be similar to PA1209; merge them together
 data.spain$eco_codemerged <- combine_factor(data.spain$eco_code, c(1:8,6,9))
 data.spain <- data.spain[-which(data.spain$eco_codemerged == ""),]

-#######################
+######################
+## PERCENT DEAD
+###################
 ## variable percent dead/cannot do with since dead variable is missing
-# ###compute numer of dead per plot to remove plot with disturbance
-# perc.dead <- tapply(data.spain[["dead"]],INDEX=data.spain[["idp"]],FUN=function.perc.dead)
-# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record)
-# data.spain <- merge(data.spain,data.frame(idp=as.numeric(names(perc.dead)),perc.dead=perc.dead),sort=FALSE)
-data.spain$perc.dead <- NA
+### compute number of dead per plot to remove plot with disturbance
+perc.dead <- tapply(data.spain[["dead"]],INDEX=data.spain[["plot"]],FUN=function.perc.dead)
+table(data.spain$dead)
+## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OTHER VARIABLES IF AVAILABLE (disturbance record)
+data.spain <- merge(data.spain,data.frame(plot=as.numeric(names(perc.dead)),perc.dead=perc.dead),sort=FALSE, by = "plot")

 ###########################################################
 ### PLOT SELECTION FOR THE ANALYSIS
@@ -113,11 +114,10 @@ table(data.spain$Mortality_Cut)
 data.spain <- subset(data.spain,subset= (data.spain[["Mortality_Cut"]] == 0 |  data.spain[["Mortality_Cut"]] == ""))

 colnames(data.spain)[colnames(data.spain) %in% c("mat","pp","PET")] <- c("MAT","PP","PET")
-colnames(data.spain)[names(data.spain) =="eco_codemerged"  ] <- c("ecocode")
+colnames(data.spain)[names(data.spain) =="eco_codemerged"] <- c("ecocode")
 vec.abio.var.names <-  c("MAT","PP","PET")
 vec.basic.var <-  c("tree.id","sp","sp.name","plot","ecocode","D","G","dead","year","htot","Lon","Lat","perc.dead")
 data.tree <- subset(data.spain,select=c(vec.basic.var,vec.abio.var.names))
-save(data.spain, file = "./data/process/datspain.RData")

 ##############################################
 ## COMPUTE MATRIX OF COMPETITION INDEX WITH SUM OF BA PER SPECIES IN EACH PLOT in m^2/ha without the target species
@@ -131,7 +131,7 @@ data.BA.SP[which(is.na(data.BA.SP),arr.ind=TRUE)] <- 0
 data.BA.SP[,-1][which(data.BA.SP[,-1]<0,arr.ind=TRUE)] <- 0

 ### CHECK IF sp and sp name for column are the same
-if(sum(! (names(data.BA.SP)[-1] %in% unique(data.spain[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree")
+if(sum(!(names(data.BA.SP)[-1] %in% unique(data.spain[["sp"]]))) >0) stop("competition index sp name not the same as in data.tree")

 #### compute BA tot for all competitors
 BATOT.COMPET <- apply(data.BA.SP[,-1],MARGIN=1,FUN=sum,na.rm=TRUE)