Commit 6b35996d authored by Georges Kunstler's avatar Georges Kunstler
Browse files

start with dplyr progress

parent 86b60005
......@@ -243,8 +243,7 @@ return(df.res)
fun.CWM.Tn <- function(data){
# comput CWM and perc
data.plot<- group_by(data, plot.c) %>%
summarise(
data.plot<- group_by(data, plot.c) %>% summarise(
BATOT = sum(BA.w),
count = n(),
Leaf.N.CWM.fill = sum(BA.w*Leaf.N.mean),
......@@ -272,9 +271,7 @@ fun.CWM.Tn <- function(data){
na.rm = TRUE)/count,
Seed.mass.perc.species = (sum(!Seed.mass.genus,na.rm = TRUE)+
sum(!is.na(Seed.mass.genus)))/count
) %>%
select(-count) %>%
ungroup()
) %>% select(-count) %>%x ungroup()
data <- left_join(data, data.plot, by = 'plot.c')
## remove BA obs tree
......@@ -547,7 +544,7 @@ for (i in unique(data$ecocode)){
cat("dim after buffer tree removed", dim(data.merged),
'vs ', dim(data.t), "\n")
## add Phylo.group and Pheno.T to the data
data.merged <- left_join((data.merged,
data.merged <- left_join(data.merged,
data.TRAITS[, c("sp", "Phylo.group",
"Pheno.T", 'LeafType.T')],
by="sp")
......
#!/usr/bin/env Rscript
### MERGE us DATA Edited by FH
library(reshape, quietly = TRUE)
## library(reshape, quietly = TRUE)
library(dplyr)
library(rgeos)
source("R/format.data/format-fun.R")
dir.create("output/formatted/US", recursive = TRUE, showWarnings = FALSE)
......@@ -11,7 +12,7 @@ dir.create("output/formatted/US", recursive = TRUE, showWarnings = FALSE)
species.clean <- read.csv("data/raw/US/REF_SPECIES.CSV",
stringsAsFactors = FALSE)
## select column to keep
species.clean <- select(species.clean,
species.clean <- dplyr::select(species.clean,
SPCD, GENUS, SPECIES, VARIETY,
SUBSPECIES, SPECIES_SYMBOL)
names(species.clean)[1] <- "sp"
......@@ -45,22 +46,19 @@ data.us <- left_join(data.us, data.us.plot, by = 'PlotID')
data.us <- mutate(data.us, PLOT_ID_C = PlotID)
data.us$PlotID <- NULL
## NEED TO GET THE CENSUS
## MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER
## WHEN WE ANALYZE THAT DATASET LATER ON
## FORMAT INDIVIDUAL TREE DATA
## change unit and names of variables to be the same in all data for the tree
data.us$G <- 10 * (data.us$FinalDbh - data.us$InitDbh)/data.us$Interval
data.us$G <- 10 * (data.us$FinalDbh - data.us$InitDbh)/data.us$IntervalYears
## diameter growth in mm per year
data.us$BA.G <- (pi*(data.us$FinalDbh/2)^2-pi*(data.us$InitDbh/2)^2)/
data.us$Interval ## ba growth in cm^2/year
data.us$IntervalYears ## ba growth in cm^2/year
data.us$G[which(data.us$InitDbh == 0 | data.us$FinalDbh == -999)] <- NA
data.us$BA.G[which(data.us$InitDbh == 0 | data.us$FinalDbh == -999)] <- NA
data.us$year <- data.us$Interval ## number of year between measuremen
data.us$year <- data.us$IntervalYears ## number of year between measuremen
data.us$D <- data.us[["InitDbh"]]
data.us$D[data.us$D == 0] <- NA
## diameter in cm
......@@ -117,14 +115,43 @@ data.us$census <- data.us$StartMeasYear
### add plot weights for computation of competition index (in 1/m^2)
data.us$weights <- 1/(10000 * data.us[["PlotSize"]])
#### UPDATE NEED TO USE PLOT AND NOT SUBPLOT for competition index based on Mark
###################################################################################
# Ecoregions
fia.plots <- filter(data.us, !duplicated(PLOT_ID)) %>% dplyr::select(PLOT_ID,Lat,Lon)
geo.proj <- CRS("+proj=longlat +towgs84")
pts <- SpatialPoints(fia.plots[,3:2], proj4string=geo.proj)
#read in ecoregions and transform shapefile to lat/lon
Ecoregions <- readOGR(dsn="data/raw/US/ecoregions", layer="na_regns")
ecoregions.geo <- spTransform(Ecoregions, CRS=geo.proj)
#creates object that assigns each plot index to an ecoregion
plot.ecocodes <- over(pts, ecoregions.geo)[["ECOCODE"]]
#need to assign ecoregions to plots that are just outside polygon boundaries
ec.na <- which(plot.ecocodes == "- 0" | is.na(plot.ecocodes)) #get all NAs and Lake regions
gdis <- gDistance(pts[ec.na,],ecoregions.geo,byid=T) #calculate distance from na points to each polygon
gdis[which(ecoregions.geo$ECOCODE == "- 0"),] <- 99999 #exclude lakes by making them seem very far away
na.ind <- apply(gdis,2,FUN=which.min) #find index of minimum distance for each point
plot.ecocodes[ec.na] <- ecoregions.geo$ECOCODE[na.ind] #assign back to ecocodes
#merge back with data us
plot.indices <- match(data.us[['PLOT_ID']],fia.plots[['PLOT_ID']])
data.us$Ecocode <- plot.ecocodes[plot.indices]
## ECOREGION merge greco to have no ecoregion
## with low number of observation
greco <- read.csv(file = "data/raw/US/EcoregionCodes.csv", header = T)
colnames(greco)[1] <- "Ecocode"
table(data.us$Ecocode)
data.us <- merge(data.us, greco[, -4], by = "Ecocode")
data.us <- left_join(data.us, greco[, -4], by = "Ecocode")
data.us$DIVISION <- as.character(data.us$DIVISION)
data.us$DIVISION[data.us$DIVISION %in%
......@@ -167,6 +194,12 @@ div.ecocode <- tapply(data.us$sp, INDEX = data.us$ecocode, fun_div)
nsp.ecocode <- unlist(lapply(by(data.us, data.us$ecocode, fun.sp.in.ecoregion), length))
#### get wc climate
source("R/utils/climate.R")
clim <- GetClimate(data.us$Lat, data.us$Lon)
data.us$MAT <- clim$MAT
data.us$MAP <- clim$MAP
######################## BIOMES
### biomes from Whittaker
source("R/utils/plot.R")
......@@ -177,7 +210,7 @@ biomes <- fun.overly.plot.on.biomes(MAP = data.us$MAP/10,
library(BIOMEplot)
plot_biome()
points(data.us$MAP/10, data.us$MAT, col = 1+as.numeric(is.na(biomes)))
abline(h = 4)
abline(h = 4)
# change factor
biomes <- as.character(biomes$biomes)
biomes[is.na(biomes) & data.us$MAT < 4] <- 'Boreal forest'
......@@ -185,20 +218,13 @@ biomes[is.na(biomes) & data.us$MAT > 4] <- 'Temperate rain forest'
table(biomes)
data.us$biomes <- biomes
## ## WWF
## source("R/utils/ecoregions.R")
## data.us$biomes <- GetBiomes(data.us$Lon, data.us$Lat)
## table(data.us$biome)
## data.us$biome[ data.us$biome == 'Temp_Sav' ] <- 'Temp_Broadleaf_Mix'
## data.us$biome[grep('Trop_', data.us$biome)] <- 'Trop'
div.biome <- tapply(data.us$sp, INDEX = data.us$biome, fun_div)
nsp.biome <- unlist(lapply(by(data.us, data.us$biome, fun.sp.in.ecoregion),
div.biome <- tapply(data.us$sp, INDEX = data.us$biomes, fun_div)
nsp.biome <- unlist(lapply(by(data.us, data.us$biomes, fun.sp.in.ecoregion),
length))
## ## ## plot map to check coordinates syste
## ## ## ## plot map to check coordinates system
## library(RColorBrewer)
## library(rworldmap)
## newmap <- getMap(resolution = 'coarse')
......@@ -207,8 +233,6 @@ nsp.biome <- unlist(lapply(by(data.us, data.us$biome, fun.sp.in.ecoregion),
## ecoreg <- factor(data.us$ecocode);# levels(ecoreg) <- mycols <-
## brewer.pal(length(levels(ecoreg)), "Set3")
## points(data.us[['Lon']], data.us[['Lat']], pty = '.', cex = .2, col = (ecoreg))
## legend('bottomleft', col = mycols,
## legend = names(table(data.us$ecocode)), pch = rep(19, length(levels(ecoreg))))
###### PERCENT DEAD variable percent dead/cannot do with since dead variable is
###### missing compute numer of dead per plot to remove plot with disturbance
......@@ -216,10 +240,10 @@ perc.dead <- tapply(data.us[["dead"]], INDEX = data.us[["plot"]],
FUN = function.perc.dead)
# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF
# AVAILABLE (disturbance record)
data.us <- merge(data.us, data.frame(plot = names(perc.dead),
data.us <- left_join(data.us, data.frame(plot = names(perc.dead),
perc.dead = perc.dead,
stringsAsFactors = FALSE),
by = "plot", sort = FALSE)
by = "plot")
## variables to keep
vec.abio.var.names <- c("MAT", "MAP")
......@@ -229,7 +253,7 @@ vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name", "cluster",
"Lon", "Lat", "weights", "census", 'biomes')
data.tree <- subset(data.us, select = c(vec.basic.var, vec.abio.var.names))
## select only tree above 10cm DBH
data.tree <- subset(data.tree, subset =data.tree$D>10 | is.na(data.tree$D))
data.tree <- filter(data.tree, D>10 | is.na(D))
## convert var factor in character or numeric
data.tree <- fun.convert.type.I(data.tree)
write.csv(data.tree, file = "output/formatted/US/tree.csv", row.names = FALSE)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment