Commit 065497c6 authored by Georges Kunstler's avatar Georges Kunstler
Browse files

fixed issue with dplyr

parent d5f590e9
......@@ -76,7 +76,7 @@ function.perc.dead2 <- function(dead) {
## function to convert var factor in character or numeric
fun.convert.type.I <- function(data.tree){
character.var <- c("sp", "sp.name", "ecocode")
character.var <- c("sp", "sp.name", "ecocode", "biomes")
numeric.var <- c("D", "G", "BA.G", "dead",
"Lon", "Lat", "weights", "MAT", "MAP")
factor.to.convert.var <- c("obs.id", "tree.id", "cluster", "plot", "census")
......@@ -90,7 +90,7 @@ return(data.tree)
}
fun.convert.type.B <- function(data.tree){
character.var <- c("sp", "sp.name", "ecocode")
character.var <- c("sp", "sp.name", "ecocode", "biomes")
numeric.var <- c("D", "G", "BA.G", "dead",
"x", "y")
factor.to.convert.var <- c("obs.id", "tree.id", "cluster", "plot", "census")
......
......@@ -10,7 +10,7 @@
##' @return basal area in cm^2/m^2
##' @author Kunstler
BA.fun <- function(diam, weights) {
((diam/2)^2) * pi * weights
diam^2/4 * pi * weights
}
## function to fill missing cat variables with A_EV
......@@ -53,12 +53,12 @@ fun.extract.trait.add.missing.sp.NA <- function(vec.sp, traits.data,
trait.t <- traits.data[traits.data[["sp"]] %in% vec.sp, trait.name]
## add NA for missing sp
trait.t <- c(trait.t, rep(NA,
sum(! (vec.sp %in% traits.data[["sp"]]))))
sum(!(vec.sp %in% traits.data[["sp"]]))))
## reorder
names(trait.t) <- c(as.character(traits.data[traits.data[["sp"]] %in%
vec.sp, "sp"]),
as.character(vec.sp[! (vec.sp %in% traits.data[["sp"]])]))
trait <- (trait.t)[match(vec.sp, names(trait.t))]
as.character(vec.sp[!(vec.sp %in% traits.data[["sp"]])]))
trait <- trait.t[match(vec.sp, names(trait.t))]
return(trait)
}
......@@ -161,33 +161,34 @@ format.one.trait.CWM <- function(trait.list, sp.num, BA) {
##############################################
##### FUNCTIONS FOR PLOT DATA
fun.fill.missing.traits <- function(data){
data <- mutate(data,
require(dplyr)
data <- mutate(data,
Leaf.N.genus = ifelse(is.na(Leaf.N.mean),
NA,
Leaf.N.genus),
NA,
Leaf.N.genus),
Leaf.N.mean = ifelse(is.na(Leaf.N.mean),
mean(Leaf.N.mean, na.rm = TRUE),
Leaf.N.mean),
SLA.genus = ifelse(is.na(SLA.mean),
NA,
SLA.genus),
NA,
SLA.genus),
SLA.mean = ifelse(is.na(SLA.mean),
mean(SLA.mean, na.rm = TRUE),
SLA.mean),
Wood.density.genus = ifelse(is.na(Wood.density.mean),
NA,
Wood.density.genus),
NA,
Wood.density.genus),
Wood.density.mean = ifelse(is.na(Wood.density.mean),
mean(Wood.density.mean, na.rm = TRUE),
Wood.density.mean),
Max.height.genus = ifelse(is.na(Max.height.mean),
NA,
Max.height.genus),
NA,
Max.height.genus),
Max.height.mean = ifelse(is.na(Max.height.mean),
mean(Max.height.mean, na.rm = TRUE),
Max.height.mean),
Seed.mass.genus = ifelse(is.na(Seed.mass.mean),
NA,
NA,
Seed.mass.genus),
Seed.mass.mean = ifelse(is.na(Seed.mass.mean),
mean(Seed.mass.mean, na.rm = TRUE),
......@@ -198,29 +199,37 @@ return(data)
fun.order.colums <- function(data){
data <- mutate(data,
Leaf.N.focal = Leaf.N.focal,
Leaf.N.CWM.fill = Leaf.N.CWM.fill,
Leaf.N.perc.genus = Leaf.N.perc.genus,
Leaf.N.perc.species = Leaf.N.perc.species,
Seed.mass.focal = Seed.mass.focal,
Seed.mass.CWM.fill = Seed.mass.CWM.fill,
Seed.mass.perc.genus = Seed.mass.perc.genus,
Seed.mass.perc.species = Seed.mass.perc.species,
SLA.focal = SLA.focal,
SLA.CWM.fill = SLA.CWM.fill,
SLA.perc.genus = SLA.perc.genus,
SLA.perc.species = SLA.perc.species,
Wood.density.focal = Wood.density.focal,
Wood.density.CWM.fill = Wood.density.CWM.fill,
Wood.density.perc.genus = Wood.density.perc.genus,
Wood.density.perc.species = Wood.density.perc.species,
Max.height.focal = Max.height.focal,
Max.height.CWM.fill = Max.height.CWM.fill,
Max.height.perc.genus = Max.height.perc.genus,
Max.height.perc.species = Max.height.perc.species
)
fun.order.colums <- function(data, var.order = c("obs.id", "tree.id", "sp",
"sp.name", "cluster", "plot",
"ecocode","D", "G", "BA.G", "year",
"dead", "Lon", "Lat", "census", "MAT",
"MAP", "biomes", "BA.w", "Leaf.N.focal",
"Leaf.N.CWM.fill", "Leaf.N.abs.CWM.fill",
"Leaf.N.perc.genus",
"Leaf.N.perc.species",
"Seed.mass.focal","Seed.mass.CWM.fill",
"Seed.mass.abs.CWM.fill",
"Seed.mass.perc.genus",
"Seed.mass.perc.species",
"SLA.focal", "SLA.CWM.fill",
"SLA.abs.CWM.fill","SLA.perc.genus",
"SLA.perc.species", "Wood.density.focal",
"Wood.density.CWM.fill",
"Wood.density.abs.CWM.fill",
"Wood.density.perc.genus",
"Wood.density.perc.species",
"Max.height.focal",
"Max.height.CWM.fill",
"Max.height.abs.CWM.fill",
"Max.height.perc.genus",
"Max.height.perc.species", "BATOT",
"Phylo.group", "Pheno.T",
"LeafType.T", "cat")){
require(dplyr)
data <- data %>% dplyr::select(-Leaf.N.genus, -Seed.mass.genus, - SLA.genus,
-Wood.density.genus, -Max.height.genus)
data <- data[, var.order]
return(data)
}
......@@ -240,37 +249,43 @@ df.res$obs.id <- df[['obs.id']]
return(df.res)
}
fun.CWM.Tn <- function(data){
require(dplyr)
# comput CWM and perc
data.plot<- group_by(data, plot.c) %>% summarise(
BATOT = sum(BA.w),
count = n(),
Leaf.N.CWM.fill = sum(BA.w*Leaf.N.mean),
Leaf.N.perc.genus = sum(!Leaf.N.genus,
na.rm = TRUE)/count,
Leaf.N.perc.species = (sum(!Leaf.N.genus,na.rm = TRUE)+
sum(!is.na(Leaf.N.genus)))/count,
SLA.CWM.fill = sum(BA.w*SLA.mean),
SLA.perc.genus = sum(!SLA.genus,
na.rm = TRUE)/count,
SLA.perc.species = (sum(!SLA.genus,na.rm = TRUE)+
sum(!is.na(SLA.genus)))/count,
Wood.density.CWM.fill = sum(BA.w*Wood.density.mean),
Wood.density.perc.genus = sum(!Wood.density.genus,
data.plot<- group_by(data, plot.c) %>%
summarise(
BATOT = sum(BA.w),
count = n(),
Leaf.N.CWM.fill = sum(BA.w*Leaf.N.mean),
Leaf.N.perc.genus = sum(!is.na(Leaf.N.genus),
na.rm = TRUE)/count,
Leaf.N.perc.species = sum(!Leaf.N.genus &
!is.na(Leaf.N.genus),na.rm = TRUE)/count,
SLA.CWM.fill = sum(BA.w*SLA.mean),
SLA.perc.genus = sum(!is.na(SLA.genus),
na.rm = TRUE)/count,
SLA.perc.species = sum(!SLA.genus &
!is.na(SLA.genus),na.rm = TRUE)/count,
Wood.density.CWM.fill = sum(BA.w*Wood.density.mean),
Wood.density.perc.genus = sum(!is.na(Wood.density.genus),
na.rm = TRUE)/count,
Wood.density.perc.species = sum(!Wood.density.genus &
!is.na(Wood.density.genus),
na.rm = TRUE)/count,
Wood.density.perc.species = (sum(!Wood.density.genus,na.rm = TRUE)+
sum(!is.na(Wood.density.genus)))/count,
Max.height.CWM.fill = sum(BA.w*Max.height.mean),
Max.height.perc.genus = sum(!Max.height.genus,
Max.height.CWM.fill = sum(BA.w*Max.height.mean),
Max.height.perc.genus = sum(!is.na(Max.height.genus),
na.rm = TRUE)/count,
Max.height.perc.species = sum(!Max.height.genus &
!is.na(Max.height.genus),
na.rm = TRUE)/count,
Max.height.perc.species = (sum(!Max.height.genus,na.rm = TRUE)+
sum(!is.na(Max.height.genus)))/count,
Seed.mass.CWM.fill = sum(BA.w*Seed.mass.mean),
Seed.mass.perc.genus = sum(!Seed.mass.genus,
na.rm = TRUE)/count,
Seed.mass.perc.species = (sum(!Seed.mass.genus,na.rm = TRUE)+
sum(!is.na(Seed.mass.genus)))/count) %>%
select(-count) %>% ungroup()
Seed.mass.CWM.fill = sum(BA.w*Seed.mass.mean),
Seed.mass.perc.genus = sum(!is.na(Seed.mass.genus),
na.rm = TRUE)/count,
Seed.mass.perc.species = sum(!Seed.mass.genus &
!is.na(Seed.mass.genus),
na.rm = TRUE)/count) %>%
select(-count) %>% ungroup()
data <- left_join(data, data.plot, by = 'plot.c')
## remove BA obs tree
......@@ -287,6 +302,7 @@ return(data)
fun.traits.focal <- function(data){
require(dplyr)
data <- data %>%
mutate(
Leaf.N.focal = ifelse(is.na(Leaf.N.genus) |
......@@ -324,8 +340,15 @@ fun.CWM.traits.all.plot.census.dplyr <- function(data,data.TRAITS){
plot.c = paste(plot, census, sep ='_'),
BA.w = BA.fun(D, weights))
# merge traits
data <- left_join(data, data.TRAITS, by = 'sp')
data <- fun.fill.missing.traits(data)
data.TRAITS <- fun.fill.missing.traits(data.TRAITS)
data <- left_join(data,
select(data.TRAITS, sp,
Leaf.N.mean, Leaf.N.genus,
Seed.mass.mean, Seed.mass.genus,
SLA.mean, SLA.genus,
Wood.density.mean, Wood.density.genus,
Max.height.mean, Max.height.genus),
by = 'sp')
# compute CWM Tn and BATOT
data <- fun.CWM.Tn(data)
......@@ -334,9 +357,14 @@ fun.CWM.traits.all.plot.census.dplyr <- function(data,data.TRAITS){
do(fun.CWM.abs.all(.)) %>%
ungroup() %>% select(-plot.c)
data <- left_join(data, data.CWM.abs, by = 'obs.id')
data <- left_join(data, data.CWM.abs, by = 'obs.id') %>%
select(-weights, -plot.c)
# set trait to NA for species with missing species
data <- fun.traits.focal(data)
data <- left_join(data,
data.TRAITS[, c("sp", "Phylo.group",
"Pheno.T", 'LeafType.T', 'cat')],
by="sp")
data <- fun.order.colums(data)
return(data)
......@@ -380,7 +408,7 @@ cat("Start process data for one census of one cluster with number of observation
nrow(xy.table), "\n")
sp.num <- unclass(factor(sp))
sp.names <- as.character(levels(factor(sp)))
## format traits list
## format traits list
list.traits <- format.traits.list(sp, data.TRAITS)
# lapply function
ans <- t(sapply(obs.id,
......@@ -414,13 +442,16 @@ fun.CWM.traits.TOT.XY.dplyr <- function(data, data.TRAITS, Rlim){
data <- mutate(data,
plot.c = paste(cluster, census, sep ='_'),
BA.w = BA.fun(D, 1/(pi * Rlim^2)))
# compute CWM
data.CWM <- data %>% group_by(plot.c) %>%
do(fun.CWM.XY.cluster.census(., data.TRAITS = data.TRAITS, Rlim = Rlim)) %>%
do(fun.CWM.XY.cluster.census(.,
data.TRAITS = data.TRAITS,
Rlim = Rlim)) %>%
ungroup() %>% dplyr::select(-plot.c)
data <- left_join(data, data.CWM, by = 'obs.id')
data.CWM <- mutate(data.CWM, obs.id = as.integer(obs.id))
data <- left_join(data, data.CWM, by = 'obs.id') %>%
dplyr::select(-plot.c)
return(data)
}
......@@ -465,7 +496,6 @@ fun.data.per.ecoregion <- function(ecoregion, data.tot, site.name,
path <- file.path(out.dir, site.name, ecoregion)
dir.create(path, recursive = TRUE, showWarnings = FALSE)
data.merged <-fun.CWM.traits.all.plot.census.dplyr(data, data.TRAITS)
### reorder columns ???
cat('merge data and CWM done', dim(data.merged), "\n")
## write data
......@@ -534,7 +564,7 @@ for (i in unique(data$ecocode)){
data.t <- filter(data, ecocode == i)
cat(paste("clusters :", paste(unique(data.t[["cluster"]]),
collapse=" ")), "\n")
## compute CWM matrix
## compute CWM matrix
data.merged <- fun.CWM.traits.TOT.XY.dplyr(data.t, data.TRAITS, Rlim)
data.merged <- fun.remove.tree.in.buffer.cluster(data.merged)
cat("dim after buffer tree removed", dim(data.merged),
......@@ -543,7 +573,7 @@ for (i in unique(data$ecocode)){
data.merged <- left_join(data.merged,
data.TRAITS[, c("sp", "Phylo.group",
"Pheno.T", 'LeafType.T', 'cat')],
by="sp")
by="sp") %>% dplyr::select( -x, -y)
if(std.traits == 'local'){
write.csv(data.merged,
file = file.path(path, "data.tree.tot.csv"),
......@@ -660,7 +690,8 @@ process_dataset <- function(set, path.formatted = "output/formatted",
mean.global=mean.global, sd.global=sd.global)
}
cat("finished", file = file.path(path.processed, set, paste("Done", std.traits, "txt", sep = '.')))
cat("finished", file = file.path(path.processed, set,
paste("Done", std.traits, "txt", sep = '.')))
}
......
......@@ -69,11 +69,15 @@ if(any(vec.test))
## test functions for Wood density for one indiv
fun.test.CWM.plot.r <- function(data, data.TRAITS, data.tree){
rep.count <- 0
repeat{
res <- fun.test.CWM.plot(data.t = data, data.TRAITS, data.tree)
rep.count <- rep.count +1
if(!is.na(res)){
break
}
if(rep.count>1000) stop('error no wood density data')
}
return(res)
}
......@@ -144,11 +148,14 @@ fun.test.CWM.plot <- function(data.t, data.TRAITS, data.tree) {
fun.test.CWM.XY.r <- function(data, data.TRAITS, data.tree, Rlim){
rep.count <- 0
repeat{
res <- fun.test.CWM.XY(data.r = data, data.TRAITS, data.tree, Rlim = Rlim)
rep.count <- rep.count +1
if(!is.na(res)){
break
}
if(rep.count>1000) stop('error no Wood density data')
}
return(res)
}
......@@ -172,14 +179,6 @@ return(res)
}
fun.compute.CWM.trait.test <- function(samp.id, data, Rlim, data.TRAITS){
## library(Rcpp)
## sourceCpp("R/process.data/georges.cpp")
## BA.a <- BA.fun(data[['D']], weights = 1/(pi * Rlim^2))
## i.t <- seq_len(length(obs.id))[obs.id==i]
## BA.n <- areas_by_species_within_neighbourhood(idx = i.t - 1L,
## x = data[, "x"], y = data[, "y"], r = Rlim, d = BA.a,
## type = as.vector(sp.num) - 1L, n_types = sp.length)
BA.n <- fun.compute.test(obs.id.t = samp.id, obs.id = data[["obs.id"]],
xy.table = as.matrix(subset(data,
......@@ -236,18 +235,19 @@ if(!is.na(data.TRAITS[data.TRAITS[["sp"]]==samp.sp, "Wood.density.mean"]) &
### FUNCTION TO TEST IF CWM VALUE OK
fun.test.value.one.ecoregion.I <- function(data.CWM, set, ecocode.select,
path.formatted = "output/formatted" ){
require(dplyr)
data.tree <- read.csv(file.path(path.formatted, set, "tree.csv"),
stringsAsFactors = FALSE)
data.traits <- read.csv(file.path(path.formatted, set, "traits.csv"),
stringsAsFactors = FALSE)
# remove nas
data.tree <- subset(data.tree, subset = !is.na(data.tree[["D"]]))
data.tree <- subset(data.tree, subset = data.tree$ecocode==ecocode.select)
data.tree <- filter(data.tree, !is.na(D))
data.tree <- filter(data.tree, ecocode==ecocode.select)
# test if same dim
if (nrow(data.CWM) !=length(data.tree[["obs.id"]]))
stop(paste("data.CWM not good dim", nrow(data.CWM), 'vs',length(data.tree[["obs.id"]])) )
### test CWM for Wood.density
for (i in 1:10){
for (i in 1:20){
if (!fun.test.CWM.plot.r(data.CWM, data.TRAITS = data.traits,
data.tree = data.tree))
stop(paste("CWM index for Wood.density WRONG for", set))
......@@ -264,7 +264,7 @@ fun.test.value.one.ecoregion.B <- function(data.CWM, set, ecocode.select,
data.tree <- subset(data.tree, subset = !is.na(data.tree[["D"]]))
data.tree <- subset(data.tree, subset = data.tree$ecocode==ecocode.select)
### test CWM for Wood.density
for (i in 1:10){ test.res <- fun.test.CWM.XY.r(data.CWM,
for (i in 1:20){ test.res <- fun.test.CWM.XY.r(data.CWM,
data.TRAITS = data.traits,
data.tree = data.tree,
Rlim = 15)
......
......@@ -250,10 +250,9 @@ biomes <- as.character(biomes$biomes)
data.bci$biomes <- biomes
###################### PLOT SELECTION FOR THE ANALYSIS
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name",
"cluster", "plot", "ecocode", "D", "G", "BA.G", "year",
"dead",
'Lon', 'Lat', "x", "y", "census", 'MAT', 'MAP','biomes')
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot",
"ecocode", "D", "G","BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'biomes', 'MAT', 'MAP')
data.tree <- subset(data.bci, select = c(vec.basic.var))
data.tree <- subset(data.tree,subset=!is.na(data.tree$x) & !is.na(data.tree$y))
......
......@@ -92,7 +92,7 @@ data.fushan$biomes <- biomes
###################### PLOT SELECTION FOR THE ANALYSIS - NEEDS REDOING
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot",
"ecocode", "D", "G","BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'MAT', 'MAP', 'biomes')
'Lon', 'Lat', "x", "y", "census", 'biomes', 'MAT', 'MAP')
data.tree <- subset(data.fushan, select = c(vec.basic.var))
## select only tree above 10cm DBH
......
......@@ -269,9 +269,9 @@ data.japan <- subset(data.japan,
## na.rm = TRUE)
###################### PLOT SELECTION FOR THE ANALYSIS
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot",
"ecocode", "D", "G", "BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'MAT', 'MAP', 'biomes')
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot",
"ecocode", "D", "G","BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'biomes', 'MAT', 'MAP')
data.tree <- subset(data.japan, select = c(vec.basic.var))
data.tree <- fun.convert.type.B(data.tree)
......
......@@ -175,8 +175,8 @@ data.luq$biomes <- biomes
###################### PLOT SELECTION FOR THE ANALYSIS - NEEDS REDOING
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot",
"ecocode", "D", "G", "BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'MAT', 'MAP', 'biomes')
"ecocode", "D", "G","BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'biomes', 'MAT', 'MAP')
data.tree <- subset(data.luq, select = c(vec.basic.var))
data.tree <- subset(data.tree,subset=!is.na(data.tree$x) & !is.na(data.tree$y))
## convert var factor in character or numeric
......
......@@ -179,9 +179,9 @@ data.mbaiki$biomes <- biomes
###################### PLOT SELECTION FOR THE ANALYSIS - NEEDS REDOING
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster","plot",
"ecocode", "D", "G", "BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'MAT', 'MAP', 'biomes')
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot",
"ecocode", "D", "G","BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'biomes', 'MAT', 'MAP')
data.tree <- subset(data.mbaiki, select = c(vec.basic.var))
## select tree above 10 cm and last census only
......
......@@ -229,9 +229,9 @@ data.paracou$biomes <- biomes
###################### PLOT SELECTION FOR THE ANALYSIS - NEEDS REDOING
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot",
"ecocode", "D", "G", "BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'MAT', 'MAP', 'biomes')
vec.basic.var <- c("obs.id", "tree.id", "sp", "sp.name", "cluster", "plot",
"ecocode", "D", "G","BA.G", "year", "dead",
'Lon', 'Lat', "x", "y", "census", 'biomes', 'MAT', 'MAP')
data.tree <- subset(data.paracou, select = c(vec.basic.var))
......
......@@ -15,15 +15,10 @@ dir.create("figs/test.processed", recursive = TRUE, showWarnings = FALSE)
## test all set
library(doParallel)
registerDoParallel(cores = 5)
lmat.perc.I <- lapply(sets.I, fun.test.set.tree.CWM.I, filedir = filedir)#,
## mc.cores = getOption("mc.cores", 5))
mat.perc.I <- do.call("rbind", lmat.perc.I)
lmat.perc.B <-lapply(sets.B, fun.test.set.tree.CWM.B, filedir = filedir)#,
## mc.cores = getOption("mc.cores", 5))
lmat.perc.B <- lapply(sets.B, fun.test.set.tree.CWM.B, filedir = filedir)#,
mat.perc.B <- do.call("rbind", lmat.perc.B)
rm(lmat.perc.B, lmat.perc.I)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment