Commit c2c1adae authored by Georges Kunstler's avatar Georges Kunstler
Browse files

start with dplyr progress

parent 2e8975ae
......@@ -25,10 +25,6 @@ return(data)
##############################################
#### FUNCTION TO FORMAT TRAITS
## std of trait
fun.std.trait <- function(trait) {
  ## Standardize a numeric trait vector: subtract its mean and divide by its
  ## standard deviation. NA values are ignored when computing both statistics
  ## and stay NA in the result.
  trait.centre <- mean(trait, na.rm = TRUE)
  trait.spread <- sd(trait, na.rm = TRUE)
  (trait - trait.centre) / trait.spread
}
## std of trait with global mean and sd; the global mean and sd MUST BE PROVIDED IN log10
fun.std.trait.global <- function(trait, mean.global, sd.global) {
......@@ -38,9 +34,11 @@ fun.std.trait.global <- function(trait, mean.global, sd.global) {
## function to standardize all traits in data and remove duplicated sp
fun.std.data <- function(data.TRAITS) {
data.TRAITS <- subset(data.TRAITS, subset=!duplicated(data.TRAITS[["sp"]]))
traits.mean <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean", "Wood.density.mean", "Max.height.mean")
traits.mean <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean",
"Wood.density.mean", "Max.height.mean")
for (i in traits.mean) {
data.TRAITS[[i]] <- fun.std.trait(log10(data.TRAITS[[i]])) }
data.TRAITS[[i]] <- as.vector(scale(log10(data.TRAITS[[i]])))
}
return(data.TRAITS)
}
## function to standardize all traits in data and remove duplicated sp with GLOBAL MEAN
......@@ -423,7 +421,32 @@ data <- mutate(data,
Leaf.N.genus),
Leaf.N.mean = ifelse(is.na(Leaf.N.mean),
mean(Leaf.N.mean, na.rm = TRUE),
Leaf.N.mean))
Leaf.N.mean),
SLA.genus = ifelse(is.na(SLA.mean),
NA,
SLA.genus),
SLA.mean = ifelse(is.na(SLA.mean),
mean(SLA.mean, na.rm = TRUE),
SLA.mean),
Wood.density.genus = ifelse(is.na(Wood.density.mean),
NA,
Wood.density.genus),
Wood.density.mean = ifelse(is.na(Wood.density.mean),
mean(Wood.density.mean, na.rm = TRUE),
Wood.density.mean),
Max.height.genus = ifelse(is.na(Max.height.mean),
NA,
Max.height.genus),
Max.height.mean = ifelse(is.na(Max.height.mean),
mean(Max.height.mean, na.rm = TRUE),
Max.height.mean),
Seed.mass.genus = ifelse(is.na(Seed.mass.mean),
NA,
Seed.mass.genus),
Seed.mass.mean = ifelse(is.na(Seed.mass.mean),
mean(Seed.mass.mean, na.rm = TRUE),
Seed.mass.mean)
)
}
......@@ -433,25 +456,106 @@ fun.CWM.traits.all.plot.census.dplyr <- function(data,data.TRAITS){
data <- mutate(data,
plot.c = paste(plot, census, sep ='_'),
BA.w = BA.fun(D, weights))
# merge traits
# merge traits
data <- left_join(data, data.TRAITS, by = 'sp')
data <- fun.fill.missing.traits(data)
test <- group_by(data, plot.c) %>%
summarise(BATOT = sum(BA.w),
Leaf.N.CWM.fill = sum(BA.w*Leaf.N.mean)/BATOT,
count = n(),
Leaf.N.perc.genus = sum(!Leaf.N.genus,
# compute CWM abs
data.CWM.abs <- data %>% group_by(plot.c) %>%
do(fun.CWM.abs.all(.)) %>%
ungroup() %>% select(-plot.c)
data <- left_join(data, data.CWM.abs, by = 'obs.id')
# compute CWM and perc
data.plot<- group_by(data, plot.c) %>%
summarise(
BATOT = sum(BA.w),
count = n(),
Leaf.N.CWM.fill = sum(BA.w*Leaf.N.mean),
Leaf.N.perc.genus = sum(!Leaf.N.genus,
na.rm = TRUE)/count,
Leaf.N.perc.species = (sum(!Leaf.N.genus,na.rm = TRUE)+
sum(!is.na(Leaf.N.genus)))/count,
SLA.CWM.fill = sum(BA.w*SLA.mean),
SLA.perc.genus = sum(!SLA.genus,
na.rm = TRUE)/count,
SLA.perc.species = (sum(!SLA.genus,na.rm = TRUE)+
sum(!is.na(SLA.genus)))/count,
Wood.density.CWM.fill = sum(BA.w*Wood.density.mean),
Wood.density.perc.genus = sum(!Wood.density.genus,
na.rm = TRUE)/count,
Leaf.N.perc.species = (sum(!Leaf.N.genus,na.rm = TRUE)+
sum(!is.na(Leaf.N.genus)))/count
) %>%
select(-count)
### THEN NEED TO MERGE AND SUBTRACT the tree's own BA and BA*Tf
### HOW TO COMPUTE THE absolute distance in dplyr ??
test2 <- by(data, data$plot.c, function(dd) {apply(dd$BA.w/sum(dd$BA.w)*abs(outer(dd$Leaf.N.mean, dd$Leaf.N.mean, '-')), 2, mean)})
### NEED TO CHECK ORDER
Wood.density.perc.species = (sum(!Wood.density.genus,na.rm = TRUE)+
sum(!is.na(Wood.density.genus)))/count,
Max.height.CWM.fill = sum(BA.w*Max.height.mean),
Max.height.perc.genus = sum(!Max.height.genus,
na.rm = TRUE)/count,
Max.height.perc.species = (sum(!Max.height.genus,na.rm = TRUE)+
sum(!is.na(Max.height.genus)))/count,
Seed.mass.CWM.fill = sum(BA.w*Seed.mass.mean),
Seed.mass.perc.genus = sum(!Seed.mass.genus,
na.rm = TRUE)/count,
Seed.mass.perc.species = (sum(!Seed.mass.genus,na.rm = TRUE)+
sum(!is.na(Seed.mass.genus)))/count
) %>%
select(-count) %>%
ungroup()
data <- left_join(data, data.plot, by = 'plot.c')
## remove BA obs tree
data <- data %>%
mutate(
BATOT = BATOT - BA.w,
Leaf.N.CWM.fill = (Leaf.N.CWM.fill - BA.w*Leaf.N.mean)/BATOT,
SLA.CWM.fill = (SLA.CWM.fill - BA.w*SLA.mean)/BATOT,
Wood.density.CWM.fill = (Wood.density.CWM.fill - BA.w*Wood.density.mean)/BATOT,
Max.height.CWM.fill = (Max.height.CWM.fill - BA.w*Max.height.mean)/BATOT,
Seed.mass.CWM.fill = (Seed.mass.CWM.fill - BA.w*Seed.mass.mean)/BATOT)
# set the focal trait to NA when the genus flag is NA or TRUE (presumably: no species-level trait value)
data <- data %>%
mutate(
Leaf.N.focal = ifelse(is.na(Leaf.N.genus) |
(!is.na(Leaf.N.genus) & Leaf.N.genus ),
NA,
Leaf.N.mean),
SLA.focal = ifelse(is.na(SLA.genus) |
(!is.na(SLA.genus) & SLA.genus ),
NA,
SLA.mean),
Wood.density.focal = ifelse(is.na(Wood.density.genus) |
(!is.na(Wood.density.genus) & Wood.density.genus ),
NA,
Wood.density.mean),
Max.height.focal = ifelse(is.na(Max.height.genus) |
(!is.na(Max.height.genus) & Max.height.genus ),
NA,
Max.height.mean),
Seed.mass.focal = ifelse(is.na(Seed.mass.genus) |
(!is.na(Seed.mass.genus) & Seed.mass.genus ),
NA,
Seed.mass.mean)
) %>%
select(-Leaf.N.mean, -SLA.mean,
-Wood.density.mean, -Max.height.mean,
-Seed.mass.mean)
}
fun.CWM.abs.trait <- function(trait, data){
  ## Basal-area-weighted mean absolute trait difference, one value per row of
  ## `data`.
  ##   trait: trait prefix; the values are read from column "<trait>.mean"
  ##   data:  table with a `BA.w` column and the "<trait>.mean" column
  ## Returns a numeric vector with one element per row of `data`.
  col.name <- paste(trait, 'mean', sep = '.')
  trait.values <- data[[col.name]]
  ba <- data[['BA.w']]
  ## share of each row in the summed BA.w
  ba.share <- ba / sum(ba)
  ## abs.diff[i, j] = |trait.values[i] - trait.values[j]|
  abs.diff <- abs(outer(trait.values, trait.values, '-'))
  ## ba.share is recycled down the columns, so row i is scaled by ba.share[i];
  ## taking the mean of each column then yields one value per row of data
  apply(ba.share * abs.diff, 2, mean)
}
fun.CWM.abs.all <- function(df, traits = c('SLA', 'Leaf.N',
                                           'Wood.density', 'Max.height',
                                           'Seed.mass')){
  ## Apply fun.CWM.abs.trait to every trait in `traits` and gather the results
  ## in a data frame.
  ##   df:     table passed unchanged to fun.CWM.abs.trait; must also hold an
  ##           `obs.id` column
  ##   traits: trait prefixes to process
  ## Returns a data frame with one "<trait>.abs.CWM.fill" column per trait
  ## plus the `obs.id` column copied from `df`.
  abs.per.trait <- lapply(traits,
                          function(one.trait) {
                            fun.CWM.abs.trait(one.trait, data = df)
                          })
  res <- as.data.frame(abs.per.trait)
  ## rename after the conversion, in the same order as `traits`
  names(res) <- paste(traits, "abs.CWM.fill", sep = '.')
  res[['obs.id']] <- df[['obs.id']]
  res
}
##### function to generate data in good format per ecoregion
fun.data.per.ecoregion <- function(ecoregion, data.tot, site.name,
......@@ -462,54 +566,40 @@ fun.data.per.ecoregion <- function(ecoregion, data.tot, site.name,
rm(data.tot)
if(fun.test.enough.sp.in.ecoregion(data)){
cat(paste("number of obs in ecoregion",
ecoregion, " = ", nrow(data)), "\n")
path <- file.path(out.dir, site.name, ecoregion)
dir.create(path, recursive = TRUE, showWarnings = FALSE)
browser()
data.CWM <-fun.CWM.traits.all.plot.census(census= data[["census"]],
obs.id=data[["obs.id"]],
plot=data[["plot"]],
diam=data[["D"]],
sp=data[["sp"]],
data.TRAITS=data.TRAITS,
weight=data[["weights"]],
parallel=parallel)
### create data frame and merge
data.merged <- fun.merged.DT(data, data.CWM, "obs.id")
cat('merge data and CWM done', dim(data),
dim(data.CWM), dim(data.merged), "\n")
## add Phylo.group and Pheno.T to the data
print(dim(data.merged))
data.merged <- merge(data.merged, data.TRAITS[, c("sp",
"Phylo.group",
"Pheno.T",
'LeafType.T')],
by="sp")
print(dim(data.merged))
## write data
if(std.traits == 'local'){
write.csv(data.merged, file = file.path(path,
"data.tree.tot.csv"),
row.names = FALSE)
}
if(std.traits == 'no'){
write.csv(data.merged,
file = file.path(path,
"data.tree.tot.no.std.csv"),
row.names = FALSE)
}
if(std.traits == 'global'){
write.csv(data.merged,
file = file.path(path, "data.tree.tot.global.csv"),
row.names = FALSE)
}
if(std.traits == 'log'){
write.csv(data.merged,
file = file.path(path, "data.tree.tot.log.csv"),
row.names = FALSE)
}
cat(paste("number of obs in ecoregion",
ecoregion, " = ", nrow(data)), "\n")
path <- file.path(out.dir, site.name, ecoregion)
dir.create(path, recursive = TRUE, showWarnings = FALSE)
browser()
system.time(
data.merged <-fun.CWM.traits.all.plot.census.dplyr(data, data.TRAITS)
)
### reorder columns ???
cat('merge data and CWM done', dim(data.merged), "\n")
## write data
if(std.traits == 'local'){
write.csv(data.merged, file = file.path(path,
"data.tree.tot.csv"),
row.names = FALSE)
}
if(std.traits == 'no'){
write.csv(data.merged,
file = file.path(path,
"data.tree.tot.no.std.csv"),
row.names = FALSE)
}
if(std.traits == 'global'){
write.csv(data.merged,
file = file.path(path, "data.tree.tot.global.csv"),
row.names = FALSE)
}
if(std.traits == 'log'){
write.csv(data.merged,
file = file.path(path, "data.tree.tot.log.csv"),
row.names = FALSE)
}
}else{
cat(paste("Not enough species in ecoregion", ecoregion), "\n")
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment