Commit 2e8975ae authored by Georges Kunstler's avatar Georges Kunstler
Browse files

start with dplyr

parent ca493f9f
##################### function to process data install all unstallled packages
#########################
##' .. Compute the basal area per area of competitor in a plot..
##'
......@@ -18,7 +15,8 @@ BA.fun <- function(diam, weights) {
## function to fill missing cat variables with A_EV
fun.fill.cat <- function(data) {
data$cat[is.na(data$cat)] <- 1
require(dplyr)
data <- mutate(data, cat = ifelse(is.na(cat), 1, cat))
return(data)
}
......@@ -39,38 +37,38 @@ fun.std.trait.global <- function(trait, mean.global, sd.global) {
## function to standardized all traits in data and remove duplicated sp
fun.std.data <- function(data.TRAITS) {
data.TRAITS <- subset(data.TRAITS, subset=!duplicated(data.TRAITS[["sp"]]))
traits.mean <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean", "Wood.density.mean", "Max.height.mean")
for (i in traits.mean) {
data.TRAITS[[i]] <- fun.std.trait(log10(data.TRAITS[[i]])) }
return(data.TRAITS)
}
data.TRAITS <- subset(data.TRAITS, subset=!duplicated(data.TRAITS[["sp"]]))
traits.mean <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean", "Wood.density.mean", "Max.height.mean")
for (i in traits.mean) {
data.TRAITS[[i]] <- fun.std.trait(log10(data.TRAITS[[i]])) }
return(data.TRAITS)
}
## function to standardized all traits in data and remove duplicated sp with GLOBAL MEAN
fun.std.data.global <- function(data.TRAITS, mean.global, sd.global) {
data.TRAITS <- subset(data.TRAITS, subset=!duplicated(data.TRAITS[["sp"]]))
traits.mean <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean", "Wood.density.mean", "Max.height.mean")
for (i in traits.mean) {
data.TRAITS[[i]] <- fun.std.trait.global(log10(data.TRAITS[[i]]),
mean.global[i],
sd.global[i]) }
return(data.TRAITS)
}
traits.mean <- c("Leaf.N.mean", "Seed.mass.mean", "SLA.mean", "Wood.density.mean", "Max.height.mean")
for (i in traits.mean) {
data.TRAITS[[i]] <- fun.std.trait.global(log10(data.TRAITS[[i]]),
mean.global[i],
sd.global[i]) }
return(data.TRAITS)
}
##### extract traits for the species in vec.sp
fun.extract.trait.add.missing.sp.NA <- function(vec.sp, traits.data,
trait.name) {
# get value
trait.t <- traits.data[traits.data[["sp"]] %in% vec.sp, trait.name]
## add NA for missing sp
trait.t <- c(trait.t, rep(NA,
sum(! (vec.sp %in% traits.data[["sp"]]))))
## reorder
names(trait.t) <- c(as.character(traits.data[traits.data[["sp"]] %in%
vec.sp, "sp"]),
as.character(vec.sp[! (vec.sp %in% traits.data[["sp"]])]))
trait <- (trait.t)[match(vec.sp, names(trait.t))]
return(trait)
}
# get value
trait.t <- traits.data[traits.data[["sp"]] %in% vec.sp, trait.name]
## add NA for missing sp
trait.t <- c(trait.t, rep(NA,
sum(! (vec.sp %in% traits.data[["sp"]]))))
## reorder
names(trait.t) <- c(as.character(traits.data[traits.data[["sp"]] %in%
vec.sp, "sp"]),
as.character(vec.sp[! (vec.sp %in% traits.data[["sp"]])]))
trait <- (trait.t)[match(vec.sp, names(trait.t))]
return(trait)
}
##### extract traits for the species in vec.sp
......@@ -418,6 +416,42 @@ fun.merged.DT <- function(data.1, data.2, by.var){
return(data.merged)
}
fun.fill.missing.traits <- function(data){
data <- mutate(data,
Leaf.N.genus = ifelse(is.na(Leaf.N.mean),
NA,
Leaf.N.genus),
Leaf.N.mean = ifelse(is.na(Leaf.N.mean),
mean(Leaf.N.mean, na.rm = TRUE),
Leaf.N.mean))
}
fun.CWM.traits.all.plot.census.dplyr <- function(data,data.TRAITS){
require(dplyr)
data <- tbl_df(data)
data <- mutate(data,
plot.c = paste(plot, census, sep ='_'),
BA.w = BA.fun(D, weights))
# merge traits
data <- left_join(data, data.TRAITS, by = 'sp')
data <- fun.fill.missing.traits(data)
test <- group_by(data, plot.c) %>%
summarise(BATOT = sum(BA.w),
Leaf.N.CWM.fill = sum(BA.w*Leaf.N.mean)/BATOT,
count = n(),
Leaf.N.perc.genus = sum(!Leaf.N.genus,
na.rm = TRUE)/count,
Leaf.N.perc.species = (sum(!Leaf.N.genus,na.rm = TRUE)+
sum(!is.na(Leaf.N.genus)))/count
) %>%
select(-count)
### THEN NEED TO MERGE AND SUBSTRATEC BA self BA*Tf
### COMMENT FAIRE POUR LA distance abolue en dplyr ??
test2 <- by(data, data$plot.c, function(dd) {apply(dd$BA.w/sum(dd$BA.w)*abs(outer(dd$Leaf.N.mean, dd$Leaf.N.mean, '-')), 2, mean)})
### NEED TO CHECK ORDER
}
##### function to generate data in good format per ecoregion
fun.data.per.ecoregion <- function(ecoregion, data.tot, site.name,
......@@ -433,6 +467,7 @@ fun.data.per.ecoregion <- function(ecoregion, data.tot, site.name,
path <- file.path(out.dir, site.name, ecoregion)
dir.create(path, recursive = TRUE, showWarnings = FALSE)
browser()
data.CWM <-fun.CWM.traits.all.plot.census(census= data[["census"]],
obs.id=data[["obs.id"]],
plot=data[["plot"]],
......@@ -660,34 +695,36 @@ process_dataset <- function(set, path.formatted = "output/formatted",
#### FUNCTIONS TO MERGE ALL SET IN ONE BIG FILE
## FUNCTION TO LOAD ALL SET IN ONE BIG DATA SET "data.tree.tot.no.log.csv"
fun.load.set.in.big.file <- function(set, filedir, type,
fun.load.set.in.big.file <- function(set.t, filedir, type,
file.to.load = "data.tree.tot.no.log.csv"){
ecocodes <- list_all_processed_data(set, filedir)
print(set)
require(dplyr)
ecocodes <- list_all_processed_data(set.t, filedir)
print(set.t)
# load first ecoregion
ecocode.select <- ecocodes[1]
data.temp <- load.processed.data(file.path(filedir, set, ecocode.select),
data.temp <- load.processed.data(file.path(filedir, set.t, ecocode.select),
file.to.load)
data.all <- data.frame(set = rep(set, nrow(data.temp)),
data.temp)
data.all <- mutate(data.temp, set = set.t)
## other
if (length(ecocodes)>1){
for (ecocode.select in ecocodes[-1]) {
data.temp <- load.processed.data(file.path(filedir, set, ecocode.select),
data.temp <- load.processed.data(file.path(filedir, set.t, ecocode.select),
file.to.load)
data.temp <- data.frame(set = rep(set, nrow(data.temp)),
data.temp)
data.all <- rbind(data.all, data.temp)
data.temp <- mutate(data.temp, set = set.t)
data.all <- rbind_list(data.all, data.temp)
}
}
# replace missing species
data.all$sp.name[is.na(data.all$sp.name)] <- 'missing.sp'
data.all <- mutate(data.all,
sp.name = ifelse(is.na(data.all$sp.name),
'missing.sp',
sp.name))
if (type=='B'){
data.all <- data.all[, !names(data.all) %in% c( "x" , "y" )]
data.all <- select(data.all, -x, -y)
}
if (type=='I'){
data.all <- data.all[, !names(data.all) %in% "weights"]
data.all <- select(data.all, -weights)
}
return(data.all)
}
......@@ -786,8 +823,14 @@ fun.reform.data.and.remove.outlier <- function(data.all,
data.all[ , obs.id := paste(ecocode, obs.id)]
if(std.traits.TF) fun.standardized.traits(data.all)
data.all <- as.data.frame(data.all)
data.all <- data.all[!duplicated(data.all$tree.id), ]
data.all[ , plot.c = paste(plot, census)]
plots.select <- drop(as.matrix(group_by(data.all, plot) %>%
summarise(select = sample(plot.c, 1)) %>%
select(select)))
data.all <- filter(data.all, plot %in% plots.select)
# remove tree with multiple obs
### TODO NEED TO CHANGE THAT TO SELECT RANDOMLY ONE CENSUS PER PLOT
return(data.all)
}
......@@ -816,3 +859,15 @@ fun.write.big.csv <- function(dt, file){
col.names = i ==1)
}
######
load.processed.data <- function(path, file.name = "data.tree.tot.no.std.csv"){
require(data.table)
fname <- file.path(path, file.name )
if(file.exists(fname)){
cat('loading file', path, file.name)
data <- fread(fname, stringsAsFactors = FALSE)
print(warnings())
return(data)
}else{return(NULL)}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment