Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Kunstler Georges
traitcompet
Commits
2e8975ae
Commit
2e8975ae
authored
Sep 25, 2014
by
Georges Kunstler
Browse files
start with dplyr
parent
ca493f9f
Changes
1
Hide whitespace changes
Inline
Side-by-side
R/process.data/process-fun.R
View file @
2e8975ae
##################### function to process data install all unstallled packages
#########################
##' .. Compute the basal area per area of competitor in a plot..
##'
...
...
@@ -18,7 +15,8 @@ BA.fun <- function(diam, weights) {
## function to fill missing cat variables with A_EV
fun.fill.cat
<-
function
(
data
)
{
data
$
cat
[
is.na
(
data
$
cat
)]
<-
1
require
(
dplyr
)
data
<-
mutate
(
data
,
cat
=
ifelse
(
is.na
(
cat
),
1
,
cat
))
return
(
data
)
}
...
...
@@ -39,38 +37,38 @@ fun.std.trait.global <- function(trait, mean.global, sd.global) {
## function to standardized all traits in data and remove duplicated sp
fun.std.data
<-
function
(
data.TRAITS
)
{
data.TRAITS
<-
subset
(
data.TRAITS
,
subset
=!
duplicated
(
data.TRAITS
[[
"sp"
]]))
traits.mean
<-
c
(
"Leaf.N.mean"
,
"Seed.mass.mean"
,
"SLA.mean"
,
"Wood.density.mean"
,
"Max.height.mean"
)
for
(
i
in
traits.mean
)
{
data.TRAITS
[[
i
]]
<-
fun.std.trait
(
log10
(
data.TRAITS
[[
i
]]))
}
return
(
data.TRAITS
)
}
data.TRAITS
<-
subset
(
data.TRAITS
,
subset
=!
duplicated
(
data.TRAITS
[[
"sp"
]]))
traits.mean
<-
c
(
"Leaf.N.mean"
,
"Seed.mass.mean"
,
"SLA.mean"
,
"Wood.density.mean"
,
"Max.height.mean"
)
for
(
i
in
traits.mean
)
{
data.TRAITS
[[
i
]]
<-
fun.std.trait
(
log10
(
data.TRAITS
[[
i
]]))
}
return
(
data.TRAITS
)
}
## function to standardized all traits in data and remove duplicated sp with GLOBAL MEAN
fun.std.data.global
<-
function
(
data.TRAITS
,
mean.global
,
sd.global
)
{
data.TRAITS
<-
subset
(
data.TRAITS
,
subset
=!
duplicated
(
data.TRAITS
[[
"sp"
]]))
traits.mean
<-
c
(
"Leaf.N.mean"
,
"Seed.mass.mean"
,
"SLA.mean"
,
"Wood.density.mean"
,
"Max.height.mean"
)
for
(
i
in
traits.mean
)
{
data.TRAITS
[[
i
]]
<-
fun.std.trait.global
(
log10
(
data.TRAITS
[[
i
]]),
mean.global
[
i
],
sd.global
[
i
])
}
return
(
data.TRAITS
)
}
traits.mean
<-
c
(
"Leaf.N.mean"
,
"Seed.mass.mean"
,
"SLA.mean"
,
"Wood.density.mean"
,
"Max.height.mean"
)
for
(
i
in
traits.mean
)
{
data.TRAITS
[[
i
]]
<-
fun.std.trait.global
(
log10
(
data.TRAITS
[[
i
]]),
mean.global
[
i
],
sd.global
[
i
])
}
return
(
data.TRAITS
)
}
##### extract traits for the species in vec.sp
fun.extract.trait.add.missing.sp.NA
<-
function
(
vec.sp
,
traits.data
,
trait.name
)
{
# get value
trait.t
<-
traits.data
[
traits.data
[[
"sp"
]]
%in%
vec.sp
,
trait.name
]
## add NA for missing sp
trait.t
<-
c
(
trait.t
,
rep
(
NA
,
sum
(
!
(
vec.sp
%in%
traits.data
[[
"sp"
]]))))
## reorder
names
(
trait.t
)
<-
c
(
as.character
(
traits.data
[
traits.data
[[
"sp"
]]
%in%
vec.sp
,
"sp"
]),
as.character
(
vec.sp
[
!
(
vec.sp
%in%
traits.data
[[
"sp"
]])]))
trait
<-
(
trait.t
)[
match
(
vec.sp
,
names
(
trait.t
))]
return
(
trait
)
}
# get value
trait.t
<-
traits.data
[
traits.data
[[
"sp"
]]
%in%
vec.sp
,
trait.name
]
## add NA for missing sp
trait.t
<-
c
(
trait.t
,
rep
(
NA
,
sum
(
!
(
vec.sp
%in%
traits.data
[[
"sp"
]]))))
## reorder
names
(
trait.t
)
<-
c
(
as.character
(
traits.data
[
traits.data
[[
"sp"
]]
%in%
vec.sp
,
"sp"
]),
as.character
(
vec.sp
[
!
(
vec.sp
%in%
traits.data
[[
"sp"
]])]))
trait
<-
(
trait.t
)[
match
(
vec.sp
,
names
(
trait.t
))]
return
(
trait
)
}
##### extract traits for the species in vec.sp
...
...
@@ -418,6 +416,42 @@ fun.merged.DT <- function(data.1, data.2, by.var){
return
(
data.merged
)
}
fun.fill.missing.traits
<-
function
(
data
){
data
<-
mutate
(
data
,
Leaf.N.genus
=
ifelse
(
is.na
(
Leaf.N.mean
),
NA
,
Leaf.N.genus
),
Leaf.N.mean
=
ifelse
(
is.na
(
Leaf.N.mean
),
mean
(
Leaf.N.mean
,
na.rm
=
TRUE
),
Leaf.N.mean
))
}
fun.CWM.traits.all.plot.census.dplyr
<-
function
(
data
,
data.TRAITS
){
require
(
dplyr
)
data
<-
tbl_df
(
data
)
data
<-
mutate
(
data
,
plot.c
=
paste
(
plot
,
census
,
sep
=
'_'
),
BA.w
=
BA.fun
(
D
,
weights
))
# merge traits
data
<-
left_join
(
data
,
data.TRAITS
,
by
=
'sp'
)
data
<-
fun.fill.missing.traits
(
data
)
test
<-
group_by
(
data
,
plot.c
)
%>%
summarise
(
BATOT
=
sum
(
BA.w
),
Leaf.N.CWM.fill
=
sum
(
BA.w
*
Leaf.N.mean
)
/
BATOT
,
count
=
n
(),
Leaf.N.perc.genus
=
sum
(
!
Leaf.N.genus
,
na.rm
=
TRUE
)
/
count
,
Leaf.N.perc.species
=
(
sum
(
!
Leaf.N.genus
,
na.rm
=
TRUE
)
+
sum
(
!
is.na
(
Leaf.N.genus
)))
/
count
)
%>%
select
(
-
count
)
### THEN NEED TO MERGE AND SUBSTRATEC BA self BA*Tf
### COMMENT FAIRE POUR LA distance abolue en dplyr ??
test2
<-
by
(
data
,
data
$
plot.c
,
function
(
dd
)
{
apply
(
dd
$
BA.w
/
sum
(
dd
$
BA.w
)
*
abs
(
outer
(
dd
$
Leaf.N.mean
,
dd
$
Leaf.N.mean
,
'-'
)),
2
,
mean
)})
### NEED TO CHECK ORDER
}
##### function to generate data in good format per ecoregion
fun.data.per.ecoregion
<-
function
(
ecoregion
,
data.tot
,
site.name
,
...
...
@@ -433,6 +467,7 @@ fun.data.per.ecoregion <- function(ecoregion, data.tot, site.name,
path
<-
file.path
(
out.dir
,
site.name
,
ecoregion
)
dir.create
(
path
,
recursive
=
TRUE
,
showWarnings
=
FALSE
)
browser
()
data.CWM
<-
fun.CWM.traits.all.plot.census
(
census
=
data
[[
"census"
]],
obs.id
=
data
[[
"obs.id"
]],
plot
=
data
[[
"plot"
]],
...
...
@@ -660,34 +695,36 @@ process_dataset <- function(set, path.formatted = "output/formatted",
#### FUNCTIONS TO MERGE ALL SET IN ONE BIG FILE
## FUNCTION TO LOAD ALL SET IN ONE BIG DATA SET "data.tree.tot.no.log.csv"
fun.load.set.in.big.file
<-
function
(
set
,
filedir
,
type
,
fun.load.set.in.big.file
<-
function
(
set
.t
,
filedir
,
type
,
file.to.load
=
"data.tree.tot.no.log.csv"
){
ecocodes
<-
list_all_processed_data
(
set
,
filedir
)
print
(
set
)
require
(
dplyr
)
ecocodes
<-
list_all_processed_data
(
set.t
,
filedir
)
print
(
set.t
)
# load first ecoregion
ecocode.select
<-
ecocodes
[
1
]
data.temp
<-
load.processed.data
(
file.path
(
filedir
,
set
,
ecocode.select
),
data.temp
<-
load.processed.data
(
file.path
(
filedir
,
set
.t
,
ecocode.select
),
file.to.load
)
data.all
<-
data.frame
(
set
=
rep
(
set
,
nrow
(
data.temp
)),
data.temp
)
data.all
<-
mutate
(
data.temp
,
set
=
set.t
)
## other
if
(
length
(
ecocodes
)
>
1
){
for
(
ecocode.select
in
ecocodes
[
-1
])
{
data.temp
<-
load.processed.data
(
file.path
(
filedir
,
set
,
ecocode.select
),
data.temp
<-
load.processed.data
(
file.path
(
filedir
,
set
.t
,
ecocode.select
),
file.to.load
)
data.temp
<-
data.frame
(
set
=
rep
(
set
,
nrow
(
data.temp
)),
data.temp
)
data.all
<-
rbind
(
data.all
,
data.temp
)
data.temp
<-
mutate
(
data.temp
,
set
=
set.t
)
data.all
<-
rbind_list
(
data.all
,
data.temp
)
}
}
# replace missing species
data.all
$
sp.name
[
is.na
(
data.all
$
sp.name
)]
<-
'missing.sp'
data.all
<-
mutate
(
data.all
,
sp.name
=
ifelse
(
is.na
(
data.all
$
sp.name
),
'missing.sp'
,
sp.name
))
if
(
type
==
'B'
){
data.all
<-
data.all
[,
!
names
(
data.all
)
%in%
c
(
"x"
,
"y"
)]
data.all
<-
select
(
data.all
,
-
x
,
-
y
)
}
if
(
type
==
'I'
){
data.all
<-
data.all
[,
!
names
(
data.all
)
%in%
"
weights
"
]
data.all
<-
select
(
data.all
,
-
weights
)
}
return
(
data.all
)
}
...
...
@@ -786,8 +823,14 @@ fun.reform.data.and.remove.outlier <- function(data.all,
data.all
[
,
obs.id
:=
paste
(
ecocode
,
obs.id
)]
if
(
std.traits.TF
)
fun.standardized.traits
(
data.all
)
data.all
<-
as.data.frame
(
data.all
)
data.all
<-
data.all
[
!
duplicated
(
data.all
$
tree.id
),
]
data.all
[
,
plot.c
=
paste
(
plot
,
census
)]
plots.select
<-
drop
(
as.matrix
(
group_by
(
data.all
,
plot
)
%>%
summarise
(
select
=
sample
(
plot.c
,
1
))
%>%
select
(
select
)))
data.all
<-
filter
(
data.all
,
plot
%in%
plots.select
)
# remove tree with multiple obs
### TODO NEED TO CHANGE THAT TO SELECT RANDOMLY ONE CENSUS PER PLOT
return
(
data.all
)
}
...
...
@@ -816,3 +859,15 @@ fun.write.big.csv <- function(dt, file){
col.names
=
i
==
1
)
}
######
load.processed.data
<-
function
(
path
,
file.name
=
"data.tree.tot.no.std.csv"
){
require
(
data.table
)
fname
<-
file.path
(
path
,
file.name
)
if
(
file.exists
(
fname
)){
cat
(
'loading file'
,
path
,
file.name
)
data
<-
fread
(
fname
,
stringsAsFactors
=
FALSE
)
print
(
warnings
())
return
(
data
)
}
else
{
return
(
NULL
)}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment