Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Kunstler Georges
traitcompet
Commits
6b35996d
Commit
6b35996d
authored
Sep 29, 2014
by
Georges Kunstler
Browse files
start with dplyr progress
parent
86b60005
Changes
2
Hide whitespace changes
Inline
Side-by-side
R/process.data/process-fun.R
View file @
6b35996d
...
...
@@ -243,8 +243,7 @@ return(df.res)
fun.CWM.Tn
<-
function
(
data
){
# comput CWM and perc
data.plot
<-
group_by
(
data
,
plot.c
)
%>%
summarise
(
data.plot
<-
group_by
(
data
,
plot.c
)
%>%
summarise
(
BATOT
=
sum
(
BA.w
),
count
=
n
(),
Leaf.N.CWM.fill
=
sum
(
BA.w
*
Leaf.N.mean
),
...
...
@@ -272,9 +271,7 @@ fun.CWM.Tn <- function(data){
na.rm
=
TRUE
)
/
count
,
Seed.mass.perc.species
=
(
sum
(
!
Seed.mass.genus
,
na.rm
=
TRUE
)
+
sum
(
!
is.na
(
Seed.mass.genus
)))
/
count
)
%>%
select
(
-
count
)
%>%
ungroup
()
)
%>%
select
(
-
count
)
%>%
x
ungroup
()
data
<-
left_join
(
data
,
data.plot
,
by
=
'plot.c'
)
## remove BA obs tree
...
...
@@ -547,7 +544,7 @@ for (i in unique(data$ecocode)){
cat
(
"dim after buffer tree removed"
,
dim
(
data.merged
),
'vs '
,
dim
(
data.t
),
"\n"
)
## add Phylo.group and Pheno.T to the data
data.merged
<-
left_join
(
(
data.merged
,
data.merged
<-
left_join
(
data.merged
,
data.TRAITS
[,
c
(
"sp"
,
"Phylo.group"
,
"Pheno.T"
,
'LeafType.T'
)],
by
=
"sp"
)
...
...
scripts/format.data/US.R
View file @
6b35996d
#!/usr/bin/env Rscript
### MERGE us DATA Edited by FH
library
(
reshape
,
quietly
=
TRUE
)
##
library(reshape, quietly = TRUE)
library
(
dplyr
)
library
(
rgeos
)
source
(
"R/format.data/format-fun.R"
)
dir.create
(
"output/formatted/US"
,
recursive
=
TRUE
,
showWarnings
=
FALSE
)
...
...
@@ -11,7 +12,7 @@ dir.create("output/formatted/US", recursive = TRUE, showWarnings = FALSE)
species.clean
<-
read.csv
(
"data/raw/US/REF_SPECIES.CSV"
,
stringsAsFactors
=
FALSE
)
## select column to keep
species.clean
<-
select
(
species.clean
,
species.clean
<-
dplyr
::
select
(
species.clean
,
SPCD
,
GENUS
,
SPECIES
,
VARIETY
,
SUBSPECIES
,
SPECIES_SYMBOL
)
names
(
species.clean
)[
1
]
<-
"sp"
...
...
@@ -45,22 +46,19 @@ data.us <- left_join(data.us, data.us.plot, by = 'PlotID')
data.us
<-
mutate
(
data.us
,
PLOT_ID_C
=
PlotID
)
data.us
$
PlotID
<-
NULL
## NEED TO GET THE CENSUS
## MASSAGE TRAIT DATA HEIGHT DATA FOR TREE MISSING BRING US DATA FOR HEIGHT OVER
## WHEN WE ANALYZE THAT DATASET LATER ON
## FORMAT INDIVIDUAL TREE DATA
## change unit and names of variables to be the same in all data for the tree
data.us
$
G
<-
10
*
(
data.us
$
FinalDbh
-
data.us
$
InitDbh
)
/
data.us
$
Interval
data.us
$
G
<-
10
*
(
data.us
$
FinalDbh
-
data.us
$
InitDbh
)
/
data.us
$
Interval
Years
## diameter growth in mm per year
data.us
$
BA.G
<-
(
pi
*
(
data.us
$
FinalDbh
/
2
)
^
2
-
pi
*
(
data.us
$
InitDbh
/
2
)
^
2
)
/
data.us
$
Interval
## ba growth in cm^2/year
data.us
$
Interval
Years
## ba growth in cm^2/year
data.us
$
G
[
which
(
data.us
$
InitDbh
==
0
|
data.us
$
FinalDbh
==
-999
)]
<-
NA
data.us
$
BA.G
[
which
(
data.us
$
InitDbh
==
0
|
data.us
$
FinalDbh
==
-999
)]
<-
NA
data.us
$
year
<-
data.us
$
Interval
## number of year between measuremen
data.us
$
year
<-
data.us
$
Interval
Years
## number of year between measuremen
data.us
$
D
<-
data.us
[[
"InitDbh"
]]
data.us
$
D
[
data.us
$
D
==
0
]
<-
NA
## diameter in cm
...
...
@@ -117,14 +115,43 @@ data.us$census <- data.us$StartMeasYear
### add plot weights for computation of competition index (in 1/m^2)
data.us
$
weights
<-
1
/
(
10000
*
data.us
[[
"PlotSize"
]])
#### UPDATE NEED TO USE PLOT AND NOT SUBPLOT for competition index based on Mark
###################################################################################
# Ecoregions
fia.plots
<-
filter
(
data.us
,
!
duplicated
(
PLOT_ID
))
%>%
dplyr
::
select
(
PLOT_ID
,
Lat
,
Lon
)
geo.proj
<-
CRS
(
"+proj=longlat +towgs84"
)
pts
<-
SpatialPoints
(
fia.plots
[,
3
:
2
],
proj4string
=
geo.proj
)
#read in ecoregions and transform shapefile to lat/lon
Ecoregions
<-
readOGR
(
dsn
=
"data/raw/US/ecoregions"
,
layer
=
"na_regns"
)
ecoregions.geo
<-
spTransform
(
Ecoregions
,
CRS
=
geo.proj
)
#creates object that assigns each plot index to an ecoregion
plot.ecocodes
<-
over
(
pts
,
ecoregions.geo
)[[
"ECOCODE"
]]
#need to assign ecoregions to plots that are just outside polygon boundaries
ec.na
<-
which
(
plot.ecocodes
==
"- 0"
|
is.na
(
plot.ecocodes
))
#get all NAs and Lake regions
gdis
<-
gDistance
(
pts
[
ec.na
,],
ecoregions.geo
,
byid
=
T
)
#calculate distance from na points to each polygon
gdis
[
which
(
ecoregions.geo
$
ECOCODE
==
"- 0"
),]
<-
99999
#exclude lakes by making them seem very far away
na.ind
<-
apply
(
gdis
,
2
,
FUN
=
which.min
)
#find index of minimum distance for each point
plot.ecocodes
[
ec.na
]
<-
ecoregions.geo
$
ECOCODE
[
na.ind
]
#assign back to ecocodes
#merge back with data us
plot.indices
<-
match
(
data.us
[[
'PLOT_ID'
]],
fia.plots
[[
'PLOT_ID'
]])
data.us
$
Ecocode
<-
plot.ecocodes
[
plot.indices
]
## ECOREGION merge greco to have no ecoregion
## with low number of observation
greco
<-
read.csv
(
file
=
"data/raw/US/EcoregionCodes.csv"
,
header
=
T
)
colnames
(
greco
)[
1
]
<-
"Ecocode"
table
(
data.us
$
Ecocode
)
data.us
<-
merge
(
data.us
,
greco
[,
-4
],
by
=
"Ecocode"
)
data.us
<-
left_join
(
data.us
,
greco
[,
-4
],
by
=
"Ecocode"
)
data.us
$
DIVISION
<-
as.character
(
data.us
$
DIVISION
)
data.us
$
DIVISION
[
data.us
$
DIVISION
%in%
...
...
@@ -167,6 +194,12 @@ div.ecocode <- tapply(data.us$sp, INDEX = data.us$ecocode, fun_div)
nsp.ecocode
<-
unlist
(
lapply
(
by
(
data.us
,
data.us
$
ecocode
,
fun.sp.in.ecoregion
),
length
))
#### get wc climate
source
(
"R/utils/climate.R"
)
clim
<-
GetClimate
(
data.us
$
Lat
,
data.us
$
Lon
)
data.us
$
MAT
<-
clim
$
MAT
data.us
$
MAP
<-
clim
$
MAP
######################## BIOMES
### biomes from Whittaker
source
(
"R/utils/plot.R"
)
...
...
@@ -177,7 +210,7 @@ biomes <- fun.overly.plot.on.biomes(MAP = data.us$MAP/10,
library
(
BIOMEplot
)
plot_biome
()
points
(
data.us
$
MAP
/
10
,
data.us
$
MAT
,
col
=
1
+
as.numeric
(
is.na
(
biomes
)))
abline
(
h
=
4
)
abline
(
h
=
4
)
# change factor
biomes
<-
as.character
(
biomes
$
biomes
)
biomes
[
is.na
(
biomes
)
&
data.us
$
MAT
<
4
]
<-
'Boreal forest'
...
...
@@ -185,20 +218,13 @@ biomes[is.na(biomes) & data.us$MAT > 4] <- 'Temperate rain forest'
table
(
biomes
)
data.us
$
biomes
<-
biomes
## ## WWF
## source("R/utils/ecoregions.R")
## data.us$biomes <- GetBiomes(data.us$Lon, data.us$Lat)
## table(data.us$biome)
## data.us$biome[ data.us$biome == 'Temp_Sav' ] <- 'Temp_Broadleaf_Mix'
## data.us$biome[grep('Trop_', data.us$biome)] <- 'Trop'
div.biome
<-
tapply
(
data.us
$
sp
,
INDEX
=
data.us
$
biome
,
fun_div
)
nsp.biome
<-
unlist
(
lapply
(
by
(
data.us
,
data.us
$
biome
,
fun.sp.in.ecoregion
),
div.biome
<-
tapply
(
data.us
$
sp
,
INDEX
=
data.us
$
biome
s
,
fun_div
)
nsp.biome
<-
unlist
(
lapply
(
by
(
data.us
,
data.us
$
biome
s
,
fun.sp.in.ecoregion
),
length
))
## ## ## plot map to check coordinates syste
## ## ##
##
plot map to check coordinates syste
m
## library(RColorBrewer)
## library(rworldmap)
## newmap <- getMap(resolution = 'coarse')
...
...
@@ -207,8 +233,6 @@ nsp.biome <- unlist(lapply(by(data.us, data.us$biome, fun.sp.in.ecoregion),
## ecoreg <- factor(data.us$ecocode);# levels(ecoreg) <- mycols <-
## brewer.pal(length(levels(ecoreg)), "Set3")
## points(data.us[['Lon']], data.us[['Lat']], pty = '.', cex = .2, col = (ecoreg))
## legend('bottomleft', col = mycols,
## legend = names(table(data.us$ecocode)), pch = rep(19, length(levels(ecoreg))))
###### PERCENT DEAD variable percent dead/cannot do with since dead variable is
###### missing compute numer of dead per plot to remove plot with disturbance
...
...
@@ -216,10 +240,10 @@ perc.dead <- tapply(data.us[["dead"]], INDEX = data.us[["plot"]],
FUN
=
function.perc.dead
)
# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF
# AVAILABLE (disturbance record)
data.us
<-
merge
(
data.us
,
data.frame
(
plot
=
names
(
perc.dead
),
data.us
<-
left_join
(
data.us
,
data.frame
(
plot
=
names
(
perc.dead
),
perc.dead
=
perc.dead
,
stringsAsFactors
=
FALSE
),
by
=
"plot"
,
sort
=
FALSE
)
by
=
"plot"
)
## variables to keep
vec.abio.var.names
<-
c
(
"MAT"
,
"MAP"
)
...
...
@@ -229,7 +253,7 @@ vec.basic.var <- c("obs.id","tree.id", "sp", "sp.name", "cluster",
"Lon"
,
"Lat"
,
"weights"
,
"census"
,
'biomes'
)
data.tree
<-
subset
(
data.us
,
select
=
c
(
vec.basic.var
,
vec.abio.var.names
))
## select only tree above 10cm DBH
data.tree
<-
subset
(
data.tree
,
subset
=
data.tree
$
D
>
10
|
is.na
(
data.tree
$
D
))
data.tree
<-
filter
(
data.tree
,
D
>
10
|
is.na
(
D
))
## convert var factor in character or numeric
data.tree
<-
fun.convert.type.I
(
data.tree
)
write.csv
(
data.tree
,
file
=
"output/formatted/US/tree.csv"
,
row.names
=
FALSE
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment