#!/usr/bin/env Rscript

#===================================================#
#
# Format Mbaiki traits data
# Ghislain Vieilledent
# <ghislain.vieilledent@cirad.fr>
# October 15th, 2013
#
#===================================================#

# The following files must be in the Mbaiki raw data folder (data/raw/Mbaiki/,
# relative to the directory the script is run from):
# 1. "Mbaiki_1995_2000_2005.csv" --> Inventory file (years 1995, 2000 and 2005) for Mbaiki with species list
# 2. "liaison_spmbaiki_bois-niv-sp.csv" --> Extract of the Cirad wood density database for Mbaiki species
# 3. "DensiteBoisSimpleMbaiki.csv" --> An additional file for wood density data
# 4. "Autour_de_Mbaiki_Releves_par_trait_et_taxon.txt" --> PlanTrait data

library(reshape)

#==========================
# List of species in MBaiki

# Load inventory data
data.inv <- read.csv(file="data/raw/Mbaiki/Mbaiki_1995_2000_2005.csv")
names(data.inv)
head(data.inv)

# List of species (choose "scientific_inventory_name_1995")
# Stop early with a readable message if the column is absent; otherwise the
# species list would be empty and the data.frame() call below would fail with
# an unrelated "differing number of rows" error.
stopifnot("scientific_inventory_name_1995" %in% names(data.inv))
# levels(as.factor(.)) gives the sorted, unique, non-NA species names
List.species <- levels(as.factor(data.inv[["scientific_inventory_name_1995"]]))
nspecies <- length(List.species)

#============================
# Building the trait data-set

# One row per species. Every trait summary (mean, sd, n) starts as NA and is
# filled in by the sections below.
data.species <- data.frame(Species=List.species,
                           Leaf.N.mean=NA,Leaf.N.sd=NA,Leaf.N.n=NA,
                           Seed.mass.mean=NA,Seed.mass.sd=NA,Seed.mass.n=NA,
                           SLA.mean=NA,SLA.sd=NA,SLA.n=NA,
                           Wood.density.mean=NA,Wood.density.sd=NA,Wood.density.n=NA)

#= Wood density

# Load the Cirad wood density data-base
data.wd <- read.csv2("data/raw/Mbaiki/liaison_spmbaiki_bois-niv-sp.csv")
# Keep only the first record of each species name.
# A logical mask is used rather than data.wd[-index,]: when there is no
# duplicate, index is empty and negative indexing with an empty vector selects
# zero rows, which would silently empty the whole table.
is.dup <- duplicated(data.wd$Scientifique.Name.1995)
index <- which(is.dup) # row numbers of the dropped duplicates (kept for inspection)
data.wd.2 <- data.wd[!is.dup,] # remove duplicates
# Load another Cirad wood density data-base (from Cindy Gidoin and Sylvie Gourlet-Fleury)
data.wd.Cindy <- read.table("data/raw/Mbaiki/DensiteBoisSimpleMbaiki.csv",header=TRUE,sep="\t")
# Keep the rows whose NivMoyID is "E". which() drops rows where NivMoyID is NA;
# a bare logical index would turn each of them into an all-NA row.
# NOTE(review): "E" presumably flags a species-level value -- confirm against the data dictionary.
data.wd.Cindy.2 <- data.wd.Cindy[which(data.wd.Cindy$NivMoyID=="E"),]
data.wd.Cindy.2$LatinName <- paste(data.wd.Cindy.2$Genre,data.wd.Cindy.2$Espece,sep=" ")
ListSpeciesCindy <- levels(as.factor(data.wd.Cindy.2$LatinName))
# Merge
# Left joins: every species of the inventory is kept, matched or not.
# NOTE(review): if a LatinName occurs more than once in data.wd.Cindy.2 the
# second merge repeats that species' row -- verify the source has one row per species.
data.species.2 <- merge(data.species,data.wd.2,by.x="Species",by.y="Scientifique.Name.1995",all.x=TRUE)
data.species.3 <- merge(data.species.2,data.wd.Cindy.2,by.x="Species",by.y="LatinName",all.x=TRUE)
# Fill-up wood density variables
# as.numeric(as.character(.)) also copes with columns that were read as factors
data.species.3$Wood.density.mean <- as.numeric(as.character(data.species.3$moy_infra_densite))
data.species.3$Wood.density.sd <- sqrt(as.numeric(as.character(data.species.3$var_infra_densite)))
data.species.3$Wood.density.n <- as.numeric(as.character(data.species.3$nb_arbre))
sum(!is.na(data.species.3$Wood.density.mean))

# Complete with Cindy data
# Only species still lacking a wood density mean are touched. match() returns
# the first matching row, so a species listed several times can no longer hand
# a length > 1 value to if() or to the scalar assignment.
for (i in seq_len(nrow(data.species.3))) {
    if (!is.na(data.species.3$Wood.density.mean[i])) {
        next
    }
    row.i <- match(as.character(data.species.3$Species[i]), data.wd.Cindy.2$LatinName)
    if (is.na(row.i) || is.na(data.wd.Cindy.2$MoyID[row.i])) {
        next
    }
    data.species.3$Wood.density.mean[i] <- data.wd.Cindy.2$MoyID[row.i]
    # isTRUE() treats a missing NivVarID as "not E" instead of failing in if()
    if (isTRUE(data.wd.Cindy.2$NivVarID[row.i]=="E")) {
        # NOTE(review): VarID is stored as the sd without sqrt(), whereas
        # var_infra_densite above is square-rooted -- confirm VarID is not a variance.
        data.species.3$Wood.density.sd[i] <- data.wd.Cindy.2$VarID[row.i]
    }
}
sum(!is.na(data.species.3$Wood.density.mean)) # 73, two additional wood density with Cindy data-set

#=========================================================================================
# Completing the data-set with the PlanTrait database from E. Parent and S. Gourlet-Fleury

#= Load libraries
library(reshape)

#= Load PlanTraits
# Tab-separated text file with a header row; text columns are kept as character
# (not factors).
PlanTraits <- read.csv("data/raw/Mbaiki/Autour_de_Mbaiki_Releves_par_trait_et_taxon.txt",
                       stringsAsFactors=FALSE, header=TRUE, sep="\t")
names(PlanTraits)

#= Reformat PlanTraits to one row per species, with each trait as a column
# Two wide tables are built from the same formula (one row per LIB_TAXON, one
# column per METHO_LIB): one aggregating MEASURE with mean(), one with sd().
spp.means <- cast(PlanTraits, LIB_TAXON ~ METHO_LIB, value = "MEASURE", fun.aggregate = mean)
colnames(spp.means)[-1] <- paste0(colnames(spp.means)[-1], ".mean")
spp.sds <- cast(PlanTraits, LIB_TAXON ~ METHO_LIB, value = "MEASURE", fun.aggregate = sd)
# Every column gets the suffix here, including the taxon column, which is
# dropped again when the two tables are bound together.
colnames(spp.sds) <- paste0(colnames(spp.sds), ".sd")
spp.all <- cbind(spp.means, spp.sds[,-1])
# Traits retained: leaf nitrogen, specific leaf area and wood density (mean and sd)
cols.kept <- c("LIB_TAXON",
               "Leaf nitrogen concentration (standard).mean",
               "Leaf nitrogen concentration (standard).sd",
               "Specific leaf area (standard).mean",
               "Specific leaf area (standard).sd",
               "Wood density.mean",
               "Wood density.sd")
PlanTraits2 <- spp.all[,cols.kept]
colnames(PlanTraits2)[1] <- "Latin_name"
PlanTraits2 <- PlanTraits2[order(PlanTraits2$Latin_name),]
# Turn every non-finite summary (NaN, Inf, NA) into NA, trait columns only
for (col.k in 2:ncol(PlanTraits2)) {
    PlanTraits2[,col.k][!is.finite(PlanTraits2[,col.k])] <- NA
}
colnames(PlanTraits2) <- c("Latin_name","LeafN.mean","LeafN.sd","SLA.mean","SLA.sd","WSG.mean","WSG.sd")
# Unit bookkeeping: all three conversion factors are 1, so values are unchanged
PlanTraits2$LeafN.mean <- PlanTraits2$LeafN.mean/1 # LeafN already in mg.g-1
PlanTraits2$SLA.mean <- PlanTraits2$SLA.mean/1 # m2.kg-1 and mm2.mg-1 are numerically equal
PlanTraits2$WSG.mean <- PlanTraits2$WSG.mean/1 # g.cm-3 and mg.mm-3 are numerically equal

#= Fill up species trait data-base with PlanTraits
ListSpeciesPlanTraits <- levels(as.factor(PlanTraits2$Latin_name))
# Row of PlanTraits2 holding each species (NA when the species is not in
# PlanTraits). Names are compared as character so the lookup behaves the same
# whether the columns are factors or strings. Working on whole columns also
# copes with an empty species table, on which a 1:nrow() loop would fail.
row.PlanTraits <- match(as.character(data.species.3$Species),
                        as.character(PlanTraits2$Latin_name))
in.PlanTraits <- !is.na(row.PlanTraits)

# For each trait: only species still lacking a mean are filled, and the sd is
# copied along with the mean. Each countsup.* counter is the number of species
# that actually gained a non-missing mean from PlanTraits.
fill.LeafN <- is.na(data.species.3$Leaf.N.mean) & in.PlanTraits
data.species.3$Leaf.N.mean[fill.LeafN] <- PlanTraits2$LeafN.mean[row.PlanTraits[fill.LeafN]]
data.species.3$Leaf.N.sd[fill.LeafN] <- PlanTraits2$LeafN.sd[row.PlanTraits[fill.LeafN]]
countsup.LeafN <- sum(!is.na(data.species.3$Leaf.N.mean[fill.LeafN]))

fill.SLA <- is.na(data.species.3$SLA.mean) & in.PlanTraits
data.species.3$SLA.mean[fill.SLA] <- PlanTraits2$SLA.mean[row.PlanTraits[fill.SLA]]
data.species.3$SLA.sd[fill.SLA] <- PlanTraits2$SLA.sd[row.PlanTraits[fill.SLA]]
countsup.SLA <- sum(!is.na(data.species.3$SLA.mean[fill.SLA]))

fill.WSG <- is.na(data.species.3$Wood.density.mean) & in.PlanTraits
data.species.3$Wood.density.mean[fill.WSG] <- PlanTraits2$WSG.mean[row.PlanTraits[fill.WSG]]
data.species.3$Wood.density.sd[fill.WSG] <- PlanTraits2$WSG.sd[row.PlanTraits[fill.WSG]]
countsup.WSG <- sum(!is.na(data.species.3$Wood.density.mean[fill.WSG]))

countsup.LeafN # 34
countsup.SLA # 34
countsup.WSG # 0

# Plots
hist(data.species.3$Wood.density.mean)
hist(data.species.3$SLA.mean)
hist(data.species.3$Leaf.N.mean)

#================
# Output

# Select the right columns
names(data.species.3)
# Keep the species name and the 12 trait summary columns. They are selected by
# name rather than by position (1:13) so the result does not depend on the
# column order produced by the merges above.
cols.std <- c("Species",
              "Leaf.N.mean","Leaf.N.sd","Leaf.N.n",
              "Seed.mass.mean","Seed.mass.sd","Seed.mass.n",
              "SLA.mean","SLA.sd","SLA.n",
              "Wood.density.mean","Wood.density.sd","Wood.density.n")
data.TRAITS.std <- data.species.3[,cols.std]
names(data.TRAITS.std)
# No maximum height data in this script: the column is added empty
data.TRAITS.std$Max.height.mean <- NA
# Helper functions used below (fun.extract.format.sp.traits.NOT.TRY, fun.get.cat.var.from.try, ...)
# are expected to come from this file -- NOTE(review): confirm, the file is not shown here.
source("R/find.trait/trait.fun.R")

#######################
##### GET IN GOOD FORMAT
# Species table built from the formatted tree data: one row per distinct "sp"
# code, taking the "sp.name" of the first tree recorded with that code.
data.tree <- read.csv("output/formatted/Mbaiki/tree.csv", stringsAsFactors = FALSE)
first.of.sp <- !duplicated(data.tree[["sp"]])
sp.codes <- data.tree[first.of.sp,"sp"]
sp.names <- data.tree[first.of.sp,"sp.name"]
# Latin_name and Latin_name_syn both hold sp.name (no separate synonym list here)
species.clean <- data.frame(sp=sp.codes,
                             Latin_name=sp.names,
                             Latin_name_syn=sp.names,
                             stringsAsFactors =FALSE)



## extract
# The Latin name is passed both as species id and as name; traits are looked up
# in data.TRAITS.std through its "Species" column.
data.traits <- fun.extract.format.sp.traits.NOT.TRY(sp=species.clean$Latin_name,
                                                    Latin_name=species.clean$Latin_name,
                                                    data=data.TRAITS.std,
                                                    name.match.traits="Species")

## change sp
# NOTE(review): assumes the helper returns one row per input species, in input
# order, so the codes line up row by row -- confirm in trait.fun.R.
data.traits$sp <- species.clean$sp

#### GET THE ANGIO/CONIF AND EVERGREEN/DECIDUOUS
# Read the TRY categorical traits lookup table and the Zanne leaf phenology database
# Empty strings in the TRY lookup table are read as NA
try.cat <- read.csv("data/raw/TRY/TRY_Categorical_Traits_Lookup_Table_2012_03_17_TestRelease.csv",
                    stringsAsFactors=FALSE, na.strings = "")
Pheno.Zanne <- read.csv("data/raw/ZanneNature/GlobalLeafPhenologyDatabase.csv",
                        stringsAsFactors=FALSE)
# extract
# One helper call per species code; the per-species results are stacked into a single table
cat.by.sp <- lapply(data.traits$sp, fun.get.cat.var.from.try,
                    data.traits, try.cat, Pheno.Zanne)
data.cat.extract <- do.call("rbind", cat.by.sp)
# change category
# Applied in this order: phenology recoding, angiosperm/conifer recoding, then
# filling phenology from Zanne (behaviour defined in the sourced helpers).
data.cat.extract <- fun.change.factor.pheno.try(data.cat.extract)
data.cat.extract <- fun.change.factor.angio.try(data.cat.extract)
data.cat.extract <- fun.fill.pheno.try.with.zanne(data.cat.extract)

# Attach the two categorical columns to the trait table, matching on "sp"
cat.cols <- c("sp","Phylo.group","Pheno.T")
data.traits <- merge(data.traits, data.cat.extract[,cat.cols], by="sp")

### Export
# Write the species-level trait table, without the row-name column
write.csv(x = data.traits,
          file = "output/formatted/Mbaiki/traits.csv",
          row.names = FALSE)
179
180

# Export (legacy, disabled)
#data.species.4$sp <- data.species.4$Species
#write.csv(data.species.4,file="output/formatted/Mbaiki/traits.csv",row.names=FALSE)

## #===================================
## # How many species have trait values
## nspecies
## # Obs at the species level
## nsp.LeafN <- sum(!is.na(data.species.4$Leaf.N.mean))
## nsp.SeedMass <- sum(!is.na(data.species.4$Seed.mass.mean))
## nsp.SLA <- sum(!is.na(data.species.4$SLA.mean))
## nsp.WSG <- sum(!is.na(data.species.4$Wood.density.mean))
## nsp.AllTraits <- sum(!is.na(data.species.4$Leaf.N.mean) & !is.na(data.species.4$Seed.mass.mean) &
##                      !is.na(data.species.4$SLA.mean) & !is.na(data.species.4$Wood.density.mean))
## # Summary in a matrix
## matsum <- as.data.frame(matrix(nrow=1,ncol=6))
## names(matsum) <- c("Total","LeafN","SeedMass","SLA","WSG","AllTraits")
## matsum[1,] <- c(nspecies,nsp.LeafN,nsp.SeedMass,nsp.SLA,nsp.WSG,nsp.AllTraits)
## sink("Summary_Traits_Mbaiki.txt")
## matsum
## sink()