Commit 8acde682 authored by Georges Kunstler's avatar Georges Kunstler
Browse files

merge conflict

No related merge requests found
Showing with 395 additions and 15 deletions
+395 -15
......@@ -4,7 +4,6 @@ data/*
figs
*.xls
*.xlsx
*.csv
.Rhistory
*.pdf
*.doc
......
Makefile 0 → 100644
# Default target: the merged data file made by the rule below.
all: output/list.US.DR.DO.Rdata

# The script is listed as a prerequisite so the target is rebuilt whenever
# merge.data.US.R changes (without it, make considers an existing target
# always up to date).
output/list.US.DR.DO.Rdata: merge.data.US.R
	Rscript merge.data.US.R

# Delete every file directly under output/.
clean:
	rm -f output/*

.PHONY: all clean
################# FUNCTION TO EXTRACT DETECTED OUTLIERS AND FORMAT TRY DATA Georges Kunstler
############################################ 14/06/2013
library(MASS)
library(doParallel)
library(mvoutlier)
library(MASS, quietly=TRUE)
library(doParallel, quietly=TRUE)
library(mvoutlier, quietly=TRUE)
######################################################## Build a function that extracts the variables
......@@ -133,7 +133,7 @@ f.quantile <- function(x, ind, probs) {
}
f.quantile.boot2 <- function(x, R, probs = 0.99) {
require(boot)
require(boot, quietly=TRUE)
if (length(na.exclude(x)) > 0) {
quant.boot <- boot(x, f.quantile, R = R, probs = probs)
return(c(mean = mean(quant.boot$t), sd = sd(quant.boot$t), nobs = length(na.exclude(x))))
......
## Evaluate a plotting expression with a pdf device open on `filename`.
##   expr     : expression to evaluate (in the caller's frame) while the
##              device is open
##   filename : path of the pdf to write; its directory is created if absent
##   ...      : passed on to pdf()
##   verbose  : if TRUE, print "Creating <filename>" first
## The device is closed when the function exits, even if `expr` fails.
to.pdf <- function(expr, filename, ..., verbose=TRUE) {
  target.dir <- dirname(filename)
  if (!file.exists(target.dir)) {
    dir.create(target.dir, recursive=TRUE)
  }
  if (verbose) {
    cat(sprintf("Creating %s\n", filename))
  }
  pdf(filename, ...)
  on.exit(dev.off())
  eval.parent(substitute(expr))
}
## Return `x` with its missing (NA) elements removed.
na.clean <- function(x) {
  keep <- !is.na(x)
  x[keep]
}
# Returns n colours from a palette of 80 unique, nice colours, generated using
# http://tools.medialab.sciences-po.fr/iwanthue/
# The palette is recycled, so colours start repeating after 80.
# n = 0 returns character(0) (previously cols[1:0] returned NA).
niceColors <- function(n=80) {
  palette80 <- c(
    "#75954F", "#D455E9", "#E34423", "#4CAAE1", "#451431", "#5DE737", "#DC9B94", "#DC3788",
    "#E0A732", "#67D4C1", "#5F75E2", "#1A3125", "#65E689", "#A8313C", "#8D6F96", "#5F3819",
    "#D8CFE4", "#BDE640", "#DAD799", "#D981DD", "#61AD34", "#B8784B", "#892870", "#445662",
    "#493670", "#3CA374", "#E56C7F", "#5F978F", "#BAE684", "#DB732A", "#7148A8", "#867927",
    "#918C68", "#98A730", "#DDA5D2", "#456C9C", "#2B5024", "#E4D742", "#D3CAB6", "#946661",
    "#9B66E3", "#AA3BA2", "#A98FE1", "#9AD3E8", "#5F8FE0", "#DF3565", "#D5AC81", "#6AE4AE",
    "#652326", "#575640", "#2D6659", "#26294A", "#DA66AB", "#E24849", "#4A58A3", "#9F3A59",
    "#71E764", "#CF7A99", "#3B7A24", "#AA9FA9", "#DD39C0", "#604458", "#C7C568", "#98A6DA",
    "#DDAB5F", "#96341B", "#AED9A8", "#55DBE7", "#57B15C", "#B9E0D5", "#638294", "#D16F5E",
    "#504E1A", "#342724", "#64916A", "#975EA8", "#9D641E", "#59A2BB", "#7A3660", "#64C32A"
  )
  # rep_len recycles the palette to exactly n entries and handles n = 0
  rep_len(palette80, n)
}
## Add an alpha channel to one or more colours.
##   col     : colours in any form accepted by col2rgb()
##   opacity : alpha value passed to rgb() (default 0.5)
## Returns the colours as strings produced by rgb().
make.transparent <- function(col, opacity=0.5) {
  channels <- col2rgb(col) / 255
  rgb(red = channels["red", ],
      green = channels["green", ],
      blue = channels["blue", ],
      alpha = opacity)
}
## Draw a text label at a fractional position of the current plot region.
##   px, py : position as a fraction of the x / y extent given by par("usr")
##   lab    : the text to draw
##   ...    : further arguments passed to text()
##   adj    : text justification passed to text() (default c(0, 1))
label <- function(px, py, lab, ..., adj=c(0, 1)) {
  lim <- par("usr")
  x.pos <- lim[1] + px * (lim[2] - lim[1])
  y.pos <- lim[3] + py * (lim[4] - lim[3])
  text(x.pos, y.pos, lab, adj=adj, ...)
}
## TRUE where `x` is within `tol` of its rounded value (elementwise).
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
  distance <- abs(x - round(x))
  distance < tol
}
## Draw an axis whose tick labels are written as powers of ten (10^i).
##   side         : axis side passed to axis(); 1 or 3 read the x limits from
##                  par("usr"), any other value reads the y limits
##   horiz        : if TRUE the axis line is drawn in the foreground colour,
##                  otherwise the line colour is NA and only ticks are drawn
##   labels       : if TRUE the 10^i labels are drawn, if FALSE no labels
##   baseAxis     : if TRUE tick positions are converted to 10^at before the
##                  call to axis()
##   wholenumbers : if TRUE only whole-number exponents are kept
##   labelEnds    : if FALSE, ticks not strictly inside the axis limits are
##                  dropped (only used when `at` is NULL)
##   las          : label orientation passed to axis()
##   at           : exponents at which to put ticks; when NULL they are
##                  chosen with pretty() from the axis limits
## Returns (invisibly) the value of axis().
axis.log10 <- function(side=1, horiz=FALSE, labels=TRUE, baseAxis = TRUE,
                       wholenumbers=TRUE, labelEnds=TRUE, las=1, at=NULL) {
  fg <- par("fg")

  if (is.null(at)) {
    # get range on axis
    if (side == 1 || side == 3) {
      r <- par("usr")[1:2]  # lower and upper limits of x-axis
    } else {
      r <- par("usr")[3:4]  # lower and upper limits of y-axis
    }

    # make pretty intervals
    at <- pretty(r)

    # drop ends if desirable
    if (!labelEnds) {
      at <- at[at > r[1] & at < r[2]]
    }
  }

  # restrict to whole numbers if desirable
  if (wholenumbers) {
    at <- at[is.wholenumber(at)]
  }

  # labels are built from the exponents, before any conversion of `at`
  lab <- do.call(expression, lapply(at, function(i) bquote(10^.(i))))

  # convert exponents to actual values if requested
  if (baseAxis) {
    at <- 10^at
  }

  # draw the axis, with or without labels
  axis(side, at = at, labels = if (labels) lab else FALSE,
       col = if (horiz) fg else NA, col.ticks = fg, las = las)
}
#!/usr/bin/env Rscript
### MERGE us DATA Edited by FH
rm(list = ls())
library(reshape, quietly=TRUE)
source("./R/format.function.R")
library(reshape)
source("./R/FUN.TRY.R")
######################### READ DATA read individuals tree data
data.us <- read.csv("./data/raw/DataUS/FIA51_trees_w_supp.csv", header = TRUE, stringsAsFactors = FALSE)
### read species names
species.clean <- read.csv("./data/species.list/REF_SPECIES.CSV", stringsAsFactors = FALSE)
## select column to keep
......@@ -53,6 +51,7 @@ colnames(greco)[1] <- "Ecocode"
table(data.us$Ecocode)
data.us <- merge(data.us, greco[, -4], by = "Ecocode")
data.us$DIVISION <- factor(data.us$DIVISION)
## Some ecoregions still have small # of individuals, so create a variable which
## does division if # ind < 10000 else it reads Domain
data.us$eco_codemerged <- as.character(data.us$DIVISION)
......@@ -64,8 +63,8 @@ for (i in 1:length(sel.small.div)) {
data.us$eco_codemerged[find.ind] <- as.character(data.us$DOMAIN)[find.ind]
}
###################### PERCENT DEAD variable percent dead/cannot do with since dead variable is
###################### missing compute number of dead per plot to remove plots with disturbance
###### PERCENT DEAD variable percent dead/cannot do with since dead variable is
###### missing compute number of dead per plot to remove plots with disturbance
perc.dead <- tapply(data.us[["dead"]], INDEX = data.us[["plot"]], FUN = function.perc.dead)
# ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OTHER VARIABLES IF
# AVAILABLE (disturbance record)
......@@ -73,8 +72,7 @@ data.us <- merge(data.us, data.frame(plot = names(perc.dead), perc.dead = perc.d
by = "plot", sort = FALSE)
########################################################### PLOT SELECTION FOR THE ANALYSIS
##### PLOT SELECTION FOR THE ANALYSIS
## remove everything from memory not needed before computation
rm(greco, perc.dead, tab.small.div, sel.small.div)
......@@ -90,6 +88,7 @@ vec.basic.var <- c("tree.id", "sp", "plot", "subplot", "ecocode", "D", "G", "dea
"year", "htot", "Lon", "Lat", "perc.dead", "weights","census")
data.tree <- subset(data.us, select = c(vec.basic.var, vec.abio.var.names))
rm(data.us)
## create unique row id
data.tree$obs.id <- as.character(1:nrow(data.tree))
gc()
......@@ -97,7 +96,7 @@ gc()
### read TRY data
TRY.DATA.FORMATED <- readRDS("./data/process/TRY.DATA.FORMATED.rds")
#################### GENERATE ONE OBJECT PER ECOREGION
#### GENERATE ONE OBJECT PER ECOREGION
# vector of ecoregion name
ecoregion.unique <- unique(data.tree[["ecocode"]])
......
# Scripts used to create folders and file lists for metadata
# caution - if rerun, will overwrite existing files

# Make one metadata directory per entry of data/raw ("Data" is stripped from
# the name). recursive = TRUE also creates ms/metadata itself when missing.
for (d in gsub("Data", "", dir("data/raw"))) {
  dir.create(file.path("ms/metadata", d), recursive = TRUE, showWarnings = FALSE)
}

# List all csv/txt files in each raw data directory and write the list to
# <metadata dir>/_file_index.csv
for (d in dir("data/raw", full.names = TRUE)) {
  pathout <- file.path("ms/metadata", gsub("Data", "", basename(d)))

  # Without full.names the paths are already relative to d, so no prefix has
  # to be stripped with a regular expression built from the path.
  files <- dir(d, recursive = TRUE)
  files <- files[tools::file_ext(files) %in% c("csv", "txt")]

  # separator code: "c" for csv files, "t" for everything else kept above
  sep <- rep("t", length(files))
  sep[tools::file_ext(files) == "csv"] <- "c"

  # skip/contents are repeated explicitly so that a directory without any
  # csv/txt file gives an empty index instead of a data.frame() error
  write.csv(data.frame(filename = files, sep = sep,
                       skip = rep(0, length(files)),
                       contents = rep(" ", length(files))),
            file = file.path(pathout, "_file_index.csv"),
            row.names = FALSE, quote = FALSE)
}
## Write one template variable table per file listed in <d>/_file_index.csv.
##   d : metadata directory; the raw data directory is derived from it by
##       replacing "ms/metadata/" with "data/raw/Data"
## For each listed file the raw data are read with read.table() and a csv
## with columns use/var/units/description (one row per column of the raw
## file) is written to <d>/files/<filename with "/" replaced by "_">.csv.
## The separator code of each file is printed as it is processed.
## NOTE(review): the `skip` column of the index is read but not passed to
## read.table() - confirm whether that is intended.
make_meta_data_tables <- function(d) {
  files <- read.csv(file.path(d, "_file_index.csv"), stringsAsFactors = FALSE)

  # separator codes used in the index; any other code means tab
  sep.codes <- c(c = ",", ";" = ";", " " = " ")

  raw.dir <- gsub("ms/metadata/", "data/raw/Data", d)

  # make sure the output directory exists before writing into it
  out.dir <- file.path(d, "files")
  if (!file.exists(out.dir)) {
    dir.create(out.dir, recursive = TRUE)
  }

  # seq_len() so that an empty index is skipped instead of looping over 1:0
  for (i in seq_len(nrow(files))) {
    code <- files[i, "sep"]
    cat(paste(code, " "))
    sep <- if (code %in% names(sep.codes)) sep.codes[[code]] else "\t"

    infile <- file.path(raw.dir, files[i, "filename"])
    data <- read.table(infile, sep = sep, stringsAsFactors = FALSE,
                       fill = TRUE, header = TRUE)

    outfile <- paste0(
      tools::file_path_sans_ext(gsub("/", "_", files[i, "filename"])), ".csv")
    write.csv(data.frame(use = 0, var = names(data), units = "", description = ""),
              quote = FALSE, row.names = FALSE,
              file = file.path(out.dir, outfile))
  }
}
# Generate the variable tables for the metadata directories.
# NOTE(review): only entries 7 to 12 of the directory listing are processed;
# the indices are hard-coded, so confirm they still select the intended
# directories when the content of ms/metadata changes.
for (d in dir("ms/metadata", full.names = TRUE)[7:12]) {
  # print the directory being processed on a new line
  cat(paste("\n", d))
  make_meta_data_tables(d)
}
filename,sep,skip,contents
BCITRAITS_20101220.csv,c,0,
census1/PlotsDataReport.txt,t,0,
TaxonomyDataReport.txt,t,0,
Item,Details
contact name,Joe Wright
contact email,wrightj@si.edu
contact affiliation, "Centre for Tropical Science"
dataset name, "Barro Colorado Island, 50 ha plot"
country, Panama
climate, Tropical
vegetation type, Tropical rain forest
minimum tree size, 1cm dbh
field methodology, "All free-standing woody plants with diameter at breast height (dbh) > 1 cm were measured in a 50-ha plot on BCI in 1982, 1985, 1990, 1995, 2000, and 2005."
trait information,
key references, "Hubbell and Foster 1992, Condit et al. 2006"
use,var,units,description 1,GENUS,,genus name truncated at 12 letters 1,SPECIES,,species name truncated at 12 letters 1,FAMILY,,family name truncated at 12 letters 1,GRWFRM2,,Values are as in GRWFRM1 except free-standing species can have multiple values if maximum size varies widely within Panama 1,GRWFRM1,,"Values are Climber, HERB, S, U, M and T. S, U, M and T are free-standing species with maximum heights of 5, 10, 20 and > 30 m, respectively. " 1,SP,,six letter species code 1,SG60C_AVG,g/cm3,mean wood specific gravity after drying at 60C (g cm-3) 1,SG100C_AVG,g/cm3,mean wood specific gravity after drying at 100C (g cm-3) 1,SG60C_SEM,g/cm3,standard error of SG60C_AVG (g cm-3) 1,SG100C_SEM,g/cm3,standard error of SG100C_AVG (g cm-3) 1,SG60C_N,,number of individuals sampled for SG60C_AVG 1,SG100C_N,,number of individuals sampled for SG100C_AVG 1,SEED_DRY,g,"mean seed dry mass after drying at 60 C (g), where seed is defined to include the endosperm and embryo only" 1,DBH_AVG,mm,mean DBH measured in 2005 of up to the six largest individuals in the BCI 50-ha plot (mm) 1,HEIGHT_AVG,m,mean height of up to the six largest individuals in the BCI 50-ha plot (m) 1,DIAM_AVG,m,mean crown diameter of up to the six largest individuals in the BCI 50-ha plot (m) 1,DBH_SEM,mm,one standard error of DBH_AVG (mm) 1,HEIGHT_SEM,m,one standard error of HEIGHT_AVG (m) 1,DIAM_SEM,m,one standard error of DIAM_AVG (m) 1,LMALAM_AVD,g/m2,mean leaf mass per unit area measured for the leaf lamina excluding the petiole and for compound leaves the petiolules (g m-2) for leaves receiving direct sunlight 1,LMALAM_SED,g/m2,one standard error for LMALAM_AVD (g m-2)
\ No newline at end of file
use,var,units,description
0,Family,,
0,Genus,,
0,species,,
0,subspecies,,
0,mnemonic,,
0,IDlevel,,
0,Authority,,
0,PriorNames,,
0,SpeciesID,,
use,var,units,description
0,Plot,,
0,Latin,,
0,Quadrat,,
0,gx,,
0,gy,,
0,TreeID,,
0,Tag,,
0,StemID,,
0,StemTag,,
0,Census,,
0,DBH,,
0,HOM,,
0,Date,,
0,Codes,,
0,Stem,,
0,Status,,
filename,sep,skip,contents
Canada_Data2George_20130818.csv,c,0,
EcoregionCodes.csv,c,0,
FIA_REF_SPECIES.csv,c,0,
Item,Details
contact name,
contact email,
contact affiliation,
dataset name,
country,
climate,
vegetation type,
minimum tree size,
field methodology,
trait information,
key references,
use,var,units,description
0,PLOTTREE_I,,
0,Species_FIAcode,,
0,InitDBH,,
0,FinalDBH,,
0,PLOT_ID,,
0,Subplot_ID,,
0,SubPlot_Size,,
0,Lat,,
0,Lon,,
0,IndWeight,,
0,IntervalYears,,
0,Ecocode,,
0,MAT,,
0,MAP,,
0,Province,,
use,var,units,description
0,ECOCODE,,
0,DOMAIN,,
0,DIVISION,,
0,PROVINCE,,
use,var,units,description
0,SPCD,,
0,COMMON_NAME,,
0,GENUS,,
0,SPECIES,,
0,VARIETY,,
0,SUBSPECIES,,
0,SPECIES_SYMBOL,,
0,E_SPGRPCD,,
0,W_SPGRPCD,,
0,C_SPGRPCD,,
0,P_SPGRPCD,,
0,MAJOR_SPGRPCD,,
0,STOCKING_SPGRPCD,,
0,FOREST_TYPE_SPGRPCD,,
0,EXISTS_IN_NCRS,,
0,EXISTS_IN_NERS,,
0,EXISTS_IN_PNWRS,,
0,EXISTS_IN_RMRS,,
0,EXISTS_IN_SRS,,
0,SITETREE,,
0,SFTWD_HRDWD,,
0,ST_EXISTS_IN_NCRS,,
0,ST_EXISTS_IN_NERS,,
0,ST_EXISTS_IN_PNWRS,,
0,ST_EXISTS_IN_RMRS,,
0,ST_EXISTS_IN_SRS,,
0,CORE,,
0,EAST,,
0,WEST,,
0,CARIBBEAN,,
0,PACIFIC,,
0,WOODLAND,,
0,MANUAL_START,,
0,MANUAL_END,,
0,JENKINS_SPGRPCD,,
0,JENKINS_TOTAL_B1,,
0,JENKINS_TOTAL_B2,,
0,JENKINS_STEM_WOOD_RATIO_B1,,
0,JENKINS_STEM_WOOD_RATIO_B2,,
0,JENKINS_STEM_BARK_RATIO_B1,,
0,JENKINS_STEM_BARK_RATIO_B2,,
0,JENKINS_FOLIAGE_RATIO_B1,,
0,JENKINS_FOLIAGE_RATIO_B2,,
0,JENKINS_ROOT_RATIO_B1,,
0,JENKINS_ROOT_RATIO_B2,,
0,JENKINS_SAPLING_ADJUSTMENT,,
0,WOOD_SPGR_GREENVOL_DRYWT,,
0,WOOD_SPGR_GREENVOL_DRYWT_CIT,,
0,BARK_SPGR_GREENVOL_DRYWT,,
0,BARK_SPGR_GREENVOL_DRYWT_CIT,,
0,MC_PCT_GREEN_BARK,,
0,MC_PCT_GREEN_BARK_CIT,,
0,MC_PCT_GREEN_WOOD,,
0,MC_PCT_GREEN_WOOD_CIT,,
0,WOOD_SPGR_MC12VOL_DRYWT,,
0,WOOD_SPGR_MC12VOL_DRYWT_CIT,,
0,BARK_VOL_PCT,,
0,BARK_VOL_PCT_CIT,,
0,RAILE_STUMP_DOB_B1,,
0,RAILE_STUMP_DIB_B1,,
0,RAILE_STUMP_DIB_B2,,
0,CWD_DECAY_RATIO1,,
0,CWD_DECAY_RATIO2,,
0,CWD_DECAY_RATIO3,,
0,CWD_DECAY_RATIO4,,
0,CWD_DECAY_RATIO5,,
0,DWM_CARBON_RATIO,,
0,STANDING_DEAD_DECAY_RATIO1,,
0,STANDING_DEAD_DECAY_RATIO2,,
0,STANDING_DEAD_DECAY_RATIO3,,
0,STANDING_DEAD_DECAY_RATIO4,,
0,STANDING_DEAD_DECAY_RATIO5,,
0,CREATED_BY,,
0,CREATED_DATE,,
0,CREATED_IN_INSTANCE,,
0,MODIFIED_BY,,
0,MODIFIED_DATE,,
0,MODIFIED_IN_INSTANCE,,
filename,sep,skip,contents
2005/arbres_foret_2005.csv,c,0,
2005/arbres_morts_foret_2005.csv,c,0,
2005/arbres_peupleraie_2005.csv,c,0,
2005/documentation_2005-2.csv,c,0,
2005/documentation_flore.csv,c,0,
2005/ecologie_2005.csv,c,0,
2005/flore_2005.csv,c,0,
2005/placettes_foret_2005.csv,c,0,
2005/placettes_peupleraie_2005.csv,c,0,
2006/arbres_foret_2006.csv,c,0,
2006/arbres_morts_foret_2006.csv,c,0,
2006/arbres_peupleraie_2006.csv,c,0,
2006/couverts_foret_2006.csv,c,0,
2006/documentation_2006-2.csv,c,0,
2006/documentation_flore.csv,c,0,
2006/ecologie_2006.csv,c,0,
2006/flore_2006.csv,c,0,
2006/placettes_foret_2006.csv,c,0,
2006/placettes_peupleraie_2006.csv,c,0,
2007/arbres_foret_2007.csv,c,0,
2007/arbres_morts_foret_2007.csv,c,0,
2007/arbres_peupleraie_2007.csv,c,0,
2007/couverts_foret_2007.csv,c,0,
2007/documentation_2007-2.csv,c,0,
2007/documentation_flore.csv,c,0,
2007/ecologie_2007.csv,c,0,
2007/flore_2007.csv,c,0,
2007/placettes_foret_2007.csv,c,0,
2007/placettes_peupleraie_2007.csv,c,0,
2008/arbres_foret_2008.csv,c,0,
2008/arbres_morts_foret_2008.csv,c,0,
2008/arbres_morts_peupleraie_2008.csv,c,0,
2008/arbres_peupleraie_2008.csv,c,0,
2008/couverts_foret_2008.csv,c,0,
2008/documentation_2008-2.csv,c,0,
2008/documentation_flore.csv,c,0,
2008/ecologie_2008.csv,c,0,
2008/flore_2008.csv,c,0,
2008/placettes_foret_2008.csv,c,0,
2008/placettes_peupleraie_2008.csv,c,0,
2009/arbres_foret_2009.csv,c,0,
2009/arbres_morts_foret_2009.csv,c,0,
2009/arbres_morts_peupleraie_2009.csv,c,0,
2009/arbres_peupleraie_2009.csv,c,0,
2009/couverts_foret_2009.csv,c,0,
2009/documentation_2009-2.csv,c,0,
2009/documentation_flore.csv,c,0,
2009/ecologie_2009.csv,c,0,
2009/flore_2009.csv,c,0,
2009/placettes_foret_2009.csv,c,0,
2009/placettes_peupleraie_2009.csv,c,0,
2010/arbres_foret_2010.csv,c,0,
2010/arbres_morts_foret_2010.csv,c,0,
2010/arbres_morts_peupleraie_2010.csv,c,0,
2010/arbres_peupleraie_2010.csv,c,0,
2010/couverts_foret_2010.csv,c,0,
2010/documentation_2010.csv,c,0,
2010/documentation_flore.csv,c,0,
2010/ecologie_2010.csv,c,0,
2010/flore_2010.csv,c,0,
2010/placettes_foret_2010.csv,c,0,
2010/placettes_peupleraie_2010.csv,c,0,
2011/arbres_foret_2011.csv,c,0,
2011/arbres_morts_foret_2011.csv,c,0,
2011/arbres_morts_peupleraie_2011.csv,c,0,
2011/arbres_peupleraie_2011.csv,c,0,
2011/couverts_foret_2011.csv,c,0,
2011/documentation_2011.csv,c,0,
2011/documentation_flore.csv,c,0,
2011/ecologie_2011.csv,c,0,
2011/flore_2011.csv,c,0,
2011/placettes_foret_2011.csv,c,0,
2011/placettes_peupleraie_2011.csv,c,0,
altitude/SER_alti_2011.csv,c,0,
altitude/SER_alti.csv,c,0,
climate_piedallu/placettesGK_avec 2011.csv,c,0,
climate_piedallu/placettesGK2.csv,c,0,
climate_piedallu/texture.txt,t,0,
cycle3/arbres2.txt,t,0,
cycle3/data.arbre.tot.txt,t,0,
cycle3/speciesnames.txt,t,0,
Item,Details
contact name,
contact email,
contact affiliation,
dataset name,
country,
climate,
vegetation type,
minimum tree size,
field methodology,
trait information,
key references,
use,var,units,description
0,idp.a.veget.espar.ori.lib.forme.tige.c13.ir5.htot.q1.q2.q3.r.lfsd.v.w,,
use,var,units,description
0,idp.a.espar.ori.veget.c0.v.w,,
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment