Commit 6526ad71 authored by Boulangeat Isabelle's avatar Boulangeat Isabelle

MAJ with 10 species analysed with GLM

parent 1805cbef
No preview for this file type
---
title: "Main_analyse.Rmd"
author: "Isabelle Boulangeat"
date: "23/01/2020"
output: html_document
editor_options:
chunk_output_type: console
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(Jmisc)
sourceAll("R_fct")
```
## Load data
- Load climate
- Load species cover data
- Filter out treatment plots
- filter out un-repeated plots
- filter out years > 2014 because of missing in climatologies
```{r load_data}
DATA = load.datasets()
str(DATA$cover)
str(DATA$climato)
mostRecordedSP = DATA$cover %>% group_by(Name) %>% summarise(nobs = length(Name)) %>% arrange(desc(nobs))
selection = as.vector(t(mostRecordedSP[1:22, "Name"]))
selection = selection[-which(selection %in% c("Arctagrostis latifolia", "Persicaria vivipara", "Carex aquatilis", "Cassiope tetragona", "Luzula confusa", "Luzula nivalis", "Salix arctica", "Poa arctica", "Eriophorum angustifolium", "Petasites frigidus", "Vaccinium uliginosum", "Dryas integrifolia"))]
```
N.B. I have quickly selected 10 species among the 22 having the most observations, with no problem with zero-days growing season in the previous years before an observation, and no problem in the dataset... **selection procedure to discuss**
## Prepare species specific datasets
Step 1: `create_dataset`:
- only subsites where the species have been seen (then ok for N=1)
- extract plot info
- build transition table
- merge with plot info
- calculate interval (years)
- tag transition types
- combine with climate (5 years earlier average)
- merge all
Step 2: adjustments
- Add anomalies
- remove intervals>=10 years
- select variables c("snowDays", "GSL", "GST", "SoilMoist", "TotalCover")
- modify SoilMoist levels (-1, 0, 1)
Step 3: prepare dataset to fit
- separate colonisation and extinction datasets
- scale data (not SoilMoist)
- prepare data for LaplacesDemon fit (optional)
- add event column for glm fit
```{r species_specific_datasets}
spDATA.list = lapply(selection, function(SPECIES){
print(SPECIES)
spDATA = dat.species(SPECIES, DATA)
return(spDATA)
} )
names(spDATA.list) = selection
(spDATA.check = lapply(spDATA.list, check.nas))
lapply(spDATA.list, function(x) table(x$spdat$trType))
```
## Fit models
```{r species_specific_models}
spDATA.fit = lapply(spDATA.list, fit.species)
```
## Summary of model evaluation
```{r model_evaluation}
spDATA.fit[[1]]$colo
eval.models = lapply(spDATA.fit, function(glm.fit){
eval.colo = eval.fit(glm.fit$colo, glm.fit$dat$colo, "colo.event")[-1]
vars.colo = paste(names(sort(abs(glm.fit$colo$coefficients[-1]), dec=T)), collapse=";")
eval.ext = eval.fit(glm.fit$ext, glm.fit$dat$ext, "ext.event")[-1]
vars.ext = paste(names(sort(abs(glm.fit$ext$coefficients[-1]), dec=T)), collapse=";")
return(list(colo.R2 = eval.colo$R2, colo.AUC = eval.colo$AUC, ext.R2 = eval.ext$R2, ext.AUC = eval.ext$AUC, vars.colo = vars.colo, vars.ext = vars.ext))
})
res.tab = do.call(rbind, eval.models)
res.tab
```
# ```{r model_evaluation}
# plot_model(glm.fit$colo, transform = NULL, show.values=TRUE, title = paste(SPECIES, "colonisation"))
# plot_model(glm.fit$ext, transform = NULL, show.values=TRUE, title = paste(SPECIES, "extinction"))
# plot_model(glm.fit$colo, type = "eff")
# ```
This diff is collapsed.
require(dplyr)
require(MASS)
require(fmsb)
require(ROCR)
require(LaplacesDemon)
#-----------------------------------
load.datasets <- function(){
temp = read.table("shrubhub/ARCfunc_timeseries_temp.txt", h=T, sep="\t", encoding = "UTF-8", comment.char="", quote = "")
precip = read.table("shrubhub/ARCfunc_timeseries_prec.txt", h=T, sep="\t", encoding = "UTF-8", comment.char="", quote = "")
climato = read.table("shrubhub/TVC_SITE_SUBSITE_UPDATED2016_snow_GSL_GST_CHELSAclimatologies.txt", h=T, sep="\t", encoding = "UTF-8", comment.char="", quote = "")
climato$SiteSubsite = paste(climato$SITE, climato$SUBSITE, sep=":")
load("coverc_sub.rdata")
cover.sub = coverc.sub[which(coverc.sub$TRTMT =="CTL"),]
cover.sub = cover.sub[which(cover.sub$RepeatedPlots ==1),]
cover.dom = cover.sub[which(cover.sub$YEAR<2014),]
return(list(cover=cover.dom, climato=climato))
}
#-----------------------------------
dat.species <- function(SPECIES, DATA, Bayesian=TRUE){
spdat = create_dataset(SPECIES, DATA$cover, DATA$climato)
spdat$GSL_anom = spdat$GSL-spdat$GSL_av
spdat$GST_anom = spdat$GST-spdat$GST_av
spdat$snowDays_anom = spdat$snowDays-spdat$snow_days_av
spdat$trType = factor(spdat$trType)
levels(spdat$trType) <- c("AA", "COLO", "EXT", "PP")
spdat = spdat[spdat$interval<10,]
selectVars = c("snowDays", "GSL", "GST", "SoilMoist", "TotalCover")
spdat$SoilMoist = factor(spdat$SoilMoist)
levels(spdat$SoilMoist) = c(-1,0,1)
spdat$SoilMoist = as.numeric(as.character(spdat$SoilMoist))
dat.ext = scale(spdat[which(spdat$trType%in%c("PP", "EXT")),selectVars])
dat.ext[,"SoilMoist"] = spdat[which(spdat$trType%in%c("PP", "EXT")),"SoilMoist"]
dat.colo = scale(spdat[which(spdat$trType%in%c("AA", "COLO")),selectVars])
dat.colo[,"SoilMoist"] = spdat[which(spdat$trType%in%c("AA", "COLO")),"SoilMoist"]
if(Bayesian){
nvars = length(selectVars)
parm.names.ext = as.parm.names(list(aE=rep(0, 1+nvars)))
nbetas.ext = length(parm.names.ext)
MyData.ext <- list(N = nrow(dat.ext), dat=dat.ext, mon.names = c("logLik") , parm.names= parm.names.ext,
var.aE = 1:(nvars),
aE.pos = grep("aE", parm.names.ext),
PA.pos = spdat$trType[which(spdat$trType%in%c("PP", "EXT"))]=="EXT",
PP.pos = spdat$trType[which(spdat$trType%in%c("PP", "EXT"))]=="PP",
invlogit_ = invlogit_,
itime = spdat$interval[which(spdat$trType%in%c("PP", "EXT"))],
nbetas = nbetas.ext)
parm.names.colo = as.parm.names(list(aC=rep(0, 1+nvars)))
nbetas.colo = length(parm.names.colo)
MyData.colo<- list(N = nrow(dat.colo), dat=dat.colo, mon.names = c("logLik") , parm.names= parm.names.colo,
var.aC = 1:(nvars),
aC.pos = grep("aC", parm.names.colo),
AP.pos = spdat$trType[which(spdat$trType%in%c("AA", "COLO"))]=="COLO",
AA.pos = spdat$trType[which(spdat$trType%in%c("AA", "COLO"))]=="AA",
invlogit_ = invlogit_,
density = rep(1, nrow(dat.colo)),
itime = spdat$interval[which(spdat$trType%in%c("AA", "COLO"))],
nbetas = nbetas.colo)
LD = list(colo = MyData.colo, ext = MyData.ext)
} else (LD = NULL)
dat.colo = data.frame(dat.colo)
dat.colo$colo.event = as.numeric(MyData.colo$AP.pos)
dat.ext = data.frame(dat.ext)
dat.ext$ext.event = as.numeric(MyData.ext$PA.pos)
return(list(spdat = spdat,LD = LD, colo = dat.colo, ext = dat.ext, sp = SPECIES))
}
#-----------------------------------
check.nas <- function(spDATA){
print(spDATA$sp)
return(sum(is.na(spDATA$spdat$GST)))
}
#-----------------------------------
fit.species <- function(spDATA){
print(spDATA$sp)
colo.mod = glm(colo.event~ (snowDays + GSL + GST + TotalCover) * SoilMoist, family = "binomial", data = spDATA$colo)
stepMod.colo = stepAIC(colo.mod, direction = "both", trace=0)
ext.mod = glm(ext.event~ (snowDays + GSL + GST + TotalCover) * SoilMoist, family = "binomial", data = spDATA$ext)
stepMod.ext = stepAIC(ext.mod, direction = "both", trace=0)
return(list(colo = stepMod.colo, ext = stepMod.ext, dat = spDATA))
}
#-----------------------------------
eval.fit <- function(stepMod, data, response.var){
pred = predict(stepMod,new=data,"response")
R2 = NagelkerkeR2(stepMod)$R2
perf = performance(prediction(pred, data[,response.var]), "auc")
AUC = perf@y.values[[1]]
return(list(pred = pred, R2=R2, AUC=AUC))
}
#-----------------------------------
fit.species.LD <- function(spDATA){
}
test_model<- function(MyData, loglik_fct){
require(LaplacesDemon)
set.seed(129837)
......
require(ggplot2)
require(reshape2)
require(sjPlot)
plot.effects.ld <- function(Fit.mcmc, vars, nmetapars=1, colos = c("lightblue", "steelblue", "grey50", "tomato", "orange")){
all.vars = rep(vars,nmetapars)
df = as.data.frame(Fit.mcmc$Posterior1)
df.long= melt(df)
df.long$par = unlist(lapply(df.long$variable, substr, start=1, stop=2))
df.long$index = unlist(lapply(df.long$variable, gsub, pattern="[^(0-9)]", replacement=""))
df.long2 = df.long[df.long$index>1,]
df.long2$var = factor(df.long2$index)
levels(df.long2$var) = all.vars
levels(df.long2$var) = vars
df.long2$par = factor(df.long2$par)
levels(df.long2$par)
# = c("aC", "aD", "bC", "bD", "d")
p1 = ggplot(df.long2, aes(x = var, y = value, fill = var)) +
geom_violin(trim= FALSE) +
geom_boxplot(alpha=0.3, width=0.2, outlier.size = 0) +
facet_wrap(~ par, ncol=3, dir="v", scales="free") +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
p2 = p1 + geom_hline(yintercept=0, linetype="dashed", color = "black") +
labs(title="Posterior distributions", x="variable", y="effect") +
scale_fill_manual(name="variables",
labels=levels(df.long2$var) ,
values = colos)
return(p2)
}
#-------------------------
plot.effects.glm <- function(SPECIES, glm.fit, colos = c("lightblue", "steelblue", "grey50", "tomato", "orange")){
coeff.colo = summary(glm.fit$colo)$coefficients[-1,1]
vars.colo = rownames(glm.fit$colo$coeff)[-1]
plot_model(glm.fit$colo, transform = NULL, show.values=TRUE, title = SPECIES)
plot_model(glm.fit$colo, transform = NULL, show.values=TRUE, title = SPECIES)
return(barplot(coeff.colo, col = colos))
}
......@@ -51,3 +51,71 @@ invlogit_ <- function(x, itime)
proba_itime = 1 - (1 - proba)^(itime)
return(proba_itime)
}
ext_model <- function(parm, Data)
{
require(data.table)
aE = parm[Data$aE.pos]
# fit transitions
logit_alphaE = tcrossprod(aE, as.matrix(cbind(rep(1, Data$N), Data$dat[,Data$var.aE])))
# compute transitions accounting for interval time and logit transformation
# ! might be NaN because exp(bigNumber)
aE.fct=as.vector(Data$invlogit_(logit_alphaE, Data$itime))
# LIKELIHOOD
lik = rep(NA, nrow(Data$dat))
lik[which(Data$PA.pos)]= (aE.fct) [Data$PA.pos]
lik[which(Data$PP.pos)]= (1-aE.fct) [Data$PP.pos]
lik[lik==0] = .Machine$double.xmin
loglik = sum(log(lik))
# PRIOR
# prior cauchy following Gelman 2008
# intercepts different of slope
beta.aE = dcauchy(aE[1], 1, 10, log = TRUE) + sum(dcauchy(aE[-1], 0, 2.5, log = TRUE))
# POSTERIOR
LP = loglik + beta.aE
return(list(LP = LP, Dev = -2*loglik, Monitor = LP, yhat = lik,parm = parm))
}
colo_model <- function(parm, Data)
{
require(data.table)
aC = parm[Data$aC.pos]
# fit transitions
logit_alphaC = tcrossprod(aC, as.matrix(cbind(rep(1, Data$N), Data$dat[,Data$var.aC])))
# compute transitions accounting for interval time and logit transformation
# ! might be NaN because exp(bigNumber)
aC.fct=as.vector(Data$invlogit_(logit_alphaC, Data$itime))
N = Data$density
# LIKELIHOOD
lik = rep(NA, nrow(Data$dat))
lik[which(Data$AA.pos)]= (1-(aC.fct*N)) [Data$AA.pos]
lik[which(Data$AP.pos)]= (N*aC.fct) [Data$AP.pos]
lik[lik==0] = .Machine$double.xmin
loglik = sum(log(lik))
# PRIOR
# prior cauchy following Gelman 2008
# intercepts different of slope
beta.aC = dcauchy(aC[1], 1, 10, log = TRUE) + sum(dcauchy(aC[-1], 0, 2.5, log = TRUE))
# POSTERIOR
LP = loglik + beta.aC
return(list(LP = LP, Dev = -2*loglik, Monitor = LP, yhat = lik,parm = parm))
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
File added
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment