Commit 634fa733 authored by Boulangeat Isabelle's avatar Boulangeat Isabelle

analyses and predictions

parent 6526ad71
......@@ -8,8 +8,11 @@ editor_options:
---
```{r setup, include=FALSE}
library(knitr)
library(kableExtra)
knitr::opts_chunk$set(echo = TRUE)
library(Jmisc)
library(tidyr)
sourceAll("R_fct")
```
......@@ -34,60 +37,139 @@ N.B. I have quickly selected 10 species among the 22 having the most observation
## Prepare species specific datasets
Step 1: `create_dataset`:
- only subsites where the species have been seen (then ok for N=1)
- extract plot info
- build transition table
- merge with plot info
- calculate interval (years)
- tag transition types
- combine with climate (5 years earlier average)
- merge all
Step 2: adjustments
- Add anomalies
- remove intervals>=10 years
- select variables c("snowDays", "GSL", "GST", "SoilMoist", "TotalCover")
- modify SoilMoist levels (-1, 0, 1)
Step 3: prepare dataset to fit
- separate colonisation and extinction datasets
- scale data (not SoilMoist)
- prepare data for LaplacesDemon fit (optional)
- add event column for glm fit
```{r species_specific_datasets}
<br>- only subsites where the species have been seen (then ok for N=1)
<br>- extract plot info
<br>- build transition table
<br>- merge with plot info
<br>- calculate interval (years)
<br>- tag transition types
<br>- combine with climate (5 years earlier average)
<br>- merge all
<br>Step 2: adjustments
<br>- Add anomalies
<br>- remove intervals>=10 years
<br>- select variables c("snowDays", "GSL", "GST", "SoilMoist", "TotalCover")
<br>- modify SoilMoist levels (-1, 0, 1)
<br>Step 3: prepare dataset to fit
<br>- separate colonisation and extinction datasets
<br>- scale data (not SoilMoist)
<br>- prepare data for LaplacesDemon fit (optional)
<br>- add event column for glm fit
```{r species_specific_datasets, results='hide'}
spDATA.list = lapply(selection, function(SPECIES){
print(SPECIES)
spDATA = dat.species(SPECIES, DATA)
return(spDATA)
} )
names(spDATA.list) = selection
(spDATA.check = lapply(spDATA.list, check.nas))
spDATA.check = lapply(spDATA.list, check.nas)
```
## Visualisation of transition tables
```{r transition_tables}
lapply(spDATA.list, function(x) table(x$spdat$trType))
```
## Fit models
```{r species_specific_models}
```{r species_specific_models, results='hide', warning=FALSE}
spDATA.fit = lapply(spDATA.list, fit.species)
```
## Summary of model evaluation
```{r model_evaluation}
spDATA.fit[[1]]$colo
```{r model_evaluation, results='asis'}
eval.models = lapply(spDATA.fit, function(glm.fit){
eval.colo = eval.fit(glm.fit$colo, glm.fit$dat$colo, "colo.event")[-1]
vars.colo = paste(names(sort(abs(glm.fit$colo$coefficients[-1]), dec=T)), collapse=";")
coeffs.colo = sort(abs(glm.fit$colo$coefficients[-1]), dec=T)
vars.colo = paste(names(coeffs.colo), collapse=";")
eval.ext = eval.fit(glm.fit$ext, glm.fit$dat$ext, "ext.event")[-1]
vars.ext = paste(names(sort(abs(glm.fit$ext$coefficients[-1]), dec=T)), collapse=";")
return(list(colo.R2 = eval.colo$R2, colo.AUC = eval.colo$AUC, ext.R2 = eval.ext$R2, ext.AUC = eval.ext$AUC, vars.colo = vars.colo, vars.ext = vars.ext))
coeffs.ext = sort(abs(glm.fit$ext$coefficients[-1]), dec=T)
vars.ext = paste(names(coeffs.ext), collapse=";")
return(list(colo.R2 = eval.colo$R2, colo.AUC = eval.colo$AUC, ext.R2 = eval.ext$R2, ext.AUC = eval.ext$AUC, vars.colo = vars.colo, vars.ext = vars.ext, coeffs.colo = coeffs.colo, coeffs.ext=coeffs.ext))
})
res.tab = do.call(rbind, eval.models)
res.tab
res.tab.eval = as.data.frame(do.call(rbind, lapply(eval.models, function(x)unlist(x[1:4]))))
res.tab.vars = as.data.frame(do.call(rbind, lapply(eval.models, function(x)unlist(x[5:6]))))
kable(res.tab.eval, caption = "model evaluation", digits = 2) %>% kable_styling(bootstrap_options = c("striped", "hover", "responsive"), full_width=FALSE)
kable(res.tab.vars, caption = "selected variables")%>% kable_styling(bootstrap_options = c("striped", "hover", "responsive"), font_size = 10)
```
```{r model_plots, include = FALSE}
# x = spDATA.fit[["Betula nana"]]
# layout(matrix(c(1,1), ncol = 2))
# plot_model(list(x$colo, x$ext), transform = NULL, show.values=TRUE, title = paste(x$dat$sp, "colonisation"))
# plot_model(x$ext, transform = NULL, show.values=TRUE, title = paste(x$dat$sp, "extinction"))
```
# ```{r model_evaluation}
# plot_model(glm.fit$colo, transform = NULL, show.values=TRUE, title = paste(SPECIES, "colonisation"))
# plot_model(glm.fit$ext, transform = NULL, show.values=TRUE, title = paste(SPECIES, "extinction"))
# plot_model(glm.fit$colo, type = "eff")
# ```
## Selected variables and coefficient, sumnmary table
```{r coefficients}
coeffs.all.list = lapply(spDATA.fit, function(x){
coe.colo = data.frame(summary(x$colo)$coefficients)
coe.colo[,"sp"] = x$dat$sp
coe.colo[,"var"] = rownames(coe.colo)
coe.colo[,"process"] = "colonisation"
coe.ext = data.frame(summary(x$ext)$coefficients)
coe.ext[,"sp"] = x$dat$sp
coe.ext[,"var"] = rownames(coe.ext)
coe.ext[,"process"] = "extinction"
coe = rbind(coe.colo[-1,], coe.ext[-1,])
return(coe)
})
coeffs.all = do.call(rbind, coeffs.all.list)
str(coeffs.all)
```
## Number of selection of variable per process
```{r summary_sel_vars}
data.frame(unclass(table(coeffs.all$var, coeffs.all$process))) %>%
mutate(
colonisation = cell_spec(colonisation, color = ifelse(colonisation > 5, "red", "black")),
extinction = cell_spec(extinction, color = ifelse(extinction > 5, "red", "black"))) %>%
kable(.) %>%
kable_styling("striped")
```
```{r plot_coeffs, fig = TRUE, fig.width=10}
coeffs.all[which(coeffs.all$process == "colonisation" & coeffs.all$Pr...z..<0.1),] %>%
ggplot(aes(sp, Estimate, fill = var)) +
geom_col(position = "dodge") +
theme_bw() + facet_wrap(~sp,scales = "free_x", ncol=4) +
ggtitle("Colonisation process")
coeffs.all[which(coeffs.all$process == "extinction" & coeffs.all$Pr...z..<0.1),] %>%
ggplot(aes(sp, Estimate, fill = var)) +
geom_col(position = "dodge") +
theme_bw() + facet_wrap(~sp,scales = "free_x", ncol=4) +
ggtitle("Extinction process")
```
```{r lambda, include = FALSE}
all.sites = unique(DATA$cover[,c("SemiUniquePLOT", "YEAR", "SUBSITE", "SoilMoist", "TotalCover")])
pDATA = proj.dat.shape(all.sites, DATA$climato)
str(pDATA)
summary(pDATA)
selectVars = c("snowDays", "GSL", "GST", "SoilMoist", "TotalCover")
pDATA.all = na.omit(pDATA[,c(selectVars,"SemiUniquePLOT", "YEAR", "SUBSITE") ])
pred.all = lapply(spDATA.fit, predictions, pred.data = pDATA.all, selectVars=selectVars)
pred.tab = lapply(pred.all, function(x)data.frame(sp = x$sp, lambda = x$lambda))
pred.all.together = do.call(rbind, pred.tab)
head(pred.all.together)
pred.all.together$YEAR = pDATA.all$YEAR
pred.all.together$SUBSITE = pDATA.all$SUBSITE
summary(pred.all.together)
summary(glm(lambda~YEAR, data = pred.all.together))
pred.all.together %>% group_by(SUBSITE, YEAR) %>%
summarise(lambda = mean(lambda)) %>%
ggplot(aes(YEAR, lambda, SUBSITE))+
geom_point(shape = 1)
```
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -38,9 +38,11 @@ dat.species <- function(SPECIES, DATA, Bayesian=TRUE){
levels(spdat$SoilMoist) = c(-1,0,1)
spdat$SoilMoist = as.numeric(as.character(spdat$SoilMoist))
spdat[which(spdat$trType%in%c("PP", "EXT")),"subset"] = "colo"
dat.ext = scale(spdat[which(spdat$trType%in%c("PP", "EXT")),selectVars])
dat.ext[,"SoilMoist"] = spdat[which(spdat$trType%in%c("PP", "EXT")),"SoilMoist"]
spdat[which(spdat$trType%in%c("AA", "COLO")),"subset"] = "ext"
dat.colo = scale(spdat[which(spdat$trType%in%c("AA", "COLO")),selectVars])
dat.colo[,"SoilMoist"] = spdat[which(spdat$trType%in%c("AA", "COLO")),"SoilMoist"]
......@@ -109,6 +111,24 @@ eval.fit <- function(stepMod, data, response.var){
AUC = perf@y.values[[1]]
return(list(pred = pred, R2=R2, AUC=AUC))
}
#-----------------------------------
predictions <- function(glm.fit, pred.data, selectVars){
pred.data$SoilMoist = factor(pred.data$SoilMoist)
levels(pred.data$SoilMoist) = c(-1, 0 , 1)
pred.data$SoilMoist = as.numeric(as.character(pred.data$SoilMoist))
sc.colo = scale(glm.fit$dat$spdat[which(glm.fit$dat$spdat$trType%in%c("AA", "COLO")),selectVars])
dat.pred.colo = scale(pred.data[,selectVars], center = attr(sc.colo,"scaled:center"), scale = attr(sc.colo,"scaled:scale"))
sc.ext = scale(glm.fit$dat$spdat[which(glm.fit$dat$spdat$trType%in%c("PP", "EXT")),selectVars])
dat.pred.ext = scale(pred.data[,selectVars], center = attr(sc.ext,"scaled:center"), scale = attr(sc.ext,"scaled:scale"))
pred.colo = predict(glm.fit$colo,new=data.frame(dat.pred.colo),"response")
pred.ext = predict(glm.fit$ext,new=data.frame(dat.pred.ext),"response")
pred.lambda = 1- (pred.ext/pred.colo)
return(list(lambda = pred.lambda, sp = glm.fit$dat$sp))
}
#-----------------------------------
......
......@@ -82,57 +82,11 @@ sp.table.tr.all$trType[which(sp.table.tr.all$st0==0 & sp.table.tr.all$st1==0)]="
#--------------------------------------------------
# combine with climate (5 years earlier average)
#---------------------------------------------------
climData = unique(sites[,c("SUBSITE","YEAR")])
grepClim = function(climData, climato, variable, nyears){
clim = apply(climData, 1, function(x){
subClim = climato[which(climato$SiteSubsite==x[[1]]),]
meanClim = mean(as.numeric(subClim[sapply(1:nyears, function(k){grep(paste(variable,as.numeric(x[[2]])-k+1,sep=""), names(subClim), value=FALSE)})]))
return(meanClim)
})
return(clim)
}
climData$snowDays = grepClim(climData, climato, "snow_days_", 5)
climData$GSL = grepClim(climData, climato, "GSL_", 5)
climData$GST = grepClim(climData, climato, "GST_", 5)
# ------------------------------------------------
# average
#------------------------------------------------
mean.snowDays = sapply(climato$SiteSubsite, function(x){
subClim = climato[which(climato$SiteSubsite==x),]
snowDays = mean(as.numeric(subClim[sapply(1979:2013, function(k){grep(paste("snow_days_",k,sep=""), names(subClim), value=FALSE)})]))
return(snowDays)
})
mean.GSL = sapply(climato$SiteSubsite, function(x){
subClim = climato[which(climato$SiteSubsite==x),]
GSL = mean(as.numeric(subClim[sapply(1979:2013, function(k){grep(paste("GSL_",k,sep=""), names(subClim), value=FALSE)})]))
return(GSL)
})
mean.GST = sapply(climato$SiteSubsite, function(x){
subClim = climato[which(climato$SiteSubsite==x),]
GST = mean(as.numeric(subClim[sapply(1979:2013, function(k){grep(paste("GST_",k,sep=""), names(subClim), value=FALSE)})]))
return(GST)
})
mean.snowDays = unique(data.frame(snow_days_av=mean.snowDays, SUBSITE = names(mean.snowDays)))
mean.GSL = unique(data.frame(GSL_av=mean.GSL, SUBSITE = names(mean.GSL)))
mean.GST = unique(data.frame(GST_av=mean.GST, SUBSITE = names(mean.GST)))
require(dplyr)
all_means <- mean.snowDays %>% inner_join(mean.GSL, by = "SUBSITE") %>% inner_join(mean.GST, by ="SUBSITE")
# ------------------------------------------------
# distribution
#------------------------------------------------
#distri = merge_distribution (sp.distri.path, ncell.side=3, plots, plot.diag = TRUE)
dataset = proj.dat.shape(sites, climato)
#--------------------------------------------------
# merge datasets
#---------------------------------------------------
dataset = merge(climData, sites, by=c("SUBSITE", "YEAR"))
#dataset = merge(climData, sites, by=c("SUBSITE", "YEAR"))
dataset2 = merge(dataset, sp.table.tr.all,by.x = c("SemiUniquePLOT", "YEAR"), by.y = c("SemiUniquePLOT", "y0"), all.y=TRUE)
#nrow(dataset2)
#nrow(dataset)
......@@ -140,36 +94,68 @@ dataset2 = merge(dataset, sp.table.tr.all,by.x = c("SemiUniquePLOT", "YEAR"), by
#dataset2 = merge(dataset, distri, by = "SUBSITE", all.x=TRUE, all.y=FALSE)
#if(!(nrow(dataset2) == n1)) stop("error merging datasets")
dataset3 = merge(dataset2, all_means, by = "SUBSITE", all.x=TRUE, all.y=FALSE)
# dataset3 = merge(dataset2, all_means, by = "SUBSITE", all.x=TRUE, all.y=FALSE)
#if(!(nrow(dataset3) == n1)) stop("error merging datasets")
return(dataset3)
return(dataset2)
}
#--------------
# clim arrange data function
grepClim <- function(climData, climato, variable, nyears){
clim = apply(climData, 1, function(x){
subClim = climato[which(climato$SiteSubsite==x[[1]]),]
index = as.numeric(sapply(1:nyears, function(k){
res = grep(paste(variable,as.numeric(x[[2]])-k+1,sep=""), names(subClim), value=FALSE)
return(res)
}))
if(sum(is.na(index))>0) {
meanClim = NA
} else meanClim = mean(as.numeric(subClim[index]))
return(meanClim)
})
return(clim)
}
##-------------
proj.dat.shape <- function(sites, year=2012, climato){
proj.dat = sites
#snowdays
proj.dat$snowDays = apply(proj.dat , 1, function(x){
# print(as.numeric(x[2]))
subClim = climato[which(climato$SiteSubsite==x[1]),]
snowDays = mean(as.numeric(subClim[sapply(1:5, function(k){grep(paste("snow_days_",as.numeric(year)-k+1,sep=""), names(subClim), value=FALSE)})]))
return(snowDays)
})
proj.dat$GST = apply(proj.dat , 1, function(x){
subClim = climato[which(climato$SiteSubsite==x[1]),]
GST = mean(as.numeric(subClim[sapply(1:5, function(k){grep(paste("GST_",as.numeric(year)-k+1,sep=""), names(subClim), value=FALSE)})]))
return(GST)
})
proj.dat$GSL = apply(proj.dat , 1, function(x){
subClim = climato[which(climato$SiteSubsite==x[1]),]
GSL = mean(as.numeric(subClim[sapply(1:5, function(k){grep(paste("GSL_",as.numeric(year)-k+1,sep=""), names(subClim), value=FALSE)})]))
return(GSL)
})
proj.dat$GST = apply(proj.dat , 1, function(x){
subClim = climato[which(climato$SiteSubsite==x[1]),]
GST = mean(as.numeric(subClim[sapply(1:5, function(k){grep(paste("GST_",as.numeric(year)-k+1,sep=""), names(subClim), value=FALSE)})]))
return(GST)
})
return(proj.dat)
proj.dat.shape <- function(sites, climato){
climData = unique(sites[,c("SUBSITE","YEAR")])
climData$snowDays = grepClim(climData, climato, "snow_days_", 5)
climData$GSL = grepClim(climData, climato, "GSL_", 5)
climData$GST = grepClim(climData, climato, "GST_", 5)
# ------------------------------------------------
# average
#------------------------------------------------
mean.snowDays = sapply(climato$SiteSubsite, function(x){
subClim = climato[which(climato$SiteSubsite==x),]
snowDays = mean(as.numeric(subClim[sapply(1979:2013, function(k){grep(paste("snow_days_",k,sep=""), names(subClim), value=FALSE)})]))
return(snowDays)
})
mean.GSL = sapply(climato$SiteSubsite, function(x){
subClim = climato[which(climato$SiteSubsite==x),]
GSL = mean(as.numeric(subClim[sapply(1979:2013, function(k){grep(paste("GSL_",k,sep=""), names(subClim), value=FALSE)})]))
return(GSL)
})
mean.GST = sapply(climato$SiteSubsite, function(x){
subClim = climato[which(climato$SiteSubsite==x),]
GST = mean(as.numeric(subClim[sapply(1979:2013, function(k){grep(paste("GST_",k,sep=""), names(subClim), value=FALSE)})]))
return(GST)
})
mean.snowDays = unique(data.frame(snow_days_av=mean.snowDays, SUBSITE = names(mean.snowDays)))
mean.GSL = unique(data.frame(GSL_av=mean.GSL, SUBSITE = names(mean.GSL)))
mean.GST = unique(data.frame(GST_av=mean.GST, SUBSITE = names(mean.GST)))
require(dplyr)
all_means <- mean.snowDays %>% inner_join(mean.GSL, by = "SUBSITE") %>% inner_join(mean.GST, by ="SUBSITE")
#--------------------------------------------------
# merge datasets
#---------------------------------------------------
dataset = merge(climData, sites, by=c("SUBSITE", "YEAR"))
dataset2 = merge(dataset, all_means, by = "SUBSITE", all.x=TRUE, all.y=FALSE)
#if(!(nrow(dataset3) == n1)) stop("error merging datasets")
return(dataset2)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment