-
Guillaume Perréal authoredb6fbac68
- Biomasse data
- Code d'extraction from siddt database
- Description générale
- Par milieu (variations spatiales et temporelles)
- Séries temporelles par placette (variations temporelles seules)
- Calibration et qualité de mesure
- Variabilité intra placette pour une date de mesure
- Site characteristics
- Releves (sites x date) and climate
- date de denneigement
- comparison Tair and Tsoil
- Gdd et date de mesure
- filtrer les releves effectués trop tôt ou trop tard
- Modèle général de biomasse, SPATIAL (variations de hauteur)
- Modèle général de variation de biomasse inter-annuelle (retrait effet site)
- Sélection de modèle avec toutes les variables selectionnées dans les 10 modèles pour chaque milieu
- Par groupe de variables par milieu
- Sens des liens avec les variables explicatives
- Messages conclusion:
- Analyser la typo
- Effets spatiaux purs: typo de végétation et nos variables
- Effets années
title: "README"
author: "Isabelle Boulangeat"
date: today
output:
html_document:
keep_md: yes
variant: markdown_github
pdf_document:
keep_md: yes
editor_options:
chunk_output_type: console
always_allow_html: yes
library(knitr)
# library(kableExtra)
knitr::opts_chunk$set(echo = FALSE, cache = TRUE, warning = FALSE, message=FALSE)
# library(Jmisc)
library(tidyr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
sapply(list.files("R_fct"), function(x)source(paste0("R_fct/",x)))
Biomasse data
Code d'extraction from siddt database
voir workflow0
sites = read.csv("_data_raw/sentinelle_biomasse_sites.csv")
data_h = read.csv("_data_prod/data_biomass.csv")
dat_h_veg = unique(merge(data_h, sites[,c("id_site", "ref_typoveg")], by.x = "ref_site", by.y ="id_site"))
Description générale
- Hauteur moyenne mesurée par milieu
- Variation inter-relevés (spatiale ou temporelle)
- Variation intra-relevés (hétérogénéité estimée sur les 80 points de mesure de hauteur)
dat_long <- dat_h_veg %>%
dplyr::select(hmean, hmad, ref_typoveg) %>%
drop_na() %>%
group_by(ref_typoveg) %>%
summarize(hauteur=mean(hmean), var_inter_releve=mad(hmean), var_intra_releve = mean(hmad)) %>%
gather(stat, value, hauteur:var_intra_releve)
Par milieu (variations spatiales et temporelles)
ggplot(dat_long, aes(fill=stat, x=ref_typoveg, y=value)) +
geom_bar(position = "dodge", stat = "identity") +
facet_wrap(~stat, ncol = 1, scales = "free")
ggplot(dat_h_veg, aes(x = reorder(ref_typoveg, hmean, na.rm=TRUE), y = hmean)) +
geom_boxplot() +
labs(y="Hauteur moyenne (est. biomasse) en cm", x="Type vegetation")
Séries temporelles par placette (variations temporelles seules)
dat_h_veg$date_releve = as.Date(dat_h_veg$date_releve)
pl <- ggplot(dat_h_veg, aes(x = date_releve, y = hmean)) +
geom_line(aes(color = ref_site), show.legend = FALSE) +
facet_wrap(~ref_typoveg)
pl + theme(legend.position = "none")
Calibration et qualité de mesure
calage = read.csv("../PNE_calage_reformat.csv", sep= ";", dec = ",")
calage$releve = paste0(calage$site, calage$Releveur)
cal_mean = unique(calage %>% group_by(releve) %>% summarize(site = site, hmean = mean(hauteur, na.rm=TRUE)))
obs_error = cal_mean %>% group_by(site) %>% summarize(hmad = mad(hmean), hmad_st = mad(hmean)/mean(hmean), hm = mean(hmean))
ggplot(cal_mean, aes(x = reorder(site, hmean), y = hmean)) +
geom_boxplot() +
geom_point()
ggplot(obs_error, aes(x = reorder(site, hm), y = hmad_st)) +
geom_bar(stat = "identity")
Erreur de mesure et variation spatiale:
dat_h_veg %>% group_by(ref_typoveg) %>% summarise(hmad = mad(hmean)) %>%
ggplot(aes(y=hmad, x=ref_typoveg)) + geom_bar(position = "dodge", stat = "identity") + geom_abline(intercept=max(obs_error$hmad), slope= 0, col= "red") + geom_abline(intercept =mean(obs_error$hmad), slope = 0, col = "orange")
Erreure de mesure et variation temporelle:
dat_h_veg %>% group_by(ref_site) %>% summarise(hmad = mad(hmean)) %>%
ggplot(aes(y=hmad, x=ref_site)) + geom_bar(position = "dodge", stat = "identity") + geom_abline(intercept=max(obs_error$hmad), slope= 0, col= "red") + geom_abline(intercept =mean(obs_error$hmad), slope = 0, col = "orange")
On peut considérer que les sites qui varient en dessous de Mean Absolute Deviation = 1cm ne varient pas assez pour passer au dessus de notre seuil de détection.
Il s'agit de :
sites_no_trend = unique(dat_h_veg %>% group_by(ref_site) %>% summarise(hmad = mad(hmean)) %>% filter(hmad<1) %>% left_join(dat_h_veg[, c("ref_site", "ref_typoveg")]))
tab_snt = table(sites_no_trend$ref_typoveg)
sites_no_trend
tab_snt
Variabilité intra placette pour une date de mesure
Mesure l'hétérogénéité de végétation au sein de la placette
sd vs mad
ggplot(dat_h_veg, aes(x = hmad, y = hsd)) +
geom_point()
ggplot(dat_h_veg, aes(x = reorder(ref_typoveg, hmad, na.rm=TRUE), y = hmad)) +
geom_boxplot() +
labs(y="Variation intra de hauteur", x="Type vegetation")
ggplot(dat_h_veg, aes(x = reorder(ref_typoveg, hmad/hmean, na.rm=TRUE), y = hmad/hmean)) +
geom_boxplot() +
labs(y="Variation intra de hauteur (stand. moy.)", x="Type vegetation")
Site characteristics
table(sites$ref_typoveg)
ggplot(data.frame(count = table(sites$ref_typoveg)), aes(y = factor(count.Freq), x = count.Var1)) +
geom_bar(stat = "identity") +
labs(title="", x="Type de vegetation", y = "Nb de sites")
sites = read.csv("_data_prod/releves_all_climate.csv") %>% dplyr::select(ref_site, contains("climato"), massif_name, alti, aspect, slope, ref_typoveg)
sites = unique(sites)
# sites_tab = sites %>% dplyr::select(contains("climato"), alti, aspect, slope)
# rownames(sites_tab) = sites$ref_site
# library(ade4)
# pca.sites = dudi.pca(sites_tab %>% dplyr::select_if(~ !any(is.na(.))), scannf = F, nf = 4)
# inertia.dudi(pca.sites)
# s.corcircle(pca.sites$co)
table(sites$ref_typoveg)
ggplot(sites, aes(x = reorder(ref_typoveg, alti, na.rm=TRUE), y = alti)) +
geom_boxplot() +
labs(y="Altitude", x="Type vegetation")
On retrouve les étagements des types de vegetation. Certains sont assez étalés en altitude.
ggplot(sites, aes(x = reorder(ref_typoveg, slope, na.rm=TRUE), y = slope)) +
geom_boxplot() +
labs(y="Slope", x="Type vegetation")
La pente est peu discriminante des types de vegetation sauf pour les eboulis et les pelouses enherbées.
Releves (sites x date) and climate
releves_all = read.csv("_data_prod/releves_all_climate.csv")
releves_full = releves_all %>% dplyr::select_if(~ !any(is.na(.)))
table(releves_full$ref_typoveg)
Etendu du jeu de données par type de végétation
table(releves_full$ref_typoveg)
ggplot(data.frame(count = table(releves_full$ref_typoveg)), aes(y = factor(count.Freq), x = count.Var1)) +
geom_bar(stat = "identity") +
labs(title="Releves par types de vegetation", x="Type de vegetation", y = "nb de plots")
Certains milieux ne sont pas assez représentés et on ne peut pas les analyser séparemment. A grouper (BRAC)? A ne pas poursuivre (EBOUL, HUM) pour la biomasse?
date de denneigement
ggplot(releves_full, aes(x = reorder(ref_typoveg, LSD_crosscut, na.rm=TRUE), y = LSD_crosscut)) +
geom_boxplot() +
labs(y="LSD", x="Type vegetation")
La date de denneigement separe bien les types de vegetation.
- 75 = 16 mars
- 100 = 10 avril
- 125 = 5 mai
- 150 = 31 mai
- 175 = 24 juin
comparison Tair and Tsoil
Tair :
- Tmin calculée à partir des températures au pas horaire qui sont intégrées au pas journalier comme (max+min)/2 Tsoil :
- Tsoil de surface (5mm) donné par safran-crocus au pas journalier
Les minimums et les cumuls gdd saisonniers sont calculés pendant la saison de végétation (jusqu'au retour de la neige)
library(ggplot2)
ggplot(releves_full, aes(x = minTemp_crosscut, y = minTemp_safran)) +
geom_abline(intercept = 0, slope = 1) +
geom_point() +
facet_wrap(~ ref_typoveg) +
labs(y="min Tair", x="min Tsoil")
Pour la plupart des milieux le sol a un effet "buffer". Cela est moins vrai pour les milieux SUB/PROD/NAR
Regard sur les minimales:
library(ggplot2)
ggplot(releves_full, aes(x = minTemp_safran, y = minTemp300_safran)) +
geom_abline(intercept = 0, slope = 1) +
geom_point() +
facet_wrap(~ ref_typoveg) +
labs(y="min Tair", x="min Tsoil")
Effet "buffer" pour tous les milieux sur la minimale.
library(ggplot2)
ggplot(releves_full, aes(x = cumgdd_crosscut, y = cumgdd_safran)) +
geom_abline(intercept = 0, slope = 1) +
geom_point() +
facet_wrap(~ ref_typoveg) +
labs(y="cumul Tair", x="cumul Tsoil")
Bien que les températures air et sol soient différentes, le cumul des degré jours est très corrélé, avec un cumul systématiquement plus élevé pour les températures de l'air.
Gdd et date de mesure
library(ggplot2)
ggplot(releves_full, aes(x = reorder(ref_typoveg, cumgdd_period_safran, na.rm=TRUE), y = cumgdd_period_safran)) +
geom_boxplot() +
labs(y="cumul gdd (air) at releve date", x="Type vegetation")
La plupart des observations, quelquesoient les types de végétation, sont faites vers 500-600 degré-jours. Certaines observations sont faites beaucoup plus tard.
late_rel = releves_full[which(releves_full$cumgdd_period_safran>900),c("releve", "date_releve", "ref_typoveg", "alti")]
print(late_rel)
Les mesures réalisées trop tôt ou trop tard sont pas exploitables. Mieux vaut sauter une année si la saison est passée.
releves_full$yday = lubridate::yday(releves_full$date_releve)
library(ggplot2)
ggplot(releves_full, aes(x = yday - daygdd300_safran)) +
geom_histogram() +
facet_wrap(~ ref_typoveg) +
labs(y="", x="jour de releve - jour de GDD 300 (air)")
filtrer les releves effectués trop tôt ou trop tard
releves_filter_gdd = releves_full %>% dplyr::filter(cumgdd_period_safran < 900 & cumgdd_period_safran > 300)
table(releves_filter_gdd$ref_typoveg)
late_rel %>% left_join(releves_full[,c("releve", "ref_site")]) %>% group_by(ref_site) %>% summarise(nsites = n()) %>% filter(nsites>1) %>% arrange(desc(nsites)) %>% left_join(releves_full[,c("ref_site","ref_typoveg")]) %>% unique()
A réfléchir comment gérer les contraintes...
Modèle général de biomasse, SPATIAL (variations de hauteur)
releves_plus = read.csv("_data_prod/releves_plus_climate.csv")
releves_selvars_mod_spat = releves_plus %>%
filter(ref_typoveg %in% c("ALP", "BOMB", "ECOR", "ENHE", "NAR", "NIV", "PROD", "QUE", "SUB")) %>%
dplyr::filter(cumgdd_period_safran < 900 & cumgdd_period_safran > 300) %>%
dplyr::select(c(!contains("alti"),hmean, ref_typoveg, slope)) %>%
dplyr::select(c(contains("climato"),hmean, ref_typoveg, slope)) %>%
dplyr::select(c(!contains("month7"),hmean, ref_typoveg, slope)) %>%
dplyr::select(c(!contains("month8"),hmean, ref_typoveg, slope)) %>%
dplyr::select(c(!contains("cumgdd60"),hmean, ref_typoveg, slope)) %>%
dplyr::select(c(!contains("WBAL"),hmean, ref_typoveg, slope)) %>%
dplyr::select(c(!contains("PET_"),hmean, ref_typoveg, slope)) %>%
dplyr::select(c(!contains("temp8cm"),hmean, ref_typoveg, slope)) %>%
dplyr::select(c(!contains("lat"),hmean, ref_typoveg, slope)) %>%
dplyr::select(c(!contains("end_season"),hmean, ref_typoveg, slope)) %>%
dplyr::select_if(~ !any(is.na(.)))
# cor(dplyr::select(releves_selvars_mod_spat, -c(hmean, ref_typoveg)))
#
# PCA = dudi.pca(dplyr::select(releves_selvars_mod_spat, -c(hmean, ref_typoveg)), scannf = FALSE, nf =2)
# s.corcircle(PCA$co)
library(randomForest)
mod = randomForest(hmean ~ . ,
data = releves_selvars_mod_spat ,
ntree = 100, mtry=16)
mod
varImpPlot(mod)
imp=data.frame(var = rownames(mod$importance), val = c(mod$importance))
pal = colorRampPalette(c("yellow", "orange", "red", "darkviolet"))
imp_vartype = imp %>% mutate(var = if_else(grepl("frost",var), "frost", var)) %>%
mutate(var = if_else(grepl("precip",var), "precipitations", var)) %>%
mutate(var = if_else(grepl("LSD",var), "snow", var)) %>%
mutate(var = if_else(grepl("T",var), "temperature", var)) %>%
mutate(var = if_else(grepl("snow",var), "snow", var)) %>%
mutate(var = if_else(grepl("temp",var), "temperature", var)) %>%
mutate(var = if_else(grepl("gdd",var), "degree growing days", var)) %>%
mutate(var = if_else(grepl("runoff",var), "runoff", var)) %>%
mutate(var = if_else(grepl("typoveg",var), "vegetation type", var))
imp_vartype = imp_vartype %>% group_by(var) %>% summarise(sumval = sum(val)) %>% mutate (sumval = sumval/sum(sumval))
imp_vartype$prop = round(imp_vartype$sumval*100)
imp_vartype = imp_vartype %>% arrange(desc(sumval)) %>% mutate(lab.ypos = cumsum(prop) - 0.5*prop)
pie(imp_vartype$prop, labels = imp_vartype$var, col = rev(pal(8)))
imp_month = imp %>% mutate(var = if_else(grepl("month4",var), "april", var)) %>%
mutate(var = if_else(grepl("month5",var), "may", var)) %>%
mutate(var = if_else(grepl("month6",var), "june", var)) %>%
mutate(var = if_else(grepl("cumgdd",var), "cumulative GDD", var)) %>%
mutate(var = if_else(grepl("frost",var), "cumulative frost", var)) %>%
mutate(var = if_else(grepl("T",var), "temperature", var)) %>%
mutate(var = if_else(grepl("LSD",var), "late snow", var)) %>%
mutate(var = if_else(grepl("date_gdd300",var), "day gdd300", var)) %>%
mutate(var = if_else(grepl("typoveg",var), "vegetation type", var))
imp_month = imp_month %>% group_by(var) %>% summarise(sumval = sum(val)) %>% mutate (sumval = sumval/sum(sumval))
imp_month$prop = round(imp_month$sumval*100)
imp_month = imp_month %>% arrange(desc(sumval)) %>% mutate(lab.ypos = cumsum(prop) - 0.5*prop)
pie(imp_month$prop, labels = imp_month$var, col = rev(pal(8)))
83% de variance expliqué spatialement. Effet important du type de vegetation, et de temperatures notamment frost
On a pas mal de limites liées au démarrage de la saison de végétation.
Modèle général de variation de biomasse inter-annuelle (retrait effet site)
Etendu du jeu de données temporel
sites_no_repeat = names(which(table(releves_full$ref_site)==1))
ggplot(data.frame(count = table(releves_full$ref_site)), aes(x = factor(count.Freq))) + geom_bar(stat = "count") +
labs(title="", x="Nombre de relevés sur un même site", y = "Nombre de sites")
releves_selvars_mod_temp = releves_plus %>%
filter(ref_typoveg %in% c("ALP", "BOMB", "ECOR", "ENHE", "NAR", "NIV", "PROD", "QUE", "SUB")) %>%
dplyr::filter(cumgdd_period_safran < 900 & cumgdd_period_safran > 300) %>%
filter(!(ref_site %in% sites_no_trend)) %>%
filter(!(ref_site %in% sites_no_repeat)) %>%
mutate(ref_site = as.factor(ref_site))%>%
dplyr::select(-c(contains("climato"))) %>%
dplyr::select(c(!contains("month7"))) %>%
dplyr::select(c(!contains("month8"))) %>%
dplyr::select(c(!contains("cumgdd60"))) %>%
dplyr::select(c(!contains("alti"))) %>%
dplyr::select(c(!contains("lat"))) %>%
dplyr::select(c(!contains("end_season"))) %>%
dplyr::select(c(!contains("cumprecip60"))) %>%
dplyr::select(c(!contains("temp8cm"))) %>%
dplyr::select(c(!contains("gdd600"))) %>%
dplyr::select(c(!contains("gdd900"))) %>%
dplyr::select(c(!contains("PET"))) %>%
dplyr::select(c(!contains("WBAL"))) %>%
dplyr::select(c(!contains("year_agromet"))) %>%
dplyr::select(c(!contains("weights_agromet"))) %>%
dplyr::select(c(!contains("07"))) %>%
dplyr::select_if(~ !any(is.na(.))) %>%
dplyr::select(ref_typoveg, soil_temp5mm_month4_crosscut:length_season, norm_hmean_site)
mod = randomForest(norm_hmean_site ~ . ,
data = releves_selvars_mod_temp,
ntree = 100, mtry=16)
mod
varImpPlot(mod)
imp=data.frame(var = rownames(mod$importance), val = c(mod$importance))
imp_vartype = imp %>% mutate(var = if_else(grepl("frost",var), "frost", var)) %>%
mutate(var = if_else(grepl("precip",var), "precipitations", var)) %>%
mutate(var = if_else(grepl("LSD",var), "snow", var)) %>%
mutate(var = if_else(grepl("beginning",var), "snow", var)) %>%
mutate(var = if_else(grepl("Tmin",var), "temperature mini", var)) %>%
mutate(var = if_else(grepl("maxTemp",var), "temperature maxi", var)) %>%
mutate(var = if_else(grepl("minTemp",var), "temperature mini", var)) %>%
mutate(var = if_else(grepl("Tmax",var), "temperature maxi", var)) %>%
mutate(var = if_else(grepl("tmin",var), "temperature mini", var)) %>%
mutate(var = if_else(grepl("tmax",var), "temperature maxi", var)) %>%
mutate(var = if_else(grepl("soil",var), "temperature soil", var)) %>%
mutate(var = if_else(grepl("snow",var), "snow", var)) %>%
mutate(var = if_else(grepl("gdd",var), "degree growing days", var)) %>%
mutate(var = if_else(grepl("runoff",var), "runoff", var)) %>%
mutate(var = if_else(grepl("year",var), "year", var)) %>%
mutate(var = if_else(grepl("typoveg",var), "vegetation type", var))
imp_vartype = imp_vartype %>% group_by(var) %>% summarise(sumval = sum(val)) %>% mutate (sumval = sumval/sum(sumval))
imp_vartype$prop = round(imp_vartype$sumval*100)
imp_vartype = imp_vartype %>% arrange(desc(sumval)) %>% mutate(lab.ypos = cumsum(prop) - 0.5*prop)
pie(imp_vartype$prop, labels = imp_vartype$var, col = rev(pal(11)))
imp_month = imp %>% mutate(var = if_else(grepl("month4",var), "april", var)) %>%
mutate(var = if_else(grepl("month5",var), "may", var)) %>%
mutate(var = if_else(grepl("month6",var), "june", var)) %>%
mutate(var = if_else(grepl("LSD",var), "snow melt", var)) %>%
mutate(var = if_else(grepl("beginning",var), "snow melt", var)) %>%
mutate(var = if_else(grepl("day",var), "speed gdd", var)) %>%
mutate(var = if_else(grepl("date",var), "speed gdd", var)) %>%
mutate(var = if_else(grepl("speed",var), "speed gdd", var)) %>%
mutate(var = if_else(grepl("previous30",var), "growing start conditions", var)) %>%
mutate(var = if_else(grepl("period",var), "growing start conditions", var)) %>%
mutate(var = if_else(grepl("precip300",var), "growing start conditions", var)) %>%
mutate(var = if_else(grepl("Temp300",var), "growing start conditions", var)) %>%
mutate(var = if_else(grepl("cumgdd",var), "growing season conditions", var)) %>%
mutate(var = if_else(grepl("cumprecip",var), "growing season conditions", var)) %>%
mutate(var = if_else(grepl("length_season",var), "growing season conditions", var)) %>%
mutate(var = if_else(grepl("Temp",var), "growing season conditions", var)) %>%
mutate(var = if_else(grepl("year",var), "growing season conditions", var)) %>%
mutate(var = if_else(grepl("typoveg",var), "vegetation type", var))
imp_month = imp_month %>% group_by(var) %>% summarise(sumval = sum(val)) %>% mutate (sumval = sumval/sum(sumval))
imp_month$prop = round(imp_month$sumval*100)
imp_month = imp_month %>% arrange(desc(sumval)) %>% mutate(lab.ypos = cumsum(prop) - 0.5*prop)
pie(imp_month$prop, labels = imp_month$var, col = rev(pal(8)))
Autour de 22% de variance temporelle expliquée. Globalement le climat du mois de juin semble le plus déterminant, ainsi que les precipitations au début de la saison. On retrouve un gros effet "année" également.
Analyse par type de facteurs
dat_select = releves_plus %>%
filter(ref_typoveg %in% c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")) %>%
dplyr::filter(cumgdd_period_safran < 900 & cumgdd_period_safran > 300) %>%
filter(!(ref_site %in% sites_no_trend)) %>%
filter(!(ref_site %in% sites_no_repeat)) %>%
mutate(ref_site = as.factor(ref_site))%>%
dplyr::select(-c(contains("climato"))) %>%
dplyr::select(c(!contains("month7"))) %>%
dplyr::select(c(!contains("month8"))) %>%
dplyr::select(c(!contains("cumgdd60"))) %>%
dplyr::select(c(!contains("alti"))) %>%
dplyr::select(c(!contains("lat"))) %>%
dplyr::select(c(!contains("end_season"))) %>%
dplyr::select(c(!contains("cumprecip60"))) %>%
dplyr::select(c(!contains("temp8cm"))) %>%
dplyr::select(c(!contains("gdd600"))) %>%
dplyr::select(c(!contains("gdd900"))) %>%
dplyr::select(c(!contains("PET"))) %>%
dplyr::select(c(!contains("WBAL"))) %>%
dplyr::select(c(!contains("year_agromet"))) %>%
dplyr::select(c(!contains("weights_agromet"))) %>%
dplyr::select(c(!contains("07"))) %>%
dplyr::select_if(~ !any(is.na(.))) %>%
dplyr::select(ref_typoveg, ref_site, soil_temp5mm_month4_crosscut:length_season, hmean)
library(lme4)
library(nlme)
library(lmerTest)
library(car)
table(dat_select$ref_typoveg)
summary(dat_select)
# estimation du modèle
library(relaimpo)
library(regclass)
select_mod = function(dat, dir = "both"){
mod_lm <- lm(norm_hmean_site ~ ., data =dat)
library(MASS)
modAIC = stepAIC(mod_lm, direction = dir)
df = broom::tidy(modAIC)
if(length(df$term)>2) {
vif = car::vif(modAIC)
vif_df = data.frame(term = names(vif), vif = vif)
relimp = relaimpo::calc.relimp(modAIC, rela = TRUE)
relimp_df = data.frame(term = relimp@namen[-1], relimp = relimp@lmg)
} else {
vif_df = data.frame(term = df$term, vif = NA)
if(length(df$term>1)){
relimp_df = data.frame(term = df$term, relimp = 1)
}else{
relimp_df = data.frame(term = df$term, relimp = NA)
}
}
df$rsq = broom::glance(modAIC)$r.squared
df = df %>% left_join(relimp_df)
df = df %>% left_join(vif_df)
return(df)
}
# datasets
# mod_season = dat_select %>% dplyr::select(!contains("runoff")) %>%
# dplyr::select(!contains("month")) %>%
# dplyr::select(!contains("300")) %>%
# dplyr::select(!contains("period")) %>%
# dplyr::select(c(ref_typoveg, hmean,ref_site,contains("LSD_safran"), contains("beginning"), cumgdd_crosscut, ndayfrost_crosscut, minTemp_crosscut, maxTemp_crosscut, year_safran, cumgdd_safran, ndayfrost_safran, minTemp_safran, maxTemp_safran, cumprecip_safran, length_season ))
# dim(mod_season)
#
mod_gperiod300= dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("LSD_safran"), contains("speddgdd300_s"), contains("precip300"), contains("Temp300_T"), contains("ndayfrost300_T") ))
dim(mod_gperiod300)
cor(mod_gperiod300[,-c(1:3)])
mod_gperiod300_agromet = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("speedgdd300_s") ,contains("precip300"), contains("gdd300_agromet") ))
dim(mod_gperiod300_agromet)
cor(mod_gperiod300_agromet[,-c(1:3)])
mod_gperiod = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("LSD_safran"), contains("period"))) %>%
dplyr::select(-c("tgdd_period_safran"))
dim(mod_gperiod)
cor(mod_gperiod[,-c(1:3)])
mod_previous30 = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("LSD_safran"), contains("previous30"), contains("cumgdd30_") ))
dim(mod_previous30)
cor(mod_previous30[,-c(1:3)])
mod_april_min = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month4") )) %>%
dplyr::select(-contains("Tgdd_")) %>%
dplyr::select(-contains("soil")) %>%
dplyr::select(-contains("Tmax")) %>%
dplyr::select(-contains("runoff"))
dim(mod_april_min)
cor(mod_april_min[,-c(1:3)])
mod_april_max = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month4") )) %>%
dplyr::select(-contains("Tgdd_")) %>%
dplyr::select(-contains("soil")) %>%
dplyr::select(-contains("Tmin")) %>%
dplyr::select(-contains("runoff"))
dim(mod_april_max)
cor(mod_april_max[,-c(1:3)])
mod_april_soil = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month4") )) %>%
dplyr::select(-contains("Tgdd_")) %>%
dplyr::select(-contains("Tmax")) %>%
dplyr::select(-contains("Tmin")) %>%
dplyr::select(-contains("runoff"))
dim(mod_april_soil)
cor(mod_april_soil[,-c(1:3)])
mod_may_min = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month5") )) %>%
dplyr::select(-contains("Tgdd_")) %>%
dplyr::select(-contains("soil")) %>%
dplyr::select(-contains("Tmax")) %>%
dplyr::select(-contains("runoff"))
dim(mod_may_min)
cor(mod_may_min[,-c(1:3)])
mod_may_max = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month5") )) %>%
dplyr::select(-contains("Tgdd_")) %>%
dplyr::select(-contains("soil")) %>%
dplyr::select(-contains("Tmin")) %>%
dplyr::select(-contains("runoff"))
dim(mod_may_max)
cor(mod_may_max[,-c(1:3)])
mod_may_soil = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month5") )) %>%
dplyr::select(-contains("Tgdd_")) %>%
dplyr::select(-contains("Tmax")) %>%
dplyr::select(-contains("Tmin")) %>%
dplyr::select(-contains("runoff"))
dim(mod_may_soil)
cor(mod_may_soil[,-c(1:3)])
mod_june_min = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month6") )) %>%
dplyr::select(-contains("Tgdd_")) %>%
dplyr::select(-contains("soil")) %>%
dplyr::select(-contains("Tmax")) %>%
dplyr::select(-contains("runoff"))
dim(mod_june_min)
cor(mod_june_min[,-c(1:3)])
mod_june_max = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month6") )) %>%
dplyr::select(-contains("Tgdd_")) %>%
dplyr::select(-contains("soil")) %>%
dplyr::select(-contains("Tmin")) %>%
dplyr::select(-contains("runoff"))
dim(mod_june_max)
cor(mod_june_max[,-c(1:3)])
mod_june_soil = dat_select %>%
dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month6") )) %>%
dplyr::select(-contains("Tgdd_")) %>%
dplyr::select(-contains("Tmax")) %>%
dplyr::select(-contains("Tmin")) %>%
dplyr::select(-contains("runoff"))
dim(mod_june_soil)
cor(mod_june_soil[,-c(1:3)])
# mod_tmin = dat_select %>%
# dplyr::select(c(ref_typoveg, hmean,ref_site, contains("min") ))
# dim(mod_tmin)
#
# mod_tmax = dat_select %>%
# dplyr::select(c(ref_typoveg, hmean,ref_site, contains("max") ))
# head(mod_tmax)
#
# mod_tsoil = dat_select %>%
# dplyr::select(c(ref_typoveg, hmean,ref_site, contains("soil"), contains("snowdepth")))
# head(mod_tsoil)
#
# mod_gdd = dat_select %>%
# dplyr::select(c(ref_typoveg, hmean,ref_site, contains("tgdd"), contains("Tgdd"), contains("cumgdd") ))
# head(mod_gdd)
mods_AIC_list = lapply(list(mod_gperiod, mod_gperiod300_agromet, mod_gperiod300, mod_previous30, mod_april_min, mod_april_max, mod_may_min, mod_may_max, mod_june_min, mod_june_max), function(x){
x %>%
group_by(ref_site) %>%
mutate(site_mean = mean(hmean)) %>%
ungroup() %>%
mutate(norm_hmean_site = (hmean-site_mean)) %>%
group_by(ref_typoveg) %>%
dplyr::select(-c(ref_site, site_mean, hmean)) %>%
mutate_at(vars(-c(norm_hmean_site, ref_typoveg)), scale) %>%
# drop_na() %>%
group_modify(~select_mod(.x))
})
list_rsq = lapply(mods_AIC_list, function(x){x %>% group_by(ref_typoveg) %>% summarise(rsq = rsq) %>%unique()}) %>% bind_rows(.id="model") %>% group_by(ref_typoveg) %>% summarise(mod = model[which.max(rsq)], rsq = max(rsq) )
mods_AIC = rbind(
mods_AIC_list[[as.integer(list_rsq[which(list_rsq$ref_typoveg=="ALP"), "mod"])]] %>% filter(ref_typoveg == "ALP"),
mods_AIC_list[[as.integer(list_rsq[which(list_rsq$ref_typoveg=="ECOR"), "mod"])]] %>% filter(ref_typoveg == "ECOR"),
mods_AIC_list[[as.integer(list_rsq[which(list_rsq$ref_typoveg=="NAR"), "mod"])]] %>% filter(ref_typoveg == "NAR"),
mods_AIC_list[[as.integer(list_rsq[which(list_rsq$ref_typoveg=="NIV"), "mod"])]] %>% filter(ref_typoveg == "NIV"),
mods_AIC_list[[as.integer(list_rsq[which(list_rsq$ref_typoveg=="PROD"), "mod"])]] %>% filter(ref_typoveg == "PROD"),
mods_AIC_list[[as.integer(list_rsq[which(list_rsq$ref_typoveg=="QUE"), "mod"])]] %>% filter(ref_typoveg == "QUE"),
mods_AIC_list[[as.integer(list_rsq[which(list_rsq$ref_typoveg=="SUB"), "mod"])]] %>% filter(ref_typoveg == "SUB"),
mods_AIC_list[[as.integer(list_rsq[which(list_rsq$ref_typoveg=="ENHE"), "mod"])]] %>% filter(ref_typoveg == "ENHE"),
mods_AIC_list[[as.integer(list_rsq[which(list_rsq$ref_typoveg=="BOMB"), "mod"])]] %>% filter(ref_typoveg == "BOMB"),
)
modsAIC_all = do.call(rbind, mods_AIC_list) %>% filter(!is.na(estimate)) %>% filter(p.value<0.2)
summary(modsAIC_all)
modsAIC_all %>% dplyr::select(ref_typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
list_rsq
# modsAIC_all= modsAIC_all %>% filter(p.value<0.1) %>%
# mutate(term = if_else(grepl("LSD",term), " last snow day", term)) %>%
# mutate(term = if_else(grepl("beginning",term), " last snow day", term)) %>%
# mutate(term = if_else(grepl("date_gdd300",term), " speed gdd 300", term)) %>%
# mutate(term = if_else(grepl("daygdd300",term), " speed gdd 300", term)) %>%
# mutate(term = if_else(grepl("cumgdd_period",term), "gdd growing start", term)) %>%
# mutate(term = if_else(grepl("precip_period",term), "precip growing start", term)) %>%
# mutate(term = if_else(grepl("min_period",term), "Tmin growing start", term)) %>%
# mutate(term = if_else(grepl("ndayfrost_period",term), "Tmin growing start", term)) %>%
# mutate(term = if_else(grepl("gdd300",term), "Tmin growing start", term)) %>%
# mutate(term = if_else(grepl("max_period",term), "Tmax growing start", term)) %>%
# mutate(term = if_else(grepl("tgdd_period",term), "gdd growing start", term)) %>%
# mutate(term = if_else(grepl("cumgdd30",term), "gdd growing start", term)) %>%
#
# mutate(term = if_else(grepl("precip300",term), "precip growing start", term)) %>%
# mutate(term = if_else(grepl("minTemp300",term), "Tmin growing start", term)) %>%
# mutate(term = if_else(grepl("maxTemp300",term), "Tmax growing start", term)) %>%
#
# mutate(term = if_else(grepl("ndayfrost300",term), "Tmin growing start", term)) %>%
# mutate(term = if_else(grepl("previous30_runoff",term), " runoff 30d before", term)) %>%
#
#
#
# mutate(term = if_else(grepl("cumgdd",term), "gdd growing season", term)) %>%
# mutate(term = if_else(grepl("minTemp",term), "Tmin growing season", term)) %>%
# mutate(term = if_else(grepl("maxTemp",term), "Tmax growing season", term)) %>%
#
# mutate(term = if_else(grepl("cumprecip",term), "precip growing season", term)) %>%
#
# mutate(term = if_else(grepl("mm_month",term), "gdd growing start", term)) %>%
#
# mutate(term = if_else(grepl("depth_month",term), " snow quantity 4-6", term)) %>%
#
# mutate(term = if_else(grepl("max_month",term), "Tmax growing start", term)) %>%
#
# mutate(term = if_else(grepl("min_month",term), "Tmin growing start", term)) %>%
#
# mutate(term = if_else(grepl("gdd_month",term), "gdd growing start", term)) %>%
#
# mutate(term = if_else(grepl("runoff_month",term), " runoff 4-6", term)) %>%
#
# mutate(term = if_else(grepl("precip_month",term), "precip growing start", term)) %>%
#
# mutate(term = if_else(grepl("year",term), "year", term))
#
# imp_vars = modsAIC_all %>% filter(term != "(Intercept)") %>% group_by(ref_typoveg, term) %>% summarise(importance = sum(relimp))
#
# rsq_text = data.frame(ref_typoveg = list_rsq$ref_typoveg, label = paste0("rsq = ",round(list_rsq$rsq, 2)*100, "%"))
#
# ggplot(imp_vars %>% left_join(rsq_text), aes(x = "", y = importance, fill = term, label = label)) +
# geom_bar(stat = "identity") +
# coord_polar("y", start = 0) +
# theme_void() + scale_fill_brewer(palette="Paired") +
# facet_wrap(~ref_typoveg) +
# geom_text( aes(label= label), size = 3, x = 1, y = 0)
Sélection de modèle avec toutes les variables selectionnées dans les 10 modèles pour chaque milieu
#modsAIC_all
final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
sel1 = modsAIC_all %>% filter(ref_typoveg == x) %>% filter(term!="(Intercept)")
# if(max(sel1$vif , na.rm=TRUE)>2) {
# select_terms = sel1 %>% filter(vif < max(vif, na.rm = TRUE)) %>% dplyr::select(term) %>% unique()
# } else {
select_terms = modsAIC_all %>% filter(ref_typoveg == x) %>% filter(term!="(Intercept)") %>% dplyr::select(term) %>% unique()
# }
dat_select %>% dplyr::select(c(select_terms$term, hmean,ref_site)) %>% mutate(site_mean = mean(hmean)) %>% mutate(norm_hmean_site = (hmean-site_mean)) %>% dplyr::select(-c(ref_site, site_mean, hmean)) %>% mutate_at(vars(-c(norm_hmean_site)), scale) %>% select_mod( dir ="both")
}) %>% bind_rows(.id="typoveg")
final_mods$typoveg = as.factor(final_mods$typoveg)
levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")
final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique()
final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
sel1 = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)")
if(max(sel1$vif , na.rm=TRUE)>5) {
select_terms = sel1 %>% filter(vif < max(vif, na.rm = TRUE)) %>% dplyr::select(term) %>% unique()
} else {
select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% dplyr::select(term) %>% unique()
}
dat_select %>% dplyr::select(c(select_terms$term, hmean,ref_site)) %>% mutate(site_mean = mean(hmean)) %>% mutate(norm_hmean_site = (hmean-site_mean)) %>% dplyr::select(-c(ref_site, site_mean, hmean)) %>% mutate_at(vars(-c(norm_hmean_site)), scale) %>% select_mod( dir ="both")
}) %>% bind_rows(.id="typoveg")
final_mods$typoveg = as.factor(final_mods$typoveg)
levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")
final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique()
final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2.5) {
select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% filter(vif < max(vif, na.rm = TRUE)) %>% dplyr::select(term) %>% unique()
} else {
select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% dplyr::select(term) %>% unique()
}
dat_select %>% dplyr::select(c(select_terms$term, hmean,ref_site)) %>% mutate(site_mean = mean(hmean)) %>% mutate(norm_hmean_site = (hmean-site_mean)) %>% dplyr::select(-c(ref_site, site_mean, hmean)) %>% mutate_at(vars(-c(norm_hmean_site)), scale) %>% select_mod( dir ="both")
}) %>% bind_rows(.id="typoveg")
final_mods$typoveg = as.factor(final_mods$typoveg)
levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")
final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique()
final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2.5) {
select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% filter(vif < max(vif, na.rm = TRUE)) %>% dplyr::select(term) %>% unique()
} else {
select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% dplyr::select(term) %>% unique()
}
dat_select %>% dplyr::select(c(select_terms$term, hmean,ref_site)) %>% mutate(site_mean = mean(hmean)) %>% mutate(norm_hmean_site = (hmean-site_mean)) %>% dplyr::select(-c(ref_site, site_mean, hmean)) %>% mutate_at(vars(-c(norm_hmean_site)), scale) %>% select_mod( dir ="both")
}) %>% bind_rows(.id="typoveg")
final_mods$typoveg = as.factor(final_mods$typoveg)
levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")
final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique()
#---
# final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
# if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2) {
# select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% filter(vif < max(vif)) %>% dplyr::select(term) %>% unique()
# } else {
# select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% dplyr::select(term) %>% unique()
# }
# dat_select %>% dplyr::select(c(select_terms$term, hmean,ref_site)) %>% mutate(site_mean = mean(hmean)) %>% mutate(norm_hmean_site = (hmean-site_mean)) %>% dplyr::select(-c(ref_site, site_mean, hmean)) %>% mutate_at(vars(-c(norm_hmean_site)), scale) %>% select_mod( dir ="both")
# }) %>% bind_rows(.id="typoveg")
# final_mods$typoveg = as.factor(final_mods$typoveg)
# levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")
#
# final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
#
#
rsq = final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique()
Par groupe de variables par milieu
require(ggplot2)
summary(final_mods)
final_mods_vars= final_mods %>% filter(p.value<0.1) %>%
mutate(term = if_else(grepl("LSD",term), " last snow day", term)) %>%
mutate(term = if_else(grepl("beginning",term), " last snow day", term)) %>%
mutate(term = if_else(grepl("date_gdd300",term), " speed gdd 300", term)) %>%
mutate(term = if_else(grepl("daygdd300",term), " speed gdd 300", term)) %>%
mutate(term = if_else(grepl("cumgdd_period",term), "gdd growing start", term)) %>%
mutate(term = if_else(grepl("precip_period",term), "precip growing start", term)) %>%
mutate(term = if_else(grepl("min_period",term), "Tmin growing start", term)) %>%
mutate(term = if_else(grepl("ndayfrost_period",term), "Tmin growing start", term)) %>%
mutate(term = if_else(grepl("gdd300",term), "Tmin growing start", term)) %>%
mutate(term = if_else(grepl("max_period",term), "Tmax growing start", term)) %>%
mutate(term = if_else(grepl("tgdd_period",term), "gdd growing start", term)) %>%
mutate(term = if_else(grepl("cumgdd30",term), "gdd growing start", term)) %>%
mutate(term = if_else(grepl("precip300",term), "precip growing start", term)) %>%
mutate(term = if_else(grepl("minTemp300",term), "Tmin growing start", term)) %>%
mutate(term = if_else(grepl("maxTemp300",term), "Tmax growing start", term)) %>%
mutate(term = if_else(grepl("cumgdd",term), "gdd growing season", term)) %>%
mutate(term = if_else(grepl("minTemp",term), "Tmin growing season", term)) %>%
mutate(term = if_else(grepl("maxTemp",term), "Tmax growing season", term)) %>%
mutate(term = if_else(grepl("cumprecip",term), "precip growing season", term)) %>%
mutate(term = if_else(grepl("mm_month",term), "gdd growing start", term)) %>%
mutate(term = if_else(grepl("depth_month",term), " snow quantity 4-6", term)) %>%
mutate(term = if_else(grepl("max_month",term), "Tmax growing start", term)) %>%
mutate(term = if_else(grepl("min_month",term), "Tmin growing start", term)) %>%
mutate(term = if_else(grepl("gdd_month",term), "gdd growing start", term)) %>%
mutate(term = if_else(grepl("runoff_month",term), " runoff 4-6", term)) %>%
mutate(term = if_else(grepl("precip_month",term), "precip growing start", term)) %>%
mutate(term = if_else(grepl("year",term), "year", term))
imp_vars = final_mods_vars %>% filter(term != "(Intercept)") %>% group_by(typoveg, term) %>% summarise(importance = sum(relimp))
rsq_text = data.frame(typoveg = rsq$typoveg, label = paste0("rsq = ",round(rsq$rsq, 2)*100, "%"))
ggplot(imp_vars %>% left_join(rsq_text), aes(x = "", y = importance, fill = term, label = label)) +
geom_bar(stat = "identity") +
coord_polar("y", start = 0) +
theme_void() + scale_fill_brewer(palette="Set1") +
facet_wrap(~typoveg) +
geom_text( aes(label= label), size = 3, x = 1, y = 0)
On retrouve une importance des températures (mini, maxi, ...) pour la plupart des milieux.
- Tmax pour ECOR, NIV ENHE et PROD et SUB
- Tmin pour PROD, SUB, NAR, BOMB et un peu ENHE
- Le gdd pour ALP, NAR, NIV, ECOR et un peu BOMB QUE
La neige entre en jeu pour la plupart des milieux mais de façon différente
- La quantité accumulée pour BOMB SUB et NAR
- La date de fonte pour PROD
- La vitesse d'atteinte gdd300 pour ALP
Les précipitations de début de saison sont importantes pour les milieux QUE surtout.
On a pas mal d'effets "années" car les variables "growing season" sont des variables compilées sur tout l'été (couleurs claires). Cela est moins évident pour ECOR et NIV
Sens des liens avec les variables explicatives
require(ggplot2)
ggplot(final_mods %>% filter(typoveg %in% c("ALP","NIV","BOMB", "ECOR", "ENHE", "NAR", "PROD", "QUE", "SUB")) %>% filter(p.value<0.1) %>%
filter(term != "(Intercept)"), aes(x = term, y = estimate, fill = term)) +
facet_wrap(~typoveg, scales = 'free') +
geom_bar(stat = "identity") +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5), legend.position = "none")
Messages conclusion:
-
Différences entre milieux.
-
Ce serait un plus d'avoir des fiches par stations pour mieux caractériser le type de milieu (qq espèces dominantes, classe type DELPHINE, type d'habitat phytosocio, ...)
Analyser la typo
=======
Les prairies productives
releves_plus %>% filter(ref_typoveg %in% "PROD") %>% dplyr::select(ref_site) %>% unique()
releves_plus %>% filter(ref_typoveg %in% "PROD") %>%
ggplot(aes(x=hmean)) + geom_histogram()
Les Nardaies
releves_plus %>% filter(ref_typoveg %in% "NAR") %>% dplyr::select(ref_site) %>% unique()
releves_plus %>% filter(ref_typoveg %in% "NAR") %>%
ggplot(aes(x=hmean)) + geom_histogram()
Effets spatiaux purs: typo de végétation et nos variables
library(dplyr)
library(ade4)
site_names = releves_plus %>%
filter(ref_typoveg %in% c("ALP", "BOMB", "ECOR", "ENHE", "NAR", "NIV", "PROD", "QUE", "SUB")) %>%
dplyr::filter(cumgdd_period_safran < 900 & cumgdd_period_safran > 300) %>% dplyr::select(ref_site)
dat_pca = releves_selvars_mod_spat %>% dplyr::select(-c(hmean)) %>% dplyr::select(!contains("runoff")) %>% cbind(site_names) %>% unique()
rownames(dat_pca) = dat_pca$ref_site
pca = dudi.pca(dat_pca %>% dplyr::select(where(is.numeric)), scan=FALSE, nf = 4)
#inertia.dudi(pca, row=FALSE, col =TRUE)
library(ggrepel)
ggplot(pca$co, aes(Comp1, Comp2, label = rownames(pca$co) )) +
geom_label_repel(aes(Comp1, Comp2, label = rownames(pca$co) ), fontface = 'bold',
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50',
max.overlaps = 30) +
scale_shape_manual(values=1:8) +
geom_point(size=2, stroke = 2) +
xlab(paste0("Axe1: 68% variance")) +
ylab(paste0("Axe2: 14% variance"))
On a surtout un axe qui ressort et est en lien avec les variables de température. De façon intéressante, le deuxième axe fait ressortir les minimales/ gel avec un large degré d'indépendance donc.
require(RColorBrewer)
par(mfrow=c(1,1))
s.arrow(2*pca$co[c("maxTemp_climato_2005_2015_crosscut", "minTemp_climato_2005_2015_crosscut"),], xax=1, yax=2, clabel=0.8, xlim = c(-6,6), ylim = c(-6,6))
s.class(pca$li, fac = as.factor(dat_pca$ref_typoveg), col = brewer.pal(8, "Dark2"), cpoint =0, axesell=FALSE, cstar = 0, clabel = 0.9, add.plot = TRUE)
Les types de végétation se répartissent assez bien le long de cet axe principal, avec une position similaire pour PROD, QUE, NAR, et ECOR, qui se différencient plus ou moins sur leur tolérance au gel.
Les PROD sont les plus hétérogènes sur l'axe 1. Les PROD/NAR et QUE et ECOR se distinguent par l'axe 2 (gel/ temp mini)
Répartition des sites:
ggplot(pca$li, aes(Axis1, Axis2, color = dat_pca$ref_typoveg, label = rownames(dat_pca) )) +
geom_label_repel(aes(Axis1, Axis2, label = rownames(dat_pca) ), fontface = 'bold',
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50',
max.overlaps = 30) +
scale_shape_manual(values=1:8) +
geom_point(size=2, stroke = 2) +
xlab(paste0("Axe1: 68% variance")) +
ylab(paste0("Axe2: 14% variance"))
Discimination des types de végétation:
Qu'est ce qui disciminent le plus les types de végétation?
AD = discrimin(pca, as.factor(dat_pca$ref_typoveg), scannf=FALSE, nf=2)
plot(AD)
require(RColorBrewer)
par(mfrow=c(1,1))
s.class(AD$li, fac = as.factor(dat_pca$ref_typoveg), col = brewer.pal(10, "Dark2"), cpoint =0, axesell=FALSE, cstar = 0, clabel = 0.5, cellipse = 1)
Encore une fois, on retrouve le fait que QUE/PROD et NAR sont peu discriminées par les variables env., surtout PROD englobe NAR et QUE.
Effets années
library(dplyr)
library(ade4)
site_names = releves_plus %>%
filter(ref_typoveg %in% c("ALP", "BOMB", "ECOR", "ENHE", "NAR", "NIV", "PROD", "QUE", "SUB")) %>%
dplyr::filter(cumgdd_period_safran < 900 & cumgdd_period_safran > 300) %>% filter(!(ref_site %in% sites_no_trend)) %>% filter(!(ref_site %in% sites_no_repeat)) %>% dplyr::select(releve)
dat_pca = releves_selvars_mod_temp %>% dplyr::select(-c(norm_hmean_site)) %>% dplyr::select(!contains("runoff")) %>% cbind(site_names) %>% unique()
rownames(dat_pca) = dat_pca$releve
pca = dudi.pca(dat_pca %>% dplyr::select(where(is.numeric)), scan=FALSE, nf = 4)
inertia.dudi(pca, row=FALSE, col =FALSE)
library(ggrepel)
ggplot(pca$co, aes(Comp1, Comp2, label = rownames(pca$co) )) +
geom_label_repel(aes(Comp1, Comp2, label = rownames(pca$co) ), fontface = 'bold',
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = 'grey50',
max.overlaps = 30) +
scale_shape_manual(values=1:8) +
geom_point(size=2, stroke = 2) +
xlab(paste0("Axe1: 44% variance")) +
ylab(paste0("Axe2: 14% variance"))
En prenant en compte les effets temporels, les axes restent du même genre mais on a moins de corrélations.
require(RColorBrewer)
par(mfrow=c(1,1))
s.arrow(6*pca$co[c("LSD_crosscut", "ndayfrost_safran"),], xax=1, yax=2, clabel=0.8, xlim = c(-6,6), ylim = c(-6,6))
s.class(pca$li, fac = as.factor(dat_pca$year_safran), col = brewer.pal(10, "Dark2"), cpoint=0, axesell=TRUE, cstar = 0, clabel = 0.9, add.plot = TRUE, cellipse = 1)
On retrouve ici des années typiques :
- 2013 très tardive (enneigement long)
- 2018 très chaud au démarrage
- 2015 dénneigement précoce
- 2012/2014/2016 gel tardif