From 2d5b6aca2dd3af958a89ce8b53c749fcb5ed8c0b Mon Sep 17 00:00:00 2001 From: Isabelle Boulangeat <isabelle.boulangeat@inrae.fr> Date: Thu, 28 Sep 2023 10:28:10 +0200 Subject: [PATCH] retrait variables saison --- README.Rmd | 59 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/README.Rmd b/README.Rmd index 99a6ed7..abdb775 100644 --- a/README.Rmd +++ b/README.Rmd @@ -555,7 +555,11 @@ select_mod = function(dat, dir = "both"){ relimp_df = data.frame(term = relimp@namen[-1], relimp = relimp@lmg) } else { vif_df = data.frame(term = df$term, vif = NA) - relimp_df = data.frame(term = df$term, relimp = NA) + if(length(df$term>1)){ + relimp_df = data.frame(term = df$term, relimp = 1) + }else{ + relimp_df = data.frame(term = df$term, relimp = NA) + } } df$rsq = broom::glance(modAIC)$r.squared @@ -575,29 +579,29 @@ select_mod = function(dat, dir = "both"){ # mod_gperiod300= dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, cumprecip_safran, contains("LSD_safran"), contains("speddgdd300_s"), contains("precip300"), contains("Temp300_T"), contains("ndayfrost300_T") )) + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("LSD_safran"), contains("speddgdd300_s"), contains("precip300"), contains("Temp300_T"), contains("ndayfrost300_T") )) dim(mod_gperiod300) cor(mod_gperiod300[,-c(1:3)]) mod_gperiod300_agromet = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, cumprecip_safran, contains("speedgdd300_s") ,contains("precip300"), contains("gdd300_agromet"), contains("Temp300_safran") )) + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("speedgdd300_s") ,contains("precip300"), contains("gdd300_agromet") )) dim(mod_gperiod300_agromet) cor(mod_gperiod300_agromet[,-c(1:3)]) mod_gperiod = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, cumprecip_safran, contains("LSD_safran"), contains("period"))) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("LSD_safran"), contains("period"))) %>% dplyr::select(-c("tgdd_period_safran")) dim(mod_gperiod) cor(mod_gperiod[,-c(1:3)]) mod_previous30 = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("LSD_safran"), contains("previous30"), contains("cumgdd30_") )) + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("LSD_safran"), contains("previous30"), contains("cumgdd30_") )) dim(mod_previous30) cor(mod_previous30[,-c(1:3)]) mod_april_min = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month4") )) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month4") )) %>% dplyr::select(-contains("Tgdd_")) %>% dplyr::select(-contains("soil")) %>% dplyr::select(-contains("Tmax")) %>% @@ -606,7 +610,7 @@ dim(mod_april_min) cor(mod_april_min[,-c(1:3)]) mod_april_max = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month4") )) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month4") )) %>% dplyr::select(-contains("Tgdd_")) %>% dplyr::select(-contains("soil")) %>% dplyr::select(-contains("Tmin")) %>% @@ -615,7 +619,7 @@ dim(mod_april_max) cor(mod_april_max[,-c(1:3)]) mod_april_soil = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month4") )) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month4") )) %>% dplyr::select(-contains("Tgdd_")) %>% dplyr::select(-contains("Tmax")) %>% dplyr::select(-contains("Tmin")) %>% @@ -626,7 +630,7 @@ cor(mod_april_soil[,-c(1:3)]) mod_may_min = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month5") )) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month5") )) %>% dplyr::select(-contains("Tgdd_")) %>% dplyr::select(-contains("soil")) %>% dplyr::select(-contains("Tmax")) %>% @@ -635,7 +639,7 @@ dim(mod_may_min) cor(mod_may_min[,-c(1:3)]) mod_may_max = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month5") )) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month5") )) %>% dplyr::select(-contains("Tgdd_")) %>% dplyr::select(-contains("soil")) %>% dplyr::select(-contains("Tmin")) %>% @@ -644,7 +648,7 @@ dim(mod_may_max) cor(mod_may_max[,-c(1:3)]) mod_may_soil = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month5") )) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month5") )) %>% dplyr::select(-contains("Tgdd_")) %>% dplyr::select(-contains("Tmax")) %>% dplyr::select(-contains("Tmin")) %>% @@ -654,7 +658,7 @@ cor(mod_may_soil[,-c(1:3)]) mod_june_min = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month6") )) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month6") )) %>% dplyr::select(-contains("Tgdd_")) %>% dplyr::select(-contains("soil")) %>% dplyr::select(-contains("Tmax")) %>% @@ -663,7 +667,7 @@ dim(mod_june_min) cor(mod_june_min[,-c(1:3)]) mod_june_max = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month6") )) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month6") )) %>% dplyr::select(-contains("Tgdd_")) %>% dplyr::select(-contains("soil")) %>% dplyr::select(-contains("Tmin")) %>% @@ -672,7 +676,7 @@ dim(mod_june_max) cor(mod_june_max[,-c(1:3)]) mod_june_soil = dat_select %>% - dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month6") )) %>% + dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month6") )) %>% dplyr::select(-contains("Tgdd_")) %>% dplyr::select(-contains("Tmax")) %>% dplyr::select(-contains("Tmin")) %>% @@ -729,6 +733,7 @@ mods_AIC = rbind( modsAIC_all = do.call(rbind, mods_AIC_list) %>% filter(!is.na(estimate)) %>% filter(p.value<0.2) summary(modsAIC_all) modsAIC_all %>% dplyr::select(ref_typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif)) +list_rsq ``` @@ -799,22 +804,22 @@ modsAIC_all %>% dplyr::select(ref_typoveg, term, vif) %>% filter(vif>2) %>% arra final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){ sel1 = modsAIC_all %>% filter(ref_typoveg == x) %>% filter(term!="(Intercept)") - if(max(sel1$vif , na.rm=TRUE)>2) { - select_terms = sel1 %>% filter(vif < max(vif, na.rm = TRUE)) %>% dplyr::select(term) %>% unique() - } else { + # if(max(sel1$vif , na.rm=TRUE)>2) { + # select_terms = sel1 %>% filter(vif < max(vif, na.rm = TRUE)) %>% dplyr::select(term) %>% unique() + # } else { select_terms = modsAIC_all %>% filter(ref_typoveg == x) %>% filter(term!="(Intercept)") %>% dplyr::select(term) %>% unique() - } + # } dat_select %>% dplyr::select(c(select_terms$term, hmean,ref_site)) %>% mutate(site_mean = mean(hmean)) %>% mutate(norm_hmean_site = (hmean-site_mean)) %>% dplyr::select(-c(ref_site, site_mean, hmean)) %>% mutate_at(vars(-c(norm_hmean_site)), scale) %>% select_mod( dir ="both") }) %>% bind_rows(.id="typoveg") final_mods$typoveg = as.factor(final_mods$typoveg) levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB") final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif)) - +final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique() final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){ sel1 = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") - if(max(sel1$vif , na.rm=TRUE)>2) { + if(max(sel1$vif , na.rm=TRUE)>5) { select_terms = sel1 %>% filter(vif < max(vif, na.rm = TRUE)) %>% dplyr::select(term) %>% unique() } else { select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% dplyr::select(term) %>% unique() @@ -825,9 +830,10 @@ final_mods$typoveg = as.factor(final_mods$typoveg) levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB") final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif)) +final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique() final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){ - if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2) { + if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2.5) { select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% filter(vif < max(vif, na.rm = TRUE)) %>% dplyr::select(term) %>% unique() } else { select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% dplyr::select(term) %>% unique() @@ -838,9 +844,11 @@ final_mods$typoveg = as.factor(final_mods$typoveg) levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB") final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif)) +final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique() + final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){ - if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2) { + if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2.5) { select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% filter(vif < max(vif, na.rm = TRUE)) %>% dplyr::select(term) %>% unique() } else { select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% dplyr::select(term) %>% unique() @@ -851,8 +859,9 @@ final_mods$typoveg = as.factor(final_mods$typoveg) levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB") final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif)) +final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique() +#--- -# # final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){ # if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2) { # select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% filter(vif < max(vif)) %>% dplyr::select(term) %>% unique() @@ -866,7 +875,7 @@ final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(d # # final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif)) # - +# ``` @@ -930,7 +939,7 @@ rsq_text = data.frame(typoveg = rsq$typoveg, label = paste0("rsq = ",round(rsq$r ggplot(imp_vars %>% left_join(rsq_text), aes(x = "", y = importance, fill = term, label = label)) + geom_bar(stat = "identity") + coord_polar("y", start = 0) + - theme_void() + scale_fill_brewer(palette="Paired") + + theme_void() + scale_fill_brewer(palette="Set1") + facet_wrap(~typoveg) + geom_text( aes(label= label), size = 3, x = 1, y = 0) -- GitLab