From 2d5b6aca2dd3af958a89ce8b53c749fcb5ed8c0b Mon Sep 17 00:00:00 2001
From: Isabelle Boulangeat <isabelle.boulangeat@inrae.fr>
Date: Thu, 28 Sep 2023 10:28:10 +0200
Subject: [PATCH] retrait variables saison

---
 README.Rmd | 59 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 99a6ed7..abdb775 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -555,7 +555,11 @@ select_mod = function(dat, dir = "both"){
     relimp_df = data.frame(term = relimp@namen[-1], relimp = relimp@lmg)
   } else {
     vif_df = data.frame(term = df$term, vif = NA)
-    relimp_df = data.frame(term = df$term, relimp = NA)
+    if(length(df$term>1)){
+      relimp_df = data.frame(term = df$term, relimp = 1)
+    }else{
+      relimp_df = data.frame(term = df$term, relimp = NA)
+      }
   }
 
   df$rsq = broom::glance(modAIC)$r.squared
@@ -575,29 +579,29 @@ select_mod = function(dat, dir = "both"){
 #                                              
 
 mod_gperiod300= dat_select   %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, cumprecip_safran, contains("LSD_safran"), contains("speddgdd300_s"), contains("precip300"), contains("Temp300_T"), contains("ndayfrost300_T") ))
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("LSD_safran"), contains("speddgdd300_s"), contains("precip300"), contains("Temp300_T"), contains("ndayfrost300_T") ))
 dim(mod_gperiod300)
 cor(mod_gperiod300[,-c(1:3)])
 
 mod_gperiod300_agromet = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, cumprecip_safran, contains("speedgdd300_s") ,contains("precip300"), contains("gdd300_agromet"), contains("Temp300_safran") )) 
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("speedgdd300_s") ,contains("precip300"), contains("gdd300_agromet") )) 
 dim(mod_gperiod300_agromet)
 cor(mod_gperiod300_agromet[,-c(1:3)])
 
 mod_gperiod = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, cumprecip_safran, contains("LSD_safran"),  contains("period")))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("LSD_safran"),  contains("period")))  %>%
   dplyr::select(-c("tgdd_period_safran"))
 dim(mod_gperiod)
 cor(mod_gperiod[,-c(1:3)])
 
 mod_previous30 = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("LSD_safran"), contains("previous30"), contains("cumgdd30_") )) 
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("LSD_safran"), contains("previous30"), contains("cumgdd30_") )) 
 dim(mod_previous30)
 cor(mod_previous30[,-c(1:3)])
 
 
 mod_april_min = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month4") ))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month4") ))  %>%
   dplyr::select(-contains("Tgdd_")) %>%
   dplyr::select(-contains("soil")) %>%
   dplyr::select(-contains("Tmax"))  %>%
@@ -606,7 +610,7 @@ dim(mod_april_min)
 cor(mod_april_min[,-c(1:3)])
 
 mod_april_max = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month4") ))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month4") ))  %>%
   dplyr::select(-contains("Tgdd_")) %>%
   dplyr::select(-contains("soil")) %>%
   dplyr::select(-contains("Tmin"))  %>%
@@ -615,7 +619,7 @@ dim(mod_april_max)
 cor(mod_april_max[,-c(1:3)])
 
 mod_april_soil = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month4") ))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month4") ))  %>%
   dplyr::select(-contains("Tgdd_")) %>%
   dplyr::select(-contains("Tmax")) %>%
   dplyr::select(-contains("Tmin"))  %>%
@@ -626,7 +630,7 @@ cor(mod_april_soil[,-c(1:3)])
 
 
 mod_may_min = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month5") ))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month5") ))  %>%
   dplyr::select(-contains("Tgdd_")) %>%
   dplyr::select(-contains("soil")) %>%
   dplyr::select(-contains("Tmax"))  %>%
@@ -635,7 +639,7 @@ dim(mod_may_min)
 cor(mod_may_min[,-c(1:3)])
 
 mod_may_max = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month5") ))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month5") ))  %>%
   dplyr::select(-contains("Tgdd_")) %>%
   dplyr::select(-contains("soil")) %>%
   dplyr::select(-contains("Tmin"))  %>%
@@ -644,7 +648,7 @@ dim(mod_may_max)
 cor(mod_may_max[,-c(1:3)])
 
 mod_may_soil = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month5") ))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month5") ))  %>%
   dplyr::select(-contains("Tgdd_")) %>%
   dplyr::select(-contains("Tmax")) %>%
   dplyr::select(-contains("Tmin"))  %>%
@@ -654,7 +658,7 @@ cor(mod_may_soil[,-c(1:3)])
 
 
 mod_june_min = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month6") ))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month6") ))  %>%
   dplyr::select(-contains("Tgdd_")) %>%
   dplyr::select(-contains("soil")) %>%
   dplyr::select(-contains("Tmax"))  %>%
@@ -663,7 +667,7 @@ dim(mod_june_min)
 cor(mod_june_min[,-c(1:3)])
 
 mod_june_max = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month6") ))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month6") ))  %>%
   dplyr::select(-contains("Tgdd_")) %>%
   dplyr::select(-contains("soil")) %>%
   dplyr::select(-contains("Tmin"))  %>%
@@ -672,7 +676,7 @@ dim(mod_june_max)
 cor(mod_june_max[,-c(1:3)])
 
 mod_june_soil = dat_select  %>%
-  dplyr::select(c(ref_typoveg, hmean,ref_site, minTemp_crosscut, maxTemp_crosscut, cumprecip_safran, contains("month6") ))  %>%
+  dplyr::select(c(ref_typoveg, hmean,ref_site, contains("month6") ))  %>%
   dplyr::select(-contains("Tgdd_")) %>%
   dplyr::select(-contains("Tmax")) %>%
   dplyr::select(-contains("Tmin"))  %>%
@@ -729,6 +733,7 @@ mods_AIC = rbind(
 modsAIC_all = do.call(rbind, mods_AIC_list)  %>% filter(!is.na(estimate)) %>% filter(p.value<0.2)
 summary(modsAIC_all)
 modsAIC_all %>% dplyr::select(ref_typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
+list_rsq
 ```
 
 
@@ -799,22 +804,22 @@ modsAIC_all %>% dplyr::select(ref_typoveg, term, vif) %>% filter(vif>2) %>% arra
 
 final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
   sel1 = modsAIC_all %>% filter(ref_typoveg == x) %>% filter(term!="(Intercept)")
-  if(max(sel1$vif , na.rm=TRUE)>2) {
-    select_terms = sel1 %>% filter(vif < max(vif, na.rm = TRUE))  %>% dplyr::select(term) %>% unique()
-  } else {
+  # if(max(sel1$vif , na.rm=TRUE)>2) {
+  #   select_terms = sel1 %>% filter(vif < max(vif, na.rm = TRUE))  %>% dplyr::select(term) %>% unique()
+  # } else {
     select_terms = modsAIC_all %>% filter(ref_typoveg == x) %>% filter(term!="(Intercept)")  %>%     dplyr::select(term) %>% unique()
-  }
+  # }
   dat_select %>% dplyr::select(c(select_terms$term, hmean,ref_site)) %>% mutate(site_mean = mean(hmean)) %>%  mutate(norm_hmean_site = (hmean-site_mean)) %>% dplyr::select(-c(ref_site, site_mean, hmean)) %>% mutate_at(vars(-c(norm_hmean_site)), scale) %>% select_mod( dir ="both")
  }) %>% bind_rows(.id="typoveg")
 final_mods$typoveg = as.factor(final_mods$typoveg)
 levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")
 
 final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
-
+final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique()
 
 final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
  sel1 = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)")
-  if(max(sel1$vif , na.rm=TRUE)>2) {
+  if(max(sel1$vif , na.rm=TRUE)>5) {
     select_terms = sel1 %>% filter(vif < max(vif, na.rm = TRUE))  %>% dplyr::select(term) %>% unique()
   } else {
     select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)")  %>%     dplyr::select(term) %>% unique()
@@ -825,9 +830,10 @@ final_mods$typoveg = as.factor(final_mods$typoveg)
 levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")
 
 final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
+final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique()
 
 final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
-  if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2) {
+  if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2.5) {
     select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% filter(vif < max(vif, na.rm = TRUE))  %>% dplyr::select(term) %>% unique()
   } else {
     select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)")  %>% dplyr::select(term) %>% unique()
@@ -838,9 +844,11 @@ final_mods$typoveg = as.factor(final_mods$typoveg)
 levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")
 
 final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
+final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique()
+
 
 final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
-  if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2) {
+  if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2.5) {
     select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% filter(vif < max(vif, na.rm = TRUE))  %>% dplyr::select(term) %>% unique()
   } else {
     select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)")  %>% dplyr::select(term) %>% unique()
@@ -851,8 +859,9 @@ final_mods$typoveg = as.factor(final_mods$typoveg)
 levels(final_mods$typoveg) = c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB")
 
 final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
+final_mods %>% dplyr::select(typoveg, rsq) %>% group_by(typoveg) %>% unique()
+#---
 
-# 
 # final_mods = lapply(c("ALP", "ECOR","NIV", "ENHE", "NAR", "BOMB", "PROD","QUE", "SUB"), function(x){
 #   if(max(final_mods %>% filter(typoveg == x) %>% dplyr::select(vif), na.rm=TRUE)>2) {
 #     select_terms = final_mods %>% filter(typoveg == x) %>% filter(term!="(Intercept)") %>% filter(vif < max(vif))  %>% dplyr::select(term) %>% unique()
@@ -866,7 +875,7 @@ final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(d
 # 
 # final_mods %>% dplyr::select(typoveg, term, vif) %>% filter(vif>2) %>% arrange(desc(vif))
 # 
-
+# 
 
 ```
 
@@ -930,7 +939,7 @@ rsq_text = data.frame(typoveg = rsq$typoveg, label = paste0("rsq = ",round(rsq$r
 ggplot(imp_vars %>% left_join(rsq_text), aes(x = "", y = importance, fill = term, label = label))  +
   geom_bar(stat = "identity")  +
   coord_polar("y", start = 0) +
-  theme_void() + scale_fill_brewer(palette="Paired") +
+  theme_void() + scale_fill_brewer(palette="Set1") +
   facet_wrap(~typoveg) +  
   geom_text( aes(label= label), size = 3, x = 1, y = 0)
  
-- 
GitLab