From 75b19ef604f06b7e9747b055d92082bf97744a9b Mon Sep 17 00:00:00 2001
From: Dorchies David <david.dorchies@inrae.fr>
Date: Wed, 23 Jun 2021 18:53:24 +0200
Subject: [PATCH] fix(CreateInputsCrit_DeLavenne): issues with calibration

- add example of regularisation in vignette V05
- fix wrong error message in CreateInputsCrit
- fix issues with number of model parameter with SD models
- fix issues with OutputsModel$Param in RunModelLag
- fix issues with OutputsModel$Param during calibration in CreateRunOptions

Refs #111
---
 R/CreateInputsCrit.R        |   2 +-
 R/CreateRunOptions.R        |   9 ++-
 R/RunModel.R                |   1 +
 R/RunModel_Lag.R            |   4 ++
 vignettes/V00_airgr_ref.bib |  21 ++++++
 vignettes/V05_sd_model.Rmd  | 134 ++++++++++++++++++++++++++----------
 6 files changed, 133 insertions(+), 38 deletions(-)

diff --git a/R/CreateInputsCrit.R b/R/CreateInputsCrit.R
index d2c61289..049e89a0 100644
--- a/R/CreateInputsCrit.R
+++ b/R/CreateInputsCrit.R
@@ -199,7 +199,7 @@ CreateInputsCrit <- function(FUN_CRIT,
       L2 <- LLL
     }
     if (!is.vector(iListArgs2$Obs) | length(iListArgs2$Obs) != L2 | !is.numeric(iListArgs2$Obs)) {
-      stop(sprintf("'Obs' must be a (list of) vector(s) of numeric values of length %i", LLL), call. = FALSE)
+      stop(sprintf("'Obs' must be a (list of) vector(s) of numeric values of length %i", L2), call. = FALSE)
     }
 
     ## check 'BoolCrit'
diff --git a/R/CreateRunOptions.R b/R/CreateRunOptions.R
index c745217b..8fd3022a 100644
--- a/R/CreateRunOptions.R
+++ b/R/CreateRunOptions.R
@@ -21,7 +21,6 @@ CreateRunOptions <- function(FUN_MOD, InputsModel,
   ## check FUN_MOD
   FUN_MOD <- match.fun(FUN_MOD)
   FeatFUN_MOD <- .GetFeatModel(FUN_MOD = FUN_MOD, DatesR = InputsModel$DatesR)
-  FeatFUN_MOD$IsSD <- inherits(InputsModel, "SD")
   ObjectClass <- FeatFUN_MOD$Class
   TimeStepMean <- FeatFUN_MOD$TimeStepMean
 
@@ -39,6 +38,12 @@ CreateRunOptions <- function(FUN_MOD, InputsModel,
     FeatFUN_MOD$NbParam <- FeatFUN_MOD$NbParam + 2
   }
 
+  ## SD model
+  FeatFUN_MOD$IsSD <- inherits(InputsModel, "SD")
+  if (FeatFUN_MOD$IsSD) {
+    FeatFUN_MOD$NbParam <- FeatFUN_MOD$NbParam + 1
+  }
+
   if (!"CemaNeige" %in% ObjectClass & "hysteresis" %in% ObjectClass) {
     stop("'IsHyst' cannot be TRUE for the chosen 'FUN_MOD'")
   }
@@ -323,7 +328,7 @@ CreateRunOptions <- function(FUN_MOD, InputsModel,
   ##check_Outputs_Cal
   if (is.null(Outputs_Cal)) {
     if ("GR" %in% ObjectClass) {
-      Outputs_Cal <- c("Qsim")
+      Outputs_Cal <- c("Qsim", "Param")
     }
     if ("CemaNeige" %in% ObjectClass) {
       Outputs_Cal <- c("all")
diff --git a/R/RunModel.R b/R/RunModel.R
index c21b803c..d68bf066 100644
--- a/R/RunModel.R
+++ b/R/RunModel.R
@@ -5,6 +5,7 @@ RunModel <- function(InputsModel, RunOptions, Param, FUN_MOD, ...) {
   if (inherits(InputsModel, "SD") && !identical(FUN_MOD, RunModel_Lag)) {
     # Lag model take one parameter at the beginning of the vector
     iFirstParamRunOffModel <- 2
+    RunOptions$FeatFUN_MOD$NbParam <- RunOptions$FeatFUN_MOD$NbParam - 1
   } else {
     # All parameters
     iFirstParamRunOffModel <- 1
diff --git a/R/RunModel_Lag.R b/R/RunModel_Lag.R
index 593faaca..622139ad 100644
--- a/R/RunModel_Lag.R
+++ b/R/RunModel_Lag.R
@@ -119,5 +119,9 @@ RunModel_Lag <- function(InputsModel, RunOptions, Param, QcontribDown) {
     # message("StateEnd: ", paste(OutputsModel$StateEnd$SD, collapse = ", "))
   }
 
+  if ("Param" %in% RunOptions$Outputs_Sim) {
+    OutputsModel$Param <- c(Param, OutputsModel$Param)
+  }
+
   return(OutputsModel)
 }
diff --git a/vignettes/V00_airgr_ref.bib b/vignettes/V00_airgr_ref.bib
index 419563ce..e618e4fb 100644
--- a/vignettes/V00_airgr_ref.bib
+++ b/vignettes/V00_airgr_ref.bib
@@ -178,4 +178,25 @@
 	year = {2019},
 	keywords = {airGRcite},
 	pages = {70--81}
+}
+
+@article{delavenne_regularization_2019,
+  ids = {lavenneRegularizationApproachImprove2019a},
+  title = {A {{Regularization Approach}} to {{Improve}} the {{Sequential Calibration}} of a {{Semidistributed Hydrological Model}}},
+  author = {de Lavenne, Alban and Andréassian, Vazken and Thirel, Guillaume and Ramos, Maria-Helena and Perrin, Charles},
+  date = {2019},
+  journaltitle = {Water Resources Research},
+  volume = {55},
+  pages = {8821--8839},
+  issn = {1944-7973},
+  doi = {10.1029/2018WR024266},
+  url = {https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/2018WR024266},
+  urldate = {2021-06-07},
+  abstract = {In semidistributed hydrological modeling, sequential calibration usually refers to the calibration of a model by considering not only the flows observed at the outlet of a catchment but also the different gauging points inside the catchment from upstream to downstream. While sequential calibration aims to optimize the performance at these interior gauged points, we show that it generally fails to improve performance at ungauged points. In this paper, we propose a regularization approach for the sequential calibration of semidistributed hydrological models. It consists in adding a priori information on optimal parameter sets for each modeling unit of the semidistributed model. Calibration iterations are then performed by jointly maximizing simulation performance and minimizing drifts from the a priori parameter sets. The combination of these two sources of information is handled by a parameter k to which the method is quite sensitive. The method is applied to 1,305 catchments in France over 30 years. The leave-one-out validation shows that, at locations considered as ungauged, model simulations are significantly improved (over all the catchments, the median KGE criterion is increased from 0.75 to 0.83 and the first quartile from 0.35 to 0.66), while model performance at gauged points is not significantly impacted by the use of the regularization approach. Small catchments benefit most from this calibration strategy. These performances are, however, very similar to the performances obtained with a lumped model based on similar conceptualization.},
+  annotation = {\_eprint: https://agupubs.onlinelibrary.wiley.com/doi/pdf/10.1029/2018WR024266},
+  file = {C\:\\Users\\david.dorchies\\Zotero\\storage\\QNCGSJXB\\Lavenne et al. - 2019 - A Regularization Approach to Improve the Sequentia.pdf;C\:\\Users\\david.dorchies\\Zotero\\storage\\5DMRIV8Y\\2018WR024266.html},
+  keywords = {regularization,semidistributed model,stepwise calibration,ungauged basins},
+  langid = {english},
+  number = {11},
+  options = {useprefix=true}
 }
\ No newline at end of file
diff --git a/vignettes/V05_sd_model.Rmd b/vignettes/V05_sd_model.Rmd
index 11a9165b..f7f4415c 100644
--- a/vignettes/V05_sd_model.Rmd
+++ b/vignettes/V05_sd_model.Rmd
@@ -22,12 +22,15 @@ The **airGR** package implements semi-distributed model capabilities using a lag
 
 `RunModel_Lag` documentation gives an example of simulating the influence of a reservoir in a lumped model. Try `example(RunModel_Lag)` to get it.
 
-In this vignette, we show how to calibrate 2 sub-catchments in series with a semi-distributed model consisting of 2 GR4J models. For doing this we compare two strategies for calibrating the downstream subcatchment:
+In this vignette, we show how to calibrate 2 sub-catchments in series with a semi-distributed model consisting of 2 GR4J models.
+For doing this we compare 3 strategies for calibrating the downstream subcatchment:
 
 - using upstream observed flows
 - using upstream simulated flows
+- using upstream simulated flows and parameter regularisation [@delavenne_regularization_2019]
 
 We finally compare these calibrations with a theoretical set of parameters.
+This comparison is based on the Kling-Gupta Efficiency computed on the root-squared discharges as performance criterion.
 
 ## Model description
 
@@ -59,6 +62,13 @@ summary(cbind(QObsUp = BasinObs$Qmm, QObsDown))
 options(digits = 3)
 ```
 
+With a delay of 2 days between the 2 gauging stations, the theoretical Velocity parameter should be equal to:
+
+```{r}
+Velocity <- 100 * 1e3 / (2 * 86400)
+paste("Velocity: ", format(Velocity), "m/s")
+```
+
 # Calibration of the upstream subcatchment
 
 The operations are exactly the same as the ones for a GR4J lumped model. So we do exactly the same operations as in the [Get Started](V01_get_started.html) vignette.
@@ -72,9 +82,11 @@ RunOptionsUp <- CreateRunOptions(FUN_MOD = RunModel_GR4J,
                                  InputsModel = InputsModelUp,
                                  IndPeriod_WarmUp = NULL, IndPeriod_Run = Ind_Run,
                                  IniStates = NULL, IniResLevels = NULL)
-InputsCritUp <- CreateInputsCrit(FUN_CRIT = ErrorCrit_NSE, InputsModel = InputsModelUp,
+# Error criterion is KGE computed on the root-squared discharges
+InputsCritUp <- CreateInputsCrit(FUN_CRIT = ErrorCrit_KGE, InputsModel = InputsModelUp,
                                  RunOptions = RunOptionsUp,
-                                 VarObs = "Q", Obs = BasinObs$Qmm[Ind_Run])
+                                 VarObs = "Q", Obs = BasinObs$Qmm[Ind_Run],
+                                 transfo = "sqrt")
 CalibOptionsUp <- CreateCalibOptions(FUN_MOD = RunModel_GR4J, FUN_CALIB = Calibration_Michel)
 OutputsCalibUp <- Calibration_Michel(InputsModel = InputsModelUp, RunOptions = RunOptionsUp,
                                      InputsCrit = InputsCritUp, CalibOptions = CalibOptionsUp,
@@ -89,9 +101,11 @@ OutputsModelUp <- RunModel_GR4J(InputsModel = InputsModelUp, RunOptions = RunOpt
 ```
 
 
-# Calibration of the downstream subcatchment with upstream flow observations
+# Calibration of the downstream subcatchment
 
-we need to create the `InputsModel` object completed with upstream information:
+## Creation of the InputsModel objects
+
+we need to create `InputsModel` objects completed with upstream information with upstream observed flow for the calibration of first case and upstream simulated flows for the other cases:
 
 ```{r}
 InputsModelDown1 <- CreateInputsModel(
@@ -103,16 +117,38 @@ InputsModelDown1 <- CreateInputsModel(
 )
 ```
 
-And then calibrate the combination of Lag model for upstream flow transfer and GR4J model for the runoff of the downstream subcatchment:
+For using upstream simulated flows, we should concatenate a vector with the simulated flows for the entire period of simulation (warm-up + run):
+
+```{r}
+Qsim_upstream <- rep(NA, length(BasinObs$DatesR))
+# Simulated flow during warm-up period (365 days before run period)
+Qsim_upstream[Ind_Run[seq_len(365)] - 365] <- OutputsModelUp$WarmUpQsim
+# Simulated flow during run period
+Qsim_upstream[Ind_Run] <- OutputsModelUp$Qsim
+
+InputsModelDown2 <- CreateInputsModel(
+  FUN_MOD = RunModel_GR4J, DatesR = BasinObs$DatesR,
+  Precip = BasinObs$P, PotEvap = BasinObs$E,
+  Qupstream = matrix(Qsim_upstream, ncol = 1), # upstream observed flow
+  LengthHydro = 100, # distance between upstream catchment outlet & the downstream one [km]
+  BasinAreas = c(180, 180) # upstream and downstream areas [km²]
+)
+```
+
+
+## Calibration with upstream flow observations
+
+We calibrate the combination of Lag model for upstream flow transfer and GR4J model for the runoff of the downstream subcatchment:
 
 ```{r}
 RunOptionsDown <- CreateRunOptions(FUN_MOD = RunModel_GR4J,
                                    InputsModel = InputsModelDown1,
                                    IndPeriod_WarmUp = NULL, IndPeriod_Run = Ind_Run,
                                    IniStates = NULL, IniResLevels = NULL)
-InputsCritDown <- CreateInputsCrit(FUN_CRIT = ErrorCrit_NSE, InputsModel = InputsModelDown1,
+InputsCritDown <- CreateInputsCrit(FUN_CRIT = ErrorCrit_KGE, InputsModel = InputsModelDown1,
                                    RunOptions = RunOptionsDown,
-                                   VarObs = "Q", Obs = QObsDown[Ind_Run])
+                                   VarObs = "Q", Obs = QObsDown[Ind_Run],
+                                   transfo = "sqrt")
 CalibOptionsDown <- CreateCalibOptions(FUN_MOD = RunModel_GR4J,
                                        FUN_CALIB = Calibration_Michel,
                                        IsSD = TRUE) # specify that it's a SD model
@@ -123,16 +159,6 @@ OutputsCalibDown1 <- Calibration_Michel(InputsModel = InputsModelDown1,
                                         FUN_MOD = RunModel_GR4J)
 ```
 
-To run the complete model, we should substitute the observed upstream flow by the simulated one for the entire period of simulation (warm-up + run):
-
-```{r}
-InputsModelDown2 <- InputsModelDown1
-# Simulated flow during warm-up period 
-InputsModelDown2$Qupstream[Ind_Run[seq_len(365)] - 365] <- OutputsModelUp$WarmUpQsim * 180 * 1E3
-# Simulated flow during run period
-InputsModelDown2$Qupstream[Ind_Run] <- OutputsModelUp$Qsim * 180 * 1E3
-```
-
 `RunModel` is run in order to automatically combine GR4J and Lag models.
 
 ```{r}
@@ -145,11 +171,11 @@ OutputsModelDown1 <- RunModel(InputsModel = InputsModelDown2,
 Performance of the model validation is then:
 
 ```{r}
-CritDown1 <- ErrorCrit_NSE(InputsCritDown, OutputsModelDown1)
+KGE_down1 <- ErrorCrit_KGE(InputsCritDown, OutputsModelDown1)
 ```
 
 
-# Calibration of the downstream subcatchment with upstream simulated flow
+## Calibration with upstream simulated flow
 
 We calibrate the model with the `InputsModel` object previously created for substituting the observed upstream flow with the simulated one:
 
@@ -162,67 +188,105 @@ OutputsCalibDown2 <- Calibration_Michel(InputsModel = InputsModelDown2,
 ParamDown2 <- OutputsCalibDown2$ParamFinalR
 ```
 
+## Calibration with upstream simulated flow and parameter regularisation
 
-# Discussion
+The regularisation follow the method proposed by @delavenne_regularization_2019.
 
-## Identification of Velocity parameter
+As a priori parameter set, we use the calibrated parameter set of the upstream catchment and the theoretical velocity:
 
-The theoretical Velocity parameter should be equal to:
+```{r}
+ParamDownTheo <- c(Velocity, OutputsCalibUp$ParamFinalR)
+```
+
+The De Lavenne criterion is initialised with the a priori parameter set and the value of the KGE of the upstream basin.
 
 ```{r}
-Velocity <- InputsModelDown1$LengthHydro * 1e3 / (2 * 86400)
-paste(format(Velocity), "m/s")
+IC_DeLavenne <- CreateInputsCrit_DeLavenne(InputsModel = InputsModelDown2, 
+                                    RunOptions = RunOptionsDown,
+                                    Obs = QObsDown[Ind_Run],
+                                    AprParamR = ParamDownTheo, 
+                                    AprCrit = OutputsCalibUp$CritFinal)
 ```
 
+The De Lavenne criterion is used instead of the KGE for calibration with regularisation
+
+```{r}
+OutputsCalibDown3 <- Calibration_Michel(InputsModel = InputsModelDown2,
+                                        RunOptions = RunOptionsDown,
+                                        InputsCrit = IC_DeLavenne,
+                                        CalibOptions = CalibOptionsDown,
+                                        FUN_MOD = RunModel_GR4J)
+```
+
+The KGE is then calculated for performance comparisons:
+
+```{r}
+OutputsModelDown3 <- RunModel(InputsModel = InputsModelDown2,
+                              RunOptions = RunOptionsDown,
+                              Param = OutputsCalibDown3$ParamFinalR,
+                              FUN_MOD = RunModel_GR4J)
+KGE_down3 <- ErrorCrit_KGE(InputsCritDown, OutputsModelDown3)
+```
+
+
+# Discussion
+
+## Identification of Velocity parameter
+
 Both calibrations overestimate this parameter:
 
 ```{r}
 mVelocity <- matrix(c(Velocity,
                       OutputsCalibDown1$ParamFinalR[1],
-                      OutputsCalibDown2$ParamFinalR[1]),
+                      OutputsCalibDown2$ParamFinalR[1], 
+                      OutputsCalibDown3$ParamFinalR[1]),
                     ncol = 1,
                     dimnames = list(c("theoretical",
                                       "calibrated with observed upstream flow",
-                                      "calibrated with simulated  upstream flow"),
+                                      "calibrated with simulated  upstream flow",
+                                      "calibrated with sim upstream flow and regularisation"),
                                     c("Velocity parameter")))
 knitr::kable(mVelocity)
 ```
 
 ## Value of the performance criteria with theoretical calibration
 
-Theoretically, the parameters of the downstream GR4J model should be the same as the upstream one and we know the lag time. So this set of parameter should give a better performance criteria:
+Theoretically, the parameters of the downstream GR4J model should be the same as the upstream one with the velocity as extra parameter : 
 
 ```{r}
-ParamDownTheo <- c(Velocity, OutputsCalibUp$ParamFinalR)
 OutputsModelDownTheo <- RunModel(InputsModel = InputsModelDown2,
                                  RunOptions = RunOptionsDown,
                                  Param = ParamDownTheo,
                                  FUN_MOD = RunModel_GR4J)
-CritDownTheo <- ErrorCrit_NSE(InputsCritDown, OutputsModelDownTheo)
+KGE_downTheo <- ErrorCrit_KGE(InputsCritDown, OutputsModelDownTheo)
 ```
 
 
-
 ## Parameters and performance of each subcatchment for all calibrations
 
 ```{r}
 comp <- matrix(c(0, OutputsCalibUp$ParamFinalR,
                  rep(OutputsCalibDown1$ParamFinalR, 2),
                  OutputsCalibDown2$ParamFinalR,
+                 OutputsCalibDown3$ParamFinalR,
                  ParamDownTheo),
                ncol = 5, byrow = TRUE)
 comp <- cbind(comp, c(OutputsCalibUp$CritFinal,
                       OutputsCalibDown1$CritFinal,
-                      CritDown1$CritValue,
+                      KGE_down1$CritValue,
                       OutputsCalibDown2$CritFinal,
-                      CritDownTheo$CritValue))
-colnames(comp) <- c("Velocity", paste0("X", 1:4), "NSE")
+                      KGE_down3$CritValue,
+                      KGE_downTheo$CritValue))
+colnames(comp) <- c("Velocity", paste0("X", 1:4), "KGE(√Q)")
 rownames(comp) <- c("Calibration of the upstream subcatchment",
                     "Calibration 1 with observed upstream flow",
                     "Validation 1 with simulated upstream flow",
                     "Calibration 2 with simulated upstream flow",
+                    "Calibration 3 with simulated upstream flow and regularisation",
                     "Validation theoretical set of parameters")
 knitr::kable(comp)
 ```
 
-Even if calibration with observed upstream flows gives an improved performance criteria, in validation using simulated upstream flows the result is quite similar as the performance obtained with the calibration with upstream simulated flows. The theoretical set of parameters give also an equivalent performance but still underperforming the calibration 2 one.
+Even if calibration with observed upstream flows gives an improved performance criteria, in validation using simulated upstream flows the result is quite similar as the performance obtained with the calibration with upstream simulated flows. The theoretical set of parameters give also an equivalent performance but still underperforming the calibration 2 one. Regularisation allows to get similar performance as the one for calibration with simulated flows but with the big advantage of having parameters closer to the theoretical ones (Especially for the velocity parameter).
+
+# References
-- 
GitLab