From e5aae2ab6b6f77dea354b6a7b1797e17a8f1a914 Mon Sep 17 00:00:00 2001 From: Delaigue Olivier <olivier.delaigue@irstea.fr> Date: Wed, 10 Feb 2021 09:37:40 +0100 Subject: [PATCH] feat: add a Calibration_MichelP function that allows to parallelize the grid-screening step Refs #96 --- DESCRIPTION | 1 + NAMESPACE | 1 + R/Calibration_MichelP.R | 471 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 473 insertions(+) create mode 100644 R/Calibration_MichelP.R diff --git a/DESCRIPTION b/DESCRIPTION index 5bb4fc89..97beb2f2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,6 +24,7 @@ Depends: R (>= 3.1.0) Imports: graphics, grDevices, + parallel, stats, utils Suggests: diff --git a/NAMESPACE b/NAMESPACE index c98743b3..e9da537d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ S3method(SeriesAggreg, OutputsModel) ##################################### export(Calibration) export(Calibration_Michel) +export(Calibration_MichelP) export(CreateCalibOptions) export(CreateIniStates) export(CreateInputsCrit) diff --git a/R/Calibration_MichelP.R b/R/Calibration_MichelP.R new file mode 100644 index 00000000..d34b456d --- /dev/null +++ b/R/Calibration_MichelP.R @@ -0,0 +1,471 @@ +Calibration_MichelP <- function(InputsModel, + RunOptions, + InputsCrit, + CalibOptions, + FUN_MOD, + FUN_CRIT, # deprecated + FUN_TRANSFO = NULL, + verbose = TRUE, + Parallel = FALSE) { + + print("P3") + + FUN_MOD <- match.fun(FUN_MOD) + if (!missing(FUN_CRIT)) { + FUN_CRIT <- match.fun(FUN_CRIT) + } + + # Handling 'FUN_TRANSFO' from direct argument or provided by 'CaliOptions' + if (!is.null(FUN_TRANSFO)) { + FUN_TRANSFO <- match.fun(FUN_TRANSFO) + } else if(!is.null(CalibOptions$FUN_TRANSFO)) { + FUN_TRANSFO <- CalibOptions$FUN_TRANSFO + } else { + stop("'FUN_TRANSFO' is not provided neither as 'FUN_TRANSFO' argument or in 'CaliOptions' argument") + } + + ##_____Arguments_check_____________________________________________________________________ + if (!inherits(InputsModel, "InputsModel")) { + stop("'InputsModel' must be of class 'InputsModel'") + } + if (!inherits(RunOptions, "RunOptions")) { + stop("'RunOptions' must be of class 'RunOptions'") + } + if (!inherits(InputsCrit, "InputsCrit")) { + stop("'InputsCrit' must be of class 'InputsCrit'") + } + if (inherits(InputsCrit, "Multi")) { + stop("'InputsCrit' must be of class 'Single' or 'Compo'") + } + if (inherits(InputsCrit, "Single")) { + listVarObs <- InputsCrit$VarObs + } + if (inherits(InputsCrit, "Compo")) { + listVarObs <- sapply(InputsCrit, FUN = "[[", "VarObs") + } + if ("SCA" %in% listVarObs & !"Gratio" %in% RunOptions$Outputs_Cal) { + warning("Missing 'Gratio' is automatically added to 'Output_Cal' in 'RunOptions' as it is necessary in the objective function for comparison with SCA") + RunOptions$Outputs_Cal <- c(RunOptions$Outputs_Cal, "Gratio") + } + if ("SWE" %in% listVarObs & !"SnowPack" %in% RunOptions$Outputs_Cal) { + warning("Missing 'SnowPack' is automatically added to 'Output_Cal' in 'RunOptions' as it is necessary in the objective function for comparison with SWE") + RunOptions$Outputs_Cal <- c(RunOptions$Outputs_Cal, "SnowPack") + } + if (!inherits(CalibOptions, "CalibOptions")) { + stop("'CalibOptions' must be of class 'CalibOptions'") + } + if (!inherits(CalibOptions, "HBAN")) { + stop("'CalibOptions' must be of class 'HBAN' if 'Calibration_Michel' is used") + } + if (!missing(FUN_CRIT)) { + warning("argument 'FUN_CRIT' is deprecated. The error criterion function is now automatically get from the 'InputsCrit' object") + } + + ##_variables_initialisation + ParamFinalR <- NULL + ParamFinalT <- NULL + CritFinal <- NULL + NRuns <- 0 + NIter <- 0 + if ("StartParamDistrib" %in% names(CalibOptions)) { + PrefilteringType <- 2 + } else { + PrefilteringType <- 1 + } + if (PrefilteringType == 1) { + NParam <- ncol(CalibOptions$StartParamList) + } + if (PrefilteringType == 2) { + NParam <- ncol(CalibOptions$StartParamDistrib) + } + if (NParam > 20) { + stop("Calibration_Michel can handle a maximum of 20 parameters") + } + HistParamR <- matrix(NA, nrow = 500 * NParam, ncol = NParam) + HistParamT <- matrix(NA, nrow = 500 * NParam, ncol = NParam) + HistCrit <- matrix(NA, nrow = 500 * NParam, ncol = 1) + CritName <- NULL + CritBestValue <- NULL + Multiplier <- NULL + CritOptim <- +1e100 + ##_temporary_change_of_Outputs_Sim + RunOptions$Outputs_Sim <- RunOptions$Outputs_Cal ### this reduces the size of the matrix exchange with fortran and therefore speeds the calibration + + + + ##_____Parameter_Grid_Screening____________________________________________________________ + + + ##Definition_of_the_function_creating_all_possible_parameter_sets_from_different_values_for_each_parameter + ProposeCandidatesGrid <- function(DistribParam) { + NewCandidates <- expand.grid(lapply(seq_len(ncol(DistribParamR)), function(x) DistribParam[, x])) + NewCandidates <- unique(NewCandidates) # to avoid duplicates when a parameter is set + Output <- list(NewCandidates = NewCandidates) + } + + + ##Creation_of_new_candidates_______________________________________________ + OptimParam <- is.na(CalibOptions$FixedParam) + if (PrefilteringType == 1) { + CandidatesParamR <- CalibOptions$StartParamList + } + if (PrefilteringType == 2) { + DistribParamR <- CalibOptions$StartParamDistrib + DistribParamR[, !OptimParam] <- NA + CandidatesParamR <- ProposeCandidatesGrid(DistribParamR)$NewCandidates + } + ##Remplacement_of_non_optimised_values_____________________________________ + CandidatesParamR <- apply(CandidatesParamR, 1, function(x) { + x[!OptimParam] <- CalibOptions$FixedParam[!OptimParam] + return(x) + }) + if (NParam > 1) { + CandidatesParamR <- t(CandidatesParamR) + } else { + CandidatesParamR <- cbind(CandidatesParamR) + } + + ##Loop_to_test_the_various_candidates______________________________________ + iNewOptim <- 0 + Ncandidates <- nrow(CandidatesParamR) + if (verbose & Ncandidates > 1) { + if (PrefilteringType == 1) { + message("List-Screening in progress (", appendLF = FALSE) + } + if (PrefilteringType == 2) { + message("Grid-Screening in progress (", appendLF = FALSE) + } + message("0%", appendLF = FALSE) + } + if (!isFALSE(Parallel)) { + cl <- parallel::makeCluster(parallel::detectCores()) + on.exit(parallel::stopCluster(cl)) + parallel::clusterExport(cl = cl, + varlist = c("InputsModel", "RunOptions", "FUN_MOD"), + envir = environment()) + } + MichelApply <- function(cl = NULL, X, FUN, ...) { + if (Parallel) { + parallel::parRapply(cl = cl, X, FUN, ...) + } else { + apply(X, MARGIN = 1, FUN, ...) + } + } + CritGrid <- MichelApply(cl = cl, X = CandidatesParamR, FUN = function(iParam) { + # for (iNew in 1:nrow(CandidatesParamR)) { + # if (verbose & Ncandidates > 1) { + # for (k in c(2, 4, 6, 8)) { + # if (iNew == round(k / 10 * Ncandidates)) { + # message(" ", 10 * k, "%", appendLF = FALSE) + # } + # } + # } + # ##Model_run + # Param <- CandidatesParamR[iNew, ] + OutputsModel <- RunModel(InputsModel, RunOptions, iParam, FUN_MOD = FUN_MOD) + + ##Calibration_criterion_computation + OutputsCrit <- ErrorCrit(InputsCrit, OutputsModel, verbose = FALSE) + CritOptim <- OutputsCrit$CritValue * OutputsCrit$Multiplier + ##Calibration_criterion_computation + # OutputsCrit <- ErrorCrit(InputsCrit, OutputsModel, verbose = FALSE) + # if (!is.na(OutputsCrit$CritValue)) { + # if (OutputsCrit$CritValue * OutputsCrit$Multiplier < CritOptim) { + # CritOptim <- OutputsCrit$CritValue * OutputsCrit$Multiplier + # iNewOptim <- iNew + # } + # } + ##Storage_of_crit_info + # if (is.null(CritName) | is.null(CritBestValue) | is.null(Multiplier)) { + # CritName <- OutputsCrit$CritName + # CritBestValue <- OutputsCrit$CritBestValue + # Multiplier <- OutputsCrit$Multiplier + # } + # } + CritOptim + }) + CritGrid[is.na(CritGrid)] <- Inf + iNewOptim <- which.min(CritGrid) + CritOptim <- CritGrid[iNewOptim] + # CritName <- NULL + # CritBestValue <- NULL + # Multiplier <- NULL + if (verbose & Ncandidates > 1) { + message(" 100%)\n", appendLF = FALSE) + } + + + ##End_of_first_step_Parameter_Screening____________________________________ + # ParamStartR <- CandidatesParamR[iNewOptim, ] + # if (!is.matrix(ParamStartR)) { + # ParamStartR <- matrix(ParamStartR, nrow = 1) + # } + ParamStartR <- CandidatesParamR[iNewOptim, , drop = FALSE] + OutputsModel <- RunModel(InputsModel, RunOptions, ParamStartR, FUN_MOD = FUN_MOD) + OutputsCrit <- ErrorCrit(InputsCrit, OutputsModel, verbose = FALSE) + CritName <- OutputsCrit$CritName + CritBestValue <- OutputsCrit$CritBestValue + Multiplier <- OutputsCrit$Multiplier + ParamStartT <- FUN_TRANSFO(ParamStartR, "RT") + CritStart <- CritOptim + NRuns <- NRuns + nrow(CandidatesParamR) + if (verbose) { + if (Ncandidates > 1) { + message(sprintf("\t Screening completed (%s runs)", NRuns)) + } + if (Ncandidates == 1) { + message("\t Starting point for steepest-descent local search:") + } + message("\t Param = ", paste(sprintf("%8.3f", ParamStartR), collapse = ", ")) + message(sprintf("\t Crit. %-12s = %.4f", CritName, CritStart * Multiplier)) + } + ##Results_archiving________________________________________________________ + HistParamR[1, ] <- ParamStartR + HistParamT[1, ] <- ParamStartT + HistCrit[1, ] <- CritStart + + + + + ##_____Steepest_Descent_Local_Search_______________________________________________________ + + + ##Definition_of_the_function_creating_new_parameter_sets_through_a_step_by_step_progression_procedure + ProposeCandidatesLoc <- function(NewParamOptimT, OldParamOptimT, RangesT, OptimParam, Pace) { + ##Format_checking + if (nrow(NewParamOptimT) != 1 | nrow(OldParamOptimT) != 1) { + stop("each input set must be a matrix of one single line") + } + if (ncol(NewParamOptimT)!=ncol(OldParamOptimT) | ncol(NewParamOptimT) != length(OptimParam)) { + stop("each input set must have the same number of values") + } + ##Proposal_of_new_parameter_sets ###(local search providing 2 * NParam-1 new sets) + NParam <- ncol(NewParamOptimT) + VECT <- NULL + for (I in 1:NParam) { + ##We_check_that_the_current_parameter_should_indeed_be_optimised + if (OptimParam[I]) { + for (J in 1:2) { + Sign <- 2 * J - 3 #Sign can be equal to -1 or +1 + ##We_define_the_new_potential_candidate + Add <- TRUE + PotentialCandidateT <- NewParamOptimT + PotentialCandidateT[1, I] <- NewParamOptimT[I] + Sign * Pace + ##If_we_exit_the_range_of_possible_values_we_go_back_on_the_boundary + if (PotentialCandidateT[1, I] < RangesT[1, I] ) { + PotentialCandidateT[1, I] <- RangesT[1, I] + } + if (PotentialCandidateT[1, I] > RangesT[2, I]) { + PotentialCandidateT[1, I] <- RangesT[2, I] + } + ##We_check_the_set_is_not_outside_the_range_of_possible_values + if (NewParamOptimT[I] == RangesT[1, I] & Sign < 0) { + Add <- FALSE + } + if (NewParamOptimT[I] == RangesT[2, I] & Sign > 0) { + Add <- FALSE + } + ##We_check_that_this_set_has_not_been_tested_during_the_last_iteration + if (identical(PotentialCandidateT, OldParamOptimT)) { + Add <- FALSE + } + ##We_add_the_candidate_to_our_list + if (Add) { + VECT <- c(VECT, PotentialCandidateT) + } + } + } + } + Output <- NULL + Output$NewCandidatesT <- matrix(VECT, ncol = NParam, byrow = TRUE) + return(Output) + } + + + ##Initialisation_of_variables + if (verbose) { + message("Steepest-descent local search in progress") + } + Pace <- 0.64 + PaceDiag <- rep(0, NParam) + CLG <- 0.7^(1 / NParam) + Compt <- 0 + CritOptim <- CritStart + ##Conversion_of_real_parameter_values + RangesR <- CalibOptions$SearchRanges + RangesT <- FUN_TRANSFO(RangesR, "RT") + NewParamOptimT <- ParamStartT + OldParamOptimT <- ParamStartT + + + ##START_LOOP_ITER_________________________________________________________ + for (ITER in 1:(100 * NParam)) { + + + ##Exit_loop_when_Pace_becomes_too_small___________________________________ + if (Pace < 0.01) { + break + } + + + ##Creation_of_new_candidates______________________________________________ + CandidatesParamT <- ProposeCandidatesLoc(NewParamOptimT, OldParamOptimT, RangesT, OptimParam, Pace)$NewCandidatesT + CandidatesParamR <- FUN_TRANSFO(CandidatesParamT, "TR") + ##Remplacement_of_non_optimised_values_____________________________________ + CandidatesParamR <- apply(CandidatesParamR, 1, function(x) { + x[!OptimParam] <- CalibOptions$FixedParam[!OptimParam] + return(x) + }) + if (NParam > 1) { + CandidatesParamR <- t(CandidatesParamR) + } else { + CandidatesParamR <- cbind(CandidatesParamR) + } + + + ##Loop_to_test_the_various_candidates_____________________________________ + iNewOptim <- 0 + for (iNew in 1:nrow(CandidatesParamR)) { + ##Model_run + Param <- CandidatesParamR[iNew, ] + OutputsModel <- RunModel(InputsModel, RunOptions, Param, FUN_MOD = FUN_MOD) + ##Calibration_criterion_computation + OutputsCrit <- ErrorCrit(InputsCrit, OutputsModel, verbose = FALSE) + if (!is.na(OutputsCrit$CritValue)) { + if (OutputsCrit$CritValue * OutputsCrit$Multiplier < CritOptim) { + CritOptim <- OutputsCrit$CritValue * OutputsCrit$Multiplier + iNewOptim <- iNew + } + } + } + NRuns <- NRuns + nrow(CandidatesParamR) + + + ##When_a_progress_has_been_achieved_______________________________________ + if (iNewOptim != 0) { + ##We_store_the_optimal_set + OldParamOptimT <- NewParamOptimT + NewParamOptimT <- matrix(CandidatesParamT[iNewOptim, 1:NParam], nrow = 1) + Compt <- Compt + 1 + ##When_necessary_we_increase_the_pace ### if_successive_progress_occur_in_a_row + if (Compt > 2 * NParam) { + Pace <- Pace * 2 + Compt <- 0 + } + ##We_update_PaceDiag + VectPace <- NewParamOptimT-OldParamOptimT + for (iC in 1:NParam) { + if (OptimParam[iC]) { + PaceDiag[iC] <- CLG * PaceDiag[iC] + (1-CLG) * VectPace[iC] + } + } + } else { + ##When_no_progress_has_been_achieved_we_decrease_the_pace_________________ + Pace <- Pace / 2 + Compt <- 0 + } + + + ##Test_of_an_additional_candidate_using_diagonal_progress_________________ + if (ITER > 4 * NParam) { + NRuns <- NRuns + 1 + iNewOptim <- 0 + iNew <- 1 + CandidatesParamT <- NewParamOptimT+PaceDiag + if (!is.matrix(CandidatesParamT)) { + CandidatesParamT <- matrix(CandidatesParamT, nrow = 1) + } + ##If_we_exit_the_range_of_possible_values_we_go_back_on_the_boundary + for (iC in 1:NParam) { + if (OptimParam[iC]) { + if (CandidatesParamT[iNew, iC] < RangesT[1, iC]) { + CandidatesParamT[iNew, iC] <- RangesT[1, iC] + } + if (CandidatesParamT[iNew, iC] > RangesT[2, iC]) { + CandidatesParamT[iNew, iC] <- RangesT[2, iC] + } + } + } + CandidatesParamR <- FUN_TRANSFO(CandidatesParamT, "TR") + ##Model_run + Param <- CandidatesParamR[iNew, ] + OutputsModel <- RunModel(InputsModel, RunOptions, Param, FUN_MOD = FUN_MOD) + ##Calibration_criterion_computation + OutputsCrit <- ErrorCrit(InputsCrit, OutputsModel, verbose = FALSE) + if (OutputsCrit$CritValue * OutputsCrit$Multiplier < CritOptim) { + CritOptim <- OutputsCrit$CritValue * OutputsCrit$Multiplier + iNewOptim <- iNew + } + ##When_a_progress_has_been_achieved + if (iNewOptim != 0) { + OldParamOptimT <- NewParamOptimT + NewParamOptimT <- matrix(CandidatesParamT[iNewOptim, 1:NParam], nrow = 1) + } + + } + + + ##Results_archiving_______________________________________________________ + NewParamOptimR <- FUN_TRANSFO(NewParamOptimT, "TR") + HistParamR[ITER+1, ] <- NewParamOptimR + HistParamT[ITER+1, ] <- NewParamOptimT + HistCrit[ITER+1, ] <- CritOptim + ### if (verbose) { cat(paste("\t Iter ",formatC(ITER,format="d",width=3), " Crit ",formatC(CritOptim,format="f",digits=4), " Pace ",formatC(Pace,format="f",digits=4), "\n",sep=""))} + + + + } ##END_LOOP_ITER_________________________________________________________ + ITER <- ITER - 1 + + + ##Case_when_the_starting_parameter_set_remains_the_best_solution__________ + if (CritOptim == CritStart & verbose) { + message("\t No progress achieved") + } + + ##End_of_Steepest_Descent_Local_Search____________________________________ + ParamFinalR <- NewParamOptimR + ParamFinalT <- NewParamOptimT + CritFinal <- CritOptim + NIter <- 1 + ITER + if (verbose) { + message(sprintf("\t Calibration completed (%s iterations, %s runs)", NIter, NRuns)) + message("\t Param = ", paste(sprintf("%8.3f", ParamFinalR), collapse = ", ")) + message(sprintf("\t Crit. %-12s = %.4f", CritName, CritFinal * Multiplier)) + if (inherits(InputsCrit, "Compo")) { + listweights <- OutputsCrit$CritCompo$MultiCritWeights + listNameCrit <- OutputsCrit$CritCompo$MultiCritNames + msgForm <- paste(sprintf("%.2f", listweights), listNameCrit, sep = " * ", collapse = ", ") + msgForm <- unlist(strsplit(msgForm, split = ",")) + msgFormSep <- rep(c(",", ",", ",\n\t\t "), times = ceiling(length(msgForm)/3))[1: length(msgForm)] + msgForm <- paste(msgForm, msgFormSep, sep = "", collapse = "") + msgForm <- gsub("\\,\\\n\\\t\\\t $|\\,$", "", msgForm) + message("\tFormula: sum(", msgForm, ")") + } + } + ##Results_archiving_______________________________________________________ + HistParamR <- cbind(HistParamR[1:NIter, ]) + colnames(HistParamR) <- paste0("Param", 1:NParam) + HistParamT <- cbind(HistParamT[1:NIter, ]) + colnames(HistParamT) <- paste0("Param", 1:NParam) + HistCrit <- cbind(HistCrit[1:NIter, ]) + ###colnames(HistCrit) <- paste("HistCrit") + + BoolCrit_Actual <- InputsCrit$BoolCrit + BoolCrit_Actual[OutputsCrit$Ind_notcomputed] <- FALSE + MatBoolCrit <- cbind(InputsCrit$BoolCrit, BoolCrit_Actual) + colnames(MatBoolCrit) <- c("BoolCrit_Requested", "BoolCrit_Actual") + + + ##_____Output______________________________________________________________________________ + OutputsCalib <- list(ParamFinalR = as.double(ParamFinalR), CritFinal = CritFinal * Multiplier, + NIter = NIter, NRuns = NRuns, + HistParamR = HistParamR, HistCrit = HistCrit * Multiplier, + MatBoolCrit = MatBoolCrit, + CritName = CritName, CritBestValue = CritBestValue) + class(OutputsCalib) <- c("OutputsCalib", "HBAN") + return(OutputsCalib) + + + +} -- GitLab