Improve computation time of propose candidates grid
Currently, the ProposeCandidatesGrid()
function from Calibration_Michel()
removes duplicated values after the creation of the table of proposed candidates grid using expand.grid()
in order to compute all combinations of parameters.
In addition, the function is not clean because it use the DistribParamR
external variable (but the result was not wrong).
Current function:
ProposeCandidatesGrid <- function(DistribParam) {
NewCandidates <- expand.grid(lapply(seq_len(ncol(DistribParamR)), function(x) DistribParam[, x]))
NewCandidates <- unique(NewCandidates) # to avoid duplicates when a parameter is set
Output <- list(NewCandidates = NewCandidates)
}
Benchmarking:
It is more efficient to remove duplicated values on each parameter instead of the whole table.
In addition, it is not necessary to return a list.
## original one removing duplcated values on the whole data.frame (but DistribParamR replacedby DistribParam)
ProposeCandidatesGrid1 <- function(DistribParam) {
NewCandidates <- expand.grid(lapply(seq_len(ncol(DistribParam)), function(x) DistribParam[, x]))
NewCandidates <- unique(NewCandidates) # to avoid duplicates when a parameter is set
Output <- list(NewCandidates = NewCandidates)
}
## original one removing duplicated values on the whole data.frame & returning data.frame instead of a list
ProposeCandidatesGrid2 <- function(DistribParam) {
NewCandidates <- expand.grid(lapply(seq_len(ncol(DistribParam)), function(x) DistribParam[, x]))
unique(NewCandidates) # to avoid duplicates when a parameter is set
}
## new one removing duplicated values on each parameter & returning a list
ProposeCandidatesGrid3 <- function(DistribParam) {
NewCandidates <- expand.grid(lapply(seq_len(ncol(DistribParam)), function(x) unique(DistribParam[, x])))
Output <- list(NewCandidates = NewCandidates)
}
## new one removing duplicated values on each parameter & returning data.frame instead of a list
ProposeCandidatesGrid4 <- function(DistribParam) {
expand.grid(lapply(seq_len(ncol(DistribParam)), function(x) unique(DistribParam[, x])))
}
ParamT <- matrix(c(+3.60, -1.00, +3.30, -9.10, -0.90, +3.00,
+3.90, -0.50, +4.10, -8.70, +0.10, +4.00,
+4.50, +0.50, +5.00, -8.10, +1.10, +5.00), ncol = 6, byrow = TRUE)
microbenchmark(ProposeCandidatesGrid1(ParamT),
ProposeCandidatesGrid2(ParamT),
ProposeCandidatesGrid3(ParamT),
ProposeCandidatesGrid4(ParamT),
times = 50L,
unit = "milliseconds")
## Unit: milliseconds
## expr min lq mean median uq max neval
## ProposeCandidatesGrid1(ParamT) 1.7160 1.8258 2.176492 1.85945 1.9305 5.4754 50
## ProposeCandidatesGrid2(ParamT) 1.7570 1.8292 2.090968 1.88125 1.9109 5.5252 50
## ProposeCandidatesGrid3(ParamT) 0.2275 0.2456 0.275084 0.26530 0.2881 0.5601 50
## ProposeCandidatesGrid4(ParamT) 0.2337 0.2466 0.318084 0.26510 0.2856 2.7356 50
ParamT <- matrix(c(+1.9, +2.7, -1.1, -9.6, -5.5, +1.0, +3.4, -0.2, +4.5, +3.5, +4.3, -1.5, -4.2, -4.4,
+3.5, +3.4, -0.3, -9.2, -1.6, +2.0, +3.7, +0.0, +5.1, +5.3, +5.6, -0.5, -3.6, -2.7),
ncol = 14, byrow = TRUE)
microbenchmark(ProposeCandidatesGrid1(ParamT),
ProposeCandidatesGrid2(ParamT),
ProposeCandidatesGrid3(ParamT),
ProposeCandidatesGrid4(ParamT),
times = 50L,
unit = "milliseconds")
## Unit: milliseconds
## expr min lq mean median uq max neval
## ProposeCandidatesGrid1(ParamT) 88.6652 92.7099 100.529286 96.37210 100.7031 169.4589 50
## ProposeCandidatesGrid2(ParamT) 84.6542 90.4932 104.274102 95.62470 109.4955 167.5425 50
## ProposeCandidatesGrid3(ParamT) 1.3705 1.4359 1.909450 1.50210 1.5678 15.6336 50
## ProposeCandidatesGrid4(ParamT) 1.3880 1.4408 1.503504 1.48280 1.5404 2.2734 50
}
ParamT <- matrix(c(+1.9, +2.7, -1.1, -9.6, -5.5, +1.0, +3.4, -0.2, +4.5, +3.5, +4.3, -1.5, -4.2, -4.4,
+3.5, +3.4, -0.3, -9.2, -1.6, +2.0, +3.7, +0.0, +5.1, +5.3, +5.6, -0.5, -3.6, -2.7,
+3.5, +3.4, -0.3, -9.2, -1.6, +4.0, +3.7, +0.0, +5.1, +5.3, +5.6, -0.5, -3.6, -2.7),
ncol = 14, byrow = TRUE)
ParamT [3, ] <- Sacramento[3, ] + 1
microbenchmark(ProposeCandidatesGrid1(ParamT),
ProposeCandidatesGrid2(ParamT),
ProposeCandidatesGrid3(ParamT),
ProposeCandidatesGrid4(ParamT),
times = 50L,
unit = "milliseconds")
## Unit: milliseconds
## expr min lq mean median uq max neval
## ProposeCandidatesGrid1(ParamT) 31527.7280 35190.369 36366.143 36351.8798 38167.122 40490.213 10
## ProposeCandidatesGrid2(ParamT) 31889.5226 33820.050 35162.053 34758.2704 37117.132 38873.993 10
## ProposeCandidatesGrid3(ParamT) 350.7097 371.400 1505.988 481.6215 2297.846 5384.131 10
## ProposeCandidatesGrid4(ParamT) 347.6815 372.925 1410.104 427.5523 2170.164 4755.841 10