From 0d586ce47aaaf47780a6bef2bde9f386f7f3b074 Mon Sep 17 00:00:00 2001
From: de Lavenne Alban <alban.de-lavenne@irstea.fr>
Date: Wed, 5 Feb 2025 11:17:56 +0100
Subject: [PATCH] feat: the donor catchment weighting strategy can now be
 derived from a similarity analysis of Rn

A linear model can be built between a list of predictors (provided by the
package user) and the observed similarity of Rn in order to predict the
similarity between a gauged catchment and an ungauged catchment.
---
 DESCRIPTION        |   6 +-
 NAMESPACE          |   8 ++
 R/mixr.R           |  64 ++++++++++------
 R/rsimilarity.R    | 177 +++++++++++++++++++++++++++++++++++++++++++++
 R/weightr.R        |  12 +--
 man/mixr.Rd        |  11 ++-
 man/rsimilarity.Rd |  82 +++++++++++++++++++++
 man/weightr.Rd     |   2 +-
 8 files changed, 325 insertions(+), 37 deletions(-)
 create mode 100644 R/rsimilarity.R
 create mode 100644 man/rsimilarity.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index f67ef6a..bb89598 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: transfR
 Type: Package
 Title: Transfer of Hydrograph from Gauged to Ungauged Catchments
-Version: 1.0.10
-Date: 2023-09-29
+Version: 1.1.0
+Date: 2025-02-05
 Authors@R: c(
     person("Alban", "de Lavenne", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9448-3490"), email = "alban.delavenne@inrae.fr"),
     person("Christophe", "Cudennec", role = c("ths"), comment = c(ORCID = "0000-0002-1707-8926"), email = "christophe.cudennec@agrocampus-ouest.fr"),
@@ -10,7 +10,7 @@ Authors@R: c(
     person("Hervé", "Squividant", role = c("ctb"), email = "herve.squividant@agrocampus-ouest.fr")
     )
 Depends: R (>= 4.1.0), stars (>= 0.4-0), sf (>= 0.8-0)
-Imports: doParallel, foreach, units, Rdpack
+Imports: doParallel, foreach, units, glmnet, Rdpack
 RdMacros: Rdpack
 VignetteBuilder: knitr
 Suggests: knitr, rmarkdown, elevatr, progress, whitebox
diff --git a/NAMESPACE b/NAMESPACE
index 5141234..d38239f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -34,6 +34,7 @@ export(lagtime)
 export(mixr)
 export(quick_transfr)
 export(rapriori)
+export(rsimilarity)
 export(uh)
 export(velocity)
 export(weightr)
@@ -43,7 +44,14 @@ import(graphics)
 import(sf)
 import(stars)
 importFrom(Rdpack,reprompt)
+importFrom(glmnet,cv.glmnet)
+importFrom(glmnet,glmnet)
+importFrom(stats,as.formula)
+importFrom(stats,complete.cases)
+importFrom(stats,cor)
+importFrom(stats,lm)
 importFrom(stats,na.omit)
+importFrom(stats,predict)
 importFrom(stats,sd)
 importFrom(units,as_units)
 importFrom(units,drop_units)
diff --git a/R/mixr.R b/R/mixr.R
index 0cdd2c4..fa6e9e3 100644
--- a/R/mixr.R
+++ b/R/mixr.R
@@ -17,7 +17,7 @@
 #' @param weightC weight given to the distance between centroids if \code{distance} is "combined"
 #' (see the function \link{hdist})
 #' @param similarity a hydrological similarity matrix between gauged and ungauged catchments
-#' used for donor weighting (\code{mdist} is used if not provided)
+#' used for donor weighting (\code{1/mdist^power} is used if not provided)
 #' @param parallel logical indicating if the computation should be parallelised
 #' @param cores the number of cores to use for parallel execution if \code{parallel} is TRUE.
 #' If not specified, the number of cores is set to the value of \code{parallel::detectCores()}
@@ -25,8 +25,8 @@
 #' \link{weightr}
 #' @param ndonors maximum number of catchments to be used to simulate discharge of an ungauged
 #' catchment as defined by the function \link{weightr}
-#' @param donors vector of catchments id from which donors are selected. 
If empty, the \code{ndonors} closest -#' catchments are used +#' @param donors list of vectors indicating the catchment IDs from which donors are selected for each ungauged catchments. +#' If empty, the \code{ndonors} closest catchments are used #' @param maxdist maximum distance between a gauged and an ungauged catchment to allow the net rainfall #' to be transferred. This threshold is applied on the \code{mdist} distance matrix. If no units is provided, #' \code{maxdist} is assumed to be in [m]. @@ -39,6 +39,7 @@ #' in the \code{sim} object called "RnDonor", "Idonor" and "Wdonor" describing the net rainfall, the id and #' the weight of the donor catchments respectively #' @param verbose boolean indicating if information messages should be written to the console +#' @param ... other arguments to be passed to \link{rsimilarity} if \code{similarity} is a list of matrices #' @return The \code{sim} object incremented by the new computed attributes. #' @details This function is a wrapper function for \link{hdist} and \link{weightr} to directly estimate #' the net rainfall on a set of ungauged catchments (\code{sim}) from a set of gauged catchments (\code{obs}). @@ -63,8 +64,8 @@ #' plot(object, i = 1, attribute = c("Qobs", "Qsim"))} #' @export mixr <- function(obs, sim, mdist, distance = "rghosh", gres = 5, weightO = 0.8, weightC = 0.2, similarity, - parallel = FALSE, cores = NULL, power = 1, ndonors = 5, donors = NULL, maxdist = 50e3, - flexible_donor = TRUE, cv = FALSE, save_donor = FALSE, verbose = TRUE){ + parallel = FALSE, cores = NULL, power = 1, ndonors = 5, donors = NULL, maxdist = 50e3, + flexible_donor = TRUE, cv = FALSE, save_donor = FALSE, verbose = TRUE, ...){ if(cv){ if(missing(sim)){ sim <- obs @@ -86,6 +87,7 @@ mixr <- function(obs, sim, mdist, distance = "rghosh", gres = 5, weightO = 0.8, #--- Checking donors if(!is.null(donors)){ + if(!inherits(donors,"list")) stop(paste0("The class of 'donors' should be a list, with each elements being the catchment IDs of the donors for each ungauged catchment.")) if(length(donors)!= dim(sim$st)[2]) stop(paste0("The length of 'donors' list is ",length(donors)," but the number of ungauged catchments is ",dim(sim$st)[2],".")) if(any(sapply(donors,FUN = function(x) !all(x %in% 1:dim(obs$st)[2])))) stop("The value of 'donors' can not be matched to a gauged catchment.") } @@ -98,25 +100,48 @@ mixr <- function(obs, sim, mdist, distance = "rghosh", gres = 5, weightO = 0.8, mdist <- hdist(x=obs, y=sim, method=distance, gres=gres, weightO=weightO, weightC=weightC, parallel=parallel, cores=cores, verbose=verbose) } - if(missing(similarity)){similarity <- mdist}else{ - if(any(dim(mdist)!=dim(similarity))) stop("The dimensions of 'similarity' should be similar to 'mdist'.") + if(missing(similarity)){similarity <- 1/mdist^power}else{ + if(is.matrix(similarity) & any(dim(mdist)!=dim(similarity))) stop("The dimensions of 'similarity' should be similar to 'mdist'.") + if(is.list(similarity) & any(sapply(lapply(similarity,FUN=dim),FUN=function(x) any(dim(x)!=dim(mdist))))) stop("The dimensions of each 'similarity' matrices should be similar to 'mdist'.") } - #--- Transfering Rn from obs to sim + #--- Transferring Rn from obs to sim + if(is.list(similarity)) similarity_models <- data.frame() sim$st$RnSim <- NA for(i in 1:dim(sim$st)[2]){ if(verbose) progress("Combining simulated Rn of neighbouring catchments for catchment ",i,dim(sim$st)[2]) - if(cv){distances <- mdist[-i,i]}else{distances <- mdist[,i]} - if(cv){similarities <- 
similarity[-i,i]}else{similarities <- similarity[,i]} - if(is.null(donors)){idonors <- which(distances<=maxdist)}else{idonors <- donors[[i]][donors[[i]]%in%which(distances<=maxdist)]} + if(cv){distances <- mdist[-i,i]}else{distances <- mdist[,i]} + if(is.null(donors)){idonors <- which(distances<=maxdist)}else{idonors <- donors[[i]][donors[[i]]%in%which(distances<=maxdist)]} if(length(idonors)==0){ warning(paste0("No donor found for catchment ",i," below the distance of ",units::set_units(maxdist,"km")," km.")) sim$st$RnSim[,i] <- rep(NA,dim(sim$st)[1]) }else{ - if(cv){RnInv <- obs$st$RnInv[,-i][,idonors]}else{RnInv <- obs$st$RnInv[,idonors]} - weights <- weightr(Rn=RnInv, distances=distances[idonors], similarities=similarities[idonors], ndonors=ndonors, power=power, flexible_donor=flexible_donor) + if(cv){RnInv <- obs$st$RnInv[,-i][,idonors]}else{RnInv <- obs$st$RnInv[,idonors]} + if(is.list(similarity)){ + if(cv){ + predictors <- lapply(similarity, FUN = function(x) x[-i,-i][idonors,idonors]) + newpredictors<- sapply(similarity, FUN = function(x) x[-i,i])[idonors,] + }else{ + predictors <- lapply(similarity, FUN = function(x) x[idonors,idonors]) + newpredictors<- sapply(similarity, FUN = function(x) x[i,idonors]) + } + similarity_model <- rsimilarity(Rn=RnInv, + predictors=predictors, + newpredictors=newpredictors, + ...) + similarities <- similarity_model$similarity + # Save description of the similarity model + similarity_models <- rbind(similarity_models, data.frame(t(similarity_model$coef), r2=similarity_model$r2, + ncatch=similarity_model$ncatch, ndist=similarity_model$ndist, + n75=similarity_model$n75, n90=similarity_model$n90)) + }else{ + if(cv){similarities <- similarity[-i,i][idonors]}else{similarities <- similarity[idonors,i]} + } + if(verbose & is.list(similarity)) cat("### Weighting Rn of each donor catchments ###\n") + weights <- weightr(Rn=RnInv, distances=distances[idonors], similarities=similarities, ndonors=ndonors, power=power, flexible_donor=flexible_donor) sim$st$RnSim[,i] <- apply(RnInv*weights,MARGIN = 1,FUN = function(x){if(all(is.na(x))){NA}else{sum(x, na.rm = T)}}) if(save_donor){ + if(cv){gdonors <- seq_len(dim(sim$st)[2])[-i][idonors]}else{gdonors <- seq_len(dim(sim$st)[2])[idonors]} #to come back to general spatial index for(idonor in 1:min(c(ndonors,length(idonors)))){ #Create new columns if(!paste0("RnDonor",idonor)%in%names(sim$st)){ @@ -128,22 +153,15 @@ mixr <- function(obs, sim, mdist, distance = "rghosh", gres = 5, weightO = 0.8, if(!paste0("Wdonor",idonor)%in%names(sim$st)){ sim$st[[paste0("Wdonor",idonor)]] <- matrix(NA,nrow=dim(sim$st)[1],ncol=dim(sim$st)[2]) dim(sim$st[[paste0("Wdonor",idonor)]]) <- dim(sim$st)} - sim$st[[paste0("Idonor",idonor)]][,i] <- apply(weights,MARGIN = 1, FUN = function(x){if(all(is.na(x))){NA}else{which(rank(1-x,ties.method = "first")==idonor)[idonors]}}) + sim$st[[paste0("Idonor",idonor)]][,i] <- apply(weights,MARGIN = 1, FUN = function(x){if(all(is.na(x))){NA}else{gdonors[which(rank(1-x,ties.method = "first")==idonor)]}}) sim$st[[paste0("Wdonor",idonor)]][,i] <- apply(weights,MARGIN = 1, FUN = function(x){if(all(is.na(x))){NA}else{x[which(rank(1-x,ties.method = "first")==idonor)]}}) - #Solution 1 (simpler) - # for(t in 1:dim(sim$st)[1]) sim$st[[paste0("RnDonor",idonor)]][t,i] <- RnInv[t,sim$st[[paste0("Idonor",idonor)]][t,i]] - #Solution 2 (faster) - iweights <- matrix(0, nrow = dim(sim$st)[1], ncol = min(c(ndonors,length(idonors)))) - iweights <- cbind(iweights,sim$st[[paste0("Idonor",idonor)]][,i]) - iweights <- 
t(apply(iweights, MARGIN = 1, function(x){if(!is.na(x[length(x)])){x[which(x[length(x)]==idonor)]=1;x[1:(length(x)-1)]}else{rep(NA,length(x)-1)}})) - sim$st[[paste0("RnDonor",idonor)]][,i] <- apply(RnInv*iweights,MARGIN = 1,FUN = function(x){if(all(is.na(x))){NA}else{sum(x, na.rm = T)}}) - # units(sim$st[[which(names(sim$st)==paste0("RnDonor",idonor))]]) <- units(RnInv) # could not find a way to keep the units provided by weighting + sim$st[[paste0("RnDonor", idonor)]][, i] <- sapply(seq_len(dim(sim$st)[1]), function(t) sim$st$RnInv[t, sim$st[[paste0("Idonor", idonor)]][t, i]]) units(sim$st[[paste0("RnDonor",idonor)]]) <- units(RnInv) # could not find a way to keep the units provided by weighting } } } } + if(is.list(similarity)) sim[["similarity_models"]] <- similarity_models # save similarity models units(sim$st[["RnSim"]]) <- units(obs$st$RnInv) # could not find a way to keep the units provided by weighting return(sim) } - diff --git a/R/rsimilarity.R b/R/rsimilarity.R new file mode 100644 index 0000000..462b104 --- /dev/null +++ b/R/rsimilarity.R @@ -0,0 +1,177 @@ +#' Prediction of hydrological similarity to drive transfer of hydrograph to ungauged catchments +#' +#' Quantify the similarity of net rainfall and model this hydrological similarity from catchment +#' similarity predictors +#' @name rsimilarity +#' @param Rn net rainfall matrix of gauged catchments (rows for time index, and columns for catchment index) +#' @param predictors a list of at least two squared matrices that could be used to predict similarity of net rainfall. +#' The matrices should be squared to describe the similarity of each pair of gauged catchments. High values should +#' indicate high probability of similarity in net rainfall +#' @param newpredictors a matrix or a data.frame of predictors, to be used as new input to the model being built, +#' and from which we want to predict net rainfall similarity. Typically, a similarity between an ungauged catchment +#' and several gauged catchments that we want to weight as donors. Each column should correspond to each \code{predictors}, +#' and each row should correspond to each pair of catchments analysed. +#' @param FUN a function that will be used to quantify the similarity of two net rainfall time series. High values should +#' indicate high probability of similarity in net rainfall. Default is the inverse of root mean square error \code{invRMSE}, +#' but \code{KGE} (Kling–Gupta efficiency) can also be used +#' @param power exponent applied on \code{Rn} time series when computing \code{Rn} similarity (generally between -1 and 1, +#' a lower value emphasising the similarity of low flows). Default value is 0.5 +#' @param symmetrize a function to combine \code{FUN(Rn[,i],Rn[,j])} and \code{FUN(Rn[,j],Rn[,i])} into one similarity value +#' (typically if \code{FUN=KGE}, \code{KGE(Rn[,i],Rn[,j])!=KGE(Rn[,j],Rn[,i])}). Default is mean +#' @param model the method to estimate similarity of \code{Rn} from \code{descriptors}. This must be one of "lm", "glmnet" +#' @param args_glmnet list of arguments to be passed to \code{glmnet::cv.glmnet()} +#' @param verbose boolean indicating if warning messages should be written to the console +#' @param seed integer value to be used by \code{set.seed()} for reproducible results. 
Used only if \code{model="glmnet"} +#' @seealso \link{mixr} +#' @examples +#' \donttest{data(Oudon) +#' obs <- as_transfr(st = Oudon$obs, hl = Oudon$hl) +#' obs <- velocity(obs, method = "loire2016") +#' obs <- uh(obs) +#' obs <- lagtime(obs) +#' obs <- rapriori(obs) +#' obs <- inversion(obs, parallel = TRUE, cores=2) +#' mdist1 <- hdist(x = obs, y = obs, method = "rghosh", parallel = c(FALSE,TRUE), cores=2) +#' mdist2 <- mdist1^2 +#' rghosh1 <- seq(1000, 5000, by=100) +#' rghosh2 <- rghosh1^2 +#' res <- rsimilarity(Rn = obs$st$RnInv, +#' predictors = list(rghosh1=1/mdist1, rghosh2=1/mdist2), +#' newpredictors = data.frame(rghosh1=1/rghosh1, rghosh2=1/rghosh2), +#' seed=1234) +#' plot(rghosh1, res$similarity, ylab = "Predicted Rn similarity") +#' plot(rghosh2, res$similarity, ylab = "Predicted Rn similarity") +#' # rsimilarity() is automatically called by mixr() if mdist is a list +#' obs <- mixr(obs = obs, mdist = mdist1, +#' similarity = list(rghosh1=1/mdist1, rghosh2=1/mdist2), +#' parallel = TRUE, cores=2, cv = TRUE, save_donor = TRUE) +#' obs$similarity_models +#' obs <- convolution(obs) +#' plot(obs, i = 1, attribute = c("Qobs", "Qsim"))} +#' @importFrom units drop_units +#' @importFrom glmnet glmnet cv.glmnet +#' @importFrom stats lm predict complete.cases cor as.formula +#' @export +rsimilarity <- function(Rn, predictors, newpredictors, FUN=invRMSE, power=0.5, symmetrize=mean, model="glmnet", + args_glmnet=list(alpha=0.5, s="lambda.min", lower.limits=0), verbose = TRUE, seed=NULL){ + + # Checks inputs + if(inherits(Rn, "units")) Rn <- units::drop_units(Rn) + if(!inherits(predictors, "list")) predictors <- list(predictors) + if(any(!sapply(predictors,FUN=is.matrix))) stop("predictors should be a list with 2 or more matrices") + if(is.null(ncol(newpredictors))) stop("newpredictors should be a matrix or a data.frame with 2 or more columns") + if(length(predictors)<=1) stop("predictors should be a list with 2 or more matrices") + if(ncol(newpredictors)<=1) stop("newpredictors should be a matrix or a data.frame with 2 or more columns") + if(any(ncol(Rn)!=sapply(predictors,FUN=ncol)) | any(ncol(Rn)!=sapply(predictors,FUN=nrow))){ + stop(paste("All predictors elements should be a square matrix of",ncol(Rn),"x",ncol(Rn),"to be consistent with Rn"))} + if(length(predictors) != ncol(newpredictors)) stop("length(predictors) and ncol(newpredictors) should be the same") + + # Provide names of distances if not provided + if(is.null(names(predictors))) names(predictors) <- paste0("dist",1:length(predictors)) + if(is.null(names(newpredictors))) names(newpredictors) <- paste0("dist",1:length(newpredictors)) + if(any(names(predictors) != colnames(newpredictors))) stop("predictors and newpredictors do not have the same names") + + # Compute similarity matrix index of Rn + if(verbose) cat("\n### Computing similarity matrix of Rn ###\n") + n <- ncol(Rn) + mat_similarity <- matrix(NA, n, n) + mat_similarity <- sapply(1:n, function(i) { + sapply(1:n, function(j) FUN(Rn[, i]^power, Rn[, j]^power)) + }) + + # Convert matrix to vector for Rn similarity + if(verbose) cat("### Preparing data for the similarity model ###\n") + tmat_similarity <- t(mat_similarity) + dat_similarity <- data.frame(ij=mat_similarity[upper.tri(mat_similarity, diag = FALSE)],ji=tmat_similarity[upper.tri(tmat_similarity, diag = FALSE)]) + vec_similarity <- apply(dat_similarity, MARGIN=1, FUN=symmetrize) + + # Convert each matrix of predictors into vectors, to prepare data for linear model + model_data <- 
matrix(nrow=length(vec_similarity), ncol=length(predictors), dimnames=list(NULL,names(predictors))) + for(i in 1:length(predictors)){ + mat_pred <- predictors[[i]] + tmat_pred <- t(mat_pred) + dat_pred <- data.frame(ij=mat_pred[upper.tri(mat_pred, diag = FALSE)],ji=tmat_pred[upper.tri(tmat_pred, diag = FALSE)]) + model_data[,i] <- apply(dat_pred, MARGIN=1, FUN=symmetrize) + } + + # Remove NA or Inf value + to_remove <- apply(model_data, MARGIN=1, FUN=function(x) any(is.infinite(x) | is.na(x))) + to_remove <- to_remove | is.na(vec_similarity) + model_data <- model_data[!to_remove,] + vec_similarity <- vec_similarity[!to_remove] + + if(verbose & nrow(model_data) < 10 * ncol(model_data)) warning(paste0("Regression is done over a limited number of similarities (", + nrow(model_data)," observations for ",ncol(model_data)," predictors)")) + + if(verbose) cat("### Building the similarity model ###\n") + if(model == "glmnet"){ + if (!is.null(seed)) set.seed(seed) + # Linear model to predict Rn similarity from predictors using a GLM with lasso or elasticnet regularization + valid_args <- c(names(formals(glmnet::glmnet)),names(formals(glmnet::cv.glmnet))) + glm_similarity <- do.call(glmnet::cv.glmnet, args=c(list(x=model_data, y=vec_similarity), args_glmnet[names(args_glmnet) %in% valid_args])) + if(is.character(args_glmnet[["s"]])){s <- glm_similarity[[args_glmnet[["s"]]]]}else{s <- args_glmnet[["s"]]} + similarity <- as.vector(stats::predict(glm_similarity, newx=as.matrix(newpredictors), s=s)) + coef <- as.matrix(coef(glm_similarity, s=s))[-1,] + # Compute R2 manually + y <- vec_similarity + y_pred <- stats::predict(glm_similarity, newx = model_data, s = s) + y_mean <- mean(y) + ss_total <- sum((y - y_mean)^2) + ss_residual <- sum((y - y_pred)^2) + r2 <- 1 - (ss_residual / ss_total) + # Indicators of the distribution of the weights + n50 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.50) + n75 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.75) + n90 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.90) + # Gather results + results <- list(similarity=similarity, coef=coef, r2=r2, ncatch=as.integer(ncol(Rn)), ndist=length(vec_similarity), n50=n50, n75=n75, n90=n90) + } + + if(model == "lm"){ + # Linear model to predict Rn similarity from predictors using lm + lm_data <- cbind(Rn_similarity=vec_similarity, model_data) + formula <- stats::as.formula(paste(colnames(lm_data)[1],"~", paste(colnames(lm_data)[-1], collapse = " + "))) + lm_similarity <- stats::lm(formula, data=as.data.frame(lm_data)) + similarity <- as.vector(stats::predict(lm_similarity, newdata=as.data.frame(newpredictors))) + coef <- as.matrix(coef(lm_similarity))[-1,] + r2 <- summary(lm_similarity)$r.squared + pvalue <- summary(lm_similarity)$coefficients[-1, "Pr(>|t|)"] + # Indicators of the distribution of the weights + n50 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.50) + n75 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.75) + n90 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.90) + # Gather results + results <- list(similarity=similarity, coef=coef, r2=r2, ncatch=ncol(Rn), ndist=length(vec_similarity), n50=n50, n75=n75, n90=n90, pvalue=pvalue) + } + + return(results) +} + +KGE <- function(obs, sim) { + # Remove missing values + valid <- !is.na(obs) & !is.na(sim) + if (!any(valid)) return(NA) + obs <- obs[valid] + sim <- sim[valid] + + # KGE components + r <- 
stats::cor(obs, sim, method = c("pearson")) + beta <- mean(sim) / mean(obs) + gamma <- (sd(sim) / mean(sim)) / (sd(obs) / mean(obs)) + + # Final KGE + kge <- 1 - sqrt((r - 1)^2 + (beta - 1)^2 + (gamma - 1)^2) + return(kge) +} + +invRMSE <- function(obs, sim) { + # Remove missing values + valid <- !is.na(obs) & !is.na(sim) + if (!any(valid)) return(NA) + obs <- obs[valid] + sim <- sim[valid] + + # RMSE + rmse <- sqrt(mean((sim - obs)^2)) + return(1 / rmse) +} diff --git a/R/weightr.R b/R/weightr.R index d32e595..450b858 100644 --- a/R/weightr.R +++ b/R/weightr.R @@ -6,7 +6,7 @@ #' @param Rn net rainfall matrix of donor catchments (rows for time index, and columns for donors index) #' @param distances vector of the distances to each donor catchment, used for donor selection (see \link{hdist}) #' @param similarities vector of the hydrological similarities to each donor catchment, used for donor weighting -#' (\code{distances} is used if not provided) +#' (\code{1/distances^power} is used if not provided) #' @param ndonors maximum number of donor catchments to use #' @param donors vector of catchments id from which donors are selected. If empty, the \code{ndonors} closest #' catchments are used @@ -38,7 +38,7 @@ weightr <- function(Rn, distances, similarities, ndonors = 5, donors = NULL, power = 1, flexible_donor = TRUE){ if(inherits(distances, "units")) distances <- units::drop_units(distances) - if(missing(similarities)) similarities <- distances + if(missing(similarities)) similarities <- 1/distances^power if(!flexible_donor){ norn <- apply(Rn,MARGIN=2,FUN=function(x){all(is.na(x))}) @@ -48,11 +48,11 @@ weightr <- function(Rn, distances, similarities, ndonors = 5, donors = NULL, pow if(length(stable_donors)==0){stop("Could not find gauged donors")}else{donors <- stable_donors} } weights <- t(apply(Rn, MARGIN=1, FUN=function(x) time_weight(tRn=x, distances=distances, similarities=similarities, ndonors=ndonors, - donors=donors, power=power, flexible_donor=flexible_donor))) + donors=donors, flexible_donor=flexible_donor))) return(weights) } -time_weight <- function(tRn, distances, similarities, ndonors, donors, power, flexible_donor){ +time_weight <- function(tRn, distances, similarities, ndonors, donors, flexible_donor){ tweights <- rep(0, length(tRn)) if(flexible_donor){ gaps <- is.na(tRn) @@ -67,8 +67,8 @@ time_weight <- function(tRn, distances, similarities, ndonors, donors, power, fl tweights[tdonors] <- 0 tweights[tdonors[which(distances[tdonors]==0)]] <- 1/sum(distances[tdonors]==0) }else{ - tweights[tdonors]=1/similarities[tdonors]^power - tweights[tdonors]=tweights[tdonors]/sum(tweights[tdonors]) + tweights[tdonors] <- similarities[tdonors]+abs(min(similarities[tdonors])) + tweights[tdonors] <- tweights[tdonors]/sum(tweights[tdonors]) } } return(tweights) diff --git a/man/mixr.Rd b/man/mixr.Rd index 0f2116b..c177b43 100644 --- a/man/mixr.Rd +++ b/man/mixr.Rd @@ -22,7 +22,8 @@ mixr( flexible_donor = TRUE, cv = FALSE, save_donor = FALSE, - verbose = TRUE + verbose = TRUE, + ... 
) } \arguments{ @@ -47,7 +48,7 @@ distance (see the function \link{hdist})} (see the function \link{hdist})} \item{similarity}{a hydrological similarity matrix between gauged and ungauged catchments -used for donor weighting (\code{mdist} is used if not provided)} +used for donor weighting (\code{1/mdist^power} is used if not provided)} \item{parallel}{logical indicating if the computation should be parallelised} @@ -60,8 +61,8 @@ If not specified, the number of cores is set to the value of \code{parallel::det \item{ndonors}{maximum number of catchments to be used to simulate discharge of an ungauged catchment as defined by the function \link{weightr}} -\item{donors}{vector of catchments id from which donors are selected. If empty, the \code{ndonors} closest -catchments are used} +\item{donors}{list of vectors indicating the catchment IDs from which donors are selected for each ungauged catchments. +If empty, the \code{ndonors} closest catchments are used} \item{maxdist}{maximum distance between a gauged and an ungauged catchment to allow the net rainfall to be transferred. This threshold is applied on the \code{mdist} distance matrix. If no units is provided, @@ -79,6 +80,8 @@ in the \code{sim} object called "RnDonor", "Idonor" and "Wdonor" describing the the weight of the donor catchments respectively} \item{verbose}{boolean indicating if information messages should be written to the console} + +\item{...}{other arguments to be passed to \link{rsimilarity} if \code{similarity} is a list of matrices} } \value{ The \code{sim} object incremented by the new computed attributes. diff --git a/man/rsimilarity.Rd b/man/rsimilarity.Rd new file mode 100644 index 0000000..4062b74 --- /dev/null +++ b/man/rsimilarity.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rsimilarity.R +\name{rsimilarity} +\alias{rsimilarity} +\title{Prediction of hydrological similarity to drive transfer of hydrograph to ungauged catchments} +\usage{ +rsimilarity( + Rn, + predictors, + newpredictors, + FUN = invRMSE, + power = 0.5, + symmetrize = mean, + model = "glmnet", + args_glmnet = list(alpha = 0.5, s = "lambda.min", lower.limits = 0), + verbose = TRUE, + seed = NULL +) +} +\arguments{ +\item{Rn}{net rainfall matrix of gauged catchments (rows for time index, and columns for catchment index)} + +\item{predictors}{a list of at least two squared matrices that could be used to predict similarity of net rainfall. +The matrices should be squared to describe the similarity of each pair of gauged catchments. High values should +indicate high probability of similarity in net rainfall} + +\item{newpredictors}{a matrix or a data.frame of predictors, to be used as new input to the model being built, +and from which we want to predict net rainfall similarity. Typically, a similarity between an ungauged catchment +and several gauged catchments that we want to weight as donors. Each column should correspond to each \code{predictors}, +and each row should correspond to each pair of catchments analysed.} + +\item{FUN}{a function that will be used to quantify the similarity of two net rainfall time series. High values should +indicate high probability of similarity in net rainfall. Default is the inverse of root mean square error \code{invRMSE}, +but \code{KGE} (Kling–Gupta efficiency) can also be used} + +\item{power}{exponent applied on \code{Rn} time series when computing \code{Rn} similarity (generally between -1 and 1, +a lower value emphasising the similarity of low flows). 
Default value is 0.5} + +\item{symmetrize}{a function to combine \code{FUN(Rn[,i],Rn[,j])} and \code{FUN(Rn[,j],Rn[,i])} into one similarity value +(typically if \code{FUN=KGE}, \code{KGE(Rn[,i],Rn[,j])!=KGE(Rn[,j],Rn[,i])}). Default is mean} + +\item{model}{the method to estimate similarity of \code{Rn} from \code{descriptors}. This must be one of "lm", "glmnet"} + +\item{args_glmnet}{list of arguments to be passed to \code{glmnet::cv.glmnet()}} + +\item{verbose}{boolean indicating if warning messages should be written to the console} + +\item{seed}{integer value to be used by \code{set.seed()} for reproducible results. Used only if \code{model="glmnet"}} +} +\description{ +Quantify the similarity of net rainfall and model this hydrological similarity from catchment +similarity predictors +} +\examples{ +\donttest{data(Oudon) +obs <- as_transfr(st = Oudon$obs, hl = Oudon$hl) +obs <- velocity(obs, method = "loire2016") +obs <- uh(obs) +obs <- lagtime(obs) +obs <- rapriori(obs) +obs <- inversion(obs, parallel = TRUE, cores=2) +mdist1 <- hdist(x = obs, y = obs, method = "rghosh", parallel = c(FALSE,TRUE), cores=2) +mdist2 <- mdist1^2 +rghosh1 <- seq(1000, 5000, by=100) +rghosh2 <- rghosh1^2 +res <- rsimilarity(Rn = obs$st$RnInv, + predictors = list(rghosh1=1/mdist1, rghosh2=1/mdist2), + newpredictors = data.frame(rghosh1=1/rghosh1, rghosh2=1/rghosh2), + seed=1234) +plot(rghosh1, res$similarity, ylab = "Predicted Rn similarity") +plot(rghosh2, res$similarity, ylab = "Predicted Rn similarity") +# rsimilarity() is automatically called by mixr() if mdist is a list +obs <- mixr(obs = obs, mdist = mdist1, + similarity = list(rghosh1=1/mdist1, rghosh2=1/mdist2), + parallel = TRUE, cores=2, cv = TRUE, save_donor = TRUE) +obs$similarity_models +obs <- convolution(obs) +plot(obs, i = 1, attribute = c("Qobs", "Qsim"))} +} +\seealso{ +\link{mixr} +} diff --git a/man/weightr.Rd b/man/weightr.Rd index c2e1886..efd5506 100644 --- a/man/weightr.Rd +++ b/man/weightr.Rd @@ -20,7 +20,7 @@ weightr( \item{distances}{vector of the distances to each donor catchment, used for donor selection (see \link{hdist})} \item{similarities}{vector of the hydrological similarities to each donor catchment, used for donor weighting -(\code{distances} is used if not provided)} +(\code{1/distances^power} is used if not provided)} \item{ndonors}{maximum number of donor catchments to use} -- GitLab
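Reviewer note (not part of the patch): the core idea of the new rsimilarity() helper is to vectorise the pairwise Rn similarity matrix through its upper triangle, symmetrise the i-to-j and j-to-i values, and regress them on the predictor similarities before predicting the similarity of each ungauged/gauged pair. The sketch below illustrates that workflow on synthetic data using only base R and the "lm" branch; the objects n_catch, fake_Rn and inv_rmse are invented for illustration and are not part of the transfR API, and the symmetrisation step is a no-op here because inv_rmse is already symmetric.

# Minimal sketch of the rsimilarity() idea on synthetic data (illustrative only)
set.seed(42)
n_catch <- 10                                  # number of gauged catchments
n_time  <- 200                                 # length of the Rn time series
fake_Rn <- matrix(rgamma(n_time * n_catch, shape = 2), nrow = n_time)

inv_rmse <- function(obs, sim) 1 / sqrt(mean((sim - obs)^2))

# Pairwise Rn similarity (a power of 0.5 is applied to Rn, as in the default of rsimilarity)
sim_Rn <- outer(seq_len(n_catch), seq_len(n_catch),
                Vectorize(function(i, j) inv_rmse(fake_Rn[, i]^0.5, fake_Rn[, j]^0.5)))

# Two toy predictors: inverse spatial distance and inverse squared distance
coords <- matrix(runif(2 * n_catch, 0, 50e3), ncol = 2)
dist1  <- as.matrix(dist(coords)) + 1          # +1 only to avoid 1/0 on the diagonal
pred1  <- 1 / dist1
pred2  <- 1 / dist1^2

# Keep one value per pair of catchments (upper triangle, as rsimilarity() does)
keep <- upper.tri(sim_Rn)
dat  <- data.frame(y = sim_Rn[keep], p1 = pred1[keep], p2 = pred2[keep])

# Linear model linking the predictor similarities to the observed Rn similarity
fit <- lm(y ~ p1 + p2, data = dat)
summary(fit)$r.squared

# Predicted Rn similarity between a hypothetical ungauged catchment and each gauged one,
# which mixr()/weightr() would then turn into donor weights
new_dist <- runif(n_catch, 5e3, 50e3)
predict(fit, newdata = data.frame(p1 = 1 / new_dist, p2 = 1 / new_dist^2))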