From 0d586ce47aaaf47780a6bef2bde9f386f7f3b074 Mon Sep 17 00:00:00 2001
From: de Lavenne Alban <alban.de-lavenne@irstea.fr>
Date: Wed, 5 Feb 2025 11:17:56 +0100
Subject: [PATCH] feat: the donor catchment weighting strategy can now be
 derived from a similarity analysis of Rn

A linear model can be built between a list of predictors (provided by the package user) and the
observed similarity of Rn, in order to predict the similarity between a gauged catchment and an
ungauged catchment.
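
For example, a minimal sketch adapted from the package examples (here mdist1 and mdist2 are assumed
to be precomputed distance matrices between the gauged catchments, e.g. obtained with hdist()):

    obs <- mixr(obs = obs, mdist = mdist1,
                similarity = list(rghosh1 = 1/mdist1, rghosh2 = 1/mdist2),
                cv = TRUE, save_donor = TRUE)
    obs$similarity_models  # coefficients and R2 of the fitted Rn similarity models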
---
 DESCRIPTION        |   6 +-
 NAMESPACE          |   8 ++
 R/mixr.R           |  64 ++++++++++------
 R/rsimilarity.R    | 177 +++++++++++++++++++++++++++++++++++++++++++++
 R/weightr.R        |  12 +--
 man/mixr.Rd        |  11 ++-
 man/rsimilarity.Rd |  82 +++++++++++++++++++++
 man/weightr.Rd     |   2 +-
 8 files changed, 325 insertions(+), 37 deletions(-)
 create mode 100644 R/rsimilarity.R
 create mode 100644 man/rsimilarity.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index f67ef6a..bb89598 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: transfR
 Type: Package
 Title: Transfer of Hydrograph from Gauged to Ungauged Catchments
-Version: 1.0.10
-Date: 2023-09-29
+Version: 1.1.0
+Date: 2025-02-05
 Authors@R: c(
   person("Alban", "de Lavenne", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9448-3490"), email = "alban.delavenne@inrae.fr"),
   person("Christophe", "Cudennec", role = c("ths"), comment = c(ORCID = "0000-0002-1707-8926"), email = "christophe.cudennec@agrocampus-ouest.fr"),
@@ -10,7 +10,7 @@ Authors@R: c(
   person("Hervé", "Squividant", role = c("ctb"), email = "herve.squividant@agrocampus-ouest.fr")
   )
 Depends: R (>= 4.1.0), stars (>= 0.4-0), sf (>= 0.8-0)
-Imports: doParallel, foreach, units, Rdpack
+Imports: doParallel, foreach, units, glmnet, Rdpack
 RdMacros: Rdpack
 VignetteBuilder: knitr
 Suggests: knitr, rmarkdown, elevatr, progress, whitebox
diff --git a/NAMESPACE b/NAMESPACE
index 5141234..d38239f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -34,6 +34,7 @@ export(lagtime)
 export(mixr)
 export(quick_transfr)
 export(rapriori)
+export(rsimilarity)
 export(uh)
 export(velocity)
 export(weightr)
@@ -43,7 +44,14 @@ import(graphics)
 import(sf)
 import(stars)
 importFrom(Rdpack,reprompt)
+importFrom(glmnet,cv.glmnet)
+importFrom(glmnet,glmnet)
+importFrom(stats,as.formula)
+importFrom(stats,complete.cases)
+importFrom(stats,cor)
+importFrom(stats,lm)
 importFrom(stats,na.omit)
+importFrom(stats,predict)
 importFrom(stats,sd)
 importFrom(units,as_units)
 importFrom(units,drop_units)
diff --git a/R/mixr.R b/R/mixr.R
index 0cdd2c4..fa6e9e3 100644
--- a/R/mixr.R
+++ b/R/mixr.R
@@ -17,7 +17,7 @@
 #' @param weightC weight given to the distance between centroids if \code{distance} is "combined"
 #' (see the function \link{hdist})
 #' @param similarity a hydrological similarity matrix between gauged and ungauged catchments
-#' used for donor weighting (\code{mdist} is used if not provided)
+#' used for donor weighting (\code{1/mdist^power} is used if not provided)
 #' @param parallel logical indicating if the computation should be parallelised
 #' @param cores the number of cores to use for parallel execution if \code{parallel} is TRUE.
 #' If not specified, the number of cores is set to the value of \code{parallel::detectCores()}
@@ -25,8 +25,8 @@
 #' \link{weightr}
 #' @param ndonors maximum number of catchments to be used to simulate discharge of an ungauged
 #' catchment as defined by the function \link{weightr}
-#' @param donors vector of catchments id from which donors are selected. If empty, the \code{ndonors} closest
-#' catchments are used
+#' @param donors list of vectors indicating the catchment IDs from which donors are selected for each ungauged catchment.
+#'  If empty, the \code{ndonors} closest catchments are used
 #' @param maxdist maximum distance between a gauged and an ungauged catchment to allow the net rainfall
 #' to be transferred. This threshold is applied on the \code{mdist} distance matrix. If no units is provided,
 #' \code{maxdist} is assumed to be in [m].
@@ -39,6 +39,7 @@
 #' in the \code{sim} object called "RnDonor", "Idonor" and "Wdonor" describing the net rainfall, the id and
 #' the weight of the donor catchments respectively
 #' @param verbose boolean indicating if information messages should be written to the console
+#' @param ... other arguments to be passed to \link{rsimilarity} if \code{similarity} is a list of matrices
 #' @return The \code{sim} object incremented by the new computed attributes.
 #' @details This function is a wrapper function for \link{hdist} and \link{weightr} to directly estimate
 #' the net rainfall on a set of ungauged catchments (\code{sim}) from a set of gauged catchments (\code{obs}).
@@ -63,8 +64,8 @@
 #' plot(object, i = 1, attribute = c("Qobs", "Qsim"))}
 #' @export
 mixr <- function(obs, sim, mdist, distance = "rghosh", gres = 5, weightO = 0.8, weightC = 0.2, similarity,
-                    parallel = FALSE, cores = NULL, power = 1, ndonors = 5, donors = NULL, maxdist = 50e3,
-                    flexible_donor = TRUE, cv = FALSE, save_donor = FALSE, verbose = TRUE){
+                 parallel = FALSE, cores = NULL, power = 1, ndonors = 5, donors = NULL, maxdist = 50e3,
+                 flexible_donor = TRUE, cv = FALSE, save_donor = FALSE, verbose = TRUE, ...){
   if(cv){
     if(missing(sim)){
       sim <- obs
@@ -86,6 +87,7 @@ mixr <- function(obs, sim, mdist, distance = "rghosh", gres = 5, weightO = 0.8,
 
   #--- Checking donors
   if(!is.null(donors)){
+    if(!inherits(donors,"list")) stop(paste0("The class of 'donors' should be a list, with each element being the catchment IDs of the donors for each ungauged catchment."))
     if(length(donors)!= dim(sim$st)[2]) stop(paste0("The length of 'donors' list is ",length(donors)," but the number of ungauged catchments is ",dim(sim$st)[2],"."))
     if(any(sapply(donors,FUN = function(x) !all(x %in% 1:dim(obs$st)[2])))) stop("The value of 'donors' can not be matched to a gauged catchment.")
   }
@@ -98,25 +100,48 @@ mixr <- function(obs, sim, mdist, distance = "rghosh", gres = 5, weightO = 0.8,
     mdist <- hdist(x=obs, y=sim, method=distance, gres=gres, weightO=weightO, weightC=weightC,
                    parallel=parallel, cores=cores, verbose=verbose)
   }
-  if(missing(similarity)){similarity <- mdist}else{
-    if(any(dim(mdist)!=dim(similarity))) stop("The dimensions of 'similarity' should be similar to 'mdist'.")
+  if(missing(similarity)){similarity <- 1/mdist^power}else{
+    if(is.matrix(similarity) && any(dim(mdist)!=dim(similarity))) stop("The dimensions of 'similarity' should be the same as those of 'mdist'.")
+    if(is.list(similarity) && any(sapply(similarity,FUN=function(x) any(dim(x)!=dim(mdist))))) stop("The dimensions of each 'similarity' matrix should be the same as those of 'mdist'.")
   }
 
-  #--- Transfering Rn from obs to sim
+  #--- Transferring Rn from obs to sim
+  if(is.list(similarity)) similarity_models <- data.frame()
   sim$st$RnSim <- NA
   for(i in 1:dim(sim$st)[2]){
     if(verbose) progress("Combining simulated Rn of neighbouring catchments for catchment ",i,dim(sim$st)[2])
-      if(cv){distances <- mdist[-i,i]}else{distances <- mdist[,i]}
-      if(cv){similarities <- similarity[-i,i]}else{similarities <- similarity[,i]}
-      if(is.null(donors)){idonors <- which(distances<=maxdist)}else{idonors <- donors[[i]][donors[[i]]%in%which(distances<=maxdist)]}
+    if(cv){distances <- mdist[-i,i]}else{distances <- mdist[,i]}
+    if(is.null(donors)){idonors <- which(distances<=maxdist)}else{idonors <- donors[[i]][donors[[i]]%in%which(distances<=maxdist)]}
     if(length(idonors)==0){
       warning(paste0("No donor found for catchment ",i," below the distance of ",units::set_units(maxdist,"km")," km."))
       sim$st$RnSim[,i] <- rep(NA,dim(sim$st)[1])
     }else{
-		if(cv){RnInv <- obs$st$RnInv[,-i][,idonors]}else{RnInv <- obs$st$RnInv[,idonors]}
-        weights <- weightr(Rn=RnInv, distances=distances[idonors], similarities=similarities[idonors], ndonors=ndonors, power=power, flexible_donor=flexible_donor)
+      if(cv){RnInv <- obs$st$RnInv[,-i][,idonors]}else{RnInv <- obs$st$RnInv[,idonors]}
+      if(is.list(similarity)){
+        if(cv){
+          predictors <- lapply(similarity, FUN = function(x) x[-i,-i][idonors,idonors])
+          newpredictors <- sapply(similarity, FUN = function(x) x[-i,i])[idonors,]
+        }else{
+          predictors <- lapply(similarity, FUN = function(x) x[idonors,idonors])
+          newpredictors <- sapply(similarity, FUN = function(x) x[i,idonors])
+        }
+        similarity_model <- rsimilarity(Rn=RnInv,
+                                        predictors=predictors,
+                                        newpredictors=newpredictors,
+                                        ...)
+        similarities <- similarity_model$similarity
+        # Save description of the similarity model
+        similarity_models <- rbind(similarity_models, data.frame(t(similarity_model$coef), r2=similarity_model$r2,
+                                       ncatch=similarity_model$ncatch, ndist=similarity_model$ndist,
+                                       n75=similarity_model$n75, n90=similarity_model$n90))
+      }else{
+        if(cv){similarities <- similarity[-i,i][idonors]}else{similarities <- similarity[idonors,i]}
+      }
+      if(verbose & is.list(similarity)) cat("### Weighting Rn of each donor catchment ###\n")
+      weights <- weightr(Rn=RnInv, distances=distances[idonors], similarities=similarities, ndonors=ndonors, power=power, flexible_donor=flexible_donor)
       sim$st$RnSim[,i] <- apply(RnInv*weights,MARGIN = 1,FUN = function(x){if(all(is.na(x))){NA}else{sum(x, na.rm = T)}})
       if(save_donor){
+        if(cv){gdonors <- seq_len(dim(sim$st)[2])[-i][idonors]}else{gdonors <- idonors} # map donor indices back to the global index of gauged catchments
         for(idonor in 1:min(c(ndonors,length(idonors)))){
           #Create new columns
           if(!paste0("RnDonor",idonor)%in%names(sim$st)){
@@ -128,22 +153,15 @@ mixr <- function(obs, sim, mdist, distance = "rghosh", gres = 5, weightO = 0.8,
           if(!paste0("Wdonor",idonor)%in%names(sim$st)){
             sim$st[[paste0("Wdonor",idonor)]]  <- matrix(NA,nrow=dim(sim$st)[1],ncol=dim(sim$st)[2])
             dim(sim$st[[paste0("Wdonor",idonor)]]) <- dim(sim$st)}
-            sim$st[[paste0("Idonor",idonor)]][,i]  <- apply(weights,MARGIN = 1, FUN = function(x){if(all(is.na(x))){NA}else{which(rank(1-x,ties.method = "first")==idonor)[idonors]}})
+          sim$st[[paste0("Idonor",idonor)]][,i]  <- apply(weights,MARGIN = 1, FUN = function(x){if(all(is.na(x))){NA}else{gdonors[which(rank(1-x,ties.method = "first")==idonor)]}})
           sim$st[[paste0("Wdonor",idonor)]][,i] <- apply(weights,MARGIN = 1, FUN = function(x){if(all(is.na(x))){NA}else{x[which(rank(1-x,ties.method = "first")==idonor)]}})
-          #Solution 1 (simpler)
-          # for(t in 1:dim(sim$st)[1]) sim$st[[paste0("RnDonor",idonor)]][t,i] <- RnInv[t,sim$st[[paste0("Idonor",idonor)]][t,i]]
-          #Solution 2 (faster)
-          iweights <- matrix(0, nrow = dim(sim$st)[1], ncol = min(c(ndonors,length(idonors))))
-          iweights <- cbind(iweights,sim$st[[paste0("Idonor",idonor)]][,i])
-            iweights <- t(apply(iweights, MARGIN = 1, function(x){if(!is.na(x[length(x)])){x[which(x[length(x)]==idonor)]=1;x[1:(length(x)-1)]}else{rep(NA,length(x)-1)}}))
-          sim$st[[paste0("RnDonor",idonor)]][,i] <- apply(RnInv*iweights,MARGIN = 1,FUN = function(x){if(all(is.na(x))){NA}else{sum(x, na.rm = T)}})
-          # units(sim$st[[which(names(sim$st)==paste0("RnDonor",idonor))]]) <- units(RnInv) # could not find a way to keep the units provided by weighting
+          sim$st[[paste0("RnDonor", idonor)]][, i] <- sapply(seq_len(dim(sim$st)[1]), function(t) obs$st$RnInv[t, sim$st[[paste0("Idonor", idonor)]][t, i]])
           units(sim$st[[paste0("RnDonor",idonor)]]) <- units(RnInv) # could not find a way to keep the units provided by weighting
         }
       }
     }
   }
+  if(is.list(similarity)) sim[["similarity_models"]] <- similarity_models # save similarity models
   units(sim$st[["RnSim"]]) <- units(obs$st$RnInv) # could not find a way to keep the units provided by weighting
   return(sim)
 }
-
diff --git a/R/rsimilarity.R b/R/rsimilarity.R
new file mode 100644
index 0000000..462b104
--- /dev/null
+++ b/R/rsimilarity.R
@@ -0,0 +1,177 @@
+#' Prediction of hydrological similarity to drive transfer of hydrograph to ungauged catchments
+#'
+#' Quantify the similarity of net rainfall and model this hydrological similarity from catchment
+#' similarity predictors
+#' @name rsimilarity
+#' @param Rn net rainfall matrix of gauged catchments (rows for time index, and columns for catchment index)
+#' @param predictors a list of at least two square matrices that could be used to predict similarity of net rainfall.
+#' The matrices should be square, describing the similarity of each pair of gauged catchments. High values should
+#' indicate high probability of similarity in net rainfall
+#' @param newpredictors a matrix or a data.frame of predictors, to be used as new input to the model being built,
+#' and from which we want to predict net rainfall similarity. Typically, the similarities between an ungauged catchment
+#' and several gauged catchments that we want to weight as donors. Each column should correspond to one element of
+#' \code{predictors}, and each row should correspond to one pair of catchments analysed.
+#' @param FUN a function that will be used to quantify the similarity of two net rainfall time series. High values should
+#' indicate high probability of similarity in net rainfall. Default is the inverse of root mean square error \code{invRMSE},
+#' but \code{KGE} (Kling–Gupta efficiency) can also be used
+#' @param power exponent applied to \code{Rn} time series when computing \code{Rn} similarity (generally between -1 and 1,
+#' a lower value emphasising the similarity of low flows). Default value is 0.5
+#' @param symmetrize a function to combine \code{FUN(Rn[,i],Rn[,j])} and \code{FUN(Rn[,j],Rn[,i])} into one similarity value
+#' (typically if \code{FUN=KGE}, \code{KGE(Rn[,i],Rn[,j])!=KGE(Rn[,j],Rn[,i])}). Default is mean
+#' @param model the method to estimate similarity of \code{Rn} from \code{predictors}. This must be one of "lm", "glmnet"
+#' @param args_glmnet list of arguments to be passed to \code{glmnet::cv.glmnet()}
+#' @param verbose boolean indicating if warning messages should be written to the console
+#' @param seed integer value to be used by \code{set.seed()} for reproducible results. Used only if \code{model="glmnet"}
+#' @seealso \link{mixr}
+#' @examples
+#' \donttest{data(Oudon)
+#' obs <- as_transfr(st = Oudon$obs, hl = Oudon$hl)
+#' obs <- velocity(obs, method = "loire2016")
+#' obs <- uh(obs)
+#' obs <- lagtime(obs)
+#' obs <- rapriori(obs)
+#' obs <- inversion(obs, parallel = TRUE, cores=2)
+#' mdist1 <- hdist(x = obs, y = obs, method = "rghosh", parallel = c(FALSE,TRUE), cores=2)
+#' mdist2 <- mdist1^2
+#' rghosh1 <- seq(1000, 5000, by=100)
+#' rghosh2 <- rghosh1^2
+#' res <- rsimilarity(Rn = obs$st$RnInv,
+#'                    predictors = list(rghosh1=1/mdist1, rghosh2=1/mdist2),
+#'                    newpredictors = data.frame(rghosh1=1/rghosh1, rghosh2=1/rghosh2),
+#'                    seed=1234)
+#' plot(rghosh1, res$similarity, ylab = "Predicted Rn similarity")
+#' plot(rghosh2, res$similarity, ylab = "Predicted Rn similarity")
+#' # rsimilarity() is automatically called by mixr() if similarity is a list of matrices
+#' obs <- mixr(obs = obs, mdist = mdist1,
+#'             similarity = list(rghosh1=1/mdist1, rghosh2=1/mdist2),
+#'             parallel = TRUE, cores=2, cv = TRUE, save_donor = TRUE)
+#' obs$similarity_models
+#' obs <- convolution(obs)
+#' plot(obs, i = 1, attribute = c("Qobs", "Qsim"))}
+#' @importFrom units drop_units
+#' @importFrom glmnet glmnet cv.glmnet
+#' @importFrom stats lm predict complete.cases cor as.formula
+#' @export
+rsimilarity <- function(Rn, predictors, newpredictors, FUN=invRMSE, power=0.5, symmetrize=mean, model="glmnet",
+		args_glmnet=list(alpha=0.5, s="lambda.min", lower.limits=0), verbose = TRUE, seed=NULL){
+	
+	# Checks inputs
+	if(inherits(Rn, "units")) Rn <- units::drop_units(Rn)
+	if(!inherits(predictors, "list")) predictors <- list(predictors)
+	if(any(!sapply(predictors,FUN=is.matrix))) stop("predictors should be a list with 2 or more matrices")
+	if(is.null(ncol(newpredictors))) stop("newpredictors should be a matrix or a data.frame with 2 or more columns")
+	if(length(predictors)<=1) stop("predictors should be a list with 2 or more matrices")
+	if(ncol(newpredictors)<=1) stop("newpredictors should be a matrix or a data.frame with 2 or more columns")
+	if(any(ncol(Rn)!=sapply(predictors,FUN=ncol)) | any(ncol(Rn)!=sapply(predictors,FUN=nrow))){
+		stop(paste("All predictors elements should be a square matrix of",ncol(Rn),"x",ncol(Rn),"to be consistent with Rn"))}
+	if(length(predictors) != ncol(newpredictors)) stop("length(predictors) and ncol(newpredictors) should be the same")
+	
+	# Provide names for the predictors if not provided
+	if(is.null(names(predictors))) names(predictors) <- paste0("dist",1:length(predictors))
+	if(is.null(colnames(newpredictors))) colnames(newpredictors) <- paste0("dist",1:ncol(newpredictors))
+	if(any(names(predictors) != colnames(newpredictors))) stop("predictors and newpredictors do not have the same names")
+	
+	# Compute similarity matrix index of Rn
+	if(verbose) cat("\n### Computing similarity matrix of Rn ###\n")
+	n <- ncol(Rn)
+	mat_similarity <- matrix(NA, n, n)
+	mat_similarity <- sapply(1:n, function(i) {
+				sapply(1:n, function(j) FUN(Rn[, i]^power, Rn[, j]^power))
+			})
+	
+	# Convert matrix to vector for Rn similarity
+	if(verbose) cat("### Preparing data for the similarity model ###\n")
+	tmat_similarity <- t(mat_similarity)
+	dat_similarity <- data.frame(ij=mat_similarity[upper.tri(mat_similarity, diag = FALSE)],ji=tmat_similarity[upper.tri(tmat_similarity, diag = FALSE)])
+	vec_similarity <- apply(dat_similarity, MARGIN=1, FUN=symmetrize)
+	
+	# Convert each matrix of predictors into vectors, to prepare data for linear model
+	model_data <- matrix(nrow=length(vec_similarity), ncol=length(predictors), dimnames=list(NULL,names(predictors)))
+	for(i in 1:length(predictors)){
+		mat_pred <- predictors[[i]]
+		tmat_pred <- t(mat_pred)
+		dat_pred <- data.frame(ij=mat_pred[upper.tri(mat_pred, diag = FALSE)],ji=tmat_pred[upper.tri(tmat_pred, diag = FALSE)])
+		model_data[,i] <- apply(dat_pred, MARGIN=1, FUN=symmetrize)
+	}
+	
+	# Remove NA or Inf value
+	to_remove <- apply(model_data, MARGIN=1, FUN=function(x) any(is.infinite(x) | is.na(x)))
+	to_remove <- to_remove | is.na(vec_similarity)
+	model_data <- model_data[!to_remove,]
+	vec_similarity <- vec_similarity[!to_remove]
+	
+	if(verbose & nrow(model_data) < 10 * ncol(model_data)) warning(paste0("Regression is done over a limited number of similarities (",
+						nrow(model_data)," observations for ",ncol(model_data)," predictors)"))
+	
+	if(verbose) cat("### Building the similarity model ###\n")
+	if(model == "glmnet"){
+		if (!is.null(seed)) set.seed(seed)
+		# Linear model to predict Rn similarity from predictors using a GLM with lasso or elasticnet regularization
+		valid_args <- c(names(formals(glmnet::glmnet)),names(formals(glmnet::cv.glmnet)))
+		glm_similarity <- do.call(glmnet::cv.glmnet, args=c(list(x=model_data, y=vec_similarity), args_glmnet[names(args_glmnet) %in% valid_args]))
+		if(is.character(args_glmnet[["s"]])){s <- glm_similarity[[args_glmnet[["s"]]]]}else{s <- args_glmnet[["s"]]}
+		similarity <- as.vector(stats::predict(glm_similarity, newx=as.matrix(newpredictors), s=s))
+		coef <- as.matrix(stats::coef(glm_similarity, s=s))[-1,]
+		# Compute R2 manually
+		y <- vec_similarity
+		y_pred <- stats::predict(glm_similarity, newx = model_data, s = s)
+		y_mean <- mean(y)
+		ss_total <- sum((y - y_mean)^2)
+		ss_residual <- sum((y - y_pred)^2)
+		r2 <- 1 - (ss_residual / ss_total)
+		# Indicators of the distribution of the weights
+		n50 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.50)
+		n75 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.75)
+		n90 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.90)
+		# Gather results
+		results <- list(similarity=similarity, coef=coef, r2=r2, ncatch=as.integer(ncol(Rn)), ndist=length(vec_similarity), n50=n50, n75=n75, n90=n90)
+	}
+	
+	if(model == "lm"){
+		# Linear model to predict Rn similarity from predictors using lm
+		lm_data <- cbind(Rn_similarity=vec_similarity, model_data)
+		formula <- stats::as.formula(paste(colnames(lm_data)[1],"~", paste(colnames(lm_data)[-1], collapse = " + ")))
+		lm_similarity <- stats::lm(formula, data=as.data.frame(lm_data))
+		similarity <- as.vector(stats::predict(lm_similarity, newdata=as.data.frame(newpredictors)))
+		coef <- as.matrix(stats::coef(lm_similarity))[-1,]
+		r2 <- summary(lm_similarity)$r.squared
+		pvalue <- summary(lm_similarity)$coefficients[-1, "Pr(>|t|)"]
+		# Indicators of the distribution of the weights
+		n50 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.50)
+		n75 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.75)
+		n90 <- which.min(cumsum(sort(similarity/sum(similarity), decreasing=TRUE)) <= 0.90)
+		# Gather results
+		results <- list(similarity=similarity, coef=coef, r2=r2, ncatch=ncol(Rn), ndist=length(vec_similarity), n50=n50, n75=n75, n90=n90, pvalue=pvalue)
+	}
+	
+	return(results)
+}
+
+KGE <- function(obs, sim) {
+	# Remove missing values
+	valid <- !is.na(obs) & !is.na(sim)
+	if (!any(valid)) return(NA)
+	obs <- obs[valid]
+	sim <- sim[valid]
+	
+	# KGE components
+	r <- stats::cor(obs, sim, method = c("pearson"))
+	beta <- mean(sim) / mean(obs)
+	gamma <- (sd(sim) / mean(sim)) / (sd(obs) / mean(obs))
+	
+	# Final KGE
+	kge <- 1 - sqrt((r - 1)^2 + (beta - 1)^2 + (gamma - 1)^2)
+	return(kge)
+}
+
+invRMSE <- function(obs, sim) {
+	# Remove missing values
+	valid <- !is.na(obs) & !is.na(sim)
+	if (!any(valid)) return(NA)
+	obs <- obs[valid]
+	sim <- sim[valid]
+	
+	# RMSE
+	rmse <- sqrt(mean((sim - obs)^2))
+	return(1 / rmse)
+}
diff --git a/R/weightr.R b/R/weightr.R
index d32e595..450b858 100644
--- a/R/weightr.R
+++ b/R/weightr.R
@@ -6,7 +6,7 @@
 #' @param Rn net rainfall matrix of donor catchments (rows for time index, and columns for donors index)
 #' @param distances vector of the distances to each donor catchment, used for donor selection (see \link{hdist})
 #' @param similarities vector of the hydrological similarities to each donor catchment, used for donor weighting
-#'  (\code{distances} is used if not provided)
+#'  (\code{1/distances^power} is used if not provided)
 #' @param ndonors maximum number of donor catchments to use
 #' @param donors vector of catchments id from which donors are selected. If empty, the \code{ndonors} closest
 #' catchments are used
@@ -38,7 +38,7 @@
 weightr <- function(Rn, distances, similarities, ndonors = 5, donors = NULL, power = 1, flexible_donor = TRUE){
 
   if(inherits(distances, "units")) distances <- units::drop_units(distances)
-  if(missing(similarities)) similarities <- distances
+  if(missing(similarities)) similarities <- 1/distances^power
 
   if(!flexible_donor){
     norn    <- apply(Rn,MARGIN=2,FUN=function(x){all(is.na(x))})
@@ -48,11 +48,11 @@ weightr <- function(Rn, distances, similarities, ndonors = 5, donors = NULL, pow
     if(length(stable_donors)==0){stop("Could not find gauged donors")}else{donors <- stable_donors}
   }
   weights <- t(apply(Rn, MARGIN=1, FUN=function(x) time_weight(tRn=x, distances=distances, similarities=similarities, ndonors=ndonors,
-                                                                 donors=donors, power=power, flexible_donor=flexible_donor)))
+                                                                 donors=donors, flexible_donor=flexible_donor)))
   return(weights)
 }
 
-time_weight <- function(tRn, distances, similarities, ndonors, donors, power, flexible_donor){
+time_weight <- function(tRn, distances, similarities, ndonors, donors, flexible_donor){
   tweights <- rep(0, length(tRn))
   if(flexible_donor){
     gaps    <- is.na(tRn)
@@ -67,8 +67,8 @@ time_weight <- function(tRn, distances, similarities, ndonors, donors, power, fl
       tweights[tdonors] <- 0
       tweights[tdonors[which(distances[tdonors]==0)]] <- 1/sum(distances[tdonors]==0)
     }else{
-      tweights[tdonors]=1/similarities[tdonors]^power
-      tweights[tdonors]=tweights[tdonors]/sum(tweights[tdonors])
+      tweights[tdonors] <- similarities[tdonors]+abs(min(similarities[tdonors]))
+      tweights[tdonors] <- tweights[tdonors]/sum(tweights[tdonors])
   }
   }
   return(tweights)
diff --git a/man/mixr.Rd b/man/mixr.Rd
index 0f2116b..c177b43 100644
--- a/man/mixr.Rd
+++ b/man/mixr.Rd
@@ -22,7 +22,8 @@ mixr(
   flexible_donor = TRUE,
   cv = FALSE,
   save_donor = FALSE,
-  verbose = TRUE
+  verbose = TRUE,
+  ...
 )
 }
 \arguments{
@@ -47,7 +48,7 @@ distance (see the function \link{hdist})}
 (see the function \link{hdist})}
 
 \item{similarity}{a hydrological similarity matrix between gauged and ungauged catchments
-used for donor weighting (\code{mdist} is used if not provided)}
+used for donor weighting (\code{1/mdist^power} is used if not provided)}
 
 \item{parallel}{logical indicating if the computation should be parallelised}
 
@@ -60,8 +61,8 @@ If not specified, the number of cores is set to the value of \code{parallel::det
 \item{ndonors}{maximum number of catchments to be used to simulate discharge of an ungauged
 catchment as defined by the function \link{weightr}}
 
-\item{donors}{vector of catchments id from which donors are selected. If empty, the \code{ndonors} closest
-catchments are used}
+\item{donors}{list of vectors indicating the catchment IDs from which donors are selected for each ungauged catchment.
+If empty, the \code{ndonors} closest catchments are used}
 
 \item{maxdist}{maximum distance between a gauged and an ungauged catchment to allow the net rainfall
 to be transferred. This threshold is applied on the \code{mdist} distance matrix. If no units is provided,
@@ -79,6 +80,8 @@ in the \code{sim} object called "RnDonor", "Idonor" and "Wdonor" describing the
 the weight of the donor catchments respectively}
 
 \item{verbose}{boolean indicating if information messages should be written to the console}
+
+\item{...}{other arguments to be passed to \link{rsimilarity} if \code{similarity} is a list of matrices}
 }
 \value{
 The \code{sim} object incremented by the new computed attributes.
diff --git a/man/rsimilarity.Rd b/man/rsimilarity.Rd
new file mode 100644
index 0000000..4062b74
--- /dev/null
+++ b/man/rsimilarity.Rd
@@ -0,0 +1,82 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rsimilarity.R
+\name{rsimilarity}
+\alias{rsimilarity}
+\title{Prediction of hydrological similarity to drive transfer of hydrograph to ungauged catchments}
+\usage{
+rsimilarity(
+  Rn,
+  predictors,
+  newpredictors,
+  FUN = invRMSE,
+  power = 0.5,
+  symmetrize = mean,
+  model = "glmnet",
+  args_glmnet = list(alpha = 0.5, s = "lambda.min", lower.limits = 0),
+  verbose = TRUE,
+  seed = NULL
+)
+}
+\arguments{
+\item{Rn}{net rainfall matrix of gauged catchments (rows for time index, and columns for catchment index)}
+
+\item{predictors}{a list of at least two square matrices that could be used to predict similarity of net rainfall.
+The matrices should be square, describing the similarity of each pair of gauged catchments. High values should
+indicate high probability of similarity in net rainfall}
+
+\item{newpredictors}{a matrix or a data.frame of predictors, to be used as new input to the model being built,
+and from which we want to predict net rainfall similarity. Typically, the similarities between an ungauged catchment
+and several gauged catchments that we want to weight as donors. Each column should correspond to one element of
+\code{predictors}, and each row should correspond to one pair of catchments analysed.}
+
+\item{FUN}{a function that will be used to quantify the similarity of two net rainfall time series. High values should
+indicate high probability of similarity in net rainfall. Default is the inverse of root mean square error \code{invRMSE},
+but \code{KGE} (Kling–Gupta efficiency) can also be used}
+
+\item{power}{exponent applied to \code{Rn} time series when computing \code{Rn} similarity (generally between -1 and 1,
+a lower value emphasising the similarity of low flows). Default value is 0.5}
+
+\item{symmetrize}{a function to combine \code{FUN(Rn[,i],Rn[,j])} and \code{FUN(Rn[,j],Rn[,i])} into one similarity value
+(typically if \code{FUN=KGE}, \code{KGE(Rn[,i],Rn[,j])!=KGE(Rn[,j],Rn[,i])}). Default is mean}
+
+\item{model}{the method to estimate similarity of \code{Rn} from \code{predictors}. This must be one of "lm", "glmnet"}
+
+\item{args_glmnet}{list of arguments to be passed to \code{glmnet::cv.glmnet()}}
+
+\item{verbose}{boolean indicating if warning messages should be written to the console}
+
+\item{seed}{integer value to be used by \code{set.seed()} for reproducible results. Used only if \code{model="glmnet"}}
+}
+\description{
+Quantify the similarity of net rainfall and model this hydrological similarity from catchment
+similarity predictors
+}
+\examples{
+\donttest{data(Oudon)
+obs <- as_transfr(st = Oudon$obs, hl = Oudon$hl)
+obs <- velocity(obs, method = "loire2016")
+obs <- uh(obs)
+obs <- lagtime(obs)
+obs <- rapriori(obs)
+obs <- inversion(obs, parallel = TRUE, cores=2)
+mdist1 <- hdist(x = obs, y = obs, method = "rghosh", parallel = c(FALSE,TRUE), cores=2)
+mdist2 <- mdist1^2
+rghosh1 <- seq(1000, 5000, by=100)
+rghosh2 <- rghosh1^2
+res <- rsimilarity(Rn = obs$st$RnInv,
+                   predictors = list(rghosh1=1/mdist1, rghosh2=1/mdist2),
+                   newpredictors = data.frame(rghosh1=1/rghosh1, rghosh2=1/rghosh2),
+                   seed=1234)
+plot(rghosh1, res$similarity, ylab = "Predicted Rn similarity")
+plot(rghosh2, res$similarity, ylab = "Predicted Rn similarity")
+# rsimilarity() is automatically called by mixr() if similarity is a list of matrices
+obs <- mixr(obs = obs, mdist = mdist1,
+            similarity = list(rghosh1=1/mdist1, rghosh2=1/mdist2),
+            parallel = TRUE, cores=2, cv = TRUE, save_donor = TRUE)
+obs$similarity_models
+obs <- convolution(obs)
+plot(obs, i = 1, attribute = c("Qobs", "Qsim"))}
+}
+\seealso{
+\link{mixr}
+}
diff --git a/man/weightr.Rd b/man/weightr.Rd
index c2e1886..efd5506 100644
--- a/man/weightr.Rd
+++ b/man/weightr.Rd
@@ -20,7 +20,7 @@ weightr(
 \item{distances}{vector of the distances to each donor catchment, used for donor selection (see \link{hdist})}
 
 \item{similarities}{vector of the hydrological similarities to each donor catchment, used for donor weighting
-(\code{distances} is used if not provided)}
+(\code{1/distances^power} is used if not provided)}
 
 \item{ndonors}{maximum number of donor catchments to use}
 
-- 
GitLab