Commit 87898a5d by patrick.lambert

### exploration of strayers mortality and growth

 ... ... @@ -81,24 +81,24 @@ plot(tempData$Year[sel], tempData$Winter[sel], type = 'l') # ---------------------------------------------- # growth simulation # ---------------------------------------------- vonBertalaffyGrowth = function(age, L0, Linf, K){ vonBertalanffyGrowth = function(age, L0, Linf, K){ t0 = log(1 - L0 / Linf) / K return(Linf * (1 - exp(-K * (age - t0)))) } vonBertalaffyInverse = function(L, L0, Linf, K){ vonBertalanffyInverse = function(L, L0, Linf, K){ t0 = log(1 - L0/Linf)/K return(t0 - log(1 - L/Linf)/K) } KvonBertalaffy = function(age, L, L0, Linf) { KvonBertalanffy = function(age, L, L0, Linf) { return(-log((Linf - L ) / (Linf - L0)) / age) } #vonBertalaffyInverse(L=40, L0, Linf, koptMale) #vonBertalanffyInverse(L=40, L0, Linf, koptMale) Zfct = function(age, Ltarget, L0, Linf, K, Tref ) { return(sapply(age,function(a) (vonBertalaffyGrowth(a, L0, Linf, K) * mean(temperatureEffect(Tref, 3, 17, 26)) - Ltarget)^2)) return(sapply(age,function(a) (vonBertalanffyGrowth(a, L0, Linf, K) * mean(temperatureEffect(Tref, 3, 17, 26)) - Ltarget)^2)) } Pauly = function(age, t0, Linf, K, D){ ... ... @@ -106,7 +106,7 @@ Pauly = function(age, t0, Linf, K, D){ } # pas cohérent avec la temperature effet sur le coeff de croissance mais ca marche vonBertalaffyIncrement = function(nStep, L0, Linf, K, deltaT, sigma, withTempEffect=FALSE, TrefAtSea = c(9.876946, 13.489854, 15.891487, 11.554104) ){ vonBertalanffyIncrement = function(nStep, L0, Linf, K, deltaT, sigma, withTempEffect=FALSE, TrefAtSea = c(9.876946, 13.489854, 15.891487, 11.554104) ){ tempEffect = temperatureEffect( TrefAtSea , 3, 17, 26) L = matrix(nrow = nStep + 1) L[1] = L0 ... ... @@ -142,40 +142,40 @@ age = seq(0,8,.25) # comparaison male femelle # ########################################################################### Lfemale = 55 (koptFemale = KvonBertalaffy(5.5, L = Lfemale, L0=L0, Linf=Linf)/tempEffect) (koptFemale = KvonBertalanffy(5.5, L = Lfemale, L0=L0, Linf=Linf)/tempEffect) Lmale = 40 (koptMale = KvonBertalaffy(4.5, L = Lmale, L0, Linf)/tempEffect) (koptMale = KvonBertalanffy(4.5, L = Lmale, L0, Linf)/tempEffect) plot(x=NaN, xlim=range(age), ylim=c(0, Linf), xlab ='age (year)', ylab ='length (cm)') for (i in 1:100) { lines(age, vonBertalaffyIncrement(max(age)/timestep, L0, Linf, koptMale, timestep, sigma, withTempEffect = TRUE), col='blue') lines(age, vonBertalanffyIncrement(max(age)/timestep, L0, Linf, koptMale, timestep, sigma, withTempEffect = TRUE), col='blue') } lines(age, vonBertalaffyGrowth(age, L0, Linf, koptMale * tempEffect), lty=2, lwd = 2, col = 'black') lines(age, vonBertalanffyGrowth(age, L0, Linf, koptMale * tempEffect), lty=2, lwd = 2, col = 'black') res=matrix(nrow = max(age)/timestep + 1, ncol= 100 ) # verification par rapport à la médiane et la moyenne for (i in 1:100) { res[,i]=vonBertalaffyIncrement(max(age)/timestep, L0, Linf, koptMale, timestep, sigma, withTempEffect = TRUE) res[,i]=vonBertalanffyIncrement(max(age)/timestep, L0, Linf, koptMale, timestep, sigma, withTempEffect = TRUE) } lines(age, apply(res, 1, quantile, probs =0.5 ), lty=2, lwd = 2, col = 'red') lines( age, rowMeans(res)) for (i in 1:100) { lines(age, vonBertalaffyIncrement(max(age)/timestep, L0, Linf, koptFemale, timestep, sigma, withTempEffect = TRUE), col='pink') lines(age, vonBertalanffyIncrement(max(age)/timestep, L0, Linf, koptFemale, timestep, sigma, withTempEffect = TRUE), col='pink') } lines(age, vonBertalaffyGrowth(age, L0, Linf, koptFemale * tempEffect), lty=2, lwd = 2) lines(age, vonBertalanffyGrowth(age, L0, Linf, koptFemale * tempEffect), lty=2, lwd = 2) abline(h = Lmale) abline(h = Lfemale) ageMale=vonBertalaffyInverse(L=Lmale, L0, Linf, koptMale*tempEffect) points(ageMale, vonBertalaffyGrowth(ageMale, L0, Linf, koptMale * tempEffect), col = 'red') segments(x0 = ageMale, y0=0, y1=vonBertalaffyGrowth(ageMale, L0, Linf, koptMale * tempEffect)) ageMale=vonBertalanffyInverse(L=Lmale, L0, Linf, koptMale*tempEffect) points(ageMale, vonBertalanffyGrowth(ageMale, L0, Linf, koptMale * tempEffect), col = 'red') segments(x0 = ageMale, y0=0, y1=vonBertalanffyGrowth(ageMale, L0, Linf, koptMale * tempEffect)) ageFemale=vonBertalaffyInverse(L=Lfemale, L0, Linf, koptFemale*tempEffect) points(ageFemale, vonBertalaffyGrowth(ageFemale, L0, Linf, koptFemale * tempEffect), col = 'red') segments(x0 = ageFemale, y0=0, y1=vonBertalaffyGrowth(ageFemale, L0, Linf, koptFemale * tempEffect)) ageFemale=vonBertalanffyInverse(L=Lfemale, L0, Linf, koptFemale*tempEffect) points(ageFemale, vonBertalanffyGrowth(ageFemale, L0, Linf, koptFemale * tempEffect), col = 'red') segments(x0 = ageFemale, y0=0, y1=vonBertalanffyGrowth(ageFemale, L0, Linf, koptFemale * tempEffect)) cat(koptFemale, koptMale, sep =" ") cat(ageFemale, ageMale, sep =" ") ... ... @@ -186,8 +186,8 @@ sel = tempData$NOM=="Rhine" & tempData$Year>=2008 & tempData$Year<=2018 TrefRhine = (12 + colMeans(tempData[sel, c("Winter", "Spring", "summer", "Autumn")])) / 2 rbind(TrefAtSea, TrefRhine) tempEffectRhine = mean(temperatureEffect( TrefRhine, 3, 17, 26)) c(vonBertalaffyInverse(L=Lfemale, L0, Linf, koptFemale*tempEffectRhine), vonBertalaffyInverse(L=Lmale, L0, Linf, koptMale*tempEffectRhine)) c(vonBertalanffyInverse(L=Lfemale, L0, Linf, koptFemale*tempEffectRhine), vonBertalanffyInverse(L=Lmale, L0, Linf, koptMale*tempEffectRhine)) # ====================================================================== # temp en fonction de la latitude ... ... @@ -215,17 +215,17 @@ abline(h=12.5) correction=mean(temperatureEffect(TrefAtSea, 3, 17, 26)) age=seq(0,10,.25) present = vonBertalaffyGrowth(age, 2, 60, 0.3900707 * correction) present = vonBertalanffyGrowth(age, 2, 60, 0.3900707 * correction) plot(age, present, type='l', lwd=3, ylim =c(0,80)) present[age == 5] abline(v=5) male = vonBertalaffyGrowth(age, 2, 65, 0.3900707 * correction) male = vonBertalanffyGrowth(age, 2, 65, 0.3900707 * correction) lines(age, male, type='l', lwd=3, ylim =c(0,80), col='blue') abline(h=40, col='blue', lwd=2, lty=2) male[age == 5] female = vonBertalaffyGrowth(age, 2, 75, 0.3900707*55/40 * correction) female = vonBertalanffyGrowth(age, 2, 75, 0.3900707*55/40 * correction) lines(age, female, lwd=3, col='red') abline(h=55, col='red', lwd=2, lty=2) female[age == 5] ... ...  # temperature effect in GR3D # from Rosso temperatureEffect = function(tempWater, Tmin, Topt, Tmax){ response = (tempWater - Tmin) * (tempWater - Tmax) / ((tempWater - Tmin) * (tempWater - Tmax) - (tempWater - Topt)^2) response[tempWater <= Tmin | tempWater >= Tmax] = 0 return(response) } # ---------------------------------------------- # growth simulation #=--------------------------------------------- # von Bertalaffy : L = f(age) vonBertalanffyGrowth = function(age, L0, Linf, K){ t0 = log(1 - L0 / Linf) / K return(Linf * (1 - exp(-K * (age - t0)))) } # von Bertalanffy inverse : age = f(L) # with L0 rather the usual t0 parameter vonBertalanffyInverse = function(L, L0, Linf, K){ t0 = log(1 - L0/Linf)/K return(t0 - log(1 - L/Linf)/K) } # von Bertalanffy increment # pas cohérent avec la temperature effet sur le coeff de croissance mais ca marche vonBertalanffyIncrement = function(nStep, L0, Linf, K, deltaT, sigma, withTempEffect=FALSE, TrefAtSea = c(9.876946, 13.489854, 15.891487, 11.554104) ){ tempEffect = temperatureEffect( TrefAtSea , 3, 17, 26) L = matrix(nrow = nStep + 1) L[1] = L0 for (i in 1:nStep) { mu = log((Linf - L[i]) * (1 - exp(-K * deltaT))) - sigma * sigma / 2 increment = exp(rnorm(1, mu, sigma)) if (withTempEffect) { increment = increment * tempEffect[((i - 1) %% 4) + 1] } L[i + 1] = L[i] + increment } return(L) } vonBertalanffyWithNextIncrement = function(L, L0, Linf, K, deltaT, sigma, tempEffect ){ if (sigma == 0) { mu = log((Linf - L) * (1 - exp(-K * deltaT))) increment = exp(mu) } else { mu = log((Linf - L) * (1 - exp(-K * deltaT))) - (sigma * sigma) / 2 increment = exp(rnorm(1, mu, sigma)) } L = L + increment * tempEffect return(L) } # ------------------------------------------------------- # Dispersal # see see (Chapman et al., 2007; Holloway et al., 2016) # ------------------------------------------------------- logitKernel = function(distance, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance, basinSurface = 0, alpha2 = 0, meanBasinSurface = 0, standardDeviationBasinSurface = 1, fishLength = 0, alpha3 = 0, meanLengthAtRepro = 0, standardDeviationLengthAtRepro = 1) { logitW = alpha0 - alpha1 * (distance - meanInterDistance) / standardDeviationInterDistance + alpha2 * (basinSurface - meanBasinSurface) / standardDeviationBasinSurface + alpha3 * (fishLength - meanLengthAtRepro) / standardDeviationInterDistance; w = 1 / (1+ exp(-logitW)) return(w) } \ No newline at end of file  library(dplyr) library(tidyr) library(ggplot2) distance <- as.matrix(read.csv("../../data/input/northeastamerica/distanceGridNEA.csv", row.names = 1, stringsAsFactors = FALSE)) #distance <- as.matrix(read.csv("../../data/input/atlanticarea/distanceGridAA.csv", row.names = 1, stringsAsFactors = FALSE)) distance <- distance %>% replace(., col(.) == row(.), NA) %>% as.data.frame() %>% mutate(destination = row.names(.)) %>% pivot_longer(cols = -destination, names_to = 'departure', values_to = 'distance') # true values meanInterDistance <- mean(distance$distance, na.rm = TRUE ) standardDeviationInterDistance <- sd(distance$distance, na.rm = TRUE ) # as in XML from Rougier's application meanInterDistance = 300 standardDeviationInterDistance = 978 alpha0 = -2.9 alpha1 = 19.7 distance <- distance %>% mutate(logitW = alpha0 - alpha1 * (distance - meanInterDistance) / standardDeviationInterDistance) %>% mutate(W = 1/(1 + exp(-logitW))) #filter(departure =='Pearl', destination == 'Escambia') distance %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE)) %>% #filter(departure == 'Potomac') ggplot(aes(x = sumW)) + geom_histogram() + geom_vline(aes(xintercept = mean(sumW)), color="blue", linetype="dashed", size=1) distance %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE)) %>% summarise(sumW = mean(sumW))  library(dplyr) library(tidyr) library(ggplot2) #library(digitize) river <- read.csv("../../data/input/northeastamerica/observed_river_temperatures.csv") river %>% filter(basin_name %in% c( 'St. Johns', 'Cape Fear', 'Connecticut', 'Shubenacadie', 'St Lawrence' )) %>% filter(between(year, 1900, 1950)) %>% pivot_longer(cols = c('winter_river_temperature', 'spring_river_temperature', 'summer_river_temperature', 'fall_river_temperature')) %>% group_by(basin_id, basin_name) %>% summarise(Tmean = mean(value)) inshore <- read.csv("../../data/input/northeastamerica/observed_inshore_temperatures.csv") inshore %>% filter(basin_name %in% c( 'St. Johns', 'Cape Fear', 'Connecticut', 'Shubenacadie', 'St Lawrence' )) %>% filter(between(year, 1900, 1950)) %>% pivot_longer(cols = c('winter_inshore_temperature', 'spring_inshore_temperature', 'summer_inshore_temperature', 'fall_inshore_temperature')) %>% group_by(basin_id, basin_name) %>% summarise(Tmean = mean(value)) inshore %>% distinct(basin_name) %>% arrange(basin_name) offshore <- read.csv("../../data/input/northeastamerica/observed_offshore_temperatures.csv") offshore %>% distinct(basin_name) offshore %>% filter(basin_name %in% c( 'Bay of Fundy' )) %>% filter(between(year, 1900, 1950)) %>% pivot_longer(cols = c('winter_sea_surface_temperature', 'spring_sea_surface_temperature', 'summer_sea_surface_temperature', 'fall_sea_surface_temperature')) %>% group_by(basin_id, basin_name) %>% summarise(Tmean = mean(value)) connections = read.csv("../../data/input/northeastamerica/inshore_offshore_connections.csv") ###### growth rate of young-of-year (mm/d) #### #digitizedLimburg2003=digitize("limburg.jpeg", x1=25, x2=50, y1=0, y2=1.5) Limburg2003 = data.frame(basin_name=c('St. Johns', 'Cape Fear', 'Delaware', 'Connecticut', 'Connecticut', 'Shubenacadie', 'St Lawrence' ), publication_date = c(1972, 1967, 1969, 1976, 2000, 1935, 1987), latitude = c(29.02477, 34.05573, 40.01548, 42.10526, 42.06656, 45.12384, 45.89783), yoy_growthrate = c(0.2758621, 0.3534483, 0.4655172, 0.5646552, 0.7370690, 0.8103448, 1.3965517)) ###### mean folk lengthat at age 5 (cm) #### #digitizedLimburg2003b=digitize("Limburg2003b.png", x1=0, x2=16, y1=40, y2=52) round(digitizedLimburg2003b$y,1) Limburg2003b = data.frame(basin_name = c('St. Johns', 'Pee Dee', 'Cape Fear', 'Neuse', 'Pamlico-Tar', 'Roanoke', 'James', 'York', 'Rappahannock', 'Susquehanna', 'Delaware', 'Hudson', 'Connecticut', 'Shubenacadie', 'St John', 'St Lawrence'), mean_fork_length = c(44.6, 44.8, 46.2, 46.0, 48.1, 46.8, 43.0, 42.3, 42.8, 44.0, 50.7, 48.7, 47.9, 49.2, 45.9, 44.1)) Limburg2003b %>% mutate( basin_name = factor(basin_name, levels = basin_name)) %>% ggplot(aes(basin_name, mean_fork_length)) + geom_bar(stat="identity") + coord_cartesian(ylim = c(40, 52)) + theme(axis.text.x = element_text(angle = 45, vjust = -0.1, hjust=.0)) ##### link with river basins temperature Limburg2003 <- Limburg2003 %>% inner_join( river %>% filter(basin_name %in% c('St. Johns', 'Cape Fear', 'Delaware', 'Connecticut', 'Shubenacadie', 'St Lawrence' )) %>% filter(between(year, 1900, 1950)) %>% pivot_longer(cols = c('winter_river_temperature', 'spring_river_temperature', 'summer_river_temperature', 'fall_river_temperature')) %>% group_by(basin_id, basin_name) %>% summarise(Tmean = mean(value)), by ='basin_name') %>% select(basin_id, basin_name, latitude, Tmean, yoy_growthrate) Limburg2003 %>% ggplot(aes(latitude, yoy_growthrate)) + geom_point() +geom_text(aes(label=basin_name), hjust = 0, nudge_x = 0.2) Limburg2003 %>% ggplot(aes(Tmean, yoy_growthrate)) + geom_point() +geom_text(aes(label=basin_name), hjust = 0, nudge_x = 0.2) # ======================================================================================================= # link with offshore basin marineGrowth <- Limburg2003b %>% inner_join(connections %>% dplyr::select(inshore_basin_name, wintering_offshore_basin_name, summering_offshore_basin_name ), by = c("basin_name" = "inshore_basin_name")) temperature_offshore_growth <- offshore %>% filter(between(year, 1900, 1950)) %>% group_by(basin_id, basin_name) %>% summarise(winter_temperature = mean(winter_sea_surface_temperature), spring_temperature = mean(spring_sea_surface_temperature), summer_temperature = mean(summer_sea_surface_temperature) , fall_temperature = mean(fall_sea_surface_temperature)) marineGrowth <- marineGrowth %>% inner_join(temperature_offshore_growth %>% dplyr::select(basin_id, basin_name, winter_temperature, spring_temperature), by = c('wintering_offshore_basin_name' = 'basin_name')) %>% inner_join(temperature_offshore_growth %>% dplyr::select(basin_id, basin_name, summer_temperature, fall_temperature), by = c('summering_offshore_basin_name' = 'basin_name')) %>% select(basin_name, mean_fork_length, wintering_offshore_basin_name, summering_offshore_basin_name, winter_temperature, spring_temperature, summer_temperature, fall_temperature) marineGrowth %>% mutate(mean_temperature = (winter_temperature + spring_temperature + summer_temperature + fall_temperature)/4) %>% ggplot(aes(x = mean_temperature, y= mean_fork_length)) + geom_point() + geom_text(aes(label=basin_name), hjust = 0, nudge_x = 0.2) # == read a xml file ============================================ library(XML) fishXML <- xmlToList(xmlParse("../../data/input/atlanticarea/fishTryRealBV_CC_Alosa.xml")) growPar = data.frame(t(unlist(fishXML[[1]][["processes"]][["processesEachStep"]][["species.Grow"]]))) %>% select(-synchronisationMode ) %>% mutate(linfVonBertForMale = fishXML[[1]][["linfVonBertForMale"]], linfVonBertForFemale = fishXML[[1]][["linfVonBertForFemale"]], lengthAtHatching = fishXML[[1]][["lengthAtHatching"]], lFirstMaturityForMale = fishXML[[1]][["lFirstMaturityForMale"]], lFirstMaturityForFemale = fishXML[[1]][["lFirstMaturityForFemale"]]) %>% mutate_all(~as.numeric(as.character(.))) # === test growth rate functions ================================ source("GR3Dfunction.R") # increment with temperature effect correctionGrowth <- marineGrowth %>% select(basin_name, winter_temperature, spring_temperature, summer_temperature, fall_temperature) %>% mutate_if(is.numeric, ~temperatureEffect(., growPar$tempMinGrow, growPar$tempOptGrow, growPar$tempMaxGrow)) growData <- tibble(age = seq(0,5.75,.25), season_temperature = rep(c("spring_temperature", "summer_temperature", "fall_temperature", "winter_temperature"),6) )%>% mutate(L = vonBertalanffyGrowth(age, growPar$lengthAtHatching, growPar$linfVonBertForMale, growPar$kOptForMale), Lincrement = L * NA) growData %>% ggplot(aes(x = age)) + geom_line(aes(y = L)) # ------------------------------------------------------------------------------- # legnth at hatching growData$Lincrement[growData$age == 0] = growData$L[growData$age == 0] # next increments for (i in 2:nrow(growData)) { cat(i, "\n") tempEffect <- correctionGrowth %>% filter(basin_name == "St Lawrence") %>% select(growData$season_temperature[i]) growData[i, 'Lincrement'] = unname(vonBertalanffyWithNextIncrement( growData$Lincrement[i-1], growPar$lengthAtHatching, growPar$linfVonBertForMale, growPar$kOptForMale, deltaT = 0.25, sigma = growPar$sigmaDeltaLVonBert, tempEffect = tempEffect)) } growData %>% ggplot(aes(x=age)) + geom_line(aes(y=L)) + geom_line(aes(y=Lincrement)) + geom_abline(intercept = growPar$lFirstMaturityForMale, slope= 0, color='red') vonBertalanffyWithNextIncrement( growData$Lincrement[i-1], growPar$lengthAtHatching, growPar$linfVonBertForMale, growPar$kOptForMale, deltaT = 0.25, sigma = growPar$sigmaDeltaLVonBert, tempEffect = tempEffect)
 --- title: "Weight for the death basin for the North-East America application of GR3D" author: "P. Lambert" date: "r format(Sys.time(), '%d %B %Y')" bibliography: alosa.bib csl: ecological-modelling.csl output: word_document: df_print: kable reference_docx: ref_doc.docx # do: default --- {r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE)  {r include = FALSE} library(dplyr) library(tidyr) library(ggplot2) library(knitr) library(flextable)  {r load data, include = FALSE} distance <- as.matrix(read.csv("../../data/input/northeastamerica/distanceGridNEA.csv", row.names = 1, stringsAsFactors = FALSE)) distance <- distance %>% replace(., col(.) == row(.), NA) %>% as.data.frame() %>% mutate(destination = row.names(.)) %>% pivot_longer(cols = -destination, names_to = 'departure', values_to = 'distance') %>% dplyr::select(departure, destination, distance) %>% arrange(departure, distance) riverBasins = read.csv("../../data/input/northeastamerica/nea_riverbasins.csv")  {r load GR3D function, include = FALSE} source("GR3Dfunction.R")  From the distance grid file used by GR3D for the US application, the data, after reshaping, look like {r distance, echo=FALSE, include = FALSE} #paged.print=TRUE ft <- head(distance,15) %>% flextable() %>% set_formatter_type(fmt_double = "%.02f") %>% autofit() set_caption(ft, 'Examples of distance (in km) between departure and destination basins') #  The parameters for the kernel function based on accessibility from @rougier2015CombinedUseEmpirical are: {r} alpha0 = -2.9 alpha1 = 19.7 meanInterDistance = 300 standardDeviationInterDistance = 978  {r include = FALSE} meanInterDistance <- mean(distance$distance, na.rm = TRUE ) standardDeviationInterDistance <- sd(distance$distance, na.rm = TRUE)  Notice that the$\mu_D$and$\sigma_D$depend on the basins list considered. The true values for$\mu_D$and$\sigma_D$are respectively r meanInterDistance and r standardDeviationInterDistance. So there is a problem in the AA application since the number of basins was increased in comparison with @rougier2015CombinedUseEmpirical. {r kernel function, echo = TRUE} range = round(range(distance$distance, na.rm = TRUE)) dist = range[1]:range[2] dataKernel = data.frame(dist = dist, w = logitKernel(dist, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance)) dataKernel %>% ggplot(aes(x=dist, y=w)) + geom_line() + labs(x='distance between departure and destination basins (km)')  The weight for destination basin $j_2$ from basin $j_1$ is: $$w_{j_1\rightarrow j_2} = \frac {1} {1 + e ^{\alpha_0 + \alpha_1 \cdot {\frac {( D_{j_1\rightarrow j_2} - \mu_D)} {\sigma_D} } } }$$ {r echo =TRUE, include = FALSE} distance <- distance %>% mutate(W = logitKernel(distance, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance))  The sum of weights for the departure basin $j_1$ is: $$w_{j_1} = \sum_{j_2 \neq j_1} {w_{j_1\rightarrow j_2}}$$ The mean weight across all basins is then: $$\overline{w} = \sum_{j_1 =1}^{n_B} {w_{j_1}}$$ It is advised to use this value for the death basin weight. The histogram of weights sum in NorthEast America application is given by {r histogram, echo=FALSE, message=FALSE, warning=FALSE, fig.cap="Distribution of weight sums for departure basins"} distance %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE)) %>% ggplot(aes(x = sumW)) + geom_histogram() + geom_vline(aes(xintercept = mean(sumW)), color="blue", linetype="dashed", size=1)  {r deathBasinWeight, echo=FALSE, warning=FALSE} deathBasinWeight <- distance %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop') %>% summarise(mean(sumW)) %>% unlist()  The dashed blue line corresponds to the mean weight ($w_{j_1}$ =r round(deathBasinWeight, 4) that is advised to be used as death basin weight. For a departure basin with a $w_{j_1}$ below this value, the strayers mortality is higher than 50 %. For these departure basins, most destination basins are far from the departure basin, the sum of destination basins weights is low and the death basin is attractive. {r, echo =FALSE, warning = FALSE, include = FALSE, mortalyRateLatitude, fig.cap ="Evolution of mortality rate in the death basin according to departure basin latitude"} distance %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop') %>% mutate( mortality_rate = deathBasinWeight/(sumW + deathBasinWeight)) %>% inner_join(riverBasins %>% select(basin_name, lat_outlet, ordre), by = c("departure" ="basin_name")) %>% ggplot(aes(x=lat_outlet, y = mortality_rate)) + geom_point() + labs(x='latitude', y = 'mortality rate')  {r fake list of basin, echo false, include = TRUE} nbBasin = 100 distBetweenBasin = 10 # create a fake basin tibble basin <- tibble(basin_name=paste0('B', formatC(1:nbBasin,width=3, flag = "0"))) %>% mutate(latitude = row_number()) # create a fake basin-to-basin distance basinDistance <- tibble(departure = basin$basin_name, destination =basin$basin_name) %>% expand(departure, destination ) %>% arrange(departure, destination) %>% inner_join(basin, by=c("departure" = "basin_name")) %>% rename(latitude_departure = latitude) %>% inner_join(basin, by=c("destination" = "basin_name")) %>% rename(latitude_destination = latitude) %>% mutate(distance = abs(latitude_departure - latitude_destination ) * distBetweenBasin) drawMortalityVslatitude = function(data){ # calculate the distance characteristics of the fake universe meanInterDistance <- mean(data$distance, na.rm = TRUE) standardDeviationInterDistance <- sd(data$distance, na.rm = TRUE) # calculate weight for each basin distanceW <- data %>% mutate(W = logitKernel(distance, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance)) # calculate weight for the death basin deathBasinWeight <- distanceW %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop') %>% summarise(mean(sumW)) %>% unlist() %>% unname() # draw the mortality according to latitude distanceW %>% group_by(departure, latitude_departure ) %>% summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop') %>% mutate( mortality_rate = deathBasinWeight/(sumW + deathBasinWeight)) %>% ggplot(aes(x=latitude_departure, y = mortality_rate)) + geom_point() + labs(x='latitude rank', y = 'mortality rate') + xlim(0,100) + ylim(0.40,.70) }  {r fake regular universe, echo FALSE} mean(basinDistance$distance, na.rm = TRUE) sd(basinDistance$distance, na.rm = TRUE) drawMortalityVslatitude(data=basinDistance)  {r irregular sampling of fake universe, echo FALSE} sampledBasin <- basin %>% sample_n(25) %>% arrange(latitude) sampledBasinDistance <- basinDistance %>% inner_join(sampledBasin %>% select(basin_name), by = c("departure" = "basin_name")) %>% inner_join(sampledBasin %>% select(basin_name), by = c("destination" = "basin_name")) mean(sampledBasinDistance$distance, na.rm = TRUE) sd(sampledBasinDistance$distance, na.rm = TRUE) drawMortalityVslatitude(sampledBasinDistance)  # Comparison with HADD formulation # Conclusion The strayer mortality increases at the edge of the distribution. The selection of basins impacts the strayers mortality. It is probably safer to considered a constant rate rather a death basin.