exploration of death Basin weight in RmarkDown

802ec497 · patrick.lambert · e564e663 · 802ec497 · e564e663 · 802ec497
Commit 802ec497 authored 4 years ago by patrick.lambert
Hide whitespace changes
Inline Side-by-side

Showing

with 261 additions and 135234 deletions
+261 -135234
--- a/.gitignore
+++ b/.gitignore
@@ -123,3 +123,5 @@ org.*
 /src/main/java/species/testAlea.java
 /data/input/northeastamerica/shape/*.qix
 /src/main/java/species/Essai.java
+/exploration/GR3D_Rdescription/.RData
+/exploration/GR3D_Rdescription/.Rhistory
--- a/exploration/GR3D_Rdescription/Documents exportés.bib
+++ b/exploration/GR3D_Rdescription/Documents exportés.bib
-
--- a/exploration/GR3D_Rdescription/GR3Dfunction.R
+++ b/exploration/GR3D_Rdescription/GR3Dfunction.R
@@ -66,6 +66,16 @@ logitKernel =  function(distance, alpha0, alpha1, meanInterDistance, standardDev
    alpha2 * (basinSurface - meanBasinSurface) / standardDeviationBasinSurface +
    alpha3 * (fishLength - meanLengthAtRepro) / standardDeviationInterDistance;
  
-  w = 1 / (1+ exp(-logitW))
-  return(w)
-}
\ No newline at end of file
+  W = 1 / (1+ exp(-logitW))
+  return(W)
+}
+# extended negative exponential kernel
+eneKernel =  function(distance, alpha_D, beta_D){
+  W = exp(-alpha_D * distance ^ beta_D)
+  return(W)
+}
+
+strayerSurvival = function(distance, m ){
+  # m: extra mortality rate for strayers (L-1)
+  return(exp(-m * distance))
+}
--- a/exploration/GR3D_Rdescription/alosa.bib
+++ b/exploration/GR3D_Rdescription/alosa.bib
--- a/exploration/GR3D_Rdescription/biblio.bib
+++ b/exploration/GR3D_Rdescription/biblio.bib
+
+@article{rougier2015,
+	title = {The combined use of empirical and mechanistic species distribution models benefits low conservation status species},
+	author = {{Rougier}, {Thibaud} and {Lassalle}, {Géraldine} and {Drouineau}, {Hilaire} and {Dumoulin}, {Nicolas} and {Faure}, {Thierry} and {Deffuant}, {Guillaume} and {Rochard}, {Eric} and {Lambert}, {Patrick}},
+	year = {2015},
+	date = {2015},
+	journal = {PLoS ONE},
+	pages = {e0139194},
+	volume = {10},
+	number = {10},
+	doi = {10.1371/journal.pone.0139194}
+}
--- a/exploration/GR3D_Rdescription/deathBasinW.Rmd
+++ b/exploration/GR3D_Rdescription/deathBasinW.Rmd
@@ -2,20 +2,24 @@
 title: "Weight for the death basin for the North-East America application of GR3D"
 author: "P. Lambert"
 date: "`r format(Sys.time(), '%d %B %Y')`"
-bibliography: alosa.bib
+bibliography: biblio.bib
 csl: ecological-modelling.csl
 output:
  word_document: 
    df_print: kable
    reference_docx: ref_doc.docx
+    toc: yes
+    fig_caption: yes
 #    do: default
+editor_options: 
+  chunk_output_type: console
 ---

 ```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
+knitr::opts_chunk$set(echo = TRUE, include = FALSE,  fig.alig ='left')
 ```

-```{r include = FALSE}
+```{r library}
 library(dplyr)
 library(tidyr)
 library(ggplot2)
@@ -23,99 +27,225 @@ library(knitr)
 library(flextable)
 ```

-```{r load data, include = FALSE}
-distance <-  as.matrix(read.csv("../../data/input/northeastamerica/distanceGridNEA.csv", row.names = 1, stringsAsFactors = FALSE))
-distance <- distance %>%
+```{r load data}
+rm(list = ls())
+
+# load list of basins  and distance matrix (with "_" instead of " " and "." in basin name)
+distanceAA <-  as.matrix(read.csv("../../data/input/atlanticarea/distanceGridAA.csv", row.names = 1, stringsAsFactors = FALSE))
+distanceAA <- distanceAA %>%
  replace(., col(.) == row(.), NA) %>% 
-  as.data.frame() %>% mutate(destination = row.names(.)) %>% 
+  as.data.frame() %>% mutate(destination = str_replace_all(row.names(.), "([ '\\-\\.])", "_")) %>% 
+  pivot_longer(cols = -destination, names_to = 'departure', values_to = 'distance') %>%
+  mutate(departure = str_replace_all(departure, "([ '\\-\\.])", "_")) %>% 
+  dplyr::select(departure, destination, distance) %>% 
+  arrange(departure, distance)
+
+riverBasinsAA =  read.csv("../../data/input/atlanticarea/aa_basins.csv") %>% 
+  mutate(basin_name = str_replace_all(basin_name, "([ '\\-\\.])", "_"))
+
+distanceNEA <-  as.matrix(read.csv("../../data/input/northeastamerica/distanceGridNEA.csv", row.names = 1, stringsAsFactors = FALSE))
+distanceNEA <- distanceNEA %>%
+  replace(., col(.) == row(.), NA) %>% 
+  as.data.frame() %>% mutate(destination = str_replace_all(row.names(.), "([ '\\-\\.])", "_")) %>% 
  pivot_longer(cols = -destination, names_to = 'departure', values_to = 'distance') %>% 
+    mutate(departure = str_replace_all(departure, "([ '\\-\\.])", "_")) %>% 
  dplyr::select(departure, destination, distance) %>% 
  arrange(departure, distance)

-riverBasins =  read.csv("../../data/input/northeastamerica/nea_riverbasins.csv")
+riverBasinsNEA =  read.csv("../../data/input/northeastamerica/nea_riverbasins.csv") %>% 
+  mutate(basin_name = str_replace_all(basin_name, "([ '\\-\\.])", "_")) 
+
+# nameRiver = riverBasinsNEA %>%  select(basin_name) %>% arrange(basin_name)
+# nameDistance = distanceNEA %>% distinct(departure) %>% rename(basin_name = departure) %>% arrange(basin_name)
+#  nameRiver %>% setdiff(nameDistance)
+# nameDistance %>% setdiff(nameRiver)
+
+# riverAARougier2015 = read.csv("basinsRougieretal2015.csv", stringsAsFactors = FALSE) %>% 
+#   rename(basin_id = id, basin_name = nomBV) %>% 
+#   mutate(basin_name =replace(basin_name, basin_name == "Sevre_Niortaise", "Sevre Niortaise"))

 ```

-```{r load GR3D function, include = FALSE}
+```{r load GR3D function}
 source("GR3Dfunction.R")

 ```

-From the distance grid file used by GR3D for the US application, the data, after reshaping, look like
+# Computation of the death basin weight
+
+## Baseline
+
+The kernel function presently used in GR3D is only based on basin accessibility (linked to distance between basins) even if a generic formulation including basin attractivity (related to basin size) and fish ability (based on fish lenght) is proposed in @rougier2015.
+
+The first step is to compute for a departure $j_1$ the weight of each destination basin $j_2$ using:
+
+$$w_{j_1\rightarrow j_2} = \frac {1} {1 +  e ^{\alpha_0 + \alpha_1 \cdot {\frac {( D_{j_1\rightarrow j_2}   - \mu_D)} {\sigma_D} } } }$$
+
+where $D_{j_1\rightarrow j_2}$ is the distance between the departure and destination basins, $\alpha_0$ and $\alpha_1$ are the kernel parameters, $\mu_D$ and $\sigma_D$ are the mean and standard deviation between inter basin distances.

-```{r distance, echo=FALSE, include = FALSE}
+The last two parameters were introduced to standardise distance when accessibility is combining with attractivity and ability. When only considering only accessibility, $\mu_D$ and $\sigma_D$ are simply linked to the distance a strayer can reach. There is no need to be changed when the basins network (number and location of basins) change. Definitively the definition as mean and standard deviation of distances is confusing.
+
+The sum of weights for the departure basin $j_1$ is: $$w_{j_1} = \sum_{j_2 \neq j_1} {w_{j_1\rightarrow j_2}}$$
+
+A strayers' mortality is added by considering a "death basin" with a constant weight $w_{death}$. The probability $p_{j_1 \rightarrow j_2}$ for a strayer to reach a destination basin $j_2$ from departure basin $j_1$ is
+
+$$
+p_{j_1 \rightarrow j_2} =  \frac {w_{j_1 \rightarrow j_2}} {w_{death} + w_{j_1}} 
+$$
+
+With these equations, the strayer mortality $sm_{j_1}$ from a departure basin is given by:
+
+$$ sm_{ j_1} = \frac {w_{death}} { w_{death}+w_{j_1} }$$
+
+The efficiency for strayers $se_{j2}$ to reach a destination basin (considering abundance in the departure basin proportional to the surface area of this basin) can be computed with:
+
+$$
+se_{j_2} = \frac {\sum_{j_1}{A_{j_1} \cdot  p_{j_1 \rightarrow j_2} }} {\sum_{j_1} {A_{j_1}}}
+$$
+
+## Atlantic area application
+
+From the distance grid file used by GR3D for the AA application, the data, after reshaping, look like
+
+```{r distance, echo =FALSE, warning = FALSE, include = TRUE}
 #paged.print=TRUE
-ft <- head(distance,15) %>%   flextable() %>% set_formatter_type(fmt_double = "%.02f") %>% autofit()
+ft <- head(distanceAA,15) %>%   flextable() %>% set_formatter_type(fmt_double = "%.02f") %>% autofit()
 set_caption(ft, 'Examples of distance (in km) between departure and destination basins')
-#
-```

-The parameters for the kernel function based on accessibility from @rougier2015CombinedUseEmpirical are:
+```

 ```{r}
 alpha0 = -2.9
 alpha1 =  19.7
 meanInterDistance = 300
 standardDeviationInterDistance = 978
+WDeathBasin = .4
+```

+The kernel parameters for Atlantic Area application defined by @rougier2015 were $\alpha_0$ = `r alpha0`, $\alpha_1$ = `r alpha1`, $\mu_D$ = `r meanInterDistance` km and $\sigma_D$ = `r standardDeviationInterDistance` km. The death basin weight was $r$ = `r WDeathBasin`.
+
+```{r}
+dist = seq(1,500, 10)
+dataKernel = data.frame(dist = dist, W  = logitKernel(dist, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance))
 ```

-```{r include = FALSE}
-meanInterDistance <- mean(distance$distance, na.rm = TRUE )
-standardDeviationInterDistance <- sd(distance$distance, na.rm = TRUE) 
+```{r drawKernelFunctionAA, echo =FALSE, warning = FALSE, include = TRUE, fig.cap="Kernel function for AA application"}
+dataKernel %>% ggplot(aes(x=dist, y=W)) + geom_line() + labs(x='distance between departure and destination basins (km)')
 ```

-Notice that the $\mu_D$ and $\sigma_D$ depend on the basins list considered. The true values for $\mu_D$ and $\sigma_D$ are respectively `r meanInterDistance` and `r standardDeviationInterDistance`. So there is a problem in the AA application <!--# calculte rhe value for Rougier 2015 --> since the number of basins was increased in comparison with @rougier2015CombinedUseEmpirical.
+```{r}
+# riverAARougier2015 %>% select(basin_name) %>% setdiff(riverBasinsAA %>% select(basin_name))
+# 
+# meanW_AA <-  distanceAA %>% inner_join(riverAARougier2015, by =c('departure' = 'basin_name')) %>% 
+#   inner_join(riverAARougier2015, by =c('destination' = 'basin_name')) %>% 
+#   group_by(departure) %>% 
+#   summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop') %>% 
+#   summarise(mean(sumW)) %>% unlist() %>%  unname()

-```{r kernel function, echo = TRUE}
-range = round(range(distance$distance, na.rm = TRUE))
-dist = range[1]:range[2]
-dataKernel = data.frame(dist = dist, w  = logitKernel(dist, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance))
-dataKernel %>% ggplot(aes(x=dist, y=w)) + geom_line() + labs(x='distance between departure and destination basins (km)')
 ```

-The weight for destination basin $j_2$ from basin $j_1$ is:
-
-$$w_{j_1\rightarrow j_2} = \frac {1} {1 +  e ^{\alpha_0 + \alpha_1 \cdot {\frac {( D_{j_1\rightarrow j_2}   - \mu_D)} {\sigma_D} } } }$$
+```{r computeWeigthAA, echo = TRUE}
+# compute weight
+distanceAA <- distanceAA %>% 
+  mutate(W =  logitKernel(distance, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance))
+
+# add surface area for departure basins
+extendedDistance  = distanceAA %>% 
+  inner_join(riverBasinsAA %>% select(basin_name, surface_area_drainage_basin), by=c('departure' = 'basin_name')) %>% 
+  rename(surface_departure = surface_area_drainage_basin)
+
+# calculate sum W
+extendedDistance <- extendedDistance %>% 
+  inner_join(extendedDistance %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE), .groups = "drop"), by=c('departure'))
+
+# compute strayer mortality and strayer efficiency
+resultAA  <- extendedDistance %>% distinct(departure, sumW) %>%
+  mutate(sm_departure = WDeathBasin / (WDeathBasin + sumW)) %>%
+  select(departure, sm_departure) %>%
+  rename(basin_name = departure) %>%
+  inner_join(
+    extendedDistance %>%
+      mutate(p12 = W / (WDeathBasin + sumW), Ap12 = surface_departure * p12) %>%
+      group_by(destination) %>% summarise(
+        sumA = sum(surface_departure),
+        sumAp12 = sum(Ap12, na.rm = TRUE),
+        .groups = 'drop'
+      ) %>%
+      mutate(se_destination = sumAp12 / sumA) %>%
+      select (destination, se_destination) %>%
+      rename(basin_name = destination),
+    by = 'basin_name'
+  ) %>% 
+  inner_join(riverBasinsAA %>% select(basin_name, lat_outlet), by='basin_name') %>% 
+  rename(latitude = lat_outlet)
+
+resultAA 
+```

-```{r echo =TRUE, include = FALSE}
-distance <- distance %>% mutate(W =  logitKernel(distance, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance))
+```{r, fig.cap="Evolution od}
+resultAA %>% ggplot(aes(x=latitude, y=sm_departure)) + geom_point() + labs(x="departure latitude (°)", y = "strayer mortality rate")
 ```

-The sum of weights for the departure basin $j_1$ is: $$w_{j_1} = \sum_{j_2 \neq j_1} {w_{j_1\rightarrow j_2}}$$
+```{r}
+resultAA %>%  ggplot(aes(x=latitude, y=se_destination)) + geom_point() + labs(x="destination latitude (°)", y = "strayer efficiency")
+```

-The mean weight across all basins is then: $$\overline{w} = \sum_{j_1 =1}^{n_B} {w_{j_1}}$$
+## North East America application

-It is advised to use this value for the death basin weight.
+```{r computeDistanceFeatures}
+# meanInterDistance <- mean(distanceNEA$distance, na.rm = TRUE )
+# standardDeviationInterDistance <- sd(distanceNEA$distance, na.rm = TRUE) 
+```

-The histogram of weights sum in NorthEast America application is given by

-```{r histogram, echo=FALSE, message=FALSE, warning=FALSE, fig.cap="Distribution of weight sums for departure basins"}
-distance %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE)) %>%
-  ggplot(aes(x = sumW)) + geom_histogram() + 
-  geom_vline(aes(xintercept = mean(sumW)), color="blue", linetype="dashed", size=1)
+```{r computeWeighthNEA}
+distanceNEA <- distanceNEA %>% 
+  mutate(W =  logitKernel(distance, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance))
+
+extendedDistance  = distanceNEA %>% 
+  inner_join(riverBasinsNEA %>% select(basin_name, areasqkm), by=c('departure' = 'basin_name')) %>% 
+  rename(surface_departure = areasqkm)
+
+
+extendedDistance <- extendedDistance %>% inner_join(extendedDistance %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE), .groups = "drop"), by=c('departure'))
+
+resultNEA  <- extendedDistance %>% distinct(departure, sumW) %>%
+  mutate(sm_departure = WDeathBasin / (WDeathBasin + sumW)) %>%
+  select(departure, sm_departure) %>%
+  rename(basin_name = departure) %>%
+  inner_join(
+    extendedDistance %>%
+      mutate(p12 = W / (WDeathBasin + sumW), Ap12 = surface_departure * p12) %>%
+      group_by(destination) %>% summarise(
+        sumA = sum(surface_departure),
+        sumAp12 = sum(Ap12, na.rm = TRUE),
+        .groups = 'drop'
+      ) %>%
+      mutate(se_destination = sumAp12 / sumA) %>%
+      select (destination, se_destination) %>%
+      rename(basin_name = destination),
+    by = 'basin_name'
+  ) %>% 
+  inner_join(riverBasinsNEA %>% select(basin_name, lat_outlet), by='basin_name') %>% 
+  rename(latitude = lat_outlet)

 ```
-
-```{r deathBasinWeight, echo=FALSE, warning=FALSE}
-deathBasinWeight <- distance %>% 
-  group_by(departure) %>% 
-  summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop') %>% 
-  summarise(mean(sumW)) %>% unlist()
+```{r, fig.cap="Evolution of strayer's mortality according to depature basin latitude in the NEA zone"}
+resultNEA %>% ggplot(aes(x=latitude, y=sm_departure)) + 
+  geom_point() + labs(x="departure latitude (°)", y = "strayer mortality rate")
+```
+```{r, fig.cap="Evolution of strayer's efficiency according to destination basin latitude in the NEA zone"}
+resultNEA %>% ggplot(aes(x=latitude, y=se_destination)) + 
+  geom_point() + labs(x="destination latitude (°)", y = "strayer's efficiency")
 ```

-The dashed blue line corresponds to the mean weight ($w_{j_1}$ =`r round(deathBasinWeight, 4)` that is advised to be used as death basin weight. For a departure basin with a $w_{j_1}$ below this value, the strayers mortality is higher than 50 %. For these departure basins, most destination basins are far from the departure basin, the sum of destination basins weights is low and the death basin is attractive.

-```{r, echo =FALSE, warning = FALSE, include = FALSE, mortalyRateLatitude, fig.cap ="Evolution of mortality rate in the death basin according to departure basin latitude"}
-distance %>% group_by(departure) %>% summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop') %>% 
-  mutate( mortality_rate =  deathBasinWeight/(sumW + deathBasinWeight)) %>% 
-  inner_join(riverBasins %>% select(basin_name, lat_outlet, ordre), by = c("departure" ="basin_name")) %>% 
-  ggplot(aes(x=lat_outlet, y = mortality_rate)) + geom_point() + labs(x='latitude', y = 'mortality rate')
+## Virtual linear network of basin

-```
+### full universe

 ```{r fake list of basin, echo false, include = TRUE}
-nbBasin = 100
+nbBasin = 150
 distBetweenBasin = 10
 # create a fake basin tibble
 basin <- tibble(basin_name=paste0('B', formatC(1:nbBasin,width=3, flag = "0"))) %>% mutate(latitude = row_number())
@@ -125,60 +255,77 @@ basinDistance <- tibble(departure = basin$basin_name, destination =basin$basin_n
  expand(departure, destination ) %>% arrange(departure, destination) %>% 
  inner_join(basin, by=c("departure" = "basin_name"))  %>% rename(latitude_departure = latitude) %>% 
  inner_join(basin, by=c("destination" = "basin_name"))  %>% rename(latitude_destination = latitude) %>% 
-  mutate(distance = abs(latitude_departure - latitude_destination ) * distBetweenBasin)
-
-
-drawMortalityVslatitude = function(data){
-  # calculate the distance characteristics of the fake universe
-  meanInterDistance <- mean(data$distance, na.rm = TRUE) 
-  standardDeviationInterDistance <- sd(data$distance, na.rm = TRUE) 
-  
-  # calculate weight for each basin
-  distanceW <- data %>% mutate(W =  logitKernel(distance, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance))
-  
-  # calculate weight for the death basin
-  deathBasinWeight <- distanceW %>% 
-    group_by(departure) %>% 
-    summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop') %>% 
-    summarise(mean(sumW)) %>% unlist() %>% unname()
-  
-  # draw the mortality according to latitude
-  distanceW %>% group_by(departure, latitude_departure ) %>% summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop') %>% 
-    mutate( mortality_rate =  deathBasinWeight/(sumW + deathBasinWeight)) %>% 
-    ggplot(aes(x=latitude_departure, y = mortality_rate)) + geom_point() + labs(x='latitude rank', y = 'mortality rate') +
-    xlim(0,100) + ylim(0.40,.70)
-}
+  mutate(distance = abs(latitude_departure - latitude_destination ) * distBetweenBasin) %>% 
+  mutate(W =  logitKernel(distance, alpha0, alpha1, meanInterDistance, standardDeviationInterDistance)) 
+
+# compute sumW an p12
+extendedDistance = basinDistance %>% 
+  inner_join(basinDistance %>% group_by(departure ) %>% 
+  summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop'), by='departure') %>% 
+  mutate(p12 = W/(sumW + WDeathBasin))
+```
+```{r sm_fakeUniverse, fig.cap="Evolution of strayer mortality according to latitude departure" }  
+extendedDistance %>% distinct(departure, latitude_departure, sumW) %>% 
+  mutate(sm_departure = WDeathBasin /(WDeathBasin + sumW)) %>% 
+  ggplot(aes(x=latitude_departure, y = sm_departure)) + geom_point() + labs(x='latitude rank', y = 'strayer mortality rate') +
+    xlim(0,150) + ylim(0.0,.050)
 ```

-```{r fake regular universe, echo FALSE}
-
-mean(basinDistance$distance, na.rm = TRUE) 
-sd(basinDistance$distance, na.rm = TRUE) 
-
-drawMortalityVslatitude(data=basinDistance)
-
+```{r se_fakeUniverse, fig.cap="Evolution of strayer efficiency according to latitude departure" } 
+extendedDistance %>% group_by(destination, latitude_destination) %>% summarise(se_destination = mean(p12), .groups ='drop') %>% 
+    ggplot(aes(x=latitude_destination, y = se_destination)) + geom_point() + labs(x='latitude rank', y = 'strayer efficiency') +
+    xlim(0,150) + ylim(0.0,.010)
 ```

-```{r  irregular sampling of fake universe, echo FALSE}
+###Sampled universe

+```{r  irregular sampling of fake universe, echo FALSE}
 sampledBasin <- basin %>% sample_n(25) %>%  arrange(latitude)

 sampledBasinDistance <- basinDistance %>% inner_join(sampledBasin %>% select(basin_name), by = c("departure" = "basin_name")) %>% 
  inner_join(sampledBasin %>% select(basin_name), by = c("destination" = "basin_name"))

- mean(sampledBasinDistance$distance, na.rm = TRUE) 
-sd(sampledBasinDistance$distance, na.rm = TRUE) 
+# compute sumW an p12
+extendedSampledDistance = sampledBasinDistance %>% 
+  inner_join(sampledBasinDistance %>% group_by(departure ) %>% 
+  summarise(sumW = sum(W, na.rm = TRUE), .groups = 'drop'), by='departure') %>% 
+  mutate(p12 = W/(sumW + WDeathBasin))
+```
+```{r sm_sampledUniverse, fig.cap="Evolution of strayer mortality according to latitude departure" }  
+extendedSampledDistance %>% distinct(departure, latitude_departure, sumW) %>% 
+  mutate(sm_departure = WDeathBasin /(WDeathBasin + sumW)) %>% 
+  ggplot(aes(x=latitude_departure, y = sm_departure)) + geom_point() + labs(x='latitude rank', y = 'strayer mortality rate') +
+    xlim(0,150) + ylim(0.0,.2)
+```

-drawMortalityVslatitude(sampledBasinDistance)
+```{r se_SampledUniverse, fig.cap="Evolution of strayer efficiency according to latitude departure" } 
+extendedSampledDistance %>% group_by(destination, latitude_destination) %>% 
+  summarise(se_destination = mean(p12), .groups ='drop') %>% 
+    ggplot(aes(x=latitude_destination, y = se_destination)) + geom_point() + labs(x='latitude rank', y = 'strayer efficiency') +
+    xlim(0,150) + ylim(0.0,0.1)
 ```
+# Comparison with HADiaD formulation
+
+```{r echo =TRUE}
+alpha_D = 0.0608
+beta_D = 0.655
+m = -log(0.464)/41
+HADiaD <-  basinDistance %>% mutate(W = eneKernel(distance, alpha_D, beta_D), strayerMortalityRate = 1- strayerSurvival(distance, m) )

-# Comparison with HADD formulation
+tibble(distance = 0:500) %>% mutate(strayerMortalityRate = 1- strayerSurvival(distance, m)) %>% 
+  ggplot(aes(x= distance, y =strayerMortalityRate)) + geom_point()
+
+HADiaD %>%   group_by(departure, latitude_departure) %>%
+  summarise(sumW=sum(W), sm_depature = weighted.mean(strayerMortalityRate, W) )%>% 
+  ggplot(aes(x= latitude_departure, y =sm_depature)) + geom_point()
+
+```

 # Conclusion

 The strayer mortality increases at the edge of the distribution.

-The selection of basins impacts the strayers mortality. It is probably safer to considered a constant rate rather a death basin.
+The selection of basins impacts the strayers mortality. It is probably safer to considered a constant rate rather a death basin. It is a priority to improve the coverage of the area by increasing the number of basins considered.

 The logit function to compute the basin weights introduces a plateau for the short distance that leads to random destination in the the departure vicinity


--- a/exploration/GR3D_Rdescription/deathBasinW.docx
+++ b/exploration/GR3D_Rdescription/deathBasinW.docx