diff --git a/include/evalhyd/detail/probabilist/evaluator.hpp b/include/evalhyd/detail/probabilist/evaluator.hpp index edae68bb644f9cd7f1aff94778a624e80f919fbe..8a4785cc19c71e2a769dc98ea942cf6fab1340a5 100644 --- a/include/evalhyd/detail/probabilist/evaluator.hpp +++ b/include/evalhyd/detail/probabilist/evaluator.hpp @@ -15,6 +15,7 @@ #include "brier.hpp" #include "quantiles.hpp" #include "contingency.hpp" +#include "ranks.hpp" namespace evalhyd @@ -57,6 +58,9 @@ namespace evalhyd xtl::xoptional<xt::xtensor<double, 5>, bool> ct_b; xtl::xoptional<xt::xtensor<double, 5>, bool> ct_c; xtl::xoptional<xt::xtensor<double, 5>, bool> ct_d; + // > Ranks-based + xtl::xoptional<xt::xtensor<double, 3>, bool> r_k; + xtl::xoptional<xt::xtensor<double, 5>, bool> o_j; // members for intermediate evaluation metrics // (i.e. before the reduction along the temporal axis) @@ -86,6 +90,10 @@ namespace evalhyd xtl::xoptional<xt::xtensor<double, 6>, bool> FAR; xtl::xoptional<xt::xtensor<double, 6>, bool> CSI; xtl::xoptional<xt::xtensor<double, 5>, bool> ROCSS; + // > Ranks-based + xtl::xoptional<xt::xtensor<double, 6>, bool> REL_DIAG; + xtl::xoptional<xt::xtensor<double, 4>, bool> DS; + xtl::xoptional<xt::xtensor<double, 4>, bool> AS; // methods to get optional parameters auto get_q_thr() @@ -242,6 +250,29 @@ namespace evalhyd return ct_d.value(); }; + xt::xtensor<double, 3> get_r_k() + { + if (!r_k.has_value()) + { + r_k = elements::calc_r_k( + q_obs, get_q_qnt(), n_mbr + ); + } + return r_k.value(); + }; + + xt::xtensor<double, 5> get_o_j() + { + if (!o_j.has_value()) + { + o_j = elements::calc_o_j( + get_r_k(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return o_j.value(); + }; + // methods to compute intermediate metrics xt::xtensor<double, 4> get_bs() { @@ -511,6 +542,42 @@ namespace evalhyd } return ROCSS.value(); }; + + xt::xtensor<double, 6> get_REL_DIAG() + { + if (!REL_DIAG.has_value()) + { + REL_DIAG = metrics::calc_REL_DIAG( + get_o_j(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return REL_DIAG.value(); + }; + + xt::xtensor<double, 4> get_DS() + { + if (!DS.has_value()) + { + DS = metrics::calc_DS( + get_o_j(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return DS.value(); + }; + + xt::xtensor<double, 4> get_AS() + { + if (!AS.has_value()) + { + AS = metrics::calc_AS( + get_r_k(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return AS.value(); + }; }; } } diff --git a/include/evalhyd/detail/probabilist/ranks.hpp b/include/evalhyd/detail/probabilist/ranks.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c841ec348e210bcb6201c594fb8ca65a55d3fa62 --- /dev/null +++ b/include/evalhyd/detail/probabilist/ranks.hpp @@ -0,0 +1,441 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_PROBABILIST_RANKS_HPP +#define EVALHYD_PROBABILIST_RANKS_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xindex_view.hpp> +#include <xtensor/xsort.hpp> +#include <xtensor/xrandom.hpp> + + +namespace evalhyd +{ + namespace probabilist + { + namespace elements + { + /// Compute the position of the observations amongst the ensemble + /// member predictions (i.e. their ranks). + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (sites, time) + /// \param q_qnt + /// Streamflow quantiles. + /// shape: (sites, lead times, quantiles, time) + /// \param n_mbr + /// Number of ensemble members. + /// \return + /// Ranks of streamflow observations. + /// shape: (sites, lead times, time) + template <class XD2, class XD4> + inline xt::xtensor<double, 3> calc_r_k( + const XD2& q_obs, + const XD4& q_qnt, + std::size_t n_mbr + ) + { + xt::xtensor<double, 3> ranks = xt::zeros<double>( + {q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)} + ); + xt::view(ranks, xt::all()) = NAN; + + xt::xtensor<double, 3> min_ranks = xt::zeros<double>( + {q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)} + ); + xt::view(min_ranks, xt::all()) = NAN; + + xt::xtensor<double, 3> max_ranks = xt::zeros<double>( + {q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)} + ); + xt::view(max_ranks, xt::all()) = NAN; + + for (int m = 0; m < n_mbr; m++) + { + // strictly below a member and no rank yet + xt::view(ranks, xt::all()) = xt::where( + (xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + < xt::view(q_qnt, xt::all(), xt::all(), m, xt::all())) + && + xt::isnan(ranks), + m, + ranks + ); + + // first time tied with a member + xt::view(min_ranks, xt::all()) = xt::where( + xt::equal(xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()), + xt::view(q_qnt, xt::all(), xt::all(), m, xt::all())) + && + xt::isnan(min_ranks), + m, + min_ranks + ); + + // again tied with a member + xt::view(max_ranks, xt::all()) = xt::where( + xt::equal(xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()), + xt::view(q_qnt, xt::all(), xt::all(), m, xt::all())) + && + !xt::isnan(min_ranks), + m + 1, + max_ranks + ); + } + + // above last member + xt::view(ranks, xt::all()) = xt::where( + xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + > xt::view(q_qnt, xt::all(), xt::all(), n_mbr - 1, xt::all()), + n_mbr, + ranks + ); + + // for ties, take random rank between min and max + xt::view(ranks, xt::all()) = xt::where( + !xt::isnan(min_ranks), + min_ranks + + xt::round((max_ranks - max_ranks + 1) + * xt::random::rand<double>(ranks.shape())), + ranks + ); + + return ranks; + } + + /// Compute the number of observations for all possible rank values. + /// + /// \param r_k + /// Ranks of streamflow observations. + /// shape: (sites, lead times, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Tallies of streamflow observations for all possible ranks. + /// shape: (sites, lead times, subsets, samples, ranks) + inline xt::xtensor<double, 5> calc_o_j( + const xt::xtensor<double, 3>& r_k, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 5> o_j = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_mbr + 1}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto r_k_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, xt::all()), + r_k, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto r_k_masked_sampled = + xt::view(r_k_masked, xt::all(), xt::all(), + b_exp[e]); + + for (int j = 0; j < n_mbr + 1; j++) + { + // compute the observed relative frequency + // $o_j = \sum_{k \in M_j} r_k$ + xt::view(o_j, xt::all(), xt::all(), m, e, j) = + xt::nansum( + xt::equal(r_k_masked_sampled, j), + -1 + ); + } + } + } + + return o_j; + } + } + + namespace metrics + { + /// Compute the reliability diagram X and Y axes. + /// + /// \param o_j + /// Tallies of streamflow observations for all possible ranks. + /// shape: (sites, lead times, subsets, samples, ranks) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// X and Y axes of the reliability diagram. + /// shape: (sites, lead times, subsets, samples, ranks, axes) + inline xt::xtensor<double, 6> calc_REL_DIAG( + const xt::xtensor<double, 5>& o_j, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 6> REL_DIAG = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, + n_mbr + 1, std::size_t {2}}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto t_msk_sampled = + xt::view(t_msk, xt::all(), xt::all(), + m, b_exp[e]); + + // calculate length of subset + auto l = xt::sum(t_msk_sampled, -1); + + // compute the observed relative frequency + // $\bar{o_j} = \frac{1}{n} \sum_{k \in M_j} r_k$ + xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(), 0) = + xt::cumsum( + xt::view(o_j, xt::all(), xt::all(), + m, e, xt::all()) + / l, + -1 + ); + + // compute the forecast probability $y_i$ + xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(), 1) = + xt::arange<double>(double(n_mbr + 1)) / n_mbr; + } + } + + return REL_DIAG; + } + + /// Compute the Delta score. + /// + /// \param o_j + /// Tallies of streamflow observations for all possible ranks. + /// shape: (sites, lead times, subsets, samples, ranks) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Delta scores. + /// shape: (sites, lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_DS( + const xt::xtensor<double, 5>& o_j, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> DS = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto t_msk_sampled = + xt::view(t_msk, xt::all(), xt::all(), + m, b_exp[e]); + + // calculate length of subset + auto l = xt::sum(t_msk_sampled, -1); + + // compute the Delta score + // $\bar{o_j} = \frac{1}{n} \sum_{k \in M_j} r_k$ + xt::view(DS, xt::all(), xt::all(), m, e) = + xt::nansum( + xt::square( + xt::view(o_j, xt::all(), xt::all(), m, e, xt::all()) + - (l / (n_mbr + 1)) + ), + -1 + ) / (l * n_mbr / (n_mbr + 1)); + } + } + + return DS; + } + + /// Compute the Alpha score. + /// + /// \param r_k + /// Ranks of streamflow observations. + /// shape: (sites, lead times, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Alpha scores. + /// shape: (sites, lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_AS( + const xt::xtensor<double, 3>& r_k, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> AS = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute one site and one leadtime at a time because of + // potential NaN (of varying numbers across sites/lead times) + // in the ranks that are not put at the end with `xt::sort` + // (unlike `numpy.sort`) which prevent from an easy conversion + // from rank to probability + for (std::size_t s = 0; s < n_sit; s++) + { + for (std::size_t l = 0; l < n_ldt; l++) + { + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto r_k_masked = xt::where( + xt::view(t_msk, s, l, m, xt::all()), + xt::view(r_k, s, l, xt::all()), + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto r_k_masked_sampled = + xt::view(r_k_masked, b_exp[e]); + + // notations below follow Renard et al. (2010) + // https://doi.org/10.1029/2009WR008328 + + // compute observed p values + // $p_{x(i)}$ + auto p_x_i = xt::sort( + xt::eval( + // filter out NaNs + xt::filter( + r_k_masked_sampled, + !xt::isnan(r_k_masked_sampled) + ) + / n_mbr + ) + ); + + // calculate length of realisations + // $N_x$ + auto N_x = p_x_i.size(); + + // compute theoretical p values + // $p_{x(i)}^{(th)}$ + auto p_x_i_th = + xt::arange<double>(double(N_x)) / (N_x - 1); + + // compute area between the predictive curve and + // the 1:1 line in the Q-Q plot + // $\alpha'_x$ + auto alpha_prime_x = xt::nanmean( + xt::abs(p_x_i - p_x_i_th) + ); + + // compute the alpha score + // $\alpha_x = 1 - 2 \alpha'_x$ + xt::view(AS, s, l, m, e) = 1 - 2 * alpha_prime_x; + } + } + } + } + + return AS; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_RANKS_HPP \ No newline at end of file diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp index 6c52fac81e46400fd108bdfd386a483b3f40948a..26b34330d700785be3dc6767d284ca170b972ecf 100644 --- a/include/evalhyd/evalp.hpp +++ b/include/evalhyd/evalp.hpp @@ -198,7 +198,8 @@ namespace evalhyd metrics, {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS", - "POD", "POFD", "FAR", "CSI", "ROCSS"} + "POD", "POFD", "FAR", "CSI", "ROCSS", + "REL_DIAG", "DS", "AS"} ); // check optional parameters @@ -436,6 +437,24 @@ namespace evalhyd uncertainty::summarise(evaluator.get_ROCSS(), summary) ); } + else if ( metric == "REL_DIAG" ) + { + r.emplace_back( + uncertainty::summarise(evaluator.get_REL_DIAG(), summary) + ); + } + else if ( metric == "DS" ) + { + r.emplace_back( + uncertainty::summarise(evaluator.get_DS(), summary) + ); + } + else if ( metric == "AS" ) + { + r.emplace_back( + uncertainty::summarise(evaluator.get_AS(), summary) + ); + } } return r;