From ac411b5543e6be0f50a38172dd3b10e6f3aa7f2c Mon Sep 17 00:00:00 2001 From: Thibault Hallouin <thibault.hallouin@inrae.fr> Date: Wed, 11 May 2022 16:11:45 +0200 Subject: [PATCH] wrap calculation of probabilistic scores into a single function The purpose of such refactoring is to allow for a "memoisation" approach (i.e. avoid re-computing the same elements/metrics multiple times). By redefining the API as a one-entry point function `evaluate` (as opposed to calling each metric in turn), it makes it possible to know in advance the metrics required by the user, and by keeping a register (hard coded for now) of what unitary computation elements each metric requires, and which metrics are nested in other metrics, it is possible to avoid computation repetitions. --- include/evalhyd/probabilistic.hpp | 444 +++++---------------- include/evalhyd/probabilistic/brier.hpp | 307 ++++++++++++++ include/evalhyd/probabilistic/elements.hpp | 99 +++++ include/evalhyd/utils.hpp | 65 +-- 4 files changed, 544 insertions(+), 371 deletions(-) create mode 100644 include/evalhyd/probabilistic/brier.hpp create mode 100644 include/evalhyd/probabilistic/elements.hpp diff --git a/include/evalhyd/probabilistic.hpp b/include/evalhyd/probabilistic.hpp index 2cc541d..a241634 100644 --- a/include/evalhyd/probabilistic.hpp +++ b/include/evalhyd/probabilistic.hpp @@ -1,365 +1,119 @@ #ifndef EVALHYD_PROBABILISTIC_HPP #define EVALHYD_PROBABILISTIC_HPP -#include <xtensor/xexpression.hpp> -#include <xtensor/xmath.hpp> -#include <xtensor/xoperation.hpp> -#include <xtensor/xindex_view.hpp> -#include <xtensor/xio.hpp> +#include <unordered_map> +#include <unordered_set> +#include <xtensor/xtensor.hpp> #include "utils.hpp" +#include "probabilistic/elements.hpp" +#include "probabilistic/brier.hpp" namespace eh = evalhyd; namespace evalhyd { - namespace detail - { - // determine whether exceedance event has occurred - // q_obs shape: (1, time) - // q_thr shape: (thresholds,) - // returned shape: (thresholds, time) - xt::xtensor<double, 2> event_observed_outcome( + namespace probabilist { + /// Function allowing the evaluation of streamflow forecasts using a + /// range of relevant metrics. + /// + /// \param [in] metrics: + /// Vector of strings for the metric(s) to be computed. + /// \param [in] q_obs: + /// 2D array of streamflow observations. + /// shape: (1, time) + /// \param [in] q_frc: + /// 2D array of streamflow forecasts. + /// shape: (members, time) + /// \param [in] q_thr: + /// 1D array of streamflow exceedance threshold(s). + /// shape: (thresholds,) + /// \return + /// Vector of 1D array of metrics for each threshold. + std::vector<xt::xtensor<double, 1>> evaluate( + const std::vector<std::string>& metrics, const xt::xtensor<double, 2>& q_obs, - const xt::xtensor<double, 1>& q_thr - ) - { - // determine observed realisation of threshold(s) exceedance - return xt::squeeze(eh::utils::is_above_threshold(q_obs, q_thr)); - } - - // determine forecast probability of event to occur - // q_obs shape: (members, time) - // q_thr shape: (thresholds,) - // returned shape: (thresholds, time) - xt::xtensor<double, 2> event_probability_forecast( const xt::xtensor<double, 2>& q_frc, const xt::xtensor<double, 1>& q_thr ) { - // determine if members have exceeded threshold(s) - xt::xtensor<double, 3> e_frc = - eh::utils::is_above_threshold(q_frc, q_thr); - - // calculate how many members have exceeded threshold(s) - xt::xtensor<double, 2> n_frc = xt::sum(e_frc, 1); - - // determine correspondence between number of exceeding members - // and forecast probabilities of exceedance - // /!\ probability calculation dividing by n (the number of - // members), not n+1 (the number of ranks) like other metrics - std::size_t n_mbr = q_frc.shape(0); - xt::xtensor<double, 2> p_frc = n_frc / n_mbr; - - return p_frc; + // declare maps for memoisation purposes + std::unordered_map<std::string, std::vector<std::string>> elt; + std::unordered_map<std::string, xt::xtensor<double, 2>> mem_elt; + std::unordered_map<std::string, std::vector<std::string>> dep; + std::unordered_map<std::string, xt::xtensor<double, 1>> mem_dep; + + // register potentially recurring computation elt across metrics + elt["bs"] = {"o_k", "y_k"}; + elt["bss"] = {"o_k"}; + elt["bs_crd"] = {"o_k", "y_k"}; + elt["bs_lbd"] = {"o_k", "y_k"}; + + // register nested metrics (i.e. metric dependent on another metric) + dep["bss"] = {"bs"}; + + // determine required elt/dep to be pre-computed + std::unordered_set<std::string> req_elt = {}; + std::unordered_set<std::string> req_dep = {}; + + eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); + + // pre-compute required elt + for (const auto& element : req_elt) + { + if ( element == "o_k" ) + mem_elt["o_k"] = + eh::elements::event_observed_outcome(q_obs, q_thr); + else if ( element == "y_k" ) + mem_elt["y_k"] = + eh::elements::event_probability_forecast(q_frc, q_thr); + } + + // pre-compute required dep + for (const auto& dependency : req_dep) + { + if ( dependency == "bs" ) + mem_dep["bs"] = eh::bs(mem_elt); + } + + // retrieve or compute requested metrics + std::vector<xt::xtensor<double, 1>> r; + + for (const auto& metric : metrics) + { + // if exists, retrieve pre-computed metric + if (mem_dep.find(metric) != mem_dep.end()) + { + r.emplace_back(mem_dep[metric]); + } + // else, compute relevant metric + else if ( metric == "bs" ) + { + r.emplace_back(eh::bs(mem_elt)); + } + else if ( metric == "bss" ) + { + r.emplace_back(eh::bss(mem_elt, mem_dep)); + } + else if ( metric == "bs_crd" ) + { + auto c = eh::bs_crd(mem_elt, q_frc.shape(0)); + r.emplace_back(xt::row(c, 0)); + r.emplace_back(xt::row(c, 1)); + r.emplace_back(xt::row(c, 2)); + } + else if ( metric == "bs_lbd" ) + { + auto c = eh::bs_lbd(mem_elt); + r.emplace_back(xt::row(c, 0)); + r.emplace_back(xt::row(c, 1)); + r.emplace_back(xt::row(c, 2)); + } + } + + return r; } } - - /// Compute the Brier Score (BS). - /// - /// \param [in] q_obs: - /// 2D array of streamflow observations. - /// shape: (1, time) - /// \param [in] q_frc: - /// 2D array of streamflow forecasts. - /// shape: (members, time) - /// \param [in] q_thr: - /// 1D array of streamflow exceedance threshold(s). - /// shape: (thresholds,) - /// \return - /// 1D array of Brier Score for each threshold. - /// shape: (thresholds,) - xt::xtensor<double, 1> bs( - const xt::xtensor<double, 2>& q_obs, - const xt::xtensor<double, 2>& q_frc, - const xt::xtensor<double, 1>& q_thr - ) - { - // get observed event and forecast probability of event - xt::xtensor<double, 2> p_obs = - detail::event_observed_outcome(q_obs, q_thr); - - xt::xtensor<double, 2> p_frc = - detail::event_probability_forecast(q_frc, q_thr); - - // return computed BS - return xt::mean(xt::square(p_obs - p_frc), -1); - } - - /// Compute the calibration-refinement decomposition of the Brier Score - /// into reliability, resolution, and uncertainty. - /// - /// BS = reliability - resolution + uncertainty - /// - /// \param [in] q_obs: - /// 2D array of streamflow observations. - /// shape: (1, time) - /// \param [in] q_frc: - /// 2D array of streamflow forecasts. - /// shape: (members, time) - /// \param [in] q_thr: - /// 1D array of streamflow exceedance threshold(s). - /// shape: (thresholds,) - /// \return - /// 2D array of Brier Score components (reliability, resolution, - /// uncertainty) for each threshold. - /// shape: (components, thresholds) - xt::xtensor<double, 2> bs_crd( - const xt::xtensor<double, 2>& q_obs, - const xt::xtensor<double, 2>& q_frc, - const xt::xtensor<double, 1>& q_thr - ) - { - // NOTE ---------------------------------------------------------------- - // All equations are following notations from "Wilks, D. S. (2011). - // Statistical methods in the atmospheric sciences. Amsterdam; Boston: - // Elsevier Academic Press. ISBN: 9780123850225". In particular, - // pp. 302-303, 332-333. - // --------------------------------------------------------------------- - - // define some dimensions - std::size_t n = q_frc.shape(1); - std::size_t n_mbr = q_frc.shape(0); - std::size_t n_thr = q_thr.size(); - std::size_t n_cmp = 3; - - // declare internal variables - // shape: (bins, thresholds, time) - xt::xtensor<double, 3> msk_bins; - // shape: (thresholds, time) - xt::xtensor<double, 2> o_k, y_k; - // shape: (thresholds,) - xt::xtensor<double, 1> bar_o; - // shape: (bins, thresholds) - xt::xtensor<double, 2> N_i, bar_o_i; - // shape: (bins,) - xt::xtensor<double, 1> y_i; - - // declare and initialise output variable - // shape: (components, thresholds) - xt::xtensor<double, 2> bs_components = xt::zeros<double>({n_cmp, n_thr}); - - // get observed event and forecast probability of event - o_k = detail::event_observed_outcome(q_obs, q_thr); - y_k = detail::event_probability_forecast(q_frc, q_thr); - - // compute range of forecast values $y_i$ - y_i = xt::arange<double>(double (n_mbr + 1)) / n_mbr; - - // compute mask to subsample time steps belonging to same forecast bin - // (where bins are defined as the range of forecast values) - msk_bins = xt::equal( - y_k, - xt::view(y_i, xt::all(), xt::newaxis(), xt::newaxis()) - ); - - // compute number of forecasts in each forecast bin $N_i$ - N_i = xt::sum(msk_bins, -1); - - // compute subsample relative frequency - // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$ - bar_o_i = xt::where( - N_i > 0, - xt::sum( - xt::where( - msk_bins, - xt::view(o_k, xt::newaxis(), xt::all(), xt::all()), - 0. - ), - -1 - ) / N_i, - 0. - ); - - // compute mean "climatology" relative frequency of the event - // $\bar{o} = \frac{1}{n} \sum_{k=1}^{n} o_k$ - bar_o = xt::mean(o_k, -1); - - // calculate reliability = - // $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$ - xt::row(bs_components, 0) = - xt::sum( - xt::square( - xt::view(y_i, xt::all(), xt::newaxis()) - - bar_o_i - ) * N_i, - 0 - ) / n; - - // calculate resolution = - // $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$ - xt::row(bs_components, 1) = - xt::sum( - xt::square( - bar_o_i - bar_o - ) * N_i, - 0 - ) / n; - - // calculate uncertainty = $\bar{o} (1 - \bar{o})$ - xt::row(bs_components, 2) = bar_o * (1 - bar_o); - - return bs_components; - } - - /// Compute the likelihood-based decomposition of the Brier Score - /// into type 2 bias, discrimination, and sharpness (a.k.a. refinement). - /// - /// BS = type 2 bias - discrimination + sharpness - /// - /// \param [in] q_obs: - /// 2D array of streamflow observations. - /// shape: (1, time) - /// \param [in] q_frc: - /// 2D array of streamflow forecasts. - /// shape: (members, time) - /// \param [in] q_thr: - /// 1D array of streamflow exceedance threshold(s). - /// shape: (thresholds,) - /// \return - /// 2D array of Brier Score components (type 2 bias, discrimination, - /// sharpness) for each threshold. - /// shape: (components, thresholds) - xt::xtensor<double, 2> bs_lbd( - const xt::xtensor<double, 2>& q_obs, - const xt::xtensor<double, 2>& q_frc, - const xt::xtensor<double, 1>& q_thr - ) - { - // NOTE ---------------------------------------------------------------- - // All equations are following notations from "Wilks, D. S. (2011). - // Statistical methods in the atmospheric sciences. Amsterdam; Boston: - // Elsevier Academic Press. ISBN: 9780123850225". In particular, - // pp. 302-303, 332-333. - // --------------------------------------------------------------------- - - // define some dimensions - std::size_t n = q_frc.shape(1); - std::size_t n_thr = q_thr.size(); - std::size_t n_cmp = 3; - - // declare internal variables - // shape: (bins, thresholds, time) - xt::xtensor<double, 3> msk_bins; - // shape: (thresholds, time) - xt::xtensor<double, 2> o_k, y_k; - // shape: (thresholds,) - xt::xtensor<double, 1> bar_y; - // shape: (bins, thresholds) - xt::xtensor<double, 2> M_j, bar_y_j; - // shape: (bins,) - xt::xtensor<double, 1> o_j; - - // declare and initialise output variable - // shape: (components, thresholds) - xt::xtensor<double, 2> bs_components = xt::zeros<double>({n_cmp, n_thr}); - - // get observed event and forecast probability of event - o_k = detail::event_observed_outcome(q_obs, q_thr); - y_k = detail::event_probability_forecast(q_frc, q_thr); - - // set the range of observed values $o_j$ - o_j = {0., 1.}; - - // compute mask to subsample time steps belonging to same observation bin - // (where bins are defined as the range of forecast values) - msk_bins = xt::equal( - o_k, - xt::view(o_j, xt::all(), xt::newaxis(), xt::newaxis()) - ); - - // compute number of observations in each observation bin $M_j$ - M_j= xt::sum(msk_bins, -1); - - // compute subsample relative frequency - // $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$ - bar_y_j = xt::where( - M_j > 0, - xt::sum( - xt::where( - msk_bins, - xt::view(y_k, xt::newaxis(), xt::all(), xt::all()), - 0. - ), - -1 - ) / M_j, - 0. - ); - - // compute mean "climatology" forecast probability - // $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$ - bar_y = xt::mean(y_k, -1); - - // calculate type 2 bias = - // $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$ - xt::row(bs_components, 0) = - xt::sum( - xt::square( - xt::view(o_j, xt::all(), xt::newaxis()) - - bar_y_j - ) * M_j, - 0 - ) / n; - - // calculate discrimination = - // $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$ - xt::row(bs_components, 1) = - xt::sum( - xt::square( - bar_y_j - bar_y - ) * M_j, - 0 - ) / n; - - // calculate sharpness = - // $\frac{1}{n} \sum_{k=1}^{n} (\bar{y_k} - \bar{y})^2$ - xt::row(bs_components, 2) = - xt::sum( - xt::square( - y_k - - xt::view(bar_y, xt::all(), xt::newaxis()) - ), - -1 - ) / n; - - return bs_components; - } - - /// Compute the Brier Skill Score (BSS). - /// - /// \param [in] q_obs: - /// 2D array of streamflow observations. - /// shape: (1, time) - /// \param [in] q_frc: - /// 2D array of streamflow forecasts. - /// shape: (members, time) - /// \param [in] q_thr: - /// 1D array of streamflow exceedance threshold(s). - /// shape: (thresholds,) - /// \return - /// 1D array of Brier Skill Score for each threshold. - /// shape: (thresholds,) - xt::xtensor<double, 1> bss( - const xt::xtensor<double, 2>& q_obs, - const xt::xtensor<double, 2>& q_frc, - const xt::xtensor<double, 1>& q_thr - ) - { - // determine observed realisation of threshold(s) exceedance - xt::xtensor<double, 2> p_obs = - detail::event_observed_outcome(q_obs, q_thr); - - // calculate mean observed realisation of threshold(s) exceedance - xt::xtensor<double, 2> p_obs_mean = xt::mean(p_obs, -1, xt::keep_dims); - - // calculate reference Brier Score - xt::xtensor<double, 1> bs_ref = xt::mean( - xt::square(p_obs - p_obs_mean), -1 - ); - - // return computed BSS - return 1 - (bs(q_obs, q_frc, q_thr) / bs_ref); - } } #endif //EVALHYD_PROBABILISTIC_HPP diff --git a/include/evalhyd/probabilistic/brier.hpp b/include/evalhyd/probabilistic/brier.hpp new file mode 100644 index 0000000..ade5fbf --- /dev/null +++ b/include/evalhyd/probabilistic/brier.hpp @@ -0,0 +1,307 @@ +#ifndef EVALHYD_BRIER_HPP +#define EVALHYD_BRIER_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xmath.hpp> +#include <xtensor/xoperation.hpp> + +#include "elements.hpp" + +namespace eh = evalhyd; + +namespace evalhyd +{ + // NOTE -------------------------------------------------------------------- + // All equations in metrics below are following notations from + // "Wilks, D. S. (2011). Statistical methods in the atmospheric sciences. + // Amsterdam; Boston: Elsevier Academic Press. ISBN: 9780123850225". + // In particular, pp. 302-303, 332-333. + // ------------------------------------------------------------------------- + + /// Compute the Brier score (BS). + /// + /// \param [in] elt: + /// Map of pre-computed elements, including required elements: + /// o_k: + /// Observed event outcome. + /// shape: (thresholds, time) + /// y_k: + /// Event probability forecast. + /// shape: (thresholds, time) + /// \return + /// 1D array of Brier Score for each threshold. + /// shape: (thresholds,) + xt::xtensor<double, 1> bs( + std::unordered_map<std::string, xt::xtensor<double, 2>>& elt + ) + { + // return computed Brier score(s) + // $BS = \frac{1}{n} \sum_{k=1}^{n} (o_k - y_k)^2$ + return xt::mean(xt::square(elt["o_k"] - elt["y_k"]), -1); + } + + /// Compute the calibration-refinement decomposition of the Brier score + /// into reliability, resolution, and uncertainty. + /// + /// BS = reliability - resolution + uncertainty + /// + /// \param [in] elt: + /// Map of pre-computed elements, including required elements: + /// o_k: + /// Observed event outcome. + /// shape: (thresholds, time) + /// y_k: + /// Event probability forecast. + /// shape: (thresholds, time) + /// \param [in] n_mbr: + /// Number of forecast members. + /// \return + /// 2D array of Brier score components (reliability, resolution, + /// uncertainty) for each threshold. + /// shape: (components, thresholds) + xt::xtensor<double, 2> bs_crd( + std::unordered_map<std::string, xt::xtensor<double, 2>>& elt, + std::size_t n_mbr + ) + { + // declare internal variables + // shape: (bins, thresholds, time) + xt::xtensor<double, 3> msk_bins; + // shape: (thresholds, time) + xt::xtensor<double, 2> o_k, y_k; + // shape: (thresholds,) + xt::xtensor<double, 1> bar_o; + // shape: (bins, thresholds) + xt::xtensor<double, 2> N_i, bar_o_i; + // shape: (bins,) + xt::xtensor<double, 1> y_i; + + // get observed event and forecast probability of event + o_k = elt["o_k"]; + y_k = elt["y_k"]; + + // define some dimensions + std::size_t n = o_k.shape(1); + std::size_t n_thr = o_k.shape(0); + std::size_t n_cmp = 3; + + // declare and initialise output variable + // shape: (components, thresholds) + xt::xtensor<double, 2> bs_components = xt::zeros<double>({n_cmp, n_thr}); + + // compute range of forecast values $y_i$ + y_i = xt::arange<double>(double (n_mbr + 1)) / n_mbr; + + // compute mask to subsample time steps belonging to same forecast bin + // (where bins are defined as the range of forecast values) + msk_bins = xt::equal( + y_k, + xt::view(y_i, xt::all(), xt::newaxis(), xt::newaxis()) + ); + + // compute number of forecasts in each forecast bin $N_i$ + N_i = xt::sum(msk_bins, -1); + + // compute subsample relative frequency + // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$ + bar_o_i = xt::where( + N_i > 0, + xt::sum( + xt::where( + msk_bins, + xt::view(o_k, xt::newaxis(), xt::all(), xt::all()), + 0. + ), + -1 + ) / N_i, + 0. + ); + + // compute mean "climatology" relative frequency of the event + // $\bar{o} = \frac{1}{n} \sum_{k=1}^{n} o_k$ + bar_o = xt::mean(o_k, -1); + + // calculate reliability = + // $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$ + xt::row(bs_components, 0) = + xt::sum( + xt::square( + xt::view(y_i, xt::all(), xt::newaxis()) + - bar_o_i + ) * N_i, + 0 + ) / n; + + // calculate resolution = + // $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$ + xt::row(bs_components, 1) = + xt::sum( + xt::square( + bar_o_i - bar_o + ) * N_i, + 0 + ) / n; + + // calculate uncertainty = $\bar{o} (1 - \bar{o})$ + xt::row(bs_components, 2) = bar_o * (1 - bar_o); + + return bs_components; + } + + /// Compute the likelihood-based decomposition of the Brier score + /// into type 2 bias, discrimination, and sharpness (a.k.a. refinement). + /// + /// BS = type 2 bias - discrimination + sharpness + /// + /// \param [in] elt: + /// Map of pre-computed elements, including required elements: + /// o_k: + /// Observed event outcome. + /// shape: (thresholds, time) + /// y_k: + /// Event probability forecast. + /// shape: (thresholds, time) + /// \return + /// 2D array of Brier score components (type 2 bias, discrimination, + /// sharpness) for each threshold. + /// shape: (components, thresholds) + xt::xtensor<double, 2> bs_lbd( + std::unordered_map<std::string, xt::xtensor<double, 2>>& elt + ) + { + // declare internal variables + // shape: (bins, thresholds, time) + xt::xtensor<double, 3> msk_bins; + // shape: (thresholds, time) + xt::xtensor<double, 2> o_k, y_k; + // shape: (thresholds,) + xt::xtensor<double, 1> bar_y; + // shape: (bins, thresholds) + xt::xtensor<double, 2> M_j, bar_y_j; + // shape: (bins,) + xt::xtensor<double, 1> o_j; + + // get observed event and forecast probability of event + o_k = elt["o_k"]; + y_k = elt["y_k"]; + + // define some dimensions + std::size_t n = o_k.shape(1); + std::size_t n_thr = o_k.shape(0); + std::size_t n_cmp = 3; + + // declare and initialise output variable + // shape: (components, thresholds) + xt::xtensor<double, 2> bs_components = xt::zeros<double>({n_cmp, n_thr}); + + // get observed event and forecast probability of event + o_k = elt["o_k"]; + y_k = elt["y_k"]; + + // set the range of observed values $o_j$ + o_j = {0., 1.}; + + // compute mask to subsample time steps belonging to same observation bin + // (where bins are defined as the range of forecast values) + msk_bins = xt::equal( + o_k, + xt::view(o_j, xt::all(), xt::newaxis(), xt::newaxis()) + ); + + // compute number of observations in each observation bin $M_j$ + M_j= xt::sum(msk_bins, -1); + + // compute subsample relative frequency + // $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$ + bar_y_j = xt::where( + M_j > 0, + xt::sum( + xt::where( + msk_bins, + xt::view(y_k, xt::newaxis(), xt::all(), xt::all()), + 0. + ), + -1 + ) / M_j, + 0. + ); + + // compute mean "climatology" forecast probability + // $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$ + bar_y = xt::mean(y_k, -1); + + // calculate type 2 bias = + // $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$ + xt::row(bs_components, 0) = + xt::sum( + xt::square( + xt::view(o_j, xt::all(), xt::newaxis()) + - bar_y_j + ) * M_j, + 0 + ) / n; + + // calculate discrimination = + // $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$ + xt::row(bs_components, 1) = + xt::sum( + xt::square( + bar_y_j - bar_y + ) * M_j, + 0 + ) / n; + + // calculate sharpness = + // $\frac{1}{n} \sum_{k=1}^{n} (\bar{y_k} - \bar{y})^2$ + xt::row(bs_components, 2) = + xt::sum( + xt::square( + y_k - + xt::view(bar_y, xt::all(), xt::newaxis()) + ), + -1 + ) / n; + + return bs_components; + } + + /// Compute the Brier skill score (BSS). + /// + /// \param [in] elt: + /// Map of pre-computed elements, including required elements: + /// o_k: + /// Observed event outcome. + /// shape: (thresholds, time) + /// \param [in] dep: + /// Map of pre-computed metrics, including required metrics: + /// bs: + /// Brier score(s). + /// shape: (thresholds,) + /// \return + /// 1D array of Brier skill score for each threshold. + /// shape: (thresholds,) + xt::xtensor<double, 1> bss( + std::unordered_map<std::string, xt::xtensor<double, 2>>& elt, + std::unordered_map<std::string, xt::xtensor<double, 1>>& dep + ) + { + // determine observed realisation of threshold(s) exceedance + xt::xtensor<double, 2> o_k = elt["o_k"]; + + // calculate mean observed event outcome + // $\bar{o_k} = \frac{1}{n} \sum_{k=1}^{n} o_k$ + xt::xtensor<double, 2> bar_o_k = xt::mean(o_k, -1, xt::keep_dims); + + // calculate reference Brier score(s) + // $BS_{ref} = \frac{1}{n} \sum_{k=1}^{n} (o_k - \bar{o_k})^2$ + xt::xtensor<double, 1> bs_ref = xt::mean( + xt::square(o_k - bar_o_k), -1 + ); + + // return computed Brier skill score(s) + // $BSS = 1 - \frac{BS}{BS_{ref}} + return 1 - (dep["bs"] / bs_ref); + } +} + +#endif //EVALHYD_BRIER_HPP diff --git a/include/evalhyd/probabilistic/elements.hpp b/include/evalhyd/probabilistic/elements.hpp new file mode 100644 index 0000000..9253f90 --- /dev/null +++ b/include/evalhyd/probabilistic/elements.hpp @@ -0,0 +1,99 @@ +#ifndef EVALHYD_ELEMENTS_HPP +#define EVALHYD_ELEMENTS_HPP + +#include <xtensor/xtensor.hpp> +#include "xtensor/xview.hpp" +#include <xtensor/xmath.hpp> + + +namespace eh = evalhyd; + +namespace evalhyd +{ + namespace elements + { + namespace detail + { + // determine whether flows `q` are greater than given threshold(s) `thr` + // q shape: (1+, time) + // thr shape: (thresholds,) + // returned shape: (thresholds, 1+, time) + xt::xtensor<double, 3> is_above_threshold( + const xt::xtensor<double, 2>& q, + const xt::xtensor<double, 1>& thr + ) + { + // return a boolean-like array + return q >= xt::view(thr, xt::all(), xt::newaxis(), xt::newaxis()); + } + + // determine whether flows `q` are strictly lower than given threshold(s) `thr` + // q shape: (1+, time) + // thr shape: (thresholds,) + // returned shape: (thresholds, 1+, time) + xt::xtensor<double, 3> is_below_threshold( + const xt::xtensor<double, 2>& q, + const xt::xtensor<double, 1>& thr + ) + { + // return a boolean-like array + return q < xt::view(thr, xt::all(), xt::newaxis(), xt::newaxis()); + } + } + + /// Determine whether exceedance event has occurred. + /// + /// \param [in] q_obs: + /// 2D array of streamflow observations. + /// shape: (1, time) + /// \param [in] q_thr: + /// 1D array of streamflow exceedance threshold(s). + /// shape: (thresholds,) + /// \return + /// 2D array of event observed outcome + /// shape: (thresholds, time) + xt::xtensor<double, 2> event_observed_outcome( + const xt::xtensor<double, 2>& q_obs, + const xt::xtensor<double, 1>& q_thr + ) + { + // determine observed realisation of threshold(s) exceedance + return xt::squeeze(detail::is_above_threshold(q_obs, q_thr)); + } + + /// Determine forecast probability of event to occur. + /// + /// \param [in] q_frc: + /// 2D array of streamflow forecasts. + /// shape: (members, time) + /// \param [in] q_thr: + /// 1D array of streamflow exceedance threshold(s). + /// shape: (thresholds,) + /// \return + /// 2D array of event probability forecast + /// shape: (thresholds, time) + xt::xtensor<double, 2> event_probability_forecast( + const xt::xtensor<double, 2>& q_frc, + const xt::xtensor<double, 1>& q_thr + ) + { + // determine if members have exceeded threshold(s) + xt::xtensor<double, 3> e_frc = + detail::is_above_threshold(q_frc, q_thr); + + // calculate how many members have exceeded threshold(s) + xt::xtensor<double, 2> n_frc = xt::sum(e_frc, 1); + + // determine correspondence between number of exceeding members + // and forecast probabilities of exceedance + // /!\ probability calculation dividing by n (the number of + // members), not n+1 (the number of ranks) like in other metrics + std::size_t n_mbr = q_frc.shape(0); + xt::xtensor<double, 2> p_frc = n_frc / n_mbr; + + return p_frc; + } + } +} + +#endif //EVALHYD_ELEMENTS_HPP diff --git a/include/evalhyd/utils.hpp b/include/evalhyd/utils.hpp index d8bbb81..a2c8b9c 100644 --- a/include/evalhyd/utils.hpp +++ b/include/evalhyd/utils.hpp @@ -1,40 +1,53 @@ #ifndef EVALHYD_UTILS_HPP #define EVALHYD_UTILS_HPP +#include <unordered_map> +#include <unordered_set> #include <xtensor/xtensor.hpp> -#include <xtensor/xbroadcast.hpp> -#include <xtensor/xadapt.hpp> -#include <xtensor/xview.hpp> -#include <xtensor/xmath.hpp> namespace evalhyd { namespace utils { - // determine whether flows `q` are greater than given threshold(s) `thr` - // q shape: (1+, time) - // thr shape: (thresholds,) - // returned shape: (thresholds, 1+, time) - xt::xtensor<double, 3> is_above_threshold( - const xt::xtensor<double, 2>& q, - const xt::xtensor<double, 1>& thr + /// Procedure to determine based on a list of metrics which elements + /// and which metrics (and their associated elements) require to be + /// pre-computed for memoisation purposes. + /// + /// \param [in] metrics: + /// Vector of strings for the metric(s) to be computed. + /// \param [in] elements: + /// Map between metrics and their required computation elements. + /// \param [in] dependencies: + /// Map between metrics and their dependencies. + /// \param [out] required_elements: + /// Set of elements identified as required to be pre-computed. + /// \param [out] required_dependencies: + /// Set of metrics identified as required to be pre-computed. + void find_requirements ( + const std::vector<std::string>& metrics, + std::unordered_map<std::string, std::vector<std::string>>& elements, + std::unordered_map<std::string, std::vector<std::string>>& dependencies, + std::unordered_set<std::string>& required_elements, + std::unordered_set<std::string>& required_dependencies ) { - // return a boolean-like array - return q >= xt::view(thr, xt::all(), xt::newaxis(), xt::newaxis()); - } - - // determine whether flows `q` are strictly lower than given threshold(s) `thr` - // q shape: (1+, time) - // thr shape: (thresholds,) - // returned shape: (thresholds, 1+, time) - xt::xtensor<double, 3> is_below_threshold( - const xt::xtensor<double, 2>& q, - const xt::xtensor<double, 1>& thr - ) - { - // return a boolean-like array - return q < xt::view(thr, xt::all(), xt::newaxis(), xt::newaxis()); + for (const auto& metric : metrics) + { + // add elements to pre-computation set + for (const auto& element : elements[metric]) + required_elements.insert(element); + // add metric dependencies to pre-computation set + if (dependencies.find(metric) != dependencies.end()) + { + for (const auto& dependency : dependencies[metric]) + { + required_dependencies.insert(dependency); + // add dependency elements to pre-computation set + for (const auto& element : elements[dependency]) + required_elements.insert(element); + } + } + } } } } -- GitLab