intervals.hpp 18.36 KiB
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#ifndef EVALHYD_PROBABILIST_INTERVALS_HPP
#define EVALHYD_PROBABILIST_INTERVALS_HPP
#include <limits>
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xindex_view.hpp>
#include <xtensor/xsort.hpp>
namespace evalhyd
    namespace probabilist
        namespace elements
            /// Compute the bounds of the predictive intervals by computing
            /// the quantiles of the predictive distribution corresponding
            /// to the confidence intervals.
            ///
            /// \param q_prd
            ///     Streamflow predictions.
            ///     shape: (sites, lead times, members, time)
            /// \param c_lvl
            ///     Confidence levels for the predictive intervals.
            ///     shape: (intervals,)
            /// \param n_sit
            ///     Number of sites.
            /// \param n_ldt
            ///     Number of lead times.
            /// \param n_itv
            ///     Number of predictive intervals.
            /// \param n_tim
            ///     Number of time steps.
            /// \return
            ///     Bounds of the predictive intervals.
            ///     shape: (sites, lead times, intervals, bounds, time)
            template <class XD4>
            inline xt::xtensor<double, 5> calc_itv_bnds(
                    const XD4& q_prd,
                    const xt::xtensor<double, 1>& c_lvl,
                    const xt::xtensor<double, 1>& q_lvl,
                    std::size_t n_sit,
                    std::size_t n_ldt,
                    std::size_t n_itv,
                    std::size_t n_tim
                xt::xtensor<double, 5> itv_bnds =
                        xt::zeros<double>({n_sit, n_ldt, n_itv, std::size_t {2}, n_tim});
                // determine quantiles forming the predictive intervals
                // from the confidence levels
                xt::xtensor<double, 2> quantiles =
                        xt::zeros<double>({n_itv, std::size_t {2}});
                xt::col(quantiles, 0) = 0.5 - c_lvl / 200.;
                xt::col(quantiles, 1) = 0.5 + c_lvl / 200.;
                // get or compute predictive interval bounds from quantiles
                if (q_lvl.size() > 0)
                    for (std::size_t i = 0; i < n_itv; i++)
                    //auto a = xt::broadcast(xt::view(quantiles, i), std::vector<std::size_t>({q_lvl.size(), 2}));
                    //auto b = xt::broadcast(q_lvl, std::vector<std::size_t>({2, q_lvl.size()}));
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
//auto res = xt::where(xt::equal(a, xt::transpose(b))); //if (res.size() != 2) //{ // throw std::runtime_error( // "interval-based metric requested, " // "but *c_lvl* not matching *q_lvl" // ); //} else //{ xt::view(itv_bnds, xt::all(), xt::all(), i, 0, xt::all()) = xt::view(q_prd, 0); xt::view(itv_bnds, xt::all(), xt::all(), i, 1, xt::all()) = xt::view(q_prd, 1); //} } } else { for (std::size_t i = 0; i < n_itv; i++) { auto q = xt::quantile(q_prd, xt::view(quantiles, i), 2); xt::view(itv_bnds, xt::all(), xt::all(), i, 0, xt::all()) = xt::view(q, 0); xt::view(itv_bnds, xt::all(), xt::all(), i, 1, xt::all()) = xt::view(q, 1); } } return itv_bnds; } /// Determine whether the observations are inside the predictive /// intervals for each time step. /// /// \param q_obs /// Streamflow predictions. /// shape: (sites, time) /// \param itv_bnds /// Bounds of the predictive intervals. /// shape: (sites, lead times, intervals, bounds, time) /// \return /// Boolean-like tensor evaluating to true where observations /// are inside the predictive intervals. /// shape: (sites, lead times, intervals, time) template <class XD2> inline xt::xtensor<double, 4> calc_obs_in_itv( const XD2& q_obs, const xt::xtensor<double, 5>& itv_bnds ) { // notations below follow Gneiting and Raftery (2007), sect 6.2 // https://doi.org/10.1198/016214506000001437 auto x = xt::view(q_obs, xt::all(), xt::newaxis(), xt::newaxis(), xt::all()); auto l = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 0, xt::all()); auto u = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 1, xt::all()); return ((x >= l) && (x <= u)); } /// Compute the width of the predictive intervals for each time step. /// /// \param itv_bnds /// Bounds of the predictive intervals. /// shape: (sites, lead times, intervals, bounds, time) /// \return /// Interval scores for each time step. /// shape: (sites, lead times, intervals, time) inline xt::xtensor<double, 4> calc_itv_width(
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
const xt::xtensor<double, 5>& itv_bnds ) { // notations below follow Gneiting and Raftery (2007), sect 6.2 // https://doi.org/10.1198/016214506000001437 auto l = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 0, xt::all()); auto u = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 1, xt::all()); return (u - l); } } namespace intermediate { /// Compute the Winkler score for each time step. /// /// \param q_obs /// Streamflow predictions. /// shape: (sites, time) /// \param c_lvl /// Confidence levels for the predictive intervals. /// shape: (intervals,) /// \param itv_bnds /// Bounds of the predictive intervals. /// shape: (sites, lead times, intervals, bounds, time) /// \return /// Interval scores for each time step. /// shape: (sites, lead times, intervals, time) template <class XD2> inline xt::xtensor<double, 4> calc_ws( const XD2& q_obs, const xt::xtensor<double, 1>& c_lvl, const xt::xtensor<double, 5>& itv_bnds ) { // notations below follow Gneiting and Raftery (2007), sect 6.2 // https://doi.org/10.1198/016214506000001437 auto x = xt::view(q_obs, xt::all(), xt::newaxis(), xt::newaxis(), xt::all()); auto alpha = 1 - xt::view(c_lvl, xt::all(), xt::newaxis()) / 100.; // compute component corresponding to observations below interval auto l = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 0, xt::all()); // (l - x)𝟙{x < l} auto ws_l = xt::where(x < l, l - x, 0.); // compute component corresponding to observations above interval auto u = xt::view(itv_bnds, xt::all(), xt::all(), xt::all(), 1, xt::all()); // (x - u)𝟙{x > u} auto ws_u = xt::where(x > u, x - u, 0.); // compute interval score auto ws = (u - l) + 2. * (ws_l + ws_u) / alpha; return ws; } } namespace metrics { namespace detail { inline xt::xtensor<double, 5> calc_METRIC_from_metric( const xt::xtensor<double, 4>& metric, const xt::xtensor<bool, 4>& t_msk, const std::vector<xt::xkeep_slice<int>>& b_exp, std::size_t n_sit, std::size_t n_ldt, std::size_t n_itv, std::size_t n_msk,
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
std::size_t n_exp ) { // initialise output variable xt::xtensor<double, 5> METRIC = xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_itv}); // compute variable one mask at a time to minimise memory imprint for (std::size_t m = 0; m < n_msk; m++) { // apply the mask // (using NaN workaround until reducers work on masked_view) auto metric_masked = xt::where( xt::view(t_msk, xt::all(), xt::all(), m, xt::newaxis(), xt::all()), metric, NAN ); // compute variable one sample at a time for (std::size_t e = 0; e < n_exp; e++) { // apply the bootstrap sampling auto metric_masked_sampled = xt::view(metric_masked, xt::all(), xt::all(), xt::all(), b_exp[e]); // calculate the mean over the time steps xt::view(METRIC, xt::all(), xt::all(), m, e, xt::all()) = xt::nanmean(metric_masked_sampled, -1); } } return METRIC; } } /// Compute the Coverage Ratio (CR), i.e. the portion of /// observations falling within the predictive intervals. /// It is a measure of the reliability of the predictions. /// /// \param obs_in_itv /// Boolean-like tensor evaluating to true where observations /// are inside the predictive intervals. /// shape: (sites, lead times, intervals, time) /// \param t_msk /// Temporal subsets of the whole record. /// shape: (sites, lead times, subsets, time) /// \param b_exp /// Boostrap samples. /// shape: (samples, time slice) /// \param n_sit /// Number of sites. /// \param n_ldt /// Number of lead times. /// \param n_itv /// Number of predictive intervals. /// \param n_msk /// Number of temporal subsets. /// \param n_exp /// Number of bootstrap samples. /// \return /// Coverage ratios. /// shape: (sites, lead times, subsets, samples, intervals) inline xt::xtensor<double, 5> calc_CR( const xt::xtensor<double, 4>& obs_in_itv, const xt::xtensor<bool, 4>& t_msk, const std::vector<xt::xkeep_slice<int>>& b_exp, std::size_t n_sit, std::size_t n_ldt,
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
std::size_t n_itv, std::size_t n_msk, std::size_t n_exp ) { return detail::calc_METRIC_from_metric( obs_in_itv, t_msk, b_exp, n_sit, n_ldt, n_itv, n_msk, n_exp ); } /// Compute the Average Width (AW) of the predictive intervals. /// It is a measure of the sharpness of the predictions. /// /// \param itv_width /// Widths of predictive intervals for each time step. /// shape: (sites, lead times, intervals, time) /// \param t_msk /// Temporal subsets of the whole record. /// shape: (sites, lead times, subsets, time) /// \param b_exp /// Boostrap samples. /// shape: (samples, time slice) /// \param n_sit /// Number of sites. /// \param n_ldt /// Number of lead times. /// \param n_itv /// Number of predictive intervals. /// \param n_msk /// Number of temporal subsets. /// \param n_exp /// Number of bootstrap samples. /// \return /// Average widths. /// shape: (sites, lead times, subsets, samples, intervals) inline xt::xtensor<double, 5> calc_AW( const xt::xtensor<double, 4>& itv_width, const xt::xtensor<bool, 4>& t_msk, const std::vector<xt::xkeep_slice<int>>& b_exp, std::size_t n_sit, std::size_t n_ldt, std::size_t n_itv, std::size_t n_msk, std::size_t n_exp ) { return detail::calc_METRIC_from_metric( itv_width, t_msk, b_exp, n_sit, n_ldt, n_itv, n_msk, n_exp ); } /// Compute the Average Width Normalised (AWN). /// /// \param q_obs /// Streamflow predictions. /// shape: (sites, time) /// \param AW /// Average widths. /// shape: (sites, lead times, subsets, samples, intervals) /// \param t_msk /// Temporal subsets of the whole record. /// shape: (sites, lead times, subsets, time) /// \param b_exp /// Boostrap samples. /// shape: (samples, time slice) /// \param n_sit /// Number of sites. /// \param n_ldt /// Number of lead times.
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
/// \param n_msk /// Number of temporal subsets. /// \param n_exp /// Number of bootstrap samples. /// \return /// Average widths normalised with mean observations. /// shape: (sites, lead times, subsets, samples, intervals) template <class XD2> inline xt::xtensor<double, 5> calc_AWN( const XD2& q_obs, const xt::xtensor<double, 5>& AW, const xt::xtensor<bool, 4>& t_msk, const std::vector<xt::xkeep_slice<int>>& b_exp, std::size_t n_sit, std::size_t n_ldt, std::size_t n_msk, std::size_t n_exp ) { // compute "climatology" average width xt::xtensor<double, 5> mean_obs = xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, std::size_t {1}}); // compute variable one mask at a time to minimise memory imprint for (std::size_t m = 0; m < n_msk; m++) { // apply the mask // (using NaN workaround until reducers work on masked_view) auto q_obs_masked = xt::where( xt::view(t_msk, xt::all(), xt::all(), m, xt::all()), xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()), NAN ); // compute variable one sample at a time for (std::size_t e = 0; e < n_exp; e++) { // apply the bootstrap sampling auto q_obs_masked_sampled = xt::view(q_obs_masked, xt::all(), xt::all(), b_exp[e]); // compute mean observation xt::view(mean_obs, xt::all(), xt::all(), m, e, 0) = xt::nanmean(q_obs_masked_sampled, -1); } } return xt::where(mean_obs > 0, AW / mean_obs, - std::numeric_limits<double>::infinity()); } /// Compute the Winkler scores (WS), also known as interval score. /// /// \param ws /// Winkler scores for each time step. /// shape: (sites, lead times, intervals, time) /// \param t_msk /// Temporal subsets of the whole record. /// shape: (sites, lead times, subsets, time) /// \param b_exp /// Boostrap samples. /// shape: (samples, time slice) /// \param n_sit /// Number of sites. /// \param n_ldt /// Number of lead times. /// \param n_itv /// Number of predictive intervals. /// \param n_msk
421422423424425426427428429430431432433434435436437438439440441442443444445446447
/// Number of temporal subsets. /// \param n_exp /// Number of bootstrap samples. /// \return /// Winkler scores. /// shape: (sites, lead times, subsets, samples, intervals) inline xt::xtensor<double, 5> calc_WS( const xt::xtensor<double, 4>& ws, const xt::xtensor<bool, 4>& t_msk, const std::vector<xt::xkeep_slice<int>>& b_exp, std::size_t n_sit, std::size_t n_ldt, std::size_t n_itv, std::size_t n_msk, std::size_t n_exp ) { return detail::calc_METRIC_from_metric( ws, t_msk, b_exp, n_sit, n_ldt, n_itv, n_msk, n_exp ); } } } } #endif //EVALHYD_PROBABILIST_INTERVALS_HPP