evalp.hpp 18.44 KiB
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#ifndef EVALHYD_EVALP_HPP
#define EVALHYD_EVALP_HPP
#include <unordered_map>
#include <vector>
#include <xtl/xoptional.hpp>
#include <xtensor/xexpression.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xarray.hpp>
#include "detail/utils.hpp"
#include "detail/masks.hpp"
#include "detail/maths.hpp"
#include "detail/uncertainty.hpp"
#include "detail/probabilist/evaluator.hpp"
namespace evalhyd
    /// Function to evaluate probabilistic streamflow predictions.
    ///
    /// \rst
    ///
    /// :Template Parameters:
    ///
    ///    XD2: Any 2-dimensional container class storing numeric elements
    ///         (e.g. ``xt::xtensor<double, 2>``, ``xt::pytensor<double, 2>``,
    ///         ``xt::rtensor<double, 2>``, etc.).
    ///
    ///    XD4: Any 4-dimensional container class storing numeric elements
    ///         (e.g. ``xt::xtensor<double, 4>``, ``xt::pytensor<double, 4>``,
    ///         ``xt::rtensor<double, 4>``, etc.).
    ///
    ///    XB4: Any 4-dimensional container class storing boolean elements
    ///         (e.g. ``xt::xtensor<bool, 4>``, ``xt::pytensor<bool, 4>``,
    ///         ``xt::rtensor<bool, 4>``, etc.).
    ///
    /// :Parameters:
    ///
    ///    q_obs: ``XD2``
    ///       Streamflow observations. Time steps with missing observations
    ///       must be assigned `NAN` values. Those time steps will be ignored
    ///       both in the observations and the predictions before the
    ///       *metrics* are computed.
    ///       shape: (sites, time)
    ///
    ///    q_prd: ``XD4``
    ///       Streamflow predictions. Time steps with missing predictions
    ///       must be assigned `NAN` values. Those time steps will be ignored
    ///       both in the observations and the predictions before the
    ///       *metrics* are computed.
    ///       shape: (sites, lead times, members, time)
    ///
    ///    metrics: ``std::vector<std::string>``
    ///       The sequence of evaluation metrics to be computed.
    ///
    ///    q_thr: ``XD2``, optional
    ///       Streamflow exceedance threshold(s).
    ///       shape: (sites, thresholds)
    ///
    ///    t_msk: ``XB4``, optional
    ///       Mask(s) used to generate temporal subsets of the whole streamflow
    ///       time series (where True/False is used for the time steps to
    ///       include/discard in a given subset). If not provided and neither
    ///       is *m_cdt*, no subset is performed and only one set of metrics is
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
/// returned corresponding to the whole time series. If provided, as /// many sets of metrics are returned as they are masks provided. /// shape: (sites, lead times, subsets, time) /// /// .. seealso:: :doc:`../../functionalities/temporal-masking` /// /// m_cdt: ``xt::xtensor<std::array<char, 32>, 2>``, optional /// Masking conditions to use to generate temporal subsets. Each /// condition consists in a string and can be specified on /// observed/predicted streamflow values/statistics (mean, median, /// quantile), or on time indices. If provided in combination with /// *t_msk*, the latter takes precedence. If not provided and neither /// is *t_msk*, no subset is performed and only one set of metrics is /// returned corresponding to the whole time series. If provided, as /// many sets of metrics are returned as they are conditions provided. /// shape: (sites, subsets) /// /// .. seealso:: :doc:`../../functionalities/conditional-masking` /// /// bootstrap: ``std::unordered_map<std::string, int>``, optional /// Parameters for the bootstrapping method used to estimate the /// sampling uncertainty in the evaluation of the predictions. /// Three parameters are mandatory ('n_samples' the number of random /// samples, 'len_sample' the length of one sample in number of years, /// and 'summary' the statistics to return to characterise the /// sampling distribution), and one parameter is optional ('seed'). /// If not provided, no bootstrapping is performed. If provided, /// *dts* must also be provided. /// /// .. seealso:: :doc:`../../functionalities/bootstrapping` /// /// dts: ``std::vector<std::string>``, optional /// Datetimes. The corresponding date and time for the temporal /// dimension of the streamflow observations and predictions. /// The date and time must be specified in a string following the /// ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss" (e.g. the /// 21st of May 2007 at 4 in the afternoon is "2007-05-21 16:00:00"). /// If provided, it is only used if *bootstrap* is also provided. /// /// :Returns: /// /// ``std::vector<xt::xarray<double>>`` /// The sequence of evaluation metrics computed in the same order /// as given in *metrics*. /// shape: (metrics,)<(sites, lead times, subsets, samples, /// {quantiles,} {thresholds,} {components})> /// /// :Examples: /// /// .. code-block:: c++ /// /// #include <xtensor/xtensor.hpp> /// #include <evalhyd/evalp.hpp> /// /// xt::xtensor<double, 2> obs = {{ 4.7, 4.3, 5.5, 2.7, 4.1 }}; /// xt::xtensor<double, 4> prd = {{{{ 5.3, 4.2, 5.7, 2.3, 3.1 }, /// { 4.3, 4.2, 4.7, 4.3, 3.3 }, /// { 5.3, 5.2, 5.7, 2.3, 3.9 }}}}; /// xt::xtensor<double, 2> thr = {{ 4.7, 4.3, 5.5, 2.7, 4.1 }}; /// /// evalhyd::evalp(obs, prd, {"BS"}, thr); /// /// .. code-block:: c++ /// /// xt::xtensor<bool, 3> msk = {{{ false, true, true, false, true }}}; /// /// evalhyd::evalp(obs, prd, {"BS"}, thr, msk); /// /// .. code-block:: c++ ///
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
/// evalhyd::evalp(obs, prd, {"CRPS"}); /// /// \endrst template <class XD2, class XD4, class XB4 = xt::xtensor<bool, 4>> std::vector<xt::xarray<double>> evalp( const xt::xexpression<XD2>& q_obs, const xt::xexpression<XD4>& q_prd, const std::vector<std::string>& metrics, const xt::xexpression<XD2>& q_thr = XD2({}), const xt::xexpression<XB4>& t_msk = XB4({}), const xt::xtensor<std::array<char, 32>, 2>& m_cdt = {}, xtl::xoptional<const std::unordered_map<std::string, int>, bool> bootstrap = xtl::missing<const std::unordered_map<std::string, int>>(), const std::vector<std::string>& dts = {} ) { // check ranks of tensors if (xt::get_rank<XD2>::value != 2) { throw std::runtime_error( "observations and/or thresholds are not two-dimensional" ); } if (xt::get_rank<XD4>::value != 4) { throw std::runtime_error( "predictions are not four-dimensional" ); } if (xt::get_rank<XB4>::value != 4) { throw std::runtime_error( "temporal masks are not four-dimensional" ); } // retrieve real types of the expressions const XD2& q_obs_ = q_obs.derived_cast(); const XD4& q_prd_ = q_prd.derived_cast(); const XD2& q_thr_ = q_thr.derived_cast(); const XB4& t_msk_ = t_msk.derived_cast(); // check that the metrics to be computed are valid utils::check_metrics( metrics, {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS", "POD", "POFD", "FAR", "CSI", "ROCSS"} ); // check that optional parameters are given as arguments utils::evalp::check_optionals(metrics, q_thr_); if (bootstrap.has_value()) { utils::check_bootstrap(bootstrap.value()); } // check that data dimensions are compatible // > time if (q_obs_.shape(1) != q_prd_.shape(3)) { throw std::runtime_error( "observations and predictions feature different " "temporal lengths" ); } if (t_msk_.size() > 0) { if (q_obs_.shape(1) != t_msk_.shape(3))
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
{ throw std::runtime_error( "observations and masks feature different " "temporal lengths" ); } } if (!dts.empty()) { if (q_obs_.shape(1) != dts.size()) { throw std::runtime_error( "observations and datetimes feature different " "temporal lengths" ); } } // > leadtimes if (t_msk_.size() > 0) { if (q_prd_.shape(1) != t_msk_.shape(1)) { throw std::runtime_error( "predictions and temporal masks feature different " "numbers of lead times" ); } } // > sites if (q_obs_.shape(0) != q_prd_.shape(0)) { throw std::runtime_error( "observations and predictions feature different " "numbers of sites" ); } if (t_msk_.size() > 0) { if (q_obs_.shape(0) != t_msk_.shape(0)) { throw std::runtime_error( "observations and temporal masks feature different " "numbers of sites" ); } } if (m_cdt.size() > 0) { if (q_obs_.shape(0) != m_cdt.shape(0)) { throw std::runtime_error( "observations and masking conditions feature different " "numbers of sites" ); } } // retrieve dimensions std::size_t n_sit = q_prd_.shape(0); std::size_t n_ltm = q_prd_.shape(1); std::size_t n_mbr = q_prd_.shape(2); std::size_t n_tim = q_prd_.shape(3); std::size_t n_thr = q_thr_.shape(1); std::size_t n_msk = t_msk_.size() > 0 ? t_msk_.shape(2) : (m_cdt.size() > 0 ? m_cdt.shape(1) : 1); std::size_t n_exp = !bootstrap.has_value() ? 1: bootstrap.value().find("n_samples")->second;
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
// register metrics number of dimensions std::unordered_map<std::string, std::vector<std::size_t>> dim; dim["BS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; dim["BSS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; dim["BS_CRD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; dim["BS_LBD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; dim["QS"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr}; dim["CRPS"] = {n_sit, n_ltm, n_msk, n_exp}; dim["POD"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr + 1, n_thr}; dim["POFD"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr + 1, n_thr}; dim["FAR"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr + 1, n_thr}; dim["CSI"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr + 1, n_thr}; dim["ROCSS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; // generate masks from conditions if provided auto gen_msk = [&]() { XB4 c_msk = xt::zeros<bool>({n_sit, n_ltm, n_msk, n_tim}); if (m_cdt.size() > 0) { for (std::size_t s = 0; s < n_sit; s++) { for (std::size_t l = 0; l < n_ltm; l++) { for (std::size_t m = 0; m < n_msk; m++) { xt::view(c_msk, s, l, m) = masks::generate_mask_from_conditions( xt::view(m_cdt, s, m), xt::view(q_obs_, s), xt::view(q_prd_, s, l) ); } } } } return c_msk; }; const XB4 c_msk = gen_msk(); // generate bootstrap experiment if requested std::vector<xt::xkeep_slice<int>> b_exp; int summary; if (bootstrap.has_value()) { auto n_samples = bootstrap.value().find("n_samples")->second; auto len_sample = bootstrap.value().find("len_sample")->second; summary = bootstrap.value().find("summary")->second; if (dts.empty()) { throw std::runtime_error( "bootstrap requested but datetimes not provided" ); } b_exp = uncertainty::bootstrap(dts, n_samples, len_sample); } else { // if no bootstrap requested, generate one sample // containing all the time indices once summary = 0; xt::xtensor<int, 1> all = xt::arange(n_tim); b_exp.push_back(xt::keep(all)); }
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
// initialise data structure for outputs std::vector<xt::xarray<double>> r; for (const auto& metric : metrics) { r.emplace_back(xt::zeros<double>(dim[metric])); } // compute variables one site at a time to minimise memory imprint for (std::size_t s = 0; s < n_sit; s++) { // compute variables one lead time at a time to minimise memory imprint for (std::size_t l = 0; l < n_ltm; l++) { // instantiate probabilist evaluator const auto q_obs_v = xt::view(q_obs_, s, xt::all()); const auto q_prd_v = xt::view(q_prd_, s, l, xt::all(), xt::all()); const auto q_thr_v = xt::view(q_thr_, s, xt::all()); const auto t_msk_v = t_msk_.size() > 0 ? xt::view(t_msk_, s, l, xt::all(), xt::all()) : (m_cdt.size() > 0 ? xt::view(c_msk, s, l, xt::all(), xt::all()) : xt::view(t_msk_, s, l, xt::all(), xt::all())); probabilist::Evaluator<XD2, XD4, XB4> evaluator( q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp ); // retrieve or compute requested metrics for (std::size_t m = 0; m < metrics.size(); m++) { const auto& metric = metrics[m]; if ( metric == "BS" ) { // (sites, lead times, subsets, samples, thresholds) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_BS(), summary); } else if ( metric == "BSS" ) { // (sites, lead times, subsets, samples, thresholds) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_BSS(), summary); } else if ( metric == "BS_CRD" ) { // (sites, lead times, subsets, samples, thresholds, components) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_BS_CRD(), summary); } else if ( metric == "BS_LBD" ) { // (sites, lead times, subsets, samples, thresholds, components) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_BS_LBD(), summary); } else if ( metric == "QS" ) { // (sites, lead times, subsets, samples, quantiles) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_QS(), summary); } else if ( metric == "CRPS" ) { // (sites, lead times, subsets, samples) xt::view(r[m], s, l, xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_CRPS(), summary); }
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
else if ( metric == "POD" ) { // (sites, lead times, subsets, samples, levels, thresholds) xt::view(r[m], s, l, xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_POD(), summary); } else if ( metric == "POFD" ) { // (sites, lead times, subsets, samples, levels, thresholds) xt::view(r[m], s, l, xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_POFD(), summary); } else if ( metric == "FAR" ) { // (sites, lead times, subsets, samples, levels, thresholds) xt::view(r[m], s, l, xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_FAR(), summary); } else if ( metric == "CSI" ) { // (sites, lead times, subsets, samples, levels, thresholds) xt::view(r[m], s, l, xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_CSI(), summary); } else if ( metric == "ROCSS" ) { // (sites, lead times, subsets, samples, thresholds) xt::view(r[m], s, l, xt::all(), xt::all()) = uncertainty::summarise(evaluator.get_ROCSS(), summary); } } } } return r; } } #endif //EVALHYD_EVALP_HPP