evald.hpp 21.40 KiB
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#ifndef EVALHYD_EVALD_HPP
#define EVALHYD_EVALD_HPP
#include <unordered_map>
#include <vector>
#include <xtl/xoptional.hpp>
#include <xtensor/xexpression.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xarray.hpp>
#include "detail/utils.hpp"
#include "detail/masks.hpp"
#include "detail/uncertainty.hpp"
#include "detail/determinist/evaluator.hpp"
namespace evalhyd
    /// Function to evaluate deterministic streamflow predictions.
    ///
    /// \rst
    ///
    /// :Template Parameters:
    ///
    ///    XD2: Any 2-dimensional container class storing numeric elements
    ///         (e.g. ``xt::xtensor<double, 2>``, ``xt::pytensor<double, 2>``,
    ///         ``xt::rtensor<double, 2>``, etc.).
    ///
    ///    XB3: Any 3-dimensional container class storing boolean elements
    ///         (e.g. ``xt::xtensor<bool, 3>``, ``xt::pytensor<bool, 3>``,
    ///         ``xt::rtensor<bool, 3>``, etc.).
    ///
    ///    XS2: Any 2-dimensional container class storing string elements
    ///         (e.g. ``xt::xtensor<std::array<char, 32>, 2>``,
    ///         ``xt::pytensor<std::array<char, 32>, 2>``,
    ///         ``xt::rtensor<std::array<char, 32>, 2>``, etc.).
    ///
    /// :Parameters:
    ///
    ///    q_obs: ``XD2``
    ///       Streamflow observations. Time steps with missing observations
    ///       must be assigned `NAN` values. Those time steps will be ignored
    ///       both in the observations and the predictions before the
    ///       *metrics* are computed.
    ///       shape: (1, time)
    ///
    ///    q_prd: ``XD2``
    ///       Streamflow predictions. Time steps with missing predictions
    ///       must be assigned `NAN` values. Those time steps will be ignored
    ///       both in the observations and the predictions before the
    ///       *metrics* are computed.
    ///       shape: (series, time)
    ///
    ///    metrics: ``std::vector<std::string>``
    ///       The sequence of evaluation metrics to be computed.
    ///
    ///       .. seealso:: :doc:`../../metrics/deterministic`
    ///
    ///    q_thr: ``XD2``, optional
    ///       Streamflow exceedance threshold(s). If provided, *events* must
    ///       also be provided.
    ///       shape: (sites, thresholds)
    ///
    ///    events: ``std::string``, optional
    ///       The type of streamflow events to consider for threshold
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
/// exceedance-based metrics. It can either be set as "high" when /// flooding conditions/high flow events are evaluated (i.e. event /// occurring when streamflow goes above threshold) or as "low" when /// drought conditions/low flow events are evaluated (i.e. event /// occurring when streamflow goes below threshold). It must be /// provided if *q_thr* is provided. /// /// transform: ``std::string``, optional /// The transformation to apply to both streamflow observations and /// predictions prior to the calculation of the *metrics*. /// /// .. seealso:: :doc:`../../functionalities/transformation` /// /// exponent: ``double``, optional /// The value of the exponent n to use when the *transform* is the /// power function. If not provided, the streamflow observations /// and predictions remain untransformed. /// /// epsilon: ``double``, optional /// The value of the small constant ε to add to both the streamflow /// observations and predictions prior to the calculation of the /// *metrics* when the *transform* is the reciprocal function, the /// natural logarithm, or the power function with a negative exponent /// (since none are defined for 0). If not provided, one hundredth of /// the mean of the streamflow observations is used as value for /// epsilon, as recommended by `Pushpalatha et al. (2012) /// <https://doi.org/10.1016/j.jhydrol.2011.11.055>`_. /// /// t_msk: ``XB3``, optional /// Mask used to temporally subset of the whole streamflow time series /// (where True/False is used for the time steps to include/discard in /// the subset). /// shape: (series, subsets, time) /// /// .. seealso:: :doc:`../../functionalities/temporal-masking` /// /// m_cdt: ``XS2``, optional /// Masking conditions to use to generate temporal subsets. Each /// condition consists in a string and can be specified on observed /// streamflow values/statistics (mean, median, quantile), or on time /// indices. If provided in combination with *t_msk*, the latter takes /// precedence. If not provided and neither is *t_msk*, no subset is /// performed. /// shape: (series, subsets) /// /// .. seealso:: :doc:`../../functionalities/conditional-masking` /// /// bootstrap: ``std::unordered_map<std::string, int>``, optional /// Parameters for the bootstrapping method used to estimate the /// sampling uncertainty in the evaluation of the predictions. /// The parameters are: 'n_samples' the number of random samples, /// 'len_sample' the length of one sample in number of years, /// and 'summary' the statistics to return to characterise the /// sampling distribution). If not provided, no bootstrapping is /// performed. If provided, *dts* must also be provided. /// /// .. seealso:: :doc:`../../functionalities/bootstrapping` /// /// dts: ``std::vector<std::string>``, optional /// Datetimes. The corresponding date and time for the temporal /// dimension of the streamflow observations and predictions. /// The date and time must be specified in a string following the /// ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss" (e.g. the /// 21st of May 2007 at 4 in the afternoon is "2007-05-21 16:00:00"). /// If provided, it is only used if *bootstrap* is also provided. /// /// seed: ``int``, optional /// A value for the seed used by random generators. This parameter /// guarantees the reproducibility of the metric values between calls. ///
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
/// diagnostics: ``std::vector<std::string>``, optional /// The sequence of evaluation diagnostics to be computed. /// /// .. seealso:: :doc:`../../functionalities/diagnostics` /// /// :Returns: /// /// ``std::vector<xt::xarray<double>>`` /// The sequence of evaluation metrics computed in the same order /// as given in *metrics*, followed by the sequence of evaluation /// diagnostics computed in the same order as given in *diagnostics*. /// shape: (metrics+diagnostics,)<(series, subsets, samples, {components})> /// /// :Examples: /// /// .. code-block:: c++ /// /// #include <xtensor/xtensor.hpp> /// #include <evalhyd/evald.hpp> /// /// xt::xtensor<double, 2> obs = {{ 4.7, 4.3, 5.5, 2.7, 4.1 }}; /// xt::xtensor<double, 2> prd = {{ 5.3, 4.2, 5.7, 2.3, 3.1 }, /// { 4.3, 4.2, 4.7, 4.3, 3.3 }, /// { 5.3, 5.2, 5.7, 2.3, 3.9 }}; /// /// evalhyd::evald(obs, prd, {"NSE"}); /// /// .. code-block:: c++ /// /// evalhyd::evald(obs, prd, {"NSE"}, "sqrt"); /// /// .. code-block:: c++ /// /// evalhyd::evald(obs, prd, {"NSE"}, "pow", 0.2); /// /// .. code-block:: c++ /// /// evalhyd::evald(obs, prd, {"NSE"}, "log", 1, 0.05); /// /// .. code-block:: c++ /// /// xt::xtensor<double, 3> msk = {{{ 1, 1, 0, 1, 0 }}}; /// /// evalhyd::evald(obs, prd, {"NSE"}, "none", 1, -9, msk); /// /// \endrst template <class XD2, class XB3 = xt::xtensor<bool, 3>, class XS2 = xt::xtensor<std::array<char, 32>, 2>> std::vector<xt::xarray<double>> evald( const xt::xexpression<XD2>& q_obs, const xt::xexpression<XD2>& q_prd, const std::vector<std::string>& metrics, const xt::xexpression<XD2>& q_thr = XD2({}), xtl::xoptional<const std::string, bool> events = xtl::missing<const std::string>(), xtl::xoptional<const std::string, bool> transform = xtl::missing<const std::string>(), xtl::xoptional<double, bool> exponent = xtl::missing<double>(), xtl::xoptional<double, bool> epsilon = xtl::missing<double>(), const xt::xexpression<XB3>& t_msk = XB3({}), const xt::xexpression<XS2>& m_cdt = XS2({}), xtl::xoptional<const std::unordered_map<std::string, int>, bool> bootstrap = xtl::missing<const std::unordered_map<std::string, int>>(), const std::vector<std::string>& dts = {}, xtl::xoptional<const int, bool> seed = xtl::missing<const int>(), xtl::xoptional<const std::vector<std::string>, bool> diagnostics = xtl::missing<const std::vector<std::string>>()
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
) { // check ranks of expressions if they are tensors if (xt::get_rank<XD2>::value != SIZE_MAX) { if (xt::get_rank<XD2>::value != 2) { throw std::runtime_error( "observations and/or predictions and/or thresholds " "are not two-dimensional" ); } } if (xt::get_rank<XB3>::value != SIZE_MAX) { if (xt::get_rank<XB3>::value != 3) { throw std::runtime_error( "temporal masks are not three-dimensional" ); } } // retrieve real types of the expressions const XD2& q_obs_ = q_obs.derived_cast(); const XD2& q_prd_ = q_prd.derived_cast(); const XD2& q_thr_ = q_thr.derived_cast(); const XB3& t_msk_ = t_msk.derived_cast(); const XS2& m_cdt_ = m_cdt.derived_cast(); // check that the metrics/diagnostics to be computed are valid utils::check_metrics( metrics, {"MAE", "MARE", "MSE", "RMSE", "NSE", "KGE", "KGE_D", "KGEPRIME", "KGEPRIME_D", // ------------------------------------------------------------ // TODO: bring back when `xt::argsort` supports stable sorting // so that the r_spearman component of KGENP and KGENP_D // yields consistent results across compilers // https://github.com/xtensor-stack/xtensor/issues/2677 // "KGENP", "KGENP_D", // ------------------------------------------------------------ "CONT_TBL"} ); if ( diagnostics.has_value() ) { utils::check_diags( diagnostics.value(), {"completeness"} ); } // check that optional parameters are valid if (bootstrap.has_value()) { utils::check_bootstrap(bootstrap.value()); } // get a seed for random generators auto random_seed = utils::get_seed(seed); // check that data dimensions are compatible // > time if (q_obs_.shape(1) != q_prd_.shape(1)) { throw std::runtime_error( "observations and predictions feature different " "temporal lengths"
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
); } if (t_msk_.size() > 0) { if (q_obs_.shape(1) != t_msk_.shape(2)) { throw std::runtime_error( "observations and masks feature different " "temporal lengths" ); } } if (!dts.empty()) { if (q_obs_.shape(1) != dts.size()) { throw std::runtime_error( "observations and datetimes feature different " "temporal lengths" ); } } // > series if (q_obs_.shape(0) != 1) { throw std::runtime_error( "observations contain more than one time series" ); } if (q_thr_.size() > 0) { if (q_prd_.shape(0) != q_thr_.shape(0)) { throw std::runtime_error( "predictions and thresholds feature different " "numbers of series" ); } } if (t_msk_.size() > 0) { if (q_prd_.shape(0) != t_msk_.shape(0)) { throw std::runtime_error( "predictions and masks feature different " "number of series" ); } } if (m_cdt_.size() > 0) { if (q_prd_.shape(0) != m_cdt_.shape(0)) { throw std::runtime_error( "predictions and masking conditions feature different " "numbers of series" ); } } // retrieve dimensions std::size_t n_tim = q_prd_.shape(1); // generate masks from conditions if provided auto gen_msk = [&]() {
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
if ((t_msk_.size() < 1) && (m_cdt_.size() > 0)) { std::size_t n_srs = q_prd_.shape(0); std::size_t n_msk = m_cdt_.shape(1); XB3 c_msk = xt::zeros<bool>({n_srs, n_msk, n_tim}); for (std::size_t s = 0; s < n_srs; s++) { for (std::size_t m = 0; m < n_msk; m++) { xt::view(c_msk, s, m) = masks::generate_mask_from_conditions( xt::view(m_cdt_, s, m), xt::view(q_obs_, 0), xt::view(q_prd_, s, xt::newaxis()) ); } } return c_msk; } else { return XB3(xt::zeros<bool>({0, 0, 0})); } }; const XB3 c_msk = gen_msk(); // apply streamflow transformation if requested auto q_transform = [&](const XD2& q) { if (transform.has_value()) { if ( transform.value() == "sqrt" ) { return XD2(xt::sqrt(q)); } else if ( transform.value() == "inv" ) { if ( !epsilon.has_value() ) { // determine an epsilon value to avoid zero divide epsilon = xt::mean(q_obs_)() * 0.01; } return XD2(1. / (q + epsilon.value())); } else if ( transform.value() == "log" ) { if ( !epsilon.has_value() ) { // determine an epsilon value to avoid log zero epsilon = xt::mean(q_obs_)() * 0.01; } return XD2(xt::log(q + epsilon.value())); } else if ( transform.value() == "pow" ) { if ( exponent.has_value() ) { if ( exponent.value() == 1) { return q; } else if ( exponent.value() < 0 ) { if ( !epsilon.has_value() ) {
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
// determine an epsilon value to avoid zero divide epsilon = xt::mean(q_obs_)() * 0.01; } return XD2(xt::pow(q + epsilon.value(), exponent.value())); } else { return XD2(xt::pow(q, exponent.value())); } } else { throw std::runtime_error( "missing exponent for power transformation" ); } } else { throw std::runtime_error( "invalid streamflow transformation: " + transform.value() ); } } else { return q; } }; const XD2& obs = q_transform(q_obs_); const XD2& prd = q_transform(q_prd_); const XD2& thr = q_transform(q_thr_); // generate bootstrap experiment if requested std::vector<xt::xkeep_slice<int>> exp; int summary; if (bootstrap.has_value()) { auto n_samples = bootstrap.value().find("n_samples")->second; auto len_sample = bootstrap.value().find("len_sample")->second; summary = bootstrap.value().find("summary")->second; if (dts.empty()) { throw std::runtime_error( "bootstrap requested but datetimes not provided" ); } exp = uncertainty::bootstrap( dts, n_samples, len_sample, random_seed ); } else { // if no bootstrap requested, generate one sample // containing all the time indices once summary = 0; xt::xtensor<int, 1> all = xt::arange(n_tim); exp.push_back(xt::keep(all)); } // instantiate determinist evaluator determinist::Evaluator<XD2, XB3> evaluator( obs, prd, thr, events,
491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
t_msk_.size() > 0 ? t_msk_: (m_cdt_.size() > 0 ? c_msk : t_msk_), exp ); // retrieve or compute requested metrics std::vector<xt::xarray<double>> r; for ( const auto& metric : metrics ) { if ( metric == "MAE" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_MAE(), summary) ); } if ( metric == "MARE" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_MARE(), summary) ); } if ( metric == "MSE" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_MSE(), summary) ); } if ( metric == "RMSE" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_RMSE(), summary) ); } else if ( metric == "NSE" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_NSE(), summary) ); } else if ( metric == "KGE" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_KGE(), summary) ); } else if ( metric == "KGE_D" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_KGE_D(), summary) ); } else if ( metric == "KGEPRIME" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_KGEPRIME(), summary) ); } else if ( metric == "KGEPRIME_D" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_KGEPRIME_D(), summary) ); } else if ( metric == "KGENP" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_KGENP(), summary) ); } else if ( metric == "KGENP_D" )
561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
{ r.emplace_back( uncertainty::summarise_d(evaluator.get_KGENP_D(), summary) ); } else if ( metric == "CONT_TBL" ) { r.emplace_back( uncertainty::summarise_d(evaluator.get_CONT_TBL(), summary) ); } } if ( diagnostics.has_value() ) { for ( const auto& diagnostic : diagnostics.value() ) { if ( diagnostic == "completeness" ) { r.emplace_back( evaluator.get_completeness() ); } } } return r; }; } #endif //EVALHYD_EVALD_HPP