An error occurred while loading the file. Please try again.
-
Thibault Hallouin authoredad3bdc97
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#ifndef EVALHYD_EVALP_HPP
#define EVALHYD_EVALP_HPP
#include <unordered_map>
#include <vector>
#include <xtl/xoptional.hpp>
#include <xtensor/xexpression.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xarray.hpp>
#include "detail/utils.hpp"
#include "detail/masks.hpp"
#include "detail/maths.hpp"
#include "detail/uncertainty.hpp"
#include "detail/probabilist/evaluator.hpp"
namespace evalhyd
{
/// Function to evaluate probabilistic streamflow predictions.
///
/// \rst
///
/// :Template Parameters:
///
/// XD2: Any 2-dimensional container class storing numeric elements
/// (e.g. ``xt::xtensor<double, 2>``, ``xt::pytensor<double, 2>``,
/// ``xt::rtensor<double, 2>``, etc.).
///
/// XD4: Any 4-dimensional container class storing numeric elements
/// (e.g. ``xt::xtensor<double, 4>``, ``xt::pytensor<double, 4>``,
/// ``xt::rtensor<double, 4>``, etc.).
///
/// XB4: Any 4-dimensional container class storing boolean elements
/// (e.g. ``xt::xtensor<bool, 4>``, ``xt::pytensor<bool, 4>``,
/// ``xt::rtensor<bool, 4>``, etc.).
///
/// :Parameters:
///
/// q_obs: ``XD2``
/// Streamflow observations. Time steps with missing observations
/// must be assigned `NAN` values. Those time steps will be ignored
/// both in the observations and the predictions before the
/// *metrics* are computed.
/// shape: (sites, time)
///
/// q_prd: ``XD4``
/// Streamflow predictions. Time steps with missing predictions
/// must be assigned `NAN` values. Those time steps will be ignored
/// both in the observations and the predictions before the
/// *metrics* are computed.
/// shape: (sites, lead times, members, time)
///
/// metrics: ``std::vector<std::string>``
/// The sequence of evaluation metrics to be computed.
///
/// q_thr: ``XD2``, optional
/// Streamflow exceedance threshold(s).
/// shape: (sites, thresholds)
///
/// t_msk: ``XB4``, optional
/// Mask(s) used to generate temporal subsets of the whole streamflow
/// time series (where True/False is used for the time steps to
/// include/discard in a given subset). If not provided and neither
/// is *m_cdt*, no subset is performed and only one set of metrics is
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
/// returned corresponding to the whole time series. If provided, as
/// many sets of metrics are returned as they are masks provided.
/// shape: (sites, lead times, subsets, time)
///
/// .. seealso:: :doc:`../../functionalities/temporal-masking`
///
/// m_cdt: ``xt::xtensor<std::array<char, 32>, 2>``, optional
/// Masking conditions to use to generate temporal subsets. Each
/// condition consists in a string and can be specified on
/// observed/predicted streamflow values/statistics (mean, median,
/// quantile), or on time indices. If provided in combination with
/// *t_msk*, the latter takes precedence. If not provided and neither
/// is *t_msk*, no subset is performed and only one set of metrics is
/// returned corresponding to the whole time series. If provided, as
/// many sets of metrics are returned as they are conditions provided.
/// shape: (sites, subsets)
///
/// .. seealso:: :doc:`../../functionalities/conditional-masking`
///
/// bootstrap: ``std::unordered_map<std::string, int>``, optional
/// Parameters for the bootstrapping method used to estimate the
/// sampling uncertainty in the evaluation of the predictions.
/// Three parameters are mandatory ('n_samples' the number of random
/// samples, 'len_sample' the length of one sample in number of years,
/// and 'summary' the statistics to return to characterise the
/// sampling distribution), and one parameter is optional ('seed').
/// If not provided, no bootstrapping is performed. If provided,
/// *dts* must also be provided.
///
/// .. seealso:: :doc:`../../functionalities/bootstrapping`
///
/// dts: ``std::vector<std::string>``, optional
/// Datetimes. The corresponding date and time for the temporal
/// dimension of the streamflow observations and predictions.
/// The date and time must be specified in a string following the
/// ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss" (e.g. the
/// 21st of May 2007 at 4 in the afternoon is "2007-05-21 16:00:00").
/// If provided, it is only used if *bootstrap* is also provided.
///
/// :Returns:
///
/// ``std::vector<xt::xarray<double>>``
/// The sequence of evaluation metrics computed in the same order
/// as given in *metrics*.
/// shape: (metrics,)<(sites, lead times, subsets, samples,
/// {quantiles,} {thresholds,} {components})>
///
/// :Examples:
///
/// .. code-block:: c++
///
/// #include <xtensor/xtensor.hpp>
/// #include <evalhyd/evalp.hpp>
///
/// xt::xtensor<double, 2> obs = {{ 4.7, 4.3, 5.5, 2.7, 4.1 }};
/// xt::xtensor<double, 4> prd = {{{{ 5.3, 4.2, 5.7, 2.3, 3.1 },
/// { 4.3, 4.2, 4.7, 4.3, 3.3 },
/// { 5.3, 5.2, 5.7, 2.3, 3.9 }}}};
/// xt::xtensor<double, 2> thr = {{ 4.7, 4.3, 5.5, 2.7, 4.1 }};
///
/// evalhyd::evalp(obs, prd, {"BS"}, thr);
///
/// .. code-block:: c++
///
/// xt::xtensor<bool, 3> msk = {{{ false, true, true, false, true }}};
///
/// evalhyd::evalp(obs, prd, {"BS"}, thr, msk);
///
/// .. code-block:: c++
///
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
/// evalhyd::evalp(obs, prd, {"CRPS"});
///
/// \endrst
template <class XD2, class XD4, class XB4 = xt::xtensor<bool, 4>>
std::vector<xt::xarray<double>> evalp(
const xt::xexpression<XD2>& q_obs,
const xt::xexpression<XD4>& q_prd,
const std::vector<std::string>& metrics,
const xt::xexpression<XD2>& q_thr = XD2({}),
const xt::xexpression<XB4>& t_msk = XB4({}),
const xt::xtensor<std::array<char, 32>, 2>& m_cdt = {},
xtl::xoptional<const std::unordered_map<std::string, int>, bool> bootstrap =
xtl::missing<const std::unordered_map<std::string, int>>(),
const std::vector<std::string>& dts = {}
)
{
// check ranks of tensors
if (xt::get_rank<XD2>::value != 2)
{
throw std::runtime_error(
"observations and/or thresholds are not two-dimensional"
);
}
if (xt::get_rank<XD4>::value != 4)
{
throw std::runtime_error(
"predictions are not four-dimensional"
);
}
if (xt::get_rank<XB4>::value != 4)
{
throw std::runtime_error(
"temporal masks are not four-dimensional"
);
}
// retrieve real types of the expressions
const XD2& q_obs_ = q_obs.derived_cast();
const XD4& q_prd_ = q_prd.derived_cast();
const XD2& q_thr_ = q_thr.derived_cast();
const XB4& t_msk_ = t_msk.derived_cast();
// check that the metrics to be computed are valid
utils::check_metrics(
metrics,
{"BS", "BSS", "BS_CRD", "BS_LBD",
"QS", "CRPS",
"POD", "POFD", "FAR", "CSI", "ROCSS"}
);
// check that optional parameters are given as arguments
utils::evalp::check_optionals(metrics, q_thr_);
if (bootstrap.has_value())
{
utils::check_bootstrap(bootstrap.value());
}
// check that data dimensions are compatible
// > time
if (q_obs_.shape(1) != q_prd_.shape(3))
{
throw std::runtime_error(
"observations and predictions feature different "
"temporal lengths"
);
}
if (t_msk_.size() > 0)
{
if (q_obs_.shape(1) != t_msk_.shape(3))
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
{
throw std::runtime_error(
"observations and masks feature different "
"temporal lengths"
);
}
}
if (!dts.empty())
{
if (q_obs_.shape(1) != dts.size())
{
throw std::runtime_error(
"observations and datetimes feature different "
"temporal lengths"
);
}
}
// > leadtimes
if (t_msk_.size() > 0)
{
if (q_prd_.shape(1) != t_msk_.shape(1))
{
throw std::runtime_error(
"predictions and temporal masks feature different "
"numbers of lead times"
);
}
}
// > sites
if (q_obs_.shape(0) != q_prd_.shape(0))
{
throw std::runtime_error(
"observations and predictions feature different "
"numbers of sites"
);
}
if (t_msk_.size() > 0)
{
if (q_obs_.shape(0) != t_msk_.shape(0))
{
throw std::runtime_error(
"observations and temporal masks feature different "
"numbers of sites"
);
}
}
if (m_cdt.size() > 0)
{
if (q_obs_.shape(0) != m_cdt.shape(0))
{
throw std::runtime_error(
"observations and masking conditions feature different "
"numbers of sites"
);
}
}
// retrieve dimensions
std::size_t n_sit = q_prd_.shape(0);
std::size_t n_ltm = q_prd_.shape(1);
std::size_t n_mbr = q_prd_.shape(2);
std::size_t n_tim = q_prd_.shape(3);
std::size_t n_thr = q_thr_.shape(1);
std::size_t n_msk = t_msk_.size() > 0 ? t_msk_.shape(2) :
(m_cdt.size() > 0 ? m_cdt.shape(1) : 1);
std::size_t n_exp = !bootstrap.has_value() ? 1:
bootstrap.value().find("n_samples")->second;
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
// register metrics number of dimensions
std::unordered_map<std::string, std::vector<std::size_t>> dim;
dim["BS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr};
dim["BSS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr};
dim["BS_CRD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3};
dim["BS_LBD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3};
dim["QS"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr};
dim["CRPS"] = {n_sit, n_ltm, n_msk, n_exp};
dim["POD"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr + 1, n_thr};
dim["POFD"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr + 1, n_thr};
dim["FAR"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr + 1, n_thr};
dim["CSI"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr + 1, n_thr};
dim["ROCSS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr};
// generate masks from conditions if provided
auto gen_msk = [&]()
{
XB4 c_msk = xt::zeros<bool>({n_sit, n_ltm, n_msk, n_tim});
if (m_cdt.size() > 0)
{
for (std::size_t s = 0; s < n_sit; s++)
{
for (std::size_t l = 0; l < n_ltm; l++)
{
for (std::size_t m = 0; m < n_msk; m++)
{
xt::view(c_msk, s, l, m) =
masks::generate_mask_from_conditions(
xt::view(m_cdt, s, m),
xt::view(q_obs_, s),
xt::view(q_prd_, s, l)
);
}
}
}
}
return c_msk;
};
const XB4 c_msk = gen_msk();
// generate bootstrap experiment if requested
std::vector<xt::xkeep_slice<int>> b_exp;
int summary;
if (bootstrap.has_value())
{
auto n_samples = bootstrap.value().find("n_samples")->second;
auto len_sample = bootstrap.value().find("len_sample")->second;
summary = bootstrap.value().find("summary")->second;
if (dts.empty())
{
throw std::runtime_error(
"bootstrap requested but datetimes not provided"
);
}
b_exp = uncertainty::bootstrap(dts, n_samples, len_sample);
}
else
{
// if no bootstrap requested, generate one sample
// containing all the time indices once
summary = 0;
xt::xtensor<int, 1> all = xt::arange(n_tim);
b_exp.push_back(xt::keep(all));
}
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
// initialise data structure for outputs
std::vector<xt::xarray<double>> r;
for (const auto& metric : metrics)
{
r.emplace_back(xt::zeros<double>(dim[metric]));
}
// compute variables one site at a time to minimise memory imprint
for (std::size_t s = 0; s < n_sit; s++)
{
// compute variables one lead time at a time to minimise memory imprint
for (std::size_t l = 0; l < n_ltm; l++)
{
// instantiate probabilist evaluator
const auto q_obs_v = xt::view(q_obs_, s, xt::all());
const auto q_prd_v = xt::view(q_prd_, s, l, xt::all(), xt::all());
const auto q_thr_v = xt::view(q_thr_, s, xt::all());
const auto t_msk_v =
t_msk_.size() > 0 ?
xt::view(t_msk_, s, l, xt::all(), xt::all()) :
(m_cdt.size() > 0 ?
xt::view(c_msk, s, l, xt::all(), xt::all()) :
xt::view(t_msk_, s, l, xt::all(), xt::all()));
probabilist::Evaluator<XD2, XD4, XB4> evaluator(
q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp
);
// retrieve or compute requested metrics
for (std::size_t m = 0; m < metrics.size(); m++)
{
const auto& metric = metrics[m];
if ( metric == "BS" )
{
// (sites, lead times, subsets, samples, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_BS(), summary);
}
else if ( metric == "BSS" )
{
// (sites, lead times, subsets, samples, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_BSS(), summary);
}
else if ( metric == "BS_CRD" )
{
// (sites, lead times, subsets, samples, thresholds, components)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_BS_CRD(), summary);
}
else if ( metric == "BS_LBD" )
{
// (sites, lead times, subsets, samples, thresholds, components)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_BS_LBD(), summary);
}
else if ( metric == "QS" )
{
// (sites, lead times, subsets, samples, quantiles)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_QS(), summary);
}
else if ( metric == "CRPS" )
{
// (sites, lead times, subsets, samples)
xt::view(r[m], s, l, xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_CRPS(), summary);
}
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
else if ( metric == "POD" )
{
// (sites, lead times, subsets, samples, levels, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_POD(), summary);
}
else if ( metric == "POFD" )
{
// (sites, lead times, subsets, samples, levels, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_POFD(), summary);
}
else if ( metric == "FAR" )
{
// (sites, lead times, subsets, samples, levels, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_FAR(), summary);
}
else if ( metric == "CSI" )
{
// (sites, lead times, subsets, samples, levels, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_CSI(), summary);
}
else if ( metric == "ROCSS" )
{
// (sites, lead times, subsets, samples, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all()) =
uncertainty::summarise(evaluator.get_ROCSS(), summary);
}
}
}
}
return r;
}
}
#endif //EVALHYD_EVALP_HPP