From 602b486dc0b75ad35fcebcb29c3ce549bdecb176 Mon Sep 17 00:00:00 2001 From: Thibault Hallouin <thibault.hallouin@inrae.fr> Date: Fri, 24 Feb 2023 10:25:42 +0100 Subject: [PATCH] add contingency table (CONT_TBL) as new deterministic metric --- .../evalhyd/detail/determinist/evaluator.hpp | 140 ++++++++- include/evalhyd/detail/determinist/events.hpp | 283 ++++++++++++++++++ include/evalhyd/evald.hpp | 30 +- tests/expected/evald/CONT_TBL.csv | 204 +++++++++++++ tests/test_determinist.cpp | 236 ++++++++++----- 5 files changed, 810 insertions(+), 83 deletions(-) create mode 100644 include/evalhyd/detail/determinist/events.hpp create mode 100644 tests/expected/evald/CONT_TBL.csv diff --git a/include/evalhyd/detail/determinist/evaluator.hpp b/include/evalhyd/detail/determinist/evaluator.hpp index df2be57..bb82025 100644 --- a/include/evalhyd/detail/determinist/evaluator.hpp +++ b/include/evalhyd/detail/determinist/evaluator.hpp @@ -14,6 +14,7 @@ #include "diagnostics.hpp" #include "errors.hpp" #include "efficiencies.hpp" +#include "events.hpp" namespace evalhyd @@ -28,6 +29,8 @@ namespace evalhyd const XD2& q_obs; const XD2& q_prd; // members for optional input data + const XD2& _q_thr; + xtl::xoptional<const std::string, bool> _events; xt::xtensor<bool, 3> t_msk; const std::vector<xt::xkeep_slice<int>>& b_exp; @@ -35,6 +38,7 @@ namespace evalhyd std::size_t n_tim; std::size_t n_msk; std::size_t n_srs; + std::size_t n_thr; std::size_t n_exp; // members for computational elements @@ -54,6 +58,12 @@ namespace evalhyd xtl::xoptional<xt::xtensor<double, 3>, bool> gamma; xtl::xoptional<xt::xtensor<double, 3>, bool> alpha_np; xtl::xoptional<xt::xtensor<double, 3>, bool> bias; + xtl::xoptional<xt::xtensor<double, 3>, bool> obs_event; + xtl::xoptional<xt::xtensor<double, 3>, bool> prd_event; + xtl::xoptional<xt::xtensor<double, 3>, bool> ct_a; + xtl::xoptional<xt::xtensor<double, 3>, bool> ct_b; + xtl::xoptional<xt::xtensor<double, 3>, bool> ct_c; + xtl::xoptional<xt::xtensor<double, 3>, bool> ct_d; // members for evaluation metrics xtl::xoptional<xt::xtensor<double, 3>, bool> MAE; @@ -67,6 +77,50 @@ namespace evalhyd xtl::xoptional<xt::xtensor<double, 4>, bool> KGEPRIME_D; xtl::xoptional<xt::xtensor<double, 3>, bool> KGENP; xtl::xoptional<xt::xtensor<double, 4>, bool> KGENP_D; + xtl::xoptional<xt::xtensor<double, 5>, bool> CONT_TBL; + + // methods to get optional parameters + auto get_q_thr() + { + if (_q_thr.size() < 1) + { + throw std::runtime_error( + "threshold-based metric requested, " + "but *q_thr* not provided" + ); + } + else{ + return _q_thr; + } + } + + bool is_high_flow_event() + { + if (_events.has_value()) + { + if (_events.value() == "high") + { + return true; + } + else if (_events.value() == "low") + { + return false; + } + else + { + throw std::runtime_error( + "invalid value for *events*: " + _events.value() + ); + } + } + else + { + throw std::runtime_error( + "threshold-based metric requested, " + "but *events* not provided" + ); + } + } // methods to compute elements xt::xtensor<double, 3> get_t_counts() @@ -253,13 +307,83 @@ namespace evalhyd return bias.value(); }; + xt::xtensor<double, 3> get_obs_event() + { + if (!obs_event.has_value()) + { + obs_event = elements::calc_obs_event( + q_obs, get_q_thr(), is_high_flow_event() + ); + } + return obs_event.value(); + }; + + xt::xtensor<double, 3> get_prd_event() + { + if (!prd_event.has_value()) + { + prd_event = elements::calc_prd_event( + q_prd, get_q_thr(), is_high_flow_event() + ); + } + return prd_event.value(); + }; + + xt::xtensor<double, 3> get_ct_a() + { + if (!ct_a.has_value()) + { + ct_a = elements::calc_ct_a( + get_obs_event(), get_prd_event() + ); + } + return ct_a.value(); + }; + + xt::xtensor<double, 3> get_ct_b() + { + if (!ct_b.has_value()) + { + ct_b = elements::calc_ct_b( + get_obs_event(), get_prd_event() + ); + } + return ct_b.value(); + }; + + xt::xtensor<double, 3> get_ct_c() + { + if (!ct_c.has_value()) + { + ct_c = elements::calc_ct_c( + get_obs_event(), get_prd_event() + ); + } + return ct_c.value(); + }; + + xt::xtensor<double, 3> get_ct_d() + { + if (!ct_d.has_value()) + { + ct_d = elements::calc_ct_d( + get_obs_event(), get_prd_event() + ); + } + return ct_d.value(); + }; + public: // constructor method Evaluator(const XD2& obs, const XD2& prd, + const XD2& thr, + xtl::xoptional<const std::string&, bool> events, const XB3& msk, const std::vector<xt::xkeep_slice<int>>& exp) : - q_obs{obs}, q_prd{prd}, t_msk{msk}, b_exp{exp} + q_obs{obs}, q_prd{prd}, + _q_thr{thr}, _events{events}, + t_msk{msk}, b_exp{exp} { // initialise a mask if none provided // (corresponding to no temporal subset) @@ -274,6 +398,7 @@ namespace evalhyd n_srs = q_prd.shape(0); n_tim = q_prd.shape(1); n_msk = t_msk.shape(1); + n_thr = _q_thr.shape(1); n_exp = b_exp.size(); // drop time steps where observations or predictions are NaN @@ -417,6 +542,19 @@ namespace evalhyd return KGENP_D.value(); }; + xt::xtensor<double, 5> get_CONT_TBL() + { + if (!CONT_TBL.has_value()) + { + CONT_TBL = metrics::calc_CONT_TBL( + get_q_thr(), get_ct_a(), get_ct_b(), get_ct_c(), + get_ct_d(), t_msk, b_exp, + n_srs, n_thr, n_msk, n_exp + ); + } + return CONT_TBL.value(); + }; + // methods to compute diagnostics xt::xtensor<double, 3> get_completeness() { diff --git a/include/evalhyd/detail/determinist/events.hpp b/include/evalhyd/detail/determinist/events.hpp new file mode 100644 index 0000000..acda44a --- /dev/null +++ b/include/evalhyd/detail/determinist/events.hpp @@ -0,0 +1,283 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_DETERMINIST_EVENTS_HPP +#define EVALHYD_DETERMINIST_EVENTS_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xmasked_view.hpp> +#include <xtensor/xmath.hpp> + + +namespace evalhyd +{ + namespace determinist + { + namespace elements + { + // Contingency table: + // + // OBS + // Y N + // +-----+-----+ a: hits + // Y | a | b | b: false alarms + // PRD +-----+-----+ c: misses + // N | c | d | d: correct rejections + // +-----+-----+ + // + + /// Determine observed realisation of threshold(s) exceedance. + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (1, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (series, thresholds) + /// \param is_high_flow_event + /// Whether events correspond to being above the threshold(s). + /// \return + /// Event observed outcome. + /// shape: (series, thresholds, time) + template<class XD2> + inline xt::xtensor<double, 3> calc_obs_event( + const XD2& q_obs, + const XD2& q_thr, + bool is_high_flow_event + ) + { + if (is_high_flow_event) + { + // observations above threshold(s) + return xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + >= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + else + { + // observations below threshold(s) + return xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()) + <= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + } + + /// Determine predicted realisation of threshold(s) exceedance. + /// + /// \param q_prd + /// Streamflow predictions. + /// shape: (series, time) + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (series, thresholds) + /// \param is_high_flow_event + /// Whether events correspond to being above the threshold(s). + /// \return + /// Event predicted outcome. + /// shape: (series, thresholds, time) + template<class XD2> + inline xt::xtensor<double, 3> calc_prd_event( + const XD2& q_prd, + const XD2& q_thr, + bool is_high_flow_event + ) + { + if (is_high_flow_event) + { + // observations above threshold(s) + return xt::view(q_prd, xt::all(), xt::newaxis(), xt::all()) + >= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + else + { + // observations below threshold(s) + return xt::view(q_prd, xt::all(), xt::newaxis(), xt::all()) + <= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis()); + } + } + + /// Determine hits ('a' in contingency table). + /// + /// \param obs_event + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param prd_event + /// Predicted event outcome. + /// shape: (sites, thresholds, time) + /// \return + /// Hits. + /// shape: (sites, thresholds, time) + inline xt::xtensor<double, 3> calc_ct_a( + const xt::xtensor<double, 3>& obs_event, + const xt::xtensor<double, 3>& prd_event + ) + { + return xt::equal(obs_event, 1.) && xt::equal(prd_event, 1.); + } + + /// Determine false alarms ('b' in contingency table). + /// + /// \param obs_event + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param prd_event + /// Predicted event outcome. + /// shape: (sites, thresholds, time) + /// \return + /// False alarms. + /// shape: (sites, thresholds, time) + inline xt::xtensor<double, 3> calc_ct_b( + const xt::xtensor<double, 3>& obs_event, + const xt::xtensor<double, 3>& prd_event + ) + { + return xt::equal(obs_event, 0.) && xt::equal(prd_event, 1.); + } + + /// Determine misses ('c' in contingency table). + /// + /// \param obs_event + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param prd_event + /// Predicted event outcome. + /// shape: (sites, thresholds, time) + /// \return + /// Misses. + /// shape: (sites, thresholds, time) + inline xt::xtensor<double, 3> calc_ct_c( + const xt::xtensor<double, 3>& obs_event, + const xt::xtensor<double, 3>& prd_event + ) + { + return xt::equal(obs_event, 1.) && xt::equal(prd_event, 0.); + } + + /// Determine correct rejections ('d' in contingency table). + /// + /// \param obs_event + /// Observed event outcome. + /// shape: (sites, thresholds, time) + /// \param prd_event + /// Predicted event outcome. + /// shape: (sites, thresholds, time) + /// \return + /// Correct rejections. + /// shape: (sites, thresholds, time) + inline xt::xtensor<double, 3> calc_ct_d( + const xt::xtensor<double, 3>& obs_event, + const xt::xtensor<double, 3>& prd_event + ) + { + return xt::equal(obs_event, 0.) && xt::equal(prd_event, 0.); + } + } + + namespace metrics + { + /// Compute the cells of the contingency table (CONT_TBL), + /// i.e. 'hits', 'false alarms', 'misses', 'correct rejections', + /// in this order. + /// + /// \param q_thr + /// Streamflow exceedance threshold(s). + /// shape: (series, thresholds) + /// \param ct_a + /// Hits for each time step. + /// shape: (series, thresholds, time) + /// \param ct_b + /// False alarms for each time step. + /// shape: (series, thresholds, time) + /// \param ct_c + /// Misses for each time step. + /// shape: (series, thresholds, time) + /// \param ct_d + /// Correct rejections for each time step. + /// shape: (series, thresholds, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_thr + /// Number of thresholds. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Probabilities of detection. + /// shape: (series, subsets, samples, thresholds, cells) + template<class XD2> + inline xt::xtensor<double, 5> calc_CONT_TBL( + const XD2& q_thr, + const xt::xtensor<double, 3>& ct_a, + const xt::xtensor<double, 3>& ct_b, + const xt::xtensor<double, 3>& ct_c, + const xt::xtensor<double, 3>& ct_d, + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_thr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 5> CONT_TBL = + xt::zeros<double>({n_srs, n_msk, n_exp, + n_thr, std::size_t {4}}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + std::size_t i = 0; + for (auto cell: {ct_a, ct_b, ct_c, ct_d}) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto cell_masked = xt::where( + xt::view(t_msk, xt::all(), m, xt::newaxis(), xt::all()), + cell, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto cell_masked_sampled = + xt::view(cell_masked, xt::all(), xt::all(), + b_exp[e]); + + // calculate the mean over the time steps + xt::view(CONT_TBL, xt::all(), m, e, xt::all(), i) = + xt::nansum(cell_masked_sampled, -1); + } + + i++; + } + } + + // assign NaN where thresholds were not provided (i.e. set as NaN) + xt::masked_view( + CONT_TBL, + xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(), + xt::newaxis(), xt::all(), + xt::newaxis())) + ) = NAN; + + return CONT_TBL; + } + } + } +} + +#endif //EVALHYD_DETERMINIST_EVENTS_HPP diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp index 759b825..f625d25 100644 --- a/include/evalhyd/evald.hpp +++ b/include/evalhyd/evald.hpp @@ -173,6 +173,9 @@ namespace evalhyd const xt::xexpression<XD2>& q_obs, const xt::xexpression<XD2>& q_prd, const std::vector<std::string>& metrics, + const xt::xexpression<XD2>& q_thr = XD2({}), + xtl::xoptional<const std::string, bool> events = + xtl::missing<const std::string>(), xtl::xoptional<const std::string, bool> transform = xtl::missing<const std::string>(), xtl::xoptional<double, bool> exponent = @@ -194,7 +197,8 @@ namespace evalhyd if (xt::get_rank<XD2>::value != 2) { throw std::runtime_error( - "observations and/or predictions are not two-dimensional" + "observations and/or predictions and/or thresholds " + "are not two-dimensional" ); } if (xt::get_rank<XB3>::value != 3) @@ -207,6 +211,7 @@ namespace evalhyd // retrieve real types of the expressions const XD2& q_obs_ = q_obs.derived_cast(); const XD2& q_prd_ = q_prd.derived_cast(); + const XD2& q_thr_ = q_thr.derived_cast(); const XB3& t_msk_ = t_msk.derived_cast(); const XS2& m_cdt_ = m_cdt.derived_cast(); @@ -216,7 +221,8 @@ namespace evalhyd metrics, {"MAE", "MARE", "MSE", "RMSE", "NSE", "KGE", "KGE_D", "KGEPRIME", "KGEPRIME_D", - "KGENP", "KGENP_D"} + "KGENP", "KGENP_D", + "CONT_TBL"} ); if ( diagnostics.has_value() ) @@ -274,6 +280,17 @@ namespace evalhyd ); } + if (q_thr_.size() > 0) + { + if (q_prd_.shape(0) != q_thr_.shape(0)) + { + throw std::runtime_error( + "predictions and thresholds feature different " + "numbers of series" + ); + } + } + if (t_msk_.size() > 0) { if (q_prd_.shape(0) != t_msk_.shape(0)) @@ -407,6 +424,7 @@ namespace evalhyd const XD2& obs = q_transform(q_obs_); const XD2& prd = q_transform(q_prd_); + const XD2& thr = q_transform(q_thr_); // generate bootstrap experiment if requested std::vector<xt::xkeep_slice<int>> exp; @@ -440,7 +458,7 @@ namespace evalhyd // instantiate determinist evaluator determinist::Evaluator<XD2, XB3> evaluator( - obs, prd, + obs, prd, thr, events, t_msk_.size() > 0 ? t_msk_: (m_cdt_.size() > 0 ? c_msk : t_msk_), exp ); @@ -516,6 +534,12 @@ namespace evalhyd uncertainty::summarise_d(evaluator.get_KGENP_D(), summary) ); } + else if ( metric == "CONT_TBL" ) + { + r.emplace_back( + uncertainty::summarise_d(evaluator.get_CONT_TBL(), summary) + ); + } } if ( diagnostics.has_value() ) diff --git a/tests/expected/evald/CONT_TBL.csv b/tests/expected/evald/CONT_TBL.csv new file mode 100644 index 0000000..d9490d6 --- /dev/null +++ b/tests/expected/evald/CONT_TBL.csv @@ -0,0 +1,204 @@ +157.,19.,15.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +157.,19.,15.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +157.,19.,15.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +157.,19.,15.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,19.,14.,120. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,21.,14.,118. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,21.,14.,118. +200.,15.,8.,88. +220.,21.,6.,64. +NAN,NAN,NAN,NAN +158.,21.,14.,118. +200.,16.,8.,87. +221.,21.,5.,64. +NAN,NAN,NAN,NAN diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp index 8bcd4df..57cd892 100644 --- a/tests/test_determinist.cpp +++ b/tests/test_determinist.cpp @@ -30,7 +30,8 @@ using namespace xt::placeholders; // required for `_` to work std::vector<std::string> all_metrics_d = { "MAE", "MARE", "MSE", "RMSE", - "NSE", "KGE", "KGE_D", "KGEPRIME", "KGEPRIME_D", "KGENP", "KGENP_D" + "NSE", "KGE", "KGE_D", "KGEPRIME", "KGEPRIME_D", "KGENP", "KGENP_D", + "CONT_TBL" }; std::tuple<xt::xtensor<double, 2>, xt::xtensor<double, 2>> load_data_d() @@ -74,18 +75,31 @@ TEST(DeterministTests, TestMetrics) xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // read in expected results auto expected = load_expected_d(); // compute scores (with 2D tensors) std::vector<xt::xarray<double>> results = evalhyd::evald( - observed, predicted, all_metrics_d + observed, predicted, all_metrics_d, thresholds, "high" ); // check results for (std::size_t m = 0; m < all_metrics_d.size(); m++) { + if (all_metrics_d[m] == "CONT_TBL") + { + // /!\ stacked-up thresholds in CSV file because 5D metric, + // so need to resize array + expected[all_metrics_d[m]].resize( + {predicted.shape(0), std::size_t {1}, std::size_t {1}, + thresholds.shape(1), std::size_t {4}} + ); + } + EXPECT_TRUE(xt::all(xt::isclose( results[m], expected[all_metrics_d[m]], 1e-05, 1e-08, true ))) << "Failure for (" << all_metrics_d[m] << ")"; @@ -99,15 +113,21 @@ TEST(DeterministTests, TestTransform) xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // compute and check results on square-rooted streamflow series std::vector<xt::xarray<double>> results_sqrt = - evalhyd::evald(observed, predicted, all_metrics_d, "sqrt"); + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, "high", "sqrt"); xt::xtensor<double, 2> obs_sqrt = xt::sqrt(observed); xt::xtensor<double, 2> prd_sqrt = xt::sqrt(predicted); + xt::xtensor<double, 2> thr_sqrt = xt::sqrt(thresholds); std::vector<xt::xarray<double>> results_sqrt_ = - evalhyd::evald(obs_sqrt, prd_sqrt, all_metrics_d); + evalhyd::evald(obs_sqrt, prd_sqrt, all_metrics_d, + thr_sqrt, "high"); for (std::size_t m = 0; m < all_metrics_d.size(); m++) { @@ -118,14 +138,17 @@ TEST(DeterministTests, TestTransform) // compute and check results on inverted streamflow series std::vector<xt::xarray<double>> results_inv = - evalhyd::evald(observed, predicted, all_metrics_d, "inv"); + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, "high", "inv"); xt::xtensor<double, 2> epsilon = xt::mean(observed, {1}, xt::keep_dims) * 0.01; xt::xtensor<double, 2> obs_inv = 1. / (observed + epsilon); xt::xtensor<double, 2> prd_inv = 1. / (predicted + epsilon); + xt::xtensor<double, 2> thr_inv = 1. / (thresholds + epsilon); std::vector<xt::xarray<double>> results_inv_ = - evalhyd::evald(obs_inv, prd_inv, all_metrics_d); + evalhyd::evald(obs_inv, prd_inv, all_metrics_d, + thr_inv, "high"); for (std::size_t m = 0; m < all_metrics_d.size(); m++) { @@ -136,13 +159,16 @@ TEST(DeterministTests, TestTransform) // compute and check results on square-rooted streamflow series std::vector<xt::xarray<double>> results_log = - evalhyd::evald(observed, predicted, all_metrics_d, "log"); + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, "high", "log"); xt::xtensor<double, 2> obs_log = xt::log(observed + epsilon); xt::xtensor<double, 2> prd_log = xt::log(predicted + epsilon); + xt::xtensor<double, 2> thr_log = xt::log(thresholds + epsilon); std::vector<xt::xarray<double>> results_log_ = - evalhyd::evald(obs_log, prd_log, all_metrics_d); + evalhyd::evald(obs_log, prd_log, all_metrics_d, + thr_log, "high"); for (std::size_t m = 0; m < all_metrics_d.size(); m++) { @@ -153,13 +179,16 @@ TEST(DeterministTests, TestTransform) // compute and check results on power-transformed streamflow series std::vector<xt::xarray<double>> results_pow = - evalhyd::evald(observed, predicted, all_metrics_d, "pow", 0.2); + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, "high", "pow", 0.2); xt::xtensor<double, 2> obs_pow = xt::pow(observed, 0.2); xt::xtensor<double, 2> prd_pow = xt::pow(predicted, 0.2); + xt::xtensor<double, 2> thr_pow = xt::pow(thresholds, 0.2); std::vector<xt::xarray<double>> results_pow_ = - evalhyd::evald(obs_pow, prd_pow, all_metrics_d); + evalhyd::evald(obs_pow, prd_pow, all_metrics_d, + thr_pow, "high"); for (std::size_t m = 0; m < all_metrics_d.size(); m++) { @@ -177,6 +206,9 @@ TEST(DeterministTests, TestMasks) xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // generate temporal subset by dropping 20 first time steps xt::xtensor<bool, 3> masks = xt::ones<bool>({std::size_t {predicted.shape(0)}, @@ -186,20 +218,27 @@ TEST(DeterministTests, TestMasks) // compute scores using masks to subset whole record std::vector<xt::xarray<double>> metrics_masked = - evalhyd::evald(observed, predicted, all_metrics_d, {}, {}, {}, masks); + evalhyd::evald(observed, predicted, all_metrics_d, + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + masks); // compute scores on pre-computed subset of whole record xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _)); xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _)); std::vector<xt::xarray<double>> metrics_subset = - evalhyd::evald(obs, prd, all_metrics_d); + evalhyd::evald(obs, prd, all_metrics_d, thresholds, "high"); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE(xt::all(xt::isclose(metrics_masked[m], metrics_subset[m]))) - << "Failure for (" << all_metrics_d[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_masked[m], metrics_subset[m], 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } } @@ -210,6 +249,9 @@ TEST(DeterministTests, TestMaskingConditions) xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // generate dummy empty masks required to access next optional argument xt::xtensor<bool, 3> masks; @@ -224,7 +266,12 @@ TEST(DeterministTests, TestMaskingConditions) std::vector<xt::xarray<double>> metrics_q_conditioned = evalhyd::evald( observed, predicted, all_metrics_d, - {}, {}, {}, masks, q_conditions + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + masks, q_conditions ); // compute scores using "NaN-ed" time indices where conditions on streamflow met @@ -232,17 +279,18 @@ TEST(DeterministTests, TestMaskingConditions) evalhyd::evald( xt::eval(xt::where((observed < 2000) | (observed > 3000), observed, NAN)), predicted, - all_metrics_d + all_metrics_d, + thresholds, + "high" ); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE( - xt::all(xt::isclose( - metrics_q_conditioned[m], metrics_q_preconditioned[m] - )) - ) << "Failure for (" << all_metrics_d[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_q_conditioned[m], metrics_q_preconditioned[m], + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } // conditions on streamflow statistics _____________________________________ @@ -258,7 +306,12 @@ TEST(DeterministTests, TestMaskingConditions) std::vector<xt::xarray<double>> metrics_q_conditioned_ = evalhyd::evald( observed, predicted, all_metrics_d, - {}, {}, {}, masks, q_conditions_ + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + masks, q_conditions_ ); // compute scores using "NaN-ed" time indices where conditions on streamflow met @@ -266,17 +319,18 @@ TEST(DeterministTests, TestMaskingConditions) evalhyd::evald( xt::eval(xt::where(observed >= mean, observed, NAN)), predicted, - all_metrics_d + all_metrics_d, + thresholds, + "high" ); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE( - xt::all(xt::isclose( - metrics_q_conditioned_[m], metrics_q_preconditioned_[m] - )) - ) << "Failure for (" << all_metrics_d[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_q_conditioned_[m], metrics_q_preconditioned_[m], + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } // conditions on temporal indices __________________________________________ @@ -290,7 +344,12 @@ TEST(DeterministTests, TestMaskingConditions) std::vector<xt::xarray<double>> metrics_t_conditioned = evalhyd::evald( observed, predicted, all_metrics_d, - {}, {}, {}, masks, t_conditions + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + masks, t_conditions ); // compute scores on already subset time series @@ -298,17 +357,18 @@ TEST(DeterministTests, TestMaskingConditions) evalhyd::evald( xt::eval(xt::view(observed, xt::all(), xt::range(0, 100))), xt::eval(xt::view(predicted, xt::all(), xt::range(0, 100))), - all_metrics_d + all_metrics_d, + thresholds, + "high" ); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE( - xt::all(xt::isclose( - metrics_t_conditioned[m], metrics_t_subset[m] - )) - ) << "Failure for (" << all_metrics_d[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_t_conditioned[m], metrics_t_subset[m], + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } } @@ -320,6 +380,9 @@ TEST(DeterministTests, TestMissingData) std::tie(observed, predicted) = load_data_d(); predicted = xt::view(predicted, xt::range(0, 5), xt::all()); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // add some missing observations artificially by assigning NaN values xt::view(observed, xt::all(), xt::range(0, 20)) = NAN; // add some missing predictions artificially by assigning NaN values @@ -331,7 +394,7 @@ TEST(DeterministTests, TestMissingData) // compute metrics with observations containing NaN values std::vector<xt::xarray<double>> metrics_nan = - evalhyd::evald(observed, predicted, all_metrics_d); + evalhyd::evald(observed, predicted, all_metrics_d, thresholds, "high"); for (std::size_t m = 0; m < all_metrics_d.size(); m++) { @@ -343,21 +406,23 @@ TEST(DeterministTests, TestMissingData) xt::view(observed, 0, xt::range(20+(3*(p+1)), _)); xt::xtensor<double, 1> prd = xt::view(predicted, p, xt::range(20+(3*(p+1)), _)); + xt::xtensor<double, 1> thr = + xt::view(thresholds, p); std::vector<xt::xarray<double>> metrics_sbs = evalhyd::evald( xt::eval(xt::view(obs, xt::newaxis(), xt::all())), xt::eval(xt::view(prd, xt::newaxis(), xt::all())), - {all_metrics_d[m]} + {all_metrics_d[m]}, + xt::eval(xt::view(thr, xt::newaxis(), xt::all())), + "high" ); // compare to check results are the same - EXPECT_TRUE( - xt::all(xt::isclose( - xt::view(metrics_nan[m], p), - metrics_sbs[0] - )) - ) << "Failure for (" << all_metrics_d[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + xt::view(metrics_nan[m], p), metrics_sbs[0], + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } } } @@ -380,6 +445,9 @@ TEST(DeterministTests, TestBootstrap) xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1); ifs.close(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // compute metrics via bootstrap std::unordered_map<std::string, int> bootstrap = {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; @@ -389,9 +457,11 @@ TEST(DeterministTests, TestBootstrap) xt::eval(xt::view(observed, xt::newaxis(), xt::all())), predicted, all_metrics_d, - {}, // transform - {}, // exponent - {}, // epsilon + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon xt::xtensor<bool, 3>({}), // t_msk xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt bootstrap, @@ -413,17 +483,17 @@ TEST(DeterministTests, TestBootstrap) evalhyd::evald( xt::eval(xt::view(observed_x3, xt::newaxis(), xt::all())), predicted_x3, - all_metrics_d + all_metrics_d, + thresholds, + "high" ); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { - EXPECT_TRUE( - xt::all(xt::isclose( - metrics_bts[m], metrics_rep[m] - )) - ) << "Failure for (" << all_metrics_d[m] << ")"; + EXPECT_TRUE(xt::all(xt::isclose( + metrics_bts[m], metrics_rep[m], 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ")"; } } @@ -445,6 +515,9 @@ TEST(DeterministTests, TestBootstrapSummary) xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1); ifs.close(); + xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; + thresholds = xt::repeat(thresholds, predicted.shape(0), 0); + // compute metrics via bootstrap with raw summary std::unordered_map<std::string, int> bootstrap_0 = {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; @@ -454,9 +527,11 @@ TEST(DeterministTests, TestBootstrapSummary) xt::eval(xt::view(observed, xt::newaxis(), xt::all())), predicted, all_metrics_d, - {}, // transform - {}, // exponent - {}, // epsilon + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon xt::xtensor<bool, 3>({}), // t_msk xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt bootstrap_0, @@ -472,9 +547,11 @@ TEST(DeterministTests, TestBootstrapSummary) xt::eval(xt::view(observed, xt::newaxis(), xt::all())), predicted, all_metrics_d, - {}, // transform - {}, // exponent - {}, // epsilon + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon xt::xtensor<bool, 3>({}), // t_msk xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt bootstrap_1, @@ -485,19 +562,17 @@ TEST(DeterministTests, TestBootstrapSummary) for (std::size_t m = 0; m < all_metrics_d.size(); m++) { // mean - EXPECT_TRUE( - xt::all(xt::isclose( - xt::mean(metrics_raw[m], {2}), - xt::view(metrics_mas[m], xt::all(), xt::all(), 0) - )) - ) << "Failure for (" << all_metrics_d[m] << ") on mean"; + EXPECT_TRUE(xt::all(xt::isclose( + xt::mean(metrics_raw[m], {2}), + xt::view(metrics_mas[m], xt::all(), xt::all(), 0), + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ") on mean"; // standard deviation - EXPECT_TRUE( - xt::all(xt::isclose( - xt::stddev(metrics_raw[m], {2}), - xt::view(metrics_mas[m], xt::all(), xt::all(), 1) - )) - ) << "Failure for (" << all_metrics_d[m] << ") on standard deviation"; + EXPECT_TRUE(xt::all(xt::isclose( + xt::stddev(metrics_raw[m], {2}), + xt::view(metrics_mas[m], xt::all(), xt::all(), 1), + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ") on standard deviation"; } // compute metrics via bootstrap with quantiles summary @@ -509,9 +584,11 @@ TEST(DeterministTests, TestBootstrapSummary) xt::eval(xt::view(observed, xt::newaxis(), xt::all())), predicted, all_metrics_d, - {}, // transform - {}, // exponent - {}, // epsilon + thresholds, // thresholds + "high", // events + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon xt::xtensor<bool, 3>({}), // t_msk xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt bootstrap_2, @@ -527,12 +604,11 @@ TEST(DeterministTests, TestBootstrapSummary) for (auto q : quantiles) { - EXPECT_TRUE( - xt::all(xt::isclose( - xt::quantile(metrics_raw[m], {q}, 2), - xt::view(metrics_qtl[m], xt::all(), xt::all(), i) - )) - ) << "Failure for (" << all_metrics_d[m] << ") on quantile " << q; + EXPECT_TRUE(xt::all(xt::isclose( + xt::quantile(metrics_raw[m], {q}, 2), + xt::view(metrics_qtl[m], xt::all(), xt::all(), i), + 1e-05, 1e-08, true + ))) << "Failure for (" << all_metrics_d[m] << ") on quantile " << q; i++; } } @@ -566,6 +642,8 @@ TEST(DeterministTests, TestCompleteness) obs, prd, std::vector<std::string> {}, // metrics + xt::xtensor<double, 2>({}), // thresholds + xtl::missing<const std::string>(), // events xtl::missing<const std::string>(), // transform xtl::missing<double>(), // exponent xtl::missing<double>(), // epsilon -- GitLab