From fadb5fa948444c1c0b2c941e8cdeec1e84163ca1 Mon Sep 17 00:00:00 2001 From: Thibault Hallouin <thibault.hallouin@inrae.fr> Date: Tue, 23 Aug 2022 14:05:44 +0200 Subject: [PATCH] deal with both missing observations/predictions for determinist Unlike 3de89046390def7e25171aa4a420ebe4f0ead06d, this also deals with missing predictions. This effectively pairwise remove those time steps where either observations or predictions are missing data. This does so without creating copies of the input data, via the use of xfunctions stored as class members. The unit test for missing data is updated to include NAN also in the predictions (with a varying number of NAN across the 5 time series). --- include/evalhyd/evald.hpp | 7 +++-- src/determinist/evaluator.hpp | 57 +++++++++++++++++++++++------------ tests/test_determinist.cpp | 40 ++++++++++++++++-------- 3 files changed, 69 insertions(+), 35 deletions(-) diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp index a3af81b..248071d 100644 --- a/include/evalhyd/evald.hpp +++ b/include/evalhyd/evald.hpp @@ -35,14 +35,17 @@ namespace evalhyd /// :Parameters: /// /// q_obs: ``xt::xexpression<A>`` - /// Streamflow observations. Time steps with missing observation + /// Streamflow observations. Time steps with missing observations /// must be assigned `NAN` values. Those time steps will be ignored /// both in the observations and the predictions before the /// *metrics* are computed. /// shape: ({1+, ...}, time) /// /// q_prd: ``xt::xexpression<A>`` - /// Streamflow predictions. + /// Streamflow predictions. Time steps with missing predictions + /// must be assigned `NAN` values. Those time steps will be ignored + /// both in the observations and the predictions before the + /// *metrics* are computed. /// shape: ({1+, ...}, time) /// /// metrics: ``std::vector<std::string>`` diff --git a/src/determinist/evaluator.hpp b/src/determinist/evaluator.hpp index 95f0346..124b9f0 100644 --- a/src/determinist/evaluator.hpp +++ b/src/determinist/evaluator.hpp @@ -26,10 +26,20 @@ namespace evalhyd template <class A> class Evaluator { + // define type for function to be used to "mask" (i.e. yield NaNs) the + // time steps for which the observations/predictions are missing data + typedef xt::xfunction<xt::detail::conditional_ternary, + xt::xfunction<xt::math::isnan_fun, const A&>, + xt::xscalar<float>, + const A&> nan_function; + private: // members for input data - const A& q_obs; - const A& q_prd; + const A& _q_obs; + const A& _q_prd; + nan_function q_obs; + nan_function q_prd; + // members for computational elements A mean_obs; A mean_prd; @@ -43,17 +53,33 @@ namespace evalhyd // constructor method Evaluator(const xt::xexpression<A>& obs, const xt::xexpression<A>& prd) : - q_obs{obs.derived_cast()}, - q_prd{prd.derived_cast()} + _q_obs{obs.derived_cast()}, + _q_prd{prd.derived_cast()}, + // "mask" (i.e. use NaN) observations where predictions are + // missing (i.e. NaN) + q_obs{xt::where(xt::isnan(prd.derived_cast()), + NAN, obs.derived_cast())}, + // "mask" (i.e. use NaN) predictions where observations are + // missing (i.e. NaN) + q_prd{xt::where(xt::isnan(obs.derived_cast()), + NAN, prd.derived_cast())} { // check that data dimensions are compatible - if (q_prd.dimension() != q_obs.dimension()) + if (_q_prd.dimension() != _q_obs.dimension()) { throw std::runtime_error( "number of dimensions of 'sim' and 'obs' " "must be identical" ); } + else if (_q_prd.shape(_q_prd.dimension() - 1) + != _q_obs.shape( _q_obs.dimension() - 1)) + { + throw std::runtime_error( + "length of time series of 'sim' and 'obs' " + "must be identical" + ); + } }; // members for evaluation metrics @@ -103,10 +129,7 @@ namespace evalhyd template <class A> void Evaluator<A>::calc_mean_prd() { - mean_prd = xt::nanmean( - xt::where(xt::isnan(q_obs), NAN, q_prd), - -1, xt::keep_dims - ); + mean_prd = xt::nanmean(q_prd, -1, xt::keep_dims); } // Compute the quadratic error between observations and predictions. @@ -157,9 +180,7 @@ namespace evalhyd template <class A> void Evaluator<A>::calc_quad_prd() { - quad_prd = xt::square( - xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd - ); + quad_prd = xt::square(q_prd - mean_prd); } // Compute the Pearson correlation coefficient. @@ -191,8 +212,7 @@ namespace evalhyd // calculate error in timing and dynamics $r_{pearson}$ // (Pearson's correlation coefficient) auto r_num = xt::nansum( - (xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd) - * (q_obs - mean_obs), + (q_prd - mean_prd) * (q_obs - mean_obs), -1, xt::keep_dims ); auto r_den = xt::sqrt( @@ -218,8 +238,7 @@ namespace evalhyd void Evaluator<A>::calc_bias() { // calculate $bias$ - bias = xt::nansum(xt::where(xt::isnan(q_obs), NAN, q_prd), - -1, xt::keep_dims) + bias = xt::nansum(q_prd, -1, xt::keep_dims) / xt::nansum(q_obs, -1, xt::keep_dims); } @@ -287,8 +306,7 @@ namespace evalhyd void Evaluator<A>::calc_KGE() { // calculate error in spread of flow $alpha$ - auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd); - auto alpha = detail::std_dev(q_prd_, mean_prd) + auto alpha = detail::std_dev(q_prd, mean_prd) / detail::std_dev(q_obs, mean_obs); // compute KGE @@ -326,8 +344,7 @@ namespace evalhyd void Evaluator<A>::calc_KGEPRIME() { // calculate error in spread of flow $gamma$ - auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd); - auto gamma = (detail::std_dev(q_prd_, mean_prd) / mean_prd) + auto gamma = (detail::std_dev(q_prd, mean_prd) / mean_prd) / (detail::std_dev(q_obs, mean_obs) / mean_obs); // compute KGE diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp index 8ef2fec..89c213e 100644 --- a/tests/test_determinist.cpp +++ b/tests/test_determinist.cpp @@ -189,6 +189,12 @@ TEST(DeterministTests, TestMissingData) // add some missing observations artificially by assigning NaN values xt::view(observed, xt::all(), xt::range(0, 20)) = NAN; + // add some missing predictions artificially by assigning NaN values + xt::view(observed, 0, xt::range(20, 23)) = NAN; + xt::view(observed, 1, xt::range(20, 26)) = NAN; + xt::view(observed, 2, xt::range(20, 29)) = NAN; + xt::view(observed, 3, xt::range(20, 32)) = NAN; + xt::view(observed, 4, xt::range(20, 35)) = NAN; // compute metrics with observations containing NaN values std::vector<xt::xarray<double>> metrics_nan = @@ -196,19 +202,27 @@ TEST(DeterministTests, TestMissingData) observed, predicted, metrics ); - // compute metrics on subset of observations and predictions - // (i.e. eliminating region with NaN in observations manually) - xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _)); - xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _)); - - std::vector<xt::xarray<double>> metrics_sbs = - evalhyd::evald<xt::xtensor<double, 2>>( - obs, prd, metrics - ); - for (int m = 0; m < metrics.size(); m++) { - EXPECT_TRUE( - xt::allclose(metrics_nan[m], metrics_sbs[m]) - ) << "Failure for (" << metrics[m] << ")"; + for (int p = 0; p < predicted.shape(0); p++) { + // compute metrics on subset of observations and predictions + // (i.e. eliminating region with NaN in observations manually) + xt::xtensor<double, 1> obs = + xt::view(observed, 0, xt::range(3*(p+1), _)); + xt::xtensor<double, 1> prd = + xt::view(predicted, p, xt::range(3*(p+1), _)); + + std::vector<xt::xarray<double>> metrics_sbs = + evalhyd::evald<xt::xtensor<double, 1>>( + obs, prd, {metrics[m]} + ); + + // compare to check results are the same + EXPECT_TRUE( + xt::allclose( + xt::view(metrics_nan[m], p), + metrics_sbs[0] + ) + ) << "Failure for (" << metrics[m] << ")"; + } } } \ No newline at end of file -- GitLab