From 3de89046390def7e25171aa4a420ebe4f0ead06d Mon Sep 17 00:00:00 2001 From: Thibault Hallouin <thibault.hallouin@inrae.fr> Date: Tue, 23 Aug 2022 09:55:50 +0200 Subject: [PATCH] deal with missing observations as NaN for determinist In order to avoid copies of the input data, xfunctions are generated to replace timesteps in predictions where they are NaN in observations. This means that with this implementation, missing predictions are not supported. --- include/evalhyd/evald.hpp | 5 +++- src/determinist/evaluator.hpp | 45 +++++++++++++++++++++++------------ tests/test_determinist.cpp | 45 +++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 16 deletions(-) diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp index e04de65..a3af81b 100644 --- a/include/evalhyd/evald.hpp +++ b/include/evalhyd/evald.hpp @@ -35,7 +35,10 @@ namespace evalhyd /// :Parameters: /// /// q_obs: ``xt::xexpression<A>`` - /// Streamflow observations. + /// Streamflow observations. Time steps with missing observation + /// must be assigned `NAN` values. Those time steps will be ignored + /// both in the observations and the predictions before the + /// *metrics* are computed. /// shape: ({1+, ...}, time) /// /// q_prd: ``xt::xexpression<A>`` diff --git a/src/determinist/evaluator.hpp b/src/determinist/evaluator.hpp index f47060a..95f0346 100644 --- a/src/determinist/evaluator.hpp +++ b/src/determinist/evaluator.hpp @@ -15,7 +15,9 @@ namespace evalhyd template <class A1, class A2> inline auto std_dev(A1&& arr, A2&& mean_arr) { - return xt::sqrt(xt::mean(xt::square(arr - mean_arr), -1, xt::keep_dims)); + return xt::sqrt( + xt::nanmean(xt::square(arr - mean_arr), -1, xt::keep_dims) + ); } } @@ -53,11 +55,13 @@ namespace evalhyd ); } }; + // members for evaluation metrics A RMSE; A NSE; A KGE; A KGEPRIME; + // methods to compute elements void calc_mean_obs(); void calc_mean_prd(); @@ -66,6 +70,7 @@ namespace evalhyd void calc_quad_prd(); void calc_r_pearson(); void calc_bias(); + // methods to compute metrics void calc_RMSE(); void calc_NSE(); @@ -84,7 +89,7 @@ namespace evalhyd template <class A> void Evaluator<A>::calc_mean_obs() { - mean_obs = xt::mean(q_obs, -1, xt::keep_dims); + mean_obs = xt::nanmean(q_obs, -1, xt::keep_dims); } // Compute the mean of the predictions. @@ -98,7 +103,10 @@ namespace evalhyd template <class A> void Evaluator<A>::calc_mean_prd() { - mean_prd = xt::mean(q_prd, -1, xt::keep_dims); + mean_prd = xt::nanmean( + xt::where(xt::isnan(q_obs), NAN, q_prd), + -1, xt::keep_dims + ); } // Compute the quadratic error between observations and predictions. @@ -149,7 +157,9 @@ namespace evalhyd template <class A> void Evaluator<A>::calc_quad_prd() { - quad_prd = xt::square(q_prd - mean_prd); + quad_prd = xt::square( + xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd + ); } // Compute the Pearson correlation coefficient. @@ -180,12 +190,14 @@ namespace evalhyd { // calculate error in timing and dynamics $r_{pearson}$ // (Pearson's correlation coefficient) - auto r_num = xt::sum( - (q_prd - mean_prd) * (q_obs - mean_obs), -1, xt::keep_dims + auto r_num = xt::nansum( + (xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd) + * (q_obs - mean_obs), + -1, xt::keep_dims ); auto r_den = xt::sqrt( - xt::sum(quad_prd, -1, xt::keep_dims) - * xt::sum(quad_obs, -1, xt::keep_dims) + xt::nansum(quad_prd, -1, xt::keep_dims) + * xt::nansum(quad_obs, -1, xt::keep_dims) ); r_pearson = r_num / r_den; @@ -206,8 +218,9 @@ namespace evalhyd void Evaluator<A>::calc_bias() { // calculate $bias$ - bias = xt::sum(q_prd, -1, xt::keep_dims) - / xt::sum(q_obs, -1, xt::keep_dims); + bias = xt::nansum(xt::where(xt::isnan(q_obs), NAN, q_prd), + -1, xt::keep_dims) + / xt::nansum(q_obs, -1, xt::keep_dims); } // Compute the root-mean-square error (RMSE). @@ -222,7 +235,7 @@ namespace evalhyd void Evaluator<A>::calc_RMSE() { // compute RMSE - RMSE = xt::sqrt(xt::mean(quad_err, -1, xt::keep_dims)); + RMSE = xt::sqrt(xt::nanmean(quad_err, -1, xt::keep_dims)); } // Compute the Nash-Sutcliffe Efficiency (NSE). @@ -240,8 +253,8 @@ namespace evalhyd void Evaluator<A>::calc_NSE() { // compute squared errors operands - A f_num = xt::sum(quad_err, -1, xt::keep_dims); - A f_den = xt::sum(quad_obs, -1, xt::keep_dims); + A f_num = xt::nansum(quad_err, -1, xt::keep_dims); + A f_den = xt::nansum(quad_obs, -1, xt::keep_dims); // compute NSE NSE = 1 - (f_num / f_den); @@ -274,7 +287,8 @@ namespace evalhyd void Evaluator<A>::calc_KGE() { // calculate error in spread of flow $alpha$ - auto alpha = detail::std_dev(q_prd, mean_prd) + auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd); + auto alpha = detail::std_dev(q_prd_, mean_prd) / detail::std_dev(q_obs, mean_obs); // compute KGE @@ -312,7 +326,8 @@ namespace evalhyd void Evaluator<A>::calc_KGEPRIME() { // calculate error in spread of flow $gamma$ - auto gamma = (detail::std_dev(q_prd, mean_prd) / mean_prd) + auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd); + auto gamma = (detail::std_dev(q_prd_, mean_prd) / mean_prd) / (detail::std_dev(q_obs, mean_obs) / mean_obs); // compute KGE diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp index 4d1c48d..8ef2fec 100644 --- a/tests/test_determinist.cpp +++ b/tests/test_determinist.cpp @@ -8,6 +8,8 @@ #include "evalhyd/evald.hpp" +using namespace xt::placeholders; // required for `_` to work + TEST(DeterministTests, TestMetrics2D) { // read in data @@ -167,3 +169,46 @@ TEST(DeterministTests, TestTransform) EXPECT_TRUE(xt::allclose(metrics[0], nse_pow)); } + +TEST(DeterministTests, TestMissingData) +{ + std::vector<std::string> metrics = + {"RMSE", "NSE", "KGE", "KGEPRIME"}; + + // read in data + std::ifstream ifs; + ifs.open("./data/q_obs.csv"); + xt::xtensor<double, 2> observed =xt::load_csv<int>(ifs); + ifs.close(); + + ifs.open("./data/q_prd.csv"); + xt::xtensor<double, 2> predicted = xt::view( + xt::load_csv<double>(ifs), xt::range(0, 5), xt::all() + ); + ifs.close(); + + // add some missing observations artificially by assigning NaN values + xt::view(observed, xt::all(), xt::range(0, 20)) = NAN; + + // compute metrics with observations containing NaN values + std::vector<xt::xarray<double>> metrics_nan = + evalhyd::evald<xt::xtensor<double, 2>>( + observed, predicted, metrics + ); + + // compute metrics on subset of observations and predictions + // (i.e. eliminating region with NaN in observations manually) + xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _)); + xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _)); + + std::vector<xt::xarray<double>> metrics_sbs = + evalhyd::evald<xt::xtensor<double, 2>>( + obs, prd, metrics + ); + + for (int m = 0; m < metrics.size(); m++) { + EXPECT_TRUE( + xt::allclose(metrics_nan[m], metrics_sbs[m]) + ) << "Failure for (" << metrics[m] << ")"; + } +} \ No newline at end of file -- GitLab