diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp index e04de65ec3bc6d018775bea07489c966241620d5..a3af81b522bd7fd5247b88febbc67dfac56613e9 100644 --- a/include/evalhyd/evald.hpp +++ b/include/evalhyd/evald.hpp @@ -35,7 +35,10 @@ namespace evalhyd /// :Parameters: /// /// q_obs: ``xt::xexpression<A>`` - /// Streamflow observations. + /// Streamflow observations. Time steps with missing observation + /// must be assigned `NAN` values. Those time steps will be ignored + /// both in the observations and the predictions before the + /// *metrics* are computed. /// shape: ({1+, ...}, time) /// /// q_prd: ``xt::xexpression<A>`` diff --git a/src/determinist/evaluator.hpp b/src/determinist/evaluator.hpp index f47060ac9842811f79d78b17be1a1cfc0ab86398..95f034643a401d6e9adf9d239fe2e8fea613320c 100644 --- a/src/determinist/evaluator.hpp +++ b/src/determinist/evaluator.hpp @@ -15,7 +15,9 @@ namespace evalhyd template <class A1, class A2> inline auto std_dev(A1&& arr, A2&& mean_arr) { - return xt::sqrt(xt::mean(xt::square(arr - mean_arr), -1, xt::keep_dims)); + return xt::sqrt( + xt::nanmean(xt::square(arr - mean_arr), -1, xt::keep_dims) + ); } } @@ -53,11 +55,13 @@ namespace evalhyd ); } }; + // members for evaluation metrics A RMSE; A NSE; A KGE; A KGEPRIME; + // methods to compute elements void calc_mean_obs(); void calc_mean_prd(); @@ -66,6 +70,7 @@ namespace evalhyd void calc_quad_prd(); void calc_r_pearson(); void calc_bias(); + // methods to compute metrics void calc_RMSE(); void calc_NSE(); @@ -84,7 +89,7 @@ namespace evalhyd template <class A> void Evaluator<A>::calc_mean_obs() { - mean_obs = xt::mean(q_obs, -1, xt::keep_dims); + mean_obs = xt::nanmean(q_obs, -1, xt::keep_dims); } // Compute the mean of the predictions. @@ -98,7 +103,10 @@ namespace evalhyd template <class A> void Evaluator<A>::calc_mean_prd() { - mean_prd = xt::mean(q_prd, -1, xt::keep_dims); + mean_prd = xt::nanmean( + xt::where(xt::isnan(q_obs), NAN, q_prd), + -1, xt::keep_dims + ); } // Compute the quadratic error between observations and predictions. @@ -149,7 +157,9 @@ namespace evalhyd template <class A> void Evaluator<A>::calc_quad_prd() { - quad_prd = xt::square(q_prd - mean_prd); + quad_prd = xt::square( + xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd + ); } // Compute the Pearson correlation coefficient. @@ -180,12 +190,14 @@ namespace evalhyd { // calculate error in timing and dynamics $r_{pearson}$ // (Pearson's correlation coefficient) - auto r_num = xt::sum( - (q_prd - mean_prd) * (q_obs - mean_obs), -1, xt::keep_dims + auto r_num = xt::nansum( + (xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd) + * (q_obs - mean_obs), + -1, xt::keep_dims ); auto r_den = xt::sqrt( - xt::sum(quad_prd, -1, xt::keep_dims) - * xt::sum(quad_obs, -1, xt::keep_dims) + xt::nansum(quad_prd, -1, xt::keep_dims) + * xt::nansum(quad_obs, -1, xt::keep_dims) ); r_pearson = r_num / r_den; @@ -206,8 +218,9 @@ namespace evalhyd void Evaluator<A>::calc_bias() { // calculate $bias$ - bias = xt::sum(q_prd, -1, xt::keep_dims) - / xt::sum(q_obs, -1, xt::keep_dims); + bias = xt::nansum(xt::where(xt::isnan(q_obs), NAN, q_prd), + -1, xt::keep_dims) + / xt::nansum(q_obs, -1, xt::keep_dims); } // Compute the root-mean-square error (RMSE). @@ -222,7 +235,7 @@ namespace evalhyd void Evaluator<A>::calc_RMSE() { // compute RMSE - RMSE = xt::sqrt(xt::mean(quad_err, -1, xt::keep_dims)); + RMSE = xt::sqrt(xt::nanmean(quad_err, -1, xt::keep_dims)); } // Compute the Nash-Sutcliffe Efficiency (NSE). @@ -240,8 +253,8 @@ namespace evalhyd void Evaluator<A>::calc_NSE() { // compute squared errors operands - A f_num = xt::sum(quad_err, -1, xt::keep_dims); - A f_den = xt::sum(quad_obs, -1, xt::keep_dims); + A f_num = xt::nansum(quad_err, -1, xt::keep_dims); + A f_den = xt::nansum(quad_obs, -1, xt::keep_dims); // compute NSE NSE = 1 - (f_num / f_den); @@ -274,7 +287,8 @@ namespace evalhyd void Evaluator<A>::calc_KGE() { // calculate error in spread of flow $alpha$ - auto alpha = detail::std_dev(q_prd, mean_prd) + auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd); + auto alpha = detail::std_dev(q_prd_, mean_prd) / detail::std_dev(q_obs, mean_obs); // compute KGE @@ -312,7 +326,8 @@ namespace evalhyd void Evaluator<A>::calc_KGEPRIME() { // calculate error in spread of flow $gamma$ - auto gamma = (detail::std_dev(q_prd, mean_prd) / mean_prd) + auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd); + auto gamma = (detail::std_dev(q_prd_, mean_prd) / mean_prd) / (detail::std_dev(q_obs, mean_obs) / mean_obs); // compute KGE diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp index 4d1c48d5b181116d01c276481c0d1a953aff808a..8ef2fec6e58d2d7ff1d40d82da2cfbb688966265 100644 --- a/tests/test_determinist.cpp +++ b/tests/test_determinist.cpp @@ -8,6 +8,8 @@ #include "evalhyd/evald.hpp" +using namespace xt::placeholders; // required for `_` to work + TEST(DeterministTests, TestMetrics2D) { // read in data @@ -167,3 +169,46 @@ TEST(DeterministTests, TestTransform) EXPECT_TRUE(xt::allclose(metrics[0], nse_pow)); } + +TEST(DeterministTests, TestMissingData) +{ + std::vector<std::string> metrics = + {"RMSE", "NSE", "KGE", "KGEPRIME"}; + + // read in data + std::ifstream ifs; + ifs.open("./data/q_obs.csv"); + xt::xtensor<double, 2> observed =xt::load_csv<int>(ifs); + ifs.close(); + + ifs.open("./data/q_prd.csv"); + xt::xtensor<double, 2> predicted = xt::view( + xt::load_csv<double>(ifs), xt::range(0, 5), xt::all() + ); + ifs.close(); + + // add some missing observations artificially by assigning NaN values + xt::view(observed, xt::all(), xt::range(0, 20)) = NAN; + + // compute metrics with observations containing NaN values + std::vector<xt::xarray<double>> metrics_nan = + evalhyd::evald<xt::xtensor<double, 2>>( + observed, predicted, metrics + ); + + // compute metrics on subset of observations and predictions + // (i.e. eliminating region with NaN in observations manually) + xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _)); + xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _)); + + std::vector<xt::xarray<double>> metrics_sbs = + evalhyd::evald<xt::xtensor<double, 2>>( + obs, prd, metrics + ); + + for (int m = 0; m < metrics.size(); m++) { + EXPECT_TRUE( + xt::allclose(metrics_nan[m], metrics_sbs[m]) + ) << "Failure for (" << metrics[m] << ")"; + } +} \ No newline at end of file