deal with both missing observations/predictions for determinist

Unlike 3de89046, this also deals with missing predictions. This effectively pairwise remove those time steps where either observations or predictions are missing data. This does so without creating copies of the input data, via the use of xfunctions stored as class members. The unit test for missing data is updated to include NAN also in the predictions (with a varying number of NAN across the 5 time series).

deal with both missing observations/predictions for determinist
Unlike 3de89046, this also deals with missing predictions. This effectively pairwise remove those time steps where either observations or predictions are missing data. This does so without creating copies of the input data, via the use of xfunctions stored as class members. The unit test for missing data is updated to include NAN also in the predictions (with a varying number of NAN across the 5 time series).
fadb5fa9 · Thibault Hallouin · 3de89046 · fadb5fa9 · fadb5fa9 · fadb5fa9
Commit fadb5fa9 authored 2 years ago by Thibault Hallouin
Hide whitespace changes
Inline Side-by-side

Showing

with 69 additions and 35 deletions
+69 -35
--- a/include/evalhyd/evald.hpp
+++ b/include/evalhyd/evald.hpp
@@ -35,14 +35,17 @@ namespace evalhyd
    /// :Parameters:
    ///
    ///    q_obs: ``xt::xexpression<A>``
-    ///       Streamflow observations. Time steps with missing observation
+    ///       Streamflow observations. Time steps with missing observations
    ///       must be assigned `NAN` values. Those time steps will be ignored
    ///       both in the observations and the predictions before the
    ///       *metrics* are computed.
    ///       shape: ({1+, ...}, time)
    ///
    ///    q_prd: ``xt::xexpression<A>``
-    ///       Streamflow predictions.
+    ///       Streamflow predictions. Time steps with missing predictions
+    ///       must be assigned `NAN` values. Those time steps will be ignored
+    ///       both in the observations and the predictions before the
+    ///       *metrics* are computed.
    ///       shape: ({1+, ...}, time)
    ///
    ///    metrics: ``std::vector<std::string>``

--- a/src/determinist/evaluator.hpp
+++ b/src/determinist/evaluator.hpp
@@ -26,10 +26,20 @@ namespace evalhyd
        template <class A>
        class Evaluator
        {
+        // define type for function to be used to "mask" (i.e. yield NaNs) the
+        // time steps for which the observations/predictions are missing data
+        typedef xt::xfunction<xt::detail::conditional_ternary,
+                              xt::xfunction<xt::math::isnan_fun, const A&>,
+                              xt::xscalar<float>,
+                              const A&> nan_function;
        private:
            // members for input data
-            const A& q_obs;
+            const A& _q_obs;
-            const A& q_prd;
+            const A& _q_prd;
+            nan_function q_obs;
+            nan_function q_prd;
            // members for computational elements
            A mean_obs;
            A mean_prd;
@@ -43,17 +53,33 @@ namespace evalhyd
            // constructor method
            Evaluator(const xt::xexpression<A>& obs,
                      const xt::xexpression<A>& prd) :
-                    q_obs{obs.derived_cast()},
+                    _q_obs{obs.derived_cast()},
-                    q_prd{prd.derived_cast()}
+                    _q_prd{prd.derived_cast()},
+                    // "mask" (i.e. use NaN) observations where predictions are
+                    // missing (i.e. NaN)
+                    q_obs{xt::where(xt::isnan(prd.derived_cast()),
+                                    NAN, obs.derived_cast())},
+                    // "mask" (i.e. use NaN) predictions where observations are
+                    // missing (i.e. NaN)
+                    q_prd{xt::where(xt::isnan(obs.derived_cast()),
+                                    NAN, prd.derived_cast())}
            {
                // check that data dimensions are compatible
-                if (q_prd.dimension() != q_obs.dimension())
+                if (_q_prd.dimension() != _q_obs.dimension())
                {
                    throw std::runtime_error(
                            "number of dimensions of 'sim' and 'obs' "
                            "must be identical"
                    );
                }
+                else if (_q_prd.shape(_q_prd.dimension() - 1)
+                         != _q_obs.shape( _q_obs.dimension() - 1))
+                {
+                    throw std::runtime_error(
+                            "length of time series of 'sim' and 'obs' "
+                            "must be identical"
+                    );
+                }
            };
            // members for evaluation metrics
@@ -103,10 +129,7 @@ namespace evalhyd
        template <class A>
        void Evaluator<A>::calc_mean_prd()
        {
-            mean_prd = xt::nanmean(
+            mean_prd = xt::nanmean(q_prd, -1, xt::keep_dims);
-                    xt::where(xt::isnan(q_obs), NAN, q_prd),
-                    -1, xt::keep_dims
-            );
        }
        // Compute the quadratic error between observations and predictions.
@@ -157,9 +180,7 @@ namespace evalhyd
        template <class A>
        void Evaluator<A>::calc_quad_prd()
        {
-            quad_prd = xt::square(
+            quad_prd = xt::square(q_prd - mean_prd);
-                    xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd
-            );
        }
        // Compute the Pearson correlation coefficient.
@@ -191,8 +212,7 @@ namespace evalhyd
            // calculate error in timing and dynamics $r_{pearson}$
            // (Pearson's correlation coefficient)
            auto r_num = xt::nansum(
-                    (xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd)
+                    (q_prd - mean_prd) * (q_obs - mean_obs),
-                    * (q_obs - mean_obs),
                    -1, xt::keep_dims
            );
            auto r_den = xt::sqrt(
@@ -218,8 +238,7 @@ namespace evalhyd
        void Evaluator<A>::calc_bias()
        {
            // calculate $bias$
-            bias = xt::nansum(xt::where(xt::isnan(q_obs), NAN, q_prd),
+            bias = xt::nansum(q_prd, -1, xt::keep_dims)
-                              -1, xt::keep_dims)
                   / xt::nansum(q_obs, -1, xt::keep_dims);
        }
@@ -287,8 +306,7 @@ namespace evalhyd
        void Evaluator<A>::calc_KGE()
        {
            // calculate error in spread of flow $alpha$
-            auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd);
+            auto alpha = detail::std_dev(q_prd, mean_prd)
-            auto alpha = detail::std_dev(q_prd_, mean_prd)
                         / detail::std_dev(q_obs, mean_obs);
            // compute KGE
@@ -326,8 +344,7 @@ namespace evalhyd
        void Evaluator<A>::calc_KGEPRIME()
        {
            // calculate error in spread of flow $gamma$
-            auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd);
+            auto gamma = (detail::std_dev(q_prd, mean_prd) / mean_prd)
-            auto gamma = (detail::std_dev(q_prd_, mean_prd) / mean_prd)
                         / (detail::std_dev(q_obs, mean_obs) / mean_obs);
            // compute KGE

--- a/tests/test_determinist.cpp
+++ b/tests/test_determinist.cpp
@@ -189,6 +189,12 @@ TEST(DeterministTests, TestMissingData)
    // add some missing observations artificially by assigning NaN values
    xt::view(observed, xt::all(), xt::range(0, 20)) = NAN;
+    // add some missing predictions artificially by assigning NaN values
+    xt::view(observed, 0, xt::range(20, 23)) = NAN;
+    xt::view(observed, 1, xt::range(20, 26)) = NAN;
+    xt::view(observed, 2, xt::range(20, 29)) = NAN;
+    xt::view(observed, 3, xt::range(20, 32)) = NAN;
+    xt::view(observed, 4, xt::range(20, 35)) = NAN;
    // compute metrics with observations containing NaN values
    std::vector<xt::xarray<double>> metrics_nan =
@@ -196,19 +202,27 @@ TEST(DeterministTests, TestMissingData)
                    observed, predicted, metrics
            );
-    // compute metrics on subset of observations and predictions
-    // (i.e. eliminating region with NaN in observations manually)
-    xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _));
-    xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _));
-    std::vector<xt::xarray<double>> metrics_sbs =
-            evalhyd::evald<xt::xtensor<double, 2>>(
-                    obs, prd, metrics
-            );
    for (int m = 0; m < metrics.size(); m++) {
-        EXPECT_TRUE(
+        for (int p = 0; p < predicted.shape(0); p++) {
-                xt::allclose(metrics_nan[m], metrics_sbs[m])
+            // compute metrics on subset of observations and predictions
-        ) << "Failure for (" << metrics[m] << ")";
+            // (i.e. eliminating region with NaN in observations manually)
+            xt::xtensor<double, 1> obs =
+                    xt::view(observed, 0, xt::range(3*(p+1), _));
+            xt::xtensor<double, 1> prd =
+                    xt::view(predicted, p, xt::range(3*(p+1), _));
+            std::vector<xt::xarray<double>> metrics_sbs =
+                    evalhyd::evald<xt::xtensor<double, 1>>(
+                            obs, prd, {metrics[m]}
+                    );
+            // compare to check results are the same
+            EXPECT_TRUE(
+                    xt::allclose(
+                            xt::view(metrics_nan[m], p),
+                            metrics_sbs[0]
+                    )
+            ) << "Failure for (" << metrics[m] << ")";
+        }
    }
 }
\ No newline at end of file