From e453205d5741f6f059b8dc04a4304ac51a0f089d Mon Sep 17 00:00:00 2001
From: Thibault Hallouin <thibault.hallouin@inrae.fr>
Date: Tue, 17 Jan 2023 14:06:44 +0100
Subject: [PATCH] add one dimension to temporal masks for determinist
 evaluation

In view to be able to use determinist metrics in `evalp`, the temporal
masks need to feature a dimension specific to each prediction series.
In fact, this was already done internally to `determinist::Evaluator`
to deal with potentially missing predictions. From a determinist point
of view, this is actually beneficial as well since (a) it allows the
user to use different masks for different series and (b) it avoids an
internal copy of masks to go from 2D to 3D.
---
 .../evalhyd/detail/determinist/elements.hpp   | 44 ++++++----
 .../evalhyd/detail/determinist/evaluator.hpp  | 41 +++++----
 .../evalhyd/detail/determinist/metrics.hpp    | 10 ++-
 include/evalhyd/evald.hpp                     | 85 ++++++++++---------
 tests/test_determinist.cpp                    | 12 +--
 5 files changed, 111 insertions(+), 81 deletions(-)

diff --git a/include/evalhyd/detail/determinist/elements.hpp b/include/evalhyd/detail/determinist/elements.hpp
index 7400792..fb8dc43 100644
--- a/include/evalhyd/detail/determinist/elements.hpp
+++ b/include/evalhyd/detail/determinist/elements.hpp
@@ -24,7 +24,7 @@ namespace evalhyd
             ///     shape: (1, time)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
-            ///     shape: (subsets, series, time)
+            ///     shape: (series, subsets, time)
             /// \param b_exp
             ///     Boostrap samples.
             ///     shape: (samples, time slice)
@@ -54,7 +54,8 @@ namespace evalhyd
                 {
                     // apply the mask
                     // (using NaN workaround until reducers work on masked_view)
-                    auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN);
+                    auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_obs, NAN);
 
                     // compute variable one sample at a time
                     for (std::size_t e = 0; e < n_exp; e++)
@@ -76,7 +77,7 @@ namespace evalhyd
             ///     shape: (series, time)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
-            ///     shape: (subsets, series, time)
+            ///     shape: (series, subsets, time)
             /// \param b_exp
             ///     Boostrap samples.
             ///     shape: (samples, time slice)
@@ -106,7 +107,8 @@ namespace evalhyd
                 {
                     // apply the mask
                     // (using NaN workaround until reducers work on masked_view)
-                    auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN);
+                    auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_prd, NAN);
 
                     // compute variable one sample at a time
                     for (std::size_t e = 0; e < n_exp; e++)
@@ -151,7 +153,7 @@ namespace evalhyd
             ///     shape: (subsets, samples, series, 1)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
-            ///     shape: (subsets, series, time)
+            ///     shape: (series, subsets, time)
             /// \param n_srs
             ///     Number of prediction series.
             /// \param n_tim
@@ -181,7 +183,8 @@ namespace evalhyd
                 {
                     // apply the mask
                     // (using NaN workaround until reducers work on masked_view)
-                    auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN);
+                    auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_obs, NAN);
 
                     for (std::size_t e = 0; e < n_exp; e++)
                     {
@@ -204,7 +207,7 @@ namespace evalhyd
             ///     shape: (subsets, samples, series, 1)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
-            ///     shape: (subsets, series, time)
+            ///     shape: (series, subsets, time)
             /// \param n_srs
             ///     Number of prediction series.
             /// \param n_tim
@@ -234,7 +237,8 @@ namespace evalhyd
                 {
                     // apply the mask
                     // (using NaN workaround until reducers work on masked_view)
-                    auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN);
+                    auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_prd, NAN);
 
                     for (std::size_t e = 0; e < n_exp; e++)
                     {
@@ -269,7 +273,7 @@ namespace evalhyd
             ///     shape: (subsets, samples, series, time)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
-            ///     shape: (subsets, series, time)
+            ///     shape: (series, subsets, time)
             /// \param b_exp
             ///     Boostrap samples.
             ///     shape: (samples, time slice)
@@ -306,8 +310,10 @@ namespace evalhyd
                 {
                     // apply the mask
                     // (using NaN workaround until reducers work on masked_view)
-                    auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN);
-                    auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN);
+                    auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_prd, NAN);
+                    auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_obs, NAN);
 
                     // compute variable one sample at a time
                     for (std::size_t e = 0; e < n_exp; e++)
@@ -349,7 +355,7 @@ namespace evalhyd
             ///     shape: (subsets, samples, series, 1)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
-            ///     shape: (subsets, series, time)
+            ///     shape: (series, subsets, time)
             /// \param b_exp
             ///     Boostrap samples.
             ///     shape: (samples, time slice)
@@ -383,8 +389,10 @@ namespace evalhyd
                 {
                     // apply the mask
                     // (using NaN workaround until reducers work on masked_view)
-                    auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN);
-                    auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN);
+                    auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_prd, NAN);
+                    auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_obs, NAN);
 
                     // compute variable one sample at a time
                     for (std::size_t e = 0; e < n_exp; e++)
@@ -410,7 +418,7 @@ namespace evalhyd
             ///     shape: (series, time)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
-            ///     shape: (subsets, series, time)
+            ///     shape: (series, subsets, time)
             /// \param b_exp
             ///     Boostrap samples.
             ///     shape: (samples, time slice)
@@ -442,8 +450,10 @@ namespace evalhyd
                 {
                     // apply the mask
                     // (using NaN workaround until reducers work on masked_view)
-                    auto prd_masked = xt::where(xt::view(t_msk, m), q_prd, NAN);
-                    auto obs_masked = xt::where(xt::view(t_msk, m), q_obs, NAN);
+                    auto prd_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_prd, NAN);
+                    auto obs_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                q_obs, NAN);
 
                     // compute variable one sample at a time
                     for (std::size_t e = 0; e < n_exp; e++)
diff --git a/include/evalhyd/detail/determinist/evaluator.hpp b/include/evalhyd/detail/determinist/evaluator.hpp
index e17bfd5..6314795 100644
--- a/include/evalhyd/detail/determinist/evaluator.hpp
+++ b/include/evalhyd/detail/determinist/evaluator.hpp
@@ -19,14 +19,15 @@ namespace evalhyd
 {
     namespace determinist
     {
-        template <class XD2, class XB2>
+        template <class XD2, class XB3>
         class Evaluator
         {
         private:
             // members for input data
             const XD2& q_obs;
             const XD2& q_prd;
-            xt::xtensor<bool, 3> t_msk;
+            // members for optional input data
+            XB3 t_msk;
             const std::vector<xt::xkeep_slice<int>>& b_exp;
 
             // members for dimensions
@@ -56,7 +57,7 @@ namespace evalhyd
             {
                 if (!mean_obs.has_value())
                 {
-                    mean_obs = elements::calc_mean_obs<XD2>(
+                    mean_obs = elements::calc_mean_obs(
                             q_obs, t_msk, b_exp, n_srs, n_msk, n_exp
                     );
                 }
@@ -149,28 +150,34 @@ namespace evalhyd
             // constructor method
             Evaluator(const XD2& obs,
                       const XD2& prd,
-                      const XB2& msk,
+                      const XB3& msk,
                       const std::vector<xt::xkeep_slice<int>>& exp) :
-                    q_obs{obs}, q_prd{prd}, b_exp{exp}
+                    q_obs{obs}, q_prd{prd}, t_msk{msk}, b_exp{exp}
             {
+                // initialise a mask if none provided
+                // (corresponding to no temporal subset)
+                if (msk.size() < 1)
+                {
+                    t_msk = xt::ones<bool>(
+                            {q_prd.shape(0), std::size_t {1}, q_prd.shape(1)}
+                    );
+                }
+
                 // determine size for recurring dimensions
-                n_tim = q_prd.shape(1);
                 n_srs = q_prd.shape(0);
-                n_msk = msk.shape(0);
+                n_tim = q_prd.shape(1);
+                n_msk = msk.shape(1);
                 n_exp = b_exp.size();
 
                 // drop time steps where observations or predictions are NaN
-                auto obs_nan = xt::isnan(q_obs);
-                auto prd_nan = xt::isnan(q_prd);
-
-                t_msk = xt::ones<bool>({n_msk, n_srs, n_tim});
-                xt::view(t_msk, xt::all()) =
-                        xt::view(msk, xt::all(), xt::newaxis(), xt::all());
-                for (std::size_t m = 0; m < n_msk; m++)
+                for (std::size_t s = 0; s < n_srs; s++)
                 {
-                    xt::view(t_msk, m) =
-                            xt::where(obs_nan || prd_nan,
-                                      false, xt::view(t_msk, m));
+                    auto obs_nan = xt::isnan(xt::view(q_obs, 0));
+                    auto prd_nan = xt::isnan(xt::view(q_prd, s));
+
+                    auto msk_nan = xt::where(obs_nan || prd_nan)[0];
+
+                    xt::view(t_msk, s, xt::all(), xt::keep(msk_nan)) = false;
                 }
             };
 
diff --git a/include/evalhyd/detail/determinist/metrics.hpp b/include/evalhyd/detail/determinist/metrics.hpp
index 8ee318f..550c695 100644
--- a/include/evalhyd/detail/determinist/metrics.hpp
+++ b/include/evalhyd/detail/determinist/metrics.hpp
@@ -23,7 +23,7 @@ namespace evalhyd
             ///     shape: (series, time)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
-            ///     shape: (subsets, series, time)
+            ///     shape: (series, subsets, time)
             /// \param b_exp
             ///     Boostrap samples.
             ///     shape: (samples, time slice)
@@ -53,7 +53,8 @@ namespace evalhyd
                 {
                     // apply the mask
                     // (using NaN workaround until reducers work on masked_view)
-                    auto quad_err_masked = xt::where(xt::view(t_msk, m), quad_err, NAN);
+                    auto quad_err_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                     quad_err, NAN);
 
                     // compute variable one sample at a time
                     for (std::size_t e = 0; e < n_exp; e++)
@@ -76,7 +77,7 @@ namespace evalhyd
             ///     shape: (subsets, samples, series, time)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
-            ///     shape: (subsets, series, time)
+            ///     shape: (series, subsets, time)
             /// \param b_exp
             ///     Boostrap samples.
             ///     shape: (samples, time slice)
@@ -106,7 +107,8 @@ namespace evalhyd
                 {
                     // apply the mask
                     // (using NaN workaround until reducers work on masked_view)
-                    auto quad_err_masked = xt::where(xt::view(t_msk, m), quad_err, NAN);
+                    auto quad_err_masked = xt::where(xt::view(t_msk, xt::all(), m),
+                                                     quad_err, NAN);
 
                     // compute variable one sample at a time
                     for (std::size_t e = 0; e < n_exp; e++)
diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp
index 9bdd365..31bc544 100644
--- a/include/evalhyd/evald.hpp
+++ b/include/evalhyd/evald.hpp
@@ -32,9 +32,9 @@ namespace evalhyd
     ///         (e.g. ``xt::xtensor<double, 2>``, ``xt::pytensor<double, 2>``,
     ///         ``xt::rtensor<double, 2>``, etc.).
     ///
-    ///    XB2: Any 2-dimensional container class storing boolean elements
-    ///         (e.g. ``xt::xtensor<bool, 4>``, ``xt::pytensor<bool, 4>``,
-    ///         ``xt::rtensor<bool, 4>``, etc.).
+    ///    XB3: Any 3-dimensional container class storing boolean elements
+    ///         (e.g. ``xt::xtensor<bool, 3>``, ``xt::pytensor<bool, 3>``,
+    ///         ``xt::rtensor<bool, 3>``, etc.).
     ///
     /// :Parameters:
     ///
@@ -76,13 +76,11 @@ namespace evalhyd
     ///       epsilon, as recommended by `Pushpalatha et al. (2012)
     ///       <https://doi.org/10.1016/j.jhydrol.2011.11.055>`_.
     ///
-    ///    t_msk: ``XB2``, optional
+    ///    t_msk: ``XB3``, optional
     ///       Mask used to temporally subset of the whole streamflow time series
     ///       (where True/False is used for the time steps to include/discard in
-    ///       the subset). If provided, masks must feature the same number of
-    ///       dimensions as observations and predictions, and it must
-    ///       broadcastable with both of them.
-    ///       shape: (subsets, time)
+    ///       the subset).
+    ///       shape: (series, subsets, time)
     ///
     ///       .. seealso:: :doc:`../../functionalities/temporal-masking`
     ///
@@ -153,12 +151,12 @@ namespace evalhyd
     ///
     ///    .. code-block:: c++
     ///
-    ///       xt::xtensor<double, 2> msk = {{ 1, 1, 0, 1, 0 }};
+    ///       xt::xtensor<double, 3> msk = {{{ 1, 1, 0, 1, 0 }}};
     ///
     ///       evalhyd::evald(obs, prd, {"NSE"}, "none", 1, -9, msk);
     ///
     /// \endrst
-    template <class XD2, class XB2 = xt::xtensor<bool, 2>>
+    template <class XD2, class XB3 = xt::xtensor<bool, 3>>
     std::vector<xt::xarray<double>> evald(
             const xt::xexpression<XD2>& q_obs,
             const xt::xexpression<XD2>& q_prd,
@@ -169,7 +167,7 @@ namespace evalhyd
                     xtl::missing<double>(),
             xtl::xoptional<double, bool> epsilon =
                     xtl::missing<double>(),
-            const xt::xexpression<XB2>& t_msk = XB2({}),
+            const xt::xexpression<XB3>& t_msk = XB3({}),
             const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {},
             xtl::xoptional<const std::unordered_map<std::string, int>, bool> bootstrap =
                     xtl::missing<const std::unordered_map<std::string, int>>(),
@@ -183,10 +181,10 @@ namespace evalhyd
                     "observations and/or predictions are not two-dimensional"
             );
         }
-        if (xt::get_rank<XB2>::value != 2)
+        if (xt::get_rank<XB3>::value != 3)
         {
             throw std::runtime_error(
-                    "temporal masks are not two-dimensional"
+                    "temporal masks are not three-dimensional"
             );
         }
 
@@ -194,7 +192,7 @@ namespace evalhyd
         const XD2& q_obs_ = q_obs.derived_cast();
         const XD2& q_prd_ = q_prd.derived_cast();
 
-        const XB2& t_msk_ = t_msk.derived_cast();
+        const XB3& t_msk_ = t_msk.derived_cast();
 
         // check that the metrics to be computed are valid
         utils::check_metrics(
@@ -219,7 +217,7 @@ namespace evalhyd
         }
         if (t_msk_.size() > 0)
         {
-            if (q_obs_.shape(1) != t_msk_.shape(1))
+            if (q_obs_.shape(1) != t_msk_.shape(2))
             {
                 throw std::runtime_error(
                         "observations and masks feature different "
@@ -245,44 +243,51 @@ namespace evalhyd
                     "observations contain more than one time series"
             );
         }
+        if (t_msk_.size() > 0)
+        {
+            if (q_prd_.shape(0) != t_msk_.shape(0))
+            {
+                throw std::runtime_error(
+                        "predictions and masks feature different "
+                        "number of series"
+                );
+            }
+        }
 
         // retrieve dimensions
-        std::size_t n_tim = q_obs_.shape(1);
-        std::size_t n_msk = t_msk_.size() > 0 ? t_msk_.shape(0) :
-                            (m_cdt.size() > 0 ? m_cdt.shape(0) : 1);
+        std::size_t n_tim = q_prd_.shape(1);
 
-        // initialise a mask if none provided
-        // (corresponding to no temporal subset)
+        // generate masks from conditions if provided
         auto gen_msk = [&]()
         {
-            // if t_msk provided, it takes priority
-            if (t_msk_.size() > 0)
+            if ((t_msk_.size() < 1) && (m_cdt.size() > 0))
             {
-                return t_msk_;
-            }
-            // else if m_cdt provided, use them to generate t_msk
-            else if (m_cdt.size() > 0)
-            {
-                XB2 c_msk = xt::zeros<bool>({n_msk, n_tim});
+                std::size_t n_srs = q_prd_.shape(0);
+                std::size_t n_msk = m_cdt.shape(0);
 
-                for (std::size_t m = 0; m < n_msk; m++)
+                XB3 c_msk = xt::zeros<bool>({n_srs, n_msk, n_tim});
+
+                for (std::size_t s = 0; s < n_srs; s++)
                 {
-                    xt::view(c_msk, m) =
-                            masks::generate_mask_from_conditions(
-                                    m_cdt[0], xt::view(q_obs_, 0), q_prd_
-                            );
+                    for (std::size_t m = 0; m < n_msk; m++)
+                    {
+                        xt::view(c_msk, s, m) =
+                                masks::generate_mask_from_conditions(
+                                        xt::view(m_cdt, m),
+                                        xt::view(q_obs_, 0),
+                                        xt::view(q_prd_, s, xt::newaxis())
+                                );
+                    }
                 }
 
                 return c_msk;
             }
-            // if neither t_msk nor m_cdt provided, generate dummy mask
             else
             {
-                return XB2({xt::ones<bool>({std::size_t{1}, n_tim})});
+                return XB3({});
             }
         };
-
-        auto msk = gen_msk();
+        const XB3 c_msk = gen_msk();
 
         // apply streamflow transformation if requested
         auto q_transform = [&](const XD2& q)
@@ -392,7 +397,11 @@ namespace evalhyd
         }
 
         // instantiate determinist evaluator
-        determinist::Evaluator<XD2, XB2> evaluator(obs, prd, msk, exp);
+        determinist::Evaluator<XD2, XB3> evaluator(
+                obs, prd,
+                t_msk_.size() > 0 ? t_msk_: (m_cdt.size() > 0 ? c_msk : t_msk_),
+                exp
+        );
 
         // retrieve or compute requested metrics
         std::vector<xt::xarray<double>> r;
diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp
index decdf97..b7ee38e 100644
--- a/tests/test_determinist.cpp
+++ b/tests/test_determinist.cpp
@@ -152,9 +152,11 @@ TEST(DeterministTests, TestMasks)
     std::tie(observed, predicted) = load_data_d();
 
     // generate temporal subset by dropping 20 first time steps
-    xt::xtensor<bool, 2> masks =
-            xt::ones<bool>({std::size_t {1}, std::size_t {observed.size()}});
-    xt::view(masks, 0, xt::range(0, 20)) = 0;
+    xt::xtensor<bool, 3> masks =
+            xt::ones<bool>({std::size_t {predicted.shape(0)},
+                            std::size_t {1},
+                            std::size_t {observed.size()}});
+    xt::view(masks, xt::all(), 0, xt::range(0, 20)) = 0;
 
     // compute scores using masks to subset whole record
     std::vector<xt::xarray<double>> metrics_masked =
@@ -183,7 +185,7 @@ TEST(DeterministTests, TestMaskingConditions)
     std::tie(observed, predicted) = load_data_d();
 
     // generate dummy empty masks required to access next optional argument
-    xt::xtensor<bool, 2> masks;
+    xt::xtensor<bool, 3> masks;
 
     // conditions on streamflow values _________________________________________
 
@@ -360,7 +362,7 @@ TEST(DeterministTests, TestBootstrap)
                     {},  // transform
                     {},  // exponent
                     {},  // epsilon
-                    xt::xtensor<bool, 2>({}),  // t_msk
+                    xt::xtensor<bool, 3>({}),  // t_msk
                     {},  // m_cdt
                     bootstrap,
                     datetimes
-- 
GitLab