From 67abb41fc90d81aa0d458d1f10366a4d6b0de3ee Mon Sep 17 00:00:00 2001
From: Thibault Hallouin <thibault.hallouin@inrae.fr>
Date: Wed, 18 Jan 2023 14:40:30 +0100
Subject: [PATCH] add access to determinist metrics to `evalp`

This is an attempt to add access to determinist metrics to probabilist
evaluation (i.e. via the choice of a reducer on ensemble members).

The idea is to instantiate one determinist::Evaluator per site (in order
to reduce the dimension to 2D) as part of probabilist::Evaluator. This
way, the determinist evaluator handles all the memoisation/dependency
management, while the probabilist evaluator only needs to get the metrics
via a call to the getter methods of the determinist evaluator (e.g.
`get_RMSE`).

Unfortunately, while this commit compiles/links, it fails to run because
the views to q_obs/q_prd given to each determinist evaluator lose the
reference to the data as soon as it leaves the constructor method. So
there is likely a move/forward required somewhere, or the use of
a constructor method expecting universal references, but for now I could
not find a working solution along those lines. So I park this here for
now.
---
 .../evalhyd/detail/determinist/elements.hpp   |  40 ++---
 .../evalhyd/detail/determinist/evaluator.hpp  |  44 ++++-
 .../evalhyd/detail/probabilist/evaluator.hpp  | 160 +++++++++++++++++-
 include/evalhyd/evald.hpp                     |   2 +-
 include/evalhyd/evalp.hpp                     |  39 ++++-
 5 files changed, 253 insertions(+), 32 deletions(-)

diff --git a/include/evalhyd/detail/determinist/elements.hpp b/include/evalhyd/detail/determinist/elements.hpp
index fb8dc43..876e1ec 100644
--- a/include/evalhyd/detail/determinist/elements.hpp
+++ b/include/evalhyd/detail/determinist/elements.hpp
@@ -37,9 +37,9 @@ namespace evalhyd
             /// \return
             ///     Mean observed streamflow.
             ///     shape: (subsets, samples, series, 1)
-            template <class XD2>
+            template <class XD2o>
             inline xt::xtensor<double, 4> calc_mean_obs(
-                    const XD2& q_obs,
+                    const XD2o& q_obs,
                     const xt::xtensor<bool, 3>& t_msk,
                     const std::vector<xt::xkeep_slice<int>>& b_exp,
                     std::size_t n_srs,
@@ -90,9 +90,9 @@ namespace evalhyd
             /// \return
             ///     Mean predicted streamflow.
             ///     shape: (subsets, samples, series, 1)
-            template <class XD2>
+            template <class XD2p>
             inline xt::xtensor<double, 4> calc_mean_prd(
-                    const XD2& q_prd,
+                    const XD2p& q_prd,
                     const xt::xtensor<bool, 3>& t_msk,
                     const std::vector<xt::xkeep_slice<int>>& b_exp,
                     std::size_t n_srs,
@@ -134,10 +134,10 @@ namespace evalhyd
             /// \return
             ///     Quadratic errors between observations and predictions.
             ///     shape: (series, time)
-            template <class XD2>
+            template <class XD2o, class XD2p>
             inline xt::xtensor<double, 2> calc_quad_err(
-                    const XD2& q_obs,
-                    const XD2& q_prd
+                    const XD2o& q_obs,
+                    const XD2p& q_prd
             )
             {
                 return xt::square(q_obs - q_prd);
@@ -165,9 +165,9 @@ namespace evalhyd
             /// \return
             ///     Quadratic errors between observations and mean observation.
             ///     shape: (subsets, samples, series, time)
-            template <class XD2>
+            template <class XD2o>
             inline xt::xtensor<double, 4> calc_quad_obs(
-                    const XD2& q_obs,
+                    const XD2o& q_obs,
                     const xt::xtensor<double, 4>& mean_obs,
                     const xt::xtensor<bool, 3>& t_msk,
                     std::size_t n_srs,
@@ -219,9 +219,9 @@ namespace evalhyd
             /// \return
             ///     Quadratic errors between predictions and mean prediction.
             ///     shape: (subsets, samples, series, time)
-            template <class XD2>
+            template <class XD2p>
             inline xt::xtensor<double, 4> calc_quad_prd(
-                    const XD2& q_prd,
+                    const XD2p& q_prd,
                     const xt::xtensor<double, 4>& mean_prd,
                     const xt::xtensor<bool, 3>& t_msk,
                     std::size_t n_srs,
@@ -286,10 +286,10 @@ namespace evalhyd
             /// \return
             ///     Pearson correlation coefficients.
             ///     shape: (subsets, samples, series)
-            template <class XD2>
+            template <class XD2o, class XD2p>
             inline xt::xtensor<double, 3> calc_r_pearson(
-                    const XD2& q_obs,
-                    const XD2& q_prd,
+                    const XD2o& q_obs,
+                    const XD2p& q_prd,
                     const xt::xtensor<double, 4>& mean_obs,
                     const xt::xtensor<double, 4>& mean_prd,
                     const xt::xtensor<double, 4>& quad_obs,
@@ -368,10 +368,10 @@ namespace evalhyd
             /// \return
             ///     Alphas, ratios of standard deviations.
             ///     shape: (subsets, samples, series)
-            template <class XD2>
+            template <class XD2o, class XD2p>
             inline xt::xtensor<double, 3> calc_alpha(
-                    const XD2& q_obs,
-                    const XD2& q_prd,
+                    const XD2o& q_obs,
+                    const XD2p& q_prd,
                     const xt::xtensor<double, 4>& mean_obs,
                     const xt::xtensor<double, 4>& mean_prd,
                     const xt::xtensor<bool, 3>& t_msk,
@@ -431,10 +431,10 @@ namespace evalhyd
             /// \return
             ///     Biases.
             ///     shape: (subsets, samples, series)
-            template <class XD2>
+            template <class XD2o, class XD2p>
             inline xt::xtensor<double, 3> calc_bias(
-                    const XD2& q_obs,
-                    const XD2& q_prd,
+                    const XD2o& q_obs,
+                    const XD2p& q_prd,
                     const xt::xtensor<bool, 3>& t_msk,
                     const std::vector<xt::xkeep_slice<int>>& b_exp,
                     std::size_t n_srs,
diff --git a/include/evalhyd/detail/determinist/evaluator.hpp b/include/evalhyd/detail/determinist/evaluator.hpp
index 6314795..7d8220c 100644
--- a/include/evalhyd/detail/determinist/evaluator.hpp
+++ b/include/evalhyd/detail/determinist/evaluator.hpp
@@ -19,13 +19,13 @@ namespace evalhyd
 {
     namespace determinist
     {
-        template <class XD2, class XB3>
+        template <class XD2o, class XD2p, class XB3>
         class Evaluator
         {
         private:
             // members for input data
-            const XD2& q_obs;
-            const XD2& q_prd;
+            const XD2o& q_obs;
+            const XD2p& q_prd;
             // members for optional input data
             XB3 t_msk;
             const std::vector<xt::xkeep_slice<int>>& b_exp;
@@ -148,8 +148,8 @@ namespace evalhyd
 
         public:
             // constructor method
-            Evaluator(const XD2& obs,
-                      const XD2& prd,
+            Evaluator(const XD2o& obs,
+                      const XD2p& prd,
                       const XB3& msk,
                       const std::vector<xt::xkeep_slice<int>>& exp) :
                     q_obs{obs}, q_prd{prd}, t_msk{msk}, b_exp{exp}
@@ -181,6 +181,40 @@ namespace evalhyd
                 }
             };
 
+            Evaluator(XD2o&& obs,
+                      XD2p&& prd,
+                      const XB3& msk,
+                      const std::vector<xt::xkeep_slice<int>>& exp) :
+                    q_obs{obs}, q_prd{prd}, t_msk{msk}, b_exp{exp}
+            {
+                std::cout << "universal" << std::endl;
+                // initialise a mask if none provided
+                // (corresponding to no temporal subset)
+                if (msk.size() < 1)
+                {
+                    t_msk = xt::ones<bool>(
+                            {q_prd.shape(0), std::size_t {1}, q_prd.shape(1)}
+                    );
+                }
+
+                // determine size for recurring dimensions
+                n_srs = q_prd.shape(0);
+                n_tim = q_prd.shape(1);
+                n_msk = msk.shape(1);
+                n_exp = b_exp.size();
+
+                // drop time steps where observations or predictions are NaN
+                for (std::size_t s = 0; s < n_srs; s++)
+                {
+                    auto obs_nan = xt::isnan(xt::view(q_obs, 0));
+                    auto prd_nan = xt::isnan(xt::view(q_prd, s));
+
+                    auto msk_nan = xt::where(obs_nan || prd_nan)[0];
+
+                    xt::view(t_msk, s, xt::all(), xt::keep(msk_nan)) = false;
+                }
+            };
+
             // methods to compute metrics
             xt::xtensor<double, 3> get_RMSE()
             {
diff --git a/include/evalhyd/detail/probabilist/evaluator.hpp b/include/evalhyd/detail/probabilist/evaluator.hpp
index edae68b..00bf169 100644
--- a/include/evalhyd/detail/probabilist/evaluator.hpp
+++ b/include/evalhyd/detail/probabilist/evaluator.hpp
@@ -12,6 +12,7 @@
 #include <xtensor/xtensor.hpp>
 #include <xtensor/xview.hpp>
 
+#include "../determinist/evaluator.hpp"
 #include "brier.hpp"
 #include "quantiles.hpp"
 #include "contingency.hpp"
@@ -31,9 +32,29 @@ namespace evalhyd
             // members for optional input data
             const XD2& _q_thr;
             xtl::xoptional<const std::string, bool> _events;
+            xtl::xoptional<const std::string, bool> _reducer;
             XB4 t_msk;
             const std::vector<xt::xkeep_slice<int>>& b_exp;
 
+            // member to access determinist metrics
+            xtl::xoptional<xt::xtensor<double, 3>, bool> _q_prd_reduced;
+
+            using xv2d2 = decltype(xt::view(std::declval<const XD2&>(),
+                                            std::declval<std::size_t>(),
+                                            xt::newaxis(),
+                                            xt::all()));
+            using xv2d3 = decltype(xt::view(std::declval<xt::xtensor<double, 3>>(),
+                                            std::declval<std::size_t>(),
+                                            xt::all(),
+                                            xt::all()));
+            using xv3b4 = decltype(xt::view(std::declval<XB4&>(),
+                                            std::declval<std::size_t>(),
+                                            xt::all(),
+                                            xt::all(),
+                                            xt::all()));
+
+            std::vector<determinist::Evaluator<xv2d2, xv2d3, xv3b4>> _d_evaluators;
+
             // members for dimensions
             std::size_t n_sit;
             std::size_t n_ldt;
@@ -86,6 +107,11 @@ namespace evalhyd
             xtl::xoptional<xt::xtensor<double, 6>, bool> FAR;
             xtl::xoptional<xt::xtensor<double, 6>, bool> CSI;
             xtl::xoptional<xt::xtensor<double, 5>, bool> ROCSS;
+            // > Determinist-based
+            xtl::xoptional<xt::xtensor<double, 4>, bool> RMSE;
+            xtl::xoptional<xt::xtensor<double, 4>, bool> NSE;
+            xtl::xoptional<xt::xtensor<double, 4>, bool> KGE;
+            xtl::xoptional<xt::xtensor<double, 4>, bool> KGEPRIME;
 
             // methods to get optional parameters
             auto get_q_thr()
@@ -130,6 +156,53 @@ namespace evalhyd
                 }
             }
 
+            auto get_reducer()
+            {
+                if (_reducer.has_value())
+                {
+                    if (_reducer.value() == "median"
+                        || _reducer.value() == "mean")
+                    {
+                        return _reducer.value();
+                    }
+                    else
+                    {
+                        throw std::runtime_error(
+                                "invalid value for member *reducer*: "
+                                + _events.value()
+                        );
+                    }
+                }
+                else
+                {
+                    throw std::runtime_error(
+                            "determinist metric requested, "
+                            "but member *reducer* not provided"
+                    );
+                }
+            }
+
+            auto get_d_evaluators()
+            {
+                if (_d_evaluators.empty())
+                {
+                    // initialise all determinist evaluators
+                    for (std::size_t s = 0; s < n_sit; s++)
+                    {
+                        xv2d2 obs = xt::view(q_obs, s, xt::newaxis(), xt::all());
+                        xv2d3 prd = xt::view(get_q_prd_reduced(), s, xt::all(), xt::all());
+                        xv3b4 msk = xt::view(t_msk, s, xt::all(), xt::all(), xt::all());
+
+                        determinist::Evaluator<xv2d2, xv2d3, xv3b4> evaluator(
+                                obs, prd, msk, b_exp
+                        );
+
+                        _d_evaluators.push_back(std::move(evaluator));
+                    }
+                }
+                return _d_evaluators;
+            }
+
             // methods to compute elements
             xt::xtensor<double, 3> get_o_k()
             {
@@ -242,6 +315,22 @@ namespace evalhyd
                 return ct_d.value();
             };
 
+            xt::xtensor<double, 3> get_q_prd_reduced()
+            {
+                if (!_q_prd_reduced.has_value())
+                {
+                    if (get_reducer() == "median")
+                    {
+                        _q_prd_reduced = xt::median(q_prd, 2);
+                    }
+                    else // (get_reducer() == "mean")
+                    {
+                        _q_prd_reduced = xt::mean(q_prd, 2);
+                    }
+                }
+                return _q_prd_reduced.value();
+            }
+
             // methods to compute intermediate metrics
             xt::xtensor<double, 4> get_bs()
             {
@@ -327,9 +416,10 @@ namespace evalhyd
                       const XD2& thr,
                       xtl::xoptional<const std::string&, bool> events,
                       const XB4& msk,
-                      const std::vector<xt::xkeep_slice<int>>& exp) :
-                    q_obs{obs}, q_prd{prd},
-                    _q_thr{thr}, _events{events}, t_msk(msk), b_exp(exp)
+                      const std::vector<xt::xkeep_slice<int>>& exp,
+                      xtl::xoptional<const std::string&, bool> reducer) :
+                    q_obs{obs}, q_prd{prd}, _q_thr{thr}, _events{events},
+                    t_msk(msk), b_exp(exp), _reducer{reducer}
             {
                 // initialise a mask if none provided
                 // (corresponding to no temporal subset)
@@ -511,6 +601,70 @@ namespace evalhyd
                 }
                 return ROCSS.value();
             };
+
+            xt::xtensor<double, 4> get_RMSE()
+            {
+                if (!RMSE.has_value())
+                {
+                    RMSE = xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});
+                    
+                    auto evaluators = get_d_evaluators();
+
+                    for (std::size_t s = 0; s < n_sit; s++)
+                    {
+                        xt::view(RMSE.value(), s) = evaluators[s].get_RMSE();
+                    }
+                }
+                return RMSE.value();
+            };
+
+            xt::xtensor<double, 4> get_NSE()
+            {
+                if (!NSE.has_value())
+                {
+                    NSE = xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});
+
+                    auto evaluators = get_d_evaluators();
+
+                    for (std::size_t s = 0; s < n_sit; s++)
+                    {
+                        xt::view(NSE.value(), s) = evaluators[s].get_NSE();
+                    }
+                }
+                return NSE.value();
+            };
+
+            xt::xtensor<double, 4> get_KGE()
+            {
+                if (!KGE.has_value())
+                {
+                    KGE = xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});
+
+                    auto evaluators = get_d_evaluators();
+
+                    for (std::size_t s = 0; s < n_sit; s++)
+                    {
+                        xt::view(KGE.value(), s) = evaluators[s].get_KGE();
+                    }
+                }
+                return KGE.value();
+            };
+
+            xt::xtensor<double, 4> get_KGEPRIME()
+            {
+                if (!KGEPRIME.has_value())
+                {
+                    KGEPRIME = xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});
+
+                    auto evaluators = get_d_evaluators();
+
+                    for (std::size_t s = 0; s < n_sit; s++)
+                    {
+                        xt::view(KGEPRIME.value(), s) = evaluators[s].get_KGEPRIME();
+                    }
+                }
+                return KGEPRIME.value();
+            };
         };
     }
 }
diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp
index 31bc544..825086b 100644
--- a/include/evalhyd/evald.hpp
+++ b/include/evalhyd/evald.hpp
@@ -397,7 +397,7 @@ namespace evalhyd
         }
 
         // instantiate determinist evaluator
-        determinist::Evaluator<XD2, XB3> evaluator(
+        determinist::Evaluator<XD2, XD2, XB3> evaluator(
                 obs, prd,
                 t_msk_.size() > 0 ? t_msk_: (m_cdt.size() > 0 ? c_msk : t_msk_),
                 exp
diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp
index 6c52fac..012e511 100644
--- a/include/evalhyd/evalp.hpp
+++ b/include/evalhyd/evalp.hpp
@@ -73,6 +73,12 @@ namespace evalhyd
     ///       occurring when streamflow goes below threshold). It must be
     ///       provided if *q_thr* is provided.
     ///
+    ///    reducer: ``std::string``, optional
+    ///       The type of reduction to apply to the prediction ensemble members
+    ///       for the computation of deterministic metrics. It can either be set
+    ///       as "median" or as "mean". It must be provided in case *metrics*
+    ///       contains deterministic-based metrics.
+    ///
     ///    t_msk: ``XB4``, optional
     ///       Mask(s) used to generate temporal subsets of the whole streamflow
     ///       time series (where True/False is used for the time steps to
@@ -163,7 +169,9 @@ namespace evalhyd
             const xt::xtensor<std::array<char, 32>, 2>& m_cdt = {},
             xtl::xoptional<const std::unordered_map<std::string, int>, bool> bootstrap =
                     xtl::missing<const std::unordered_map<std::string, int>>(),
-            const std::vector<std::string>& dts = {}
+            const std::vector<std::string>& dts = {},
+            xtl::xoptional<const std::string, bool> reducer =
+                    xtl::missing<const std::string>()
     )
     {
         // check ranks of tensors
@@ -198,7 +206,8 @@ namespace evalhyd
                 metrics,
                 {"BS", "BSS", "BS_CRD", "BS_LBD",
                  "QS", "CRPS",
-                 "POD", "POFD", "FAR", "CSI", "ROCSS"}
+                 "POD", "POFD", "FAR", "CSI", "ROCSS",
+                 "RMSE", "NSE", "KGE", "KGEPRIME"}
         );
 
         // check optional parameters
@@ -362,7 +371,7 @@ namespace evalhyd
         probabilist::Evaluator<XD2, XD4, XB4> evaluator(
                 q_obs_, q_prd_, q_thr_, events,
                 t_msk_.size() > 0 ? t_msk_: (m_cdt.size() > 0 ? c_msk : t_msk_),
-                b_exp
+                b_exp, reducer
         );
 
         // initialise data structure for outputs
@@ -436,6 +445,30 @@ namespace evalhyd
                         uncertainty::summarise(evaluator.get_ROCSS(), summary)
                 );
             }
+            else if ( metric == "RMSE" )
+            {
+                r.emplace_back(
+                        uncertainty::summarise(evaluator.get_RMSE(), summary)
+                );
+            }
+            else if ( metric == "NSE" )
+            {
+                r.emplace_back(
+                        uncertainty::summarise(evaluator.get_NSE(), summary)
+                );
+            }
+            else if ( metric == "KGE" )
+            {
+                r.emplace_back(
+                        uncertainty::summarise(evaluator.get_KGE(), summary)
+                );
+            }
+            else if ( metric == "KGEPRIME" )
+            {
+                r.emplace_back(
+                        uncertainty::summarise(evaluator.get_KGEPRIME(), summary)
+                );
+            }
         }
 
         return r;
-- 
GitLab