From 295b3208193d4f4e31408c0ef9908a2b037349a4 Mon Sep 17 00:00:00 2001
From: Thibault Hallouin <thibault.hallouin@inrae.fr>
Date: Wed, 29 Jun 2022 18:49:52 +0200
Subject: [PATCH] add dimensions for sites/lead times to probabilistic
 evaluator

Internally, rather than using the multi-dimensional character of
tensors to compute all sites and all lead times at once, loops are
performed for each site and each lead time, in turn, in order to
minimise memory imprint. Although at the moment, the input tensors are
expected to feature the sites and lead times dimensions. If memory is
an issue, the user can still send smaller tensors with size 1 for those
dimensions and recompose multi-sites/multi-lead times output arrays
externally.
---
 include/evalhyd/evalp.hpp               | 175 +++++++++++++++---------
 src/probabilist/evaluator.h             |  35 +++--
 src/probabilist/evaluator_brier.cpp     |   2 +-
 src/probabilist/evaluator_elements.cpp  |   7 +-
 src/probabilist/evaluator_quantiles.cpp |  14 +-
 src/probabilist/evaluator_utils.cpp     |  46 -------
 tests/CMakeLists.txt                    |   1 -
 tests/test_probabilist.cpp              |  34 +++--
 8 files changed, 168 insertions(+), 146 deletions(-)
 delete mode 100644 src/probabilist/evaluator_utils.cpp

diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp
index 236a876..9dd8285 100644
--- a/include/evalhyd/evalp.hpp
+++ b/include/evalhyd/evalp.hpp
@@ -23,16 +23,16 @@ namespace evalhyd
     ///
     ///    q_obs: :cpp:`xt::xtensor<double, 2>`
     ///       Streamflow observations.
-    ///       shape: (1, time)
+    ///       shape: (sites, time)
     ///
-    ///    q_prd: :cpp:`xt::xtensor<double, 2>`
+    ///    q_prd: :cpp:`xt::xtensor<double, 4>`
     ///       Streamflow predictions.
-    ///       shape: (members, time)
+    ///       shape: (sites, lead times, members, time)
     ///
     ///    metrics: :cpp:`std::vector<std::string>`
     ///       The sequence of evaluation metrics to be computed.
     ///
-    ///    q_thr: :cpp:`xt::xtensor<double, 1>`, optional
+    ///    q_thr: :cpp:`xt::xtensor<double, 2>`, optional
     ///       Streamflow exceedance threshold(s).
     ///       shape: (thresholds,)
     ///
@@ -43,11 +43,11 @@ namespace evalhyd
     ///       performed and only one set of metrics is returned corresponding
     ///       to the whole time series. If provided, as many sets of metrics are
     ///       returned as they are masks provided.
-    ///       shape: (1+, time)
+    ///       shape: (subsets, time)
     ///
     /// :Returns:
     ///
-    ///    :cpp:`std::vector<xt::xarray<double>>`
+    ///    :cpp:`std::vector<xt::xtensor<double, 5>>`
     ///       The sequence of evaluation metrics computed
     ///       in the same order as given in *metrics*.
     ///
@@ -69,7 +69,7 @@ namespace evalhyd
     /// \endrst
     std::vector<xt::xarray<double>> evalp(
             const xt::xtensor<double, 2>& q_obs,
-            const xt::xtensor<double, 2>& q_prd,
+            const xt::xtensor<double, 4>& q_prd,
             const std::vector<std::string>& metrics,
             const xt::xtensor<double, 1>& q_thr = {},
             const xt::xtensor<bool, 2>& t_msk = {}
@@ -84,8 +84,22 @@ namespace evalhyd
         // check that optional parameters are given as arguments
         eh::utils::check_optionals(metrics, q_thr);
 
-        // instantiate probabilist evaluator
-        eh::probabilist::Evaluator evaluator(q_obs, q_prd, q_thr, t_msk);
+        // retrieve dimensions
+        std::size_t n_sit = q_prd.shape(0);
+        std::size_t n_ltm = q_prd.shape(1);
+        std::size_t n_mbr = q_prd.shape(2);
+        std::size_t n_thr = q_thr.size();
+        std::size_t n_msk = t_msk.size() < 1 ? 1 : t_msk.shape(0);
+
+        // register metrics number of dimensions
+        std::unordered_map<std::string, std::vector<std::size_t>> dim;
+
+        dim["BS"] = {n_sit, n_ltm, n_msk, n_thr};
+        dim["BSS"] = {n_sit, n_ltm, n_msk, n_thr};
+        dim["BS_CRD"] = {n_sit, n_ltm, n_msk, n_thr, 3};
+        dim["BS_LBD"] = {n_sit, n_ltm, n_msk, n_thr, 3};
+        dim["QS"] = {n_sit, n_ltm, n_msk, n_mbr};
+        dim["CRPS"] = {n_sit, n_ltm, n_msk};
 
         // declare maps for memoisation purposes
         std::unordered_map<std::string, std::vector<std::string>> elt;
@@ -110,76 +124,105 @@ namespace evalhyd
 
         eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep);
 
-        // pre-compute required elt
-        for (const auto& element : req_elt)
-        {
-            if ( element == "o_k" )
-                evaluator.calc_o_k();
-            else if ( element == "bar_o" )
-                evaluator.calc_bar_o();
-            else if ( element == "y_k" )
-                evaluator.calc_y_k();
-            else if ( element == "q_qnt" )
-                evaluator.calc_q_qnt();
-        }
+        // initialise data structure for outputs
+        std::vector<xt::xarray<double>> r;
+        for (const auto& metric : metrics)
+            r.emplace_back(xt::zeros<double>(dim[metric]));
 
-        // pre-compute required dep
-        for (const auto& dependency : req_dep)
+        // compute variables one site at a time to minimise memory imprint
+        for (int s = 0; s < n_sit; s++)
+            // compute variables one lead time at a time to minimise memory imprint
+            for (int l = 0; l < n_ltm; l++)
         {
-            if ( dependency == "bs" )
-                evaluator.calc_bs();
-            else if ( dependency == "qs" )
-                evaluator.calc_qs();
-            else if ( dependency == "crps" )
-                evaluator.calc_crps();
-        }
+            // instantiate probabilist evaluator
+            const auto q_obs_v = xt::view(q_obs, s, xt::all());
+            const auto q_prd_v = xt::view(q_prd, s, l, xt::all(), xt::all());
 
-        // retrieve or compute requested metrics
-        std::vector<xt::xarray<double>> r;
+            eh::probabilist::Evaluator evaluator(q_obs_v, q_prd_v, q_thr, t_msk);
 
-        for (const auto& metric : metrics)
-        {
-            if ( metric == "BS" )
+            // pre-compute required elt
+            for (const auto& element : req_elt)
             {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
-                        == req_dep.end())
-                    evaluator.calc_BS();
-                r.emplace_back(xt::squeeze(evaluator.BS));
+                if ( element == "o_k" )
+                    evaluator.calc_o_k();
+                else if ( element == "bar_o" )
+                    evaluator.calc_bar_o();
+                else if ( element == "y_k" )
+                    evaluator.calc_y_k();
+                else if ( element == "q_qnt" )
+                    evaluator.calc_q_qnt();
             }
-            else if ( metric == "BSS" )
+
+            // pre-compute required dep
+            for (const auto& dependency : req_dep)
             {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
-                        == req_dep.end())
-                    evaluator.calc_BSS();
-                r.emplace_back(xt::squeeze(evaluator.BSS));
+                if ( dependency == "bs" )
+                    evaluator.calc_bs();
+                else if ( dependency == "qs" )
+                    evaluator.calc_qs();
+                else if ( dependency == "crps" )
+                    evaluator.calc_crps();
             }
-            else if ( metric == "BS_CRD" )
+
+            // retrieve or compute requested metrics
+            for (int m = 0; m < metrics.size(); m++)
             {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
+                const auto& metric = metrics[m];
+
+                if ( metric == "BS" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
                         == req_dep.end())
-                    evaluator.calc_BS_CRD();
-                r.emplace_back(xt::squeeze(evaluator.BS_CRD));
-            }
-            else if ( metric == "BS_LBD" )
-            {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        evaluator.calc_BS();
+                    // (sites, lead times, subsets, thresholds)
+                    xt::view(r[m], s, l, xt::all(), xt::all()) =
+                            evaluator.BS;
+                }
+                else if ( metric == "BSS" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
                         == req_dep.end())
-                    evaluator.calc_BS_LBD();
-                r.emplace_back(xt::squeeze(evaluator.BS_LBD));
-            }
-            else if ( metric == "QS" )
-            {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        evaluator.calc_BSS();
+                    // (sites, lead times, subsets, thresholds)
+                    xt::view(r[m], s, l, xt::all(), xt::all()) =
+                            evaluator.BSS;
+                }
+                else if ( metric == "BS_CRD" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
                         == req_dep.end())
-                    evaluator.calc_QS();
-                r.emplace_back(xt::squeeze(evaluator.QS));
-            }
-            else if ( metric == "CRPS" )
-            {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        evaluator.calc_BS_CRD();
+                    // (sites, lead times, subsets, thresholds, components)
+                    xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
+                            evaluator.BS_CRD;
+                }
+                else if ( metric == "BS_LBD" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        == req_dep.end())
+                        evaluator.calc_BS_LBD();
+                    // (sites, lead times, subsets, thresholds, components)
+                    xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
+                            evaluator.BS_LBD;
+                }
+                else if ( metric == "QS" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        == req_dep.end())
+                        evaluator.calc_QS();
+                    // (sites, lead times, subsets, quantiles)
+                    xt::view(r[m], s, l, xt::all(), xt::all()) =
+                            evaluator.QS;
+                }
+                else if ( metric == "CRPS" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
                         == req_dep.end())
-                    evaluator.calc_CRPS();
-                r.emplace_back(xt::squeeze(evaluator.CRPS));
+                        evaluator.calc_CRPS();
+                    // (sites, lead times, subsets)
+                    xt::view(r[m], s, l, xt::all()) =
+                            evaluator.CRPS;
+                }
             }
         }
 
diff --git a/src/probabilist/evaluator.h b/src/probabilist/evaluator.h
index 26f0223..0a622ce 100644
--- a/src/probabilist/evaluator.h
+++ b/src/probabilist/evaluator.h
@@ -2,6 +2,25 @@
 #define EVALHYD_PROBABILIST_EVALUATOR_H
 
 #include <xtensor/xtensor.hpp>
+#include <xtensor/xview.hpp>
+
+using view1d_xtensor2d_type = decltype(
+    xt::view(
+            std::declval<const xt::xtensor<double, 2>&>(),
+            std::declval<int>(),
+            xt::all()
+    )
+);
+
+using view2d_xtensor4d_type = decltype(
+    xt::view(
+            std::declval<const xt::xtensor<double, 4>&>(),
+            std::declval<int>(),
+            std::declval<int>(),
+            xt::all(),
+            xt::all()
+    )
+);
 
 namespace evalhyd
 {
@@ -11,8 +30,8 @@ namespace evalhyd
         {
         private:
             // members for input data
-            const xt::xtensor<double, 2>& q_obs;
-            const xt::xtensor<double, 2>& q_prd;
+            const view1d_xtensor2d_type& q_obs;
+            const view2d_xtensor4d_type& q_prd;
             const xt::xtensor<double, 1>& q_thr;
             xt::xtensor<bool, 2> t_msk;
 
@@ -30,8 +49,8 @@ namespace evalhyd
 
         public:
             // constructor method
-            Evaluator(const xt::xtensor<double, 2>& obs,
-                      const xt::xtensor<double, 2>& prd,
+            Evaluator(const view1d_xtensor2d_type& obs,
+                      const view2d_xtensor4d_type& prd,
                       const xt::xtensor<double, 1>& thr,
                       const xt::xtensor<bool, 2>& msk) :
                     q_obs{obs}, q_prd{prd}, q_thr{thr}, t_msk(msk)
@@ -39,13 +58,13 @@ namespace evalhyd
                 // initialise a mask if none provided
                 // (corresponding to no temporal subset)
                 if (msk.size() < 1)
-                    t_msk = xt::ones<bool>(obs.shape());
+                    t_msk = xt::ones<bool>({std::size_t {1}, q_obs.size()});
 
                 // determine size for recurring dimensions
-                n = obs.shape(1);
+                n = q_obs.size();
                 n_msk = t_msk.shape(0);
-                n_mbr = prd.shape(0);
-                n_thr = thr.size();
+                n_mbr = q_prd.shape(0);
+                n_thr = q_thr.size();
             };
 
             // members for intermediate evaluation metrics
diff --git a/src/probabilist/evaluator_brier.cpp b/src/probabilist/evaluator_brier.cpp
index dcc8b7e..c250f17 100644
--- a/src/probabilist/evaluator_brier.cpp
+++ b/src/probabilist/evaluator_brier.cpp
@@ -26,7 +26,7 @@ namespace evalhyd
         //     Event probability forecast.
         //     shape: (thresholds, time)
         // \assign bs:
-        //     2D array of Brier score for each threshold for each time step.
+        //     Brier score for each threshold for each time step.
         //     shape: (thresholds, time)
         void Evaluator::calc_bs()
         {
diff --git a/src/probabilist/evaluator_elements.cpp b/src/probabilist/evaluator_elements.cpp
index 5f354e3..2df49be 100644
--- a/src/probabilist/evaluator_elements.cpp
+++ b/src/probabilist/evaluator_elements.cpp
@@ -12,7 +12,7 @@ namespace evalhyd
         //
         // \require q_obs:
         //     Streamflow observations.
-        //     shape: (1, time)
+        //     shape: (time,)
         // \require q_thr:
         //     Streamflow exceedance threshold(s).
         //     shape: (thresholds,)
@@ -22,7 +22,7 @@ namespace evalhyd
         void Evaluator::calc_o_k()
         {
             // determine observed realisation of threshold(s) exceedance
-            o_k = xt::squeeze(is_above_threshold(q_obs, q_thr));
+            o_k = q_obs >= xt::view(q_thr, xt::all(), xt::newaxis());
         }
 
         // Determine mean observed realisation of threshold(s) exceedance.
@@ -65,7 +65,8 @@ namespace evalhyd
         {
             // determine if members have exceeded threshold(s)
             xt::xtensor<double, 3> e_frc =
-                    is_above_threshold(q_prd, q_thr);
+                    q_prd
+                    >= xt::view(q_thr, xt::all(), xt::newaxis(), xt::newaxis());
 
             // calculate how many members have exceeded threshold(s)
             xt::xtensor<double, 2> n_frc = xt::sum(e_frc, 1);
diff --git a/src/probabilist/evaluator_quantiles.cpp b/src/probabilist/evaluator_quantiles.cpp
index 726d14d..a92c76a 100644
--- a/src/probabilist/evaluator_quantiles.cpp
+++ b/src/probabilist/evaluator_quantiles.cpp
@@ -15,7 +15,7 @@ namespace evalhyd
         //
         // \require q_obs:
         //     Streamflow observations.
-        //     shape: (1, time)
+        //     shape: (time,)
         // \require q_qnt:
         //     Streamflow quantiles.
         //     shape: (quantiles, time)
@@ -50,11 +50,11 @@ namespace evalhyd
         //     shape: (quantiles, time)
         // \assign QS:
         //     Quantile scores.
-        //     shape: (subsets, quantiles, 1)
+        //     shape: (subsets, quantiles)
         void Evaluator::calc_QS()
         {
             // initialise output variable
-            // shape: (subsets, quantiles, 1)
+            // shape: (subsets, quantiles)
             QS = xt::zeros<double>({n_msk, n_mbr});
 
             // compute variable one mask at a time to minimise memory imprint
@@ -103,14 +103,14 @@ namespace evalhyd
         //     shape: (subsets, time)
         // \require crps:
         //     CRPS for each time step.
-        //     shape: (quantiles, time)
+        //     shape: (1, time)
         // \assign CRPS:
         //     CRPS.
-        //     shape: (subsets, 1, 1)
+        //     shape: (subsets,)
         void Evaluator::calc_CRPS()
         {
             // initialise output variable
-            // shape: (subsets, thresholds, 1)
+            // shape: (subsets,)
             CRPS = xt::zeros<double>({n_msk});
 
             // compute variable one mask at a time to minimise memory imprint
@@ -123,7 +123,7 @@ namespace evalhyd
                 // calculate the mean over the time steps
                 // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$
                 xt::view(CRPS, m) =
-                        xt::nanmean(crps_masked, -1);
+                        xt::squeeze(xt::nanmean(crps_masked, -1));
             }
         }
     }
diff --git a/src/probabilist/evaluator_utils.cpp b/src/probabilist/evaluator_utils.cpp
deleted file mode 100644
index 923cc3a..0000000
--- a/src/probabilist/evaluator_utils.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <xtensor/xtensor.hpp>
-#include <xtensor/xview.hpp>
-
-namespace evalhyd
-{
-    namespace probabilist
-    {
-        // Determine whether flows are greater than given threshold(s).
-        //
-        // \param q:
-        //     Streamflow data.
-        //     shape: (1+, time)
-        // \param thr:
-        //     Streamflow thresholds.
-        //     shape: (thresholds,)
-        // \return
-        //     Boolean-like threshold(s) exceedance.
-        //     shape: (thresholds, 1+, time)
-        xt::xtensor<double, 3> is_above_threshold(
-                const xt::xtensor<double, 2> &q,
-                const xt::xtensor<double, 1> &thr
-        )
-        {
-            return q >= xt::view(thr, xt::all(), xt::newaxis(), xt::newaxis());
-        }
-
-        // Determine whether flows are strictly lower than given threshold(s)
-        //
-        // \param q:
-        //     Streamflow data.
-        //     shape: (1+, time)
-        // \param thr:
-        //     Streamflow thresholds.
-        //     shape: (thresholds,)
-        // \return
-        //     Boolean-like threshold(s) exceedance.
-        //     shape: (thresholds, 1+, time)
-        xt::xtensor<double, 3> is_below_threshold(
-                const xt::xtensor<double, 2> &q,
-                const xt::xtensor<double, 1> &thr
-        )
-        {
-            return q < xt::view(thr, xt::all(), xt::newaxis(), xt::newaxis());
-        }
-    }
-}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index d60239f..a616f8f 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -37,6 +37,5 @@ add_executable(
         ../src/probabilist/evaluator_brier.cpp
         ../src/probabilist/evaluator_quantiles.cpp
         ../src/probabilist/evaluator_elements.cpp
-        ../src/probabilist/evaluator_utils.cpp
 )
 target_link_libraries(evalhyd_tests gtest gtest_main)
diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp
index 880993f..ae694f1 100644
--- a/tests/test_probabilist.cpp
+++ b/tests/test_probabilist.cpp
@@ -11,44 +11,50 @@ TEST(ProbabilistTests, TestBrier) {
     // read in data
     std::ifstream ifs;
     ifs.open("./data/q_obs.csv");
-    xt::xtensor<double, 2> observed = xt::load_csv<int>(ifs);
+    xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<int>(ifs));
     ifs.close();
 
     ifs.open("./data/q_prd.csv");
     xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs);
     ifs.close();
 
+    // add "artificial" site dimension (size 1) to observations
+    auto obs = xt::view(observed, xt::newaxis(), xt::all());
+    // add "artificial" site dimension (size 1) and lead time dimension (size 1)
+    // to predictions
+    auto prd = xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all());
+
     // compute scores
     xt::xtensor<double, 1> thresholds = {690, 534, 445};
 
     std::vector<xt::xarray<double>> metrics =
             evalhyd::evalp(
-                    observed, predicted, {"BS", "BSS", "BS_CRD", "BS_LBD"},
+                    obs, prd, {"BS", "BSS", "BS_CRD", "BS_LBD"},
                     thresholds
             );
 
     // check results
     // Brier scores
-    xt::xtensor<double, 1> bs =
-            {0.10615136, 0.07395622, 0.08669186};
+    xt::xtensor<double, 4> bs =
+            {{{{0.10615136, 0.07395622, 0.08669186}}}};
     EXPECT_TRUE(xt::allclose(metrics[0], bs));
 
     // Brier skill scores
-    xt::xtensor<double, 1> bss =
-            {0.5705594, 0.6661165, 0.5635126};
+    xt::xtensor<double, 4> bss =
+            {{{{0.5705594, 0.6661165, 0.5635126}}}};
     EXPECT_TRUE(xt::allclose(metrics[1], bss));
 
     // Brier calibration-refinement decompositions
-    xt::xtensor<double, 2> bs_crd =
-            {{0.011411758, 0.1524456, 0.2471852},
-             {0.005532413, 0.1530793, 0.2215031},
-             {0.010139431, 0.1220601, 0.1986125}};
+    xt::xtensor<double, 5> bs_crd =
+            {{{{{0.011411758, 0.1524456, 0.2471852},
+                {0.005532413, 0.1530793, 0.2215031},
+                {0.010139431, 0.1220601, 0.1986125}}}}};
     EXPECT_TRUE(xt::allclose(metrics[2], bs_crd));
 
     // Brier likelihood-base rate decompositions
-    xt::xtensor<double, 2> bs_lbd =
-            {{0.012159881, 0.1506234, 0.2446149},
-             {0.008031746, 0.1473869, 0.2133114},
-             {0.017191279, 0.1048221, 0.1743227}};
+    xt::xtensor<double, 5> bs_lbd =
+            {{{{{0.012159881, 0.1506234, 0.2446149},
+                {0.008031746, 0.1473869, 0.2133114},
+                {0.017191279, 0.1048221, 0.1743227}}}}};
     EXPECT_TRUE(xt::allclose(metrics[3], bs_lbd));
 }
-- 
GitLab