add dimensions for sites/lead times to probabilistic evaluator

Internally, rather than using the multi-dimensional character of tensors to compute all sites and all lead times at once, loops are performed for each site and each lead time, in turn, in order to minimise memory imprint. Although at the moment, the input tensors are expected to feature the sites and lead times dimensions. If memory is an issue, the user can still send smaller tensors with size 1 for those dimensions and recompose multi-sites/multi-lead times output arrays externally.

add dimensions for sites/lead times to probabilistic evaluator
Internally, rather than using the multi-dimensional character of tensors to compute all sites and all lead times at once, loops are performed for each site and each lead time, in turn, in order to minimise memory imprint. Although at the moment, the input tensors are expected to feature the sites and lead times dimensions. If memory is an issue, the user can still send smaller tensors with size 1 for those dimensions and recompose multi-sites/multi-lead times output arrays externally.
295b3208 · Thibault Hallouin · df13cac1 · 295b3208 · 295b3208 · 295b3208
Commit 295b3208 authored 3 years ago by Thibault Hallouin
Hide whitespace changes
Inline Side-by-side

Showing

with 168 additions and 146 deletions
+168 -146
--- a/include/evalhyd/evalp.hpp
+++ b/include/evalhyd/evalp.hpp
@@ -23,16 +23,16 @@ namespace evalhyd
    ///
    ///    q_obs: :cpp:`xt::xtensor<double, 2>`
    ///       Streamflow observations.
-    ///       shape: (1, time)
+    ///       shape: (sites, time)
    ///
-    ///    q_prd: :cpp:`xt::xtensor<double, 2>`
+    ///    q_prd: :cpp:`xt::xtensor<double, 4>`
    ///       Streamflow predictions.
-    ///       shape: (members, time)
+    ///       shape: (sites, lead times, members, time)
    ///
    ///    metrics: :cpp:`std::vector<std::string>`
    ///       The sequence of evaluation metrics to be computed.
    ///
-    ///    q_thr: :cpp:`xt::xtensor<double, 1>`, optional
+    ///    q_thr: :cpp:`xt::xtensor<double, 2>`, optional
    ///       Streamflow exceedance threshold(s).
    ///       shape: (thresholds,)
    ///
@@ -43,11 +43,11 @@ namespace evalhyd
    ///       performed and only one set of metrics is returned corresponding
    ///       to the whole time series. If provided, as many sets of metrics are
    ///       returned as they are masks provided.
-    ///       shape: (1+, time)
+    ///       shape: (subsets, time)
    ///
    /// :Returns:
    ///
-    ///    :cpp:`std::vector<xt::xarray<double>>`
+    ///    :cpp:`std::vector<xt::xtensor<double, 5>>`
    ///       The sequence of evaluation metrics computed
    ///       in the same order as given in *metrics*.
    ///
@@ -69,7 +69,7 @@ namespace evalhyd
    /// \endrst
    std::vector<xt::xarray<double>> evalp(
            const xt::xtensor<double, 2>& q_obs,
-            const xt::xtensor<double, 2>& q_prd,
+            const xt::xtensor<double, 4>& q_prd,
            const std::vector<std::string>& metrics,
            const xt::xtensor<double, 1>& q_thr = {},
            const xt::xtensor<bool, 2>& t_msk = {}
@@ -84,8 +84,22 @@ namespace evalhyd
        // check that optional parameters are given as arguments
        eh::utils::check_optionals(metrics, q_thr);

-        // instantiate probabilist evaluator
-        eh::probabilist::Evaluator evaluator(q_obs, q_prd, q_thr, t_msk);
+        // retrieve dimensions
+        std::size_t n_sit = q_prd.shape(0);
+        std::size_t n_ltm = q_prd.shape(1);
+        std::size_t n_mbr = q_prd.shape(2);
+        std::size_t n_thr = q_thr.size();
+        std::size_t n_msk = t_msk.size() < 1 ? 1 : t_msk.shape(0);
+
+        // register metrics number of dimensions
+        std::unordered_map<std::string, std::vector<std::size_t>> dim;
+
+        dim["BS"] = {n_sit, n_ltm, n_msk, n_thr};
+        dim["BSS"] = {n_sit, n_ltm, n_msk, n_thr};
+        dim["BS_CRD"] = {n_sit, n_ltm, n_msk, n_thr, 3};
+        dim["BS_LBD"] = {n_sit, n_ltm, n_msk, n_thr, 3};
+        dim["QS"] = {n_sit, n_ltm, n_msk, n_mbr};
+        dim["CRPS"] = {n_sit, n_ltm, n_msk};

        // declare maps for memoisation purposes
        std::unordered_map<std::string, std::vector<std::string>> elt;
@@ -110,76 +124,105 @@ namespace evalhyd

        eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep);

-        // pre-compute required elt
-        for (const auto& element : req_elt)
-        {
-            if ( element == "o_k" )
-                evaluator.calc_o_k();
-            else if ( element == "bar_o" )
-                evaluator.calc_bar_o();
-            else if ( element == "y_k" )
-                evaluator.calc_y_k();
-            else if ( element == "q_qnt" )
-                evaluator.calc_q_qnt();
-        }
+        // initialise data structure for outputs
+        std::vector<xt::xarray<double>> r;
+        for (const auto& metric : metrics)
+            r.emplace_back(xt::zeros<double>(dim[metric]));

-        // pre-compute required dep
-        for (const auto& dependency : req_dep)
+        // compute variables one site at a time to minimise memory imprint
+        for (int s = 0; s < n_sit; s++)
+            // compute variables one lead time at a time to minimise memory imprint
+            for (int l = 0; l < n_ltm; l++)
        {
-            if ( dependency == "bs" )
-                evaluator.calc_bs();
-            else if ( dependency == "qs" )
-                evaluator.calc_qs();
-            else if ( dependency == "crps" )
-                evaluator.calc_crps();
-        }
+            // instantiate probabilist evaluator
+            const auto q_obs_v = xt::view(q_obs, s, xt::all());
+            const auto q_prd_v = xt::view(q_prd, s, l, xt::all(), xt::all());

-        // retrieve or compute requested metrics
-        std::vector<xt::xarray<double>> r;
+            eh::probabilist::Evaluator evaluator(q_obs_v, q_prd_v, q_thr, t_msk);

-        for (const auto& metric : metrics)
-        {
-            if ( metric == "BS" )
+            // pre-compute required elt
+            for (const auto& element : req_elt)
            {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
-                        == req_dep.end())
-                    evaluator.calc_BS();
-                r.emplace_back(xt::squeeze(evaluator.BS));
+                if ( element == "o_k" )
+                    evaluator.calc_o_k();
+                else if ( element == "bar_o" )
+                    evaluator.calc_bar_o();
+                else if ( element == "y_k" )
+                    evaluator.calc_y_k();
+                else if ( element == "q_qnt" )
+                    evaluator.calc_q_qnt();
            }
-            else if ( metric == "BSS" )
+
+            // pre-compute required dep
+            for (const auto& dependency : req_dep)
            {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
-                        == req_dep.end())
-                    evaluator.calc_BSS();
-                r.emplace_back(xt::squeeze(evaluator.BSS));
+                if ( dependency == "bs" )
+                    evaluator.calc_bs();
+                else if ( dependency == "qs" )
+                    evaluator.calc_qs();
+                else if ( dependency == "crps" )
+                    evaluator.calc_crps();
            }
-            else if ( metric == "BS_CRD" )
+
+            // retrieve or compute requested metrics
+            for (int m = 0; m < metrics.size(); m++)
            {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
+                const auto& metric = metrics[m];
+
+                if ( metric == "BS" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
                        == req_dep.end())
-                    evaluator.calc_BS_CRD();
-                r.emplace_back(xt::squeeze(evaluator.BS_CRD));
-            }
-            else if ( metric == "BS_LBD" )
-            {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        evaluator.calc_BS();
+                    // (sites, lead times, subsets, thresholds)
+                    xt::view(r[m], s, l, xt::all(), xt::all()) =
+                            evaluator.BS;
+                }
+                else if ( metric == "BSS" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
                        == req_dep.end())
-                    evaluator.calc_BS_LBD();
-                r.emplace_back(xt::squeeze(evaluator.BS_LBD));
-            }
-            else if ( metric == "QS" )
-            {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        evaluator.calc_BSS();
+                    // (sites, lead times, subsets, thresholds)
+                    xt::view(r[m], s, l, xt::all(), xt::all()) =
+                            evaluator.BSS;
+                }
+                else if ( metric == "BS_CRD" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
                        == req_dep.end())
-                    evaluator.calc_QS();
-                r.emplace_back(xt::squeeze(evaluator.QS));
-            }
-            else if ( metric == "CRPS" )
-            {
-                if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        evaluator.calc_BS_CRD();
+                    // (sites, lead times, subsets, thresholds, components)
+                    xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
+                            evaluator.BS_CRD;
+                }
+                else if ( metric == "BS_LBD" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        == req_dep.end())
+                        evaluator.calc_BS_LBD();
+                    // (sites, lead times, subsets, thresholds, components)
+                    xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
+                            evaluator.BS_LBD;
+                }
+                else if ( metric == "QS" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
+                        == req_dep.end())
+                        evaluator.calc_QS();
+                    // (sites, lead times, subsets, quantiles)
+                    xt::view(r[m], s, l, xt::all(), xt::all()) =
+                            evaluator.QS;
+                }
+                else if ( metric == "CRPS" )
+                {
+                    if (std::find(req_dep.begin(), req_dep.end(), metric)
                        == req_dep.end())
-                    evaluator.calc_CRPS();
-                r.emplace_back(xt::squeeze(evaluator.CRPS));
+                        evaluator.calc_CRPS();
+                    // (sites, lead times, subsets)
+                    xt::view(r[m], s, l, xt::all()) =
+                            evaluator.CRPS;
+                }
            }
        }


--- a/src/probabilist/evaluator.h
+++ b/src/probabilist/evaluator.h
@@ -2,6 +2,25 @@
 #define EVALHYD_PROBABILIST_EVALUATOR_H

 #include <xtensor/xtensor.hpp>
+#include <xtensor/xview.hpp>
+
+using view1d_xtensor2d_type = decltype(
+    xt::view(
+            std::declval<const xt::xtensor<double, 2>&>(),
+            std::declval<int>(),
+            xt::all()
+    )
+);
+
+using view2d_xtensor4d_type = decltype(
+    xt::view(
+            std::declval<const xt::xtensor<double, 4>&>(),
+            std::declval<int>(),
+            std::declval<int>(),
+            xt::all(),
+            xt::all()
+    )
+);

 namespace evalhyd
 {
@@ -11,8 +30,8 @@ namespace evalhyd
        {
        private:
            // members for input data
-            const xt::xtensor<double, 2>& q_obs;
-            const xt::xtensor<double, 2>& q_prd;
+            const view1d_xtensor2d_type& q_obs;
+            const view2d_xtensor4d_type& q_prd;
            const xt::xtensor<double, 1>& q_thr;
            xt::xtensor<bool, 2> t_msk;

@@ -30,8 +49,8 @@ namespace evalhyd

        public:
            // constructor method
-            Evaluator(const xt::xtensor<double, 2>& obs,
-                      const xt::xtensor<double, 2>& prd,
+            Evaluator(const view1d_xtensor2d_type& obs,
+                      const view2d_xtensor4d_type& prd,
                      const xt::xtensor<double, 1>& thr,
                      const xt::xtensor<bool, 2>& msk) :
                    q_obs{obs}, q_prd{prd}, q_thr{thr}, t_msk(msk)
@@ -39,13 +58,13 @@ namespace evalhyd
                // initialise a mask if none provided
                // (corresponding to no temporal subset)
                if (msk.size() < 1)
-                    t_msk = xt::ones<bool>(obs.shape());
+                    t_msk = xt::ones<bool>({std::size_t {1}, q_obs.size()});

                // determine size for recurring dimensions
-                n = obs.shape(1);
+                n = q_obs.size();
                n_msk = t_msk.shape(0);
-                n_mbr = prd.shape(0);
-                n_thr = thr.size();
+                n_mbr = q_prd.shape(0);
+                n_thr = q_thr.size();
            };

            // members for intermediate evaluation metrics

--- a/src/probabilist/evaluator_brier.cpp
+++ b/src/probabilist/evaluator_brier.cpp
@@ -26,7 +26,7 @@ namespace evalhyd
        //     Event probability forecast.
        //     shape: (thresholds, time)
        // \assign bs:
-        //     2D array of Brier score for each threshold for each time step.
+        //     Brier score for each threshold for each time step.
        //     shape: (thresholds, time)
        void Evaluator::calc_bs()
        {

--- a/src/probabilist/evaluator_elements.cpp
+++ b/src/probabilist/evaluator_elements.cpp
@@ -12,7 +12,7 @@ namespace evalhyd
        //
        // \require q_obs:
        //     Streamflow observations.
-        //     shape: (1, time)
+        //     shape: (time,)
        // \require q_thr:
        //     Streamflow exceedance threshold(s).
        //     shape: (thresholds,)
@@ -22,7 +22,7 @@ namespace evalhyd
        void Evaluator::calc_o_k()
        {
            // determine observed realisation of threshold(s) exceedance
-            o_k = xt::squeeze(is_above_threshold(q_obs, q_thr));
+            o_k = q_obs >= xt::view(q_thr, xt::all(), xt::newaxis());
        }

        // Determine mean observed realisation of threshold(s) exceedance.
@@ -65,7 +65,8 @@ namespace evalhyd
        {
            // determine if members have exceeded threshold(s)
            xt::xtensor<double, 3> e_frc =
-                    is_above_threshold(q_prd, q_thr);
+                    q_prd
+                    >= xt::view(q_thr, xt::all(), xt::newaxis(), xt::newaxis());

            // calculate how many members have exceeded threshold(s)
            xt::xtensor<double, 2> n_frc = xt::sum(e_frc, 1);

--- a/src/probabilist/evaluator_quantiles.cpp
+++ b/src/probabilist/evaluator_quantiles.cpp
@@ -15,7 +15,7 @@ namespace evalhyd
        //
        // \require q_obs:
        //     Streamflow observations.
-        //     shape: (1, time)
+        //     shape: (time,)
        // \require q_qnt:
        //     Streamflow quantiles.
        //     shape: (quantiles, time)
@@ -50,11 +50,11 @@ namespace evalhyd
        //     shape: (quantiles, time)
        // \assign QS:
        //     Quantile scores.
-        //     shape: (subsets, quantiles, 1)
+        //     shape: (subsets, quantiles)
        void Evaluator::calc_QS()
        {
            // initialise output variable
-            // shape: (subsets, quantiles, 1)
+            // shape: (subsets, quantiles)
            QS = xt::zeros<double>({n_msk, n_mbr});

            // compute variable one mask at a time to minimise memory imprint
@@ -103,14 +103,14 @@ namespace evalhyd
        //     shape: (subsets, time)
        // \require crps:
        //     CRPS for each time step.
-        //     shape: (quantiles, time)
+        //     shape: (1, time)
        // \assign CRPS:
        //     CRPS.
-        //     shape: (subsets, 1, 1)
+        //     shape: (subsets,)
        void Evaluator::calc_CRPS()
        {
            // initialise output variable
-            // shape: (subsets, thresholds, 1)
+            // shape: (subsets,)
            CRPS = xt::zeros<double>({n_msk});

            // compute variable one mask at a time to minimise memory imprint
@@ -123,7 +123,7 @@ namespace evalhyd
                // calculate the mean over the time steps
                // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$
                xt::view(CRPS, m) =
-                        xt::nanmean(crps_masked, -1);
+                        xt::squeeze(xt::nanmean(crps_masked, -1));
            }
        }
    }

--- a/src/probabilist/evaluator_utils.cpp
+++ b/src/probabilist/evaluator_utils.cpp
-#include <xtensor/xtensor.hpp>
-#include <xtensor/xview.hpp>
-
-namespace evalhyd
-{
-    namespace probabilist
-    {
-        // Determine whether flows are greater than given threshold(s).
-        //
-        // \param q:
-        //     Streamflow data.
-        //     shape: (1+, time)
-        // \param thr:
-        //     Streamflow thresholds.
-        //     shape: (thresholds,)
-        // \return
-        //     Boolean-like threshold(s) exceedance.
-        //     shape: (thresholds, 1+, time)
-        xt::xtensor<double, 3> is_above_threshold(
-                const xt::xtensor<double, 2> &q,
-                const xt::xtensor<double, 1> &thr
-        )
-        {
-            return q >= xt::view(thr, xt::all(), xt::newaxis(), xt::newaxis());
-        }
-
-        // Determine whether flows are strictly lower than given threshold(s)
-        //
-        // \param q:
-        //     Streamflow data.
-        //     shape: (1+, time)
-        // \param thr:
-        //     Streamflow thresholds.
-        //     shape: (thresholds,)
-        // \return
-        //     Boolean-like threshold(s) exceedance.
-        //     shape: (thresholds, 1+, time)
-        xt::xtensor<double, 3> is_below_threshold(
-                const xt::xtensor<double, 2> &q,
-                const xt::xtensor<double, 1> &thr
-        )
-        {
-            return q < xt::view(thr, xt::all(), xt::newaxis(), xt::newaxis());
-        }
-    }
-}
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -37,6 +37,5 @@ add_executable(
        ../src/probabilist/evaluator_brier.cpp
        ../src/probabilist/evaluator_quantiles.cpp
        ../src/probabilist/evaluator_elements.cpp
-        ../src/probabilist/evaluator_utils.cpp
 )
 target_link_libraries(evalhyd_tests gtest gtest_main)
--- a/tests/test_probabilist.cpp
+++ b/tests/test_probabilist.cpp
@@ -11,44 +11,50 @@ TEST(ProbabilistTests, TestBrier) {
    // read in data
    std::ifstream ifs;
    ifs.open("./data/q_obs.csv");
-    xt::xtensor<double, 2> observed = xt::load_csv<int>(ifs);
+    xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<int>(ifs));
    ifs.close();

    ifs.open("./data/q_prd.csv");
    xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs);
    ifs.close();

+    // add "artificial" site dimension (size 1) to observations
+    auto obs = xt::view(observed, xt::newaxis(), xt::all());
+    // add "artificial" site dimension (size 1) and lead time dimension (size 1)
+    // to predictions
+    auto prd = xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all());
+
    // compute scores
    xt::xtensor<double, 1> thresholds = {690, 534, 445};

    std::vector<xt::xarray<double>> metrics =
            evalhyd::evalp(
-                    observed, predicted, {"BS", "BSS", "BS_CRD", "BS_LBD"},
+                    obs, prd, {"BS", "BSS", "BS_CRD", "BS_LBD"},
                    thresholds
            );

    // check results
    // Brier scores
-    xt::xtensor<double, 1> bs =
-            {0.10615136, 0.07395622, 0.08669186};
+    xt::xtensor<double, 4> bs =
+            {{{{0.10615136, 0.07395622, 0.08669186}}}};
    EXPECT_TRUE(xt::allclose(metrics[0], bs));

    // Brier skill scores
-    xt::xtensor<double, 1> bss =
-            {0.5705594, 0.6661165, 0.5635126};
+    xt::xtensor<double, 4> bss =
+            {{{{0.5705594, 0.6661165, 0.5635126}}}};
    EXPECT_TRUE(xt::allclose(metrics[1], bss));

    // Brier calibration-refinement decompositions
-    xt::xtensor<double, 2> bs_crd =
-            {{0.011411758, 0.1524456, 0.2471852},
-             {0.005532413, 0.1530793, 0.2215031},
-             {0.010139431, 0.1220601, 0.1986125}};
+    xt::xtensor<double, 5> bs_crd =
+            {{{{{0.011411758, 0.1524456, 0.2471852},
+                {0.005532413, 0.1530793, 0.2215031},
+                {0.010139431, 0.1220601, 0.1986125}}}}};
    EXPECT_TRUE(xt::allclose(metrics[2], bs_crd));

    // Brier likelihood-base rate decompositions
-    xt::xtensor<double, 2> bs_lbd =
-            {{0.012159881, 0.1506234, 0.2446149},
-             {0.008031746, 0.1473869, 0.2133114},
-             {0.017191279, 0.1048221, 0.1743227}};
+    xt::xtensor<double, 5> bs_lbd =
+            {{{{{0.012159881, 0.1506234, 0.2446149},
+                {0.008031746, 0.1473869, 0.2133114},
+                {0.017191279, 0.1048221, 0.1743227}}}}};
    EXPECT_TRUE(xt::allclose(metrics[3], bs_lbd));
 }