diff --git a/include/evalhyd/probabilist.hpp b/include/evalhyd/probabilist.hpp
index 98c68a911648c4413d79176e346104b88fe703e0..9dbfe7d013268dd457fd96d82dbec43bbd0a7e8c 100644
--- a/include/evalhyd/probabilist.hpp
+++ b/include/evalhyd/probabilist.hpp
@@ -28,13 +28,23 @@ namespace evalhyd
     /// \param [in] q_thr (optional):
     ///     1D array of streamflow exceedance threshold(s).
     ///     shape: (thresholds,)
+    /// \param [in] t_msk (optional):
+    ///     2D array of booleans representing temporal mask(s) used to work
+    ///     on temporal subsets of the whole streamflow time series (where
+    ///     true and false indicate the time steps to include in and discard
+    ///     from a given subset, respectively). If not provided, no
+    ///     subsetting is performed and a single set of metrics is returned,
+    ///     corresponding to the whole time series. If provided, as many
+    ///     sets of metrics are returned as there are masks provided.
+    ///     shape: (1+, time)
     /// \return
-    ///     Vector of 2D array of metrics for each threshold.
+    ///     Vector of 3D arrays of metrics for each subset and each threshold.
-    std::vector<xt::xtensor<double, 2>> evalp(
+    std::vector<xt::xtensor<double, 3>> evalp(
             const xt::xtensor<double, 2>& q_obs,
             const xt::xtensor<double, 2>& q_prd,
             const std::vector<std::string>& metrics,
-            const xt::xtensor<double, 1>& q_thr = {}
+            const xt::xtensor<double, 1>& q_thr = {},
+            const xt::xtensor<bool, 2>& t_msk = {}
     )
     {
         // check that the metrics to be computed are valid
@@ -47,7 +57,7 @@ namespace evalhyd
         eh::utils::check_optionals(metrics, q_thr);
 
         // instantiate probabilist evaluator
-        eh::probabilist::Evaluator evaluator(q_obs, q_prd, q_thr);
+        eh::probabilist::Evaluator evaluator(q_obs, q_prd, q_thr, t_msk);
 
         // declare maps for memoisation purposes
         std::unordered_map<std::string, std::vector<std::string>> elt;
@@ -55,14 +65,14 @@ namespace evalhyd
 
         // register potentially recurring computation elements across metrics
         elt["bs"] = {"o_k", "y_k"};
-        elt["bss"] = {"o_k", "bar_o"};
+        elt["BSS"] = {"o_k", "bar_o"};
         elt["BS_CRD"] = {"o_k", "bar_o", "y_k"};
         elt["BS_LBD"] = {"o_k", "y_k"};
         elt["qs"] = {"q_qnt"};
 
         // register nested metrics (i.e. metric dependent on another metric)
         dep["BS"] = {"bs"};
-        dep["BSS"] = {"bs", "bss"};
+        dep["BSS"] = {"bs"};
         dep["QS"] = {"qs"};
         dep["CRPS"] = {"qs", "crps"};
 
@@ -90,8 +100,6 @@ namespace evalhyd
         {
             if ( dependency == "bs" )
                 evaluator.calc_bs();
-            else if ( dependency == "bss" )
-                evaluator.calc_bss();
             else if ( dependency == "qs" )
                 evaluator.calc_qs();
             else if ( dependency == "crps" )
@@ -99,7 +107,7 @@ namespace evalhyd
         }
 
         // retrieve or compute requested metrics
-        std::vector<xt::xtensor<double, 2>> r;
+        std::vector<xt::xtensor<double, 3>> r;
 
         for (const auto& metric : metrics)
         {
diff --git a/include/evalhyd/probabilist/evaluator.h b/include/evalhyd/probabilist/evaluator.h
index ccd60af7ec42280a9ded16b4fec52249beda84c9..43e27f49cf14054d770c506ed364ff59fe61ab34 100644
--- a/include/evalhyd/probabilist/evaluator.h
+++ b/include/evalhyd/probabilist/evaluator.h
@@ -14,10 +14,11 @@ namespace evalhyd
             const xt::xtensor<double, 2>& q_obs;
             const xt::xtensor<double, 2>& q_prd;
             const xt::xtensor<double, 1>& q_thr;
+            xt::xtensor<bool, 2> t_msk;
 
             // members for computational elements
             xt::xtensor<double, 2> o_k;
-            xt::xtensor<double, 1> bar_o;
+            xt::xtensor<double, 2> bar_o;
             xt::xtensor<double, 2> y_k;
             xt::xtensor<double, 2> q_qnt;
 
@@ -25,23 +26,29 @@
             // constructor method
             Evaluator(const xt::xtensor<double, 2>& obs,
                       const xt::xtensor<double, 2>& prd,
-                      const xt::xtensor<double, 1>& thr) :
-                    q_obs{obs}, q_prd{prd}, q_thr{thr} {};
+                      const xt::xtensor<double, 1>& thr,
+                      const xt::xtensor<bool, 2>& msk) :
+                    q_obs{obs}, q_prd{prd}, q_thr{thr}, t_msk(msk)
+            {
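+                // if no mask is provided, default to a single all-true mask
+                // spanning the whole time series (i.e. no subsetting)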
+                if (msk.size() < 1)
+                    t_msk = xt::ones<bool>(obs.shape());
+            }
 
             // members for intermediate evaluation metrics
             // (i.e. before the reduction along the temporal axis)
             xt::xtensor<double, 2> bs;
-            xt::xtensor<double, 2> bss;
             xt::xtensor<double, 2> qs;
             xt::xtensor<double, 2> crps;
 
             // members for evaluation metrics
-            xt::xtensor<double, 2> BS;
-            xt::xtensor<double, 2> BS_CRD;
-            xt::xtensor<double, 2> BS_LBD;
-            xt::xtensor<double, 2> BSS;
-			xt::xtensor<double, 2> QS;
-            xt::xtensor<double, 2> CRPS;
+            xt::xtensor<double, 3> BS;
+            xt::xtensor<double, 3> BS_CRD;
+            xt::xtensor<double, 3> BS_LBD;
+            xt::xtensor<double, 3> BSS;
+            xt::xtensor<double, 3> QS;
+            xt::xtensor<double, 3> CRPS;
 
             // methods to compute derived data
             void calc_q_qnt();
@@ -53,7 +58,6 @@ namespace evalhyd
 
             // methods to compute intermediate metrics
             void calc_bs();
-            void calc_bss();
             void calc_qs();
             void calc_crps();
 
diff --git a/include/evalhyd/probabilist/evaluator_brier.cpp b/include/evalhyd/probabilist/evaluator_brier.cpp
index dfe611a842e29ec50764aa2e2b1da80bc58bcc42..069b74b710b5c8968cf8ec988947d56fcad6ec44 100644
--- a/include/evalhyd/probabilist/evaluator_brier.cpp
+++ b/include/evalhyd/probabilist/evaluator_brier.cpp
@@ -37,17 +37,37 @@ namespace evalhyd
 
         // Compute the Brier score (BS).
         //
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
         // \require bs:
-        //     2D array of Brier score for each threshold for each time step.
+        //     Brier score for each threshold for each time step.
         //     shape: (thresholds, time)
         // \assign BS:
-        //     2D array of Brier score for each threshold.
-        //     shape: (thresholds, 1)
+        //     Brier score for each subset and for each threshold.
+        //     shape: (subsets, thresholds, 1)
         void Evaluator::calc_BS()
         {
-            // calculate the mean over the time steps
-            // $BS = \frac{1}{n} \sum_{k=1}^{n} (o_k - y_k)^2$
-            BS = xt::mean(bs, -1, xt::keep_dims);
+            // define some dimensions
+            std::size_t n_msk = t_msk.shape(0);
+            std::size_t n_thr = bs.shape(0);
+
+            // initialise output variable
+            // shape: (subsets, thresholds, 1)
+            BS = xt::zeros<double>({n_msk, n_thr, std::size_t {1}});
+
+            // compute variable one mask at a time to minimise memory footprint
+            for (std::size_t m = 0; m < n_msk; m++)
+            {
+                // apply the mask
+                // (using NaN workaround until reducers work on masked_view)
+                auto bs_masked = xt::where(xt::row(t_msk, m), bs, NAN);
+
+                // calculate the mean over the time steps
+                // $BS = \frac{1}{n} \sum_{k=1}^{n} (o_k - y_k)^2$
+                xt::view(BS, m, xt::all(), xt::all()) =
+                        xt::nanmean(bs_masked, -1, xt::keep_dims);
+            }
         }
 
         // Compute the calibration-refinement decomposition of the Brier score
@@ -56,18 +76,24 @@ namespace evalhyd
         // BS = reliability - resolution + uncertainty
         //
         // \require q_thr:
-        //     1D array of streamflow exceedance threshold(s).
+        //     Streamflow exceedance threshold(s).
         //     shape: (thresholds,)
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
         // \require o_k:
         //     Observed event outcome.
         //     shape: (thresholds, time)
         // \require y_k:
         //     Event probability forecast.
         //     shape: (thresholds, time)
+        // \require bar_o:
+        //     Mean event observed outcome.
+        //     shape: (subsets, thresholds)
         // \assign BS_CRD:
-        //     2D array of Brier score components (reliability, resolution,
-        //     uncertainty) for each threshold.
-        //     shape: (thresholds, components)
+        //     Brier score components (reliability, resolution, uncertainty)
+        //     for each subset and for each threshold.
+        //     shape: (subsets, thresholds, components)
         void Evaluator::calc_BS_CRD()
         {
             // declare internal variables
@@ -79,66 +105,80 @@ namespace evalhyd
             xt::xtensor<double, 1> y_i;
 
             // define some dimensions
-            std::size_t n = o_k.shape(1);
             std::size_t n_thr = o_k.shape(0);
             std::size_t n_mbr = q_prd.shape(0);
+            std::size_t n_msk = t_msk.shape(0);
             std::size_t n_cmp = 3;
 
-            // initialise output variable
-            // shape: (components, thresholds)
-            BS_CRD = xt::zeros<double>({n_thr, n_cmp});
-
             // compute range of forecast values $y_i$
             y_i = xt::arange<double>(double(n_mbr + 1)) / n_mbr;
 
-            // compute mask to subsample time steps belonging to same forecast bin
-            // (where bins are defined as the range of forecast values)
-            msk_bins = xt::equal(
-                    y_k, xt::view(y_i, xt::all(), xt::newaxis(), xt::newaxis())
-            );
-
-            // compute number of forecasts in each forecast bin $N_i$
-            N_i = xt::sum(msk_bins, -1);
-
-            // compute subsample relative frequency
-            // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$
-            bar_o_i = xt::where(
-                    N_i > 0,
-                    xt::sum(
-                            xt::where(
-                                    msk_bins,
-                                    xt::view(o_k, xt::newaxis(),
-                                             xt::all(), xt::all()),
-                                    0.
-                            ),
-                            -1
-                    ) / N_i,
-                    0.
-            );
-
-            // calculate reliability =
-            // $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$
-            xt::col(BS_CRD, 0) =
-                    xt::sum(
-                            xt::square(
-                                    xt::view(y_i, xt::all(), xt::newaxis())
-                                    - bar_o_i
-                            ) * N_i,
-                            0
-                    ) / n;
-
-            // calculate resolution =
-            // $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$
-            xt::col(BS_CRD, 1) =
-                    xt::sum(
-                            xt::square(
-                                    bar_o_i - bar_o
-                            ) * N_i,
-                            0
-                    ) / n;
-
-            // calculate uncertainty = $\bar{o} (1 - \bar{o})$
-            xt::col(BS_CRD, 2) = bar_o * (1 - bar_o);
+            // initialise output variable
+            // shape: (subsets, thresholds, components)
+            BS_CRD = xt::zeros<double>({n_msk, n_thr, n_cmp});
+
+            // compute variable one mask at a time to minimise memory footprint
+            for (std::size_t m = 0; m < n_msk; m++)
+            {
+                // apply the mask
+                // (using NaN workaround until reducers work on masked_view)
+                auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN);
+                auto y_k_masked = xt::where(xt::row(t_msk, m), y_k, NAN);
+
+                // calculate length of subset
+                auto l = xt::sum(xt::row(t_msk, m));
+
+                // compute mask to subsample time steps belonging to same forecast bin
+                // (where bins are defined as the range of forecast values)
+                msk_bins = xt::equal(
+                        y_k_masked,
+                        xt::view(y_i, xt::all(), xt::newaxis(), xt::newaxis())
+                );
+
+                // compute number of forecasts in each forecast bin $N_i$
+                N_i = xt::nansum(msk_bins, -1);
+
+                // compute subsample relative frequency
+                // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$
+                bar_o_i = xt::where(
+                        N_i > 0,
+                        xt::nansum(
+                                xt::where(
+                                        msk_bins,
+                                        xt::view(o_k_masked, xt::newaxis(),
+                                                 xt::all(), xt::all()),
+                                        0.
+                                ),
+                                -1
+                        ) / N_i,
+                        0.
+                );
+
+                // calculate reliability =
+                // $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$
+                xt::view(BS_CRD, m, xt::all(), 0) =
+                        xt::nansum(
+                                xt::square(
+                                        xt::view(y_i, xt::all(), xt::newaxis())
+                                        - bar_o_i
+                                ) * N_i,
+                                0
+                        ) / l;
+
+                // calculate resolution =
+                // $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$
+                xt::view(BS_CRD, m, xt::all(), 1) =
+                        xt::nansum(
+                                xt::square(
+                                        bar_o_i - xt::row(bar_o, m)
+                                ) * N_i,
+                                0
+                        ) / l;
+
+                // calculate uncertainty = $\bar{o} (1 - \bar{o})$
+                xt::view(BS_CRD, m, xt::all(), 2) =
+                        xt::row(bar_o, m) * (1 - xt::row(bar_o, m));
+            }
         }
 
         // Compute the likelihood-base rate decomposition of the Brier score
@@ -146,6 +186,9 @@ namespace evalhyd
         //
         // BS = type 2 bias - discrimination + sharpness
         //
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
         // \require o_k:
         //     Observed event outcome.
         //     shape: (thresholds, time)
@@ -153,9 +196,9 @@ namespace evalhyd
         //     Event probability forecast.
         //     shape: (thresholds, time)
         // \return BS_LBD:
-        //     2D array of Brier score components (type 2 bias, discrimination,
-        //     sharpness) for each threshold.
-        //     shape: (thresholds, components)
+        //     Brier score components (type 2 bias, discrimination, sharpness)
+        //     for each subset and for each threshold.
+        //     shape: (subsets, thresholds, components)
         void Evaluator::calc_BS_LBD()
         {
             // declare internal variables
@@ -169,124 +212,148 @@ namespace evalhyd
             xt::xtensor<double, 1> o_j;
 
             // define some dimensions
-            std::size_t n = o_k.shape(1);
             std::size_t n_thr = o_k.shape(0);
+            std::size_t n_msk = t_msk.shape(0);
             std::size_t n_cmp = 3;
 
-            // declare and initialise output variable
-            // shape: (components, thresholds)
-            BS_LBD = xt::zeros<double>({n_thr, n_cmp});
-
             // set the range of observed values $o_j$
             o_j = {0., 1.};
 
-            // compute mask to subsample time steps belonging to same observation bin
-            // (where bins are defined as the range of forecast values)
-            msk_bins = xt::equal(
-                    o_k, xt::view(o_j, xt::all(), xt::newaxis(), xt::newaxis())
-            );
-
-            // compute number of observations in each observation bin $M_j$
-            M_j = xt::sum(msk_bins, -1);
-
-            // compute subsample relative frequency
-            // $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$
-            bar_y_j = xt::where(
-                    M_j > 0,
-                    xt::sum(
-                            xt::where(
-                                    msk_bins,
-                                    xt::view(y_k, xt::newaxis(),
-                                             xt::all(), xt::all()),
-                                    0.
-                            ),
-                            -1
-                    ) / M_j,
-                    0.
-            );
-
-            // compute mean "climatology" forecast probability
-            // $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$
-            bar_y = xt::mean(y_k, -1);
-
-            // calculate type 2 bias =
-            // $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$
-            xt::col(BS_LBD, 0) =
-                    xt::sum(
-                            xt::square(
-                                    xt::view(o_j, xt::all(), xt::newaxis())
-                                    - bar_y_j
-                            ) * M_j,
-                            0
-                    ) / n;
-
-            // calculate discrimination =
-            // $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$
-            xt::col(BS_LBD, 1) =
-                    xt::sum(
-                            xt::square(
-                                    bar_y_j - bar_y
-                            ) * M_j,
-                            0
-                    ) / n;
-
-            // calculate sharpness =
-            // $\frac{1}{n} \sum_{k=1}^{n} (\bar{y_k} - \bar{y})^2$
-            xt::col(BS_LBD, 2) =
-                    xt::sum(
-                            xt::square(
-                                    y_k -
-                                    xt::view(bar_y, xt::all(), xt::newaxis())
-                            ),
-                            -1
-                    ) / n;
+            // declare and initialise output variable
+            // shape: (subsets, thresholds, components)
+            BS_LBD = xt::zeros<double>({n_msk, n_thr, n_cmp});
+
+            // compute variable one mask at a time to minimise memory footprint
+            for (std::size_t m = 0; m < n_msk; m++)
+            {
+                // apply the mask
+                // (using NaN workaround until reducers work on masked_view)
+                auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN);
+                auto y_k_masked = xt::where(xt::row(t_msk, m), y_k, NAN);
+
+                // calculate length of subset
+                auto l = xt::sum(xt::row(t_msk, m));
+
+                // compute mask to subsample time steps belonging to same observation bin
+                // (where bins are defined as the range of forecast values)
+                msk_bins = xt::equal(
+                        o_k_masked,
+                        xt::view(o_j, xt::all(), xt::newaxis(), xt::newaxis())
+                );
+
+                // compute number of observations in each observation bin $M_j$
+                M_j = xt::nansum(msk_bins, -1);
+
+                // compute subsample relative frequency
+                // $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$
+                bar_y_j = xt::where(
+                        M_j > 0,
+                        xt::nansum(
+                                xt::where(
+                                        msk_bins,
+                                        xt::view(
+                                                y_k_masked,
+                                                xt::newaxis(), xt::all(), xt::all()
+                                        ),
+                                        0.
+                                ),
+                                -1
+                        ) / M_j,
+                        0.
+                );
+
+                // compute mean "climatology" forecast probability
+                // $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$
+                bar_y = xt::nanmean(y_k_masked, -1);
+
+                // calculate type 2 bias =
+                // $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$
+                xt::view(BS_LBD, m, xt::all(), 0) =
+                        xt::nansum(
+                                xt::square(
+                                        xt::view(o_j, xt::all(), xt::newaxis())
+                                        - bar_y_j
+                                ) * M_j,
+                                0
+                        ) / l;
+
+                // calculate discrimination =
+                // $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$
+                xt::view(BS_LBD, m, xt::all(), 1) =
+                        xt::nansum(
+                                xt::square(
+                                        bar_y_j - bar_y
+                                ) * M_j,
+                                0
+                        ) / l;
+
+                // calculate sharpness =
+                // $\frac{1}{n} \sum_{k=1}^{n} (y_k - \bar{y})^2$
+                xt::view(BS_LBD, m, xt::all(), 2) =
+                        xt::nansum(
+                                xt::square(
+                                        y_k_masked -
+                                        xt::view(bar_y, xt::all(), xt::newaxis())
+                                ),
+                                -1
+                        ) / l;
+            }
         }
 
-        // Compute the Brier skill score for each time step.
+        // Compute the Brier skill score (BSS).
         //
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
         // \require o_k:
         //     Observed event outcome.
         //     shape: (thresholds, time)
         // \require bar_o:
         //     Mean event observed outcome.
-        //     shape: (thresholds,)
+        //     shape: (subsets, thresholds)
         // \require bs:
         //     Brier scores for each time step.
         //     shape: (thresholds, time)
-        // \assign bss:
-        //     2D array of Brier skill score for each threshold for each time step.
-        //     shape: (thresholds, time)
-        void Evaluator::calc_bss()
-        {
-            // calculate reference Brier score(s)
-            // $bs_{ref} = \frac{1}{n} \sum_{k=1}^{n} (o_k - \bar{o})^2$
-            xt::xtensor<double, 2> bs_ref =
-                    xt::mean(
-                            xt::square(
-                                    o_k -
-                                    xt::view(bar_o, xt::all(), xt::newaxis())
-                            ),
-                            -1, xt::keep_dims
-                    );
-
-            // return computed Brier skill score(s)
-            // $bss = 1 - \frac{bs}{bs_{ref}}
-            bss = 1 - (bs / bs_ref);
-        }
-
-        // Compute the Brier skill score (BSS).
-        //
-        // \require bss:
-        //     2D array of Brier skill score for each threshold for each time step.
-        //     shape: (thresholds, time)
         // \assign BSS:
-        //     2D array of Brier skill score for each threshold.
-        //     shape: (thresholds, 1)
+        //     Brier skill score for each subset and for each threshold.
+        //     shape: (subsets, thresholds, 1)
         void Evaluator::calc_BSS()
         {
-            // calculate the mean over the time steps
-            // $BSS = \frac{1}{n} \sum_{k=1}^{n} bss$
-            BSS = xt::mean(bss, -1, xt::keep_dims);
+            // define some dimensions
+            std::size_t n_thr = o_k.shape(0);
+            std::size_t n_msk = t_msk.shape(0);
+
+            // declare and initialise output variable
+            // shape: (subsets, thresholds, 1)
+            BSS = xt::zeros<double>({n_msk, n_thr, std::size_t {1}});
+
+            // compute variable one mask at a time to minimise memory footprint
+            for (std::size_t m = 0; m < n_msk; m++)
+            {
+                // apply the mask
+                // (using NaN workaround until reducers work on masked_view)
+                auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN);
+                auto bs_masked = xt::where(xt::row(t_msk, m), bs, NAN);
+
+                // calculate reference Brier score(s)
+                // $bs_{ref} = \frac{1}{n} \sum_{k=1}^{n} (o_k - \bar{o})^2$
+                xt::xtensor<double, 2> bs_ref =
+                        xt::nanmean(
+                                xt::square(
+                                        o_k_masked -
+                                        xt::view(
+                                                xt::row(bar_o, m),
+                                                xt::all(), xt::newaxis()
+                                        )
+                                ),
+                                -1, xt::keep_dims
+                        );
+
+                // compute Brier skill score(s)
+                // $BSS = \frac{1}{n} \sum_{k=1}^{n} \left(1 - \frac{bs_k}{bs_{ref}}\right)$
+                xt::view(BSS, m, xt::all(), xt::all()) =
+                        xt::nanmean(1 - (bs_masked / bs_ref), -1, xt::keep_dims);
+            }
         }
     }
 }
diff --git a/include/evalhyd/probabilist/evaluator_elements.cpp b/include/evalhyd/probabilist/evaluator_elements.cpp
index 1618d8f0fa194bdd6a22a2e25490aa5fe6674239..49e98845635433987aa2a29c5c918ba98e7deac8 100644
--- a/include/evalhyd/probabilist/evaluator_elements.cpp
+++ b/include/evalhyd/probabilist/evaluator_elements.cpp
@@ -1,4 +1,5 @@
 #include <xtensor/xmath.hpp>
+#include <xtensor/xview.hpp>
 #include <xtensor/xsort.hpp>
 
 #include "evaluator.h"
@@ -10,13 +11,13 @@ namespace evalhyd
         // Determine observed realisation of threshold(s) exceedance.
         //
         // \require q_obs:
-        //     2D array of streamflow observations.
+        //     Streamflow observations.
         //     shape: (1, time)
         // \require q_thr:
-        //     1D array of streamflow exceedance threshold(s).
+        //     Streamflow exceedance threshold(s).
         //     shape: (thresholds,)
         // \assign o_k:
-        //     2D array of event observed outcome.
+        //     Event observed outcome.
         //     shape: (thresholds, time)
         void Evaluator::calc_o_k()
         {
@@ -27,28 +28,38 @@ namespace evalhyd
         // Determine mean observed realisation of threshold(s) exceedance.
         //
         // \require o_k:
-        //     2D array of event observed outcome.
+        //     Event observed outcome.
         //     shape: (thresholds, time)
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
         // \assign bar_o:
-        //     1D array of mean event observed outcome.
-        //     shape: (thresholds,)
+        //     Mean event observed outcome.
+        //     shape: (subsets, thresholds)
         void Evaluator::calc_bar_o()
         {
+            // apply the mask
+            // (using NaN workaround until reducers work on masked_view)
+            auto o_k_masked = xt::where(
+                    xt::view(t_msk, xt::all(), xt::newaxis(), xt::all()),
+                    o_k, NAN
+            );
+
             // compute mean "climatology" relative frequency of the event
             // $\bar{o} = \frac{1}{n} \sum_{k=1}^{n} o_k$
-            bar_o = xt::mean(o_k, -1);
+            bar_o = xt::nanmean(o_k_masked, -1);
         }
 
         // Determine forecast probability of threshold(s) exceedance to occur.
         //
         // \require q_prd:
-        //     2D array of streamflow predictions.
+        //     Streamflow predictions.
         //     shape: (members, time)
         // \require q_thr:
-        //     1D array of streamflow exceedance threshold(s).
+        //     Streamflow exceedance threshold(s).
         //     shape: (thresholds,)
         // \assign y_k:
-        //     2D array of event probability forecast.
+        //     Event probability forecast.
         //     shape: (thresholds, time)
         void Evaluator::calc_y_k()
         {
@@ -72,10 +83,10 @@ namespace evalhyd
         // Compute the forecast quantiles from the ensemble members.
         //
         // \require q_prd:
-        //     2D array of streamflow predictions.
+        //     Streamflow predictions.
         //     shape: (members, time)
         // \assign q_qnt:
-        //     2D array of streamflow forecast quantiles.
+        //     Streamflow forecast quantiles.
         //     shape: (quantiles, time)
         void Evaluator::calc_q_qnt()
         {
diff --git a/include/evalhyd/probabilist/evaluator_quantiles.cpp b/include/evalhyd/probabilist/evaluator_quantiles.cpp
index 89b54ce453207c03655c24cce6520eeb7a0878ce..7170732732fce65d595f13f8398758f08e234c6b 100644
--- a/include/evalhyd/probabilist/evaluator_quantiles.cpp
+++ b/include/evalhyd/probabilist/evaluator_quantiles.cpp
@@ -14,13 +14,13 @@ namespace evalhyd
         // Compute the quantile scores for each time step.
         //
         // \require q_obs:
-        //     2D array of streamflow observations.
+        //     Streamflow observations.
         //     shape: (1, time)
         // \require q_qnt:
-        //     2D array of streamflow quantiles.
+        //     Streamflow quantiles.
         //     shape: (quantiles, time)
         // \assign qs:
-        //     2D array of the quantile scores for each time step.
+        //     Quantile scores for each time step.
         //     shape: (quantiles, time)
         void Evaluator::calc_qs()
         {
@@ -45,17 +45,37 @@ namespace evalhyd
         
         // Compute the quantile score (QS).
         //
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
         // \require qs:
-        //     2D array of quantile scores for each time step.
+        //     Quantile scores for each time step.
         //     shape: (quantiles, time)
         // \assign QS:
-        //     2D array of quantile scores.
-        //     shape: (quantiles, 1)
+        //     Quantile scores.
+        //     shape: (subsets, quantiles, 1)
         void Evaluator::calc_QS()
         {
-            // calculate the mean over the time steps
-            // $QS = \frac{1}{n} \sum_{k=1}^{n} qs$
-            QS = xt::mean(qs, -1, xt::keep_dims);
+            // define some dimensions
+            std::size_t n_msk = t_msk.shape(0);
+            std::size_t n_qnt = qs.shape(0);
+
+            // initialise output variable
+            // shape: (subsets, quantiles, 1)
+            QS = xt::zeros<double>({n_msk, n_qnt, std::size_t {1}});
+
+            // compute variable one mask at a time to minimise memory footprint
+            for (std::size_t m = 0; m < n_msk; m++)
+            {
+                // apply the mask
+                // (using NaN workaround until reducers work on masked_view)
+                auto qs_masked = xt::where(xt::row(t_msk, m), qs, NAN);
+
+                // calculate the mean over the time steps
+                // $QS = \frac{1}{n} \sum_{k=1}^{n} qs$
+                xt::view(QS, m, xt::all(), xt::all()) =
+                        xt::nanmean(qs_masked, -1, xt::keep_dims);
+            }
         }
         
         // Compute the continuous rank probability score(s) based
@@ -67,10 +87,10 @@ namespace evalhyd
         //     integration to be accurate.
         //
         // \require qs:
-        //     2D array of quantile scores for each time step.
+        //     Quantile scores for each time step.
         //     shape: (quantiles, time)
         // \assign crps:
-        //     2D array of CRPS for each time step.
+        //     CRPS for each time step.
         //     shape: (1, time)
         void Evaluator::calc_crps()
         {
@@ -88,17 +108,36 @@ namespace evalhyd
         // Compute the continuous rank probability score (CRPS) based
         // on quantile scores.
         //
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
         // \require crps:
-        //     2D array of CRPS for each time step.
+        //     CRPS for each time step.
-        //     shape: (quantiles, time)
+        //     shape: (1, time)
         // \assign CRPS:
-        //     2D array containing the CRPS.
-        //     shape: (1, 1)
+        //     CRPS.
+        //     shape: (subsets, 1, 1)
         void Evaluator::calc_CRPS()
         {
-            // calculate the mean over the time steps
-            // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$
-            CRPS = xt::mean(crps, -1, xt::keep_dims);
+            // define some dimensions
+            std::size_t n_msk = t_msk.shape(0);
+
+            // initialise output variable
+            // shape: (subsets, 1, 1)
+            CRPS = xt::zeros<double>({n_msk, std::size_t {1}, std::size_t {1}});
+
+            // compute variable one mask at a time to minimise memory footprint
+            for (std::size_t m = 0; m < n_msk; m++)
+            {
+                // apply the mask
+                // (using NaN workaround until reducers work on masked_view)
+                auto crps_masked = xt::where(xt::row(t_msk, m), crps, NAN);
+
+                // calculate the mean over the time steps
+                // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$
+                xt::view(CRPS, m, xt::all(), xt::all()) =
+                        xt::nanmean(crps_masked, -1, xt::keep_dims);
+            }
         }
     }
 }
diff --git a/include/evalhyd/probabilist/evaluator_utils.cpp b/include/evalhyd/probabilist/evaluator_utils.cpp
index 720ad75ee7fe0d60e0640351dc561370f9b12278..923cc3aef2ce64446d56188104e4e14861a9b74e 100644
--- a/include/evalhyd/probabilist/evaluator_utils.cpp
+++ b/include/evalhyd/probabilist/evaluator_utils.cpp
@@ -8,13 +8,13 @@ namespace evalhyd
         // Determine whether flows are greater than given threshold(s).
         //
         // \param q:
-        //     Array of streamflow data.
+        //     Streamflow data.
         //     shape: (1+, time)
         // \param thr:
-        //     Array of streamflow thresholds.
+        //     Streamflow thresholds.
         //     shape: (thresholds,)
         // \return
-        //     Array of boolean-like threshold(s) exceedance.
+        //     Boolean-like threshold(s) exceedance.
         //     shape: (thresholds, 1+, time)
         xt::xtensor<double, 3> is_above_threshold(
                 const xt::xtensor<double, 2> &q,
@@ -27,13 +27,13 @@ namespace evalhyd
         // Determine whether flows are strictly lower than given threshold(s)
         //
         // \param q:
-        //     Array of streamflow data.
+        //     Streamflow data.
         //     shape: (1+, time)
         // \param thr:
-        //     Array of streamflow thresholds.
+        //     Streamflow thresholds.
         //     shape: (thresholds,)
         // \return
-        //     Array of boolean-like threshold(s) exceedance.
+        //     Boolean-like threshold(s) exceedance.
         //     shape: (thresholds, 1+, time)
         xt::xtensor<double, 3> is_below_threshold(
                 const xt::xtensor<double, 2> &q,
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index b162b136d4972424621654608e11f60befafb464..9b62835030aae524e69ccbb76143e57334732dfc 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -19,6 +19,9 @@ include_directories("../deps/xtensor/include")
 
 include_directories(../include)
 
+SET(GCC_EVALHYD_COMPILE_FLAGS "-O3")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_EVALHYD_COMPILE_FLAGS}")
+
 # TEST SUITE -------------------------------------------------------------------
 include_directories(data)
 
diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp
index bfb3c457de71ba6cb2a0119eabc211200c8413d2..7bc1734953d9aba500179fdc6610e4e5229d4498 100644
--- a/tests/test_probabilist.cpp
+++ b/tests/test_probabilist.cpp
@@ -22,7 +22,10 @@ TEST(ProbabilistTests, TestBrier) {
     // compute scores
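+    // note: no temporal mask (t_msk) is given, so a single subset spanning
+    // the whole record is evaluated and the expected arrays below have a
+    // leading dimension of size 1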
     xt::xtensor<double, 1> thresholds = {690, 534, 445};
 
-    std::vector<xt::xtensor<double, 2>> metrics =
+    std::vector<xt::xtensor<double, 3>> metrics =
             evalhyd::evalp(
                     observed, predicted, {"BS", "BSS", "BS_CRD", "BS_LBD"},
                     thresholds
@@ -30,30 +30,30 @@ TEST(ProbabilistTests, TestBrier) {
 
     // check results
     // Brier scores
-    xt::xtensor<double, 2> bs =
-            {{0.10615136},
-             {0.07395622},
-             {0.08669186}};
+    xt::xtensor<double, 3> bs =
+            {{{0.10615136},
+              {0.07395622},
+              {0.08669186}}};
     EXPECT_TRUE(xt::allclose(metrics[0], bs));
 
     // Brier skill scores
-    xt::xtensor<double, 2> bss =
-            {{0.5705594},
-             {0.6661165},
-             {0.5635126}};
+    xt::xtensor<double, 3> bss =
+            {{{0.5705594},
+              {0.6661165},
+              {0.5635126}}};
     EXPECT_TRUE(xt::allclose(metrics[1], bss));
 
     // Brier calibration-refinement decompositions
-    xt::xtensor<double, 2> bs_crd =
-            {{0.011411758, 0.1524456, 0.2471852},
-             {0.005532413, 0.1530793, 0.2215031},
-             {0.010139431, 0.1220601, 0.1986125}};
+    xt::xtensor<double, 3> bs_crd =
+            {{{0.011411758, 0.1524456, 0.2471852},
+              {0.005532413, 0.1530793, 0.2215031},
+              {0.010139431, 0.1220601, 0.1986125}}};
     EXPECT_TRUE(xt::allclose(metrics[2], bs_crd));
 
-    // Brier likelihood-based decompositions
-    xt::xtensor<double, 2> bs_lbd =
-            {{0.012159881, 0.1506234, 0.2446149},
-             {0.008031746, 0.1473869, 0.2133114},
-             {0.017191279, 0.1048221, 0.1743227}};
+    // Brier likelihood-base rate decompositions
+    xt::xtensor<double, 3> bs_lbd =
+            {{{0.012159881, 0.1506234, 0.2446149},
+              {0.008031746, 0.1473869, 0.2133114},
+              {0.017191279, 0.1048221, 0.1743227}}};
     EXPECT_TRUE(xt::allclose(metrics[3], bs_lbd));
 }