diff --git a/include/evalhyd/detail/probabilist/brier.hpp b/include/evalhyd/detail/probabilist/brier.hpp
index 831f56e739c2bdab5f37d09951c561af6c307cfd..0638080b260ca1f34b843c58606b8bea0007c2ab 100644
--- a/include/evalhyd/detail/probabilist/brier.hpp
+++ b/include/evalhyd/detail/probabilist/brier.hpp
@@ -304,10 +304,10 @@ namespace evalhyd
                 return BS;
             }
 
-            /// Compute the calibration-refinement decomposition of the Brier score
-            /// into reliability, resolution, and uncertainty.
-            ///
-            /// BS = reliability - resolution + uncertainty
+            /// Compute the X and Y axes of the reliability diagram
+            /// (`y_i`, the forecast probability; `bar_o_i`, the observed frequency;)
+            /// as well as the frequencies of the sampling histogram
+            /// (`N_i`, the number of forecasts of given probability `y_i`)'.
             ///
             /// \param q_thr
             ///     Streamflow exceedance threshold(s).
@@ -318,9 +318,6 @@ namespace evalhyd
             /// \param y_k
             ///     Event probability forecast.
             ///     shape: (sites, lead times, thresholds, time)
-            /// \param bar_o
-            ///     Mean event observed outcome.
-            ///     shape: (sites, lead times, subsets, samples, thresholds)
             /// \param t_msk
             ///     Temporal subsets of the whole record.
             ///     shape: (sites, lead times, subsets, time)
@@ -340,15 +337,14 @@ namespace evalhyd
             /// \param n_exp
             ///     Number of bootstrap samples.
             /// \return
-            ///     Brier score components (reliability, resolution, uncertainty)
-            ///     for each subset and for each threshold.
-            ///     shape: (sites, lead times, subsets, samples, thresholds, components)
+            ///     X and Y axes of the reliability diagram, and ordinates
+            ///     (i.e. frequencies) of the sampling histogram, in this order.
+            ///     shape: (sites, lead times, subsets, samples, thresholds, bins, axes)
             template <class XD2>
-            inline xt::xtensor<double, 6> calc_BS_CRD(
+            inline xt::xtensor<double, 7> calc_REL_DIAG(
                     const XD2& q_thr,
                     const xt::xtensor<double, 3>& o_k,
                     const xt::xtensor<double, 4>& y_k,
-                    const xt::xtensor<double, 5>& bar_o,
                     const xt::xtensor<bool, 4>& t_msk,
                     const std::vector<xt::xkeep_slice<int>>& b_exp,
                     std::size_t n_sit,
@@ -359,21 +355,16 @@ namespace evalhyd
                     std::size_t n_exp
             )
             {
-                // declare internal variables
-                // shape: (sites, lead times, bins, thresholds, time)
-                xt::xtensor<double, 5> msk_bins;
-                // shape: (sites, lead times, bins, thresholds)
-                xt::xtensor<double, 4> N_i, bar_o_i;
-                // shape: (bins,)
-                xt::xtensor<double, 1> y_i;
+                // initialise output variable
+                xt::xtensor<double, 7> REL_DIAG =
+                    xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr,
+                                       n_mbr + 1, std::size_t {3}});
 
                 // compute range of forecast values $y_i$
-                y_i = xt::arange<double>(double(n_mbr + 1)) / n_mbr;
+                auto y_i = xt::arange<double>(double(n_mbr + 1)) / n_mbr;
 
-                // initialise output variable
-                xt::xtensor<double, 6> BS_CRD =
-                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr,
-                                           std::size_t {3}});
+                xt::view(REL_DIAG, xt::all(), xt::all(), xt::all(), xt::all(),
+                         xt::all(), xt::all(), 0) = y_i;
 
                 // compute variable one mask at a time to minimise memory imprint
                 for (std::size_t m = 0; m < n_msk; m++)
@@ -407,39 +398,36 @@ namespace evalhyd
                         auto t_msk_sampled =
                                 xt::view(t_msk, xt::all(), xt::all(), m,
                                          xt::newaxis(), b_exp[e]);
-                        auto bar_o_sampled =
-                                xt::view(bar_o, xt::all(), xt::all(),
-                                         xt::all(), e, xt::all());
-
-                        // calculate length of subset
-                        auto l = xt::sum(t_msk_sampled, -1);
 
                         // compute mask to subsample time steps belonging to same forecast bin
                         // (where bins are defined as the range of forecast values)
-                        msk_bins = xt::equal(
+                        auto msk_bins = xt::equal(
                                 // force evaluation to avoid segfault
                                 xt::view(xt::eval(y_k_masked_sampled),
-                                         xt::all(), xt::all(), xt::newaxis(),
-                                         xt::all(), xt::all()),
+                                         xt::all(), xt::all(), xt::all(),
+                                         xt::newaxis(), xt::all()),
                                 xt::view(y_i,
-                                         xt::newaxis(), xt::newaxis(), xt::all(),
-                                         xt::newaxis(), xt::newaxis())
+                                         xt::newaxis(), xt::newaxis(), xt::newaxis(),
+                                         xt::all(), xt::newaxis())
                         );
 
                         // compute number of forecasts in each forecast bin $N_i$
-                        N_i = xt::nansum(msk_bins, -1);
+                        auto N_i = xt::eval(xt::nansum(msk_bins, -1));
+
+                        xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(),
+                                 xt::all(), 2) = N_i;
 
                         // compute subsample relative frequency
                         // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$
-                        bar_o_i = xt::where(
+                        auto bar_o_i = xt::where(
                                 N_i > 0,
                                 xt::nansum(
                                         xt::where(
                                                 msk_bins,
                                                 xt::view(o_k_masked_sampled,
                                                          xt::all(), xt::all(),
-                                                         xt::newaxis(),
-                                                         xt::all(), xt::all()),
+                                                         xt::all(), xt::newaxis(),
+                                                         xt::all()),
                                                 0.
                                         ),
                                         -1
@@ -447,37 +435,135 @@ namespace evalhyd
                                 0.
                         );
 
+                        xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(),
+                                 xt::all(), 1) = bar_o_i;
+                    }
+                }
+
+                // assign NaN where thresholds were not provided (i.e. set as NaN)
+                xt::masked_view(
+                        REL_DIAG,
+                        xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(),
+                                           xt::newaxis(), xt::newaxis(),
+                                           xt::all(), xt::newaxis(),
+                                           xt::newaxis()))
+                ) = NAN;
+
+                return REL_DIAG;
+            }
+
+            /// Compute the calibration-refinement decomposition of the Brier score
+            /// into reliability, resolution, and uncertainty.
+            ///
+            /// BS = reliability - resolution + uncertainty
+            ///
+            /// \param q_thr
+            ///     Streamflow exceedance threshold(s).
+            ///     shape: (sites, thresholds)
+            /// \param bar_o
+            ///     Mean event observed outcome.
+            ///     shape: (sites, lead times, subsets, samples, thresholds)
+            /// \param REL_DIAG
+            ///     Axes of the reliability diagram and the sampling histogram.
+            ///     shape: (sites, lead times, thresholds, time)
+            /// \param t_msk
+            ///     Temporal subsets of the whole record.
+            ///     shape: (sites, lead times, subsets, time)
+            /// \param b_exp
+            ///     Boostrap samples.
+            ///     shape: (samples, time slice)
+            /// \param n_sit
+            ///     Number of sites.
+            /// \param n_ldt
+            ///     Number of lead times.
+            /// \param n_thr
+            ///     Number of thresholds.
+            /// \param n_msk
+            ///     Number of temporal subsets.
+            /// \param n_exp
+            ///     Number of bootstrap samples.
+            /// \return
+            ///     Brier score components (reliability, resolution, uncertainty)
+            ///     for each subset and for each threshold.
+            ///     shape: (sites, lead times, subsets, samples, thresholds, bins, axes)
+            template <class XD2>
+            inline xt::xtensor<double, 6> calc_BS_CRD(
+                    const XD2& q_thr,
+                    const xt::xtensor<double, 5>& bar_o,
+                    const xt::xtensor<double, 7>& REL_DIAG,
+                    const xt::xtensor<bool, 4>& t_msk,
+                    const std::vector<xt::xkeep_slice<int>>& b_exp,
+                    std::size_t n_sit,
+                    std::size_t n_ldt,
+                    std::size_t n_thr,
+                    std::size_t n_msk,
+                    std::size_t n_exp
+            )
+            {
+                // initialise output variable
+                xt::xtensor<double, 6> BS_CRD =
+                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr,
+                                           std::size_t {3}});
+
+                // compute variable one mask at a time to minimise memory imprint
+                for (std::size_t m = 0; m < n_msk; m++)
+                {
+                    // compute variable one sample at a time
+                    for (std::size_t e = 0; e < n_exp; e++)
+                    {
+                        // apply the bootstrap sampling
+                        auto t_msk_sampled =
+                                xt::view(t_msk, xt::all(), xt::all(), m,
+                                         xt::newaxis(), b_exp[e]);
+
+                        // calculate length of subset
+                        auto l = xt::sum(t_msk_sampled, -1);
+
+                        // retrieve range of forecast values $y_i$
+                        auto y_i = xt::eval(
+                                xt::view(REL_DIAG, xt::all(), xt::all(), m, e,
+                                         xt::all(), xt::all(), 0)
+                        );
+
+                        // retrieve number of forecasts in each forecast bin $N_i$
+                        auto N_i = xt::eval(
+                                xt::view(REL_DIAG, xt::all(), xt::all(), m, e,
+                                         xt::all(), xt::all(), 2)
+                        );
+
+                        // retrieve subsample relative frequency
+                        // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$
+                        auto bar_o_i = xt::eval(
+                                xt::view(REL_DIAG, xt::all(), xt::all(), m, e,
+                                         xt::all(), xt::all(), 1)
+                        );
+
+                        // retrieve mean event observed outcome $bar_o$
+                        auto _bar_o = xt::view(bar_o, xt::all(), xt::all(),
+                                               m, e, xt::all());
+                        // (reshape to insert size-one axis for "bins")
+                        auto _bar_o_ = xt::view(_bar_o, xt::all(), xt::all(),
+                                                xt::all(), xt::newaxis());
+
                         // calculate reliability =
                         // $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$
                         xt::view(BS_CRD, xt::all(), xt::all(), m, e, xt::all(), 0) =
                                 xt::nansum(
-                                        xt::square(
-                                                xt::view(y_i, xt::newaxis(),
-                                                         xt::newaxis(), xt::all(),
-                                                         xt::newaxis())
-                                                - bar_o_i
-                                        ) * N_i,
-                                        2
+                                        xt::square(y_i - bar_o_i) * N_i,
+                                        -1
                                 ) / l;
 
                         // calculate resolution =
                         // $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$
                         xt::view(BS_CRD, xt::all(), xt::all(), m, e, xt::all(), 1) =
                                 xt::nansum(
-                                        xt::square(
-                                                bar_o_i
-                                                - xt::view(bar_o_sampled,
-                                                           xt::all(), xt::all(),
-                                                           m, xt::newaxis(),
-                                                           xt::all())
-                                        ) * N_i,
-                                        2
+                                        xt::square(bar_o_i - _bar_o_) * N_i,
+                                        -1
                                 ) / l;
 
                         // calculate uncertainty = $\bar{o} (1 - \bar{o})$
                         xt::view(BS_CRD, xt::all(), xt::all(), m, e, xt::all(), 2) =
-                                xt::view(bar_o_sampled, xt::all(), xt::all(), m)
-                                * (1 - xt::view(bar_o_sampled, xt::all(), xt::all(), m));
+                                _bar_o * (1 - _bar_o);
                     }
                 }
 
diff --git a/include/evalhyd/detail/probabilist/evaluator.hpp b/include/evalhyd/detail/probabilist/evaluator.hpp
index 7c3967e3ac55e973db5f4127f76830dc3cea3d3c..93111a14bce6dc8e9352dc555b5eb821047c0e1b 100644
--- a/include/evalhyd/detail/probabilist/evaluator.hpp
+++ b/include/evalhyd/detail/probabilist/evaluator.hpp
@@ -91,6 +91,7 @@ namespace evalhyd
             // members for evaluation metrics
             // > Brier-based
             xtl::xoptional<xt::xtensor<double, 5>, bool> BS;
+            xtl::xoptional<xt::xtensor<double, 7>, bool> REL_DIAG;
             xtl::xoptional<xt::xtensor<double, 6>, bool> BS_CRD;
             xtl::xoptional<xt::xtensor<double, 6>, bool> BS_LBD;
             xtl::xoptional<xt::xtensor<double, 5>, bool> BSS;
@@ -520,14 +521,27 @@ namespace evalhyd
                 return BS.value();
             };
 
+            xt::xtensor<double, 7> get_REL_DIAG()
+            {
+                if (!REL_DIAG.has_value())
+                {
+                    REL_DIAG = metrics::calc_REL_DIAG(
+                            get_q_thr(), get_o_k(), get_y_k(),
+                            t_msk, b_exp,
+                            n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp
+                    );
+                }
+                return REL_DIAG.value();
+            };
+
             xt::xtensor<double, 6> get_BS_CRD()
             {
                 if (!BS_CRD.has_value())
                 {
                     BS_CRD = metrics::calc_BS_CRD(
-                            get_q_thr(), get_o_k(), get_y_k(), get_bar_o(),
+                            get_q_thr(), get_bar_o(), get_REL_DIAG(),
                             t_msk, b_exp,
-                            n_sit, n_ldt, n_thr, n_mbr, n_msk, n_exp
+                            n_sit, n_ldt, n_thr, n_msk, n_exp
                     );
                 }
                 return BS_CRD.value();
diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp
index 80a14ce28a939120c16b1f72c3e50556fae688c0..7c50c1a38b2a4c5d18be45c6918b965953be4aab 100644
--- a/include/evalhyd/evalp.hpp
+++ b/include/evalhyd/evalp.hpp
@@ -215,7 +215,7 @@ namespace evalhyd
         // check that the metrics to be computed are valid
         utils::check_metrics(
                 metrics,
-                {"BS", "BSS", "BS_CRD", "BS_LBD",
+                {"BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG",
                  "QS", "CRPS",
                  "POD", "POFD", "FAR", "CSI", "ROCSS",
                  "RANK_HIST", "DS", "AS",
@@ -421,6 +421,12 @@ namespace evalhyd
                         uncertainty::summarise_p(evaluator.get_BS_LBD(), summary)
                 );
             }
+            else if ( metric == "REL_DIAG" )
+            {
+                r.emplace_back(
+                        uncertainty::summarise_p(evaluator.get_REL_DIAG(), summary)
+                );
+            }
             else if ( metric == "QS" )
             {
                 r.emplace_back(