diff --git a/CMakeLists.txt b/CMakeLists.txt
index 37e7f9018b53b07fefd728de44f2172ad37759c9..59c8caca24e5a48a27f7f625124af64415c197fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,9 +21,7 @@ message(STATUS "Found xtensor: ${xtensor_INCLUDE_DIRS}/xtensor")
 # define evalhyd library
 add_library(
         evalhyd
-        src/probabilist/evaluator_brier.cpp
-        src/probabilist/evaluator_elements.cpp
-        src/probabilist/evaluator_quantiles.cpp
+        INTERFACE
 )
 
 add_library(EvalHyd::evalhyd ALIAS evalhyd)
@@ -36,30 +34,20 @@ set_target_properties(
 
 target_include_directories(
         evalhyd
-        PUBLIC
+        INTERFACE
                 $<INSTALL_INTERFACE:include>
                 $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
-        PRIVATE
-                ${CMAKE_CURRENT_SOURCE_DIR}/src
 )
 
 target_link_libraries(
         evalhyd
-        PUBLIC
+        INTERFACE
                 xtensor
 )
 
-if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
-    target_compile_options(
-            evalhyd
-            PRIVATE
-            "/bigobj"
-    )
-endif()
-
 target_compile_features(
         evalhyd
-        PUBLIC
+        INTERFACE
                 cxx_std_14
 )
 
diff --git a/src/determinist/evaluator.hpp b/include/evalhyd/detail/determinist/evaluator.hpp
similarity index 100%
rename from src/determinist/evaluator.hpp
rename to include/evalhyd/detail/determinist/evaluator.hpp
diff --git a/src/masks.hpp b/include/evalhyd/detail/masks.hpp
similarity index 100%
rename from src/masks.hpp
rename to include/evalhyd/detail/masks.hpp
diff --git a/src/maths.hpp b/include/evalhyd/detail/maths.hpp
similarity index 100%
rename from src/maths.hpp
rename to include/evalhyd/detail/maths.hpp
diff --git a/src/probabilist/evaluator_brier.cpp b/include/evalhyd/detail/probabilist/evaluator.hpp
similarity index 54%
rename from src/probabilist/evaluator_brier.cpp
rename to include/evalhyd/detail/probabilist/evaluator.hpp
index 2528aef8fb811add2845b19bc1bb5709081c9c10..07bd764574e4dbfc4937fc7c46c6fd85559883f9 100644
--- a/src/probabilist/evaluator_brier.cpp
+++ b/include/evalhyd/detail/probabilist/evaluator.hpp
@@ -1,23 +1,251 @@
-#include <xtensor/xmath.hpp>
+#ifndef EVALHYD_PROBABILIST_EVALUATOR_HPP
+#define EVALHYD_PROBABILIST_EVALUATOR_HPP
+
+#include <stdexcept>
+#include <vector>
+
+#include <xtensor/xtensor.hpp>
 #include <xtensor/xview.hpp>
 #include <xtensor/xmasked_view.hpp>
+#include <xtensor/xslice.hpp>
 #include <xtensor/xoperation.hpp>
+#include <xtensor/xmath.hpp>
+#include <xtensor/xsort.hpp>
 
-#include "probabilist/evaluator.hpp"
-
-namespace eh = evalhyd;
-
-// NOTE ------------------------------------------------------------------------
-// All equations in metrics below are following notations from
-// "Wilks, D. S. (2011). Statistical methods in the atmospheric sciences.
-// Amsterdam; Boston: Elsevier Academic Press. ISBN: 9780123850225".
-// In particular, pp. 302-303, 332-333.
-// -----------------------------------------------------------------------------
 
 namespace evalhyd
 {
     namespace probabilist
     {
+        template <class D2, class D4, class B4>
+        class Evaluator
+        {
+        private:
+            using view1d_xtensor2d_double_type = decltype(
+            xt::view(
+                    std::declval<const D2&>(),
+                    std::declval<int>(),
+                    xt::all()
+            )
+            );
+
+            using view2d_xtensor4d_double_type = decltype(
+            xt::view(
+                    std::declval<const D4&>(),
+                    std::declval<int>(),
+                    std::declval<int>(),
+                    xt::all(),
+                    xt::all()
+            )
+            );
+
+            using view2d_xtensor4d_bool_type = decltype(
+            xt::view(
+                    std::declval<const B4&>(),
+                    std::declval<int>(),
+                    std::declval<int>(),
+                    xt::all(),
+                    xt::all()
+            )
+            );
+
+            // members for input data
+            const view1d_xtensor2d_double_type& q_obs;
+            const view2d_xtensor4d_double_type& q_prd;
+            const view1d_xtensor2d_double_type& q_thr;
+            xt::xtensor<bool, 2> t_msk;
+            const std::vector<xt::xkeep_slice<int>>& b_exp;
+
+            // members for dimensions
+            std::size_t n;
+            std::size_t n_msk;
+            std::size_t n_mbr;
+            std::size_t n_thr;
+            std::size_t n_exp;
+
+            // members for computational elements
+            xt::xtensor<double, 2> o_k;
+            xt::xtensor<double, 3> bar_o;
+            xt::xtensor<double, 2> y_k;
+            xt::xtensor<double, 2> q_qnt;
+
+        public:
+            // constructor method
+            Evaluator(const view1d_xtensor2d_double_type& obs,
+                      const view2d_xtensor4d_double_type& prd,
+                      const view1d_xtensor2d_double_type& thr,
+                      const view2d_xtensor4d_bool_type& msk,
+                      const std::vector<xt::xkeep_slice<int>>& exp) :
+                    q_obs{obs}, q_prd{prd}, q_thr{thr}, t_msk(msk), b_exp(exp)
+            {
+                // initialise a mask if none provided
+                // (corresponding to no temporal subset)
+                if (msk.size() < 1)
+                    t_msk = xt::ones<bool>({std::size_t {1}, q_obs.size()});
+
+                // determine size for recurring dimensions
+                n = q_obs.size();
+                n_msk = t_msk.shape(0);
+                n_mbr = q_prd.shape(0);
+                n_thr = q_thr.size();
+                n_exp = b_exp.size();
+
+                // drop time steps where observations and/or predictions are NaN
+                auto obs_nan = xt::isnan(q_obs);
+                auto prd_nan = xt::isnan(q_prd);
+                auto sum_nan = xt::eval(xt::sum(prd_nan, -1));
+
+                if (xt::amin(sum_nan) != xt::amax(sum_nan))
+                    throw std::runtime_error(
+                            "predictions across members feature non-equal lengths"
+                    );
+
+                auto msk_nan = xt::where(obs_nan | xt::row(prd_nan, 0))[0];
+
+                xt::view(t_msk, xt::all(), xt::keep(msk_nan)) = false;
+            };
+
+            // members for intermediate evaluation metrics
+            // (i.e. before the reduction along the temporal axis)
+            xt::xtensor<double, 2> bs;
+            xt::xtensor<double, 2> qs;
+            xt::xtensor<double, 2> crps;
+
+            // members for evaluation metrics
+            xt::xtensor<double, 3> BS;
+            xt::xtensor<double, 4> BS_CRD;
+            xt::xtensor<double, 4> BS_LBD;
+            xt::xtensor<double, 3> BSS;
+            xt::xtensor<double, 3> QS;
+            xt::xtensor<double, 2> CRPS;
+
+            // methods to compute derived data
+            void calc_q_qnt();
+
+            // methods to compute elements
+            void calc_o_k();
+            void calc_bar_o();
+            void calc_y_k();
+
+            // methods to compute intermediate metrics
+            void calc_bs();
+            void calc_qs();
+            void calc_crps();
+
+            // methods to compute metrics
+            void calc_BS();
+            void calc_BS_CRD();
+            void calc_BS_LBD();
+            void calc_BSS();
+            void calc_QS();
+            void calc_CRPS();
+        };
+
+        // --------------------------------------------------------------------
+        // ELEMENTS
+        // --------------------------------------------------------------------
+
+        // Determine observed realisation of threshold(s) exceedance.
+        //
+        // \require q_obs:
+        //     Streamflow observations.
+        //     shape: (time,)
+        // \require q_thr:
+        //     Streamflow exceedance threshold(s).
+        //     shape: (thresholds,)
+        // \assign o_k:
+        //     Event observed outcome.
+        //     shape: (thresholds, time)
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_o_k()
+        {
+            // determine observed realisation of threshold(s) exceedance
+            o_k = q_obs >= xt::view(q_thr, xt::all(), xt::newaxis());
+        }
+
+        // Determine mean observed realisation of threshold(s) exceedance.
+        //
+        // \require o_k:
+        //     Event observed outcome.
+        //     shape: (thresholds, time)
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
+        // \assign bar_o:
+        //     Mean event observed outcome.
+        //     shape: (subsets, samples, thresholds)
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_bar_o()
+        {
+            // apply the mask
+            // (using NaN workaround until reducers work on masked_view)
+            auto o_k_masked = xt::where(
+                    xt::view(t_msk, xt::all(), xt::newaxis(), xt::all()),
+                    o_k, NAN
+            );
+
+            // compute variable one sample at a time
+            bar_o = xt::zeros<double>({n_msk, n_exp, n_thr});
+
+            for (int e = 0; e < n_exp; e++)
+            {
+                // apply the bootstrap sampling
+                auto o_k_masked_sampled =
+                        xt::view(o_k_masked, xt::all(), xt::all(), b_exp[e]);
+
+                // compute mean "climatology" relative frequency of the event
+                // $\bar{o} = \frac{1}{n} \sum_{k=1}^{n} o_k$
+                xt::view(bar_o, xt::all(), e, xt::all()) =
+                        xt::nanmean(o_k_masked_sampled, -1);
+            }
+        }
+
+        // Determine forecast probability of threshold(s) exceedance to occur.
+        //
+        // \require q_prd:
+        //     Streamflow predictions.
+        //     shape: (members, time)
+        // \require q_thr:
+        //     Streamflow exceedance threshold(s).
+        //     shape: (thresholds,)
+        // \assign y_k:
+        //     Event probability forecast.
+        //     shape: (thresholds, time)
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_y_k()
+        {
+            // determine if members have exceeded threshold(s)
+            auto e_frc =
+                    q_prd
+                    >= xt::view(q_thr, xt::all(), xt::newaxis(), xt::newaxis());
+
+            // calculate how many members have exceeded threshold(s)
+            auto n_frc = xt::sum(e_frc, 1);
+
+            // determine probability of threshold(s) exceedance
+            // /!\ probability calculation dividing by n (the number of
+            //     members), not n+1 (the number of ranks) like in other metrics
+            y_k = xt::cast<double>(n_frc) / n_mbr;
+        }
+
+        // Compute the forecast quantiles from the ensemble members.
+        //
+        // \require q_prd:
+        //     Streamflow predictions.
+        //     shape: (members, time)
+        // \assign q_qnt:
+        //     Streamflow forecast quantiles.
+        //     shape: (quantiles, time)
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_q_qnt()
+        {
+            q_qnt = xt::sort(q_prd, 0);
+        }
+
+        // --------------------------------------------------------------------
+        // BRIER
+        // --------------------------------------------------------------------
+
         // Compute the Brier score for each time step.
         //
         // \require o_k:
@@ -29,7 +257,8 @@ namespace evalhyd
         // \assign bs:
         //     Brier score for each threshold for each time step.
         //     shape: (thresholds, time)
-        void Evaluator::calc_bs()
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_bs()
         {
             // return computed Brier score(s)
             // $bs = (o_k - y_k)^2$
@@ -47,7 +276,8 @@ namespace evalhyd
         // \assign BS:
         //     Brier score for each subset and for each threshold.
         //     shape: (subsets, samples, thresholds)
-        void Evaluator::calc_BS()
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_BS()
         {
             // initialise output variable
             // shape: (subsets, thresholds)
@@ -105,7 +335,8 @@ namespace evalhyd
         //     Brier score components (reliability, resolution, uncertainty)
         //     for each subset and for each threshold.
         //     shape: (subsets, samples, thresholds, components)
-        void Evaluator::calc_BS_CRD()
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_BS_CRD()
         {
             // declare internal variables
             // shape: (bins, thresholds, time)
@@ -227,7 +458,8 @@ namespace evalhyd
         //     Brier score components (type 2 bias, discrimination, sharpness)
         //     for each subset and for each threshold.
         //     shape: (subsets, samples, thresholds, components)
-        void Evaluator::calc_BS_LBD()
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_BS_LBD()
         {
             // declare internal variables
             // shape: (bins, thresholds, time)
@@ -361,7 +593,8 @@ namespace evalhyd
         // \assign BSS:
         //     Brier skill score for each subset and for each threshold.
         //     shape: (subsets, samples, thresholds)
-        void Evaluator::calc_BSS()
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_BSS()
         {
             // declare and initialise output variable
             // shape: (subsets, thresholds)
@@ -420,5 +653,147 @@ namespace evalhyd
                                        xt::all()))
             ) = NAN;
         }
+
+        // --------------------------------------------------------------------
+        // QUANTILES
+        // --------------------------------------------------------------------
+
+        // Compute the quantile scores for each time step.
+        //
+        // \require q_obs:
+        //     Streamflow observations.
+        //     shape: (time,)
+        // \require q_qnt:
+        //     Streamflow quantiles.
+        //     shape: (quantiles, time)
+        // \assign qs:
+        //     Quantile scores for each time step.
+        //     shape: (quantiles, time)
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_qs()
+        {
+            // compute the quantile order $alpha$
+            xt::xtensor<double, 1> alpha =
+                    xt::arange<double>(1., double(n_mbr + 1))
+                    / double(n_mbr + 1);
+
+            // calculate the difference
+            xt::xtensor<double, 2> diff = q_qnt - q_obs;
+
+            // calculate the quantile scores
+            qs = xt::where(
+                    diff > 0,
+                    2 * (1 - xt::view(alpha, xt::all(), xt::newaxis())) * diff,
+                    - 2 * xt::view(alpha, xt::all(), xt::newaxis()) * diff
+            );
+        }
+
+        // Compute the quantile score (QS).
+        //
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
+        // \require qs:
+        //     Quantile scores for each time step.
+        //     shape: (quantiles, time)
+        // \assign QS:
+        //     Quantile scores.
+        //     shape: (subsets, quantiles)
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_QS()
+        {
+            // initialise output variable
+            // shape: (subsets, quantiles)
+            QS = xt::zeros<double>({n_msk, n_exp, n_mbr});
+
+            // compute variable one mask at a time to minimise memory imprint
+            for (int m = 0; m < n_msk; m++)
+            {
+                // apply the mask
+                // (using NaN workaround until reducers work on masked_view)
+                auto qs_masked = xt::where(xt::row(t_msk, m), qs, NAN);
+
+                // compute variable one sample at a time
+                for (int e = 0; e < n_exp; e++)
+                {
+                    // apply the bootstrap sampling
+                    auto qs_masked_sampled =
+                            xt::view(qs_masked, xt::all(), b_exp[e]);
+
+                    // calculate the mean over the time steps
+                    // $QS = \frac{1}{n} \sum_{k=1}^{n} qs$
+                    xt::view(QS, m, e, xt::all()) =
+                            xt::nanmean(qs_masked_sampled, -1);
+                }
+            }
+        }
+
+        // Compute the continuous rank probability score(s) based
+        // on quantile scores for each time step, and integrating using the
+        // trapezoidal rule.
+        //
+        // /!\ The number of quantiles must be sufficiently large so that the
+        //     cumulative distribution is smooth enough for the numerical
+        //     integration to be accurate.
+        //
+        // \require qs:
+        //     Quantile scores for each time step.
+        //     shape: (quantiles, time)
+        // \assign crps:
+        //     CRPS for each time step.
+        //     shape: (1, time)
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_crps()
+        {
+            // integrate with trapezoidal rule
+            crps = xt::view(
+                    // xt::trapz(y, dx=1/(n+1), axis=0)
+                    xt::trapz(qs, 1./(double(n_mbr) + 1.), 0),
+                    xt::newaxis(), xt::all()
+            );
+        }
+
+        // Compute the continuous rank probability score (CRPS) based
+        // on quantile scores.
+        //
+        // \require t_msk:
+        //     Temporal subsets of the whole record.
+        //     shape: (subsets, time)
+        // \require crps:
+        //     CRPS for each time step.
+        //     shape: (1, time)
+        // \assign CRPS:
+        //     CRPS.
+        //     shape: (subsets,)
+        template <class D2, class D4, class B4>
+        void Evaluator<D2, D4, B4>::calc_CRPS()
+        {
+            // initialise output variable
+            // shape: (subsets,)
+            CRPS = xt::zeros<double>({n_msk, n_exp});
+
+            // compute variable one mask at a time to minimise memory imprint
+            for (int m = 0; m < n_msk; m++)
+            {
+                // apply the mask
+                // (using NaN workaround until reducers work on masked_view)
+                auto crps_masked = xt::where(xt::row(t_msk, m), crps, NAN);
+
+                // compute variable one sample at a time
+                for (int e = 0; e < n_exp; e++)
+                {
+                    // apply the bootstrap sampling
+                    auto crps_masked_sampled =
+                            xt::view(crps_masked, xt::all(), b_exp[e]);
+
+                    // calculate the mean over the time steps
+                    // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$
+                    xt::view(CRPS, m, e) =
+                            xt::squeeze(xt::nanmean(crps_masked_sampled, -1));
+                }
+            }
+        }
     }
 }
+
+#endif //EVALHYD_PROBABILIST_EVALUATOR_HPP
diff --git a/src/uncertainty.hpp b/include/evalhyd/detail/uncertainty.hpp
similarity index 100%
rename from src/uncertainty.hpp
rename to include/evalhyd/detail/uncertainty.hpp
diff --git a/src/utils.hpp b/include/evalhyd/detail/utils.hpp
similarity index 100%
rename from src/utils.hpp
rename to include/evalhyd/detail/utils.hpp
diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp
index 8ef12b0e561a65ec77231945a1f7eab41e27e22d..5dc85412c0d0bdb264e44e8aa787b7a8774428d9 100644
--- a/include/evalhyd/evald.hpp
+++ b/include/evalhyd/evald.hpp
@@ -9,11 +9,11 @@
 #include <xtensor/xtensor.hpp>
 #include <xtensor/xarray.hpp>
 
-#include "utils.hpp"
-#include "masks.hpp"
-#include "maths.hpp"
-#include "uncertainty.hpp"
-#include "determinist/evaluator.hpp"
+#include "detail/utils.hpp"
+#include "detail/masks.hpp"
+#include "detail/maths.hpp"
+#include "detail/uncertainty.hpp"
+#include "detail/determinist/evaluator.hpp"
 
 
 namespace evalhyd
diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp
index fdf58215c9f0de240ef1d6242b89e5425dd38ce7..2a9725c5eb5da5664fe51bed93583bc2f155b6db 100644
--- a/include/evalhyd/evalp.hpp
+++ b/include/evalhyd/evalp.hpp
@@ -8,11 +8,11 @@
 #include <xtensor/xtensor.hpp>
 #include <xtensor/xarray.hpp>
 
-#include "utils.hpp"
-#include "masks.hpp"
-#include "maths.hpp"
-#include "uncertainty.hpp"
-#include "probabilist/evaluator.hpp"
+#include "detail/utils.hpp"
+#include "detail/masks.hpp"
+#include "detail/maths.hpp"
+#include "detail/uncertainty.hpp"
+#include "detail/probabilist/evaluator.hpp"
 
 
 namespace evalhyd
@@ -307,7 +307,7 @@ namespace evalhyd
                          xt::view(c_msk, s, l, xt::all(), xt::all()) :
                          xt::view(t_msk_, s, l, xt::all(), xt::all()));
 
-                eh::probabilist::Evaluator evaluator(
+                eh::probabilist::Evaluator<D2, D4, B4> evaluator(
                         q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp
                 );
 
diff --git a/src/probabilist/evaluator.hpp b/src/probabilist/evaluator.hpp
deleted file mode 100644
index 1df1e0c566609838f4cf51f336084b3bc547894b..0000000000000000000000000000000000000000
--- a/src/probabilist/evaluator.hpp
+++ /dev/null
@@ -1,140 +0,0 @@
-#ifndef EVALHYD_PROBABILIST_EVALUATOR_HPP
-#define EVALHYD_PROBABILIST_EVALUATOR_HPP
-
-#include <stdexcept>
-#include <vector>
-
-#include <xtensor/xtensor.hpp>
-#include <xtensor/xview.hpp>
-#include <xtensor/xslice.hpp>
-
-using view1d_xtensor2d_double_type = decltype(
-    xt::view(
-            std::declval<const xt::xtensor<double, 2>&>(),
-            std::declval<int>(),
-            xt::all()
-    )
-);
-
-using view2d_xtensor4d_double_type = decltype(
-    xt::view(
-            std::declval<const xt::xtensor<double, 4>&>(),
-            std::declval<int>(),
-            std::declval<int>(),
-            xt::all(),
-            xt::all()
-    )
-);
-
-using view2d_xtensor4d_bool_type = decltype(
-    xt::view(
-            std::declval<const xt::xtensor<bool, 4>&>(),
-            std::declval<int>(),
-            std::declval<int>(),
-            xt::all(),
-            xt::all()
-    )
-);
-
-namespace evalhyd
-{
-    namespace probabilist
-    {
-        class Evaluator
-        {
-        private:
-            // members for input data
-            const view1d_xtensor2d_double_type& q_obs;
-            const view2d_xtensor4d_double_type& q_prd;
-            const view1d_xtensor2d_double_type& q_thr;
-            xt::xtensor<bool, 2> t_msk;
-            const std::vector<xt::xkeep_slice<int>>& b_exp;
-
-            // members for dimensions
-            std::size_t n;
-            std::size_t n_msk;
-            std::size_t n_mbr;
-            std::size_t n_thr;
-            std::size_t n_exp;
-
-            // members for computational elements
-            xt::xtensor<double, 2> o_k;
-            xt::xtensor<double, 3> bar_o;
-            xt::xtensor<double, 2> y_k;
-            xt::xtensor<double, 2> q_qnt;
-
-        public:
-            // constructor method
-            Evaluator(const view1d_xtensor2d_double_type& obs,
-                      const view2d_xtensor4d_double_type& prd,
-                      const view1d_xtensor2d_double_type& thr,
-                      const view2d_xtensor4d_bool_type& msk,
-                      const std::vector<xt::xkeep_slice<int>>& exp) :
-                    q_obs{obs}, q_prd{prd}, q_thr{thr}, t_msk(msk), b_exp(exp)
-            {
-                // initialise a mask if none provided
-                // (corresponding to no temporal subset)
-                if (msk.size() < 1)
-                    t_msk = xt::ones<bool>({std::size_t {1}, q_obs.size()});
-
-                // determine size for recurring dimensions
-                n = q_obs.size();
-                n_msk = t_msk.shape(0);
-                n_mbr = q_prd.shape(0);
-                n_thr = q_thr.size();
-                n_exp = b_exp.size();
-
-                // drop time steps where observations and/or predictions are NaN
-                auto obs_nan = xt::isnan(q_obs);
-                auto prd_nan = xt::isnan(q_prd);
-                auto sum_nan = xt::eval(xt::sum(prd_nan, -1));
-
-                if (xt::amin(sum_nan) != xt::amax(sum_nan))
-                    throw std::runtime_error(
-                            "predictions across members feature non-equal lengths"
-                    );
-
-                auto msk_nan = xt::where(obs_nan | xt::row(prd_nan, 0))[0];
-
-                xt::view(t_msk, xt::all(), xt::keep(msk_nan)) = false;
-            };
-
-            // members for intermediate evaluation metrics
-            // (i.e. before the reduction along the temporal axis)
-            xt::xtensor<double, 2> bs;
-            xt::xtensor<double, 2> qs;
-            xt::xtensor<double, 2> crps;
-
-            // members for evaluation metrics
-            xt::xtensor<double, 3> BS;
-            xt::xtensor<double, 4> BS_CRD;
-            xt::xtensor<double, 4> BS_LBD;
-            xt::xtensor<double, 3> BSS;
-			xt::xtensor<double, 3> QS;
-            xt::xtensor<double, 2> CRPS;
-
-            // methods to compute derived data
-            void calc_q_qnt();
-
-            // methods to compute elements
-            void calc_o_k();
-            void calc_bar_o();
-            void calc_y_k();
-
-            // methods to compute intermediate metrics
-            void calc_bs();
-            void calc_qs();
-            void calc_crps();
-
-            // methods to compute metrics
-            void calc_BS();
-            void calc_BS_CRD();
-            void calc_BS_LBD();
-            void calc_BSS();
-            void calc_QS();
-            void calc_CRPS();
-        };
-    }
-}
-
-#endif //EVALHYD_PROBABILIST_EVALUATOR_HPP
diff --git a/src/probabilist/evaluator_elements.cpp b/src/probabilist/evaluator_elements.cpp
deleted file mode 100644
index bdce6c1b729a600343ac2bea710caea604a7658c..0000000000000000000000000000000000000000
--- a/src/probabilist/evaluator_elements.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#include <xtensor/xmath.hpp>
-#include <xtensor/xview.hpp>
-#include <xtensor/xsort.hpp>
-
-#include "probabilist/evaluator.hpp"
-
-namespace evalhyd
-{
-    namespace probabilist
-    {
-        // Determine observed realisation of threshold(s) exceedance.
-        //
-        // \require q_obs:
-        //     Streamflow observations.
-        //     shape: (time,)
-        // \require q_thr:
-        //     Streamflow exceedance threshold(s).
-        //     shape: (thresholds,)
-        // \assign o_k:
-        //     Event observed outcome.
-        //     shape: (thresholds, time)
-        void Evaluator::calc_o_k()
-        {
-            // determine observed realisation of threshold(s) exceedance
-            o_k = q_obs >= xt::view(q_thr, xt::all(), xt::newaxis());
-        }
-
-        // Determine mean observed realisation of threshold(s) exceedance.
-        //
-        // \require o_k:
-        //     Event observed outcome.
-        //     shape: (thresholds, time)
-        // \require t_msk:
-        //     Temporal subsets of the whole record.
-        //     shape: (subsets, time)
-        // \assign bar_o:
-        //     Mean event observed outcome.
-        //     shape: (subsets, samples, thresholds)
-        void Evaluator::calc_bar_o()
-        {
-            // apply the mask
-            // (using NaN workaround until reducers work on masked_view)
-            auto o_k_masked = xt::where(
-                    xt::view(t_msk, xt::all(), xt::newaxis(), xt::all()),
-                    o_k, NAN
-            );
-
-            // compute variable one sample at a time
-            bar_o = xt::zeros<double>({n_msk, n_exp, n_thr});
-
-            for (int e = 0; e < n_exp; e++)
-            {
-                // apply the bootstrap sampling
-                auto o_k_masked_sampled =
-                        xt::view(o_k_masked, xt::all(), xt::all(), b_exp[e]);
-
-                // compute mean "climatology" relative frequency of the event
-                // $\bar{o} = \frac{1}{n} \sum_{k=1}^{n} o_k$
-                xt::view(bar_o, xt::all(), e, xt::all()) =
-                        xt::nanmean(o_k_masked_sampled, -1);
-            }
-        }
-
-        // Determine forecast probability of threshold(s) exceedance to occur.
-        //
-        // \require q_prd:
-        //     Streamflow predictions.
-        //     shape: (members, time)
-        // \require q_thr:
-        //     Streamflow exceedance threshold(s).
-        //     shape: (thresholds,)
-        // \assign y_k:
-        //     Event probability forecast.
-        //     shape: (thresholds, time)
-        void Evaluator::calc_y_k()
-        {
-            // determine if members have exceeded threshold(s)
-            auto e_frc =
-                    q_prd
-                    >= xt::view(q_thr, xt::all(), xt::newaxis(), xt::newaxis());
-
-            // calculate how many members have exceeded threshold(s)
-            auto n_frc = xt::sum(e_frc, 1);
-
-            // determine probability of threshold(s) exceedance
-            // /!\ probability calculation dividing by n (the number of
-            //     members), not n+1 (the number of ranks) like in other metrics
-            y_k = xt::cast<double>(n_frc) / n_mbr;
-        }
-
-        // Compute the forecast quantiles from the ensemble members.
-        //
-        // \require q_prd:
-        //     Streamflow predictions.
-        //     shape: (members, time)
-        // \assign q_qnt:
-        //     Streamflow forecast quantiles.
-        //     shape: (quantiles, time)
-        void Evaluator::calc_q_qnt()
-        {
-            q_qnt = xt::sort(q_prd, 0);
-        }
-    }
-}
diff --git a/src/probabilist/evaluator_quantiles.cpp b/src/probabilist/evaluator_quantiles.cpp
deleted file mode 100644
index c6f37e23a716251c046fe35d3c11e7c5a6b46f73..0000000000000000000000000000000000000000
--- a/src/probabilist/evaluator_quantiles.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-#include <unordered_map>
-#include <xtensor/xmath.hpp>
-#include <xtensor/xview.hpp>
-#include <xtensor/xoperation.hpp>
-
-#include "probabilist/evaluator.hpp"
-
-namespace eh = evalhyd;
-
-namespace evalhyd
-{
-    namespace probabilist
-    {
-        // Compute the quantile scores for each time step.
-        //
-        // \require q_obs:
-        //     Streamflow observations.
-        //     shape: (time,)
-        // \require q_qnt:
-        //     Streamflow quantiles.
-        //     shape: (quantiles, time)
-        // \assign qs:
-        //     Quantile scores for each time step.
-        //     shape: (quantiles, time)
-        void Evaluator::calc_qs()
-        {
-            // compute the quantile order $alpha$
-            xt::xtensor<double, 1> alpha =
-                    xt::arange<double>(1., double(n_mbr + 1))
-                    / double(n_mbr + 1);
-
-            // calculate the difference
-            xt::xtensor<double, 2> diff = q_qnt - q_obs;
-
-            // calculate the quantile scores
-            qs = xt::where(
-                    diff > 0,
-                    2 * (1 - xt::view(alpha, xt::all(), xt::newaxis())) * diff,
-                    - 2 * xt::view(alpha, xt::all(), xt::newaxis()) * diff
-            );
-        }
-        
-        // Compute the quantile score (QS).
-        //
-        // \require t_msk:
-        //     Temporal subsets of the whole record.
-        //     shape: (subsets, time)
-        // \require qs:
-        //     Quantile scores for each time step.
-        //     shape: (quantiles, time)
-        // \assign QS:
-        //     Quantile scores.
-        //     shape: (subsets, quantiles)
-        void Evaluator::calc_QS()
-        {
-            // initialise output variable
-            // shape: (subsets, quantiles)
-            QS = xt::zeros<double>({n_msk, n_exp, n_mbr});
-
-            // compute variable one mask at a time to minimise memory imprint
-            for (int m = 0; m < n_msk; m++)
-            {
-                // apply the mask
-                // (using NaN workaround until reducers work on masked_view)
-                auto qs_masked = xt::where(xt::row(t_msk, m), qs, NAN);
-
-                // compute variable one sample at a time
-                for (int e = 0; e < n_exp; e++)
-                {
-                    // apply the bootstrap sampling
-                    auto qs_masked_sampled =
-                            xt::view(qs_masked, xt::all(), b_exp[e]);
-
-                    // calculate the mean over the time steps
-                    // $QS = \frac{1}{n} \sum_{k=1}^{n} qs$
-                    xt::view(QS, m, e, xt::all()) =
-                            xt::nanmean(qs_masked_sampled, -1);
-                }
-            }
-        }
-        
-        // Compute the continuous rank probability score(s) based
-        // on quantile scores for each time step, and integrating using the
-        // trapezoidal rule.
-        //
-        // /!\ The number of quantiles must be sufficiently large so that the
-        //     cumulative distribution is smooth enough for the numerical
-        //     integration to be accurate.
-        //
-        // \require qs:
-        //     Quantile scores for each time step.
-        //     shape: (quantiles, time)
-        // \assign crps:
-        //     CRPS for each time step.
-        //     shape: (1, time)
-        void Evaluator::calc_crps()
-        {
-            // integrate with trapezoidal rule
-            crps = xt::view(
-                    // xt::trapz(y, dx=1/(n+1), axis=0)
-                    xt::trapz(qs, 1./(double(n_mbr) + 1.), 0),
-                    xt::newaxis(), xt::all()
-            );
-        }
-
-        // Compute the continuous rank probability score (CRPS) based
-        // on quantile scores.
-        //
-        // \require t_msk:
-        //     Temporal subsets of the whole record.
-        //     shape: (subsets, time)
-        // \require crps:
-        //     CRPS for each time step.
-        //     shape: (1, time)
-        // \assign CRPS:
-        //     CRPS.
-        //     shape: (subsets,)
-        void Evaluator::calc_CRPS()
-        {
-            // initialise output variable
-            // shape: (subsets,)
-            CRPS = xt::zeros<double>({n_msk, n_exp});
-
-            // compute variable one mask at a time to minimise memory imprint
-            for (int m = 0; m < n_msk; m++)
-            {
-                // apply the mask
-                // (using NaN workaround until reducers work on masked_view)
-                auto crps_masked = xt::where(xt::row(t_msk, m), crps, NAN);
-
-                // compute variable one sample at a time
-                for (int e = 0; e < n_exp; e++)
-                {
-                    // apply the bootstrap sampling
-                    auto crps_masked_sampled =
-                            xt::view(crps_masked, xt::all(), b_exp[e]);
-
-                    // calculate the mean over the time steps
-                    // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$
-                    xt::view(CRPS, m, e) =
-                            xt::squeeze(xt::nanmean(crps_masked_sampled, -1));
-                }
-            }
-        }
-    }
-}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 436ab16c76a7128a95ca96e8b878b8dd14fb89d8..1ef9682e651c7db211bd5a8f7447341a42437f5a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -24,7 +24,7 @@ set_target_properties(
 target_include_directories(
         evalhyd_tests
         PRIVATE
-                ${CMAKE_SOURCE_DIR}/src
+                ${CMAKE_SOURCE_DIR}/include/evalhyd
 )
 
 target_compile_definitions(
diff --git a/tests/test_uncertainty.cpp b/tests/test_uncertainty.cpp
index 1bfb312240310b6ec5fe3849b65127644394c9e6..d87a97288213c98b3e38876def1596bb63f5583f 100644
--- a/tests/test_uncertainty.cpp
+++ b/tests/test_uncertainty.cpp
@@ -5,7 +5,7 @@
 #include <xtensor/xrandom.hpp>
 #include <xtensor/xio.hpp>
 
-#include "uncertainty.hpp"
+#include "detail/uncertainty.hpp"
 
 TEST(UncertaintyTests, TestBootstrapGenerator)
 {