diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp
index 1420056b3ed3ad68534886a210c6f64000a275a4..decdf97cef9b02b689d58be8f3ac2adc3d38dfe9 100644
--- a/tests/test_determinist.cpp
+++ b/tests/test_determinist.cpp
@@ -21,6 +21,11 @@
 
 using namespace xt::placeholders;  // required for `_` to work
 
+// deterministic metrics shared across the test cases below
+std::vector<std::string> all_metrics_d = {
+        "RMSE", "NSE", "KGE", "KGEPRIME"
+};
+
 std::tuple<xt::xtensor<double, 2>, xt::xtensor<double, 2>> load_data_d()
 {
     // read in data
@@ -152,32 +157,26 @@ TEST(DeterministTests, TestMasks)
     xt::view(masks, 0, xt::range(0, 20)) = 0;
 
     // compute scores using masks to subset whole record
-    std::vector<std::string> metrics =
-            {"RMSE", "NSE", "KGE", "KGEPRIME"};
-
     std::vector<xt::xarray<double>> metrics_masked =
-            evalhyd::evald(observed, predicted, metrics, {}, {}, {}, masks);
+            evalhyd::evald(observed, predicted, all_metrics_d, {}, {}, {}, masks);
 
     // compute scores on pre-computed subset of whole record
     xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _));
     xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _));
 
     std::vector<xt::xarray<double>> metrics_subset =
-            evalhyd::evald(obs, prd, metrics);
+            evalhyd::evald(obs, prd, all_metrics_d);
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
     {
         EXPECT_TRUE(xt::allclose(metrics_masked[m], metrics_subset[m]))
-        << "Failure for (" << metrics[m] << ")";
+        << "Failure for (" << all_metrics_d[m] << ")";
     }
 }
 
 TEST(DeterministTests, TestMaskingConditions)
 {
-    std::vector<std::string> metrics =
-            {"RMSE", "NSE", "KGE", "KGEPRIME"};
-
     // read in data
     xt::xtensor<double, 2> observed;
     xt::xtensor<double, 2> predicted;
@@ -195,7 +194,7 @@ TEST(DeterministTests, TestMaskingConditions)
 
     std::vector<xt::xarray<double>> metrics_q_conditioned =
             evalhyd::evald(
-                    observed, predicted, metrics,
+                    observed, predicted, all_metrics_d,
                     {}, {}, {}, masks, q_conditions
             );
 
@@ -204,17 +203,17 @@ TEST(DeterministTests, TestMaskingConditions)
             evalhyd::evald(
                     xt::eval(xt::where((observed < 2000) | (observed > 3000), observed, NAN)),
                     predicted,
-                    metrics
+                    all_metrics_d
             );
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
     {
         EXPECT_TRUE(
                 xt::allclose(
                         metrics_q_conditioned[m], metrics_q_preconditioned[m]
                 )
-        ) << "Failure for (" << metrics[m] << ")";
+        ) << "Failure for (" << all_metrics_d[m] << ")";
     }
 
     // conditions on streamflow statistics _____________________________________
@@ -228,7 +227,7 @@ TEST(DeterministTests, TestMaskingConditions)
 
     std::vector<xt::xarray<double>> metrics_q_conditioned_ =
             evalhyd::evald(
-                    observed, predicted, metrics,
+                    observed, predicted, all_metrics_d,
                     {}, {}, {}, masks, q_conditions_
             );
 
@@ -237,17 +236,17 @@ TEST(DeterministTests, TestMaskingConditions)
             evalhyd::evald(
                     xt::eval(xt::where(observed >= mean, observed, NAN)),
                     predicted,
-                    metrics
+                    all_metrics_d
             );
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
     {
         EXPECT_TRUE(
                 xt::allclose(
                         metrics_q_conditioned_[m], metrics_q_preconditioned_[m]
                 )
-        ) << "Failure for (" << metrics[m] << ")";
+        ) << "Failure for (" << all_metrics_d[m] << ")";
     }
 
     // conditions on temporal indices __________________________________________
@@ -259,7 +258,7 @@ TEST(DeterministTests, TestMaskingConditions)
 
     std::vector<xt::xarray<double>> metrics_t_conditioned =
             evalhyd::evald(
-                    observed, predicted, metrics,
+                    observed, predicted, all_metrics_d,
                     {}, {}, {}, masks, t_conditions
             );
 
@@ -268,25 +267,22 @@ TEST(DeterministTests, TestMaskingConditions)
             evalhyd::evald(
                     xt::eval(xt::view(observed, xt::all(), xt::range(0, 100))),
                     xt::eval(xt::view(predicted, xt::all(), xt::range(0, 100))),
-                    metrics
+                    all_metrics_d
             );
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
     {
         EXPECT_TRUE(
                 xt::allclose(
                         metrics_t_conditioned[m], metrics_t_subset[m]
                 )
-        ) << "Failure for (" << metrics[m] << ")";
+        ) << "Failure for (" << all_metrics_d[m] << ")";
     }
 }
 
 TEST(DeterministTests, TestMissingData)
 {
-    std::vector<std::string> metrics =
-            {"RMSE", "NSE", "KGE", "KGEPRIME"};
-
     // read in data
     xt::xtensor<double, 2> observed;
     xt::xtensor<double, 2> predicted;
@@ -303,9 +299,9 @@ TEST(DeterministTests, TestMissingData)
 
     // compute metrics with observations containing NaN values
     std::vector<xt::xarray<double>> metrics_nan =
-            evalhyd::evald(observed, predicted, metrics);
+            evalhyd::evald(observed, predicted, all_metrics_d);
 
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
     {
         for (std::size_t p = 0; p < predicted.shape(0); p++)
         {
@@ -320,7 +316,7 @@ TEST(DeterministTests, TestMissingData)
                     evalhyd::evald(
                             xt::eval(xt::view(obs, xt::newaxis(), xt::all())),
                             xt::eval(xt::view(prd, xt::newaxis(), xt::all())),
-                            {metrics[m]}
+                            {all_metrics_d[m]}
                     );
 
             // compare to check results are the same
@@ -329,16 +325,13 @@ TEST(DeterministTests, TestMissingData)
                             xt::view(metrics_nan[m], p),
                             metrics_sbs[0]
                     )
-            ) << "Failure for (" << metrics[m] << ")";
+            ) << "Failure for (" << all_metrics_d[m] << ")";
         }
     }
 }
 
 TEST(DeterministTests, TestBootstrap)
 {
-    std::vector<std::string> metrics =
-            {"RMSE", "NSE", "KGE", "KGEPRIME"};
-
     // read in data
     std::ifstream ifs;
 
@@ -363,7 +356,7 @@ TEST(DeterministTests, TestBootstrap)
             evalhyd::evald(
                     xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
                     predicted,
-                    metrics,
+                    all_metrics_d,
                     {},  // transform
                     {},  // exponent
                     {},  // epsilon
@@ -388,16 +381,16 @@ TEST(DeterministTests, TestBootstrap)
             evalhyd::evald(
                     xt::eval(xt::view(observed_x3, xt::newaxis(), xt::all())),
                     predicted_x3,
-                    metrics
+                    all_metrics_d
             );
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
     {
         EXPECT_TRUE(
                 xt::allclose(
                         metrics_bts[m], metrics_rep[m]
                 )
-        ) << "Failure for (" << metrics[m] << ")";
+        ) << "Failure for (" << all_metrics_d[m] << ")";
     }
 }
diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp
index be6cb4fb7c7015d979656698932aee8e6349258f..8a54c25261ffc8b119046fd1ef34b5feb3dc0beb 100644
--- a/tests/test_probabilist.cpp
+++ b/tests/test_probabilist.cpp
@@ -21,6 +21,13 @@
 
 using namespace xt::placeholders;  // required for `_` to work
 
+// probabilistic metrics shared across the test cases below
+std::vector<std::string> all_metrics_p = {
+        "BS", "BSS", "BS_CRD", "BS_LBD",
+        "QS", "CRPS",
+        "POD", "POFD", "FAR", "CSI", "ROCSS"
+};
+
 std::tuple<xt::xtensor<double, 1>, xt::xtensor<double, 2>> load_data_p()
 {
     // read in data
@@ -419,8 +426,6 @@ TEST(ProbabilistTests, TestMasks)
 
     // compute scores using masks to subset whole record
     xt::xtensor<double, 2> thresholds = {{690, 534, 445}};
-    std::vector<std::string> metrics =
-            {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"};
 
     std::vector<xt::xarray<double>> metrics_masked =
             evalhyd::evalp(
@@ -428,7 +433,7 @@ TEST(ProbabilistTests, TestMasks)
                     xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
                     // shape: (sites [1], lead times [1], members [m], time [t])
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high",
                     // shape: (sites [1], lead times [1], subsets [1], time [t])
@@ -442,24 +447,22 @@ TEST(ProbabilistTests, TestMasks)
                     xt::eval(xt::view(observed, xt::newaxis(), xt::range(20, _))),
                     // shape: (sites [1], lead times [1], members [m], time [t-20])
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::range(20, _))),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high"
             );
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_p.size(); m++)
     {
         EXPECT_TRUE(xt::allclose(metrics_masked[m], metrics_subset[m]))
-        << "Failure for (" << metrics[m] << ")";
+        << "Failure for (" << all_metrics_p[m] << ")";
     }
 }
 
 TEST(ProbabilistTests, TestMaskingConditions)
 {
     xt::xtensor<double, 2> thresholds = {{690, 534, 445}};
-    std::vector<std::string> metrics =
-            {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"};
 
     // read in data
     xt::xtensor<double, 1> observed_;
@@ -483,7 +486,7 @@ TEST(ProbabilistTests, TestMaskingConditions)
             evalhyd::evalp(
                     xt::eval(observed),
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high",
                     masks,
@@ -495,19 +498,20 @@ TEST(ProbabilistTests, TestMaskingConditions)
             evalhyd::evalp(
                     xt::eval(xt::where((observed < 2000) | (observed > 3000), observed, NAN)),
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high"
             );
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_p.size(); m++)
     {
         EXPECT_TRUE(
-                xt::allclose(
-                        metrics_q_conditioned[m], metrics_q_preconditioned[m]
-                )
-        ) << "Failure for (" << metrics[m] << ")";
+                xt::sum(xt::isclose(metrics_q_conditioned[m],
+                                    metrics_q_preconditioned[m],
+                                    1e-05, 1e-08, true))  // rtol, atol, equal_nan=true (NaNs compare equal)
+                == xt::xscalar<double>(metrics_q_conditioned[m].size())
+        ) << "Failure for (" << all_metrics_p[m] << ")";
     }
 
     // conditions on streamflow statistics _____________________________________
@@ -524,7 +528,7 @@ TEST(ProbabilistTests, TestMaskingConditions)
             evalhyd::evalp(
                     xt::eval(observed),
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high",
                     masks,
@@ -536,19 +540,20 @@ TEST(ProbabilistTests, TestMaskingConditions)
             evalhyd::evalp(
                     xt::eval(xt::where(q_prd_mean >= median, observed, NAN)),
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high"
             );
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_p.size(); m++)
     {
         EXPECT_TRUE(
-                xt::allclose(
-                        metrics_q_conditioned_[m], metrics_q_preconditioned_[m]
-                )
-        ) << "Failure for (" << metrics[m] << ")";
+                xt::sum(xt::isclose(metrics_q_conditioned_[m],
+                                    metrics_q_preconditioned_[m],
+                                    1e-05, 1e-08, true))
+                == xt::xscalar<double>(metrics_q_conditioned_[m].size())
+        ) << "Failure for (" << all_metrics_p[m] << ")";
     }
 
     // conditions on temporal indices __________________________________________
@@ -562,7 +567,7 @@ TEST(ProbabilistTests, TestMaskingConditions)
             evalhyd::evalp(
                     xt::eval(observed),
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high",
                     masks,
@@ -574,19 +579,20 @@ TEST(ProbabilistTests, TestMaskingConditions)
             evalhyd::evalp(
                     xt::eval(xt::view(observed_, xt::newaxis(), xt::range(0, 100))),
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::range(0, 100))),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high"
             );
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_p.size(); m++)
     {
         EXPECT_TRUE(
-                xt::allclose(
-                        metrics_t_conditioned[m], metrics_t_subset[m]
-                )
-        ) << "Failure for (" << metrics[m] << ")";
+                xt::sum(xt::isclose(metrics_t_conditioned[m],
+                                    metrics_t_subset[m],
+                                    1e-05, 1e-08, true))
+                == xt::xscalar<double>(metrics_t_conditioned[m].size())
+        ) << "Failure for (" << all_metrics_p[m] << ")";
     }
 }
 
@@ -594,8 +600,6 @@ TEST(ProbabilistTests, TestMissingData)
 {
     xt::xtensor<double, 2> thresholds
         {{ 4., 5. }};
-    std::vector<std::string> metrics =
-            {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"};
 
     // compute metrics on series with NaN
     xt::xtensor<double, 4> forecast_nan {{
@@ -616,7 +620,7 @@ TEST(ProbabilistTests, TestMissingData)
         evalhyd::evalp(
                 observed_nan,
                 forecast_nan,
-                metrics,
+                all_metrics_p,
                 thresholds,
                 "high"
         );
@@ -636,7 +640,7 @@ TEST(ProbabilistTests, TestMissingData)
         evalhyd::evalp(
                 observed_pp1,
                 forecast_pp1,
-                metrics,
+                all_metrics_p,
                 thresholds,
                 "high"
         );
@@ -655,29 +659,29 @@ TEST(ProbabilistTests, TestMissingData)
         evalhyd::evalp(
                 observed_pp2,
                 forecast_pp2,
-                metrics,
+                all_metrics_p,
                 thresholds,
                 "high"
         );
 
     // check that numerical results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_p.size(); m++)
     {
         // for leadtime 1
         EXPECT_TRUE(
-                xt::allclose(
-                        xt::view(metrics_nan[m], xt::all(), 0),
-                        xt::view(metrics_pp1[m], xt::all(), 0)
-                )
-        ) << "Failure for (" << metrics[m] << ", " << "leadtime 1)";
+                xt::sum(xt::isclose(xt::view(metrics_nan[m], xt::all(), 0),
+                                    xt::view(metrics_pp1[m], xt::all(), 0),
+                                    1e-05, 1e-08, true))  // rtol, atol, equal_nan=true (NaNs compare equal)
+                == xt::xscalar<double>(metrics_pp1[m].size())
+        ) << "Failure for (" << all_metrics_p[m] << ", " << "leadtime 1)";
         
         // for leadtime 2
         EXPECT_TRUE(
-                xt::allclose(
-                        xt::view(metrics_nan[m], xt::all(), 1),
-                        xt::view(metrics_pp2[m], xt::all(), 0)
-                )
-        ) << "Failure for (" << metrics[m] << ", " << "leadtime 2)";
+                xt::sum(xt::isclose(xt::view(metrics_nan[m], xt::all(), 1),
+                                    xt::view(metrics_pp2[m], xt::all(), 0),
+                                    1e-05, 1e-08, true))
+                == xt::xscalar<double>(metrics_pp2[m].size())
+        ) << "Failure for (" << all_metrics_p[m] << ", " << "leadtime 2)";
     }
 }
 
@@ -685,8 +689,6 @@ TEST(ProbabilistTests, TestBootstrap)
 {
     xt::xtensor<double, 2> thresholds
             {{ 33.87, 55.67 }};
-    std::vector<std::string> metrics =
-            {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"};
 
     // read in data
     std::ifstream ifs;
@@ -712,7 +714,7 @@ TEST(ProbabilistTests, TestBootstrap)
             evalhyd::evalp(
                     xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high",  // events
                     xt::xtensor<bool, 4>({}),  // t_msk
@@ -736,18 +738,18 @@ TEST(ProbabilistTests, TestBootstrap)
             evalhyd::evalp(
                     xt::eval(xt::view(observed_x3, xt::newaxis(), xt::all())),
                     xt::eval(xt::view(predicted_x3, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    metrics,
+                    all_metrics_p,
                     thresholds,
                     "high"
             );
 
     // check results are identical
-    for (std::size_t m = 0; m < metrics.size(); m++)
+    for (std::size_t m = 0; m < all_metrics_p.size(); m++)
     {
         EXPECT_TRUE(
                 xt::allclose(
                         metrics_bts[m], metrics_rep[m]
                 )
-        ) << "Failure for (" << metrics[m] << ")";
+        ) << "Failure for (" << all_metrics_p[m] << ")";
     }
 }
\ No newline at end of file