diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp
index ee863a7ab165ebb7e07a924649917bbd26a32ecf..8c1f04065ab02a5c2f4bcc77fe89b3f80729f73f 100644
--- a/tests/test_probabilist.cpp
+++ b/tests/test_probabilist.cpp
@@ -97,78 +97,6 @@ TEST(ProbabilistTests, TestBrier)
     );
 }
 
-TEST(ProbabilistTests, TestContingency)
-{
-    // read in data
-    xt::xtensor<double, 1> observed;
-    xt::xtensor<double, 2> predicted;
-    std::tie(observed, predicted) = load_data_p();
-
-    // compute scores
-    xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}};
-
-    std::vector<xt::xarray<double>> metrics =
-            evalhyd::evalp(
-                    // shape: (sites [1], time [t])
-                    xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
-                    // shape: (sites [1], lead times [1], members [m], time [t])
-                    xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    {"POD", "POFD", "FAR", "CSI", "ROCSS"},
-                    thresholds,
-                    "high"
-            );
-
-    std::cout << "POD" << std::endl;
-    std::cout << metrics[0] << std::endl;
-    std::cout << "POFD" << std::endl;
-    std::cout << metrics[1] << std::endl;
-    std::cout << "FAR" << std::endl;
-    std::cout << metrics[2] << std::endl;
-    std::cout << "CSI" << std::endl;
-    std::cout << metrics[3] << std::endl;
-    std::cout << "ROCSS" << std::endl;
-    std::cout << metrics[4] << std::endl;
-
-//    // check results
-//    // POD
-//    xt::xtensor<double, 6> pod =
-//            {{{{{{0.10615136, 0.07395622, 0.08669186, NAN}}}}}};
-//    EXPECT_TRUE(
-//            xt::sum(xt::isclose(metrics[0], pod, 1e-05, 1e-08, true))
-//            == xt::xscalar<double>(4)
-//    );
-//
-//    // POFD
-//    xt::xtensor<double, 6> pofd =
-//            {{{{{{0.5705594, 0.6661165, 0.5635126, NAN}}}}}};
-//    EXPECT_TRUE(
-//            xt::sum(xt::isclose(metrics[1], pofd, 1e-05, 1e-08, true))
-//            == xt::xscalar<double>(4)
-//    );
-//
-//    // FAR
-//    xt::xtensor<double, 6> far =
-//            {{{{{{{0.011411758, 0.1524456, 0.2471852},
-//                  {0.005532413, 0.1530793, 0.2215031},
-//                  {0.010139431, 0.1220601, 0.1986125},
-//                  {NAN, NAN, NAN}}}}}}};
-//    EXPECT_TRUE(
-//            xt::sum(xt::isclose(metrics[2], far, 1e-05, 1e-08, true))
-//            == xt::xscalar<double>(12)
-//    );
-//
-//    // CSI
-//    xt::xtensor<double, 6> csi =
-//            {{{{{{0.012159881, 0.1506234, 0.2446149},
-//                 {0.008031746, 0.1473869, 0.2133114},
-//                 {0.017191279, 0.1048221, 0.1743227},
-//                 {NAN, NAN, NAN}}}}}};
-//    EXPECT_TRUE(
-//            xt::sum(xt::isclose(metrics[3], csi, 1e-05, 1e-08, true))
-//            == xt::xscalar<double>(12)
-//    );
-}
-
 TEST(ProbabilistTests, TestQuantiles)
 {
     // read in data