From 017726c604595dbb87a0458d8190a52b2bf443af Mon Sep 17 00:00:00 2001
From: Thibault Hallouin <thibault.hallouin@inrae.fr>
Date: Mon, 13 Feb 2023 14:03:25 +0100
Subject: [PATCH] move expected outputs into CSV files

---
 tests/CMakeLists.txt                |   2 +-
 tests/expected/evald/KGE.csv        |  51 +++
 tests/expected/evald/KGEPRIME.csv   |  51 +++
 tests/expected/evald/KGEPRIME_D.csv |  51 +++
 tests/expected/evald/KGE_D.csv      |  51 +++
 tests/expected/evald/MAE.csv        |  51 +++
 tests/expected/evald/MARE.csv       |  51 +++
 tests/expected/evald/MSE.csv        |  51 +++
 tests/expected/evald/NSE.csv        |  51 +++
 tests/expected/evald/RMSE.csv       |  51 +++
 tests/expected/evalp/AS.csv         |   1 +
 tests/expected/evalp/AW.csv         |   1 +
 tests/expected/evalp/AWI.csv        |   1 +
 tests/expected/evalp/AWN.csv        |   1 +
 tests/expected/evalp/BS.csv         |   4 +
 tests/expected/evalp/BSS.csv        |   4 +
 tests/expected/evalp/BS_CRD.csv     |   4 +
 tests/expected/evalp/BS_LBD.csv     |   4 +
 tests/expected/evalp/CR.csv         |   1 +
 tests/expected/evalp/CRPS.csv       |   1 +
 tests/expected/evalp/CSI.csv        |  52 +++
 tests/expected/evalp/DS.csv         |   1 +
 tests/expected/evalp/FAR.csv        |  52 +++
 tests/expected/evalp/POD.csv        |  52 +++
 tests/expected/evalp/POFD.csv       |  52 +++
 tests/expected/evalp/QS.csv         |   1 +
 tests/expected/evalp/RANK_HIST.csv  |   1 +
 tests/expected/evalp/ROCSS.csv      |   4 +
 tests/expected/evalp/WS.csv         |   1 +
 tests/expected/evalp/WSS.csv        |   1 +
 tests/test_determinist.cpp          | 251 +++++++--------
 tests/test_probabilist.cpp          | 484 ++++++----------------------
 32 files changed, 902 insertions(+), 533 deletions(-)
 create mode 100644 tests/expected/evald/KGE.csv
 create mode 100644 tests/expected/evald/KGEPRIME.csv
 create mode 100644 tests/expected/evald/KGEPRIME_D.csv
 create mode 100644 tests/expected/evald/KGE_D.csv
 create mode 100644 tests/expected/evald/MAE.csv
 create mode 100644 tests/expected/evald/MARE.csv
 create mode 100644 tests/expected/evald/MSE.csv
 create mode 100644 tests/expected/evald/NSE.csv
 create mode 100644 tests/expected/evald/RMSE.csv
 create mode 100644 tests/expected/evalp/AS.csv
 create mode 100644 tests/expected/evalp/AW.csv
 create mode 100644 tests/expected/evalp/AWI.csv
 create mode 100644 tests/expected/evalp/AWN.csv
 create mode 100644 tests/expected/evalp/BS.csv
 create mode 100644 tests/expected/evalp/BSS.csv
 create mode 100644 tests/expected/evalp/BS_CRD.csv
 create mode 100644 tests/expected/evalp/BS_LBD.csv
 create mode 100644 tests/expected/evalp/CR.csv
 create mode 100644 tests/expected/evalp/CRPS.csv
 create mode 100644 tests/expected/evalp/CSI.csv
 create mode 100644 tests/expected/evalp/DS.csv
 create mode 100644 tests/expected/evalp/FAR.csv
 create mode 100644 tests/expected/evalp/POD.csv
 create mode 100644 tests/expected/evalp/POFD.csv
 create mode 100644 tests/expected/evalp/QS.csv
 create mode 100644 tests/expected/evalp/RANK_HIST.csv
 create mode 100644 tests/expected/evalp/ROCSS.csv
 create mode 100644 tests/expected/evalp/WS.csv
 create mode 100644 tests/expected/evalp/WSS.csv

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 2475248..a67f819 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -49,7 +49,7 @@ target_include_directories(
 target_compile_definitions(
         evalhyd_tests
         PRIVATE
-                EVALHYD_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}/data"
+                EVALHYD_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}"
 )
 
 target_link_libraries(
diff --git a/tests/expected/evald/KGE.csv b/tests/expected/evald/KGE.csv
new file mode 100644
index 0000000..65b1265
--- /dev/null
+++ b/tests/expected/evald/KGE.csv
@@ -0,0 +1,51 @@
+0.7480876678384525
+0.74610619665192
+0.7441110304778197
+0.7430108522656984
+0.7417677706194681
+0.740519915124128
+0.7396393314765528
+0.7391812106418076
+0.7385521156240031
+0.7374975605864584
+0.736478762920044
+0.7356032352557134
+0.7349262719558889
+0.7341531483736209
+0.7335193136927298
+0.732498016247827
+0.7316031283668971
+0.7311620062353068
+0.7304853804554484
+0.7298301318606002
+0.7291682672297097
+0.7284933080332816
+0.7278420198262487
+0.7273338548948837
+0.7266696338186898
+0.7261028872180326
+0.7255515136947399
+0.7249203100577184
+0.724129099815763
+0.7235915471922136
+0.723195030128365
+0.7223157825504646
+0.7214401411915639
+0.7203988937539173
+0.7197737983854688
+0.7188157660001235
+0.7176518268945717
+0.716230562324343
+0.7149933138365094
+0.7133901818967825
+0.7126230134351779
+0.711672543996632
+0.7101399598194
+0.7086263896776204
+0.7068405183946846
+0.7050500737470602
+0.7031816136500466
+0.7006732963875493
+0.6961818766730593
+0.6916216736996625
+0.6764337637969222
diff --git a/tests/expected/evald/KGEPRIME.csv b/tests/expected/evald/KGEPRIME.csv
new file mode 100644
index 0000000..b3ab574
--- /dev/null
+++ b/tests/expected/evald/KGEPRIME.csv
@@ -0,0 +1,51 @@
+0.8131407494929581
+0.8127748549543973
+0.8120324184047302
+0.8117867087643632
+0.8113865804825375
+0.8110546552600805
+0.8110115513420257
+0.8109282579595157
+0.8107808794738488
+0.8102714601853906
+0.8098723493335118
+0.80972964340455
+0.8096359354316549
+0.8093267143355573
+0.8091370309262083
+0.80876520882257
+0.808297843099951
+0.8082504172955576
+0.8079588362318786
+0.8077792859196675
+0.8075824101888797
+0.8072872435440667
+0.8071551289052044
+0.8070989073590911
+0.8068619291248411
+0.8067928045232984
+0.8066998263051663
+0.806455314511177
+0.806108392682091
+0.8060009627097642
+0.8059219799600571
+0.8056200993568368
+0.805444123201897
+0.805182363569416
+0.8049830239354914
+0.8048140123227603
+0.8043601618202886
+0.8037118677795622
+0.8035729804192682
+0.8027506521878371
+0.8024589351470541
+0.8021804568386013
+0.8017099458589753
+0.8013131114124993
+0.8004747893474917
+0.7998057696216888
+0.7995518852898957
+0.7986597353849383
+0.7971020515928053
+0.7958136756510419
+0.7899796163833354
diff --git a/tests/expected/evald/KGEPRIME_D.csv b/tests/expected/evald/KGEPRIME_D.csv
new file mode 100644
index 0000000..558a336
--- /dev/null
+++ b/tests/expected/evald/KGEPRIME_D.csv
@@ -0,0 +1,51 @@
+0.9071248643948864,1.1477333530243923,1.0668239858924582
+0.9077537938998346,1.1478429758547268,1.0684568980300118
+0.9080496949392161,1.1484028026509738,1.0696675053044051
+0.9082556740306482,1.1484209022637022,1.0705581846521615
+0.9084737485198940,1.1486630576436567,1.0713951161082431
+0.9089321810875520,1.1489428726605424,1.0722704389155211
+0.9093310954815347,1.1488135780819770,1.0731457617227991
+0.9094546896430078,1.1488016399441205,1.0735373535050023
+0.9095397983553178,1.1487856134230323,1.0740517976110342
+0.9095735837396087,1.1491999363901180,1.0745611228702399
+0.9096311356178044,1.1494305824532358,1.0751830627596215
+0.9101044432964827,1.1495392868849328,1.0758920230450484
+0.9102441911392056,1.1494163423692945,1.0765318788983218
+0.9102574967893201,1.1495752574168396,1.0770053722297441
+0.9103829299622012,1.1496597741688170,1.0774558307504485
+0.9104389606151662,1.1498193287039202,1.0781263996846790
+0.9103708026184928,1.1501613638087465,1.0785359074307741
+0.9106545061768646,1.1501897035858120,1.0789198209427380
+0.9106680754546872,1.1503679937353128,1.0793037344547021
+0.9107871131080774,1.1504053916036157,1.0798002625968421
+0.9109331668775991,1.1504930666773272,1.0802711965048515
+0.9109503931900584,1.1506796793881644,1.0806474317465762
+0.9112916063517488,1.1507819870125124,1.0811465193121295
+0.9116384611328783,1.1508213892258903,1.0815841807157685
+0.9116240546641530,1.1508649226798111,1.0820474363535382
+0.9117728671872545,1.1507388089259101,1.0826002718107666
+0.9119101939393581,1.1506625843082945,1.0831019187997328
+0.9119150575215442,1.1507542944710545,1.0835088671224147
+0.9119108416265477,1.1509472994224532,1.0839593256431190
+0.9120349545371204,1.1508964568748929,1.0844277001277152
+0.9120974194539910,1.1508349399490534,1.0847834599821353
+0.9121099264543083,1.1508757330670814,1.0854130781417561
+0.9125099729643541,1.1509349546260605,1.0861169195803568
+0.9129864907072712,1.1511152707612748,1.0868719494872194
+0.9130727481787702,1.1511623991072226,1.0873224080079240
+0.9137178424599028,1.1513164779920453,1.0880697596445472
+0.9137419646569960,1.1514764306974452,1.0888222301279968
+0.9136897426066677,1.1518176713888661,1.0896156513860558
+0.9144254206019433,1.1517387012476892,1.0907520353814693
+0.9142984381688551,1.1522701343488451,1.0915224218288104
+0.9142709614317646,1.1523150149572656,1.0920496630519076
+0.9144137451984202,1.1523110423287639,1.0927842175714655
+0.9149046185657690,1.1525453982271228,1.0938489377113123
+0.9155452591872579,1.1527254670882050,1.0949674057428342
+0.9155186197874120,1.1532046716768467,1.0959246300993313
+0.9154368750377250,1.1532273515892346,1.0972017823824649
+0.9167750313811954,1.1532539657633949,1.0988244568263661
+0.9169078074034277,1.1533196303650488,1.1006314097560101
+0.9175076075676912,1.1535549186612073,1.1038434861394426
+0.9179463597643975,1.1527797412590994,1.1077849981956065
+0.9179618893753376,1.1523899554203556,1.1189773567810644
diff --git a/tests/expected/evald/KGE_D.csv b/tests/expected/evald/KGE_D.csv
new file mode 100644
index 0000000..7d65cb5
--- /dev/null
+++ b/tests/expected/evald/KGE_D.csv
@@ -0,0 +1,51 @@
+0.9071248643948864,1.2244294704151979,1.0668239858924582
+0.9077537938998346,1.2264207454072791,1.0684568980300118
+0.9080496949392161,1.2284091609962542,1.0696675053044051
+0.9082556740306482,1.2294513963440263,1.0705581846521615
+0.9084737485198940,1.2306719900133749,1.0713951161082431
+0.9089321810875520,1.2319774783565793,1.0722704389155211
+0.9093310954815347,1.2328444223282775,1.0731457617227991
+0.9094546896430078,1.2332814722478178,1.0735373535050023
+0.9095397983553178,1.2338552531667024,1.0740517976110342
+0.9095735837396087,1.2348855740497733,1.0745611228702399
+0.9096311356178044,1.2358482940716458,1.0751830627596215
+0.9101044432964827,1.2367801489363925,1.0758920230450484
+0.9102441911392056,1.2373833346872534,1.0765318788983218
+0.9102574967893201,1.2380987280203271,1.0770053722297441
+0.9103829299622012,1.2387076270574358,1.0774558307504485
+0.9104389606151662,1.2396505731434120,1.0781263996846790
+0.9103708026184928,1.2404903302072829,1.0785359074307741
+0.9106545061768646,1.2409624690429850,1.0789198209427380
+0.9106680754546872,1.2415964716356864,1.0793037344547021
+0.9107871131080774,1.2422080439464072,1.0798002625968421
+0.9109331668775991,1.2428445217100519,1.0802711965048515
+0.9109503931900584,1.2434790402937934,1.0806474317465762
+0.9112916063517488,1.2441639397456739,1.0811465193121295
+0.9116384611328783,1.2447102094160669,1.0815841807157685
+0.9116240546641530,1.2452904391749027,1.0820474363535382
+0.9117728671872545,1.2457901473263877,1.0826002718107666
+0.9119101939393581,1.2462848529553729,1.0831019187997328
+0.9119150575215442,1.2468524819385858,1.0835088671224147
+0.9119108416265477,1.2475800585327315,1.0839593256431190
+0.9120349545371204,1.2480639978139763,1.0844277001277152
+0.9120974194539910,1.2484067080262666,1.0847834599821353
+0.9121099264543083,1.2491755719869908,1.0854130781417561
+0.9125099729643541,1.2500499275558148,1.0861169195803568
+0.9129864907072712,1.2511148984168152,1.0868719494872194
+0.9130727481787702,1.2516846718054440,1.0873224080079240
+0.9137178424599028,1.2527126434836113,1.0880697596445472
+0.9137419646569960,1.2537531352118179,1.0888222301279968
+0.9136897426066677,1.2550385622883493,1.0896156513860558
+0.9144254206019433,1.2562613326135268,1.0907520353814693
+0.9142984381688551,1.2577286876454601,1.0915224218288104
+0.9142709614317646,1.2583852238137356,1.0920496630519076
+0.9144137451984202,1.2592273207901978,1.0927842175714655
+0.9149046185657690,1.2607105595147998,1.0938489377113123
+0.9155452591872579,1.2621968142312687,1.0949674057428342
+0.9155186197874120,1.2638254032362688,1.0959246300993313
+0.9154368750377250,1.2653231056559175,1.0972017823824649
+0.9167750313811954,1.2672236625128150,1.0988244568263661
+0.9169078074034277,1.2693798106679639,1.1006314097560101
+0.9175076075676912,1.2733440828682880,1.1038434861394426
+0.9179463597643975,1.2770321035906431,1.1077849981956065
+0.9179618893753376,1.2894982662973180,1.1189773567810644
diff --git a/tests/expected/evald/MAE.csv b/tests/expected/evald/MAE.csv
new file mode 100644
index 0000000..ef9cf0e
--- /dev/null
+++ b/tests/expected/evald/MAE.csv
@@ -0,0 +1,51 @@
+265.1929260450160655
+265.6816720257234579
+265.7041800643086731
+265.7041800643086731
+265.8360128617363216
+266.1318327974276485
+266.4019292604501743
+266.3729903536977304
+266.5048231511253789
+266.6816720257234579
+266.6977491961415012
+266.8360128617363216
+267.0482315112540164
+267.3215434083601281
+267.4758842443729918
+267.8617363344051228
+268.0160771704179865
+267.9389067524115831
+268.1511254019292778
+268.1382636655948772
+268.3311897106109427
+268.5144694533761935
+268.4726688102894059
+268.3344051446945286
+268.5369774919614088
+268.6527331189710708
+268.7556270096462754
+268.9260450160771825
+269.0739549839228175
+269.2443729903537246
+269.4147909967845749
+269.7491961414791035
+269.7909967845658912
+269.9099678456591391
+270.0643086816720029
+269.9421221864951690
+270.1864951768488936
+270.6623794212218854
+271.1061093247588474
+271.5852090032154251
+271.9067524115755532
+272.1286173633440626
+272.3279742765273568
+272.6784565916398719
+273.4501607717041907
+274.5530546623793953
+274.8617363344051228
+276.1286173633440626
+278.5176848874597795
+281.2700964630225258
+291.2990353697749129
diff --git a/tests/expected/evald/MARE.csv b/tests/expected/evald/MARE.csv
new file mode 100644
index 0000000..bb93762
--- /dev/null
+++ b/tests/expected/evald/MARE.csv
@@ -0,0 +1,51 @@
+0.2110884459948862
+0.2114774783536764
+0.2114953943175681
+0.2114953943175681
+0.2116003306775049
+0.2118357976315096
+0.2120507891982094
+0.2120277543874916
+0.2121326907474284
+0.2122734590351485
+0.2122862561522140
+0.2123963113589770
+0.2125652333042412
+0.2127827842943542
+0.2129056366181827
+0.2132127674277538
+0.2133356197515823
+0.2132741935896681
+0.2134431155349323
+0.2134328778412799
+0.2135864432460655
+0.2137323303806118
+0.2136990578762416
+0.2135890026694786
+0.2137502463445035
+0.2138423855873749
+0.2139242871365938
+0.2140599365774878
+0.2141776700544901
+0.2143133194953841
+0.2144489689362780
+0.2147151489712397
+0.2147484214756099
+0.2148431201418944
+0.2149659724657229
+0.2148687143760253
+0.2150632305554205
+0.2154420252205583
+0.2157952256515652
+0.2161765797401161
+0.2164325220814255
+0.2166091222969289
+0.2167678065485407
+0.2170467837005679
+0.2176610453197104
+0.2185389275504014
+0.2187846321980584
+0.2197930450228172
+0.2216946966187457
+0.2238855630603538
+0.2318684046857924
diff --git a/tests/expected/evald/MSE.csv b/tests/expected/evald/MSE.csv
new file mode 100644
index 0000000..53a1eb3
--- /dev/null
+++ b/tests/expected/evald/MSE.csv
@@ -0,0 +1,51 @@
+603782.2604501608293504
+603540.1704180064843968
+604973.1768488745437935
+605519.1061093247262761
+606241.1157556270482019
+605823.9710610932670534
+605116.8520900321891531
+605160.5144694533664733
+605628.1511254019569606
+607006.1800643086899072
+608195.0578778134658933
+607157.1061093247262761
+607415.4598070739302784
+608465.9453376205638051
+608766.6463022507959977
+609964.8456591640133411
+611618.5176848875125870
+610871.5080385851906613
+611795.5273311897180974
+612155.2250803858041763
+612401.4630225080763921
+613310.6237942122388631
+612593.7202572347596288
+611633.2090032154228538
+612660.8906752411276102
+612724.9549839228857309
+612831.2958199357381091
+613728.3408360128523782
+614918.0514469452900812
+615075.4372990353731439
+615330.4244372990215197
+616544.5594855305971578
+615854.5048231511609629
+615046.1800643086899072
+615534.5530546624213457
+613767.3826366559369490
+615365.1704180064843968
+617751.3633440514095128
+615900.7909967845771462
+618968.0353697749087587
+620238.9099678456550464
+620927.2025723472470418
+620784.1286173633998260
+619856.3247588424710557
+622720.4019292604643852
+625799.9421221865341067
+621881.5369774919236079
+624982.4630225080763921
+628774.5691318328026682
+633351.0771704179933295
+656835.5305466237477958
diff --git a/tests/expected/evald/NSE.csv b/tests/expected/evald/NSE.csv
new file mode 100644
index 0000000..578721b
--- /dev/null
+++ b/tests/expected/evald/NSE.csv
@@ -0,0 +1,51 @@
+0.7189121923160171
+0.7190248961181289
+0.7183577671505612
+0.7181036125173065
+0.7177674845422075
+0.7179616841657375
+0.7182908798615486
+0.7182705530594651
+0.718052847156156
+0.7174113126846504
+0.7168578365723296
+0.7173410498202125
+0.7172207745500294
+0.7167317262719881
+0.7165917364437786
+0.7160339207336052
+0.7152640620034193
+0.7156118286033213
+0.7151816560490262
+0.7150142005632107
+0.7148995657223857
+0.714476310478086
+0.7148100613295596
+0.715257222533744
+0.7147787904778381
+0.7147489656597577
+0.7146994592160939
+0.7142818444011523
+0.7137279805841595
+0.7136547103888884
+0.7135360024036145
+0.7129707679121519
+0.7132920194045123
+0.7136683309479988
+0.7134409713480524
+0.7142636686863795
+0.7135198265862999
+0.7124089464193454
+0.7132704711081728
+0.7118425308507162
+0.7112508815459708
+0.7109304503708312
+0.710997057734077
+0.7114289921740817
+0.710095635390346
+0.7086619708754944
+0.7104861647677075
+0.7090425441765131
+0.7072771481677261
+0.705146577768962
+0.6942135081069736
diff --git a/tests/expected/evald/RMSE.csv b/tests/expected/evald/RMSE.csv
new file mode 100644
index 0000000..42b7512
--- /dev/null
+++ b/tests/expected/evald/RMSE.csv
@@ -0,0 +1,51 @@
+777.0342723780984
+776.8784785396018
+777.8002165394881
+778.1510818018085
+778.6148699810626
+778.3469477431598
+777.8925710469488
+777.9206350711191
+778.2211453856814
+779.1060133668002
+779.8686157794872
+779.2028658246354
+779.3686289600538
+780.0422715068848
+780.2349942820117
+781.0024620058275
+782.0604309673821
+781.5826943059737
+782.1735915582868
+782.4034925026765
+782.5608366271009
+783.1415094312216
+782.6836655106805
+782.0698236111757
+782.7265746576138
+782.7674973987633
+782.8354206472366
+783.4081572437275
+784.1671068381696
+784.267452658234
+784.4299997050719
+785.2035146925481
+784.7639803298513
+784.2487998488162
+784.5601016204319
+783.4330747655832
+784.4521466718072
+785.971604667784
+784.7934702816943
+786.7452162992635
+787.5524807705488
+787.9893416616416
+787.8985522371287
+787.3095482456963
+789.1263535893733
+791.0751810809049
+788.5946594908514
+790.5583236058602
+792.9530686817681
+795.833573789406
+810.4539040233095
diff --git a/tests/expected/evalp/AS.csv b/tests/expected/evalp/AS.csv
new file mode 100644
index 0000000..2062dbb
--- /dev/null
+++ b/tests/expected/evalp/AS.csv
@@ -0,0 +1 @@
+0.4914810317862
diff --git a/tests/expected/evalp/AW.csv b/tests/expected/evalp/AW.csv
new file mode 100644
index 0000000..40622cd
--- /dev/null
+++ b/tests/expected/evalp/AW.csv
@@ -0,0 +1 @@
+9.2749196141479,31.3215434083601
diff --git a/tests/expected/evalp/AWI.csv b/tests/expected/evalp/AWI.csv
new file mode 100644
index 0000000..8b3b7e7
--- /dev/null
+++ b/tests/expected/evalp/AWI.csv
@@ -0,0 +1 @@
+0.9821120161733,0.9880951944476
diff --git a/tests/expected/evalp/AWN.csv b/tests/expected/evalp/AWN.csv
new file mode 100644
index 0000000..34e29a4
--- /dev/null
+++ b/tests/expected/evalp/AWN.csv
@@ -0,0 +1 @@
+0.0073826568351,0.0249313434669
diff --git a/tests/expected/evalp/BS.csv b/tests/expected/evalp/BS.csv
new file mode 100644
index 0000000..3db5ba1
--- /dev/null
+++ b/tests/expected/evalp/BS.csv
@@ -0,0 +1,4 @@
+0.1061513565769
+0.0739562201528
+0.0866918610329
+nan
diff --git a/tests/expected/evalp/BSS.csv b/tests/expected/evalp/BSS.csv
new file mode 100644
index 0000000..6b26f31
--- /dev/null
+++ b/tests/expected/evalp/BSS.csv
@@ -0,0 +1,4 @@
+0.5705594211361
+0.6661165249535
+0.5635125720476
+nan
diff --git a/tests/expected/evalp/BS_CRD.csv b/tests/expected/evalp/BS_CRD.csv
new file mode 100644
index 0000000..9fa242b
--- /dev/null
+++ b/tests/expected/evalp/BS_CRD.csv
@@ -0,0 +1,4 @@
+0.0114117580190,0.1524456042419,0.2471852027998
+0.0055324125593,0.1530792786029,0.2215030861964
+0.0101394313199,0.1220600742934,0.1986125040064
+nan,nan,nan
diff --git a/tests/expected/evalp/BS_LBD.csv b/tests/expected/evalp/BS_LBD.csv
new file mode 100644
index 0000000..903f102
--- /dev/null
+++ b/tests/expected/evalp/BS_LBD.csv
@@ -0,0 +1,4 @@
+0.0121598807967,0.1506234181408,0.2446148939211
+0.0080317462446,0.1473868836293,0.2133113575375
+0.0171912794414,0.1048221425794,0.1743227241709
+nan,nan,nan
diff --git a/tests/expected/evalp/CR.csv b/tests/expected/evalp/CR.csv
new file mode 100644
index 0000000..a4a7469
--- /dev/null
+++ b/tests/expected/evalp/CR.csv
@@ -0,0 +1 @@
+0.0064308681672,0.0353697749196
diff --git a/tests/expected/evalp/CRPS.csv b/tests/expected/evalp/CRPS.csv
new file mode 100644
index 0000000..c449874
--- /dev/null
+++ b/tests/expected/evalp/CRPS.csv
@@ -0,0 +1 @@
+252.9569186533230
diff --git a/tests/expected/evalp/CSI.csv b/tests/expected/evalp/CSI.csv
new file mode 100644
index 0000000..ed9f3d5
--- /dev/null
+++ b/tests/expected/evalp/CSI.csv
@@ -0,0 +1,52 @@
+0.4469453376206,0.3311897106109,0.2733118971061,nan
+0.7792207792208,0.8108108108108,0.7032967032967,nan
+0.7792207792208,0.8108108108108,0.7032967032967,nan
+0.7792207792208,0.8108108108108,0.7032967032967,nan
+0.7792207792208,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7843137254902,0.8108108108108,0.7032967032967,nan
+0.7712418300654,0.8108108108108,0.7032967032967,nan
+0.7712418300654,0.8108108108108,0.7032967032967,nan
+0.7712418300654,0.8018018018018,0.7111111111111,nan
diff --git a/tests/expected/evalp/DS.csv b/tests/expected/evalp/DS.csv
new file mode 100644
index 0000000..bf3ba37
--- /dev/null
+++ b/tests/expected/evalp/DS.csv
@@ -0,0 +1 @@
+148.7901639344262
diff --git a/tests/expected/evalp/FAR.csv b/tests/expected/evalp/FAR.csv
new file mode 100644
index 0000000..7f82d19
--- /dev/null
+++ b/tests/expected/evalp/FAR.csv
@@ -0,0 +1,52 @@
+0.5530546623794,0.6688102893891,0.7266881028939,nan
+0.1111111111111,0.0816326530612,0.0857142857143,nan
+0.1111111111111,0.0816326530612,0.0857142857143,nan
+0.1111111111111,0.0816326530612,0.0857142857143,nan
+0.1111111111111,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1044776119403,0.0816326530612,0.0857142857143,nan
+0.1060606060606,0.0816326530612,0.0857142857143,nan
+0.1060606060606,0.0816326530612,0.0857142857143,nan
+0.1060606060606,0.0824742268041,0.0724637681159,nan
diff --git a/tests/expected/evalp/POD.csv b/tests/expected/evalp/POD.csv
new file mode 100644
index 0000000..80667c1
--- /dev/null
+++ b/tests/expected/evalp/POD.csv
@@ -0,0 +1,52 @@
+1.0000000000000,1.0000000000000,1.0000000000000,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8633093525180,0.8737864077670,0.7529411764706,nan
+0.8489208633094,0.8737864077670,0.7529411764706,nan
+0.8489208633094,0.8737864077670,0.7529411764706,nan
+0.8489208633094,0.8640776699029,0.7529411764706,nan
diff --git a/tests/expected/evalp/POFD.csv b/tests/expected/evalp/POFD.csv
new file mode 100644
index 0000000..d3007cb
--- /dev/null
+++ b/tests/expected/evalp/POFD.csv
@@ -0,0 +1,52 @@
+1.0000000000000,1.0000000000000,1.0000000000000,nan
+0.0872093023256,0.0384615384615,0.0265486725664,nan
+0.0872093023256,0.0384615384615,0.0265486725664,nan
+0.0872093023256,0.0384615384615,0.0265486725664,nan
+0.0872093023256,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0265486725664,nan
+0.0813953488372,0.0384615384615,0.0221238938053,nan
diff --git a/tests/expected/evalp/QS.csv b/tests/expected/evalp/QS.csv
new file mode 100644
index 0000000..10f11b5
--- /dev/null
+++ b/tests/expected/evalp/QS.csv
@@ -0,0 +1 @@
+345.9157803611179,345.0692555033388,343.1293593865944,340.7098689092258,338.2815978233983,335.9735345040806,333.5551570615883,330.3324264160278,327.3335394509029,324.3259955478602,321.1900816225579,318.1751174870145,315.1221864951768,311.9720504575810,308.6449418748451,305.6121691813011,302.1695523126391,298.4459559732869,294.9746475389559,291.2738065792731,287.7245857036857,284.1019045263419,280.2355923818945,276.2186495176851,272.5014840465003,268.6527331189711,264.7401681919366,260.8558001484045,256.9032896364086,252.9262923571603,248.9312391788272,244.9863962404153,240.6629977739305,236.3289636408610,232.0897848132574,227.3870887954491,222.9760079149148,218.6999752658918,214.0996784565916,209.6725204056392,205.1895869403907,200.3957457333661,195.2372000989366,190.0801385110065,185.3842443729902,180.6178580262183,174.5832302745488,169.1540934949294,163.1109324758844,156.2747959436064,147.5753153598814
diff --git a/tests/expected/evalp/RANK_HIST.csv b/tests/expected/evalp/RANK_HIST.csv
new file mode 100644
index 0000000..c35d009
--- /dev/null
+++ b/tests/expected/evalp/RANK_HIST.csv
@@ -0,0 +1 @@

diff --git a/tests/expected/evalp/ROCSS.csv b/tests/expected/evalp/ROCSS.csv
new file mode 100644
index 0000000..ab68dea
--- /dev/null
+++ b/tests/expected/evalp/ROCSS.csv
@@ -0,0 +1,4 @@
+0.7108499247114
+0.8017176997760
+0.7130661114003
+nan
diff --git a/tests/expected/evalp/WS.csv b/tests/expected/evalp/WS.csv
new file mode 100644
index 0000000..82b07bb
--- /dev/null
+++ b/tests/expected/evalp/WS.csv
@@ -0,0 +1 @@
+764.4471750114835,2578.1382636655953
diff --git a/tests/expected/evalp/WSS.csv b/tests/expected/evalp/WSS.csv
new file mode 100644
index 0000000..fd929da
--- /dev/null
+++ b/tests/expected/evalp/WSS.csv
@@ -0,0 +1 @@
+0.6621887740287,0.4360388849930
diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp
index 41df128..e68201c 100644
--- a/tests/test_determinist.cpp
+++ b/tests/test_determinist.cpp
@@ -6,16 +6,18 @@
 #include <vector>
 #include <tuple>
 #include <array>
+#include <string>
+#include <unordered_map>
 
 #include <gtest/gtest.h>
 
 #include <xtensor/xtensor.hpp>
+#include <xtensor/xarray.hpp>
 #include <xtensor/xview.hpp>
 #include <xtensor/xmanipulation.hpp>
 #include <xtensor/xmath.hpp>
 #include <xtensor/xsort.hpp>
 #include <xtensor/xcsv.hpp>
-#include <xtensor/xio.hpp>
 
 #include "evalhyd/evald.hpp"
 
@@ -35,19 +37,36 @@ std::tuple<xt::xtensor<double, 2>, xt::xtensor<double, 2>> load_data_d()
 {
     // read in data
     std::ifstream ifs;
-    ifs.open(EVALHYD_DATA_DIR "/q_obs.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs.csv");
     xt::xtensor<double, 2> observed = xt::load_csv<int>(ifs);
     ifs.close();
 
-    ifs.open(EVALHYD_DATA_DIR "/q_prd.csv");
-    xt::xtensor<double, 2> predicted = xt::view(
-            xt::load_csv<double>(ifs), xt::range(0, 5), xt::all()
-    );
+    ifs.open(EVALHYD_DATA_DIR "/data/q_prd.csv");
+    xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs);
     ifs.close();
 
     return std::make_tuple(observed, predicted);
 }
 
+std::unordered_map<std::string, xt::xarray<double>> load_expected_d()
+{
+    // read in expected results
+    std::ifstream ifs;
+    std::unordered_map<std::string, xt::xarray<double>> expected;
+
+    for (const auto& metric : all_metrics_d)
+    {
+        ifs.open(EVALHYD_DATA_DIR "/expected/evald/" + metric + ".csv");
+        expected[metric] = xt::view(
+                xt::squeeze(xt::load_csv<double>(ifs)),
+                xt::all(), xt::newaxis(), xt::newaxis()
+        );
+        ifs.close();
+    }
+
+    return expected;
+}
+
 TEST(DeterministTests, TestMetrics)
 {
     // read in data
@@ -55,84 +74,22 @@ TEST(DeterministTests, TestMetrics)
     xt::xtensor<double, 2> predicted;
     std::tie(observed, predicted) = load_data_d();
 
+    // read in expected results
+    auto expected = load_expected_d();
+
     // compute scores (with 2D tensors)
-    std::vector<xt::xarray<double>> metrics =
+    std::vector<xt::xarray<double>> results =
             evalhyd::evald(
                     observed, predicted, all_metrics_d
             );
 
-    // check results on all metrics
-    xt::xtensor<double, 3> mae =
-            {{{ 265.192926}},
-             {{ 265.681672}},
-             {{ 265.70418 }},
-             {{ 265.70418 }},
-             {{ 265.836013}}};
-    EXPECT_TRUE(xt::allclose(metrics[0], mae));
-
-    xt::xtensor<double, 3> mare =
-            {{{ 0.211088}},
-             {{ 0.211477}},
-             {{ 0.211495}},
-             {{ 0.211495}},
-             {{ 0.2116  }}};
-    EXPECT_TRUE(xt::allclose(metrics[1], mare));
-
-    xt::xtensor<double, 3> mse =
-            {{{ 603782.26045 }},
-             {{ 603540.170418}},
-             {{ 604973.176849}},
-             {{ 605519.106109}},
-             {{ 606241.115756}}};
-    EXPECT_TRUE(xt::allclose(metrics[2], mse));
-
-    xt::xtensor<double, 3> rmse =
-            {{{777.034272}},
-             {{776.878479}},
-             {{777.800217}},
-             {{778.151082}},
-             {{778.61487 }}};
-    EXPECT_TRUE(xt::allclose(metrics[3], rmse));
-
-    xt::xtensor<double, 3> nse =
-            {{{0.718912}},
-             {{0.719025}},
-             {{0.718358}},
-             {{0.718104}},
-             {{0.717767}}};
-    EXPECT_TRUE(xt::allclose(metrics[4], nse));
-
-    xt::xtensor<double, 3> kge =
-            {{{0.748088}},
-             {{0.746106}},
-             {{0.744111}},
-             {{0.743011}},
-             {{0.741768}}};
-    EXPECT_TRUE(xt::allclose(metrics[5], kge));
-
-    xt::xtensor<double, 4> kge_d =
-            {{{{ 0.907125,  1.224429,  1.066824}}},
-             {{{ 0.907754,  1.226421,  1.068457}}},
-             {{{ 0.90805 ,  1.228409,  1.069668}}},
-             {{{ 0.908256,  1.229451,  1.070558}}},
-             {{{ 0.908474,  1.230672,  1.071395}}}};
-    EXPECT_TRUE(xt::allclose(metrics[6], kge_d));
-
-    xt::xtensor<double, 3> kgeprime =
-            {{{0.813141}},
-             {{0.812775}},
-             {{0.812032}},
-             {{0.811787}},
-             {{0.811387}}};
-    EXPECT_TRUE(xt::allclose(metrics[7], kgeprime));
-
-    xt::xtensor<double, 4> kgeprime_d =
-            {{{{ 0.907125,  1.147733,  1.066824}}},
-             {{{ 0.907754,  1.147843,  1.068457}}},
-             {{{ 0.90805 ,  1.148403,  1.069668}}},
-             {{{ 0.908256,  1.148421,  1.070558}}},
-             {{{ 0.908474,  1.148663,  1.071395}}}};
-    EXPECT_TRUE(xt::allclose(metrics[8], kgeprime_d));
+    // check results
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results[m], expected[all_metrics_d[m]], 1e-05, 1e-08, true
+        ))) << "Failure for (" << all_metrics_d[m] << ")";
+    }
 }
 
 TEST(DeterministTests, TestTransform)
@@ -143,49 +100,73 @@ TEST(DeterministTests, TestTransform)
     std::tie(observed, predicted) = load_data_d();
 
     // compute and check results on square-rooted streamflow series
-    std::vector<xt::xarray<double>> metrics =
-            evalhyd::evald(observed, predicted, {"NSE"}, "sqrt");
+    std::vector<xt::xarray<double>> results_sqrt =
+            evalhyd::evald(observed, predicted, all_metrics_d, "sqrt");
 
-    xt::xtensor<double, 3> nse_sqrt =
-            {{{0.882817}},
-             {{0.883023}},
-             {{0.883019}},
-             {{0.883029}},
-             {{0.882972}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_sqrt)));
+    xt::xtensor<double, 2> obs_sqrt = xt::sqrt(observed);
+    xt::xtensor<double, 2> prd_sqrt = xt::sqrt(predicted);
+
+    std::vector<xt::xarray<double>> results_sqrt_ =
+            evalhyd::evald(obs_sqrt, prd_sqrt, all_metrics_d);
+
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results_sqrt[m], results_sqrt_[m], 1e-05, 1e-08, true
+        ))) << "Failure for (" << all_metrics_d[m] << ")";
+    }
 
     // compute and check results on inverted streamflow series
-    metrics = evalhyd::evald(observed, predicted, {"NSE"}, "inv");
+    std::vector<xt::xarray<double>> results_inv =
+            evalhyd::evald(observed, predicted, all_metrics_d, "inv");
+
+    xt::xtensor<double, 2> epsilon = xt::mean(observed, {1}, xt::keep_dims) * 0.01;
+    xt::xtensor<double, 2> obs_inv = 1. / (observed + epsilon);
+    xt::xtensor<double, 2> prd_inv = 1. / (predicted + epsilon);
 
-    xt::xtensor<double, 3> nse_inv =
-            {{{0.737323}},
-             {{0.737404}},
-             {{0.737429}},
-             {{0.737546}},
-             {{0.737595}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_inv)));
+    std::vector<xt::xarray<double>> results_inv_ =
+            evalhyd::evald(obs_inv, prd_inv, all_metrics_d);
+
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results_inv[m], results_inv_[m], 1e-05, 1e-08, true
+        ))) << "Failure for (" << all_metrics_d[m] << ")";
+    }
 
-    // compute and check results on square-rooted streamflow series
+    // compute and check results on log-transformed streamflow series
-    metrics = evalhyd::evald(observed, predicted, {"NSE"}, "log");
+    std::vector<xt::xarray<double>> results_log =
+            evalhyd::evald(observed, predicted, all_metrics_d, "log");
+
+    xt::xtensor<double, 2> obs_log = xt::log(observed + epsilon);
+    xt::xtensor<double, 2> prd_log = xt::log(predicted + epsilon);
+
+    std::vector<xt::xarray<double>> results_log_ =
+            evalhyd::evald(obs_log, prd_log, all_metrics_d);
 
-    xt::xtensor<double, 3> nse_log =
-            {{{0.893344}},
-             {{0.893523}},
-             {{0.893585}},
-             {{0.893758}},
-             {{0.893793}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_log)));
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results_log[m], results_log_[m], 1e-05, 1e-08, true
+        ))) << "Failure for (" << all_metrics_d[m] << ")";
+    }
 
     // compute and check results on power-transformed streamflow series
-    metrics = evalhyd::evald(observed, predicted, {"NSE"}, "pow", 0.2);
+    std::vector<xt::xarray<double>> results_pow =
+            evalhyd::evald(observed, predicted, all_metrics_d, "pow", 0.2);
+
+    xt::xtensor<double, 2> obs_pow = xt::pow(observed, 0.2);
+    xt::xtensor<double, 2> prd_pow = xt::pow(predicted, 0.2);
 
-    xt::xtensor<double, 3> nse_pow =
-            {{{0.899207}},
-             {{0.899395}},
-             {{0.899451}},
-             {{0.899578}},
-             {{0.899588}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_pow)));
+    std::vector<xt::xarray<double>> results_pow_ =
+            evalhyd::evald(obs_pow, prd_pow, all_metrics_d);
+
+    for (std::size_t m = 0; m < all_metrics_d.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results_pow[m], results_pow_[m], 1e-05, 1e-08, true
+        ))) << "Failure for (" << all_metrics_d[m] << ")";
+    }
 
 }
 
@@ -235,13 +216,10 @@ TEST(DeterministTests, TestMaskingConditions)
     // conditions on streamflow values _________________________________________
 
     // compute scores using masking conditions on streamflow to subset whole record
-    xt::xtensor<std::array<char, 32>, 2> q_conditions = {{
-            {{std::array<char, 32>{"q_obs{<2000,>3000}"}}},
-            {{std::array<char, 32>{"q_obs{<2000,>3000}"}}},
-            {{std::array<char, 32>{"q_obs{<2000,>3000}"}}},
-            {{std::array<char, 32>{"q_obs{<2000,>3000}"}}},
-            {{std::array<char, 32>{"q_obs{<2000,>3000}"}}}
-    }};
+    xt::xtensor<std::array<char, 32>, 2> q_conditions = {
+            {std::array<char, 32>{"q_obs{<2000,>3000}"}}
+    };
+    q_conditions = xt::repeat(q_conditions, predicted.shape(0), 0);
 
     std::vector<xt::xarray<double>> metrics_q_conditioned =
             evalhyd::evald(
@@ -270,13 +248,10 @@ TEST(DeterministTests, TestMaskingConditions)
     // conditions on streamflow statistics _____________________________________
 
     // compute scores using masking conditions on streamflow to subset whole record
-    xt::xtensor<std::array<char, 32>, 2> q_conditions_ = {{
-            {{std::array<char, 32>{"q_obs{>=mean}"}}},
-            {{std::array<char, 32>{"q_obs{>=mean}"}}},
-            {{std::array<char, 32>{"q_obs{>=mean}"}}},
-            {{std::array<char, 32>{"q_obs{>=mean}"}}},
-            {{std::array<char, 32>{"q_obs{>=mean}"}}}
-    }};
+    xt::xtensor<std::array<char, 32>, 2> q_conditions_ = {
+            {std::array<char, 32>{"q_obs{>=mean}"}}
+    };
+    q_conditions_ = xt::repeat(q_conditions_, predicted.shape(0), 0);
 
     double mean = xt::mean(observed, {1})();
 
@@ -307,13 +282,10 @@ TEST(DeterministTests, TestMaskingConditions)
     // conditions on temporal indices __________________________________________
 
     // compute scores using masking conditions on time indices to subset whole record
-    xt::xtensor<std::array<char, 32>, 2> t_conditions = {{
-            {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}},
-            {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}},
-            {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}},
-            {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}},
-            {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}}
-    }};
+    xt::xtensor<std::array<char, 32>, 2> t_conditions = {
+            {std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}
+    };
+    t_conditions = xt::repeat(t_conditions, predicted.shape(0), 0);
 
     std::vector<xt::xarray<double>> metrics_t_conditioned =
             evalhyd::evald(
@@ -346,6 +318,7 @@ TEST(DeterministTests, TestMissingData)
     xt::xtensor<double, 2> observed;
     xt::xtensor<double, 2> predicted;
     std::tie(observed, predicted) = load_data_d();
+    predicted = xt::view(predicted, xt::range(0, 5), xt::all());
 
     // add some missing observations artificially by assigning NaN values
     xt::view(observed, xt::all(), xt::range(0, 20)) = NAN;
@@ -394,16 +367,16 @@ TEST(DeterministTests, TestBootstrap)
     // read in data
     std::ifstream ifs;
 
-    ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv");
     xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1));
     ifs.close();
     std::vector<std::string> datetimes (x_dts.begin(), x_dts.end());
 
-    ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv");
     xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1));
     ifs.close();
 
-    ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_prd_1yr.csv");
     xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1);
     ifs.close();
 
@@ -459,16 +432,16 @@ TEST(DeterministTests, TestBootstrapSummary)
     // read in data
     std::ifstream ifs;
 
-    ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv");
     xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1));
     ifs.close();
     std::vector<std::string> datetimes (x_dts.begin(), x_dts.end());
 
-    ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv");
     xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1));
     ifs.close();
 
-    ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_prd_1yr.csv");
     xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1);
     ifs.close();
 
diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp
index 66936f3..165b0df 100644
--- a/tests/test_probabilist.cpp
+++ b/tests/test_probabilist.cpp
@@ -6,11 +6,14 @@
 #include <vector>
 #include <tuple>
 #include <array>
+#include <string>
+#include <unordered_map>
 
 #include <gtest/gtest.h>
 
 #include <xtl/xoptional.hpp>
 #include <xtensor/xtensor.hpp>
+#include <xtensor/xarray.hpp>
 #include <xtensor/xview.hpp>
 #include <xtensor/xmath.hpp>
 #include <xtensor/xsort.hpp>
@@ -38,17 +41,37 @@ std::tuple<xt::xtensor<double, 1>, xt::xtensor<double, 2>> load_data_p()
 {
     // read in data
     std::ifstream ifs;
-    ifs.open(EVALHYD_DATA_DIR "/q_obs.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs.csv");
     xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<int>(ifs));
     ifs.close();
 
-    ifs.open(EVALHYD_DATA_DIR "/q_prd.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_prd.csv");
     xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs);
     ifs.close();
 
     return std::make_tuple(observed, predicted);
 }
 
+std::unordered_map<std::string, xt::xarray<double>> load_expected_p()
+{
+    // read in expected results
+    std::ifstream ifs;
+    std::unordered_map<std::string, xt::xarray<double>> expected;
+
+    for (const auto& metric : all_metrics_p)
+    {
+        ifs.open(EVALHYD_DATA_DIR "/expected/evalp/" + metric + ".csv");
+        expected[metric] = xt::view(
+                xt::squeeze(xt::load_csv<double>(ifs)),
+                xt::newaxis(), xt::newaxis(), xt::newaxis(),
+                xt::newaxis(), xt::all()
+        );
+        ifs.close();
+    }
+
+    return expected;
+}
+
 TEST(ProbabilistTests, TestBrier)
 {
     // read in data
@@ -56,54 +79,31 @@ TEST(ProbabilistTests, TestBrier)
     xt::xtensor<double, 2> predicted;
     std::tie(observed, predicted) = load_data_p();
 
+    // read in expected results
+    auto expected = load_expected_p();
+
     // compute scores
     xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}};
+    std::vector<std::string> metrics = {"BS", "BSS", "BS_CRD", "BS_LBD"};
 
-    std::vector<xt::xarray<double>> metrics =
+    std::vector<xt::xarray<double>> results =
             evalhyd::evalp(
                     // shape: (sites [1], time [t])
                     xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
                     // shape: (sites [1], lead times [1], members [m], time [t])
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    {"BS", "BSS", "BS_CRD", "BS_LBD"},
+                    metrics,
                     thresholds,
                     "high"
             );
 
     // check results
-    // Brier scores
-    xt::xtensor<double, 5> bs =
-            {{{{{0.10615136, 0.07395622, 0.08669186, NAN}}}}};
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[0], bs, 1e-05, 1e-08, true))
-    );
-
-    // Brier skill scores
-    xt::xtensor<double, 5> bss =
-            {{{{{0.5705594, 0.6661165, 0.5635126, NAN}}}}};
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[1], bss, 1e-05, 1e-08, true))
-    );
-
-    // Brier calibration-refinement decompositions
-    xt::xtensor<double, 6> bs_crd =
-            {{{{{{0.011411758, 0.1524456, 0.2471852},
-                 {0.005532413, 0.1530793, 0.2215031},
-                 {0.010139431, 0.1220601, 0.1986125},
-                 {NAN, NAN, NAN}}}}}};
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[2], bs_crd, 1e-05, 1e-08, true))
-    );
-
-    // Brier likelihood-base rate decompositions
-    xt::xtensor<double, 6> bs_lbd =
-            {{{{{{0.012159881, 0.1506234, 0.2446149},
-                 {0.008031746, 0.1473869, 0.2133114},
-                 {0.017191279, 0.1048221, 0.1743227},
-                 {NAN, NAN, NAN}}}}}};
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[3], bs_lbd, 1e-05, 1e-08, true))
-    );
+    for (std::size_t m = 0; m < metrics.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results[m], expected[metrics[m]], 1e-05, 1e-08, true
+        ))) << "Failure for (" << metrics[m] << ")";
+    }
 }
 
 TEST(ProbabilistTests, TestQuantiles)
@@ -113,36 +113,28 @@ TEST(ProbabilistTests, TestQuantiles)
     xt::xtensor<double, 2> predicted;
     std::tie(observed, predicted) = load_data_p();
 
+    // read in expected results
+    auto expected = load_expected_p();
+
     // compute scores
-    std::vector<xt::xarray<double>> metrics =
+    std::vector<std::string> metrics = {"QS", "CRPS"};
+
+    std::vector<xt::xarray<double>> results =
             evalhyd::evalp(
                     // shape: (sites [1], time [t])
                     xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
                     // shape: (sites [1], lead times [1], members [m], time [t])
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    {"QS", "CRPS"}
+                    metrics
             );
 
     // check results
-    // Quantile scores
-    xt::xtensor<double, 5> qs =
-            {{{{{345.91578, 345.069256, 343.129359, 340.709869, 338.281598,
-                 335.973535, 333.555157, 330.332426, 327.333539, 324.325996,
-                 321.190082, 318.175117, 315.122186, 311.97205, 308.644942,
-                 305.612169, 302.169552, 298.445956, 294.974648, 291.273807,
-                 287.724586, 284.101905, 280.235592, 276.21865, 272.501484,
-                 268.652733, 264.740168, 260.8558, 256.90329, 252.926292,
-                 248.931239, 244.986396, 240.662998, 236.328964, 232.089785,
-                 227.387089, 222.976008, 218.699975, 214.099678, 209.67252,
-                 205.189587, 200.395746, 195.2372, 190.080139, 185.384244,
-                 180.617858, 174.58323, 169.154093, 163.110932, 156.274796,
-                 147.575315}}}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[0], qs)));
-
-    // Continuous ranked probability scores
-    xt::xtensor<double, 4> crps =
-            {{{{252.956919}}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[1], crps)));
+    for (std::size_t m = 0; m < metrics.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results[m], expected[metrics[m]], 1e-05, 1e-08, true
+        ))) << "Failure for (" << metrics[m] << ")";
+    }
 }
 
 TEST(ProbabilistTests, TestContingency)
@@ -152,260 +144,31 @@ TEST(ProbabilistTests, TestContingency)
     xt::xtensor<double, 2> predicted;
     std::tie(observed, predicted) = load_data_p();
 
+    // read in expected results
+    auto expected = load_expected_p();
+
     // compute scores
     xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}};
+    std::vector<std::string> metrics = {"POD", "POFD", "FAR", "CSI", "ROCSS"};
 
-    std::vector<xt::xarray<double>> metrics =
+    std::vector<xt::xarray<double>> results =
             evalhyd::evalp(
                     // shape: (sites [1], time [t])
                     xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
                     // shape: (sites [1], lead times [1], members [m], time [t])
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    {"POD", "POFD", "FAR", "CSI", "ROCSS"},
+                    metrics,
                     thresholds,
                     "low"
             );
 
     // check results
-    // POD
-    xt::xtensor<double, 6> pod =
-            {{{{{{ 1.      , 1.      , 1.      , NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.863309, 0.873786, 0.752941, NAN},
-                 { 0.848921, 0.873786, 0.752941, NAN},
-                 { 0.848921, 0.873786, 0.752941, NAN},
-                 { 0.848921, 0.864078, 0.752941, NAN}}}}}};
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[0], pod, 1e-05, 1e-08, true))
-    );
-
-    // POFD
-    xt::xtensor<double, 6> pofd =
-            {{{{{{ 1.      , 1.      , 1.      , NAN},
-                 { 0.087209, 0.038462, 0.026549, NAN},
-                 { 0.087209, 0.038462, 0.026549, NAN},
-                 { 0.087209, 0.038462, 0.026549, NAN},
-                 { 0.087209, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.026549, NAN},
-                 { 0.081395, 0.038462, 0.022124, NAN}}}}}};
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[1], pofd, 1e-04, 1e-07, true))
-    );
-
-    // FAR
-    xt::xtensor<double, 6> far =
-            {{{{{{ 0.553055, 0.66881 , 0.726688, NAN},
-                 { 0.111111, 0.081633, 0.085714, NAN},
-                 { 0.111111, 0.081633, 0.085714, NAN},
-                 { 0.111111, 0.081633, 0.085714, NAN},
-                 { 0.111111, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.104478, 0.081633, 0.085714, NAN},
-                 { 0.106061, 0.081633, 0.085714, NAN},
-                 { 0.106061, 0.081633, 0.085714, NAN},
-                 { 0.106061, 0.082474, 0.072464, NAN}}}}}};
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[2], far, 1e-05, 1e-08, true))
-    );
-
-    // CSI
-    xt::xtensor<double, 6> csi =
-            {{{{{{ 0.446945, 0.33119 , 0.273312, NAN},
-                 { 0.779221, 0.810811, 0.703297, NAN},
-                 { 0.779221, 0.810811, 0.703297, NAN},
-                 { 0.779221, 0.810811, 0.703297, NAN},
-                 { 0.779221, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.784314, 0.810811, 0.703297, NAN},
-                 { 0.771242, 0.810811, 0.703297, NAN},
-                 { 0.771242, 0.810811, 0.703297, NAN},
-                 { 0.771242, 0.801802, 0.711111, NAN}}}}}}
-    ;
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[3], csi, 1e-05, 1e-08, true))
-    );
-
-    // ROC skill scores
-    xt::xtensor<double, 5> rocss =
-            {{{{{ 0.710850, 0.801718, 0.713066, NAN}}}}};
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[4], rocss, 1e-05, 1e-08, true))
-    );
+    for (std::size_t m = 0; m < metrics.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results[m], expected[metrics[m]], 1e-05, 1e-08, true
+        ))) << "Failure for (" << metrics[m] << ")";
+    }
 }
 
 TEST(ProbabilistTests, TestRanks)
@@ -415,8 +178,12 @@ TEST(ProbabilistTests, TestRanks)
     xt::xtensor<double, 2> predicted;
     std::tie(observed, predicted) = load_data_p();
 
+    // read in expected results
+    auto expected = load_expected_p();
+    std::vector<std::string> metrics = {"RANK_HIST", "DS", "AS"};
+
     // compute scores
-    std::vector<xt::xarray<double>> metrics =
+    std::vector<xt::xarray<double>> results =
             evalhyd::evalp(
                     // shape: (sites [1], time [t])
                     xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
@@ -434,69 +201,12 @@ TEST(ProbabilistTests, TestRanks)
             );
 
     // check results
-    // Rank histogram
-    xt::xtensor<double, 5> rank_hist;
-#if EVALHYD_TESTING_OS == WINDOWS
-    rank_hist =  {{{{{ 0.607717,  0.      ,  0.      ,  0.      ,  0.      ,  0.003215,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.      ,
-                       0.003215,  0.      ,  0.003215,  0.      ,  0.      ,  0.      ,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.      ,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.003215,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.006431,
-                       0.      ,  0.      ,  0.003215,  0.006431,  0.      ,  0.      ,
-                       0.      ,  0.003215,  0.      ,  0.      ,  0.003215,  0.003215,
-                       0.003215,  0.      ,  0.006431,  0.344051}}}}};
-#elif EVALHYD_TESTING_OS == MACOS
-    rank_hist =  {{{{{ 0.607717,  0.      ,  0.      ,  0.      ,  0.      ,  0.003215,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.      ,
-                       0.003215,  0.      ,  0.003215,  0.      ,  0.      ,  0.      ,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.      ,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.003215,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.006431,
-                       0.      ,  0.      ,  0.003215,  0.006431,  0.      ,  0.      ,
-                       0.      ,  0.003215,  0.      ,  0.      ,  0.003215,  0.003215,
-                       0.003215,  0.      ,  0.006431,  0.344051}}}}};
-#elif EVALHYD_TESTING_OS == LINUX
-    rank_hist =  {{{{{ 0.607717,  0.      ,  0.      ,  0.      ,  0.      ,  0.003215,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.      ,
-                       0.003215,  0.      ,  0.003215,  0.      ,  0.      ,  0.      ,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.      ,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.003215,
-                       0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.006431,
-                       0.      ,  0.      ,  0.003215,  0.006431,  0.      ,  0.      ,
-                       0.      ,  0.003215,  0.      ,  0.      ,  0.003215,  0.003215,
-                       0.003215,  0.      ,  0.006431,  0.344051}}}}};
-#endif
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[0], rank_hist, 1e-04, 1e-06, true))
-    );
-
-    // Delta scores
-    xt::xtensor<double, 4> ds;
-#if EVALHYD_TESTING_OS == WINDOWS
-    ds = {{{{ 148.790164}}}};
-#elif EVALHYD_TESTING_OS == MACOS
-    ds = {{{{ 148.790164}}}};
-#elif EVALHYD_TESTING_OS == LINUX
-    ds = {{{{ 148.790164}}}};
-#endif
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[1], ds, 1e-04, 1e-07, true))
-    );
-
-
-    // Alpha scores
-    xt::xtensor<double, 4> as;
-#if EVALHYD_TESTING_OS == WINDOWS
-    as = {{{{ 0.491481}}}};
-#elif EVALHYD_TESTING_OS == MACOS
-    as = {{{{ 0.491481}}}};
-#elif EVALHYD_TESTING_OS == LINUX
-    as = {{{{ 0.491481}}}};
-#endif
-    EXPECT_TRUE(
-            xt::all(xt::isclose(metrics[2], as, 1e-04, 1e-07, true))
-    );
+    for (std::size_t m = 0; m < metrics.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results[m], expected[metrics[m]], 1e-05, 1e-08, true
+        ))) << "Failure for (" << metrics[m] << ")";
+    }
 }
 
 TEST(ProbabilistTests, TestIntervals)
@@ -506,8 +216,13 @@ TEST(ProbabilistTests, TestIntervals)
     xt::xtensor<double, 2> predicted;
     std::tie(observed, predicted) = load_data_p();
 
+    // read in expected results
+    auto expected = load_expected_p();
+
     // compute scores
-    std::vector<xt::xarray<double>> metrics =
+    std::vector<std::string> metrics = {"CR", "AW", "AWN", "AWI", "WS", "WSS"};
+
+    std::vector<xt::xarray<double>> results =
             evalhyd::evalp(
                     // shape: (sites [1], time [t])
                     xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
@@ -520,29 +235,12 @@ TEST(ProbabilistTests, TestIntervals)
             );
 
     // check results
-    // coverage ratios
-    xt::xtensor<double, 5> cr = {{{{{ 0.006431, 0.03537 }}}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[0], cr, 1e-05, 1e-06, true)));
-
-    // average widths
-    xt::xtensor<double, 5> aw = {{{{{ 9.27492, 31.321543}}}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[1], aw, 1e-05, 1e-06, true)));
-
-    // average widths normalised
-    xt::xtensor<double, 5> awn = {{{{{ 0.007383, 0.024931}}}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[2], awn, 1e-05, 1e-06, true)));
-
-    // average widths indices
-    xt::xtensor<double, 5> awi = {{{{{ 0.982112, 0.988095}}}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[3], awi, 1e-05, 1e-06, true)));
-
-    // Winkler scores
-    xt::xtensor<double, 5> ws = {{{{{ 764.447175, 2578.138264}}}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[4], ws, 1e-05, 1e-06, true)));
-
-    // Winkler skill scores
-    xt::xtensor<double, 5> wss = {{{{{ 0.662189, 0.436039}}}}};
-    EXPECT_TRUE(xt::all(xt::isclose(metrics[5], wss, 1e-05, 1e-06, true)));
+    for (std::size_t m = 0; m < metrics.size(); m++)
+    {
+        EXPECT_TRUE(xt::all(xt::isclose(
+                results[m], expected[metrics[m]], 1e-05, 1e-08, true
+        ))) << "Failure for (" << metrics[m] << ")";
+    }
 }
 
 TEST(ProbabilistTests, TestMasks)
@@ -891,16 +589,16 @@ TEST(ProbabilistTests, TestBootstrap)
     // read in data
     std::ifstream ifs;
 
-    ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv");
     xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1));
     ifs.close();
     std::vector<std::string> datetimes (x_dts.begin(), x_dts.end());
 
-    ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv");
     xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1));
     ifs.close();
 
-    ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_prd_1yr.csv");
     xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1);
     ifs.close();
 
@@ -976,16 +674,16 @@ TEST(ProbabilistTests, TestBootstrapSummary)
     // read in data
     std::ifstream ifs;
 
-    ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv");
     xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1));
     ifs.close();
     std::vector<std::string> datetimes (x_dts.begin(), x_dts.end());
 
-    ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_obs_1yr.csv");
     xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1));
     ifs.close();
 
-    ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv");
+    ifs.open(EVALHYD_DATA_DIR "/data/q_prd_1yr.csv");
     xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1);
     ifs.close();
 
-- 
GitLab