    implement bootstrapping method for metric uncertainty estimation · 16ce8f4e
    Thibault Hallouin authored
    The bootstrapping method is based on non-overlapping block sampling
    with replacement, where the blocks are years of data. The number of
    samples and the sample length (i.e. the number of year blocks) are
    both customisable.
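
    For illustration only, here is a minimal sketch of the sampling scheme
    described above (it is not the library's internal generator, and the
    function name is hypothetical): given the year label of each time step,
    it draws `len_sample` whole-year blocks with replacement, `n_samples`
    times, and collects the corresponding time indices for each sample.

    #include <cstddef>
    #include <map>
    #include <random>
    #include <string>
    #include <vector>

    // illustrative sketch only: draw `n_samples` bootstrap samples, each made
    // of `len_sample` whole-year blocks picked with replacement from the record
    std::vector<std::vector<std::size_t>> sample_year_blocks(
            const std::vector<std::string>& years,  // year label of each time step
            std::size_t n_samples, std::size_t len_sample, unsigned seed = 42)
    {
        // group time indices into non-overlapping blocks, one block per year
        std::map<std::string, std::vector<std::size_t>> blocks;
        for (std::size_t t = 0; t < years.size(); t++)
            blocks[years[t]].push_back(t);

        std::vector<std::vector<std::size_t>> year_blocks;
        for (const auto& kv : blocks)
            year_blocks.push_back(kv.second);

        // draw year blocks with replacement to form each sample
        std::mt19937 gen(seed);
        std::uniform_int_distribution<std::size_t> pick(0, year_blocks.size() - 1);

        std::vector<std::vector<std::size_t>> samples(n_samples);
        for (auto& sample : samples)
            for (std::size_t b = 0; b < len_sample; b++)
                for (std::size_t t : year_blocks[pick(gen)])
                    sample.push_back(t);

        return samples;
    }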
    
    The method is available for both deterministic and probabilistic
    evaluation, where a new samples axis is added to the output. For now,
    the metric values for all the samples are returned; in the future,
    summary statistics may be added (e.g. quantiles or mean/standard
    deviation).
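
    Condensed from the TestBootstrap case in the test file below, and
    assuming `observed`, `predicted`, `datetimes` and `thresholds` are
    already set up as in that test, a bootstrapped probabilistic call looks
    roughly like this (the `{}` placeholders for the temporal masks and
    masking conditions follow the test's own call):

    // condensed sketch based on TestBootstrap below; inputs assumed loaded
    std::unordered_map<std::string, int> bootstrap =
            {{"n_samples", 10}, {"len_sample", 3}};

    std::vector<xt::xarray<double>> metrics_bts =
            evalhyd::evalp(
                    // shape: (sites [1], time [t])
                    xt::view(observed, xt::newaxis(), xt::all()),
                    // shape: (sites [1], lead times [1], members [m], time [t])
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    {"BS", "QS", "CRPS"},
                    thresholds,
                    {},         // t_msk (temporal masks)
                    {},         // m_cdt (masking conditions)
                    bootstrap,  // number and length of bootstrap samples
                    datetimes   // needed to group time steps into year blocks
            );
    // each returned metric carries an extra axis with one value per sample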
    
    /!\ For deterministic evaluation, the n-dimensional functionality became
        untenable, so the number of dimensions is now fixed and inputs are
        restricted to 2D tensors.
    
    New unit tests are included to test both the bootstrapping generator
    and the numerical results obtained with the bootstrapping turned on.
test_probabilist.cpp 15.66 KiB
#include <fstream>
#include <vector>
#include <tuple>
#include <array>
#include <string>
#include <unordered_map>

#include <gtest/gtest.h>

#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xmanipulation.hpp>
#include <xtensor/xcsv.hpp>

#include "evalhyd/evalp.hpp"

using namespace xt::placeholders;  // required for `_` to work
std::tuple<xt::xtensor<double, 1>, xt::xtensor<double, 2>> load_data_p()
{
    // read in data
    std::ifstream ifs;
    ifs.open("./data/q_obs.csv");
    xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<int>(ifs));
    ifs.close();
    ifs.open("./data/q_prd.csv");
    xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs);
    ifs.close();
    return std::make_tuple(observed, predicted);
}
TEST(ProbabilistTests, TestBrier)
{
    // read in data
    xt::xtensor<double, 1> observed;
    xt::xtensor<double, 2> predicted;
    std::tie(observed, predicted) = load_data_p();
    // compute scores
    xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}};
    std::vector<xt::xarray<double>> metrics =
            evalhyd::evalp(
                    // shape: (sites [1], time [t])
                    xt::view(observed, xt::newaxis(), xt::all()),
                    // shape: (sites [1], lead times [1], members [m], time [t])
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    {"BS", "BSS", "BS_CRD", "BS_LBD"},
                    thresholds
            );
    // check results
    // Brier scores
    xt::xtensor<double, 5> bs =
            {{{{{0.10615136, 0.07395622, 0.08669186, NAN}}}}};
    EXPECT_TRUE(
            xt::sum(xt::isclose(metrics[0], bs, 1e-05, 1e-08, true))
            == xt::xscalar<double>(4)
    );
    // Brier skill scores
    xt::xtensor<double, 5> bss =
            {{{{{0.5705594, 0.6661165, 0.5635126, NAN}}}}};
    EXPECT_TRUE(
            xt::sum(xt::isclose(metrics[1], bss, 1e-05, 1e-08, true))
            == xt::xscalar<double>(4)
    );
    // Brier calibration-refinement decompositions
    xt::xtensor<double, 6> bs_crd =
            {{{{{{0.011411758, 0.1524456, 0.2471852},
                 {0.005532413, 0.1530793, 0.2215031},
                 {0.010139431, 0.1220601, 0.1986125},
                 {NAN, NAN, NAN}}}}}};
    EXPECT_TRUE(
            xt::sum(xt::isclose(metrics[2], bs_crd, 1e-05, 1e-08, true))
            == xt::xscalar<double>(12)
    );

    // Brier likelihood-base rate decompositions
    xt::xtensor<double, 6> bs_lbd =
            {{{{{{0.012159881, 0.1506234, 0.2446149},
                 {0.008031746, 0.1473869, 0.2133114},
                 {0.017191279, 0.1048221, 0.1743227},
                 {NAN, NAN, NAN}}}}}};
    EXPECT_TRUE(
            xt::sum(xt::isclose(metrics[3], bs_lbd, 1e-05, 1e-08, true))
            == xt::xscalar<double>(12)
    );
}

TEST(ProbabilistTests, TestQuantiles)
{
    // read in data
    xt::xtensor<double, 1> observed;
    xt::xtensor<double, 2> predicted;
    std::tie(observed, predicted) = load_data_p();

    // compute scores
    std::vector<xt::xarray<double>> metrics =
            evalhyd::evalp(
                    // shape: (sites [1], time [t])
                    xt::view(observed, xt::newaxis(), xt::all()),
                    // shape: (sites [1], lead times [1], members [m], time [t])
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    {"QS", "CRPS"}
            );

    // check results
    // Quantile scores
    xt::xtensor<double, 5> qs =
            {{{{{345.91578, 345.069256, 343.129359, 340.709869, 338.281598,
                 335.973535, 333.555157, 330.332426, 327.333539, 324.325996,
                 321.190082, 318.175117, 315.122186, 311.97205, 308.644942,
                 305.612169, 302.169552, 298.445956, 294.974648, 291.273807,
                 287.724586, 284.101905, 280.235592, 276.21865, 272.501484,
                 268.652733, 264.740168, 260.8558, 256.90329, 252.926292,
                 248.931239, 244.986396, 240.662998, 236.328964, 232.089785,
                 227.387089, 222.976008, 218.699975, 214.099678, 209.67252,
                 205.189587, 200.395746, 195.2372, 190.080139, 185.384244,
                 180.617858, 174.58323, 169.154093, 163.110932, 156.274796,
                 147.575315}}}}};
    EXPECT_TRUE(xt::allclose(metrics[0], qs));

    // Continuous ranked probability scores
    xt::xtensor<double, 4> crps =
            {{{{252.956919}}}};
    EXPECT_TRUE(xt::allclose(metrics[1], crps));
}

TEST(ProbabilistTests, TestMasks)
{
    // read in data
    xt::xtensor<double, 1> observed;
    xt::xtensor<double, 2> predicted;
    std::tie(observed, predicted) = load_data_p();

    // generate temporal subset by dropping 20 first time steps
    xt::xtensor<double, 4> masks =
            xt::ones<bool>({std::size_t {1}, std::size_t {1}, std::size_t {1},
                            std::size_t {observed.size()}});
    xt::view(masks, 0, xt::all(), 0, xt::range(0, 20)) = 0;
    // compute scores using masks to subset whole record
    xt::xtensor<double, 2> thresholds = {{690, 534, 445}};

    std::vector<std::string> metrics =
            {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"};

    std::vector<xt::xarray<double>> metrics_masked =
            evalhyd::evalp(
                    // shape: (sites [1], time [t])
                    xt::view(observed, xt::newaxis(), xt::all()),
                    // shape: (sites [1], lead times [1], members [m], time [t])
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    metrics,
                    thresholds,
                    // shape: (sites [1], lead times [1], subsets [1], time [t])
                    masks
            );

    // compute scores on pre-computed subset of whole record
    std::vector<xt::xarray<double>> metrics_subset =
            evalhyd::evalp(
                    // shape: (sites [1], time [t-20])
                    xt::view(observed, xt::newaxis(), xt::range(20, _)),
                    // shape: (sites [1], lead times [1], members [m], time [t-20])
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::range(20, _)),
                    metrics,
                    thresholds
            );

    // check results are identical
    for (int m = 0; m < metrics.size(); m++)
    {
        EXPECT_TRUE(xt::allclose(metrics_masked[m], metrics_subset[m]))
                << "Failure for (" << metrics[m] << ")";
    }
}

TEST(ProbabilistTests, TestMaskingConditions)
{
    xt::xtensor<double, 2> thresholds = {{690, 534, 445}};

    std::vector<std::string> metrics =
            {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"};

    // read in data
    xt::xtensor<double, 1> observed_;
    xt::xtensor<double, 2> predicted;
    std::tie(observed_, predicted) = load_data_p();

    // turn observed into 2D view (to simplify syntax later on)
    auto observed = xt::view(observed_, xt::newaxis(), xt::all());

    // generate dummy empty masks required to access next optional argument
    xt::xtensor<bool, 4> masks;

    // conditions on streamflow values _________________________________________

    // compute scores using masking conditions on streamflow to subset whole record
    xt::xtensor<std::array<char, 32>, 2> q_conditions = {
            {{"q_obs{<2000,>3000}"}}
    };

    std::vector<xt::xarray<double>> metrics_q_conditioned =
            evalhyd::evalp(
                    observed,
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    metrics,
                    thresholds,
                    masks,
                    q_conditions
            );

    // compute scores using "NaN-ed" time indices where conditions on streamflow met
    std::vector<xt::xarray<double>> metrics_q_preconditioned =
            evalhyd::evalp(
                    xt::where((observed < 2000) | (observed > 3000), observed, NAN),
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    metrics,
                    thresholds
            );

    // check results are identical
    for (int m = 0; m < metrics.size(); m++)
    {
        EXPECT_TRUE(
                xt::allclose(
                        metrics_q_conditioned[m], metrics_q_preconditioned[m]
                )
        ) << "Failure for (" << metrics[m] << ")";
    }

    // conditions on streamflow statistics _____________________________________

    // compute scores using masking conditions on streamflow to subset whole record
    xt::xtensor<std::array<char, 32>, 2> q_conditions_ = {
            {{"q_prd_mean{>=median}"}}
    };

    auto q_prd_mean = xt::mean(predicted, {0}, xt::keep_dims);
    double median = xt::median(q_prd_mean);

    std::vector<xt::xarray<double>> metrics_q_conditioned_ =
            evalhyd::evalp(
                    observed,
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    metrics,
                    thresholds,
                    masks,
                    q_conditions_
            );

    // compute scores using "NaN-ed" time indices where conditions on streamflow met
    std::vector<xt::xarray<double>> metrics_q_preconditioned_ =
            evalhyd::evalp(
                    xt::where(q_prd_mean >= median, observed, NAN),
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    metrics,
                    thresholds
            );

    // check results are identical
    for (int m = 0; m < metrics.size(); m++)
    {
        EXPECT_TRUE(
                xt::allclose(
                        metrics_q_conditioned_[m], metrics_q_preconditioned_[m]
                )
        ) << "Failure for (" << metrics[m] << ")";
    }

    // conditions on temporal indices __________________________________________

    // compute scores using masking conditions on time indices to subset whole record
    xt::xtensor<std::array<char, 32>, 2> t_conditions = {
            {{"t{0,1,2,3,4,5:97,97,98,99}"}}
    };

    std::vector<xt::xarray<double>> metrics_t_conditioned =
            evalhyd::evalp(
                    observed,
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    metrics,
                    thresholds,
                    masks,
                    t_conditions
            );

    // compute scores on already subset time series
    std::vector<xt::xarray<double>> metrics_t_subset =
            evalhyd::evalp(
                    xt::view(observed, xt::all(), xt::range(0, 100)),
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::range(0, 100)),
                    metrics,
                    thresholds
            );

    // check results are identical
    for (int m = 0; m < metrics.size(); m++)
    {
        EXPECT_TRUE(
                xt::allclose(
                        metrics_t_conditioned[m], metrics_t_subset[m]
                )
        ) << "Failure for (" << metrics[m] << ")";
    }
}

TEST(ProbabilistTests, TestMissingData)
{
    xt::xtensor<double, 2> thresholds {{ 4., 5. }};

    std::vector<std::string> metrics =
            {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"};

    // compute metrics on series with NaN
    xt::xtensor<double, 4> forecast_nan {{
        // leadtime 1
        {{ 5.3, 4.2, 5.7, 2.3, NAN },
         { 4.3, 4.2, 4.7, 4.3, NAN },
         { 5.3, 5.2, 5.7, 2.3, NAN }},
        // leadtime 2
        {{ NAN, 4.2, 5.7, 2.3, 3.1 },
         { NAN, 4.2, 4.7, 4.3, 3.3 },
         { NAN, 5.2, 5.7, 2.3, 3.9 }}
    }};

    xt::xtensor<double, 2> observed_nan
        {{ 4.7, 4.3, NAN, 2.7, 4.1 }};

    std::vector<xt::xarray<double>> metrics_nan =
            evalhyd::evalp(
                    observed_nan,
                    forecast_nan,
                    metrics,
                    thresholds
            );

    // compute metrics on manually subset series (one leadtime at a time)
    xt::xtensor<double, 4> forecast_pp1 {{
        // leadtime 1
        {{ 5.3, 4.2, 2.3 },
         { 4.3, 4.2, 4.3 },
         { 5.3, 5.2, 2.3 }},
    }};

    xt::xtensor<double, 2> observed_pp1
        {{ 4.7, 4.3, 2.7 }};

    std::vector<xt::xarray<double>> metrics_pp1 =
            evalhyd::evalp(
                    observed_pp1,
                    forecast_pp1,
                    metrics,
                    thresholds
            );

    xt::xtensor<double, 4> forecast_pp2 {{
        // leadtime 2
        {{ 4.2, 2.3, 3.1 },
         { 4.2, 4.3, 3.3 },
         { 5.2, 2.3, 3.9 }}
    }};

    xt::xtensor<double, 2> observed_pp2
        {{ 4.3, 2.7, 4.1 }};

    std::vector<xt::xarray<double>> metrics_pp2 =
            evalhyd::evalp(
                    observed_pp2,
                    forecast_pp2,
                    metrics,
                    thresholds
            );

    // check that numerical results are identical
    for (int m = 0; m < metrics.size(); m++)
    {
        // for leadtime 1
        EXPECT_TRUE(
                xt::allclose(
                        xt::view(metrics_nan[m], xt::all(), 0),
                        xt::view(metrics_pp1[m], xt::all(), 0)
                )
        ) << "Failure for (" << metrics[m] << ", " << "leadtime 1)";

        // for leadtime 2
        EXPECT_TRUE(
                xt::allclose(
                        xt::view(metrics_nan[m], xt::all(), 1),
                        xt::view(metrics_pp2[m], xt::all(), 0)
                )
        ) << "Failure for (" << metrics[m] << ", " << "leadtime 2)";
    }
}

TEST(ProbabilistTests, TestBootstrap)
{
    xt::xtensor<double, 2> thresholds {{ 33.87, 55.67 }};

    std::vector<std::string> metrics =
            {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"};

    // read in data
    std::ifstream ifs;

    ifs.open("./data/q_obs_1yr.csv");
    xt::xtensor<std::string, 1> x_dts =
            xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1));
    ifs.close();
    std::vector<std::string> datetimes (x_dts.begin(), x_dts.end());

    ifs.open("./data/q_obs_1yr.csv");
    xt::xtensor<double, 1> observed =
            xt::squeeze(xt::load_csv<double>(ifs, ',', 1));
    ifs.close();

    ifs.open("./data/q_prd_1yr.csv");
    xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1);
    ifs.close();

    // compute metrics via bootstrap
    std::unordered_map<std::string, int> bootstrap =
            {{"n_samples", 10}, {"len_sample", 3}};

    std::vector<xt::xarray<double>> metrics_bts =
            evalhyd::evalp(
                    xt::view(observed, xt::newaxis(), xt::all()),
                    xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    metrics,
                    thresholds,
                    {},  // t_msk
                    {},  // m_cdt
                    bootstrap,
                    datetimes
            );

    // compute metrics by repeating year of data 3 times
    // (since there is only one year of data, and the bootstrap works on
    // one-year blocks, it can only select that given year to form samples,
    // and the length of the sample corresponds to how many times this year
    // is repeated in the sample, so repeating the input data this many
    // times should result in the same numerical results)
    xt::xtensor<double, 1> observed_x3 =
            xt::concatenate(xt::xtuple(observed, observed, observed), 0);
    xt::xtensor<double, 2> predicted_x3 =
            xt::concatenate(xt::xtuple(predicted, predicted, predicted), 1);

    std::vector<xt::xarray<double>> metrics_rep =
            evalhyd::evalp(
                    xt::view(observed_x3, xt::newaxis(), xt::all()),
                    xt::view(predicted_x3, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()),
                    metrics,
                    thresholds
            );

    // check results are identical
    for (int m = 0; m < metrics.size(); m++)
    {
        EXPECT_TRUE(
                xt::allclose(
                        metrics_bts[m], metrics_rep[m]
                )
        ) << "Failure for (" << metrics[m] << ")";
    }
}