test_determinist.cpp 14.20 KiB
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#include <fstream>
#include <vector>
#include <tuple>
#include <array>
#include <gtest/gtest.h>
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xmanipulation.hpp>
#include <xtensor/xcsv.hpp>
#include "evalhyd/evald.hpp"
#ifndef EVALHYD_DATA_DIR
#error "need to define data directory"
#endif
using namespace xt::placeholders;  // required for `_` to work
std::vector<std::string> all_metrics_d = {
        "RMSE", "NSE", "KGE", "KGEPRIME"
std::tuple<xt::xtensor<double, 2>, xt::xtensor<double, 2>> load_data_d()
    // read in data
    std::ifstream ifs;
    ifs.open(EVALHYD_DATA_DIR "/q_obs.csv");
    xt::xtensor<double, 2> observed = xt::load_csv<int>(ifs);
    ifs.close();
    ifs.open(EVALHYD_DATA_DIR "/q_prd.csv");
    xt::xtensor<double, 2> predicted = xt::view(
            xt::load_csv<double>(ifs), xt::range(0, 5), xt::all()
    ifs.close();
    return std::make_tuple(observed, predicted);
TEST(DeterministTests, TestMetrics)
    // read in data
    xt::xtensor<double, 2> observed;
    xt::xtensor<double, 2> predicted;
    std::tie(observed, predicted) = load_data_d();
    // compute scores (with 2D tensors)
    std::vector<xt::xarray<double>> metrics =
            evalhyd::evald(
                    observed, predicted, {"RMSE", "NSE", "KGE", "KGEPRIME"}
    // check results on all metrics
    xt::xtensor<double, 3> rmse =
            {{{777.034272}},
             {{776.878479}},
             {{777.800217}},
             {{778.151082}},
             {{778.61487 }}};
    EXPECT_TRUE(xt::allclose(metrics[0], rmse));
    xt::xtensor<double, 3> nse =
            {{{0.718912}},
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
{{0.719025}}, {{0.718358}}, {{0.718104}}, {{0.717767}}}; EXPECT_TRUE(xt::allclose(metrics[1], nse)); xt::xtensor<double, 3> kge = {{{0.748088}}, {{0.746106}}, {{0.744111}}, {{0.743011}}, {{0.741768}}}; EXPECT_TRUE(xt::allclose(metrics[2], kge)); xt::xtensor<double, 3> kgeprime = {{{0.813141}}, {{0.812775}}, {{0.812032}}, {{0.811787}}, {{0.811387}}}; EXPECT_TRUE(xt::allclose(metrics[3], kgeprime)); } TEST(DeterministTests, TestTransform) { // read in data xt::xtensor<double, 2> observed; xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); // compute and check results on square-rooted streamflow series std::vector<xt::xarray<double>> metrics = evalhyd::evald(observed, predicted, {"NSE"}, "sqrt"); xt::xtensor<double, 3> nse_sqrt = {{{0.882817}}, {{0.883023}}, {{0.883019}}, {{0.883029}}, {{0.882972}}}; EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_sqrt))); // compute and check results on inverted streamflow series metrics = evalhyd::evald(observed, predicted, {"NSE"}, "inv"); xt::xtensor<double, 3> nse_inv = {{{0.737323}}, {{0.737404}}, {{0.737429}}, {{0.737546}}, {{0.737595}}}; EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_inv))); // compute and check results on square-rooted streamflow series metrics = evalhyd::evald(observed, predicted, {"NSE"}, "log"); xt::xtensor<double, 3> nse_log = {{{0.893344}}, {{0.893523}}, {{0.893585}}, {{0.893758}}, {{0.893793}}}; EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_log))); // compute and check results on power-transformed streamflow series metrics = evalhyd::evald(observed, predicted, {"NSE"}, "pow", 0.2); xt::xtensor<double, 3> nse_pow = {{{0.899207}}, {{0.899395}},
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
{{0.899451}}, {{0.899578}}, {{0.899588}}}; EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_pow))); } TEST(DeterministTests, TestMasks) { // read in data xt::xtensor<double, 2> observed; xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); // generate temporal subset by dropping 20 first time steps xt::xtensor<bool, 3> masks = xt::ones<bool>({std::size_t {predicted.shape(0)}, std::size_t {1}, std::size_t {observed.size()}}); xt::view(masks, xt::all(), 0, xt::range(0, 20)) = 0; // compute scores using masks to subset whole record std::vector<xt::xarray<double>> metrics_masked = evalhyd::evald(observed, predicted, all_metrics_d, {}, {}, {}, masks); // compute scores on pre-computed subset of whole record xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _)); xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _)); std::vector<xt::xarray<double>> metrics_subset = evalhyd::evald(obs, prd, all_metrics_d); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { EXPECT_TRUE(xt::all(xt::isclose(metrics_masked[m], metrics_subset[m]))) << "Failure for (" << all_metrics_d[m] << ")"; } } TEST(DeterministTests, TestMaskingConditions) { // read in data xt::xtensor<double, 2> observed; xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); // generate dummy empty masks required to access next optional argument xt::xtensor<bool, 3> masks; // conditions on streamflow values _________________________________________ // compute scores using masking conditions on streamflow to subset whole record xt::xtensor<std::array<char, 32>, 2> q_conditions = {{ {{std::array<char, 32>{"q_obs{<2000,>3000}"}}}, {{std::array<char, 32>{"q_obs{<2000,>3000}"}}}, {{std::array<char, 32>{"q_obs{<2000,>3000}"}}}, {{std::array<char, 32>{"q_obs{<2000,>3000}"}}}, {{std::array<char, 32>{"q_obs{<2000,>3000}"}}} }}; std::vector<xt::xarray<double>> metrics_q_conditioned = evalhyd::evald( observed, predicted, all_metrics_d, {}, {}, {}, masks, q_conditions ); // compute scores using "NaN-ed" time indices where conditions on streamflow met std::vector<xt::xarray<double>> metrics_q_preconditioned = evalhyd::evald(
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
xt::eval(xt::where((observed < 2000) | (observed > 3000), observed, NAN)), predicted, all_metrics_d ); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { EXPECT_TRUE( xt::all(xt::isclose( metrics_q_conditioned[m], metrics_q_preconditioned[m] )) ) << "Failure for (" << all_metrics_d[m] << ")"; } // conditions on streamflow statistics _____________________________________ // compute scores using masking conditions on streamflow to subset whole record xt::xtensor<std::array<char, 32>, 2> q_conditions_ = {{ {{std::array<char, 32>{"q_obs{>=mean}"}}}, {{std::array<char, 32>{"q_obs{>=mean}"}}}, {{std::array<char, 32>{"q_obs{>=mean}"}}}, {{std::array<char, 32>{"q_obs{>=mean}"}}}, {{std::array<char, 32>{"q_obs{>=mean}"}}} }}; double mean = xt::mean(observed, {1})(); std::vector<xt::xarray<double>> metrics_q_conditioned_ = evalhyd::evald( observed, predicted, all_metrics_d, {}, {}, {}, masks, q_conditions_ ); // compute scores using "NaN-ed" time indices where conditions on streamflow met std::vector<xt::xarray<double>> metrics_q_preconditioned_ = evalhyd::evald( xt::eval(xt::where(observed >= mean, observed, NAN)), predicted, all_metrics_d ); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { EXPECT_TRUE( xt::all(xt::isclose( metrics_q_conditioned_[m], metrics_q_preconditioned_[m] )) ) << "Failure for (" << all_metrics_d[m] << ")"; } // conditions on temporal indices __________________________________________ // compute scores using masking conditions on time indices to subset whole record xt::xtensor<std::array<char, 32>, 2> t_conditions = {{ {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}}, {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}}, {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}}, {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}}, {{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}} }}; std::vector<xt::xarray<double>> metrics_t_conditioned = evalhyd::evald( observed, predicted, all_metrics_d, {}, {}, {}, masks, t_conditions ); // compute scores on already subset time series
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
std::vector<xt::xarray<double>> metrics_t_subset = evalhyd::evald( xt::eval(xt::view(observed, xt::all(), xt::range(0, 100))), xt::eval(xt::view(predicted, xt::all(), xt::range(0, 100))), all_metrics_d ); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { EXPECT_TRUE( xt::all(xt::isclose( metrics_t_conditioned[m], metrics_t_subset[m] )) ) << "Failure for (" << all_metrics_d[m] << ")"; } } TEST(DeterministTests, TestMissingData) { // read in data xt::xtensor<double, 2> observed; xt::xtensor<double, 2> predicted; std::tie(observed, predicted) = load_data_d(); // add some missing observations artificially by assigning NaN values xt::view(observed, xt::all(), xt::range(0, 20)) = NAN; // add some missing predictions artificially by assigning NaN values xt::view(observed, 0, xt::range(20, 23)) = NAN; xt::view(observed, 1, xt::range(20, 26)) = NAN; xt::view(observed, 2, xt::range(20, 29)) = NAN; xt::view(observed, 3, xt::range(20, 32)) = NAN; xt::view(observed, 4, xt::range(20, 35)) = NAN; // compute metrics with observations containing NaN values std::vector<xt::xarray<double>> metrics_nan = evalhyd::evald(observed, predicted, all_metrics_d); for (std::size_t m = 0; m < all_metrics_d.size(); m++) { for (std::size_t p = 0; p < predicted.shape(0); p++) { // compute metrics on subset of observations and predictions (i.e. // eliminating region with NaN in observations or predictions manually) xt::xtensor<double, 1> obs = xt::view(observed, 0, xt::range(20+(3*(p+1)), _)); xt::xtensor<double, 1> prd = xt::view(predicted, p, xt::range(20+(3*(p+1)), _)); std::vector<xt::xarray<double>> metrics_sbs = evalhyd::evald( xt::eval(xt::view(obs, xt::newaxis(), xt::all())), xt::eval(xt::view(prd, xt::newaxis(), xt::all())), {all_metrics_d[m]} ); // compare to check results are the same EXPECT_TRUE( xt::all(xt::isclose( xt::view(metrics_nan[m], p), metrics_sbs[0] )) ) << "Failure for (" << all_metrics_d[m] << ")"; } } } TEST(DeterministTests, TestBootstrap) { // read in data
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
std::ifstream ifs; ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1)); ifs.close(); std::vector<std::string> datetimes (x_dts.begin(), x_dts.end()); ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1)); ifs.close(); ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv"); xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1); ifs.close(); // compute metrics via bootstrap std::unordered_map<std::string, int> bootstrap = {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; std::vector<xt::xarray<double>> metrics_bts = evalhyd::evald( xt::eval(xt::view(observed, xt::newaxis(), xt::all())), predicted, all_metrics_d, {}, // transform {}, // exponent {}, // epsilon xt::xtensor<bool, 3>({}), // t_msk xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt bootstrap, datetimes ); // compute metrics by repeating year of data 3 times // (since there is only one year of data, and that the bootstrap works on // one-year blocks, it can only select that given year to form samples, // and the length of the sample corresponds to how many times this year // is repeated in the sample, so that repeating the input data this many // times should result in the same numerical results) xt::xtensor<double, 1> observed_x3 = xt::concatenate(xt::xtuple(observed, observed, observed), 0); xt::xtensor<double, 2> predicted_x3 = xt::concatenate(xt::xtuple(predicted, predicted, predicted), 1); std::vector<xt::xarray<double>> metrics_rep = evalhyd::evald( xt::eval(xt::view(observed_x3, xt::newaxis(), xt::all())), predicted_x3, all_metrics_d ); // check results are identical for (std::size_t m = 0; m < all_metrics_d.size(); m++) { EXPECT_TRUE( xt::all(xt::isclose( metrics_bts[m], metrics_rep[m] )) ) << "Failure for (" << all_metrics_d[m] << ")"; } }