An error occurred while loading the file. Please try again.
-
Thibault Hallouin authored30d27e16
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#include <fstream>
#include <vector>
#include <tuple>
#include <array>
#include <gtest/gtest.h>
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xmanipulation.hpp>
#include <xtensor/xcsv.hpp>
#include "evalhyd/evald.hpp"
#ifndef EVALHYD_DATA_DIR
#error "need to define data directory"
#endif
using namespace xt::placeholders; // required for `_` to work
std::vector<std::string> all_metrics_d = {
"RMSE", "NSE", "KGE", "KGEPRIME"
};
std::tuple<xt::xtensor<double, 2>, xt::xtensor<double, 2>> load_data_d()
{
// read in data
std::ifstream ifs;
ifs.open(EVALHYD_DATA_DIR "/q_obs.csv");
xt::xtensor<double, 2> observed = xt::load_csv<int>(ifs);
ifs.close();
ifs.open(EVALHYD_DATA_DIR "/q_prd.csv");
xt::xtensor<double, 2> predicted = xt::view(
xt::load_csv<double>(ifs), xt::range(0, 5), xt::all()
);
ifs.close();
return std::make_tuple(observed, predicted);
}
TEST(DeterministTests, TestMetrics)
{
// read in data
xt::xtensor<double, 2> observed;
xt::xtensor<double, 2> predicted;
std::tie(observed, predicted) = load_data_d();
// compute scores (with 2D tensors)
std::vector<xt::xarray<double>> metrics =
evalhyd::evald(
observed, predicted, {"RMSE", "NSE", "KGE", "KGEPRIME"}
);
// check results on all metrics
xt::xtensor<double, 3> rmse =
{{{777.034272}},
{{776.878479}},
{{777.800217}},
{{778.151082}},
{{778.61487 }}};
EXPECT_TRUE(xt::allclose(metrics[0], rmse));
xt::xtensor<double, 3> nse =
{{{0.718912}},
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
{{0.719025}},
{{0.718358}},
{{0.718104}},
{{0.717767}}};
EXPECT_TRUE(xt::allclose(metrics[1], nse));
xt::xtensor<double, 3> kge =
{{{0.748088}},
{{0.746106}},
{{0.744111}},
{{0.743011}},
{{0.741768}}};
EXPECT_TRUE(xt::allclose(metrics[2], kge));
xt::xtensor<double, 3> kgeprime =
{{{0.813141}},
{{0.812775}},
{{0.812032}},
{{0.811787}},
{{0.811387}}};
EXPECT_TRUE(xt::allclose(metrics[3], kgeprime));
}
TEST(DeterministTests, TestTransform)
{
// read in data
xt::xtensor<double, 2> observed;
xt::xtensor<double, 2> predicted;
std::tie(observed, predicted) = load_data_d();
// compute and check results on square-rooted streamflow series
std::vector<xt::xarray<double>> metrics =
evalhyd::evald(observed, predicted, {"NSE"}, "sqrt");
xt::xtensor<double, 3> nse_sqrt =
{{{0.882817}},
{{0.883023}},
{{0.883019}},
{{0.883029}},
{{0.882972}}};
EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_sqrt)));
// compute and check results on inverted streamflow series
metrics = evalhyd::evald(observed, predicted, {"NSE"}, "inv");
xt::xtensor<double, 3> nse_inv =
{{{0.737323}},
{{0.737404}},
{{0.737429}},
{{0.737546}},
{{0.737595}}};
EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_inv)));
// compute and check results on square-rooted streamflow series
metrics = evalhyd::evald(observed, predicted, {"NSE"}, "log");
xt::xtensor<double, 3> nse_log =
{{{0.893344}},
{{0.893523}},
{{0.893585}},
{{0.893758}},
{{0.893793}}};
EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_log)));
// compute and check results on power-transformed streamflow series
metrics = evalhyd::evald(observed, predicted, {"NSE"}, "pow", 0.2);
xt::xtensor<double, 3> nse_pow =
{{{0.899207}},
{{0.899395}},
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
{{0.899451}},
{{0.899578}},
{{0.899588}}};
EXPECT_TRUE(xt::all(xt::isclose(metrics[0], nse_pow)));
}
TEST(DeterministTests, TestMasks)
{
// read in data
xt::xtensor<double, 2> observed;
xt::xtensor<double, 2> predicted;
std::tie(observed, predicted) = load_data_d();
// generate temporal subset by dropping 20 first time steps
xt::xtensor<bool, 3> masks =
xt::ones<bool>({std::size_t {predicted.shape(0)},
std::size_t {1},
std::size_t {observed.size()}});
xt::view(masks, xt::all(), 0, xt::range(0, 20)) = 0;
// compute scores using masks to subset whole record
std::vector<xt::xarray<double>> metrics_masked =
evalhyd::evald(observed, predicted, all_metrics_d, {}, {}, {}, masks);
// compute scores on pre-computed subset of whole record
xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _));
xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _));
std::vector<xt::xarray<double>> metrics_subset =
evalhyd::evald(obs, prd, all_metrics_d);
// check results are identical
for (std::size_t m = 0; m < all_metrics_d.size(); m++)
{
EXPECT_TRUE(xt::all(xt::isclose(metrics_masked[m], metrics_subset[m])))
<< "Failure for (" << all_metrics_d[m] << ")";
}
}
TEST(DeterministTests, TestMaskingConditions)
{
// read in data
xt::xtensor<double, 2> observed;
xt::xtensor<double, 2> predicted;
std::tie(observed, predicted) = load_data_d();
// generate dummy empty masks required to access next optional argument
xt::xtensor<bool, 3> masks;
// conditions on streamflow values _________________________________________
// compute scores using masking conditions on streamflow to subset whole record
xt::xtensor<std::array<char, 32>, 2> q_conditions = {{
{{std::array<char, 32>{"q_obs{<2000,>3000}"}}},
{{std::array<char, 32>{"q_obs{<2000,>3000}"}}},
{{std::array<char, 32>{"q_obs{<2000,>3000}"}}},
{{std::array<char, 32>{"q_obs{<2000,>3000}"}}},
{{std::array<char, 32>{"q_obs{<2000,>3000}"}}}
}};
std::vector<xt::xarray<double>> metrics_q_conditioned =
evalhyd::evald(
observed, predicted, all_metrics_d,
{}, {}, {}, masks, q_conditions
);
// compute scores using "NaN-ed" time indices where conditions on streamflow met
std::vector<xt::xarray<double>> metrics_q_preconditioned =
evalhyd::evald(
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
xt::eval(xt::where((observed < 2000) | (observed > 3000), observed, NAN)),
predicted,
all_metrics_d
);
// check results are identical
for (std::size_t m = 0; m < all_metrics_d.size(); m++)
{
EXPECT_TRUE(
xt::all(xt::isclose(
metrics_q_conditioned[m], metrics_q_preconditioned[m]
))
) << "Failure for (" << all_metrics_d[m] << ")";
}
// conditions on streamflow statistics _____________________________________
// compute scores using masking conditions on streamflow to subset whole record
xt::xtensor<std::array<char, 32>, 2> q_conditions_ = {{
{{std::array<char, 32>{"q_obs{>=mean}"}}},
{{std::array<char, 32>{"q_obs{>=mean}"}}},
{{std::array<char, 32>{"q_obs{>=mean}"}}},
{{std::array<char, 32>{"q_obs{>=mean}"}}},
{{std::array<char, 32>{"q_obs{>=mean}"}}}
}};
double mean = xt::mean(observed, {1})();
std::vector<xt::xarray<double>> metrics_q_conditioned_ =
evalhyd::evald(
observed, predicted, all_metrics_d,
{}, {}, {}, masks, q_conditions_
);
// compute scores using "NaN-ed" time indices where conditions on streamflow met
std::vector<xt::xarray<double>> metrics_q_preconditioned_ =
evalhyd::evald(
xt::eval(xt::where(observed >= mean, observed, NAN)),
predicted,
all_metrics_d
);
// check results are identical
for (std::size_t m = 0; m < all_metrics_d.size(); m++)
{
EXPECT_TRUE(
xt::all(xt::isclose(
metrics_q_conditioned_[m], metrics_q_preconditioned_[m]
))
) << "Failure for (" << all_metrics_d[m] << ")";
}
// conditions on temporal indices __________________________________________
// compute scores using masking conditions on time indices to subset whole record
xt::xtensor<std::array<char, 32>, 2> t_conditions = {{
{{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}},
{{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}},
{{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}},
{{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}},
{{std::array<char, 32>{"t{0,1,2,3,4,5:97,97,98,99}"}}}
}};
std::vector<xt::xarray<double>> metrics_t_conditioned =
evalhyd::evald(
observed, predicted, all_metrics_d,
{}, {}, {}, masks, t_conditions
);
// compute scores on already subset time series
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
std::vector<xt::xarray<double>> metrics_t_subset =
evalhyd::evald(
xt::eval(xt::view(observed, xt::all(), xt::range(0, 100))),
xt::eval(xt::view(predicted, xt::all(), xt::range(0, 100))),
all_metrics_d
);
// check results are identical
for (std::size_t m = 0; m < all_metrics_d.size(); m++)
{
EXPECT_TRUE(
xt::all(xt::isclose(
metrics_t_conditioned[m], metrics_t_subset[m]
))
) << "Failure for (" << all_metrics_d[m] << ")";
}
}
TEST(DeterministTests, TestMissingData)
{
// read in data
xt::xtensor<double, 2> observed;
xt::xtensor<double, 2> predicted;
std::tie(observed, predicted) = load_data_d();
// add some missing observations artificially by assigning NaN values
xt::view(observed, xt::all(), xt::range(0, 20)) = NAN;
// add some missing predictions artificially by assigning NaN values
xt::view(observed, 0, xt::range(20, 23)) = NAN;
xt::view(observed, 1, xt::range(20, 26)) = NAN;
xt::view(observed, 2, xt::range(20, 29)) = NAN;
xt::view(observed, 3, xt::range(20, 32)) = NAN;
xt::view(observed, 4, xt::range(20, 35)) = NAN;
// compute metrics with observations containing NaN values
std::vector<xt::xarray<double>> metrics_nan =
evalhyd::evald(observed, predicted, all_metrics_d);
for (std::size_t m = 0; m < all_metrics_d.size(); m++)
{
for (std::size_t p = 0; p < predicted.shape(0); p++)
{
// compute metrics on subset of observations and predictions (i.e.
// eliminating region with NaN in observations or predictions manually)
xt::xtensor<double, 1> obs =
xt::view(observed, 0, xt::range(20+(3*(p+1)), _));
xt::xtensor<double, 1> prd =
xt::view(predicted, p, xt::range(20+(3*(p+1)), _));
std::vector<xt::xarray<double>> metrics_sbs =
evalhyd::evald(
xt::eval(xt::view(obs, xt::newaxis(), xt::all())),
xt::eval(xt::view(prd, xt::newaxis(), xt::all())),
{all_metrics_d[m]}
);
// compare to check results are the same
EXPECT_TRUE(
xt::all(xt::isclose(
xt::view(metrics_nan[m], p),
metrics_sbs[0]
))
) << "Failure for (" << all_metrics_d[m] << ")";
}
}
}
TEST(DeterministTests, TestBootstrap)
{
// read in data
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
std::ifstream ifs;
ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1));
ifs.close();
std::vector<std::string> datetimes (x_dts.begin(), x_dts.end());
ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv");
xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1));
ifs.close();
ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv");
xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1);
ifs.close();
// compute metrics via bootstrap
std::unordered_map<std::string, int> bootstrap =
{{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}};
std::vector<xt::xarray<double>> metrics_bts =
evalhyd::evald(
xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
predicted,
all_metrics_d,
{}, // transform
{}, // exponent
{}, // epsilon
xt::xtensor<bool, 3>({}), // t_msk
xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt
bootstrap,
datetimes
);
// compute metrics by repeating year of data 3 times
// (since there is only one year of data, and that the bootstrap works on
// one-year blocks, it can only select that given year to form samples,
// and the length of the sample corresponds to how many times this year
// is repeated in the sample, so that repeating the input data this many
// times should result in the same numerical results)
xt::xtensor<double, 1> observed_x3 =
xt::concatenate(xt::xtuple(observed, observed, observed), 0);
xt::xtensor<double, 2> predicted_x3 =
xt::concatenate(xt::xtuple(predicted, predicted, predicted), 1);
std::vector<xt::xarray<double>> metrics_rep =
evalhyd::evald(
xt::eval(xt::view(observed_x3, xt::newaxis(), xt::all())),
predicted_x3,
all_metrics_d
);
// check results are identical
for (std::size_t m = 0; m < all_metrics_d.size(); m++)
{
EXPECT_TRUE(
xt::all(xt::isclose(
metrics_bts[m], metrics_rep[m]
))
) << "Failure for (" << all_metrics_d[m] << ")";
}
}