diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp index 56926350ad3f9c87c7f8a69eaf0cdbcc20c4e481..71e6905eb9ae3633792b98eb41c579b057666aef 100644 --- a/include/evalhyd/evald.hpp +++ b/include/evalhyd/evald.hpp @@ -11,6 +11,7 @@ #include "../../src/utils.hpp" #include "../../src/masks.hpp" +#include "../../src/maths.hpp" #include "../../src/uncertainty.hpp" #include "../../src/determinist/evaluator.hpp" @@ -103,10 +104,11 @@ namespace evalhyd /// Parameters for the bootstrapping method used to estimate the /// sampling uncertainty in the evaluation of the predictions. /// Two parameters are mandatory ('n_samples' the number of random - /// samples, and 'len_sample' the length of one sample in number - /// of years), and one parameter is optional ('seed'). If not - /// provided, no bootstrapping is performed. If provided, *dts* must - /// also be provided. + /// samples, 'len_sample' the length of one sample in number of years, + /// and 'summary' the statistics to return to characterise the + /// sampling distribution), and one parameter is optional ('seed'). + /// If not provided, no bootstrapping is performed. If provided, + /// *dts* must also be provided. /// /// .. seealso:: :doc:`../../functionalities/bootstrapping` /// @@ -168,7 +170,7 @@ namespace evalhyd const xt::xtensor<bool, 2>& t_msk = {}, const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {}, const std::unordered_map<std::string, int>& bootstrap = - {{"n_samples", -9}, {"len_sample", -9}}, + {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, const std::vector<std::string>& dts = {} ) { @@ -286,12 +288,12 @@ namespace evalhyd // generate bootstrap experiment if requested std::vector<xt::xkeep_slice<int>> exp; - if ((bootstrap.find("n_samples")->second != -9) - and (bootstrap.find("len_sample")->second != -9)) + auto n_samples = bootstrap.find("n_samples")->second; + auto len_sample = bootstrap.find("len_sample")->second; + if ((n_samples != -9) and (len_sample != -9)) { exp = eh::uncertainty::bootstrap( - dts, bootstrap.find("n_samples")->second, - bootstrap.find("len_sample")->second + dts, n_samples, len_sample ); } else @@ -356,6 +358,8 @@ namespace evalhyd // retrieve or compute requested metrics std::vector<xt::xarray<double>> r; + auto summary = bootstrap.find("summary")->second; + for ( const auto& metric : metrics ) { if ( metric == "RMSE" ) @@ -363,28 +367,28 @@ namespace evalhyd if (std::find(req_dep.begin(), req_dep.end(), metric) == req_dep.end()) evaluator.calc_RMSE(); - r.emplace_back(evaluator.RMSE); + r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary)); } else if ( metric == "NSE" ) { if (std::find(req_dep.begin(), req_dep.end(), metric) == req_dep.end()) evaluator.calc_NSE(); - r.emplace_back(evaluator.NSE); + r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary)); } else if ( metric == "KGE" ) { if (std::find(req_dep.begin(), req_dep.end(), metric) == req_dep.end()) evaluator.calc_KGE(); - r.emplace_back(evaluator.KGE); + r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary)); } else if ( metric == "KGEPRIME" ) { if (std::find(req_dep.begin(), req_dep.end(), metric) == req_dep.end()) evaluator.calc_KGEPRIME(); - r.emplace_back(evaluator.KGEPRIME); + r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary)); } } diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp index a24cd4899cb5f9ada8cb4fae8834c17808c74b92..3168381fc8d28c66775725e648a7016bc2e897e8 100644 --- a/include/evalhyd/evalp.hpp +++ b/include/evalhyd/evalp.hpp @@ -12,6 +12,7 @@ #include "../../src/utils.hpp" #include "../../src/masks.hpp" +#include "../../src/maths.hpp" #include "../../src/uncertainty.hpp" #include "../../src/probabilist/evaluator.h" @@ -74,10 +75,11 @@ namespace evalhyd /// Parameters for the bootstrapping method used to estimate the /// sampling uncertainty in the evaluation of the predictions. /// Two parameters are mandatory ('n_samples' the number of random - /// samples, and 'len_sample' the length of one sample in number - /// of years), and one parameter is optional ('seed'). If not - /// provided, no bootstrapping is performed. If provided, *dts* must - /// also be provided. + /// samples, 'len_sample' the length of one sample in number of years, + /// and 'summary' the statistics to return to characterise the + /// sampling distribution), and one parameter is optional ('seed'). + /// If not provided, no bootstrapping is performed. If provided, + /// *dts* must also be provided. /// /// .. seealso:: :doc:`../../functionalities/bootstrapping` /// @@ -131,7 +133,7 @@ namespace evalhyd const xt::xtensor<bool, 4>& t_msk = {}, const xt::xtensor<std::array<char, 32>, 2>& m_cdt = {}, const std::unordered_map<std::string, int>& bootstrap = - {{"n_samples", -9}, {"len_sample", -9}}, + {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, const std::vector<std::string>& dts = {} ) { @@ -253,12 +255,12 @@ namespace evalhyd // generate bootstrap experiment if requested std::vector<xt::xkeep_slice<int>> b_exp; - if ((bootstrap.find("n_samples")->second != -9) - and (bootstrap.find("len_sample")->second != -9)) + auto n_samples = bootstrap.find("n_samples")->second; + auto len_sample = bootstrap.find("len_sample")->second; + if ((n_samples != -9) and (len_sample != -9)) { b_exp = eh::uncertainty::bootstrap( - dts, bootstrap.find("n_samples")->second, - bootstrap.find("len_sample")->second + dts, n_samples, len_sample ); } else @@ -274,6 +276,8 @@ namespace evalhyd for (const auto& metric : metrics) r.emplace_back(xt::zeros<double>(dim[metric])); + auto summary = bootstrap.find("summary")->second; + // compute variables one site at a time to minimise memory imprint for (int s = 0; s < n_sit; s++) // compute variables one lead time at a time to minimise memory imprint @@ -330,7 +334,7 @@ namespace evalhyd evaluator.calc_BS(); // (sites, lead times, subsets, samples, thresholds) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - evaluator.BS; + eh::uncertainty::summarise(evaluator.BS, summary); } else if ( metric == "BSS" ) { @@ -339,7 +343,7 @@ namespace evalhyd evaluator.calc_BSS(); // (sites, lead times, subsets, samples, thresholds) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - evaluator.BSS; + eh::uncertainty::summarise(evaluator.BSS, summary); } else if ( metric == "BS_CRD" ) { @@ -348,7 +352,7 @@ namespace evalhyd evaluator.calc_BS_CRD(); // (sites, lead times, subsets, samples, thresholds, components) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = - evaluator.BS_CRD; + eh::uncertainty::summarise(evaluator.BS_CRD, summary); } else if ( metric == "BS_LBD" ) { @@ -357,7 +361,7 @@ namespace evalhyd evaluator.calc_BS_LBD(); // (sites, lead times, subsets, samples, thresholds, components) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = - evaluator.BS_LBD; + eh::uncertainty::summarise(evaluator.BS_LBD, summary); } else if ( metric == "QS" ) { @@ -366,7 +370,7 @@ namespace evalhyd evaluator.calc_QS(); // (sites, lead times, subsets, samples, quantiles) xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - evaluator.QS; + eh::uncertainty::summarise(evaluator.QS, summary); } else if ( metric == "CRPS" ) { @@ -375,7 +379,7 @@ namespace evalhyd evaluator.calc_CRPS(); // (sites, lead times, subsets, samples) xt::view(r[m], s, l, xt::all(), xt::all()) = - evaluator.CRPS; + eh::uncertainty::summarise(evaluator.CRPS, summary); } } } diff --git a/src/uncertainty.hpp b/src/uncertainty.hpp index 6e480f7e3b4b8c6c796d6238149c25cb15b171f4..d7db1ea13c64a2ebb3805a6dbb2768c1a70b90f7 100644 --- a/src/uncertainty.hpp +++ b/src/uncertainty.hpp @@ -14,6 +14,8 @@ #include <xtensor/xrandom.hpp> #include <xtensor/xio.hpp> +#include "../../src/maths.hpp" + typedef std::chrono::time_point< std::chrono::system_clock, std::chrono::minutes > tp_minutes; @@ -138,6 +140,46 @@ namespace evalhyd return samples; } + + inline auto summarise(const xt::xarray<double>& values, int summary) + { + // TODO: wait for xt::quantile to be available + // or implement it internally for n-dim expressions + // summary 2: series of quantiles across samples + if (summary == 2) { +// // adjust last axis size (from samples to number of statistics) +// auto s = values.shape(); +// s.pop_back(); +// s.push_back(7); +// xt::xarray<double> v = xt::zeros<double>(s); +// // quantiles +// xt::view(v, xt::all()) = +// xt::quantile(values, {0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95}, -1); +// return v; + return values; + } + // TODO: wait for xt::stddev to be fixed for rtensor + // or implement it internally for n-dim expressions + // summary 1: mean and standard deviation across samples + else if (summary == 1) { +// // adjust last axis size (from samples to number of statistics) +// auto s = values.shape(); +// s.pop_back(); +// s.push_back(2); +// xt::xarray<double> v = xt::zeros<double>(s); +// // mean +// xt::strided_view(s, xt::ellipsis(), 0) = xt::mean(values, -1); +// // standard deviation +// xt::strided_view(s, xt::ellipsis(), 1) = xt::stddev(values, -1); +// return v; + return values; + } + // summary 0: raw (keep all samples) + else { + return values; + } + } + } } diff --git a/src/utils.hpp b/src/utils.hpp index f67dea75ff022ae56d35c020abc17a9d7d9ea712..60761d26e1e694d7cfde6483d6bb93788573df6c 100644 --- a/src/utils.hpp +++ b/src/utils.hpp @@ -112,6 +112,17 @@ namespace evalhyd throw std::runtime_error( "length of sample missing for bootstrap" ); + // check summary + if (bootstrap.find("summary") == bootstrap.end()) + throw std::runtime_error( + "summary missing for bootstrap" + ); + auto s = bootstrap.find("summary")->second; + // TODO: change upper bound when mean+stddev and quantiles implemented + if ((s < 0) or (s > 0)) + throw std::runtime_error( + "invalid value for bootstrap summary" + ); // set seed if (bootstrap.find("seed") == bootstrap.end()) xt::random::seed(time(nullptr)); diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp index eb5ef2dfcdc99f0f341619720df1bcdd669225f7..0000cbbeb210aaffe9ad3ea72721ea4bc0b3c6f1 100644 --- a/tests/test_determinist.cpp +++ b/tests/test_determinist.cpp @@ -344,7 +344,7 @@ TEST(DeterministTests, TestBootstrap) // compute metrics via bootstrap std::unordered_map<std::string, int> bootstrap = - {{"n_samples", 10}, {"len_sample", 3}}; + {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; std::vector<xt::xarray<double>> metrics_bts = eh::evald( diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp index f53eaac86612596384fbeb370b2c781fe443a3da..4583adea463bd1c9834a3b9978408909d196ed87 100644 --- a/tests/test_probabilist.cpp +++ b/tests/test_probabilist.cpp @@ -407,7 +407,7 @@ TEST(ProbabilistTests, TestBootstrap) // compute metrics via bootstrap std::unordered_map<std::string, int> bootstrap = - {{"n_samples", 10}, {"len_sample", 3}}; + {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; std::vector<xt::xarray<double>> metrics_bts = eh::evalp(