Commit d37e69d2 authored by Thibault Hallouin's avatar Thibault Hallouin
Browse files

pave the way for summary statistics on bootstrap samples

Ultimately, the objective is for the user to be able to get the raw
sampled metric values, or the mean and standard deviation of the sampled
metric values, or a series of quantiles of the sampled metric values.
There are still problems with the standard deviation on rtensor, and
the computation of the quantiles does not work on n-dim expressions yet.
So the second and third options are not possible yet, so only the raw
values can be returned. Nonetheless, the machinery and the choice of
where to introduce the summary functionality could be implemented,
which is the purpose of this commit. A new parameter of the bootstrap
experiment called "summary" is added: it can be given a value of 0 (to
get the raw values). In the future, it would also take a value of 1 for
mean+std, and 2 for quantiles.
No related merge requests found
Pipeline #40450 passed with stages
in 3 minutes and 54 seconds
Showing with 91 additions and 30 deletions
+91 -30
......@@ -11,6 +11,7 @@
#include "../../src/utils.hpp"
#include "../../src/masks.hpp"
#include "../../src/maths.hpp"
#include "../../src/uncertainty.hpp"
#include "../../src/determinist/evaluator.hpp"
......@@ -103,10 +104,11 @@ namespace evalhyd
/// Parameters for the bootstrapping method used to estimate the
/// sampling uncertainty in the evaluation of the predictions.
/// Two parameters are mandatory ('n_samples' the number of random
/// samples, and 'len_sample' the length of one sample in number
/// of years), and one parameter is optional ('seed'). If not
/// provided, no bootstrapping is performed. If provided, *dts* must
/// also be provided.
/// samples, 'len_sample' the length of one sample in number of years,
/// and 'summary' the statistics to return to characterise the
/// sampling distribution), and one parameter is optional ('seed').
/// If not provided, no bootstrapping is performed. If provided,
/// *dts* must also be provided.
///
/// .. seealso:: :doc:`../../functionalities/bootstrapping`
///
......@@ -168,7 +170,7 @@ namespace evalhyd
const xt::xtensor<bool, 2>& t_msk = {},
const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {},
const std::unordered_map<std::string, int>& bootstrap =
{{"n_samples", -9}, {"len_sample", -9}},
{{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}},
const std::vector<std::string>& dts = {}
)
{
......@@ -286,12 +288,12 @@ namespace evalhyd
// generate bootstrap experiment if requested
std::vector<xt::xkeep_slice<int>> exp;
if ((bootstrap.find("n_samples")->second != -9)
and (bootstrap.find("len_sample")->second != -9))
auto n_samples = bootstrap.find("n_samples")->second;
auto len_sample = bootstrap.find("len_sample")->second;
if ((n_samples != -9) and (len_sample != -9))
{
exp = eh::uncertainty::bootstrap(
dts, bootstrap.find("n_samples")->second,
bootstrap.find("len_sample")->second
dts, n_samples, len_sample
);
}
else
......@@ -356,6 +358,8 @@ namespace evalhyd
// retrieve or compute requested metrics
std::vector<xt::xarray<double>> r;
auto summary = bootstrap.find("summary")->second;
for ( const auto& metric : metrics )
{
if ( metric == "RMSE" )
......@@ -363,28 +367,28 @@ namespace evalhyd
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_RMSE();
r.emplace_back(evaluator.RMSE);
r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary));
}
else if ( metric == "NSE" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_NSE();
r.emplace_back(evaluator.NSE);
r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary));
}
else if ( metric == "KGE" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_KGE();
r.emplace_back(evaluator.KGE);
r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary));
}
else if ( metric == "KGEPRIME" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_KGEPRIME();
r.emplace_back(evaluator.KGEPRIME);
r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary));
}
}
......
......@@ -12,6 +12,7 @@
#include "../../src/utils.hpp"
#include "../../src/masks.hpp"
#include "../../src/maths.hpp"
#include "../../src/uncertainty.hpp"
#include "../../src/probabilist/evaluator.h"
......@@ -74,10 +75,11 @@ namespace evalhyd
/// Parameters for the bootstrapping method used to estimate the
/// sampling uncertainty in the evaluation of the predictions.
/// Two parameters are mandatory ('n_samples' the number of random
/// samples, and 'len_sample' the length of one sample in number
/// of years), and one parameter is optional ('seed'). If not
/// provided, no bootstrapping is performed. If provided, *dts* must
/// also be provided.
/// samples, 'len_sample' the length of one sample in number of years,
/// and 'summary' the statistics to return to characterise the
/// sampling distribution), and one parameter is optional ('seed').
/// If not provided, no bootstrapping is performed. If provided,
/// *dts* must also be provided.
///
/// .. seealso:: :doc:`../../functionalities/bootstrapping`
///
......@@ -131,7 +133,7 @@ namespace evalhyd
const xt::xtensor<bool, 4>& t_msk = {},
const xt::xtensor<std::array<char, 32>, 2>& m_cdt = {},
const std::unordered_map<std::string, int>& bootstrap =
{{"n_samples", -9}, {"len_sample", -9}},
{{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}},
const std::vector<std::string>& dts = {}
)
{
......@@ -253,12 +255,12 @@ namespace evalhyd
// generate bootstrap experiment if requested
std::vector<xt::xkeep_slice<int>> b_exp;
if ((bootstrap.find("n_samples")->second != -9)
and (bootstrap.find("len_sample")->second != -9))
auto n_samples = bootstrap.find("n_samples")->second;
auto len_sample = bootstrap.find("len_sample")->second;
if ((n_samples != -9) and (len_sample != -9))
{
b_exp = eh::uncertainty::bootstrap(
dts, bootstrap.find("n_samples")->second,
bootstrap.find("len_sample")->second
dts, n_samples, len_sample
);
}
else
......@@ -274,6 +276,8 @@ namespace evalhyd
for (const auto& metric : metrics)
r.emplace_back(xt::zeros<double>(dim[metric]));
auto summary = bootstrap.find("summary")->second;
// compute variables one site at a time to minimise memory imprint
for (int s = 0; s < n_sit; s++)
// compute variables one lead time at a time to minimise memory imprint
......@@ -330,7 +334,7 @@ namespace evalhyd
evaluator.calc_BS();
// (sites, lead times, subsets, samples, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
evaluator.BS;
eh::uncertainty::summarise(evaluator.BS, summary);
}
else if ( metric == "BSS" )
{
......@@ -339,7 +343,7 @@ namespace evalhyd
evaluator.calc_BSS();
// (sites, lead times, subsets, samples, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
evaluator.BSS;
eh::uncertainty::summarise(evaluator.BSS, summary);
}
else if ( metric == "BS_CRD" )
{
......@@ -348,7 +352,7 @@ namespace evalhyd
evaluator.calc_BS_CRD();
// (sites, lead times, subsets, samples, thresholds, components)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) =
evaluator.BS_CRD;
eh::uncertainty::summarise(evaluator.BS_CRD, summary);
}
else if ( metric == "BS_LBD" )
{
......@@ -357,7 +361,7 @@ namespace evalhyd
evaluator.calc_BS_LBD();
// (sites, lead times, subsets, samples, thresholds, components)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) =
evaluator.BS_LBD;
eh::uncertainty::summarise(evaluator.BS_LBD, summary);
}
else if ( metric == "QS" )
{
......@@ -366,7 +370,7 @@ namespace evalhyd
evaluator.calc_QS();
// (sites, lead times, subsets, samples, quantiles)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
evaluator.QS;
eh::uncertainty::summarise(evaluator.QS, summary);
}
else if ( metric == "CRPS" )
{
......@@ -375,7 +379,7 @@ namespace evalhyd
evaluator.calc_CRPS();
// (sites, lead times, subsets, samples)
xt::view(r[m], s, l, xt::all(), xt::all()) =
evaluator.CRPS;
eh::uncertainty::summarise(evaluator.CRPS, summary);
}
}
}
......
......@@ -14,6 +14,8 @@
#include <xtensor/xrandom.hpp>
#include <xtensor/xio.hpp>
#include "../../src/maths.hpp"
typedef std::chrono::time_point<
std::chrono::system_clock, std::chrono::minutes
> tp_minutes;
......@@ -138,6 +140,46 @@ namespace evalhyd
return samples;
}
inline auto summarise(const xt::xarray<double>& values, int summary)
{
// TODO: wait for xt::quantile to be available
// or implement it internally for n-dim expressions
// summary 2: series of quantiles across samples
if (summary == 2) {
// // adjust last axis size (from samples to number of statistics)
// auto s = values.shape();
// s.pop_back();
// s.push_back(7);
// xt::xarray<double> v = xt::zeros<double>(s);
// // quantiles
// xt::view(v, xt::all()) =
// xt::quantile(values, {0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95}, -1);
// return v;
return values;
}
// TODO: wait for xt::stddev to be fixed for rtensor
// or implement it internally for n-dim expressions
// summary 1: mean and standard deviation across samples
else if (summary == 1) {
// // adjust last axis size (from samples to number of statistics)
// auto s = values.shape();
// s.pop_back();
// s.push_back(2);
// xt::xarray<double> v = xt::zeros<double>(s);
// // mean
// xt::strided_view(s, xt::ellipsis(), 0) = xt::mean(values, -1);
// // standard deviation
// xt::strided_view(s, xt::ellipsis(), 1) = xt::stddev(values, -1);
// return v;
return values;
}
// summary 0: raw (keep all samples)
else {
return values;
}
}
}
}
......
......@@ -112,6 +112,17 @@ namespace evalhyd
throw std::runtime_error(
"length of sample missing for bootstrap"
);
// check summary
if (bootstrap.find("summary") == bootstrap.end())
throw std::runtime_error(
"summary missing for bootstrap"
);
auto s = bootstrap.find("summary")->second;
// TODO: change upper bound when mean+stddev and quantiles implemented
if ((s < 0) or (s > 0))
throw std::runtime_error(
"invalid value for bootstrap summary"
);
// set seed
if (bootstrap.find("seed") == bootstrap.end())
xt::random::seed(time(nullptr));
......
......@@ -344,7 +344,7 @@ TEST(DeterministTests, TestBootstrap)
// compute metrics via bootstrap
std::unordered_map<std::string, int> bootstrap =
{{"n_samples", 10}, {"len_sample", 3}};
{{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}};
std::vector<xt::xarray<double>> metrics_bts =
eh::evald(
......
......@@ -407,7 +407,7 @@ TEST(ProbabilistTests, TestBootstrap)
// compute metrics via bootstrap
std::unordered_map<std::string, int> bootstrap =
{{"n_samples", 10}, {"len_sample", 3}};
{{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}};
std::vector<xt::xarray<double>> metrics_bts =
eh::evalp(
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment