Commit e775087d authored by Thibault Hallouin's avatar Thibault Hallouin
Browse files

refactor probabilist Evaluator

including:
- extracting the element/intermediate/metric calculations from the
  `probabilist::Evaluator` class
- introducing new getter methods in `probabilist::Evaluator` class
  that handle the element/intermediate/metric inter-dependencies
  internally to the object
- removing external inter-dependencies handling from `evalp`
1 merge request: !3 "release v0.1.0"
Pipeline #42828 passed with stage
in 2 minutes and 6 seconds
Showing with 1010 additions and 773 deletions
+1010 -773
This diff is collapsed.
This diff is collapsed.
#ifndef EVALHYD_PROBABILIST_QUANTILES_HPP
#define EVALHYD_PROBABILIST_QUANTILES_HPP
#include <cmath>
#include <cstddef>
#include <vector>

#include <xtensor/xsort.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
namespace evalhyd
{
namespace probabilist
{
namespace elements
{
// Derive the forecast quantiles by ranking the ensemble members
// at each time step.
//
// \param q_prd
//     Streamflow predictions.
//     shape: (members, time)
// \return
//     Streamflow forecast quantiles.
//     shape: (quantiles, time)
template <class V2D4>
inline xt::xtensor<double, 2> calc_q_qnt(
        const V2D4& q_prd
)
{
    // sorting along the member axis (axis 0) turns each column of
    // ensemble members into an ordered sequence of quantiles
    xt::xtensor<double, 2> sorted_members = xt::sort(q_prd, 0);
    return sorted_members;
}
}
namespace intermediate
{
// Compute the quantile score (pinball loss) at every time step.
//
// \param q_obs
//     Streamflow observations.
//     shape: (time,)
// \param q_qnt
//     Streamflow quantiles.
//     shape: (quantiles, time)
// \param n_mbr
//     Number of ensemble members.
// \return
//     Quantile scores for each time step.
//     shape: (quantiles, time)
template <class V1D2>
inline xt::xtensor<double, 2> calc_qs(
        const V1D2 &q_obs,
        const xt::xtensor<double, 2>& q_qnt,
        std::size_t n_mbr
)
{
    // quantile orders $\alpha_i = i / (n_mbr + 1)$ for i in [1, n_mbr]
    xt::xtensor<double, 1> order =
            xt::arange<double>(1., double(n_mbr + 1))
            / double(n_mbr + 1);
    // column view of the orders so they broadcast against the
    // (quantiles, time) error matrix
    auto order_col = xt::view(order, xt::all(), xt::newaxis());
    // signed error between each quantile forecast and the observation
    xt::xtensor<double, 2> err = q_qnt - q_obs;
    // pinball loss: over-prediction penalised by (1 - alpha),
    // under-prediction penalised by alpha
    xt::xtensor<double, 2> scores = xt::where(
            err > 0,
            2 * (1 - order_col) * err,
            - 2 * order_col * err
    );
    return scores;
}
// Compute the continuous rank probability score (CRPS) per time
// step by numerically integrating the quantile scores over the
// quantile orders with the trapezoidal rule.
//
// /!\ Accuracy relies on a sufficiently large number of quantiles
//     so that the cumulative distribution is sampled smoothly
//     enough for the numerical integration.
//
// \param qs
//     Quantile scores for each time step.
//     shape: (quantiles, time)
// \param n_mbr
//     Number of ensemble members.
// \return
//     CRPS for each time step.
//     shape: (1, time)
inline xt::xtensor<double, 2> calc_crps(
        const xt::xtensor<double, 2>& qs,
        std::size_t n_mbr
)
{
    // spacing between consecutive quantile orders
    double dx = 1. / (double(n_mbr) + 1.);
    // integrate along the quantile axis
    // (equivalent to xt::trapz(y, dx=1/(n+1), axis=0))
    auto integral = xt::trapz(qs, dx, 0);
    // restore a leading axis of size one to keep a (1, time) shape
    return xt::view(integral, xt::newaxis(), xt::all());
}
}
namespace metrics
{
// Compute the quantile score (QS).
//
// \param qs
//     Quantile scores for each time step.
//     shape: (quantiles, time)
// \param t_msk
//     Temporal subsets of the whole record.
//     shape: (subsets, time)
// \param b_exp
//     Bootstrap samples.
//     shape: (samples, time slice)
// \param n_mbr
//     Number of ensemble members.
// \param n_msk
//     Number of temporal subsets.
// \param n_exp
//     Number of bootstrap samples.
// \return
//     Quantile scores.
//     shape: (subsets, samples, quantiles)
inline xt::xtensor<double, 3> calc_QS(
        const xt::xtensor<double, 2>& qs,
        const xt::xtensor<bool, 2>& t_msk,
        const std::vector<xt::xkeep_slice<int>>& b_exp,
        std::size_t n_mbr,
        std::size_t n_msk,
        std::size_t n_exp
)
{
    // allocate output variable
    // shape: (subsets, samples, quantiles)
    xt::xtensor<double, 3> QS =
            xt::zeros<double>({n_msk, n_exp, n_mbr});
    // process one temporal subset at a time to limit memory use
    for (std::size_t msk = 0; msk < n_msk; msk++)
    {
        // exclude masked-out time steps by turning them into NaN
        // (workaround until reducers work on masked_view)
        auto masked = xt::where(xt::row(t_msk, msk), qs, NAN);
        // process one bootstrap sample at a time
        for (std::size_t exp = 0; exp < n_exp; exp++)
        {
            // keep only the time steps drawn for this sample
            auto sampled = xt::view(masked, xt::all(), b_exp[exp]);
            // average over the remaining time steps
            // $QS = \frac{1}{n} \sum_{k=1}^{n} qs$
            xt::view(QS, msk, exp, xt::all()) =
                    xt::nanmean(sampled, -1);
        }
    }
    return QS;
}
// Compute the continuous rank probability score (CRPS) based
// on quantile scores.
//
// \param crps
//     CRPS for each time step.
//     shape: (1, time)
// \param t_msk
//     Temporal subsets of the whole record.
//     shape: (subsets, time)
// \param b_exp
//     Bootstrap samples.
//     shape: (samples, time slice)
// \param n_msk
//     Number of temporal subsets.
// \param n_exp
//     Number of bootstrap samples.
// \return
//     CRPS.
//     shape: (subsets, samples)
inline xt::xtensor<double, 2> calc_CRPS(
        const xt::xtensor<double, 2>& crps,
        const xt::xtensor<bool, 2>& t_msk,
        const std::vector<xt::xkeep_slice<int>>& b_exp,
        std::size_t n_msk,
        std::size_t n_exp
)
{
    // allocate output variable
    // shape: (subsets, samples)
    xt::xtensor<double, 2> CRPS = xt::zeros<double>({n_msk, n_exp});
    // process one temporal subset at a time to limit memory use
    for (std::size_t msk = 0; msk < n_msk; msk++)
    {
        // exclude masked-out time steps by turning them into NaN
        // (workaround until reducers work on masked_view)
        auto masked = xt::where(xt::row(t_msk, msk), crps, NAN);
        // process one bootstrap sample at a time
        for (std::size_t exp = 0; exp < n_exp; exp++)
        {
            // keep only the time steps drawn for this sample
            auto sampled = xt::view(masked, xt::all(), b_exp[exp]);
            // average over the remaining time steps
            // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$
            xt::view(CRPS, msk, exp) =
                    xt::squeeze(xt::nanmean(sampled, -1));
        }
    }
    return CRPS;
}
}
}
}
#endif //EVALHYD_PROBABILIST_QUANTILES_HPP
...@@ -228,7 +228,6 @@ namespace evalhyd ...@@ -228,7 +228,6 @@ namespace evalhyd
} }
} }
// retrieve dimensions // retrieve dimensions
std::size_t n_sit = q_prd_.shape(0); std::size_t n_sit = q_prd_.shape(0);
std::size_t n_ltm = q_prd_.shape(1); std::size_t n_ltm = q_prd_.shape(1);
...@@ -250,29 +249,6 @@ namespace evalhyd ...@@ -250,29 +249,6 @@ namespace evalhyd
dim["QS"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr}; dim["QS"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr};
dim["CRPS"] = {n_sit, n_ltm, n_msk, n_exp}; dim["CRPS"] = {n_sit, n_ltm, n_msk, n_exp};
// declare maps for memoisation purposes
std::unordered_map<std::string, std::vector<std::string>> elt;
std::unordered_map<std::string, std::vector<std::string>> dep;
// register potentially recurring computation elements across metrics
elt["bs"] = {"o_k", "y_k"};
elt["BSS"] = {"o_k", "bar_o"};
elt["BS_CRD"] = {"o_k", "bar_o", "y_k"};
elt["BS_LBD"] = {"o_k", "y_k"};
elt["qs"] = {"q_qnt"};
// register nested metrics (i.e. metric dependent on another metric)
dep["BS"] = {"bs"};
dep["BSS"] = {"bs"};
dep["QS"] = {"qs"};
dep["CRPS"] = {"qs", "crps"};
// determine required elt/dep to be pre-computed
std::vector<std::string> req_elt;
std::vector<std::string> req_dep;
utils::find_requirements(metrics, elt, dep, req_elt, req_dep);
// generate masks from conditions if provided // generate masks from conditions if provided
auto gen_msk = [&]() auto gen_msk = [&]()
{ {
...@@ -353,44 +329,6 @@ namespace evalhyd ...@@ -353,44 +329,6 @@ namespace evalhyd
q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp
); );
// pre-compute required elt
for (const auto& element : req_elt)
{
if ( element == "o_k" )
{
evaluator.calc_o_k();
}
else if ( element == "bar_o" )
{
evaluator.calc_bar_o();
}
else if ( element == "y_k" )
{
evaluator.calc_y_k();
}
else if ( element == "q_qnt" )
{
evaluator.calc_q_qnt();
}
}
// pre-compute required dep
for (const auto& dependency : req_dep)
{
if ( dependency == "bs" )
{
evaluator.calc_bs();
}
else if ( dependency == "qs" )
{
evaluator.calc_qs();
}
else if ( dependency == "crps" )
{
evaluator.calc_crps();
}
}
// retrieve or compute requested metrics // retrieve or compute requested metrics
for (std::size_t m = 0; m < metrics.size(); m++) for (std::size_t m = 0; m < metrics.size(); m++)
{ {
...@@ -398,76 +336,45 @@ namespace evalhyd ...@@ -398,76 +336,45 @@ namespace evalhyd
if ( metric == "BS" ) if ( metric == "BS" )
{ {
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
{
evaluator.calc_BS();
}
// (sites, lead times, subsets, samples, thresholds) // (sites, lead times, subsets, samples, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.BS, summary); uncertainty::summarise(evaluator.get_BS(), summary);
} }
else if ( metric == "BSS" ) else if ( metric == "BSS" )
{ {
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
{
evaluator.calc_BSS();
}
// (sites, lead times, subsets, samples, thresholds) // (sites, lead times, subsets, samples, thresholds)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.BSS, summary); uncertainty::summarise(evaluator.get_BSS(), summary);
} }
else if ( metric == "BS_CRD" ) else if ( metric == "BS_CRD" )
{ {
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
{
evaluator.calc_BS_CRD();
}
// (sites, lead times, subsets, samples, thresholds, components) // (sites, lead times, subsets, samples, thresholds, components)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.BS_CRD, summary); uncertainty::summarise(evaluator.get_BS_CRD(), summary);
} }
else if ( metric == "BS_LBD" ) else if ( metric == "BS_LBD" )
{ {
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
{
evaluator.calc_BS_LBD();
}
// (sites, lead times, subsets, samples, thresholds, components) // (sites, lead times, subsets, samples, thresholds, components)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.BS_LBD, summary); uncertainty::summarise(evaluator.get_BS_LBD(), summary);
} }
else if ( metric == "QS" ) else if ( metric == "QS" )
{ {
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
{
evaluator.calc_QS();
}
// (sites, lead times, subsets, samples, quantiles) // (sites, lead times, subsets, samples, quantiles)
xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) =
uncertainty::summarise(evaluator.QS, summary); uncertainty::summarise(evaluator.get_QS(), summary);
} }
else if ( metric == "CRPS" ) else if ( metric == "CRPS" )
{ {
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
{
evaluator.calc_CRPS();
}
// (sites, lead times, subsets, samples) // (sites, lead times, subsets, samples)
xt::view(r[m], s, l, xt::all(), xt::all()) = xt::view(r[m], s, l, xt::all(), xt::all()) =
uncertainty::summarise(evaluator.CRPS, summary); uncertainty::summarise(evaluator.get_CRPS(), summary);
} }
} }
} }
} }
return r; return r;
}; }
} }
#endif //EVALHYD_EVALP_HPP #endif //EVALHYD_EVALP_HPP
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment