An error occurred while loading the file. Please try again.
-
Thibault Hallouin authoredb556eeea
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#ifndef EVALHYD_PROBABILIST_BRIER_HPP
#define EVALHYD_PROBABILIST_BRIER_HPP
#include <limits>
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xindex_view.hpp>
#include <xtensor/xmasked_view.hpp>
#include <xtensor/xsort.hpp>
#include <xtensor/xmath.hpp>
// NOTE ------------------------------------------------------------------------
// All equations in metrics below are following notations from
// "Wilks, D. S. (2011). Statistical methods in the atmospheric sciences.
// Amsterdam; Boston: Elsevier Academic Press. ISBN: 9780123850225".
// In particular, pp. 302-303, 332-333.
// -----------------------------------------------------------------------------
namespace evalhyd
{
namespace probabilist
{
namespace elements
{
/// Determine observed realisation of threshold(s) exceedance.
///
/// \param q_obs
/// Streamflow observations.
/// shape: (sites, time)
/// \param q_thr
/// Streamflow exceedance threshold(s).
/// shape: (sites, thresholds)
/// \param is_high_flow_event
/// Whether events correspond to being above the threshold(s).
/// \return
/// Event observed outcome.
/// shape: (sites, thresholds, time)
template<class XD2a, class XD2b>
inline xt::xtensor<double, 3> calc_o_k(
const XD2a& q_obs,
const XD2b& q_thr,
bool is_high_flow_event
)
{
if (is_high_flow_event)
{
// observations above threshold(s)
return xt::view(q_obs, xt::all(), xt::newaxis(), xt::all())
>= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis());
}
else
{
// observations below threshold(s)
return xt::view(q_obs, xt::all(), xt::newaxis(), xt::all())
<= xt::view(q_thr, xt::all(), xt::all(), xt::newaxis());
}
}
/// Determine mean observed realisation of threshold(s) exceedance.
///
/// \param o_k
/// Event observed outcome.
/// shape: (sites, thresholds, time)
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
/// \param t_msk
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_thr
/// Number of thresholds.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// Mean event observed outcome.
/// shape: (sites, lead times, subsets, samples, thresholds)
inline xt::xtensor<double, 5> calc_bar_o(
const xt::xtensor<double, 3>& o_k,
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_thr,
std::size_t n_msk,
std::size_t n_exp
)
{
// initialise output variable
xt::xtensor<double, 5> bar_o =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr});
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto o_k_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(),
xt::all(), xt::newaxis(), xt::all()),
xt::view(o_k, xt::all(), xt::newaxis(),
xt::newaxis(), xt::all(), xt::all()),
NAN
);
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto o_k_masked_sampled =
xt::view(o_k_masked, xt::all(), xt::all(),
xt::all(), xt::all(), b_exp[e]);
// compute mean "climatology" relative frequency of the event
// $\bar{o} = \frac{1}{n} \sum_{k=1}^{n} o_k$
xt::view(bar_o, xt::all(), xt::all(), xt::all(), e, xt::all()) =
xt::nanmean(o_k_masked_sampled, -1);
}
return bar_o;
}
/// Determine number of forecast members exceeding threshold(s)
///
/// \param q_prd
/// Streamflow predictions.
/// shape: (sites, lead times, members, time)
/// \param q_thr
/// Streamflow exceedance threshold(s).
/// shape: (sites, thresholds)
/// \param is_high_flow_event
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
/// Whether events correspond to being above the threshold(s).
/// \return
/// Number of forecast members exceeding threshold(s).
/// shape: (sites, lead times, thresholds, time)
template<class XD4, class XD2>
inline xt::xtensor<double, 4> calc_sum_f_k(
const XD4& q_prd,
const XD2& q_thr,
bool is_high_flow_event
)
{
if (is_high_flow_event)
{
// determine if members are above threshold(s)
auto f_k = xt::view(q_prd, xt::all(), xt::all(),
xt::newaxis(), xt::all(), xt::all())
>= xt::view(q_thr, xt::all(), xt::newaxis(),
xt::all(), xt::newaxis(), xt::newaxis());
// calculate how many members are above threshold(s)
return xt::sum(f_k, 3);
}
else
{
// determine if members are below threshold(s)
auto f_k = xt::view(q_prd, xt::all(), xt::all(),
xt::newaxis(), xt::all(), xt::all())
<= xt::view(q_thr, xt::all(), xt::newaxis(),
xt::all(), xt::newaxis(), xt::newaxis());
// calculate how many members are below threshold(s)
return xt::sum(f_k, 3);
}
}
/// Determine forecast probability of threshold(s) exceedance to occur.
///
/// \param sum_f_k
/// Number of forecast members exceeding threshold(s).
/// shape: (sites, lead times, thresholds, time)
/// \param n_mbr
/// Number of ensemble members.
/// \return
/// Event probability forecast.
/// shape: (sites, lead times, thresholds, time)
inline xt::xtensor<double, 4> calc_y_k(
const xt::xtensor<double, 4>& sum_f_k,
std::size_t n_mbr
)
{
// determine probability of threshold(s) exceedance
// /!\ probability calculation dividing by n (the number of
// members), not n+1 (the number of ranks) like in other metrics
return sum_f_k / n_mbr;
}
}
namespace intermediate
{
/// Compute the Brier score for each time step.
///
/// \param o_k
/// Observed event outcome.
/// shape: (sites, thresholds, time)
/// \param y_k
/// Event probability forecast.
/// shape: (sites, lead times, thresholds, time)
/// \return
/// Brier score for each threshold for each time step.
/// shape: (sites, lead times, thresholds, time)
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
inline xt::xtensor<double, 4> calc_bs(
const xt::xtensor<double, 3>& o_k,
const xt::xtensor<double, 4>& y_k
)
{
// return computed Brier score(s)
// $bs = (o_k - y_k)^2$
return xt::square(
xt::view(o_k, xt::all(), xt::newaxis(),
xt::all(), xt::all())
- y_k
);
}
}
namespace metrics
{
/// Compute the Brier score (BS).
///
/// \param bs
/// Brier score for each threshold for each time step.
/// shape: (sites, lead times, thresholds, time)
/// \param q_thr
/// Streamflow exceedance threshold(s).
/// shape: (sites, thresholds)
/// \param t_msk
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_thr
/// Number of thresholds.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// Brier score for each subset and for each threshold.
/// shape: (sites, lead times, subsets, samples, thresholds)
template <class XD2>
inline xt::xtensor<double, 5> calc_BS(
const xt::xtensor<double, 4>& bs,
const XD2& q_thr,
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_thr,
std::size_t n_msk,
std::size_t n_exp
)
{
// initialise output variable
xt::xtensor<double, 5> BS =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr});
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto bs_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(), m,
xt::newaxis(), xt::all()),
bs,
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
NAN
);
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto bs_masked_sampled =
xt::view(bs_masked, xt::all(), xt::all(),
xt::all(), b_exp[e]);
// calculate the mean over the time steps
// $BS = \frac{1}{n} \sum_{k=1}^{n} (o_k - y_k)^2$
xt::view(BS, xt::all(), xt::all(), m, e, xt::all()) =
xt::nanmean(bs_masked_sampled, -1);
}
}
// assign NaN where thresholds were not provided (i.e. set as NaN)
xt::masked_view(
BS,
xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(),
xt::newaxis(), xt::newaxis(),
xt::all()))
) = NAN;
return BS;
}
/// Compute the X and Y axes of the reliability diagram
/// (`y_i`, the forecast probability; `bar_o_i`, the observed frequency;)
/// as well as the frequencies of the sampling histogram
/// (`N_i`, the number of forecasts of given probability `y_i`)'.
///
/// \param q_thr
/// Streamflow exceedance threshold(s).
/// shape: (sites, thresholds)
/// \param o_k
/// Observed event outcome.
/// shape: (sites, thresholds, time)
/// \param y_k
/// Event probability forecast.
/// shape: (sites, lead times, thresholds, time)
/// \param t_msk
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_thr
/// Number of thresholds.
/// \param n_mbr
/// Number of ensemble members.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// X and Y axes of the reliability diagram, and ordinates
/// (i.e. frequencies) of the sampling histogram, in this order.
/// shape: (sites, lead times, subsets, samples, thresholds, bins, axes)
template <class XD2>
inline xt::xtensor<double, 7> calc_REL_DIAG(
const XD2& q_thr,
const xt::xtensor<double, 3>& o_k,
const xt::xtensor<double, 4>& y_k,
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_thr,
std::size_t n_mbr,
std::size_t n_msk,
std::size_t n_exp
)
{
// initialise output variable
xt::xtensor<double, 7> REL_DIAG =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr,
n_mbr + 1, std::size_t {3}});
// compute range of forecast values $y_i$
auto y_i = xt::arange<double>(double(n_mbr + 1)) / n_mbr;
xt::view(REL_DIAG, xt::all(), xt::all(), xt::all(), xt::all(),
xt::all(), xt::all(), 0) = y_i;
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto o_k_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(),
m, xt::newaxis(), xt::all()),
xt::view(o_k, xt::all(), xt::newaxis(),
xt::all(), xt::all()),
NAN
);
auto y_k_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(),
m, xt::newaxis(), xt::all()),
y_k,
NAN
);
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto o_k_masked_sampled =
xt::view(o_k_masked, xt::all(), xt::all(),
xt::all(), b_exp[e]);
auto y_k_masked_sampled =
xt::view(y_k_masked, xt::all(), xt::all(),
xt::all(), b_exp[e]);
auto t_msk_sampled =
xt::view(t_msk, xt::all(), xt::all(), m,
xt::newaxis(), b_exp[e]);
// compute mask to subsample time steps belonging to same forecast bin
// (where bins are defined as the range of forecast values)
auto msk_bins = xt::equal(
// force evaluation to avoid segfault
xt::view(xt::eval(y_k_masked_sampled),
xt::all(), xt::all(), xt::all(),
xt::newaxis(), xt::all()),
xt::view(y_i,
xt::newaxis(), xt::newaxis(), xt::newaxis(),
xt::all(), xt::newaxis())
);
// compute number of forecasts in each forecast bin $N_i$
auto N_i = xt::eval(xt::sum(msk_bins, -1));
xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(),
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
xt::all(), 2) = N_i;
// compute subsample relative frequency
// $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$
auto bar_o_i = xt::where(
N_i > 0,
xt::nansum(
xt::where(
msk_bins,
xt::view(o_k_masked_sampled,
xt::all(), xt::all(),
xt::all(), xt::newaxis(),
xt::all()),
0.
),
-1
) / N_i,
0.
);
xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(),
xt::all(), 1) = bar_o_i;
}
}
// assign NaN where thresholds were not provided (i.e. set as NaN)
xt::masked_view(
REL_DIAG,
xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(),
xt::newaxis(), xt::newaxis(),
xt::all(), xt::newaxis(),
xt::newaxis()))
) = NAN;
return REL_DIAG;
}
/// Compute the calibration-refinement decomposition of the Brier score
/// into reliability, resolution, and uncertainty.
///
/// BS = reliability - resolution + uncertainty
///
/// \param q_thr
/// Streamflow exceedance threshold(s).
/// shape: (sites, thresholds)
/// \param bar_o
/// Mean event observed outcome.
/// shape: (sites, lead times, subsets, samples, thresholds)
/// \param REL_DIAG
/// Axes of the reliability diagram and the sampling histogram.
/// shape: (sites, lead times, thresholds, time)
/// \param t_counts
/// Time step counts for the period.
/// shape: (sites, lead times, subsets, samples)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_thr
/// Number of thresholds.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// Brier score components (reliability, resolution, uncertainty)
/// for each subset and for each threshold.
/// shape: (sites, lead times, subsets, samples, thresholds, bins, axes)
template <class XD2>
inline xt::xtensor<double, 6> calc_BS_CRD(
491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
const XD2& q_thr,
const xt::xtensor<double, 5>& bar_o,
const xt::xtensor<double, 7>& REL_DIAG,
const xt::xtensor<double, 4>& t_counts,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_thr,
std::size_t n_msk,
std::size_t n_exp
)
{
// initialise output variable
xt::xtensor<double, 6> BS_CRD =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr,
std::size_t {3}});
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// retrieve length of period
auto l = xt::view(t_counts, xt::all(), xt::all(),
m, xt::newaxis(), e);
// retrieve range of forecast values $y_i$
auto y_i = xt::eval(
xt::view(REL_DIAG, xt::all(), xt::all(), m, e,
xt::all(), xt::all(), 0)
);
// retrieve number of forecasts in each forecast bin $N_i$
auto N_i = xt::eval(
xt::view(REL_DIAG, xt::all(), xt::all(), m, e,
xt::all(), xt::all(), 2)
);
// retrieve subsample relative frequency
// $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$
auto bar_o_i = xt::eval(
xt::view(REL_DIAG, xt::all(), xt::all(), m, e,
xt::all(), xt::all(), 1)
);
// retrieve mean event observed outcome $bar_o$
auto _bar_o = xt::view(bar_o, xt::all(), xt::all(),
m, e, xt::all());
// (reshape to insert size-one axis for "bins")
auto _bar_o_ = xt::view(_bar_o, xt::all(), xt::all(),
xt::all(), xt::newaxis());
// calculate reliability =
// $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$
xt::view(BS_CRD, xt::all(), xt::all(), m, e, xt::all(), 0) =
xt::nansum(
xt::square(y_i - bar_o_i) * N_i,
-1
) / l;
// calculate resolution =
// $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$
xt::view(BS_CRD, xt::all(), xt::all(), m, e, xt::all(), 1) =
xt::nansum(
xt::square(bar_o_i - _bar_o_) * N_i,
-1
) / l;
// calculate uncertainty = $\bar{o} (1 - \bar{o})$
xt::view(BS_CRD, xt::all(), xt::all(), m, e, xt::all(), 2) =
561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
_bar_o * (1 - _bar_o);
}
}
// assign NaN where thresholds were not provided (i.e. set as NaN)
xt::masked_view(
BS_CRD,
xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(),
xt::newaxis(), xt::newaxis(),
xt::all(), xt::newaxis()))
) = NAN;
return BS_CRD;
}
/// Compute the likelihood-base rate decomposition of the Brier score
/// into type 2 bias, discrimination, and sharpness (a.k.a. refinement).
///
/// BS = type 2 bias - discrimination + sharpness
///
/// \param q_thr
/// Streamflow exceedance threshold(s).
/// shape: (sites, thresholds)
/// \param o_k
/// Observed event outcome.
/// shape: (sites, thresholds, time)
/// \param y_k
/// Event probability forecast.
/// shape: (sites, lead times, thresholds, time)
/// \param t_msk
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param t_counts
/// Time step counts for the period.
/// shape: (sites, lead times, subsets, samples)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_thr
/// Number of thresholds.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// Brier score components (type 2 bias, discrimination, sharpness)
/// for each subset and for each threshold.
/// shape: (sites, lead times, subsets, samples, thresholds, components)
template <class XD2>
inline xt::xtensor<double, 6> calc_BS_LBD(
const XD2& q_thr,
const xt::xtensor<double, 3>& o_k,
const xt::xtensor<double, 4>& y_k,
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
const xt::xtensor<double, 4>& t_counts,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_thr,
std::size_t n_msk,
std::size_t n_exp
)
{
// declare internal variables
// shape: (sites, lead times, bins, thresholds, time)
xt::xtensor<double, 5> msk_bins;
631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700
// shape: (sites, lead times, thresholds)
xt::xtensor<double, 3> bar_y;
// shape: (sites, lead times, bins, thresholds)
xt::xtensor<double, 4> M_j, bar_y_j;
// shape: (bins,)
xt::xtensor<double, 1> o_j;
// set the range of observed values $o_j$
o_j = {0., 1.};
// declare and initialise output variable
xt::xtensor<double, 6> BS_LBD =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr,
std::size_t {3}});
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto o_k_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(),
m, xt::newaxis(), xt::all()),
xt::view(o_k, xt::all(), xt::newaxis(),
xt::all(), xt::all()),
NAN
);
auto y_k_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(),
m, xt::newaxis(), xt::all()),
y_k,
NAN
);
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto o_k_masked_sampled =
xt::view(o_k_masked, xt::all(), xt::all(),
xt::all(), b_exp[e]);
auto y_k_masked_sampled =
xt::view(y_k_masked, xt::all(), xt::all(),
xt::all(), b_exp[e]);
// retrieve length of period
auto l = xt::view(t_counts, xt::all(), xt::all(),
m, xt::newaxis(), e);
// compute mask to subsample time steps belonging to same observation bin
// (where bins are defined as the range of forecast values)
msk_bins = xt::equal(
// force evaluation to avoid segfault
xt::view(xt::eval(o_k_masked_sampled),
xt::all(), xt::all(), xt::newaxis(),
xt::all(), xt::all()),
xt::view(o_j,
xt::newaxis(), xt::newaxis(), xt::all(),
xt::newaxis(), xt::newaxis())
);
// compute number of observations in each observation bin $M_j$
M_j = xt::nansum(msk_bins, -1);
// compute subsample relative frequency
// $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$
bar_y_j = xt::where(
M_j > 0,
xt::nansum(
xt::where(
701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770
msk_bins,
xt::view(y_k_masked_sampled,
xt::all(), xt::all(),
xt::newaxis(),
xt::all(), xt::all()),
0.
),
-1
) / M_j,
0.
);
// compute mean "climatology" forecast probability
// $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$
bar_y = xt::nanmean(y_k_masked_sampled, -1);
// calculate type 2 bias =
// $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$
xt::view(BS_LBD, xt::all(), xt::all(), m, e, xt::all(), 0) =
xt::nansum(
xt::square(
xt::view(o_j, xt::newaxis(),
xt::newaxis(), xt::all(),
xt::newaxis())
- bar_y_j
) * M_j,
2
) / l;
// calculate discrimination =
// $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$
xt::view(BS_LBD, xt::all(), xt::all(), m, e, xt::all(), 1) =
xt::nansum(
xt::square(
bar_y_j
- xt::view(bar_y,
xt::all(), xt::all(),
xt::newaxis(),
xt::all())
) * M_j,
2
) / l;
// calculate sharpness =
// $\frac{1}{n} \sum_{k=1}^{n} (\bar{y_k} - \bar{y})^2$
xt::view(BS_LBD, xt::all(), xt::all(), m, e, xt::all(), 2) =
xt::nansum(
xt::square(
y_k_masked_sampled
- xt::view(bar_y, xt::all(),
xt::all(), xt::all(),
xt::newaxis())
),
-1
) / l;
}
}
// assign NaN where thresholds were not provided (i.e. set as NaN)
xt::masked_view(
BS_LBD,
xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(),
xt::newaxis(), xt::newaxis(),
xt::all(), xt::newaxis()))
) = NAN;
return BS_LBD;
}
771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840
/// Compute the Brier skill score (BSS).
///
/// \param bs
/// Brier score for each threshold for each time step.
/// shape: (sites, lead times, thresholds, time)
/// \param q_thr
/// Streamflow exceedance threshold(s).
/// shape: (sites, thresholds)
/// \param o_k
/// Observed event outcome.
/// shape: (sites, thresholds, time)
/// \param bar_o
/// Mean event observed outcome.
/// shape: (sites, lead times, subsets, samples, thresholds)
/// \param t_msk
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_thr
/// Number of thresholds.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// Brier skill score for each subset and for each threshold.
/// shape: (sites, lead times, subsets, samples, thresholds)
template <class XD2>
inline xt::xtensor<double, 5> calc_BSS(
const xt::xtensor<double, 4>& bs,
const XD2& q_thr,
const xt::xtensor<double, 3>& o_k,
const xt::xtensor<double, 5>& bar_o,
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_thr,
std::size_t n_msk,
std::size_t n_exp
)
{
// declare and initialise output variable
xt::xtensor<double, 5> BSS =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_thr});
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto o_k_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(),
m, xt::newaxis(), xt::all()),
xt::view(o_k, xt::all(), xt::newaxis(),
xt::all(), xt::all()),
NAN
);
auto bs_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(), m,
xt::newaxis(), xt::all()),
bs,
NAN
);
841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto o_k_masked_sampled =
xt::view(o_k_masked, xt::all(), xt::all(),
xt::all(), b_exp[e]);
auto bs_masked_sampled =
xt::view(bs_masked, xt::all(), xt::all(),
xt::all(), b_exp[e]);
auto bar_o_sampled =
xt::view(bar_o, xt::all(), xt::all(),
xt::all(), e, xt::all());
// calculate reference Brier score(s)
// $bs_{ref} = \frac{1}{n} \sum_{k=1}^{n} (o_k - \bar{o})^2$
xt::xtensor<double, 4> bs_ref =
xt::nanmean(
xt::square(
o_k_masked_sampled -
xt::view(
bar_o_sampled, xt::all(),
xt::all(), m, xt::all(),
xt::newaxis()
)
),
-1,
xt::keep_dims
);
// compute Brier skill score(s)
// $BSS = \frac{1}{n} \sum_{k=1}^{n} 1 - \frac{bs}{bs_{ref}}
xt::view(BSS, xt::all(), xt::all(), m, e, xt::all()) =
xt::nanmean(
xt::where(
xt::equal(bs_ref, 0),
- std::numeric_limits<double>::infinity(),
1 - (bs_masked_sampled / bs_ref)
),
-1
);
}
}
// assign NaN where thresholds were not provided (i.e. set as NaN)
xt::masked_view(
BSS,
xt::isnan(xt::view(q_thr, xt::all(), xt::newaxis(),
xt::newaxis(), xt::newaxis(),
xt::all()))
) = NAN;
return BSS;
}
/// Compute the continuous rank probability score based on the
/// integration over 101 Brier scores (CRPS_FROM_BS), i.e. using the
/// observed minimum, the 99 observed percentiles, and the observed
/// maximum as the exceedance thresholds.
///
/// \param q_obs
/// Streamflow observations.
/// shape: (sites, time)
/// \param q_prd
/// Streamflow predictions.
/// shape: (sites, lead times, members, time)
/// \param is_high_flow_event
/// Whether events correspond to being above the threshold(s).
/// \param t_msk
911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_mbr
/// Number of ensemble members.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// CRPS for each subset and for each threshold.
/// shape: (sites, lead times, subsets, samples)
template <class XD2, class XD4>
inline xt::xtensor<double, 4> calc_CRPS_FROM_BS(
const XD2& q_obs,
const XD4& q_prd,
bool is_high_flow_event,
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_mbr,
std::size_t n_msk,
std::size_t n_exp
)
{
// declare and initialise output variable
xt::xtensor<double, 4> CRPS_FROM_BS =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto q_obs_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(), m, xt::all()),
xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()),
NAN
);
auto q_prd_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(), m,
xt::newaxis(), xt::all()),
q_prd,
NAN
);
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
for (std::size_t l = 0; l < n_ldt; l++)
{
// compute empirical thresholds from 99 observed quantiles
xt::xtensor<double, 2> thr =
xt::zeros<double>({n_sit, std::size_t {101}});
// /!\ need to compute quantiles one site at a time
// because there is no `xt::nanquantile`, so
// need to filter NaN before computing quantiles
for (std::size_t s = 0; s < n_sit; s++)
{
auto obs = xt::view(q_obs_masked, s, l, b_exp[e]);
auto obs_filtered = xt::filter(
981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050
obs, !xt::isnan(obs)
);
if (obs_filtered.size() > 0)
{
xt::view(thr, s, xt::all()) = xt::quantile(
obs_filtered,
xt::arange<double>(0.00, 1.01, 0.01)
);
}
else
{
xt::view(thr, s, xt::all()) = NAN;
}
}
// compute observed and predicted event outcomes
auto o_k = elements::calc_o_k(
xt::view(q_obs_masked, xt::all(), l,
xt::all()),
thr, is_high_flow_event
);
auto y_k = elements::calc_y_k(
elements::calc_sum_f_k(
xt::view(q_prd_masked, xt::all(), l,
xt::newaxis(), xt::all(),
xt::all()),
thr, is_high_flow_event
),
n_mbr
);
// compute 99 Brier scores
auto bs = intermediate::calc_bs(o_k, y_k);
auto bs_masked = xt::where(
xt::view(t_msk, xt::all(), l, xt::newaxis(),
m, xt::newaxis(), xt::all()),
bs,
NAN
);
auto bs_masked_sampled = xt::view(
bs_masked, xt::all(), xt::all(), xt::all(),
b_exp[e]
);
auto BS = xt::nanmean(bs_masked_sampled, -1);
// compute CRPS from integration over 99 Brier scores
xt::view(CRPS_FROM_BS, xt::all(), l, m, e) =
// xt::trapz(y, x, axis=1)
xt::trapz(
xt::view(BS, xt::all(), 0, xt::all()),
thr,
1
);
}
}
}
return CRPS_FROM_BS;
}
}
}
}
#endif //EVALHYD_PROBABILIST_BRIER_HPP