An error occurred while loading the file. Please try again.
-
Guillaume Perréal authored5f3a557c
#include <xtensor/xmath.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xmasked_view.hpp>
#include <xtensor/xoperation.hpp>
#include "probabilist/evaluator.hpp"
namespace eh = evalhyd;
// NOTE ------------------------------------------------------------------------
// All equations in metrics below are following notations from
// "Wilks, D. S. (2011). Statistical methods in the atmospheric sciences.
// Amsterdam; Boston: Elsevier Academic Press. ISBN: 9780123850225".
// In particular, pp. 302-303, 332-333.
// -----------------------------------------------------------------------------
namespace evalhyd
{
namespace probabilist
{
// Compute the Brier score for each time step.
//
// \require o_k:
// Observed event outcome.
// shape: (thresholds, time)
// \require y_k:
// Event probability forecast.
// shape: (thresholds, time)
// \assign bs:
// Brier score for each threshold for each time step.
// shape: (thresholds, time)
void Evaluator::calc_bs()
{
// return computed Brier score(s)
// $bs = (o_k - y_k)^2$
bs = xt::square(o_k - y_k);
}
// Compute the Brier score (BS).
//
// \require t_msk:
// Temporal subsets of the whole record.
// shape: (subsets, time)
// \require bs:
// Brier score for each threshold for each time step.
// shape: (thresholds, time)
// \assign BS:
// Brier score for each subset and for each threshold.
// shape: (subsets, samples, thresholds)
void Evaluator::calc_BS()
{
// initialise output variable
// shape: (subsets, thresholds)
BS = xt::zeros<double>({n_msk, n_exp, n_thr});
// compute variable one mask at a time to minimise memory imprint
for (int m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto bs_masked = xt::where(xt::row(t_msk, m), bs, NAN);
// compute variable one sample at a time
for (int e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto bs_masked_sampled =
xt::view(bs_masked, xt::all(), b_exp[e]);
// calculate the mean over the time steps
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
// $BS = \frac{1}{n} \sum_{k=1}^{n} (o_k - y_k)^2$
xt::view(BS, m, e, xt::all()) =
xt::nanmean(bs_masked_sampled, -1);
}
}
// assign NaN where thresholds were not provided (i.e. set as NaN)
xt::masked_view(
BS,
xt::isnan(xt::view(q_thr, xt::newaxis(), xt::newaxis(), xt::all()))
) = NAN;
}
// Compute the calibration-refinement decomposition of the Brier score
// into reliability, resolution, and uncertainty.
//
// BS = reliability - resolution + uncertainty
//
// \require q_thr:
// Streamflow exceedance threshold(s).
// shape: (thresholds,)
// \require t_msk:
// Temporal subsets of the whole record.
// shape: (subsets, time)
// \require o_k:
// Observed event outcome.
// shape: (thresholds, time)
// \require y_k:
// Event probability forecast.
// shape: (thresholds, time)
// \require bar_o:
// Mean event observed outcome.
// shape: (subsets, thresholds)
// \assign BS_CRD:
// Brier score components (reliability, resolution, uncertainty)
// for each subset and for each threshold.
// shape: (subsets, samples, thresholds, components)
void Evaluator::calc_BS_CRD()
{
// declare internal variables
// shape: (bins, thresholds, time)
xt::xtensor<double, 3> msk_bins;
// shape: (bins, thresholds)
xt::xtensor<double, 2> N_i, bar_o_i;
// shape: (bins,)
xt::xtensor<double, 1> y_i;
// compute range of forecast values $y_i$
y_i = xt::arange<double>(double(n_mbr + 1)) / n_mbr;
// initialise output variable
// shape: (subsets, thresholds, components)
BS_CRD = xt::zeros<double>({n_msk, n_exp, n_thr, std::size_t {3}});
// compute variable one mask at a time to minimise memory imprint
for (int m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN);
auto y_k_masked = xt::where(xt::row(t_msk, m), y_k, NAN);
// compute variable one sample at a time
for (int e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto o_k_masked_sampled =
xt::view(o_k_masked, xt::all(), b_exp[e]);
auto y_k_masked_sampled =
xt::view(y_k_masked, xt::all(), b_exp[e]);
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
auto t_msk_sampled =
xt::view(xt::row(t_msk, m), b_exp[e]);
auto bar_o_sampled =
xt::view(bar_o, xt::all(), e, xt::all());
// calculate length of subset
auto l = xt::sum(t_msk_sampled);
// compute mask to subsample time steps belonging to same forecast bin
// (where bins are defined as the range of forecast values)
msk_bins = xt::equal(
// force evaluation to avoid segfault
xt::eval(y_k_masked_sampled),
xt::view(y_i, xt::all(), xt::newaxis(), xt::newaxis())
);
// compute number of forecasts in each forecast bin $N_i$
N_i = xt::nansum(msk_bins, -1);
// compute subsample relative frequency
// $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$
bar_o_i = xt::where(
N_i > 0,
xt::nansum(
xt::where(
msk_bins,
xt::view(o_k_masked_sampled, xt::newaxis(),
xt::all(), xt::all()),
0.
),
-1
) / N_i,
0.
);
// calculate reliability =
// $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$
xt::view(BS_CRD, m, e, xt::all(), 0) =
xt::nansum(
xt::square(
xt::view(y_i, xt::all(), xt::newaxis())
- bar_o_i
) * N_i,
0
) / l;
// calculate resolution =
// $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$
xt::view(BS_CRD, m, e, xt::all(), 1) =
xt::nansum(
xt::square(
bar_o_i - xt::view(bar_o_sampled, m)
) * N_i,
0
) / l;
// calculate uncertainty = $\bar{o} (1 - \bar{o})$
xt::view(BS_CRD, m, e, xt::all(), 2) =
xt::view(bar_o_sampled, m)
* (1 - xt::view(bar_o_sampled, m));
}
}
// assign NaN where thresholds were not provided (i.e. set as NaN)
xt::masked_view(
BS_CRD,
xt::isnan(xt::view(q_thr, xt::newaxis(), xt::newaxis(),
xt::all(), xt::newaxis()))
) = NAN;
}
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
// Compute the likelihood-base rate decomposition of the Brier score
// into type 2 bias, discrimination, and sharpness (a.k.a. refinement).
//
// BS = type 2 bias - discrimination + sharpness
//
// \require t_msk:
// Temporal subsets of the whole record.
// shape: (subsets, time)
// \require o_k:
// Observed event outcome.
// shape: (thresholds, time)
// \require y_k:
// Event probability forecast.
// shape: (thresholds, time)
// \return BS_LBD:
// Brier score components (type 2 bias, discrimination, sharpness)
// for each subset and for each threshold.
// shape: (subsets, samples, thresholds, components)
void Evaluator::calc_BS_LBD()
{
// declare internal variables
// shape: (bins, thresholds, time)
xt::xtensor<double, 3> msk_bins;
// shape: (thresholds,)
xt::xtensor<double, 1> bar_y;
// shape: (bins, thresholds)
xt::xtensor<double, 2> M_j, bar_y_j;
// shape: (bins,)
xt::xtensor<double, 1> o_j;
// set the range of observed values $o_j$
o_j = {0., 1.};
// declare and initialise output variable
// shape: (subsets, thresholds, components)
BS_LBD = xt::zeros<double>({n_msk, n_exp, n_thr, std::size_t {3}});
// compute variable one mask at a time to minimise memory imprint
for (int m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN);
auto y_k_masked = xt::where(xt::row(t_msk, m), y_k, NAN);
// compute variable one sample at a time
for (int e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto o_k_masked_sampled =
xt::view(o_k_masked, xt::all(), b_exp[e]);
auto y_k_masked_sampled =
xt::view(y_k_masked, xt::all(), b_exp[e]);
auto t_msk_sampled =
xt::view(t_msk, xt::all(), b_exp[e]);
// calculate length of subset
auto l = xt::sum(xt::row(t_msk_sampled, m));
// compute mask to subsample time steps belonging to same observation bin
// (where bins are defined as the range of forecast values)
msk_bins = xt::equal(
// force evaluation to avoid segfault
xt::eval(o_k_masked_sampled),
xt::view(o_j, xt::all(), xt::newaxis(), xt::newaxis())
);
// compute number of observations in each observation bin $M_j$
M_j = xt::nansum(msk_bins, -1);
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
// compute subsample relative frequency
// $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$
bar_y_j = xt::where(
M_j > 0,
xt::nansum(
xt::where(
msk_bins,
xt::view(
y_k_masked_sampled,
xt::newaxis(), xt::all(), xt::all()
),
0.
),
-1
) / M_j,
0.
);
// compute mean "climatology" forecast probability
// $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$
bar_y = xt::nanmean(y_k_masked_sampled, -1);
// calculate type 2 bias =
// $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$
xt::view(BS_LBD, m, e, xt::all(), 0) =
xt::nansum(
xt::square(
xt::view(o_j, xt::all(), xt::newaxis())
- bar_y_j
) * M_j,
0
) / l;
// calculate discrimination =
// $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$
xt::view(BS_LBD, m, e, xt::all(), 1) =
xt::nansum(
xt::square(
bar_y_j - bar_y
) * M_j,
0
) / l;
// calculate sharpness =
// $\frac{1}{n} \sum_{k=1}^{n} (\bar{y_k} - \bar{y})^2$
xt::view(BS_LBD, m, e, xt::all(), 2) =
xt::nansum(
xt::square(
y_k_masked_sampled
- xt::view(bar_y, xt::all(), xt::newaxis())
),
-1
) / l;
}
}
// assign NaN where thresholds were not provided (i.e. set as NaN)
xt::masked_view(
BS_LBD,
xt::isnan(xt::view(q_thr, xt::newaxis(), xt::newaxis(),
xt::all(), xt::newaxis()))
) = NAN;
}
// Compute the Brier skill score (BSS).
//
// \require t_msk:
// Temporal subsets of the whole record.
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
// shape: (subsets, time)
// \require o_k:
// Observed event outcome.
// shape: (thresholds, time)
// \require bar_o:
// Mean event observed outcome.
// shape: (subsets, thresholds)
// \require bs:
// Brier scores for each time step.
// shape: (thresholds, time)
// \assign BSS:
// Brier skill score for each subset and for each threshold.
// shape: (subsets, samples, thresholds)
void Evaluator::calc_BSS()
{
// declare and initialise output variable
// shape: (subsets, thresholds)
BSS = xt::zeros<double>({n_msk, n_exp, n_thr});
// compute variable one mask at a time to minimise memory imprint
for (int m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN);
auto bs_masked = xt::where(xt::row(t_msk, m), bs, NAN);
// compute variable one sample at a time
for (int e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto o_k_masked_sampled =
xt::view(o_k_masked, xt::all(), b_exp[e]);
auto bs_masked_sampled =
xt::view(bs_masked, xt::all(), b_exp[e]);
auto bar_o_sampled =
xt::view(bar_o, xt::all(), e, xt::all());
// calculate reference Brier score(s)
// $bs_{ref} = \frac{1}{n} \sum_{k=1}^{n} (o_k - \bar{o})^2$
xt::xtensor<double, 2> bs_ref =
xt::nanmean(
xt::square(
o_k_masked_sampled -
xt::view(
xt::view(bar_o_sampled, m),
xt::all(), xt::newaxis()
)
),
-1, xt::keep_dims
);
// compute Brier skill score(s)
// $BSS = \frac{1}{n} \sum_{k=1}^{n} 1 - \frac{bs}{bs_{ref}}
xt::view(BSS, m, e, xt::all()) =
xt::nanmean(
xt::where(
xt::equal(bs_ref, 0),
0, 1 - (bs_masked_sampled / bs_ref)
),
-1
);
}
}
// assign NaN where thresholds were not provided (i.e. set as NaN)
xt::masked_view(
BSS,
xt::isnan(xt::view(q_thr, xt::newaxis(), xt::newaxis(),
xt::all()))
421422423424425
) = NAN;
}
}
}