Commit b90350d2 authored by Thibault Hallouin's avatar Thibault Hallouin
Browse files

add ranks-based probabilistic metrics (REL_DIAG, DS, AS)

1 merge request!3release v0.1.0
Pipeline #43608 passed with stage
in 2 minutes and 37 seconds
Showing with 528 additions and 1 deletion
+528 -1
......@@ -15,6 +15,7 @@
#include "brier.hpp"
#include "quantiles.hpp"
#include "contingency.hpp"
#include "ranks.hpp"
namespace evalhyd
......@@ -57,6 +58,9 @@ namespace evalhyd
xtl::xoptional<xt::xtensor<double, 5>, bool> ct_b;
xtl::xoptional<xt::xtensor<double, 5>, bool> ct_c;
xtl::xoptional<xt::xtensor<double, 5>, bool> ct_d;
// > Ranks-based
xtl::xoptional<xt::xtensor<double, 3>, bool> r_k;
xtl::xoptional<xt::xtensor<double, 5>, bool> o_j;
// members for intermediate evaluation metrics
// (i.e. before the reduction along the temporal axis)
......@@ -86,6 +90,10 @@ namespace evalhyd
xtl::xoptional<xt::xtensor<double, 6>, bool> FAR;
xtl::xoptional<xt::xtensor<double, 6>, bool> CSI;
xtl::xoptional<xt::xtensor<double, 5>, bool> ROCSS;
// > Ranks-based
xtl::xoptional<xt::xtensor<double, 6>, bool> REL_DIAG;
xtl::xoptional<xt::xtensor<double, 4>, bool> DS;
xtl::xoptional<xt::xtensor<double, 4>, bool> AS;
// methods to get optional parameters
auto get_q_thr()
......@@ -242,6 +250,29 @@ namespace evalhyd
return ct_d.value();
};
xt::xtensor<double, 3> get_r_k()
{
if (!r_k.has_value())
{
r_k = elements::calc_r_k(
q_obs, get_q_qnt(), n_mbr
);
}
return r_k.value();
};
xt::xtensor<double, 5> get_o_j()
{
if (!o_j.has_value())
{
o_j = elements::calc_o_j(
get_r_k(), t_msk, b_exp,
n_sit, n_ldt, n_mbr, n_msk, n_exp
);
}
return o_j.value();
};
// methods to compute intermediate metrics
xt::xtensor<double, 4> get_bs()
{
......@@ -511,6 +542,42 @@ namespace evalhyd
}
return ROCSS.value();
};
xt::xtensor<double, 6> get_REL_DIAG()
{
if (!REL_DIAG.has_value())
{
REL_DIAG = metrics::calc_REL_DIAG(
get_o_j(), t_msk, b_exp,
n_sit, n_ldt, n_mbr, n_msk, n_exp
);
}
return REL_DIAG.value();
};
xt::xtensor<double, 4> get_DS()
{
if (!DS.has_value())
{
DS = metrics::calc_DS(
get_o_j(), t_msk, b_exp,
n_sit, n_ldt, n_mbr, n_msk, n_exp
);
}
return DS.value();
};
xt::xtensor<double, 4> get_AS()
{
if (!AS.has_value())
{
AS = metrics::calc_AS(
get_r_k(), t_msk, b_exp,
n_sit, n_ldt, n_mbr, n_msk, n_exp
);
}
return AS.value();
};
};
}
}
......
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#ifndef EVALHYD_PROBABILIST_RANKS_HPP
#define EVALHYD_PROBABILIST_RANKS_HPP
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xindex_view.hpp>
#include <xtensor/xsort.hpp>
#include <xtensor/xrandom.hpp>
namespace evalhyd
{
namespace probabilist
{
namespace elements
{
/// Compute the position of the observations amongst the ensemble
/// member predictions (i.e. their ranks).
///
/// \param q_obs
/// Streamflow observations.
/// shape: (sites, time)
/// \param q_qnt
/// Streamflow quantiles.
/// shape: (sites, lead times, quantiles, time)
/// \param n_mbr
/// Number of ensemble members.
/// \return
/// Ranks of streamflow observations.
/// shape: (sites, lead times, time)
template <class XD2, class XD4>
inline xt::xtensor<double, 3> calc_r_k(
const XD2& q_obs,
const XD4& q_qnt,
std::size_t n_mbr
)
{
xt::xtensor<double, 3> ranks = xt::zeros<double>(
{q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)}
);
xt::view(ranks, xt::all()) = NAN;
xt::xtensor<double, 3> min_ranks = xt::zeros<double>(
{q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)}
);
xt::view(min_ranks, xt::all()) = NAN;
xt::xtensor<double, 3> max_ranks = xt::zeros<double>(
{q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)}
);
xt::view(max_ranks, xt::all()) = NAN;
for (int m = 0; m < n_mbr; m++)
{
// strictly below a member and no rank yet
xt::view(ranks, xt::all()) = xt::where(
(xt::view(q_obs, xt::all(), xt::newaxis(), xt::all())
< xt::view(q_qnt, xt::all(), xt::all(), m, xt::all()))
&&
xt::isnan(ranks),
m,
ranks
);
// first time tied with a member
xt::view(min_ranks, xt::all()) = xt::where(
xt::equal(xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()),
xt::view(q_qnt, xt::all(), xt::all(), m, xt::all()))
&&
xt::isnan(min_ranks),
m,
min_ranks
);
// again tied with a member
xt::view(max_ranks, xt::all()) = xt::where(
xt::equal(xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()),
xt::view(q_qnt, xt::all(), xt::all(), m, xt::all()))
&&
!xt::isnan(min_ranks),
m + 1,
max_ranks
);
}
// above last member
xt::view(ranks, xt::all()) = xt::where(
xt::view(q_obs, xt::all(), xt::newaxis(), xt::all())
> xt::view(q_qnt, xt::all(), xt::all(), n_mbr - 1, xt::all()),
n_mbr,
ranks
);
// for ties, take random rank between min and max
xt::view(ranks, xt::all()) = xt::where(
!xt::isnan(min_ranks),
min_ranks
+ xt::round((max_ranks - max_ranks + 1)
* xt::random::rand<double>(ranks.shape())),
ranks
);
return ranks;
}
/// Compute the number of observations for all possible rank values.
///
/// \param r_k
/// Ranks of streamflow observations.
/// shape: (sites, lead times, time)
/// \param t_msk
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_mbr
/// Number of ensemble members.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// Tallies of streamflow observations for all possible ranks.
/// shape: (sites, lead times, subsets, samples, ranks)
inline xt::xtensor<double, 5> calc_o_j(
const xt::xtensor<double, 3>& r_k,
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_mbr,
std::size_t n_msk,
std::size_t n_exp
)
{
// initialise output variable
xt::xtensor<double, 5> o_j =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_mbr + 1});
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto r_k_masked = xt::where(
xt::view(t_msk, xt::all(), xt::all(), m, xt::all()),
r_k,
NAN
);
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto r_k_masked_sampled =
xt::view(r_k_masked, xt::all(), xt::all(),
b_exp[e]);
for (int j = 0; j < n_mbr + 1; j++)
{
// compute the observed relative frequency
// $o_j = \sum_{k \in M_j} r_k$
xt::view(o_j, xt::all(), xt::all(), m, e, j) =
xt::nansum(
xt::equal(r_k_masked_sampled, j),
-1
);
}
}
}
return o_j;
}
}
namespace metrics
{
/// Compute the reliability diagram X and Y axes.
///
/// \param o_j
/// Tallies of streamflow observations for all possible ranks.
/// shape: (sites, lead times, subsets, samples, ranks)
/// \param t_msk
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_mbr
/// Number of ensemble members.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// X and Y axes of the reliability diagram.
/// shape: (sites, lead times, subsets, samples, ranks, axes)
inline xt::xtensor<double, 6> calc_REL_DIAG(
const xt::xtensor<double, 5>& o_j,
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_mbr,
std::size_t n_msk,
std::size_t n_exp
)
{
// initialise output variable
xt::xtensor<double, 6> REL_DIAG =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp,
n_mbr + 1, std::size_t {2}});
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto t_msk_sampled =
xt::view(t_msk, xt::all(), xt::all(),
m, b_exp[e]);
// calculate length of subset
auto l = xt::sum(t_msk_sampled, -1);
// compute the observed relative frequency
// $\bar{o_j} = \frac{1}{n} \sum_{k \in M_j} r_k$
xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(), 0) =
xt::cumsum(
xt::view(o_j, xt::all(), xt::all(),
m, e, xt::all())
/ l,
-1
);
// compute the forecast probability $y_i$
xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(), 1) =
xt::arange<double>(double(n_mbr + 1)) / n_mbr;
}
}
return REL_DIAG;
}
/// Compute the Delta score.
///
/// \param o_j
/// Tallies of streamflow observations for all possible ranks.
/// shape: (sites, lead times, subsets, samples, ranks)
/// \param t_msk
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_mbr
/// Number of ensemble members.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// Delta scores.
/// shape: (sites, lead times, subsets, samples)
inline xt::xtensor<double, 4> calc_DS(
const xt::xtensor<double, 5>& o_j,
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_mbr,
std::size_t n_msk,
std::size_t n_exp
)
{
// initialise output variable
xt::xtensor<double, 4> DS =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto t_msk_sampled =
xt::view(t_msk, xt::all(), xt::all(),
m, b_exp[e]);
// calculate length of subset
auto l = xt::sum(t_msk_sampled, -1);
// compute the Delta score
// $\bar{o_j} = \frac{1}{n} \sum_{k \in M_j} r_k$
xt::view(DS, xt::all(), xt::all(), m, e) =
xt::nansum(
xt::square(
xt::view(o_j, xt::all(), xt::all(), m, e, xt::all())
- (l / (n_mbr + 1))
),
-1
) / (l * n_mbr / (n_mbr + 1));
}
}
return DS;
}
/// Compute the Alpha score.
///
/// \param r_k
/// Ranks of streamflow observations.
/// shape: (sites, lead times, time)
/// \param t_msk
/// Temporal subsets of the whole record.
/// shape: (sites, lead times, subsets, time)
/// \param b_exp
/// Boostrap samples.
/// shape: (samples, time slice)
/// \param n_sit
/// Number of sites.
/// \param n_ldt
/// Number of lead times.
/// \param n_mbr
/// Number of ensemble members.
/// \param n_msk
/// Number of temporal subsets.
/// \param n_exp
/// Number of bootstrap samples.
/// \return
/// Alpha scores.
/// shape: (sites, lead times, subsets, samples)
inline xt::xtensor<double, 4> calc_AS(
const xt::xtensor<double, 3>& r_k,
const xt::xtensor<bool, 4>& t_msk,
const std::vector<xt::xkeep_slice<int>>& b_exp,
std::size_t n_sit,
std::size_t n_ldt,
std::size_t n_mbr,
std::size_t n_msk,
std::size_t n_exp
)
{
// initialise output variable
xt::xtensor<double, 4> AS =
xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});
// compute one site and one leadtime at a time because of
// potential NaN (of varying numbers across sites/lead times)
// in the ranks that are not put at the end with `xt::sort`
// (unlike `numpy.sort`) which prevent from an easy conversion
// from rank to probability
for (std::size_t s = 0; s < n_sit; s++)
{
for (std::size_t l = 0; l < n_ldt; l++)
{
// compute variable one mask at a time to minimise memory imprint
for (std::size_t m = 0; m < n_msk; m++)
{
// apply the mask
// (using NaN workaround until reducers work on masked_view)
auto r_k_masked = xt::where(
xt::view(t_msk, s, l, m, xt::all()),
xt::view(r_k, s, l, xt::all()),
NAN
);
// compute variable one sample at a time
for (std::size_t e = 0; e < n_exp; e++)
{
// apply the bootstrap sampling
auto r_k_masked_sampled =
xt::view(r_k_masked, b_exp[e]);
// notations below follow Renard et al. (2010)
// https://doi.org/10.1029/2009WR008328
// compute observed p values
// $p_{x(i)}$
auto p_x_i = xt::sort(
xt::eval(
// filter out NaNs
xt::filter(
r_k_masked_sampled,
!xt::isnan(r_k_masked_sampled)
)
/ n_mbr
)
);
// calculate length of realisations
// $N_x$
auto N_x = p_x_i.size();
// compute theoretical p values
// $p_{x(i)}^{(th)}$
auto p_x_i_th =
xt::arange<double>(double(N_x)) / (N_x - 1);
// compute area between the predictive curve and
// the 1:1 line in the Q-Q plot
// $\alpha'_x$
auto alpha_prime_x = xt::nanmean(
xt::abs(p_x_i - p_x_i_th)
);
// compute the alpha score
// $\alpha_x = 1 - 2 \alpha'_x$
xt::view(AS, s, l, m, e) = 1 - 2 * alpha_prime_x;
}
}
}
}
return AS;
}
}
}
}
#endif //EVALHYD_PROBABILIST_RANKS_HPP
\ No newline at end of file
......@@ -198,7 +198,8 @@ namespace evalhyd
metrics,
{"BS", "BSS", "BS_CRD", "BS_LBD",
"QS", "CRPS",
"POD", "POFD", "FAR", "CSI", "ROCSS"}
"POD", "POFD", "FAR", "CSI", "ROCSS",
"REL_DIAG", "DS", "AS"}
);
// check optional parameters
......@@ -436,6 +437,24 @@ namespace evalhyd
uncertainty::summarise(evaluator.get_ROCSS(), summary)
);
}
else if ( metric == "REL_DIAG" )
{
r.emplace_back(
uncertainty::summarise(evaluator.get_REL_DIAG(), summary)
);
}
else if ( metric == "DS" )
{
r.emplace_back(
uncertainty::summarise(evaluator.get_DS(), summary)
);
}
else if ( metric == "AS" )
{
r.emplace_back(
uncertainty::summarise(evaluator.get_AS(), summary)
);
}
}
return r;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment