Commit fadb5fa9 authored by Thibault Hallouin's avatar Thibault Hallouin
Browse files

deal with both missing observations/predictions for determinist

Unlike 3de89046, this also deals with
missing predictions. This effectively pairwise remove those time steps
where either observations or predictions are missing data. This does so
without creating copies of the input data, via the use of xfunctions
stored as class members.

The unit test for missing data is updated to include NAN also in the
predictions (with a varying number of NAN across the 5 time series).
Showing with 69 additions and 35 deletions
+69 -35
......@@ -35,14 +35,17 @@ namespace evalhyd
/// :Parameters:
///
/// q_obs: ``xt::xexpression<A>``
/// Streamflow observations. Time steps with missing observation
/// Streamflow observations. Time steps with missing observations
/// must be assigned `NAN` values. Those time steps will be ignored
/// both in the observations and the predictions before the
/// *metrics* are computed.
/// shape: ({1+, ...}, time)
///
/// q_prd: ``xt::xexpression<A>``
/// Streamflow predictions.
/// Streamflow predictions. Time steps with missing predictions
/// must be assigned `NAN` values. Those time steps will be ignored
/// both in the observations and the predictions before the
/// *metrics* are computed.
/// shape: ({1+, ...}, time)
///
/// metrics: ``std::vector<std::string>``
......
......@@ -26,10 +26,20 @@ namespace evalhyd
template <class A>
class Evaluator
{
// define type for function to be used to "mask" (i.e. yield NaNs) the
// time steps for which the observations/predictions are missing data
typedef xt::xfunction<xt::detail::conditional_ternary,
xt::xfunction<xt::math::isnan_fun, const A&>,
xt::xscalar<float>,
const A&> nan_function;
private:
// members for input data
const A& q_obs;
const A& q_prd;
const A& _q_obs;
const A& _q_prd;
nan_function q_obs;
nan_function q_prd;
// members for computational elements
A mean_obs;
A mean_prd;
......@@ -43,17 +53,33 @@ namespace evalhyd
// constructor method
Evaluator(const xt::xexpression<A>& obs,
const xt::xexpression<A>& prd) :
q_obs{obs.derived_cast()},
q_prd{prd.derived_cast()}
_q_obs{obs.derived_cast()},
_q_prd{prd.derived_cast()},
// "mask" (i.e. use NaN) observations where predictions are
// missing (i.e. NaN)
q_obs{xt::where(xt::isnan(prd.derived_cast()),
NAN, obs.derived_cast())},
// "mask" (i.e. use NaN) predictions where observations are
// missing (i.e. NaN)
q_prd{xt::where(xt::isnan(obs.derived_cast()),
NAN, prd.derived_cast())}
{
// check that data dimensions are compatible
if (q_prd.dimension() != q_obs.dimension())
if (_q_prd.dimension() != _q_obs.dimension())
{
throw std::runtime_error(
"number of dimensions of 'sim' and 'obs' "
"must be identical"
);
}
else if (_q_prd.shape(_q_prd.dimension() - 1)
!= _q_obs.shape( _q_obs.dimension() - 1))
{
throw std::runtime_error(
"length of time series of 'sim' and 'obs' "
"must be identical"
);
}
};
// members for evaluation metrics
......@@ -103,10 +129,7 @@ namespace evalhyd
template <class A>
void Evaluator<A>::calc_mean_prd()
{
mean_prd = xt::nanmean(
xt::where(xt::isnan(q_obs), NAN, q_prd),
-1, xt::keep_dims
);
mean_prd = xt::nanmean(q_prd, -1, xt::keep_dims);
}
// Compute the quadratic error between observations and predictions.
......@@ -157,9 +180,7 @@ namespace evalhyd
template <class A>
void Evaluator<A>::calc_quad_prd()
{
quad_prd = xt::square(
xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd
);
quad_prd = xt::square(q_prd - mean_prd);
}
// Compute the Pearson correlation coefficient.
......@@ -191,8 +212,7 @@ namespace evalhyd
// calculate error in timing and dynamics $r_{pearson}$
// (Pearson's correlation coefficient)
auto r_num = xt::nansum(
(xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd)
* (q_obs - mean_obs),
(q_prd - mean_prd) * (q_obs - mean_obs),
-1, xt::keep_dims
);
auto r_den = xt::sqrt(
......@@ -218,8 +238,7 @@ namespace evalhyd
void Evaluator<A>::calc_bias()
{
// calculate $bias$
bias = xt::nansum(xt::where(xt::isnan(q_obs), NAN, q_prd),
-1, xt::keep_dims)
bias = xt::nansum(q_prd, -1, xt::keep_dims)
/ xt::nansum(q_obs, -1, xt::keep_dims);
}
......@@ -287,8 +306,7 @@ namespace evalhyd
void Evaluator<A>::calc_KGE()
{
// calculate error in spread of flow $alpha$
auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd);
auto alpha = detail::std_dev(q_prd_, mean_prd)
auto alpha = detail::std_dev(q_prd, mean_prd)
/ detail::std_dev(q_obs, mean_obs);
// compute KGE
......@@ -326,8 +344,7 @@ namespace evalhyd
void Evaluator<A>::calc_KGEPRIME()
{
// calculate error in spread of flow $gamma$
auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd);
auto gamma = (detail::std_dev(q_prd_, mean_prd) / mean_prd)
auto gamma = (detail::std_dev(q_prd, mean_prd) / mean_prd)
/ (detail::std_dev(q_obs, mean_obs) / mean_obs);
// compute KGE
......
......@@ -189,6 +189,12 @@ TEST(DeterministTests, TestMissingData)
// add some missing observations artificially by assigning NaN values
xt::view(observed, xt::all(), xt::range(0, 20)) = NAN;
// add some missing predictions artificially by assigning NaN values
xt::view(observed, 0, xt::range(20, 23)) = NAN;
xt::view(observed, 1, xt::range(20, 26)) = NAN;
xt::view(observed, 2, xt::range(20, 29)) = NAN;
xt::view(observed, 3, xt::range(20, 32)) = NAN;
xt::view(observed, 4, xt::range(20, 35)) = NAN;
// compute metrics with observations containing NaN values
std::vector<xt::xarray<double>> metrics_nan =
......@@ -196,19 +202,27 @@ TEST(DeterministTests, TestMissingData)
observed, predicted, metrics
);
// compute metrics on subset of observations and predictions
// (i.e. eliminating region with NaN in observations manually)
xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _));
xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _));
std::vector<xt::xarray<double>> metrics_sbs =
evalhyd::evald<xt::xtensor<double, 2>>(
obs, prd, metrics
);
for (int m = 0; m < metrics.size(); m++) {
EXPECT_TRUE(
xt::allclose(metrics_nan[m], metrics_sbs[m])
) << "Failure for (" << metrics[m] << ")";
for (int p = 0; p < predicted.shape(0); p++) {
// compute metrics on subset of observations and predictions
// (i.e. eliminating region with NaN in observations manually)
xt::xtensor<double, 1> obs =
xt::view(observed, 0, xt::range(3*(p+1), _));
xt::xtensor<double, 1> prd =
xt::view(predicted, p, xt::range(3*(p+1), _));
std::vector<xt::xarray<double>> metrics_sbs =
evalhyd::evald<xt::xtensor<double, 1>>(
obs, prd, {metrics[m]}
);
// compare to check results are the same
EXPECT_TRUE(
xt::allclose(
xt::view(metrics_nan[m], p),
metrics_sbs[0]
)
) << "Failure for (" << metrics[m] << ")";
}
}
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment