Commit fadb5fa9 authored by Thibault Hallouin's avatar Thibault Hallouin
Browse files

deal with both missing observations/predictions for determinist

Unlike 3de89046, this also deals with
missing predictions. This effectively pairwise remove those time steps
where either observations or predictions are missing data. This does so
without creating copies of the input data, via the use of xfunctions
stored as class members.

The unit test for missing data is updated to include NAN also in the
predictions (with a varying number of NAN across the 5 time series).
Showing with 69 additions and 35 deletions
+69 -35
...@@ -35,14 +35,17 @@ namespace evalhyd ...@@ -35,14 +35,17 @@ namespace evalhyd
/// :Parameters: /// :Parameters:
/// ///
/// q_obs: ``xt::xexpression<A>`` /// q_obs: ``xt::xexpression<A>``
/// Streamflow observations. Time steps with missing observation /// Streamflow observations. Time steps with missing observations
/// must be assigned `NAN` values. Those time steps will be ignored /// must be assigned `NAN` values. Those time steps will be ignored
/// both in the observations and the predictions before the /// both in the observations and the predictions before the
/// *metrics* are computed. /// *metrics* are computed.
/// shape: ({1+, ...}, time) /// shape: ({1+, ...}, time)
/// ///
/// q_prd: ``xt::xexpression<A>`` /// q_prd: ``xt::xexpression<A>``
/// Streamflow predictions. /// Streamflow predictions. Time steps with missing predictions
/// must be assigned `NAN` values. Those time steps will be ignored
/// both in the observations and the predictions before the
/// *metrics* are computed.
/// shape: ({1+, ...}, time) /// shape: ({1+, ...}, time)
/// ///
/// metrics: ``std::vector<std::string>`` /// metrics: ``std::vector<std::string>``
......
...@@ -26,10 +26,20 @@ namespace evalhyd ...@@ -26,10 +26,20 @@ namespace evalhyd
template <class A> template <class A>
class Evaluator class Evaluator
{ {
// define type for function to be used to "mask" (i.e. yield NaNs) the
// time steps for which the observations/predictions are missing data
typedef xt::xfunction<xt::detail::conditional_ternary,
xt::xfunction<xt::math::isnan_fun, const A&>,
xt::xscalar<float>,
const A&> nan_function;
private: private:
// members for input data // members for input data
const A& q_obs; const A& _q_obs;
const A& q_prd; const A& _q_prd;
nan_function q_obs;
nan_function q_prd;
// members for computational elements // members for computational elements
A mean_obs; A mean_obs;
A mean_prd; A mean_prd;
...@@ -43,17 +53,33 @@ namespace evalhyd ...@@ -43,17 +53,33 @@ namespace evalhyd
// constructor method // constructor method
Evaluator(const xt::xexpression<A>& obs, Evaluator(const xt::xexpression<A>& obs,
const xt::xexpression<A>& prd) : const xt::xexpression<A>& prd) :
q_obs{obs.derived_cast()}, _q_obs{obs.derived_cast()},
q_prd{prd.derived_cast()} _q_prd{prd.derived_cast()},
// "mask" (i.e. use NaN) observations where predictions are
// missing (i.e. NaN)
q_obs{xt::where(xt::isnan(prd.derived_cast()),
NAN, obs.derived_cast())},
// "mask" (i.e. use NaN) predictions where observations are
// missing (i.e. NaN)
q_prd{xt::where(xt::isnan(obs.derived_cast()),
NAN, prd.derived_cast())}
{ {
// check that data dimensions are compatible // check that data dimensions are compatible
if (q_prd.dimension() != q_obs.dimension()) if (_q_prd.dimension() != _q_obs.dimension())
{ {
throw std::runtime_error( throw std::runtime_error(
"number of dimensions of 'sim' and 'obs' " "number of dimensions of 'sim' and 'obs' "
"must be identical" "must be identical"
); );
} }
else if (_q_prd.shape(_q_prd.dimension() - 1)
!= _q_obs.shape( _q_obs.dimension() - 1))
{
throw std::runtime_error(
"length of time series of 'sim' and 'obs' "
"must be identical"
);
}
}; };
// members for evaluation metrics // members for evaluation metrics
...@@ -103,10 +129,7 @@ namespace evalhyd ...@@ -103,10 +129,7 @@ namespace evalhyd
template <class A> template <class A>
void Evaluator<A>::calc_mean_prd() void Evaluator<A>::calc_mean_prd()
{ {
mean_prd = xt::nanmean( mean_prd = xt::nanmean(q_prd, -1, xt::keep_dims);
xt::where(xt::isnan(q_obs), NAN, q_prd),
-1, xt::keep_dims
);
} }
// Compute the quadratic error between observations and predictions. // Compute the quadratic error between observations and predictions.
...@@ -157,9 +180,7 @@ namespace evalhyd ...@@ -157,9 +180,7 @@ namespace evalhyd
template <class A> template <class A>
void Evaluator<A>::calc_quad_prd() void Evaluator<A>::calc_quad_prd()
{ {
quad_prd = xt::square( quad_prd = xt::square(q_prd - mean_prd);
xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd
);
} }
// Compute the Pearson correlation coefficient. // Compute the Pearson correlation coefficient.
...@@ -191,8 +212,7 @@ namespace evalhyd ...@@ -191,8 +212,7 @@ namespace evalhyd
// calculate error in timing and dynamics $r_{pearson}$ // calculate error in timing and dynamics $r_{pearson}$
// (Pearson's correlation coefficient) // (Pearson's correlation coefficient)
auto r_num = xt::nansum( auto r_num = xt::nansum(
(xt::where(xt::isnan(q_obs), NAN, q_prd) - mean_prd) (q_prd - mean_prd) * (q_obs - mean_obs),
* (q_obs - mean_obs),
-1, xt::keep_dims -1, xt::keep_dims
); );
auto r_den = xt::sqrt( auto r_den = xt::sqrt(
...@@ -218,8 +238,7 @@ namespace evalhyd ...@@ -218,8 +238,7 @@ namespace evalhyd
void Evaluator<A>::calc_bias() void Evaluator<A>::calc_bias()
{ {
// calculate $bias$ // calculate $bias$
bias = xt::nansum(xt::where(xt::isnan(q_obs), NAN, q_prd), bias = xt::nansum(q_prd, -1, xt::keep_dims)
-1, xt::keep_dims)
/ xt::nansum(q_obs, -1, xt::keep_dims); / xt::nansum(q_obs, -1, xt::keep_dims);
} }
...@@ -287,8 +306,7 @@ namespace evalhyd ...@@ -287,8 +306,7 @@ namespace evalhyd
void Evaluator<A>::calc_KGE() void Evaluator<A>::calc_KGE()
{ {
// calculate error in spread of flow $alpha$ // calculate error in spread of flow $alpha$
auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd); auto alpha = detail::std_dev(q_prd, mean_prd)
auto alpha = detail::std_dev(q_prd_, mean_prd)
/ detail::std_dev(q_obs, mean_obs); / detail::std_dev(q_obs, mean_obs);
// compute KGE // compute KGE
...@@ -326,8 +344,7 @@ namespace evalhyd ...@@ -326,8 +344,7 @@ namespace evalhyd
void Evaluator<A>::calc_KGEPRIME() void Evaluator<A>::calc_KGEPRIME()
{ {
// calculate error in spread of flow $gamma$ // calculate error in spread of flow $gamma$
auto q_prd_ = xt::where(xt::isnan(q_obs), NAN, q_prd); auto gamma = (detail::std_dev(q_prd, mean_prd) / mean_prd)
auto gamma = (detail::std_dev(q_prd_, mean_prd) / mean_prd)
/ (detail::std_dev(q_obs, mean_obs) / mean_obs); / (detail::std_dev(q_obs, mean_obs) / mean_obs);
// compute KGE // compute KGE
......
...@@ -189,6 +189,12 @@ TEST(DeterministTests, TestMissingData) ...@@ -189,6 +189,12 @@ TEST(DeterministTests, TestMissingData)
// add some missing observations artificially by assigning NaN values // add some missing observations artificially by assigning NaN values
xt::view(observed, xt::all(), xt::range(0, 20)) = NAN; xt::view(observed, xt::all(), xt::range(0, 20)) = NAN;
// add some missing predictions artificially by assigning NaN values
xt::view(observed, 0, xt::range(20, 23)) = NAN;
xt::view(observed, 1, xt::range(20, 26)) = NAN;
xt::view(observed, 2, xt::range(20, 29)) = NAN;
xt::view(observed, 3, xt::range(20, 32)) = NAN;
xt::view(observed, 4, xt::range(20, 35)) = NAN;
// compute metrics with observations containing NaN values // compute metrics with observations containing NaN values
std::vector<xt::xarray<double>> metrics_nan = std::vector<xt::xarray<double>> metrics_nan =
...@@ -196,19 +202,27 @@ TEST(DeterministTests, TestMissingData) ...@@ -196,19 +202,27 @@ TEST(DeterministTests, TestMissingData)
observed, predicted, metrics observed, predicted, metrics
); );
// compute metrics on subset of observations and predictions
// (i.e. eliminating region with NaN in observations manually)
xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _));
xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _));
std::vector<xt::xarray<double>> metrics_sbs =
evalhyd::evald<xt::xtensor<double, 2>>(
obs, prd, metrics
);
for (int m = 0; m < metrics.size(); m++) { for (int m = 0; m < metrics.size(); m++) {
EXPECT_TRUE( for (int p = 0; p < predicted.shape(0); p++) {
xt::allclose(metrics_nan[m], metrics_sbs[m]) // compute metrics on subset of observations and predictions
) << "Failure for (" << metrics[m] << ")"; // (i.e. eliminating region with NaN in observations manually)
xt::xtensor<double, 1> obs =
xt::view(observed, 0, xt::range(3*(p+1), _));
xt::xtensor<double, 1> prd =
xt::view(predicted, p, xt::range(3*(p+1), _));
std::vector<xt::xarray<double>> metrics_sbs =
evalhyd::evald<xt::xtensor<double, 1>>(
obs, prd, {metrics[m]}
);
// compare to check results are the same
EXPECT_TRUE(
xt::allclose(
xt::view(metrics_nan[m], p),
metrics_sbs[0]
)
) << "Failure for (" << metrics[m] << ")";
}
} }
} }
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment