Commit 397501ad authored by Thibault Hallouin's avatar Thibault Hallouin
Browse files

deal with missing data flagged as NaN in observations/predictions

The general approach is to "eliminate" the time steps where observations
or predictions are missing as early as possible in the algorithm. The
best approach seemed to update the user-provided temporal masks to
also mask those time steps with missing data.

An alternative approach would have been to create a view on the
observations and predictions, e.g. using something like
`xt::view(obs, ..., xt::drop(...))`, but this produces a non-contiguous
view which cannot be sorted with `xt::sort` later to determine the
quantiles.

This is documented in `evalp` docstring and new unit tests are added.
No related merge requests found
Pipeline #38528 passed with stages
in 2 minutes and 33 seconds
Showing with 115 additions and 3 deletions
+115 -3
...@@ -22,11 +22,17 @@ namespace evalhyd ...@@ -22,11 +22,17 @@ namespace evalhyd
/// :Parameters: /// :Parameters:
/// ///
/// q_obs: ``xt::xtensor<double, 2>`` /// q_obs: ``xt::xtensor<double, 2>``
/// Streamflow observations. /// Streamflow observations. Time steps with missing observation
/// must be assigned `NAN` values. Those time steps will be pairwise
/// ignored in the observations and the predictions before the
/// *metrics* are computed.
/// shape: (sites, time) /// shape: (sites, time)
/// ///
/// q_prd: ``xt::xtensor<double, 4>`` /// q_prd: ``xt::xtensor<double, 4>``
/// Streamflow predictions. /// Streamflow predictions. Time steps with missing prediction
/// must be assigned `NAN` values. Those time steps will be pairwise
/// ignored in the observations and the predictions before the
/// *metrics* are computed.
/// shape: (sites, lead times, members, time) /// shape: (sites, lead times, members, time)
/// ///
/// metrics: ``std::vector<std::string>`` /// metrics: ``std::vector<std::string>``
......
#ifndef EVALHYD_PROBABILIST_EVALUATOR_H #ifndef EVALHYD_PROBABILIST_EVALUATOR_H
#define EVALHYD_PROBABILIST_EVALUATOR_H #define EVALHYD_PROBABILIST_EVALUATOR_H
#include <stdexcept>
#include <xtensor/xtensor.hpp> #include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp> #include <xtensor/xview.hpp>
#include <xtensor/xslice.hpp>
using view1d_xtensor2d_type = decltype( using view1d_xtensor2d_type = decltype(
xt::view( xt::view(
...@@ -74,6 +76,22 @@ namespace evalhyd ...@@ -74,6 +76,22 @@ namespace evalhyd
n_msk = t_msk.shape(0); n_msk = t_msk.shape(0);
n_mbr = q_prd.shape(0); n_mbr = q_prd.shape(0);
n_thr = q_thr.size(); n_thr = q_thr.size();
// drop time steps where observations and/or predictions are NaN
auto obs_nan = xt::isnan(q_obs);
auto prd_nan = xt::isnan(q_prd);
auto sum_nan = xt::eval(xt::sum(prd_nan, -1));
if (xt::amin(sum_nan) != xt::amax(sum_nan))
{
throw std::runtime_error(
"predictions across members feature non-equal lengths"
);
}
auto msk_nan = xt::where(obs_nan | xt::row(prd_nan, 0))[0];
xt::view(t_msk, xt::all(), xt::keep(msk_nan)) = false;
}; };
// members for intermediate evaluation metrics // members for intermediate evaluation metrics
......
...@@ -167,6 +167,94 @@ TEST(ProbabilistTests, TestMasks) ...@@ -167,6 +167,94 @@ TEST(ProbabilistTests, TestMasks)
// check results are identical // check results are identical
for (int m = 0; m < metrics.size(); m++) for (int m = 0; m < metrics.size(); m++)
{ {
EXPECT_TRUE(xt::allclose(metrics_masked[m], metrics_subset[m])); EXPECT_TRUE(xt::allclose(metrics_masked[m], metrics_subset[m]))
<< "Failure for (" << metrics[m] << ")";
}
}
TEST(ProbabilistTests, TestMissingData)
{
xt::xtensor<double, 2> thresholds
{{ 4., 5. }};
std::vector<std::string> metrics =
{"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"};
// compute metrics on series with NaN
xt::xtensor<double, 4> forecast_nan {{
// leadtime 1
{{ 5.3, 4.2, 5.7, 2.3, NAN },
{ 4.3, 4.2, 4.7, 4.3, NAN },
{ 5.3, 5.2, 5.7, 2.3, NAN }},
// leadtime 2
{{ NAN, 4.2, 5.7, 2.3, 3.1 },
{ NAN, 4.2, 4.7, 4.3, 3.3 },
{ NAN, 5.2, 5.7, 2.3, 3.9 }}
}};
xt::xtensor<double, 2> observed_nan
{{ 4.7, 4.3, NAN, 2.7, 4.1 }};
std::vector<xt::xarray<double>> metrics_nan =
eh::evalp(
observed_nan,
forecast_nan,
metrics,
thresholds
);
// compute metrics on manually subset series (one leadtime at a time)
xt::xtensor<double, 4> forecast_pp1 {{
// leadtime 1
{{ 5.3, 4.2, 2.3 },
{ 4.3, 4.2, 4.3 },
{ 5.3, 5.2, 2.3 }},
}};
xt::xtensor<double, 2> observed_pp1
{{ 4.7, 4.3, 2.7 }};
std::vector<xt::xarray<double>> metrics_pp1 =
eh::evalp(
observed_pp1,
forecast_pp1,
metrics,
thresholds
);
xt::xtensor<double, 4> forecast_pp2 {{
// leadtime 2
{{ 4.2, 2.3, 3.1 },
{ 4.2, 4.3, 3.3 },
{ 5.2, 2.3, 3.9 }}
}};
xt::xtensor<double, 2> observed_pp2
{{ 4.3, 2.7, 4.1 }};
std::vector<xt::xarray<double>> metrics_pp2 =
eh::evalp(
observed_pp2,
forecast_pp2,
metrics,
thresholds
);
// check that numerical results are identical
for (int m = 0; m < metrics.size(); m++) {
// for leadtime 1
EXPECT_TRUE(
xt::allclose(
xt::view(metrics_nan[m], xt::all(), 0),
xt::view(metrics_pp1[m], xt::all(), 0)
)
) << "Failure for (" << metrics[m] << ", " << "leadtime 1)";
// for leadtime 2
EXPECT_TRUE(
xt::allclose(
xt::view(metrics_nan[m], xt::all(), 1),
xt::view(metrics_pp2[m], xt::all(), 0)
)
) << "Failure for (" << metrics[m] << ", " << "leadtime 2)";
} }
} }
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment