Commit 3e42e722 authored by Thibault Hallouin's avatar Thibault Hallouin
Browse files

explicitly add subsets dimension to `evald`

In order to harmonise with `evalp`, and in view to use `evald` in `evalp`,
the subsets dimension (corresponding to the temporal masks) needed to be
explicitly included in the output of `evald` rather than allowing a
limited number of masks corresponding to the number of time series
evaluated to be used.
No related merge requests found
Pipeline #40154 passed with stages
in 4 minutes and 4 seconds
Showing with 294 additions and 293 deletions
+294 -293
......@@ -28,17 +28,15 @@ namespace evalhyd
/// Streamflow observations. Time steps with missing observations
/// must be assigned `NAN` values. Those time steps will be ignored
/// both in the observations and the predictions before the
/// *metrics* are computed. Observations and predictions must be
/// broadcastable.
/// shape: (1 or series, time)
/// *metrics* are computed.
/// shape: (1, time)
///
/// q_prd: ``xt::xtensor<double, 2>``
/// Streamflow predictions. Time steps with missing predictions
/// must be assigned `NAN` values. Those time steps will be ignored
/// both in the observations and the predictions before the
/// *metrics* are computed. Observations and predictions must feature
/// be broadcastable.
/// shape: (1 or series, time)
/// *metrics* are computed.
/// shape: (series, time)
///
/// metrics: ``std::vector<std::string>``
/// The sequence of evaluation metrics to be computed.
......@@ -85,11 +83,11 @@ namespace evalhyd
/// the subset). If provided, masks must feature the same number of
/// dimensions as observations and predictions, and it must
/// broadcastable with both of them.
/// shape: (1 or series, time)
/// shape: (subsets, time)
///
/// .. seealso:: :doc:`../../functionalities/temporal-masking`
///
/// m_cdt: ``xt::xtensor<std::array<char, 32>, 2>``, optional
/// m_cdt: ``xt::xtensor<std::array<char, 32>, 1>``, optional
/// Masking conditions to use to generate temporal subsets. Each
/// condition consists in a string and can be specified on observed
/// streamflow values/statistics (mean, median, quantile), or on time
......@@ -97,7 +95,7 @@ namespace evalhyd
/// precedence. If not provided and neither is *t_msk*, no subset is
/// performed. If provided, there must be as many conditions as there
/// are time series of observations.
/// shape: (1 or series, 1)
/// shape: (subsets,)
///
/// .. seealso:: :doc:`../../functionalities/conditional-masking`
///
......@@ -125,7 +123,7 @@ namespace evalhyd
/// ``std::vector<xt::xarray<double>>``
/// The sequence of evaluation metrics computed
/// in the same order as given in *metrics*.
/// shape: (metrics,)<(1 or series, samples)>
/// shape: (metrics,)<(series, subsets, samples)>
///
/// :Examples:
///
......@@ -168,7 +166,7 @@ namespace evalhyd
const double exponent = 1,
double epsilon = -9,
const xt::xtensor<bool, 2>& t_msk = {},
const xt::xtensor<std::array<char, 32>, 2>& m_cdt = {},
const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {},
const std::unordered_map<std::string, int>& bootstrap =
{{"n_samples", -9}, {"len_sample", -9}},
const std::vector<std::string>& dts = {}
......@@ -183,6 +181,30 @@ namespace evalhyd
// check that optional parameters are valid
eh::utils::check_bootstrap(bootstrap);
// check that data dimensions are compatible
// > time
if (q_obs.shape(1) != q_prd.shape(1))
throw std::runtime_error(
"observations and predictions feature different "
"temporal lengths"
);
if (t_msk.size() > 0)
if (q_obs.shape(1) != t_msk.shape(1))
throw std::runtime_error(
"observations and masks feature different "
"temporal lengths"
);
// > series
if (q_obs.shape(0) != 1)
throw std::runtime_error(
"observations contain more than one time series"
);
// retrieve dimensions
std::size_t n_tim = q_obs.shape(1);
std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(0) :
(m_cdt.size() > 0 ? m_cdt.shape(0) : 1);
// initialise a mask if none provided
// (corresponding to no temporal subset)
auto gen_msk = [&]() {
......@@ -192,95 +214,22 @@ namespace evalhyd
// else if m_cdt provided, use them to generate t_msk
else if (m_cdt.size() > 0)
{
xt::xtensor<bool, 2> c_msk = xt::zeros<bool>(q_obs.shape());
// flatten arrays to bypass n-dim considerations
// (possible because shapes are constrained to be the same)
if (m_cdt.shape(m_cdt.dimension() - 1) != 1)
throw std::runtime_error("length of last axis in masking "
"conditions must be equal to one");
for (int a = 0; a < m_cdt.dimension() - 1; a++)
if (q_obs.shape(a) != m_cdt.shape(a))
throw std::runtime_error("masking conditions and observations "
"feature incompatible shapes");
auto f_cdt = xt::flatten(m_cdt);
auto f_msk = xt::flatten(c_msk);
auto f_obs = xt::flatten(q_obs);
// determine length of temporal axis
auto nt = q_obs.shape(q_obs.dimension() - 1);
xt::xtensor<bool, 2> c_msk = xt::zeros<bool>({n_msk, n_tim});
// generate mask gradually, one condition at a time
for (int i = 0; i < m_cdt.size(); i++)
{
xt::view(f_msk, xt::range(i*nt, (i+1)*nt)) =
evalhyd::masks::generate_mask_from_conditions(
xt::view(f_cdt, i),
xt::view(f_obs, xt::range(i*nt, (i+1)*nt))
for (int m = 0; m < n_msk; m++)
xt::view(c_msk, m) =
eh::masks::generate_mask_from_conditions(
m_cdt[0], xt::view(q_obs, 0), q_prd
);
}
return c_msk;
}
// if neither t_msk nor m_cdt provided, generate dummy mask
else
return xt::xtensor<bool, 2>{xt::ones<bool>(q_obs.shape())};
return xt::xtensor<bool, 2>{xt::ones<bool>({std::size_t{1}, n_tim})};
};
const xt::xtensor<bool, 2> msk = gen_msk();
// check that observations, predictions, and masks dimensions are compatible
if (q_obs.dimension() != q_prd.dimension())
{
throw std::runtime_error(
"observations and predictions feature "
"different numbers of dimensions"
);
}
if (q_obs.dimension() != msk.dimension())
{
throw std::runtime_error(
"observations and masks feature "
"different numbers of dimensions"
);
}
if (!dts.empty())
if (q_obs.shape(q_obs.dimension() - 1) != dts.size())
throw std::runtime_error(
"observations and datetimes feature different "
"temporal lengths"
);
auto obs_shp = q_obs.shape();
auto prd_shp = q_prd.shape();
auto msk_shp = msk.shape();
// check observations, predictions, and masks are broadcastable
if (obs_shp != prd_shp)
for (int a = 0; a < q_obs.dimension(); a++)
if (obs_shp[a] != prd_shp[a])
if ((obs_shp[a] != 1) and (prd_shp[a] != 1))
throw std::runtime_error(
"observations and predictions are not "
"broadcastable"
);
if (obs_shp != msk_shp)
for (int a = 0; a < q_obs.dimension(); a++)
if (obs_shp[a] != msk_shp[a])
if ((obs_shp[a] != 1) and (msk_shp[a] != 1))
throw std::runtime_error(
"observations and masks are not broadcastable"
);
if (prd_shp != msk_shp)
for (int a = 0; a < q_prd.dimension(); a++)
if (prd_shp[a] != msk_shp[a])
if ((prd_shp[a] != 1) and (msk_shp[a] != 1))
throw std::runtime_error(
"predictions and masks are not broadcastable"
);
auto msk = gen_msk();
// apply streamflow transformation if requested
auto q_transform = [&](const xt::xtensor<double, 2>& q)
......@@ -349,7 +298,7 @@ namespace evalhyd
{
// if no bootstrap requested, generate one sample
// containing all the time indices once
xt::xtensor<int, 1> all = xt::arange(obs.shape(obs.dimension() - 1));
xt::xtensor<int, 1> all = xt::arange(n_tim);
exp.push_back(xt::keep(all));
}
......
This diff is collapsed.
......@@ -43,36 +43,36 @@ TEST(DeterministTests, TestMetrics)
);
// check results on all metrics
xt::xtensor<double, 2> rmse =
{{777.034272},
{776.878479},
{777.800217},
{778.151082},
{778.61487}};
xt::xtensor<double, 3> rmse =
{{{777.034272}},
{{776.878479}},
{{777.800217}},
{{778.151082}},
{{778.61487 }}};
EXPECT_TRUE(xt::allclose(metrics[0], rmse));
xt::xtensor<double, 2> nse =
{{0.718912},
{0.719025},
{0.718358},
{0.718104},
{0.717767}};
xt::xtensor<double, 3> nse =
{{{0.718912}},
{{0.719025}},
{{0.718358}},
{{0.718104}},
{{0.717767}}};
EXPECT_TRUE(xt::allclose(metrics[1], nse));
xt::xtensor<double, 2> kge =
{{0.748088},
{0.746106},
{0.744111},
{0.743011},
{0.741768}};
xt::xtensor<double, 3> kge =
{{{0.748088}},
{{0.746106}},
{{0.744111}},
{{0.743011}},
{{0.741768}}};
EXPECT_TRUE(xt::allclose(metrics[2], kge));
xt::xtensor<double, 2> kgeprime =
{{0.813141},
{0.812775},
{0.812032},
{0.811787},
{0.811387}};
xt::xtensor<double, 3> kgeprime =
{{{0.813141}},
{{0.812775}},
{{0.812032}},
{{0.811787}},
{{0.811387}}};
EXPECT_TRUE(xt::allclose(metrics[3], kgeprime));
}
......@@ -87,45 +87,45 @@ TEST(DeterministTests, TestTransform)
std::vector<xt::xarray<double>> metrics =
evalhyd::evald(observed, predicted, {"NSE"}, "sqrt");
xt::xtensor<double, 2> nse_sqrt =
{{0.882817},
{0.883023},
{0.883019},
{0.883029},
{0.882972}};
xt::xtensor<double, 3> nse_sqrt =
{{{0.882817}},
{{0.883023}},
{{0.883019}},
{{0.883029}},
{{0.882972}}};
EXPECT_TRUE(xt::allclose(metrics[0], nse_sqrt));
// compute and check results on inverted streamflow series
metrics = evalhyd::evald(observed, predicted, {"NSE"}, "inv");
xt::xtensor<double, 2> nse_inv =
{{0.737323},
{0.737404},
{0.737429},
{0.737546},
{0.737595}};
xt::xtensor<double, 3> nse_inv =
{{{0.737323}},
{{0.737404}},
{{0.737429}},
{{0.737546}},
{{0.737595}}};
EXPECT_TRUE(xt::allclose(metrics[0], nse_inv));
// compute and check results on square-rooted streamflow series
metrics = evalhyd::evald(observed, predicted, {"NSE"}, "log");
xt::xtensor<double, 2> nse_log =
{{0.893344},
{0.893523},
{0.893585},
{0.893758},
{0.893793}};
xt::xtensor<double, 3> nse_log =
{{{0.893344}},
{{0.893523}},
{{0.893585}},
{{0.893758}},
{{0.893793}}};
EXPECT_TRUE(xt::allclose(metrics[0], nse_log));
// compute and check results on power-transformed streamflow series
metrics = evalhyd::evald(observed, predicted, {"NSE"}, "pow", 0.2);
xt::xtensor<double, 2> nse_pow =
{{0.899207},
{0.899395},
{0.899451},
{0.899578},
{0.899588}};
xt::xtensor<double, 3> nse_pow =
{{{0.899207}},
{{0.899395}},
{{0.899451}},
{{0.899578}},
{{0.899588}}};
EXPECT_TRUE(xt::allclose(metrics[0], nse_pow));
}
......@@ -180,8 +180,8 @@ TEST(DeterministTests, TestMaskingConditions)
// conditions on streamflow values _________________________________________
// compute scores using masking conditions on streamflow to subset whole record
xt::xtensor<std::array<char, 32>, 2> q_conditions ={
{{"q_obs{<2000,>3000}"}}
xt::xtensor<std::array<char, 32>, 1> q_conditions ={
{"q_obs{<2000,>3000}"}
};
std::vector<xt::xarray<double>> metrics_q_conditioned =
......@@ -211,8 +211,8 @@ TEST(DeterministTests, TestMaskingConditions)
// conditions on streamflow statistics _____________________________________
// compute scores using masking conditions on streamflow to subset whole record
xt::xtensor<std::array<char, 32>, 2> q_conditions_ ={
{{"q_obs{>=mean}"}}
xt::xtensor<std::array<char, 32>, 1> q_conditions_ ={
{"q_obs{>=mean}"}
};
double mean = xt::mean(observed, {1})();
......@@ -244,8 +244,8 @@ TEST(DeterministTests, TestMaskingConditions)
// conditions on temporal indices __________________________________________
// compute scores using masking conditions on time indices to subset whole record
xt::xtensor<std::array<char, 32>, 2> t_conditions = {
{{"t{0,1,2,3,4,5:97,97,98,99}"}}
xt::xtensor<std::array<char, 32>, 1> t_conditions = {
{"t{0,1,2,3,4,5:97,97,98,99}"}
};
std::vector<xt::xarray<double>> metrics_t_conditioned =
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment