From c875dd93c8184f3d5881e830ca777f70d91b0fae Mon Sep 17 00:00:00 2001 From: Thibault Hallouin <thibault.hallouin@inrae.fr> Date: Thu, 15 Sep 2022 11:39:59 +0200 Subject: [PATCH] allow masking conditions on obs/prd and using stats (mean/median/quantile#) --- deps/evalhyd | 2 +- src/evalhyd-python.cpp | 6 +++--- tests/test_determinist.py | 30 +++++++++++++++++++++++------- tests/test_probabilist.py | 33 +++++++++++++++++++++++++-------- 4 files changed, 52 insertions(+), 19 deletions(-) diff --git a/deps/evalhyd b/deps/evalhyd index fff3085..54efcfe 160000 --- a/deps/evalhyd +++ b/deps/evalhyd @@ -1 +1 @@ -Subproject commit fff308517dbcd4fa1f47b64a75591eaafdcfa8d1 +Subproject commit 54efcfeac2ccba66aa1ad98b72d92b0765f2d14b diff --git a/src/evalhyd-python.cpp b/src/evalhyd-python.cpp index bac4ccd..860574b 100644 --- a/src/evalhyd-python.cpp +++ b/src/evalhyd-python.cpp @@ -252,14 +252,14 @@ PYBIND11_MODULE(evalhyd, m) shape: (thresholds,) t_msk: `numpy.ndarray`, optional - 3D array of masks to generate temporal subsets of the whole + 4D array of masks to generate temporal subsets of the whole streamflow time series (where True/False is used for the time steps to include/discard in a given subset). If not provided, no subset is performed and only one set of metrics is returned corresponding to the whole time series. If provided, as many sets of metrics are returned as they are masks provided. - shape: (sites, subsets, time) + shape: (sites, lead times, subsets, time) m_cdt: `numpy.ndarray`, optional 2D array of conditions to generate temporal subsets. Each @@ -281,7 +281,7 @@ PYBIND11_MODULE(evalhyd, m) )pbdoc", py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"), py::arg("q_thr") = xt::pytensor<double, 2>({0}), - py::arg("t_msk") = xt::pytensor<bool, 3>({0}), + py::arg("t_msk") = xt::pytensor<bool, 4>({0}), py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 2>({0}) ); diff --git a/tests/test_determinist.py b/tests/test_determinist.py index 10d54c0..fea8727 100644 --- a/tests/test_determinist.py +++ b/tests/test_determinist.py @@ -96,15 +96,31 @@ class TestMasking(unittest.TestCase): ) def test_conditions(self): - cdt = numpy.array([["q{<2000,>3000}"]], dtype='|S32') + with self.subTest(condtions="observed streamflow values"): + cdt = numpy.array([["q_obs{<2000,>3000}"]], dtype='|S32') - obs = _obs[..., (_obs[0] < 2000) | (_obs[0] > 3000)] - prd = _prd[..., (_obs[0] < 2000) | (_obs[0] > 3000)] + msk = (_obs[0] < 2000) | (_obs[0] > 3000) - numpy.testing.assert_almost_equal( - evalhyd.evald(_obs, _prd, ["NSE"], m_cdt=cdt)[0], - evalhyd.evald(obs, prd, ["NSE"])[0] - ) + obs = _obs[..., msk] + prd = _prd[..., msk] + + numpy.testing.assert_almost_equal( + evalhyd.evald(_obs, _prd, ["NSE"], m_cdt=cdt)[0], + evalhyd.evald(obs, prd, ["NSE"])[0] + ) + + with self.subTest(condtions="observed streamflow statistics"): + cdt = numpy.array([["q_obs{>=median}"]], dtype='|S32') + + msk = _obs[0] >= numpy.median(_obs) + + obs = _obs[..., msk] + prd = _prd[..., msk] + + numpy.testing.assert_almost_equal( + evalhyd.evald(_obs, _prd, ["NSE"], m_cdt=cdt)[0], + evalhyd.evald(obs, prd, ["NSE"])[0] + ) class TestMissingData(unittest.TestCase): diff --git a/tests/test_probabilist.py b/tests/test_probabilist.py index ceef060..6a86805 100644 --- a/tests/test_probabilist.py +++ b/tests/test_probabilist.py @@ -79,7 +79,7 @@ class TestDecomposition(unittest.TestCase): class TestMasking(unittest.TestCase): def test_masks(self): - msk = numpy.ones((_obs.shape[0], 1, _obs.shape[1]), dtype=bool) + msk = numpy.ones((_prd.shape[0], _prd.shape[1], 1, _prd.shape[3]), dtype=bool) msk[..., :99] = False numpy.testing.assert_almost_equal( evalhyd.evalp(_obs, _prd, ["QS"], t_msk=msk)[0], @@ -87,15 +87,32 @@ class TestMasking(unittest.TestCase): ) def test_conditions(self): - cdt = numpy.array([["q{<2000,>3000}"]], dtype='|S32') + with self.subTest(condtions="observed streamflow values"): + cdt = numpy.array([["q_obs{<2000,>3000}"]], dtype='|S32') - obs = _obs[..., (_obs[0] < 2000) | (_obs[0] > 3000)] - prd = _prd[..., (_obs[0] < 2000) | (_obs[0] > 3000)] + msk = (_obs[0] < 2000) | (_obs[0] > 3000) - numpy.testing.assert_almost_equal( - evalhyd.evalp(_obs, _prd, ["QS"], m_cdt=cdt)[0], - evalhyd.evalp(obs, prd, ["QS"])[0] - ) + obs = _obs[..., msk] + prd = _prd[..., msk] + + numpy.testing.assert_almost_equal( + evalhyd.evalp(_obs, _prd, ["QS"], m_cdt=cdt)[0], + evalhyd.evalp(obs, prd, ["QS"])[0] + ) + + with self.subTest(condtions="predicted streamflow statistics"): + cdt = numpy.array([["q_prd_median{<=quantile0.7}"]], dtype='|S32') + + median = numpy.squeeze(numpy.median(_prd, 2)) + msk = median <= numpy.quantile(median, 0.7) + + obs = _obs[..., msk] + prd = _prd[..., msk] + + numpy.testing.assert_almost_equal( + evalhyd.evalp(_obs, _prd, ["QS"], m_cdt=cdt)[0], + evalhyd.evalp(obs, prd, ["QS"])[0] + ) class TestMissingData(unittest.TestCase): -- GitLab