From c875dd93c8184f3d5881e830ca777f70d91b0fae Mon Sep 17 00:00:00 2001
From: Thibault Hallouin <thibault.hallouin@inrae.fr>
Date: Thu, 15 Sep 2022 11:39:59 +0200
Subject: [PATCH] allow masking conditions on obs/prd and using stats
 (mean/median/quantile#)

---
 deps/evalhyd              |  2 +-
 src/evalhyd-python.cpp    |  6 +++---
 tests/test_determinist.py | 30 +++++++++++++++++++++++-------
 tests/test_probabilist.py | 33 +++++++++++++++++++++++++--------
 4 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/deps/evalhyd b/deps/evalhyd
index fff3085..54efcfe 160000
--- a/deps/evalhyd
+++ b/deps/evalhyd
@@ -1 +1 @@
-Subproject commit fff308517dbcd4fa1f47b64a75591eaafdcfa8d1
+Subproject commit 54efcfeac2ccba66aa1ad98b72d92b0765f2d14b
diff --git a/src/evalhyd-python.cpp b/src/evalhyd-python.cpp
index bac4ccd..860574b 100644
--- a/src/evalhyd-python.cpp
+++ b/src/evalhyd-python.cpp
@@ -252,14 +252,14 @@ PYBIND11_MODULE(evalhyd, m)
                     shape: (thresholds,)
 
                 t_msk: `numpy.ndarray`, optional
-                    3D array of masks to generate temporal subsets of the whole
+                    4D array of masks to generate temporal subsets of the whole
                     streamflow time series (where True/False is used for the
                     time steps to include/discard in a given subset). If not
                     provided, no subset is performed and only one set of metrics
                     is returned corresponding to the whole time series. If
                     provided, as many sets of metrics are returned as they are
                     masks provided.
-                    shape: (sites, subsets, time)
+                    shape: (sites, lead times, subsets, time)
 
                 m_cdt: `numpy.ndarray`, optional
                     2D array of conditions to generate temporal subsets. Each
@@ -281,7 +281,7 @@ PYBIND11_MODULE(evalhyd, m)
         )pbdoc",
         py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
         py::arg("q_thr") = xt::pytensor<double, 2>({0}),
-        py::arg("t_msk") = xt::pytensor<bool, 3>({0}),
+        py::arg("t_msk") = xt::pytensor<bool, 4>({0}),
         py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 2>({0})
     );
 
diff --git a/tests/test_determinist.py b/tests/test_determinist.py
index 10d54c0..fea8727 100644
--- a/tests/test_determinist.py
+++ b/tests/test_determinist.py
@@ -96,15 +96,31 @@ class TestMasking(unittest.TestCase):
         )
 
     def test_conditions(self):
-        cdt = numpy.array([["q{<2000,>3000}"]], dtype='|S32')
+        with self.subTest(condtions="observed streamflow values"):
+            cdt = numpy.array([["q_obs{<2000,>3000}"]], dtype='|S32')
 
-        obs = _obs[..., (_obs[0] < 2000) | (_obs[0] > 3000)]
-        prd = _prd[..., (_obs[0] < 2000) | (_obs[0] > 3000)]
+            msk = (_obs[0] < 2000) | (_obs[0] > 3000)
 
-        numpy.testing.assert_almost_equal(
-            evalhyd.evald(_obs, _prd, ["NSE"], m_cdt=cdt)[0],
-            evalhyd.evald(obs, prd, ["NSE"])[0]
-        )
+            obs = _obs[..., msk]
+            prd = _prd[..., msk]
+
+            numpy.testing.assert_almost_equal(
+                evalhyd.evald(_obs, _prd, ["NSE"], m_cdt=cdt)[0],
+                evalhyd.evald(obs, prd, ["NSE"])[0]
+            )
+
+        with self.subTest(condtions="observed streamflow statistics"):
+            cdt = numpy.array([["q_obs{>=median}"]], dtype='|S32')
+
+            msk = _obs[0] >= numpy.median(_obs)
+
+            obs = _obs[..., msk]
+            prd = _prd[..., msk]
+
+            numpy.testing.assert_almost_equal(
+                evalhyd.evald(_obs, _prd, ["NSE"], m_cdt=cdt)[0],
+                evalhyd.evald(obs, prd, ["NSE"])[0]
+            )
 
 
 class TestMissingData(unittest.TestCase):
diff --git a/tests/test_probabilist.py b/tests/test_probabilist.py
index ceef060..6a86805 100644
--- a/tests/test_probabilist.py
+++ b/tests/test_probabilist.py
@@ -79,7 +79,7 @@ class TestDecomposition(unittest.TestCase):
 class TestMasking(unittest.TestCase):
 
     def test_masks(self):
-        msk = numpy.ones((_obs.shape[0], 1, _obs.shape[1]), dtype=bool)
+        msk = numpy.ones((_prd.shape[0], _prd.shape[1], 1, _prd.shape[3]), dtype=bool)
         msk[..., :99] = False
         numpy.testing.assert_almost_equal(
             evalhyd.evalp(_obs, _prd, ["QS"], t_msk=msk)[0],
@@ -87,15 +87,32 @@ class TestMasking(unittest.TestCase):
         )
 
     def test_conditions(self):
-        cdt = numpy.array([["q{<2000,>3000}"]], dtype='|S32')
+        with self.subTest(condtions="observed streamflow values"):
+            cdt = numpy.array([["q_obs{<2000,>3000}"]], dtype='|S32')
 
-        obs = _obs[..., (_obs[0] < 2000) | (_obs[0] > 3000)]
-        prd = _prd[..., (_obs[0] < 2000) | (_obs[0] > 3000)]
+            msk = (_obs[0] < 2000) | (_obs[0] > 3000)
 
-        numpy.testing.assert_almost_equal(
-            evalhyd.evalp(_obs, _prd, ["QS"], m_cdt=cdt)[0],
-            evalhyd.evalp(obs, prd, ["QS"])[0]
-        )
+            obs = _obs[..., msk]
+            prd = _prd[..., msk]
+
+            numpy.testing.assert_almost_equal(
+                evalhyd.evalp(_obs, _prd, ["QS"], m_cdt=cdt)[0],
+                evalhyd.evalp(obs, prd, ["QS"])[0]
+            )
+
+        with self.subTest(condtions="predicted streamflow statistics"):
+            cdt = numpy.array([["q_prd_median{<=quantile0.7}"]], dtype='|S32')
+
+            median = numpy.squeeze(numpy.median(_prd, 2))
+            msk = median <= numpy.quantile(median, 0.7)
+
+            obs = _obs[..., msk]
+            prd = _prd[..., msk]
+
+            numpy.testing.assert_almost_equal(
+                evalhyd.evalp(_obs, _prd, ["QS"], m_cdt=cdt)[0],
+                evalhyd.evalp(obs, prd, ["QS"])[0]
+            )
 
 
 class TestMissingData(unittest.TestCase):
-- 
GitLab