use new evalhyd templated functions

db82a4d1 · Thibault Hallouin · 5b8c9ca2 · db82a4d1 · db82a4d1
Commit db82a4d1 authored 2 years ago by Thibault Hallouin
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 25 additions and 286 deletions
+25 -286
--- a/setup.py
+++ b/setup.py
@@ -13,10 +13,7 @@ __version__ = '0.0.1'
 ext_modules = [
    Pybind11Extension(
        "evalhyd",
-        ['src/evalhyd-python.cpp',
-         'deps/evalhyd/src/probabilist/evaluator_brier.cpp',
-         'deps/evalhyd/src/probabilist/evaluator_elements.cpp',
-         'deps/evalhyd/src/probabilist/evaluator_quantiles.cpp'],
+        ['src/evalhyd-python.cpp'],
        include_dirs=[
            numpy.get_include(),
            os.path.join(os.getcwd(), 'deps', 'evalhyd', 'deps', 'xtl',

--- a/src/evalhyd-python.cpp
+++ b/src/evalhyd-python.cpp
@@ -6,6 +6,7 @@
 #define MACRO_STRINGIFY(x) STRINGIFY(x)

 #define FORCE_IMPORT_ARRAY
+#include <xtensor/xexpression.hpp>
 #include <xtensor/xview.hpp>
 #include <xtensor-python/pytensor.hpp>

@@ -17,22 +18,21 @@ using namespace py::literals;

 // reshape 1D tensors to 2D tensors
 auto evald_1d(
-    const xt::xtensor<double, 1>& q_obs,
-    const xt::xtensor<double, 1>& q_prd,
+    const xt::pytensor<double, 1>& q_obs,
+    const xt::pytensor<double, 1>& q_prd,
    const std::vector<std::string>& metrics,
-    const std::string& transform = "none",
-    const double exponent = 1,
-    double epsilon = -9,
-    const xt::xtensor<bool, 2>& t_msk = {},
-    const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {},
-    const std::unordered_map<std::string, int>& bootstrap =
-        {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}},
-    const std::vector<std::string>& dts = {}
+    const std::string& transform,
+    const double exponent,
+    double epsilon,
+    const xt::pytensor<bool, 2>& t_msk,
+    const xt::pytensor<std::array<char, 32>, 1>& m_cdt,
+    const std::unordered_map<std::string, int>& bootstrap,
+    const std::vector<std::string>& dts
 )
 {
-    return evalhyd::evald(
-        xt::view(q_obs, xt::newaxis(), xt::all()),
-        xt::view(q_prd, xt::newaxis(), xt::all()),
+    return evalhyd::evald<xt::pytensor<double, 2>, xt::pytensor<bool, 2>>(
+        xt::pytensor<double, 2>(xt::view(q_obs, xt::newaxis(), xt::all())),
+        xt::pytensor<double, 2>(xt::view(q_prd, xt::newaxis(), xt::all())),
        metrics,
        transform,
        exponent,
@@ -49,105 +49,13 @@ PYBIND11_MODULE(evalhyd, m)
 {
    xt::import_numpy();

-    m.doc() = R"pbdoc(
-        Utility for evaluation of streamflow predictions.
-    )pbdoc";
+    m.doc() = "Utility for evaluation of streamflow predictions";

    // deterministic evaluation
    m.def(
-        "evald", evald_1d,
-        R"pbdoc(
-            Function to evaluate deterministic streamflow predictions.
-
-            :Parameters:
-
-                q_obs: `numpy.ndarray`
-                   1D array of streamflow observations. Time steps with
-                   missing observations must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (time,)
-
-                q_prd: `numpy.ndarray`
-                   1D array of streamflow predictions. Time steps with
-                   missing predictions must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (time,)
-
-                metrics: `List[str]`
-                   The sequence of evaluation metrics to be computed.
-
-                transform: `str`, optional
-                   The transformation to apply to both streamflow observations
-                   and predictions prior to the calculation of the *metrics*.
-
-                exponent: `float`, optional
-                   The value of the exponent n to use when the *transform* is
-                   the power function. If not provided (or set to default value
-                   1), the streamflow observations and predictions remain
-                   untransformed.
-
-                epsilon: `float`, optional
-                   The value of the small constant ε to add to both the
-                   streamflow observations and predictions prior to the
-                   calculation of the *metrics* when the *transform* is the
-                   reciprocal function, the natural logarithm, or the power
-                   function with a negative exponent (since none are defined
-                   for 0). If not provided (or set to default value -9),
-                   one hundredth of the mean of the streamflow observations
-                   is used as value for epsilon.
-
-                t_msk: `numpy.ndarray`, optional
-                   1D array of mask(s) used to generate temporal subsets of
-                   the whole streamflow time series (where True/False is used for
-                   the time steps to include/discard in a given subset). If not
-                   provided and neither is *m_cdt*, no subset is performed. If
-                   provided, masks must feature the same number of dimensions as
-                   observations and predictions, and it must broadcastable with
-                   both of them.
-                   shape: (subsets, time)
-
-                m_cdt: `numpy.ndarray`, optional
-                   1D array of masking condition(s) to use to generate
-                   temporal subsets. Each condition consists in a string and
-                   can be specified on observed streamflow values/statistics
-                   (mean, median, quantile), or on time indices. If provided
-                   in combination with *t_msk*, the latter takes precedence.
-                   If not provided and neither is *t_msk*, no subset is
-                   performed. If provided, only one condition per time series
-                   of observations can be provided.
-                   shape: (subsets,)
-
-                bootstrap: `dict`, optional
-                   Parameters for the bootstrapping method used to estimate the
-                   sampling uncertainty in the evaluation of the predictions.
-                   Three parameters are mandatory ('n_samples' the number of
-                   random samples, 'len_sample' the length of one sample in
-                   number of years, and 'summary' the statistics to return to
-                   characterise the sampling distribution), and one parameter
-                   is optional ('seed'). If not provided, no bootstrapping is
-                   performed. If provided, *dts* must also be provided.
-
-                dts: `List[str]`, optional
-                   Datetimes. The corresponding date and time for the temporal
-                   dimension of the streamflow observations and predictions.
-                   The date and time must be specified in a string following
-                   the ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss"
-                   (e.g. the 21st of May 2007 at 4 in the afternoon is
-                   "2007-05-21 16:00:00"). If provided, it is only used if
-                   *bootstrap* is also provided.
-                   shape: (time,)
-
-            :Returns:
-
-                `List[numpy.ndarray]`
-                    The sequence of evaluation metrics computed
-                    in the same order as given in *metrics*.
-                    shape: [(1, subsets, samples), ...]
-        )pbdoc",
+        "evald",
+        &evald_1d,
+        "Function to evaluate deterministic streamflow predictions (1D)",
        py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
        py::arg("transform") = "none",
        py::arg("exponent") = 1,
@@ -158,100 +66,11 @@ PYBIND11_MODULE(evalhyd, m)
            py::dict("n_samples"_a=-9, "len_sample"_a=-9, "summary"_a=0),
        py::arg("dts") = py::list()
    );
-    m.def(
-        "evald", evalhyd::evald,
-        R"pbdoc(
-            Function to evaluate deterministic streamflow predictions.
-
-            :Parameters:
-
-                q_obs: `numpy.ndarray`
-                   2D array of streamflow observations. Time steps with
-                   missing observations must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (1, time)
-
-                q_prd: `numpy.ndarray`
-                   2D array of streamflow predictions. Time steps with
-                   missing predictions must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (series, time)
-
-                metrics: `List[str]`
-                   The sequence of evaluation metrics to be computed.
-
-                transform: `str`, optional
-                   The transformation to apply to both streamflow observations
-                   and predictions prior to the calculation of the *metrics*.

-                exponent: `float`, optional
-                   The value of the exponent n to use when the *transform* is
-                   the power function. If not provided (or set to default value
-                   1), the streamflow observations and predictions remain
-                   untransformed.
-
-                epsilon: `float`, optional
-                   The value of the small constant ε to add to both the
-                   streamflow observations and predictions prior to the
-                   calculation of the *metrics* when the *transform* is the
-                   reciprocal function, the natural logarithm, or the power
-                   function with a negative exponent (since none are defined
-                   for 0). If not provided (or set to default value -9),
-                   one hundredth of the mean of the streamflow observations
-                   is used as value for epsilon.
-
-                t_msk: `numpy.ndarray`, optional
-                   2D array of mask(s) used to generate temporal subsets of
-                   the whole streamflow time series (where True/False is used for
-                   the time steps to include/discard in a given subset). If not
-                   provided and neither is *m_cdt*, no subset is performed. If
-                   provided, masks must feature the same number of dimensions as
-                   observations and predictions, and it must broadcastable with
-                   both of them.
-                   shape: (subsets, time)
-
-                m_cdt: `numpy.ndarray`, optional
-                   1D array of masking condition(s) to use to generate
-                   temporal subsets. Each condition consists in a string and
-                   can be specified on observed streamflow values/statistics
-                   (mean, median, quantile), or on time indices. If provided
-                   in combination with *t_msk*, the latter takes precedence.
-                   If not provided and neither is *t_msk*, no subset is
-                   performed. If provided, only one condition per time series
-                   of observations can be provided.
-                   shape: (subsets,)
-
-                bootstrap: `dict`, optional
-                   Parameters for the bootstrapping method used to estimate the
-                   sampling uncertainty in the evaluation of the predictions.
-                   Three parameters are mandatory ('n_samples' the number of
-                   random samples, 'len_sample' the length of one sample in
-                   number of years, and 'summary' the statistics to return to
-                   characterise the sampling distribution), and one parameter
-                   is optional ('seed'). If not provided, no bootstrapping is
-                   performed. If provided, *dts* must also be provided.
-
-                dts: `List[str]`, optional
-                   Datetimes. The corresponding date and time for the temporal
-                   dimension of the streamflow observations and predictions.
-                   The date and time must be specified in a string following
-                   the ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss"
-                   (e.g. the 21st of May 2007 at 4 in the afternoon is
-                   "2007-05-21 16:00:00"). If provided, it is only used if
-                   *bootstrap* is also provided.
-                   shape: (time,)
-
-            :Returns:
-
-                `List[numpy.ndarray]`
-                   The sequence of evaluation metrics computed
-                   in the same order as given in *metrics*.
-                   shape: [(series, subsets, samples), ...]
-        )pbdoc",
+    m.def(
+        "evald",
+        &evalhyd::evald<xt::pytensor<double, 2>, xt::pytensor<bool, 2>>,
+        "Function to evaluate deterministic streamflow predictions (2D)",
        py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
        py::arg("transform") = "none",
        py::arg("exponent") = 1,
@@ -265,86 +84,9 @@ PYBIND11_MODULE(evalhyd, m)

    // probabilistic evaluation
    m.def(
-        "evalp", evalhyd::evalp,
-        R"pbdoc(
-            Function to evaluate probabilistic streamflow predictions.
-
-            :Parameters:
-
-                q_obs: `numpy.ndarray`
-                   2D array of streamflow observations. Time steps with
-                   missing observations must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (sites, time)
-
-                q_prd: `numpy.ndarray`
-                   4D array of streamflow predictions. Time steps with
-                   missing predictions must be assigned `numpy.nan`
-                   values. Those time steps will be ignored both in
-                   the observations and in the predictions before the
-                   *metrics* are computed.
-                   shape: (sites, lead times, members, time)
-
-                metrics: `List[str]`
-                   The sequence of evaluation metrics to be computed.
-
-                q_thr: `List[float]`, optional
-                   The streamflow threshold(s) to consider for the *metrics*
-                   assessing the prediction of exceedance events. If not
-                   provided, set to default value as an empty `list`.
-                   shape: (thresholds,)
-
-                t_msk: `numpy.ndarray`, optional
-                   4D array of masks to generate temporal subsets of the whole
-                   streamflow time series (where True/False is used for the
-                   time steps to include/discard in a given subset). If not
-                   provided, no subset is performed and only one set of metrics
-                   is returned corresponding to the whole time series. If
-                   provided, as many sets of metrics are returned as they are
-                   masks provided.
-                   shape: (sites, lead times, subsets, time)
-
-                m_cdt: `numpy.ndarray`, optional
-                   2D array of conditions to generate temporal subsets. Each
-                   condition consists in a string and can be specified on
-                   observed/predicted streamflow values/statistics (mean,
-                   median, quantile), or on time indices. If provided in
-                   combination with t_msk, the latter takes precedence. If not
-                   provided and neither is t_msk, no subset is performed and
-                   only one set of metrics is returned corresponding to the
-                   whole time series. If provided, as many sets of metrics are
-                   returned as they are conditions provided.
-                   shape: (sites, subsets)
-
-                bootstrap: `dict`, optional
-                   Parameters for the bootstrapping method used to estimate the
-                   sampling uncertainty in the evaluation of the predictions.
-                   Three parameters are mandatory ('n_samples' the number of
-                   random samples, 'len_sample' the length of one sample in
-                   number of years, and 'summary' the statistics to return to
-                   characterise the sampling distribution), and one parameter
-                   is optional ('seed'). If not provided, no bootstrapping is
-                   performed. If provided, *dts* must also be provided.
-
-                dts: `List[str]`, optional
-                   Datetimes. The corresponding date and time for the temporal
-                   dimension of the streamflow observations and predictions.
-                   The date and time must be specified in a string following
-                   the ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss"
-                   (e.g. the 21st of May 2007 at 4 in the afternoon is
-                   "2007-05-21 16:00:00"). If provided, it is only used if
-                   *bootstrap* is also provided.
-                   shape: (time,)
-
-            :Returns:
-
-                `List[numpy.ndarray]`
-                   The sequence of evaluation metrics computed
-                   in the same order as given in *metrics*.
-                   shape: [(sites, lead times, subsets, samples, {quantiles,} {thresholds,} {components}), ...]
-        )pbdoc",
+        "evalp",
+        &evalhyd::evalp<xt::pytensor<double, 2>, xt::pytensor<double, 4>, xt::pytensor<bool, 4>>,
+        "Function to evaluate probabilistic streamflow predictions",
        py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
        py::arg("q_thr") = xt::pytensor<double, 2>({0}),
        py::arg("t_msk") = xt::pytensor<bool, 4>({0}),