Commit 78ff67b0 authored by Thibault Hallouin's avatar Thibault Hallouin
Browse files

implement bootstrapping for metric uncertainty estimation

No related merge requests found
Pipeline #40539 passed with stage
in 2 minutes and 37 seconds
Showing with 375 additions and 135 deletions
+375 -135
Subproject commit 54efcfeac2ccba66aa1ad98b72d92b0765f2d14b Subproject commit 44e1bdf16d38a4cf51de7a003864be63d91a31fb
...@@ -6,12 +6,43 @@ ...@@ -6,12 +6,43 @@
#define MACRO_STRINGIFY(x) STRINGIFY(x) #define MACRO_STRINGIFY(x) STRINGIFY(x)
#define FORCE_IMPORT_ARRAY #define FORCE_IMPORT_ARRAY
#include <xtensor/xview.hpp>
#include <xtensor-python/pytensor.hpp> #include <xtensor-python/pytensor.hpp>
#include "evalhyd/evald.hpp" #include "evalhyd/evald.hpp"
#include "evalhyd/evalp.hpp" #include "evalhyd/evalp.hpp"
namespace py = pybind11; namespace py = pybind11;
using namespace py::literals;
// reshape 1D tensors to 2D tensors
auto evald_1d(
const xt::xtensor<double, 1>& q_obs,
const xt::xtensor<double, 1>& q_prd,
const std::vector<std::string>& metrics,
const std::string& transform = "none",
const double exponent = 1,
double epsilon = -9,
const xt::xtensor<bool, 2>& t_msk = {},
const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {},
const std::unordered_map<std::string, int>& bootstrap =
{{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}},
const std::vector<std::string>& dts = {}
)
{
return evalhyd::evald(
xt::view(q_obs, xt::newaxis(), xt::all()),
xt::view(q_prd, xt::newaxis(), xt::all()),
metrics,
transform,
exponent,
epsilon,
t_msk,
m_cdt,
bootstrap,
dts
);
}
// Python Module and Docstrings // Python Module and Docstrings
PYBIND11_MODULE(evalhyd, m) PYBIND11_MODULE(evalhyd, m)
...@@ -24,30 +55,30 @@ PYBIND11_MODULE(evalhyd, m) ...@@ -24,30 +55,30 @@ PYBIND11_MODULE(evalhyd, m)
// deterministic evaluation // deterministic evaluation
m.def( m.def(
"evald", evalhyd::evald<1>, "evald", evald_1d,
R"pbdoc( R"pbdoc(
Function to evaluate deterministic streamflow predictions. Function to evaluate deterministic streamflow predictions.
:Parameters: :Parameters:
q_obs: `numpy.ndarray` q_obs: `numpy.ndarray`
1D array of streamflow observations. Time steps with 1D array of streamflow observations. Time steps with
missing observations must be assigned `numpy.nan` missing observations must be assigned `numpy.nan`
values. Those time steps will be ignored both in values. Those time steps will be ignored both in
the observations and in the predictions before the the observations and in the predictions before the
*metrics* are computed. *metrics* are computed.
shape: (time,) shape: (time,)
q_prd: `numpy.ndarray` q_prd: `numpy.ndarray`
1D array of streamflow predictions. Time steps with 1D array of streamflow predictions. Time steps with
missing predictions must be assigned `numpy.nan` missing predictions must be assigned `numpy.nan`
values. Those time steps will be ignored both in values. Those time steps will be ignored both in
the observations and in the predictions before the the observations and in the predictions before the
*metrics* are computed. *metrics* are computed.
shape: (time,) shape: (time,)
metrics: `List[str]` metrics: `List[str]`
The sequence of evaluation metrics to be computed. The sequence of evaluation metrics to be computed.
transform: `str`, optional transform: `str`, optional
The transformation to apply to both streamflow observations The transformation to apply to both streamflow observations
...@@ -95,7 +126,7 @@ PYBIND11_MODULE(evalhyd, m) ...@@ -95,7 +126,7 @@ PYBIND11_MODULE(evalhyd, m)
provided, masks must feature the same number of dimensions as provided, masks must feature the same number of dimensions as
observations and predictions, and it must broadcastable with observations and predictions, and it must broadcastable with
both of them. both of them.
shape: (time,) shape: (subsets, time)
m_cdt: `numpy.ndarray`, optional m_cdt: `numpy.ndarray`, optional
1D array of masking condition(s) to use to generate 1D array of masking condition(s) to use to generate
...@@ -106,46 +137,69 @@ PYBIND11_MODULE(evalhyd, m) ...@@ -106,46 +137,69 @@ PYBIND11_MODULE(evalhyd, m)
If not provided and neither is *t_msk*, no subset is If not provided and neither is *t_msk*, no subset is
performed. If provided, only one condition per time series performed. If provided, only one condition per time series
of observations can be provided. of observations can be provided.
shape: (1,) shape: (subsets,)
bootstrap: `dict`, optional
Parameters for the bootstrapping method used to estimate the
sampling uncertainty in the evaluation of the predictions.
Two parameters are mandatory ('n_samples' the number of
random samples, 'len_sample' the length of one sample in
number of years, and 'summary' the statistics to return to
characterise the sampling distribution), and one parameter
is optional ('seed'). If not provided, no bootstrapping is
performed. If provided, *dts* must also be provided.
dts: `List[str]`, optional
Datetimes. The corresponding date and time for the temporal
dimension of the streamflow observations and predictions.
The date and time must be specified in a string following
the ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss"
(e.g. the 21st of May 2007 at 4 in the afternoon is
"2007-05-21 16:00:00"). If provided, it is only used if
*bootstrap* is also provided.
:Returns: :Returns:
`List[numpy.ndarray]` `List[numpy.ndarray]`
The sequence of evaluation metrics computed The sequence of evaluation metrics computed
in the same order as given in *metrics*. in the same order as given in *metrics*.
shape: [(components,)+] shape: [(1, subsets, samples), ...]
)pbdoc", )pbdoc",
py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"), py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
py::arg("transform") = "none", py::arg("exponent") = 1, py::arg("transform") = "none",
py::arg("exponent") = 1,
py::arg("epsilon") = -9, py::arg("epsilon") = -9,
py::arg("t_msk") = xt::pytensor<bool, 1>({}), py::arg("t_msk") = xt::pytensor<bool, 2>({0}),
py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 1>({}) py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 1>({}),
py::arg("bootstrap") =
py::dict("n_samples"_a=-9, "len_sample"_a=-9, "summary"_a=0),
py::arg("dts") = py::list()
); );
m.def( m.def(
"evald", evalhyd::evald<2>, "evald", evalhyd::evald,
R"pbdoc( R"pbdoc(
Function to evaluate deterministic streamflow predictions. Function to evaluate deterministic streamflow predictions.
:Parameters: :Parameters:
q_obs: `numpy.ndarray` q_obs: `numpy.ndarray`
2D array of streamflow observations. Time steps with 2D array of streamflow observations. Time steps with
missing observations must be assigned `numpy.nan` missing observations must be assigned `numpy.nan`
values. Those time steps will be ignored both in values. Those time steps will be ignored both in
the observations and in the predictions before the the observations and in the predictions before the
*metrics* are computed. *metrics* are computed.
shape: (1+, time) shape: (1, time)
q_prd: `numpy.ndarray` q_prd: `numpy.ndarray`
2D array of streamflow predictions. Time steps with 2D array of streamflow predictions. Time steps with
missing predictions must be assigned `numpy.nan` missing predictions must be assigned `numpy.nan`
values. Those time steps will be ignored both in values. Those time steps will be ignored both in
the observations and in the predictions before the the observations and in the predictions before the
*metrics* are computed. *metrics* are computed.
shape: (1+, time) shape: (series, time)
metrics: `List[str]` metrics: `List[str]`
The sequence of evaluation metrics to be computed. The sequence of evaluation metrics to be computed.
transform: `str`, optional transform: `str`, optional
The transformation to apply to both streamflow observations The transformation to apply to both streamflow observations
...@@ -193,10 +247,10 @@ PYBIND11_MODULE(evalhyd, m) ...@@ -193,10 +247,10 @@ PYBIND11_MODULE(evalhyd, m)
provided, masks must feature the same number of dimensions as provided, masks must feature the same number of dimensions as
observations and predictions, and it must broadcastable with observations and predictions, and it must broadcastable with
both of them. both of them.
shape: (1+, time) shape: (subsets, time)
m_cdt: `numpy.ndarray`, optional m_cdt: `numpy.ndarray`, optional
2D array of masking condition(s) to use to generate 1D array of masking condition(s) to use to generate
temporal subsets. Each condition consists in a string and temporal subsets. Each condition consists in a string and
can be specified on observed streamflow values/statistics can be specified on observed streamflow values/statistics
(mean, median, quantile), or on time indices. If provided (mean, median, quantile), or on time indices. If provided
...@@ -204,20 +258,43 @@ PYBIND11_MODULE(evalhyd, m) ...@@ -204,20 +258,43 @@ PYBIND11_MODULE(evalhyd, m)
If not provided and neither is *t_msk*, no subset is If not provided and neither is *t_msk*, no subset is
performed. If provided, only one condition per time series performed. If provided, only one condition per time series
of observations can be provided. of observations can be provided.
shape: (1+, 1) shape: (subsets,)
bootstrap: `dict`, optional
Parameters for the bootstrapping method used to estimate the
sampling uncertainty in the evaluation of the predictions.
Two parameters are mandatory ('n_samples' the number of
random samples, 'len_sample' the length of one sample in
number of years, and 'summary' the statistics to return to
characterise the sampling distribution), and one parameter
is optional ('seed'). If not provided, no bootstrapping is
performed. If provided, *dts* must also be provided.
dts: `List[str]`, optional
Datetimes. The corresponding date and time for the temporal
dimension of the streamflow observations and predictions.
The date and time must be specified in a string following
the ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss"
(e.g. the 21st of May 2007 at 4 in the afternoon is
"2007-05-21 16:00:00"). If provided, it is only used if
*bootstrap* is also provided.
:Returns: :Returns:
`List[numpy.ndarray]` `List[numpy.ndarray]`
The sequence of evaluation metrics computed The sequence of evaluation metrics computed
in the same order as given in *metrics*. in the same order as given in *metrics*.
shape: [(1+, components), ...] shape: [(series, subsets, samples), ...]
)pbdoc", )pbdoc",
py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"), py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
py::arg("transform") = "none", py::arg("exponent") = 1, py::arg("transform") = "none",
py::arg("exponent") = 1,
py::arg("epsilon") = -9, py::arg("epsilon") = -9,
py::arg("t_msk") = xt::pytensor<bool, 2>({0}), py::arg("t_msk") = xt::pytensor<bool, 2>({0}),
py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 2>({0}) py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 1>({}),
py::arg("bootstrap") =
py::dict("n_samples"_a=-9, "len_sample"_a=-9, "summary"_a=0),
py::arg("dts") = py::list()
); );
// probabilistic evaluation // probabilistic evaluation
...@@ -229,63 +306,85 @@ PYBIND11_MODULE(evalhyd, m) ...@@ -229,63 +306,85 @@ PYBIND11_MODULE(evalhyd, m)
:Parameters: :Parameters:
q_obs: `numpy.ndarray` q_obs: `numpy.ndarray`
2D array of streamflow observations. Time steps with 2D array of streamflow observations. Time steps with
missing observations must be assigned `numpy.nan` missing observations must be assigned `numpy.nan`
values. Those time steps will be ignored both in values. Those time steps will be ignored both in
the observations and in the predictions before the the observations and in the predictions before the
*metrics* are computed. *metrics* are computed.
shape: (sites, time) shape: (sites, time)
q_prd: `numpy.ndarray` q_prd: `numpy.ndarray`
4D array of streamflow predictions. Time steps with 4D array of streamflow predictions. Time steps with
missing predictions must be assigned `numpy.nan` missing predictions must be assigned `numpy.nan`
values. Those time steps will be ignored both in values. Those time steps will be ignored both in
the observations and in the predictions before the the observations and in the predictions before the
*metrics* are computed. *metrics* are computed.
shape: (sites, lead times, members, time) shape: (sites, lead times, members, time)
metrics: `List[str]` metrics: `List[str]`
The sequence of evaluation metrics to be computed. The sequence of evaluation metrics to be computed.
q_thr: `List[float]`, optional q_thr: `List[float]`, optional
The streamflow threshold(s) to consider for the *metrics* The streamflow threshold(s) to consider for the *metrics*
assessing the prediction of exceedance events. If not assessing the prediction of exceedance events. If not
provided, set to default value as an empty `list`. provided, set to default value as an empty `list`.
shape: (thresholds,) shape: (thresholds,)
t_msk: `numpy.ndarray`, optional t_msk: `numpy.ndarray`, optional
4D array of masks to generate temporal subsets of the whole 4D array of masks to generate temporal subsets of the whole
streamflow time series (where True/False is used for the streamflow time series (where True/False is used for the
time steps to include/discard in a given subset). If not time steps to include/discard in a given subset). If not
provided, no subset is performed and only one set of metrics provided, no subset is performed and only one set of metrics
is returned corresponding to the whole time series. If is returned corresponding to the whole time series. If
provided, as many sets of metrics are returned as they are provided, as many sets of metrics are returned as they are
masks provided. masks provided.
shape: (sites, lead times, subsets, time) shape: (sites, lead times, subsets, time)
m_cdt: `numpy.ndarray`, optional m_cdt: `numpy.ndarray`, optional
2D array of conditions to generate temporal subsets. Each 2D array of conditions to generate temporal subsets. Each
condition consists in a string and can be specified on condition consists in a string and can be specified on
observed/predicted streamflow values/statistics (mean, observed/predicted streamflow values/statistics (mean,
median, quantile), or on time indices. If provided in median, quantile), or on time indices. If provided in
combination with t_msk, the latter takes precedence. If not combination with t_msk, the latter takes precedence. If not
provided and neither is t_msk, no subset is performed and provided and neither is t_msk, no subset is performed and
only one set of metrics is returned corresponding to the only one set of metrics is returned corresponding to the
whole time series. If provided, as many sets of metrics are whole time series. If provided, as many sets of metrics are
returned as they are conditions provided. returned as they are conditions provided.
shape: (sites, subsets) shape: (sites, subsets)
bootstrap: `dict`, optional
Parameters for the bootstrapping method used to estimate the
sampling uncertainty in the evaluation of the predictions.
Two parameters are mandatory ('n_samples' the number of
random samples, 'len_sample' the length of one sample in
number of years, and 'summary' the statistics to return to
characterise the sampling distribution), and one parameter
is optional ('seed'). If not provided, no bootstrapping is
performed. If provided, *dts* must also be provided.
dts: `List[str]`, optional
Datetimes. The corresponding date and time for the temporal
dimension of the streamflow observations and predictions.
The date and time must be specified in a string following
the ISO 8601-1:2019 standard, i.e. "YYYY-MM-DD hh:mm:ss"
(e.g. the 21st of May 2007 at 4 in the afternoon is
"2007-05-21 16:00:00"). If provided, it is only used if
*bootstrap* is also provided.
:Returns: :Returns:
`List[numpy.ndarray]` `List[numpy.ndarray]`
The sequence of evaluation metrics computed The sequence of evaluation metrics computed
in the same order as given in *metrics*. in the same order as given in *metrics*.
shape: [(sites, lead times, subsets, {quantiles,} {thresholds,} {components}), ...] shape: [(sites, lead times, subsets, samples, {quantiles,} {thresholds,} {components}), ...]
)pbdoc", )pbdoc",
py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"), py::arg("q_obs"), py::arg("q_prd"), py::arg("metrics"),
py::arg("q_thr") = xt::pytensor<double, 2>({0}), py::arg("q_thr") = xt::pytensor<double, 2>({0}),
py::arg("t_msk") = xt::pytensor<bool, 4>({0}), py::arg("t_msk") = xt::pytensor<bool, 4>({0}),
py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 2>({0}) py::arg("m_cdt") = xt::pytensor<std::array<char, 32>, 2>({0}),
py::arg("bootstrap") =
py::dict("n_samples"_a=-9, "len_sample"_a=-9, "summary"_a=0),
py::arg("dts") = py::list()
); );
#ifdef VERSION_INFO #ifdef VERSION_INFO
......
1963-10-01 00:00:00,1963-10-02 00:00:00,1963-10-03 00:00:00,1963-10-04 00:00:00,1963-10-05 00:00:00,1963-10-06 00:00:00,1963-10-07 00:00:00,1963-10-08 00:00:00,1963-10-09 00:00:00,1963-10-10 00:00:00,1963-10-11 00:00:00,1963-10-12 00:00:00,1963-10-13 00:00:00,1963-10-14 00:00:00,1963-10-15 00:00:00,1963-10-16 00:00:00,1963-10-17 00:00:00,1963-10-18 00:00:00,1963-10-19 00:00:00,1963-10-20 00:00:00,1963-10-21 00:00:00,1963-10-22 00:00:00,1963-10-23 00:00:00,1963-10-24 00:00:00,1963-10-25 00:00:00,1963-10-26 00:00:00,1963-10-27 00:00:00,1963-10-28 00:00:00,1963-10-29 00:00:00,1963-10-30 00:00:00,1963-10-31 00:00:00,1963-11-01 00:00:00,1963-11-02 00:00:00,1963-11-03 00:00:00,1963-11-04 00:00:00,1963-11-05 00:00:00,1963-11-06 00:00:00,1963-11-07 00:00:00,1963-11-08 00:00:00,1963-11-09 00:00:00,1963-11-10 00:00:00,1963-11-11 00:00:00,1963-11-12 00:00:00,1963-11-13 00:00:00,1963-11-14 00:00:00,1963-11-15 00:00:00,1963-11-16 00:00:00,1963-11-17 00:00:00,1963-11-18 00:00:00,1963-11-19 00:00:00,1963-11-20 00:00:00,1963-11-21 00:00:00,1963-11-22 00:00:00,1963-11-23 00:00:00,1963-11-24 00:00:00,1963-11-25 00:00:00,1963-11-26 00:00:00,1963-11-27 00:00:00,1963-11-28 00:00:00,1963-11-29 00:00:00,1963-11-30 00:00:00,1963-12-01 00:00:00,1963-12-02 00:00:00,1963-12-03 00:00:00,1963-12-04 00:00:00,1963-12-05 00:00:00,1963-12-06 00:00:00,1963-12-07 00:00:00,1963-12-08 00:00:00,1963-12-09 00:00:00,1963-12-10 00:00:00,1963-12-11 00:00:00,1963-12-12 00:00:00,1963-12-13 00:00:00,1963-12-14 00:00:00,1963-12-15 00:00:00,1963-12-16 00:00:00,1963-12-17 00:00:00,1963-12-18 00:00:00,1963-12-19 00:00:00,1963-12-20 00:00:00,1963-12-21 00:00:00,1963-12-22 00:00:00,1963-12-23 00:00:00,1963-12-24 00:00:00,1963-12-25 00:00:00,1963-12-26 00:00:00,1963-12-27 00:00:00,1963-12-28 00:00:00,1963-12-29 00:00:00,1963-12-30 00:00:00,1963-12-31 00:00:00,1964-01-01 00:00:00,1964-01-02 00:00:00,1964-01-03 00:00:00,1964-01-04 00:00:00,1964-01-05 00:00:00,1964-01-06 00:00:00,1964-01-07 00:00:00,1964-01-08 00:00:00,1964-01-09 00:00:00,1964-01-10 00:00:00,1964-01-11 00:00:00,1964-01-12 00:00:00,1964-01-13 00:00:00,1964-01-14 00:00:00,1964-01-15 00:00:00,1964-01-16 00:00:00,1964-01-17 00:00:00,1964-01-18 00:00:00,1964-01-19 00:00:00,1964-01-20 00:00:00,1964-01-21 00:00:00,1964-01-22 00:00:00,1964-01-23 00:00:00,1964-01-24 00:00:00,1964-01-25 00:00:00,1964-01-26 00:00:00,1964-01-27 00:00:00,1964-01-28 00:00:00,1964-01-29 00:00:00,1964-01-30 00:00:00,1964-01-31 00:00:00,1964-02-01 00:00:00,1964-02-02 00:00:00,1964-02-03 00:00:00,1964-02-04 00:00:00,1964-02-05 00:00:00,1964-02-06 00:00:00,1964-02-07 00:00:00,1964-02-08 00:00:00,1964-02-09 00:00:00,1964-02-10 00:00:00,1964-02-11 00:00:00,1964-02-12 00:00:00,1964-02-13 00:00:00,1964-02-14 00:00:00,1964-02-15 00:00:00,1964-02-16 00:00:00,1964-02-17 00:00:00,1964-02-18 00:00:00,1964-02-19 00:00:00,1964-02-20 00:00:00,1964-02-21 00:00:00,1964-02-22 00:00:00,1964-02-23 00:00:00,1964-02-24 00:00:00,1964-02-25 00:00:00,1964-02-26 00:00:00,1964-02-27 00:00:00,1964-02-28 00:00:00,1964-02-29 00:00:00,1964-03-01 00:00:00,1964-03-02 00:00:00,1964-03-03 00:00:00,1964-03-04 00:00:00,1964-03-05 00:00:00,1964-03-06 00:00:00,1964-03-07 00:00:00,1964-03-08 00:00:00,1964-03-09 00:00:00,1964-03-10 00:00:00,1964-03-11 00:00:00,1964-03-12 00:00:00,1964-03-13 00:00:00,1964-03-14 00:00:00,1964-03-15 00:00:00,1964-03-16 00:00:00,1964-03-17 00:00:00,1964-03-18 00:00:00,1964-03-19 00:00:00,1964-03-20 00:00:00,1964-03-21 00:00:00,1964-03-22 00:00:00,1964-03-23 00:00:00,1964-03-24 00:00:00,1964-03-25 00:00:00,1964-03-26 00:00:00,1964-03-27 00:00:00,1964-03-28 00:00:00,1964-03-29 00:00:00,1964-03-30 00:00:00,1964-03-31 00:00:00,1964-04-01 00:00:00,1964-04-02 00:00:00,1964-04-03 00:00:00,1964-04-04 00:00:00,1964-04-05 00:00:00,1964-04-06 00:00:00,1964-04-07 00:00:00,1964-04-08 00:00:00,1964-04-09 00:00:00,1964-04-10 00:00:00,1964-04-11 00:00:00,1964-04-12 00:00:00,1964-04-13 00:00:00,1964-04-14 00:00:00,1964-04-15 00:00:00,1964-04-16 00:00:00,1964-04-17 00:00:00,1964-04-18 00:00:00,1964-04-19 00:00:00,1964-04-20 00:00:00,1964-04-21 00:00:00,1964-04-22 00:00:00,1964-04-23 00:00:00,1964-04-24 00:00:00,1964-04-25 00:00:00,1964-04-26 00:00:00,1964-04-27 00:00:00,1964-04-28 00:00:00,1964-04-29 00:00:00,1964-04-30 00:00:00,1964-05-01 00:00:00,1964-05-02 00:00:00,1964-05-03 00:00:00,1964-05-04 00:00:00,1964-05-05 00:00:00,1964-05-06 00:00:00,1964-05-07 00:00:00,1964-05-08 00:00:00,1964-05-09 00:00:00,1964-05-10 00:00:00,1964-05-11 00:00:00,1964-05-12 00:00:00,1964-05-13 00:00:00,1964-05-14 00:00:00,1964-05-15 00:00:00,1964-05-16 00:00:00,1964-05-17 00:00:00,1964-05-18 00:00:00,1964-05-19 00:00:00,1964-05-20 00:00:00,1964-05-21 00:00:00,1964-05-22 00:00:00,1964-05-23 00:00:00,1964-05-24 00:00:00,1964-05-25 00:00:00,1964-05-26 00:00:00,1964-05-27 00:00:00,1964-05-28 00:00:00,1964-05-29 00:00:00,1964-05-30 00:00:00,1964-05-31 00:00:00,1964-06-01 00:00:00,1964-06-02 00:00:00,1964-06-03 00:00:00,1964-06-04 00:00:00,1964-06-05 00:00:00,1964-06-06 00:00:00,1964-06-07 00:00:00,1964-06-08 00:00:00,1964-06-09 00:00:00,1964-06-10 00:00:00,1964-06-11 00:00:00,1964-06-12 00:00:00,1964-06-13 00:00:00,1964-06-14 00:00:00,1964-06-15 00:00:00,1964-06-16 00:00:00,1964-06-17 00:00:00,1964-06-18 00:00:00,1964-06-19 00:00:00,1964-06-20 00:00:00,1964-06-21 00:00:00,1964-06-22 00:00:00,1964-06-23 00:00:00,1964-06-24 00:00:00,1964-06-25 00:00:00,1964-06-26 00:00:00,1964-06-27 00:00:00,1964-06-28 00:00:00,1964-06-29 00:00:00,1964-06-30 00:00:00,1964-07-01 00:00:00,1964-07-02 00:00:00,1964-07-03 00:00:00,1964-07-04 00:00:00,1964-07-05 00:00:00,1964-07-06 00:00:00,1964-07-07 00:00:00,1964-07-08 00:00:00,1964-07-09 00:00:00,1964-07-10 00:00:00,1964-07-11 00:00:00,1964-07-12 00:00:00,1964-07-13 00:00:00,1964-07-14 00:00:00,1964-07-15 00:00:00,1964-07-16 00:00:00,1964-07-17 00:00:00,1964-07-18 00:00:00,1964-07-19 00:00:00,1964-07-20 00:00:00,1964-07-21 00:00:00,1964-07-22 00:00:00,1964-07-23 00:00:00,1964-07-24 00:00:00,1964-07-25 00:00:00,1964-07-26 00:00:00,1964-07-27 00:00:00,1964-07-28 00:00:00,1964-07-29 00:00:00,1964-07-30 00:00:00,1964-07-31 00:00:00,1964-08-01 00:00:00,1964-08-02 00:00:00,1964-08-03 00:00:00,1964-08-04 00:00:00,1964-08-05 00:00:00,1964-08-06 00:00:00,1964-08-07 00:00:00,1964-08-08 00:00:00,1964-08-09 00:00:00,1964-08-10 00:00:00,1964-08-11 00:00:00,1964-08-12 00:00:00,1964-08-13 00:00:00,1964-08-14 00:00:00,1964-08-15 00:00:00,1964-08-16 00:00:00,1964-08-17 00:00:00,1964-08-18 00:00:00,1964-08-19 00:00:00,1964-08-20 00:00:00,1964-08-21 00:00:00,1964-08-22 00:00:00,1964-08-23 00:00:00,1964-08-24 00:00:00,1964-08-25 00:00:00,1964-08-26 00:00:00,1964-08-27 00:00:00,1964-08-28 00:00:00,1964-08-29 00:00:00,1964-08-30 00:00:00,1964-08-31 00:00:00,1964-09-01 00:00:00,1964-09-02 00:00:00,1964-09-03 00:00:00,1964-09-04 00:00:00,1964-09-05 00:00:00,1964-09-06 00:00:00,1964-09-07 00:00:00,1964-09-08 00:00:00,1964-09-09 00:00:00,1964-09-10 00:00:00,1964-09-11 00:00:00,1964-09-12 00:00:00,1964-09-13 00:00:00,1964-09-14 00:00:00,1964-09-15 00:00:00,1964-09-16 00:00:00,1964-09-17 00:00:00,1964-09-18 00:00:00,1964-09-19 00:00:00,1964-09-20 00:00:00,1964-09-21 00:00:00,1964-09-22 00:00:00,1964-09-23 00:00:00,1964-09-24 00:00:00,1964-09-25 00:00:00,1964-09-26 00:00:00,1964-09-27 00:00:00,1964-09-28 00:00:00,1964-09-29 00:00:00,1964-09-30 00:00:00
43.0,49.0,52.0,61.0,62.0,62.0,62.0,62.0,61.0,53.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,53.0,59.0,66.0,72.0,78.0,83.0,89.0,115.0,116.0,140.0,196.0,169.0,186.0,508.0,284.0,206.0,322.0,405.0,541.0,1570.0,887.0,6250.0,2280.0,973.0,887.0,633.0,551.0,478.0,389.0,303.0,286.0,267.0,227.0,195.0,175.0,183.0,182.0,177.0,172.0,168.0,163.0,158.0,152.0,138.0,132.0,122.0,113.0,109.0,109.0,109.0,109.0,106.0,95.0,94.0,94.0,94.0,99.0,120.0,129.0,150.0,157.0,168.0,170.0,167.0,150.0,135.0,133.0,130.0,119.0,114.0,120.0,124.0,131.0,133.0,133.0,142.0,717.0,605.0,372.0,275.0,207.0,170.0,153.0,138.0,133.0,124.0,123.0,122.0,122.0,123.0,132.0,505.0,740.0,989.0,1910.0,868.0,606.0,445.0,359.0,298.0,278.0,256.0,251.0,251.0,250.0,246.0,225.0,220.0,219.0,227.0,422.0,569.0,404.0,315.0,277.0,239.0,254.0,458.0,593.0,543.0,565.0,479.0,550.0,544.0,414.0,328.0,309.0,276.0,235.0,215.0,178.0,170.0,157.0,155.0,154.0,155.0,163.0,319.0,801.0,716.0,516.0,1800.0,1750.0,1040.0,1050.0,712.0,1240.0,5760.0,2870.0,1230.0,812.0,608.0,494.0,414.0,339.0,881.0,1440.0,969.0,638.0,457.0,346.0,302.0,272.0,251.0,239.0,238.0,238.0,235.0,215.0,194.0,181.0,183.0,540.0,360.0,298.0,261.0,359.0,449.0,308.0,253.0,213.0,193.0,173.0,163.0,154.0,154.0,167.0,186.0,167.0,155.0,152.0,144.0,143.0,141.0,129.0,108.0,105.0,104.0,102.0,100.0,99.0,97.0,96.0,95.0,95.0,94.0,94.0,93.0,91.0,90.0,89.0,88.0,87.0,86.0,85.0,84.0,82.0,81.0,80.0,79.0,78.0,77.0,76.0,74.0,73.0,73.0,72.0,72.0,71.0,71.0,71.0,70.0,70.0,69.0,69.0,68.0,68.0,68.0,67.0,67.0,66.0,66.0,65.0,65.0,65.0,66.0,65.0,65.0,64.0,63.0,62.0,62.0,61.0,60.0,59.0,59.0,58.0,58.0,57.0,57.0,56.0,56.0,55.0,55.0,54.0,54.0,53.0,53.0,52.0,51.0,51.0,50.0,49.0,49.0,48.0,48.0,47.0,46.0,46.0,45.0,44.0,44.0,43.0,43.0,42.0,41.0,41.0,40.0,39.0,39.0,38.0,38.0,37.0,36.0,36.0,37.0,37.0,37.0,37.0,38.0,38.0,41.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,38.0,41.0,42.0,42.0,42.0,42.0,40.0,36.0,35.0,35.0,35.0,35.0,34.0,34.0,34.0,33.0,33.0
This diff is collapsed.
...@@ -13,29 +13,29 @@ class TestMetrics(unittest.TestCase): ...@@ -13,29 +13,29 @@ class TestMetrics(unittest.TestCase):
expected = { expected = {
'RMSE': 'RMSE':
[[777.03427238], [[[777.03427238]],
[776.87847854], [[776.87847854]],
[777.80021654], [[777.80021654]],
[778.15108180], [[778.15108180]],
[778.61486998]], [[778.61486998]]],
'NSE': 'NSE':
[[0.71891219], [[[0.71891219]],
[0.71902490], [[0.71902490]],
[0.71835777], [[0.71835777]],
[0.71810361], [[0.71810361]],
[0.71776748]], [[0.71776748]]],
'KGE': 'KGE':
[[0.74808767], [[[0.74808767]],
[0.74610620], [[0.74610620]],
[0.74411103], [[0.74411103]],
[0.74301085], [[0.74301085]],
[0.74176777]], [[0.74176777]]],
'KGEPRIME': 'KGEPRIME':
[[0.81314075], [[[0.81314075]],
[0.81277485], [[0.81277485]],
[0.81203242], [[0.81203242]],
[0.81178671], [[0.81178671]],
[0.81138658]] [[0.81138658]]]
} }
def test_metrics_2d(self): def test_metrics_2d(self):
...@@ -51,7 +51,7 @@ class TestMetrics(unittest.TestCase): ...@@ -51,7 +51,7 @@ class TestMetrics(unittest.TestCase):
with self.subTest(metric=metric): with self.subTest(metric=metric):
numpy.testing.assert_almost_equal( numpy.testing.assert_almost_equal(
evalhyd.evald(_obs[0], _prd[0], [metric])[0], evalhyd.evald(_obs[0], _prd[0], [metric])[0],
self.expected[metric][0] [self.expected[metric][0]]
) )
...@@ -97,7 +97,7 @@ class TestMasking(unittest.TestCase): ...@@ -97,7 +97,7 @@ class TestMasking(unittest.TestCase):
def test_conditions(self): def test_conditions(self):
with self.subTest(condtions="observed streamflow values"): with self.subTest(condtions="observed streamflow values"):
cdt = numpy.array([["q_obs{<2000,>3000}"]], dtype='|S32') cdt = numpy.array(["q_obs{<2000,>3000}"], dtype='|S32')
msk = (_obs[0] < 2000) | (_obs[0] > 3000) msk = (_obs[0] < 2000) | (_obs[0] > 3000)
...@@ -110,7 +110,7 @@ class TestMasking(unittest.TestCase): ...@@ -110,7 +110,7 @@ class TestMasking(unittest.TestCase):
) )
with self.subTest(condtions="observed streamflow statistics"): with self.subTest(condtions="observed streamflow statistics"):
cdt = numpy.array([["q_obs{>=median}"]], dtype='|S32') cdt = numpy.array(["q_obs{>=median}"], dtype='|S32')
msk = _obs[0] >= numpy.median(_obs) msk = _obs[0] >= numpy.median(_obs)
...@@ -126,27 +126,65 @@ class TestMasking(unittest.TestCase): ...@@ -126,27 +126,65 @@ class TestMasking(unittest.TestCase):
class TestMissingData(unittest.TestCase): class TestMissingData(unittest.TestCase):
def test_nan(self): def test_nan(self):
for metric in ('RMSE', 'NSE', 'KGE', 'KGEPRIME'):
obs = numpy.array(
[[4.7, numpy.nan, 5.5, 2.7, 4.1]]
)
prd = numpy.array(
[[5.3, 4.2, 5.7, 2.3, numpy.nan],
[numpy.nan, 4.2, 4.7, 4.3, 3.3],
[5.3, 5.2, 5.7, numpy.nan, 3.9]]
)
with self.subTest(metric=metric):
res = evalhyd.evald(obs, prd, [metric])[0]
for i in range(prd.shape[0]):
msk = ~numpy.isnan(obs[0]) & ~numpy.isnan(prd[i])
numpy.testing.assert_almost_equal(
# missing data flagged as NaN
res[[i]],
# missing data pairwise deleted from series
evalhyd.evald(
obs[:, msk],
prd[i, msk][numpy.newaxis],
[metric]
)[0]
)
class TestUncertainty(unittest.TestCase):
def test_bootstrap(self):
prd_1yr = numpy.genfromtxt(
"./data/q_prd_1yr.csv", delimiter=',', skip_header=1
)
obs_1yr = numpy.genfromtxt(
"./data/q_obs_1yr.csv", delimiter=',', skip_header=1
)[numpy.newaxis]
dts_1yr = numpy.genfromtxt(
"./data/q_obs_1yr.csv", delimiter=',', dtype=str, skip_footer=1
)
obs_3yrs = numpy.hstack((obs_1yr,) * 3)
prd_3yrs = numpy.hstack((prd_1yr,) * 3)
for metric in ('RMSE', 'NSE', 'KGE', 'KGEPRIME'): for metric in ('RMSE', 'NSE', 'KGE', 'KGEPRIME'):
with self.subTest(metric=metric): with self.subTest(metric=metric):
numpy.testing.assert_almost_equal( numpy.testing.assert_almost_equal(
# missing data flagged as NaN # bootstrap with only one year of data
evalhyd.evald( # (compare last sample only to have matching dimensions)
[[4.7, numpy.nan, 5.5, 2.7, 4.1]],
[[5.3, 4.2, 5.7, 2.3, numpy.nan],
[numpy.nan, 4.2, 4.7, 4.3, 3.3],
[5.3, 5.2, 5.7, numpy.nan, 3.9]],
[metric]
)[0],
# missing data pairwise deleted from series
evalhyd.evald( evalhyd.evald(
[[4.7, 5.5, 2.7], obs_1yr, prd_1yr, [metric],
[5.5, 2.7, 4.1], bootstrap={
[4.7, 5.5, 4.1]], "n_samples": 10, "len_sample": 3, "summary": 0
[[5.3, 5.7, 2.3], },
[4.7, 4.3, 3.3], dts=dts_1yr
[5.3, 5.7, 3.9]], )[0][..., [0]],
[metric] # repeat year of data three times to correspond to a
)[0] # bootstrap sample of length 3
evalhyd.evald(obs_3yrs, prd_3yrs, [metric])[0]
) )
...@@ -166,6 +204,9 @@ if __name__ == '__main__': ...@@ -166,6 +204,9 @@ if __name__ == '__main__':
test_suite.addTests( test_suite.addTests(
test_loader.loadTestsFromTestCase(TestMissingData) test_loader.loadTestsFromTestCase(TestMissingData)
) )
test_suite.addTests(
test_loader.loadTestsFromTestCase(TestUncertainty)
)
runner = unittest.TextTestRunner(verbosity=2) runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(test_suite) result = runner.run(test_suite)
......
...@@ -16,27 +16,27 @@ class TestMetrics(unittest.TestCase): ...@@ -16,27 +16,27 @@ class TestMetrics(unittest.TestCase):
expected_thr = { expected_thr = {
'BS': 'BS':
[[[[0.1081672, 0.073954980, 0.08681672, numpy.nan]]]], [[[[[0.1081672, 0.073954980, 0.08681672, numpy.nan]]]]],
'BSS': 'BSS':
[[[[0.56240422, 0.66612211, 0.56288391, numpy.nan]]]], [[[[[0.56240422, 0.66612211, 0.56288391, numpy.nan]]]]],
'BS_CRD': 'BS_CRD':
[[[[[0.01335634, 0.15237434, 0.24718520], [[[[[[0.01335634, 0.15237434, 0.24718520],
[0.00550861, 0.15305671, 0.22150309], [0.00550861, 0.15305671, 0.22150309],
[0.00753750, 0.11933328, 0.19861250], [0.00753750, 0.11933328, 0.19861250],
[numpy.nan, numpy.nan, numpy.nan]]]]], [numpy.nan, numpy.nan, numpy.nan]]]]]],
'BS_LBD': 'BS_LBD':
[[[[[0.01244569, 0.14933386, 0.24505537], [[[[[[0.01244569, 0.14933386, 0.24505537],
[0.00801337, 0.14745568, 0.21339730], [0.00801337, 0.14745568, 0.21339730],
[0.01719462, 0.10479711, 0.17441921], [0.01719462, 0.10479711, 0.17441921],
[numpy.nan, numpy.nan, numpy.nan]]]]] [numpy.nan, numpy.nan, numpy.nan]]]]]]
} }
expected_qtl = { expected_qtl = {
'QS': 'QS':
[[[[321.1607717, 294.3494105, 265.70418006, [[[[[321.1607717, 294.3494105, 265.70418006,
236.15648446, 206.03965702]]]], 236.15648446, 206.03965702]]]]],
'CRPS': 'CRPS':
[[[176.63504823]]] [[[[176.63504823]]]]
} }
def test_threshold_metrics(self): def test_threshold_metrics(self):
...@@ -143,6 +143,50 @@ class TestMissingData(unittest.TestCase): ...@@ -143,6 +143,50 @@ class TestMissingData(unittest.TestCase):
) )
class TestUncertainty(unittest.TestCase):
def test_bootstrap(self):
thr = numpy.array([[690, 534, 445, numpy.nan]])
prd_1yr = numpy.genfromtxt(
"./data/q_prd_1yr.csv", delimiter=',', skip_header=1
)
obs_1yr = numpy.genfromtxt(
"./data/q_obs_1yr.csv", delimiter=',', skip_header=1
)
dts_1yr = numpy.genfromtxt(
"./data/q_obs_1yr.csv", delimiter=',', dtype=str, skip_footer=1
)
obs_3yrs = numpy.hstack((obs_1yr,) * 3)
prd_3yrs = numpy.hstack((prd_1yr,) * 3)
for metric in ("BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"):
with self.subTest(metric=metric):
numpy.testing.assert_almost_equal(
# bootstrap with only one year of data
# (compare last sample only to have matching dimensions)
evalhyd.evalp(
obs_1yr[numpy.newaxis],
prd_1yr[numpy.newaxis, numpy.newaxis],
[metric],
q_thr=thr,
bootstrap={
"n_samples": 10, "len_sample": 3, "summary": 0
},
dts=dts_1yr
)[0][:, :, :, [0]],
# repeat year of data three times to correspond to a
# bootstrap sample of length 3
evalhyd.evalp(
obs_3yrs[numpy.newaxis],
prd_3yrs[numpy.newaxis, numpy.newaxis],
[metric],
q_thr=thr
)[0]
)
if __name__ == '__main__': if __name__ == '__main__':
test_loader = unittest.TestLoader() test_loader = unittest.TestLoader()
test_suite = unittest.TestSuite() test_suite = unittest.TestSuite()
...@@ -159,6 +203,9 @@ if __name__ == '__main__': ...@@ -159,6 +203,9 @@ if __name__ == '__main__':
test_suite.addTests( test_suite.addTests(
test_loader.loadTestsFromTestCase(TestMissingData) test_loader.loadTestsFromTestCase(TestMissingData)
) )
test_suite.addTests(
test_loader.loadTestsFromTestCase(TestUncertainty)
)
runner = unittest.TextTestRunner(verbosity=2) runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(test_suite) result = runner.run(test_suite)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment