diff --git a/include/evalhyd/detail/probabilist/brier.hpp b/include/evalhyd/detail/probabilist/brier.hpp index e39e523acf1b720e3e4e05557bb980c5b9cd5160..13226deede2f7cfc46c34a8510173fa81c218cb3 100644 --- a/include/evalhyd/detail/probabilist/brier.hpp +++ b/include/evalhyd/detail/probabilist/brier.hpp @@ -9,7 +9,10 @@ #include <xtensor/xtensor.hpp> #include <xtensor/xview.hpp> +#include <xtensor/xindex_view.hpp> #include <xtensor/xmasked_view.hpp> +#include <xtensor/xsort.hpp> +#include <xtensor/xmath.hpp> // NOTE ------------------------------------------------------------------------ @@ -38,10 +41,10 @@ namespace evalhyd /// \return /// Event observed outcome. /// shape: (sites, thresholds, time) - template<class XD2> + template<class XD2a, class XD2b> inline xt::xtensor<double, 3> calc_o_k( - const XD2& q_obs, - const XD2& q_thr, + const XD2a& q_obs, + const XD2b& q_thr, bool is_high_flow_event ) { @@ -890,6 +893,153 @@ namespace evalhyd return BSS; } + + /// Compute the continuous rank probability score based on the + /// integration over the Brier scores (CRPS_FROM_BS). + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (sites, time) + /// \param q_prd + /// Streamflow predictions. + /// shape: (sites, lead times, members, time) + /// \param is_high_flow_event + /// Whether events correspond to being above the threshold(s). + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// CRPS for each subset and for each threshold. + /// shape: (sites, lead times, subsets, samples) + template <class XD2, class XD4> + inline xt::xtensor<double, 4> calc_CRPS_FROM_BS( + const XD2& q_obs, + const XD4& q_prd, + bool is_high_flow_event, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_msk, + std::size_t n_exp + ) + { + // declare and initialise output variable + xt::xtensor<double, 4> CRPS_FROM_BS = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto q_obs_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, xt::all()), + xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()), + NAN + ); + auto q_prd_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, + xt::newaxis(), xt::all()), + q_prd, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + for (std::size_t l = 0; l < n_ldt; l++) + { + // compute empirical thresholds from 99 observed quantiles + xt::xtensor<double, 2> thr = + xt::zeros<double>({n_sit, std::size_t {99}}); + + // /!\ need to compute quantiles one site at a time + // because there is no `xt::nanquantile`, so + // need to filter NaN before computing quantiles + for (std::size_t s = 0; s < n_sit; s++) + { + auto obs = xt::view(q_obs_masked, s, l, b_exp[e]); + + auto obs_filtered = xt::filter( + obs, !xt::isnan(obs) + ); + + if (obs_filtered.size() > 0) + { + xt::view(thr, s, xt::all()) = xt::quantile( + obs_filtered, + xt::arange<double>(0.01, 1.0, 0.01) + ); + } + else + { + xt::view(thr, s, xt::all()) = NAN; + } + } + + // compute observed and predicted event outcomes + auto o_k = elements::calc_o_k( + xt::view(q_obs_masked, xt::all(), l, + xt::all()), + thr, is_high_flow_event + ); + + auto y_k = elements::calc_y_k( + elements::calc_sum_f_k( + xt::view(q_prd_masked, xt::all(), l, + xt::newaxis(), xt::all(), + xt::all()), + thr, is_high_flow_event + ), + n_mbr + ); + + // compute 99 Brier scores + auto bs = intermediate::calc_bs(o_k, y_k); + + auto bs_masked = xt::where( + xt::view(t_msk, xt::all(), l, xt::newaxis(), + m, xt::newaxis(), xt::all()), + bs, + NAN + ); + + auto bs_masked_sampled = xt::view( + bs_masked, xt::all(), xt::all(), xt::all(), + b_exp[e] + ); + + auto BS = xt::nanmean(bs_masked_sampled, -1); + + // compute CRPS from integration over 99 Brier scores + xt::view(CRPS_FROM_BS, xt::all(), l, m, e) = + // xt::trapz(y, x, axis=1) + xt::trapz( + xt::view(BS, xt::all(), 0, xt::all()), + thr, + 1 + ); + } + } + } + + return CRPS_FROM_BS; + } } } } diff --git a/include/evalhyd/detail/probabilist/evaluator.hpp b/include/evalhyd/detail/probabilist/evaluator.hpp index 88eb1361203384311048e7534e5b1d369d4a67c9..e1f5b4860c8fe47a431b18c36ff4ace7d91ebe1e 100644 --- a/include/evalhyd/detail/probabilist/evaluator.hpp +++ b/include/evalhyd/detail/probabilist/evaluator.hpp @@ -82,7 +82,7 @@ namespace evalhyd xtl::xoptional<xt::xtensor<double, 4>, bool> bs; // > Quantiles-based xtl::xoptional<xt::xtensor<double, 4>, bool> qs; - xtl::xoptional<xt::xtensor<double, 3>, bool> crps; + xtl::xoptional<xt::xtensor<double, 3>, bool> crps_from_qs; // > Contingency table-based xtl::xoptional<xt::xtensor<double, 5>, bool> pod; xtl::xoptional<xt::xtensor<double, 5>, bool> pofd; @@ -98,9 +98,10 @@ namespace evalhyd xtl::xoptional<xt::xtensor<double, 6>, bool> BS_CRD; xtl::xoptional<xt::xtensor<double, 6>, bool> BS_LBD; xtl::xoptional<xt::xtensor<double, 5>, bool> BSS; + xtl::xoptional<xt::xtensor<double, 4>, bool> CRPS_FROM_BS; // > Quantiles-based xtl::xoptional<xt::xtensor<double, 5>, bool> QS; - xtl::xoptional<xt::xtensor<double, 4>, bool> CRPS; + xtl::xoptional<xt::xtensor<double, 4>, bool> CRPS_FROM_QS; // > Contingency table-based xtl::xoptional<xt::xtensor<double, 6>, bool> POD; xtl::xoptional<xt::xtensor<double, 6>, bool> POFD; @@ -392,15 +393,15 @@ namespace evalhyd return qs.value(); }; - xt::xtensor<double, 3> get_crps() + xt::xtensor<double, 3> get_crps_from_qs() { - if (!crps.has_value()) + if (!crps_from_qs.has_value()) { - crps = intermediate::calc_crps( + crps_from_qs = intermediate::calc_crps_from_qs( get_qs(), n_mbr ); } - return crps.value(); + return crps_from_qs.value(); }; xt::xtensor<double, 5> get_pod() @@ -586,6 +587,18 @@ namespace evalhyd return BSS.value(); }; + xt::xtensor<double, 4> get_CRPS_FROM_BS() + { + if (!CRPS_FROM_BS.has_value()) + { + CRPS_FROM_BS = metrics::calc_CRPS_FROM_BS( + q_obs, q_prd, is_high_flow_event(), t_msk, b_exp, + n_sit, n_ldt, n_mbr, n_msk, n_exp + ); + } + return CRPS_FROM_BS.value(); + }; + xt::xtensor<double, 5> get_QS() { if (!QS.has_value()) @@ -598,16 +611,16 @@ namespace evalhyd return QS.value(); }; - xt::xtensor<double, 4> get_CRPS() + xt::xtensor<double, 4> get_CRPS_FROM_QS() { - if (!CRPS.has_value()) + if (!CRPS_FROM_QS.has_value()) { - CRPS = metrics::calc_CRPS( - get_crps(), t_msk, b_exp, + CRPS_FROM_QS = metrics::calc_CRPS_FROM_QS( + get_crps_from_qs(), t_msk, b_exp, n_sit, n_ldt, n_msk, n_exp ); } - return CRPS.value(); + return CRPS_FROM_QS.value(); }; xt::xtensor<double, 6> get_POD() diff --git a/include/evalhyd/detail/probabilist/quantiles.hpp b/include/evalhyd/detail/probabilist/quantiles.hpp index 5a2fe60602e930550af19fbfd6e5091701e49ee9..a25ad61d234b717846001fe739a4025977b2b473 100644 --- a/include/evalhyd/detail/probabilist/quantiles.hpp +++ b/include/evalhyd/detail/probabilist/quantiles.hpp @@ -8,6 +8,7 @@ #include <xtensor/xtensor.hpp> #include <xtensor/xview.hpp> #include <xtensor/xsort.hpp> +#include <xtensor/xmath.hpp> // NOTE ------------------------------------------------------------------------ @@ -100,7 +101,7 @@ namespace evalhyd /// \return /// CRPS for each time step. /// shape: (sites, lead times, time) - inline xt::xtensor<double, 3> calc_crps( + inline xt::xtensor<double, 3> calc_crps_from_qs( const xt::xtensor<double, 4>& qs, std::size_t n_mbr ) @@ -182,8 +183,8 @@ namespace evalhyd return QS; } - /// Compute the continuous rank probability score (CRPS) based - /// on quantile scores. + /// Compute the continuous rank probability score based on the + /// integration over the quantile scores (CRPS_FROM_QS). /// /// \param crps /// CRPS for each time step. @@ -205,7 +206,7 @@ namespace evalhyd /// \return /// CRPS. /// shape: (sites, lead times, subsets, samples) - inline xt::xtensor<double, 4> calc_CRPS( + inline xt::xtensor<double, 4> calc_CRPS_FROM_QS( const xt::xtensor<double, 3>& crps, const xt::xtensor<bool, 4>& t_msk, const std::vector<xt::xkeep_slice<int>>& b_exp, @@ -216,7 +217,7 @@ namespace evalhyd ) { // initialise output variable - xt::xtensor<double, 4> CRPS = + xt::xtensor<double, 4> CRPS_FROM_QS = xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); // compute variable one mask at a time to minimise memory imprint @@ -240,12 +241,12 @@ namespace evalhyd // calculate the mean over the time steps // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$ - xt::view(CRPS, xt::all(), xt::all(), m, e) = + xt::view(CRPS_FROM_QS, xt::all(), xt::all(), m, e) = xt::squeeze(xt::nanmean(crps_masked_sampled, -1)); } } - return CRPS; + return CRPS_FROM_QS; } } } diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp index 5dfe36984077f92cff08fe908e7755721d5eab49..95e6f3d51facf3aa8fa55b1319ba88a709bd0841 100644 --- a/include/evalhyd/evalp.hpp +++ b/include/evalhyd/evalp.hpp @@ -224,8 +224,8 @@ namespace evalhyd // check that the metrics/diagnostics to be computed are valid utils::check_metrics( metrics, - {"BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", - "QS", "CRPS", + {"BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", "CRPS_FROM_BS", + "QS", "CRPS_FROM_QS", "POD", "POFD", "FAR", "CSI", "ROCSS", "RANK_HIST", "DS", "AS", "CR", "AW", "AWN", "AWI", "WS", "WSS"} @@ -444,16 +444,22 @@ namespace evalhyd uncertainty::summarise_p(evaluator.get_REL_DIAG(), summary) ); } + else if ( metric == "CRPS_FROM_BS" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_CRPS_FROM_BS(), summary) + ); + } else if ( metric == "QS" ) { r.emplace_back( uncertainty::summarise_p(evaluator.get_QS(), summary) ); } - else if ( metric == "CRPS" ) + else if ( metric == "CRPS_FROM_QS" ) { r.emplace_back( - uncertainty::summarise_p(evaluator.get_CRPS(), summary) + uncertainty::summarise_p(evaluator.get_CRPS_FROM_QS(), summary) ); } else if ( metric == "POD" ) diff --git a/tests/expected/evalp/CRPS_FROM_BS.csv b/tests/expected/evalp/CRPS_FROM_BS.csv new file mode 100644 index 0000000000000000000000000000000000000000..03c315fdb46fe52a75aef395dcb8d9d039ac0152 --- /dev/null +++ b/tests/expected/evalp/CRPS_FROM_BS.csv @@ -0,0 +1 @@ +221.3421266369226 diff --git a/tests/expected/evalp/CRPS.csv b/tests/expected/evalp/CRPS_FROM_QS.csv similarity index 100% rename from tests/expected/evalp/CRPS.csv rename to tests/expected/evalp/CRPS_FROM_QS.csv diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp index c7ead8a6817d33d2939a136e864fe7b0a57ff7a1..f3324508f3020083b910354dc69cd617671412f4 100644 --- a/tests/test_probabilist.cpp +++ b/tests/test_probabilist.cpp @@ -30,8 +30,8 @@ using namespace xt::placeholders; // required for `_` to work std::vector<std::string> all_metrics_p = { - "BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", - "QS", "CRPS", + "BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", "CRPS_FROM_BS", + "QS", "CRPS_FROM_QS", "POD", "POFD", "FAR", "CSI", "ROCSS", "RANK_HIST", "DS", "AS", "CR", "AW", "AWN", "AWI", "WS", "WSS" @@ -84,7 +84,7 @@ TEST(ProbabilistTests, TestBrier) // compute scores xt::xtensor<double, 2> thresholds = {{690, 534, 445, NAN}}; - std::vector<std::string> metrics = {"BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG"}; + std::vector<std::string> metrics = {"BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", "CRPS_FROM_BS"}; std::vector<xt::xarray<double>> results = evalhyd::evalp( @@ -127,7 +127,7 @@ TEST(ProbabilistTests, TestQuantiles) auto expected = load_expected_p(); // compute scores - std::vector<std::string> metrics = {"QS", "CRPS"}; + std::vector<std::string> metrics = {"QS", "CRPS_FROM_QS"}; std::vector<xt::xarray<double>> results = evalhyd::evalp(