diff --git a/include/evalhyd/detail/determinist/diagnostics.hpp b/include/evalhyd/detail/determinist/diagnostics.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5e5f96aaf35ecdfdbaae1a6c7e437a4a70c86653 --- /dev/null +++ b/include/evalhyd/detail/determinist/diagnostics.hpp @@ -0,0 +1,61 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_DETERMINIST_DIAGNOSTICS_HPP +#define EVALHYD_DETERMINIST_DIAGNOSTICS_HPP + +namespace evalhyd +{ + namespace determinist + { + namespace elements + { + /// Counts the number of time steps available in given period. + /// + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (series, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_srs + /// Number of prediction series. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Time step counts. + /// shape: (series, subsets, samples) + inline xt::xtensor<double, 3> calc_t_counts( + const xt::xtensor<bool, 3>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_srs, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 3> t_counts = + xt::zeros<double>({n_srs, n_msk, n_exp}); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto t_msk_sampled = + xt::view(t_msk, xt::all(), xt::all(), b_exp[e]); + + // calculate the mean over the time steps + xt::view(t_counts, xt::all(), xt::all(), e) = + xt::sum(t_msk_sampled, -1); + } + + return t_counts; + } + } + } +} + +#endif //EVALHYD_DETERMINIST_DIAGNOSTICS_HPP diff --git a/include/evalhyd/detail/determinist/evaluator.hpp b/include/evalhyd/detail/determinist/evaluator.hpp index 0aeca94ea846977a1e1525a6c69a980ca038198f..df2be573e2927a141647d36ca4f37e5fc0b152a0 100644 --- a/include/evalhyd/detail/determinist/evaluator.hpp +++ b/include/evalhyd/detail/determinist/evaluator.hpp @@ -11,6 +11,7 @@ #include <xtensor/xexpression.hpp> #include <xtensor/xtensor.hpp> +#include "diagnostics.hpp" #include "errors.hpp" #include "efficiencies.hpp" @@ -37,6 +38,7 @@ namespace evalhyd std::size_t n_exp; // members for computational elements + xtl::xoptional<xt::xtensor<double, 3>, bool> t_counts; xtl::xoptional<xt::xtensor<double, 4>, bool> mean_obs; xtl::xoptional<xt::xtensor<double, 4>, bool> mean_prd; xtl::xoptional<xt::xtensor<double, 2>, bool> err; @@ -67,6 +69,17 @@ namespace evalhyd xtl::xoptional<xt::xtensor<double, 4>, bool> KGENP_D; // methods to compute elements + xt::xtensor<double, 3> get_t_counts() + { + if (!t_counts.has_value()) + { + t_counts = elements::calc_t_counts( + t_msk, b_exp, n_srs, n_msk, n_exp + ); + } + return t_counts.value(); + }; + xt::xtensor<double, 4> get_mean_obs() { if (!mean_obs.has_value()) @@ -403,6 +416,12 @@ namespace evalhyd } return KGENP_D.value(); }; + + // methods to compute diagnostics + xt::xtensor<double, 3> get_completeness() + { + return get_t_counts(); + }; }; } } diff --git a/include/evalhyd/detail/probabilist/brier.hpp b/include/evalhyd/detail/probabilist/brier.hpp index 0638080b260ca1f34b843c58606b8bea0007c2ab..013395e24d8fb9a42f795e1a8c2a0a63e2447f04 100644 --- a/include/evalhyd/detail/probabilist/brier.hpp +++ b/include/evalhyd/detail/probabilist/brier.hpp @@ -466,12 +466,9 @@ namespace evalhyd /// \param REL_DIAG /// Axes of the reliability diagram and the sampling histogram. /// shape: (sites, lead times, thresholds, time) - /// \param t_msk - /// Temporal subsets of the whole record. - /// shape: (sites, lead times, subsets, time) - /// \param b_exp - /// Boostrap samples. - /// shape: (samples, time slice) + /// \param t_counts + /// Time step counts for the period. + /// shape: (sites, lead times, subsets, samples) /// \param n_sit /// Number of sites. /// \param n_ldt @@ -491,8 +488,7 @@ namespace evalhyd const XD2& q_thr, const xt::xtensor<double, 5>& bar_o, const xt::xtensor<double, 7>& REL_DIAG, - const xt::xtensor<bool, 4>& t_msk, - const std::vector<xt::xkeep_slice<int>>& b_exp, + const xt::xtensor<double, 4>& t_counts, std::size_t n_sit, std::size_t n_ldt, std::size_t n_thr, @@ -511,13 +507,8 @@ namespace evalhyd // compute variable one sample at a time for (std::size_t e = 0; e < n_exp; e++) { - // apply the bootstrap sampling - auto t_msk_sampled = - xt::view(t_msk, xt::all(), xt::all(), m, - xt::newaxis(), b_exp[e]); - - // calculate length of subset - auto l = xt::sum(t_msk_sampled, -1); + // retrieve length of period + auto l = xt::view(t_counts, xt::all(), xt::all(), m, e); // retrieve range of forecast values $y_i$ auto y_i = xt::eval( @@ -598,6 +589,9 @@ namespace evalhyd /// \param b_exp /// Boostrap samples. /// shape: (samples, time slice) + /// \param t_counts + /// Time step counts for the period. + /// shape: (sites, lead times, subsets, samples) /// \param n_sit /// Number of sites. /// \param n_ldt @@ -619,6 +613,7 @@ namespace evalhyd const xt::xtensor<double, 4>& y_k, const xt::xtensor<bool, 4>& t_msk, const std::vector<xt::xkeep_slice<int>>& b_exp, + const xt::xtensor<double, 4>& t_counts, std::size_t n_sit, std::size_t n_ldt, std::size_t n_thr, @@ -673,12 +668,9 @@ namespace evalhyd auto y_k_masked_sampled = xt::view(y_k_masked, xt::all(), xt::all(), xt::all(), b_exp[e]); - auto t_msk_sampled = - xt::view(t_msk, xt::all(), xt::all(), m, - xt::newaxis(), b_exp[e]); - // calculate length of subset - auto l = xt::sum(t_msk_sampled, -1); + // retrieve length of period + auto l = xt::view(t_counts, xt::all(), xt::all(), m, e); // compute mask to subsample time steps belonging to same observation bin // (where bins are defined as the range of forecast values) diff --git a/include/evalhyd/detail/probabilist/diagnostics.hpp b/include/evalhyd/detail/probabilist/diagnostics.hpp new file mode 100644 index 0000000000000000000000000000000000000000..84ee988fb1f5e577dd1f73fa6c76b2beb04b32db --- /dev/null +++ b/include/evalhyd/detail/probabilist/diagnostics.hpp @@ -0,0 +1,64 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_PROBABILIST_DIAGNOSTICS_HPP +#define EVALHYD_PROBABILIST_DIAGNOSTICS_HPP + +namespace evalhyd +{ + namespace probabilist + { + namespace elements + { + /// Counts the number of time steps available in given period. + /// + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// Time step counts. + /// shape: (sites, lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_t_counts( + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> t_counts = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto t_msk_sampled = + xt::view(t_msk, xt::all(), xt::all(), xt::all(), b_exp[e]); + + // calculate the mean over the time steps + xt::view(t_counts, xt::all(), xt::all(), xt::all(), e) = + xt::sum(t_msk_sampled, -1); + } + + return t_counts; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_DIAGNOSTICS_HPP diff --git a/include/evalhyd/detail/probabilist/evaluator.hpp b/include/evalhyd/detail/probabilist/evaluator.hpp index 128ca2802a4b019417193543608511eccfd67fc1..c3dc9669eb758d20e7a7f5cce341f85cdcab4e02 100644 --- a/include/evalhyd/detail/probabilist/evaluator.hpp +++ b/include/evalhyd/detail/probabilist/evaluator.hpp @@ -12,6 +12,7 @@ #include <xtensor/xtensor.hpp> #include <xtensor/xview.hpp> +#include "diagnostics.hpp" #include "brier.hpp" #include "quantiles.hpp" #include "contingency.hpp" @@ -51,6 +52,8 @@ namespace evalhyd std::size_t n_exp; // members for computational elements + // > Diagnostics + xtl::xoptional<xt::xtensor<double, 4>, bool> t_counts; // > Brier-based xtl::xoptional<xt::xtensor<double, 3>, bool> o_k; xtl::xoptional<xt::xtensor<double, 5>, bool> bar_o; @@ -174,6 +177,17 @@ namespace evalhyd } // methods to compute elements + xt::xtensor<double, 4> get_t_counts() + { + if (!t_counts.has_value()) + { + t_counts = elements::calc_t_counts( + t_msk, b_exp, n_sit, n_ldt, n_msk, n_exp + ); + } + return t_counts.value(); + }; + xt::xtensor<double, 3> get_o_k() { if (!o_k.has_value()) @@ -540,7 +554,7 @@ namespace evalhyd { BS_CRD = metrics::calc_BS_CRD( get_q_thr(), get_bar_o(), get_REL_DIAG(), - t_msk, b_exp, + get_t_counts(), n_sit, n_ldt, n_thr, n_msk, n_exp ); } @@ -553,7 +567,8 @@ namespace evalhyd { BS_LBD = metrics::calc_BS_LBD( get_q_thr(), get_o_k(), get_y_k(), - t_msk, b_exp, n_sit, n_ldt, n_thr, n_msk, n_exp + t_msk, b_exp, get_t_counts(), + n_sit, n_ldt, n_thr, n_msk, n_exp ); } return BS_LBD.value(); @@ -760,6 +775,12 @@ namespace evalhyd } return WSS.value(); }; + + // methods to compute diagnostics + xt::xtensor<double, 4> get_completeness() + { + return get_t_counts(); + }; }; } } diff --git a/include/evalhyd/detail/utils.hpp b/include/evalhyd/detail/utils.hpp index 94eae5387361ff4cbbd8513e3c4698048667f6f7..abab9b40f333f4854c92fc3198cc22b39fe3e5ea 100644 --- a/include/evalhyd/detail/utils.hpp +++ b/include/evalhyd/detail/utils.hpp @@ -43,6 +43,30 @@ namespace evalhyd } } + // Procedure to check that all elements in the list of diagnostics are + // valid diagnostics. + // + // \param requested_diags + // Vector of strings for the diagnostic(s) to be computed. + // \param valid_diags + // Vector of strings for the diagnostic(s) to can be computed. + inline void check_diags ( + const std::vector<std::string>& requested_diags, + const std::vector<std::string>& valid_diags + ) + { + for (const auto& diag : requested_diags) + { + if (std::find(valid_diags.begin(), valid_diags.end(), diag) + == valid_diags.end()) + { + throw std::runtime_error( + "invalid evaluation diagnostic: " + diag + ); + } + } + } + // Procedure to check that all elements for a bootstrap experiment // are provided and valid. // diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp index 76d2a8fcda8d8ca12e2f705fb6e9696cb7268123..759b825f341f32d25e4cefd2296ea08371b93134 100644 --- a/include/evalhyd/evald.hpp +++ b/include/evalhyd/evald.hpp @@ -123,12 +123,16 @@ namespace evalhyd /// A value for the seed used by random generators. This parameter /// guarantees the reproducibility of the metric values between calls. /// + /// diagnostics: ``std::vector<std::string>``, optional + /// The sequence of evaluation diagnostics to be computed. + /// /// :Returns: /// /// ``std::vector<xt::xarray<double>>`` - /// The sequence of evaluation metrics computed - /// in the same order as given in *metrics*. - /// shape: (metrics,)<(series, subsets, samples, {components})> + /// The sequence of evaluation metrics computed in the same order + /// as given in *metrics*, followed by the sequence of evaluation + /// diagnostics computed in the same order as given in *diagnostics*. + /// shape: (metrics+diagnostics,)<(series, subsets, samples, {components})> /// /// :Examples: /// @@ -181,7 +185,9 @@ namespace evalhyd xtl::missing<const std::unordered_map<std::string, int>>(), const std::vector<std::string>& dts = {}, xtl::xoptional<const int, bool> seed = - xtl::missing<const int>() + xtl::missing<const int>(), + xtl::xoptional<const std::vector<std::string>, bool> diagnostics = + xtl::missing<const std::vector<std::string>>() ) { // check ranks of tensors @@ -205,7 +211,7 @@ namespace evalhyd const XB3& t_msk_ = t_msk.derived_cast(); const XS2& m_cdt_ = m_cdt.derived_cast(); - // check that the metrics to be computed are valid + // check that the metrics/diagnostics to be computed are valid utils::check_metrics( metrics, {"MAE", "MARE", "MSE", "RMSE", @@ -213,6 +219,14 @@ namespace evalhyd "KGENP", "KGENP_D"} ); + if ( diagnostics.has_value() ) + { + utils::check_diags( + diagnostics.value(), + {"completeness"} + ); + } + // check that optional parameters are valid if (bootstrap.has_value()) { @@ -504,6 +518,19 @@ namespace evalhyd } } + if ( diagnostics.has_value() ) + { + for ( const auto& diagnostic : diagnostics.value() ) + { + if ( diagnostic == "completeness" ) + { + r.emplace_back( + evaluator.get_completeness() + ); + } + } + } + return r; }; } diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp index 7c50c1a38b2a4c5d18be45c6918b965953be4aab..deea6b41e3f789f8e18bb0e04b704c0249101d6f 100644 --- a/include/evalhyd/evalp.hpp +++ b/include/evalhyd/evalp.hpp @@ -128,12 +128,16 @@ namespace evalhyd /// A value for the seed used by random generators. This parameter /// guarantees the reproducibility of the metric values between calls. /// + /// diagnostics: ``std::vector<std::string>``, optional + /// The sequence of evaluation diagnostics to be computed. + /// /// :Returns: /// /// ``std::vector<xt::xarray<double>>`` /// The sequence of evaluation metrics computed in the same order - /// as given in *metrics*. - /// shape: (metrics,)<(sites, lead times, subsets, samples, + /// as given in *metrics*, followed by the sequence of evaluation + /// diagnostics computed in the same order as given in *diagnostics*. + /// shape: (metrics+diagnostics,)<(sites, lead times, subsets, samples, /// {quantiles,} {thresholds,} {components,} {ranks,} {intervals})> /// /// :Examples: @@ -178,7 +182,9 @@ namespace evalhyd xtl::missing<const std::unordered_map<std::string, int>>(), const std::vector<std::string>& dts = {}, xtl::xoptional<const int, bool> seed = - xtl::missing<const int>() + xtl::missing<const int>(), + xtl::xoptional<const std::vector<std::string>, bool> diagnostics = + xtl::missing<const std::vector<std::string>>() ) { // check ranks of tensors @@ -212,7 +218,7 @@ namespace evalhyd // adapt vector to tensor const xt::xtensor<double, 1> c_lvl_ = xt::adapt(c_lvl); - // check that the metrics to be computed are valid + // check that the metrics/diagnostics to be computed are valid utils::check_metrics( metrics, {"BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", @@ -222,6 +228,14 @@ namespace evalhyd "CR", "AW", "AWN", "AWI", "WS", "WSS"} ); + if ( diagnostics.has_value() ) + { + utils::check_diags( + diagnostics.value(), + {"completeness"} + ); + } + // check optional parameters if (bootstrap.has_value()) { @@ -525,6 +539,19 @@ namespace evalhyd } } + if ( diagnostics.has_value() ) + { + for ( const auto& diagnostic : diagnostics.value() ) + { + if ( diagnostic == "completeness" ) + { + r.emplace_back( + evaluator.get_completeness() + ); + } + } + } + return r; } } diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp index 2246bc4628213abef7b585a8659391ba80c6f620..8bcd4dfe7a86898eef652a63c1e2b8f8567a6b3c 100644 --- a/tests/test_determinist.cpp +++ b/tests/test_determinist.cpp @@ -537,3 +537,57 @@ TEST(DeterministTests, TestBootstrapSummary) } } } + +TEST(DeterministTests, TestCompleteness) +{ + std::vector<std::string> diags = {"completeness"}; + + // compute metrics on series with NaN + xt::xtensor<double, 2> prd = { + { 5.3, NAN, 5.7, 2.3, 3.3, 4.1 }, + { 4.3, 4.2, 4.7, 4.3, 3.3, 2.8 }, + { 5.3, NAN, 5.7, 2.3, 3.8, NAN } + }; + + xt::xtensor<double, 2> obs = + {{ 4.7, 4.3, NAN, 2.7, 4.1, 5.0 }}; + + xt::xtensor<bool, 3> msk = { + {{ true, true, true, false, true, true }, + { true, true, true, true, true, true }}, + {{ true, true, true, true, true, false }, + { true, true, true, true, true, true }}, + {{ true, true, true, false, false, true }, + { true, true, true, true, true, true }} + }; + + std::vector<xt::xarray<double>> results = + evalhyd::evald( + obs, + prd, + std::vector<std::string> {}, // metrics + xtl::missing<const std::string>(), // transform + xtl::missing<double>(), // exponent + xtl::missing<double>(), // epsilon + msk, // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + xtl::missing<const std::unordered_map<std::string, int>>(), // bootstrap + {}, // dts + xtl::missing<const int>(), // seed + diags + ); + + // check that numerical results are identical + xt::xtensor<double, 3> expected = { + {{ 3. }, + { 4. }}, + {{ 4. }, + { 5. }}, + {{ 1. }, + { 3. }} + }; + + EXPECT_TRUE( + xt::all(xt::isclose(results[0], expected, 1e-05, 1e-08, true)) + ); +} diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp index 5bd3f49863b79ef3e1b2ad6793d52e25059844cb..c7ead8a6817d33d2939a136e864fe7b0a57ff7a1 100644 --- a/tests/test_probabilist.cpp +++ b/tests/test_probabilist.cpp @@ -817,3 +817,63 @@ TEST(ProbabilistTests, TestBootstrapSummary) } } } + +TEST(ProbabilistTests, TestCompleteness) +{ + std::vector<std::string> diags = {"completeness"}; + + // compute metrics on series with NaN + xt::xtensor<double, 4> prd = {{ + // leadtime 1 + {{ 5.3, NAN, 5.7, 2.3, 3.3, NAN }, + { 4.3, NAN, 4.7, 4.3, 3.4, NAN }, + { 5.3, NAN, 5.7, 2.3, 3.8, NAN }}, + // leadtime 2 + {{ NAN, 4.2, 5.7, 2.3, 3.1, 4.1 }, + { NAN, 4.2, 4.7, 4.3, 3.3, 2.8 }, + { NAN, 5.2, 5.7, 2.3, 3.9, 3.5 }} + }}; + + xt::xtensor<double, 2> obs = + {{ 4.7, 4.3, NAN, 2.7, 4.1, 5.0 }}; + + xt::xtensor<bool, 4> msk = {{ + // leadtime 1 + {{ true, true, true, false, true, true }, + { true, true, true, true, true, true }}, + // leadtime 2 + {{ true, true, true, true, true, false }, + { true, true, true, true, true, true }}, + }}; + + std::vector<xt::xarray<double>> results = + evalhyd::evalp( + obs, + prd, + std::vector<std::string> {}, // metrics + xt::xtensor<double, 2>({}), // thresholds + xtl::missing<const std::string>(), // events + {}, + msk, // t_msk + xt::xtensor<std::array<char, 32>, 2>({}), // m_cdt + xtl::missing<const std::unordered_map<std::string, int>>(), // bootstrap + {}, // dts + xtl::missing<const int>(), // seed + diags + ); + + // check that numerical results are identical + xt::xtensor<double, 4> expected = {{ + // leadtime 1 + {{ 2. }, + { 3. }}, + // leadtime 2 + {{ 3. }, + { 4. }}, + }}; + + EXPECT_TRUE( + xt::all(xt::isclose(results[0], expected, 1e-05, 1e-08, true)) + ); +} +