From c8d07b82ab8676eb1d898cfefe58356ba09097dc Mon Sep 17 00:00:00 2001 From: Thibault Hallouin <thibault.hallouin@inrae.fr> Date: Fri, 24 Mar 2023 12:56:29 +0100 Subject: [PATCH] add another version of CRPS (from empirical CDF) new metric is called CRPS_FROM_ECDF --- include/evalhyd/detail/probabilist/cdf.hpp | 200 ++++++++++++++++++ .../evalhyd/detail/probabilist/evaluator.hpp | 28 +++ .../evalhyd/detail/probabilist/intervals.hpp | 3 +- .../evalhyd/detail/probabilist/quantiles.hpp | 6 +- include/evalhyd/evalp.hpp | 7 + tests/expected/evalp/CRPS_FROM_ECDF.csv | 1 + tests/test_probabilist.cpp | 32 +++ 7 files changed, 273 insertions(+), 4 deletions(-) create mode 100644 include/evalhyd/detail/probabilist/cdf.hpp create mode 100644 tests/expected/evalp/CRPS_FROM_ECDF.csv diff --git a/include/evalhyd/detail/probabilist/cdf.hpp b/include/evalhyd/detail/probabilist/cdf.hpp new file mode 100644 index 0000000..1f5489e --- /dev/null +++ b/include/evalhyd/detail/probabilist/cdf.hpp @@ -0,0 +1,200 @@ +// Copyright (c) 2023, INRAE. +// Distributed under the terms of the GPL-3 Licence. +// The full licence is in the file LICENCE, distributed with this software. + +#ifndef EVALHYD_PROBABILIST_CDF_HPP +#define EVALHYD_PROBABILIST_CDF_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xmath.hpp> + + +namespace evalhyd +{ + namespace probabilist + { + namespace intermediate + { + /// Compute the CRPS for each time step as the distance between the + /// observed and empirical (i.e. constructed from the ensemble + /// predictions) quadratic cumulative density functions (CDFs). + /// + /// \param q_obs + /// Streamflow observations. + /// shape: (sites, time) + /// \param q_qnt + /// Streamflow prediction quantiles. + /// shape: (sites, lead times, quantiles, time) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_mbr + /// Number of ensemble members. + /// \param n_tim + /// Number of time steps. + /// \return + /// CRPS for each time step. + /// shape: (sites, lead times, time) + template <class XD2> + inline xt::xtensor<double, 3> calc_crps_from_ecdf( + const XD2& q_obs, + const xt::xtensor<double, 4>& q_qnt, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_mbr, + std::size_t n_tim + ) + { + // notations below follow Hersbach (2000) + // https://doi.org/10.1175/1520-0434(2000)015<0559:DOTCRP>2.0.CO;2 + + // declare and initialise internal variables + xt::xtensor<double, 4> alpha_i = + xt::zeros<double>({n_mbr + 1, n_sit, n_ldt, n_tim}); + + xt::xtensor<double, 4> beta_i = + xt::zeros<double>({n_mbr + 1, n_sit, n_ldt, n_tim}); + + // case x_a < x_1 + // i.e. observation is an outlier before predictive range + auto x_a = xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()); + auto x_1 = xt::view(q_qnt, xt::all(), xt::all(), 0, xt::all()); + + auto is_before = x_a < x_1; + xt::view(beta_i, 0, xt::all()) = xt::where( + is_before, x_1 - x_a, xt::view(beta_i, 0) + ); + + for (std::size_t m = 0; m < n_mbr - 1; m++) + { + auto x_i = xt::view(q_qnt, xt::all(), xt::all(), m, xt::all()); + auto x_ip1 = xt::view(q_qnt, xt::all(), xt::all(), m + 1, xt::all()); + + // case x_a < x_i + // i.e. observation below given member + auto is_below = x_a < x_i; + xt::view(beta_i, m + 1, xt::all()) = xt::where( + is_below, x_ip1 - x_i, xt::view(beta_i, m + 1) + ); + + // case x_i <= x_a <= x_{i+1} + // i.e. observation between given member and next member + auto is_between = (x_i <= x_a) && (x_a <= x_ip1); + xt::view(alpha_i, m + 1, xt::all()) = xt::where( + is_between, x_a - x_i, xt::view(alpha_i, m + 1) + ); + xt::view(beta_i, m + 1, xt::all()) = xt::where( + is_between, x_ip1 - x_a, xt::view(beta_i, m + 1) + ); + + // case x_a > x_{i+1} + // i.e. observation above next member + auto is_above = x_a > x_ip1; + xt::view(alpha_i, m + 1, xt::all()) = xt::where( + is_above, x_ip1 - x_i, xt::view(alpha_i, m + 1) + ); + } + + // case x_a > x_N + // i.e. observation is an outlier beyond predictive range + auto x_N = xt::view(q_qnt, xt::all(), xt::all(), n_mbr - 1, xt::all()); + + auto is_beyond = x_a > x_N; + xt::view(alpha_i, n_mbr, xt::all()) = xt::where( + is_beyond, x_a - x_N, xt::view(alpha_i, n_mbr) + ); + + // compute crps as difference between the quadratic CDFs + auto p_i_2 = xt::eval( + xt::view( + xt::square(xt::arange<double>(n_mbr + 1) / n_mbr), + xt::all(), xt::newaxis(), xt::newaxis(), + xt::newaxis() + ) + ); + + auto crps_from_ecdf = xt::sum( + (alpha_i * p_i_2) + (beta_i * (1 - p_i_2)), + 0 + ); + + return crps_from_ecdf; + } + } + + namespace metrics + { + /// Compute the continuous rank probability score based on the + /// integration over the quadratic difference between the observed + /// and empirical cumulative density functions (CRPS_FROM_ECDF). + /// + /// \param crps_from_ecdf + /// CRPS for each time step. + /// shape: (sites, lead times, time) + /// \param t_msk + /// Temporal subsets of the whole record. + /// shape: (sites, lead times, subsets, time) + /// \param b_exp + /// Boostrap samples. + /// shape: (samples, time slice) + /// \param n_sit + /// Number of sites. + /// \param n_ldt + /// Number of lead times. + /// \param n_tim + /// Number of time steps. + /// \param n_msk + /// Number of temporal subsets. + /// \param n_exp + /// Number of bootstrap samples. + /// \return + /// CRPS. + /// shape: (sites, lead times, subsets, samples) + inline xt::xtensor<double, 4> calc_CRPS_FROM_ECDF( + const xt::xtensor<double, 3>& crps_from_ecdf, + const xt::xtensor<bool, 4>& t_msk, + const std::vector<xt::xkeep_slice<int>>& b_exp, + std::size_t n_sit, + std::size_t n_ldt, + std::size_t n_msk, + std::size_t n_exp + ) + { + // initialise output variable + xt::xtensor<double, 4> CRPS_FROM_ECDF = + xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp}); + + // compute variable one mask at a time to minimise memory imprint + for (std::size_t m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto crps_masked = xt::where( + xt::view(t_msk, xt::all(), xt::all(), m, xt::all()), + crps_from_ecdf, + NAN + ); + + // compute variable one sample at a time + for (std::size_t e = 0; e < n_exp; e++) + { + // apply the bootstrap sampling + auto crps_masked_sampled = xt::view( + crps_masked, xt::all(), xt::all(), b_exp[e] + ); + + // calculate the mean over the time steps + xt::view(CRPS_FROM_ECDF, xt::all(), xt::all(), m, e) = + xt::nanmean(crps_masked_sampled, -1); + } + } + + return CRPS_FROM_ECDF; + } + } + } +} + +#endif //EVALHYD_PROBABILIST_CDF_HPP \ No newline at end of file diff --git a/include/evalhyd/detail/probabilist/evaluator.hpp b/include/evalhyd/detail/probabilist/evaluator.hpp index e1f5b48..7e23c8e 100644 --- a/include/evalhyd/detail/probabilist/evaluator.hpp +++ b/include/evalhyd/detail/probabilist/evaluator.hpp @@ -14,6 +14,7 @@ #include "diagnostics.hpp" #include "brier.hpp" +#include "cdf.hpp" #include "quantiles.hpp" #include "contingency.hpp" #include "ranks.hpp" @@ -80,6 +81,8 @@ namespace evalhyd // (i.e. before the reduction along the temporal axis) // > Brier-based xtl::xoptional<xt::xtensor<double, 4>, bool> bs; + // > CDF-based + xtl::xoptional<xt::xtensor<double, 3>, bool> crps_from_ecdf; // > Quantiles-based xtl::xoptional<xt::xtensor<double, 4>, bool> qs; xtl::xoptional<xt::xtensor<double, 3>, bool> crps_from_qs; @@ -99,6 +102,8 @@ namespace evalhyd xtl::xoptional<xt::xtensor<double, 6>, bool> BS_LBD; xtl::xoptional<xt::xtensor<double, 5>, bool> BSS; xtl::xoptional<xt::xtensor<double, 4>, bool> CRPS_FROM_BS; + // > CDF-based + xtl::xoptional<xt::xtensor<double, 4>, bool> CRPS_FROM_ECDF; // > Quantiles-based xtl::xoptional<xt::xtensor<double, 5>, bool> QS; xtl::xoptional<xt::xtensor<double, 4>, bool> CRPS_FROM_QS; @@ -382,6 +387,17 @@ namespace evalhyd return bs.value(); }; + xt::xtensor<double, 3> get_crps_from_ecdf() + { + if (!crps_from_ecdf.has_value()) + { + crps_from_ecdf = intermediate::calc_crps_from_ecdf( + q_obs, get_q_qnt(), n_sit, n_ldt, n_mbr, n_tim + ); + } + return crps_from_ecdf.value(); + }; + xt::xtensor<double, 4> get_qs() { if (!qs.has_value()) @@ -599,6 +615,18 @@ namespace evalhyd return CRPS_FROM_BS.value(); }; + xt::xtensor<double, 4> get_CRPS_FROM_ECDF() + { + if (!CRPS_FROM_ECDF.has_value()) + { + CRPS_FROM_ECDF = metrics::calc_CRPS_FROM_ECDF( + get_crps_from_ecdf(), t_msk, b_exp, + n_sit, n_ldt, n_msk, n_exp + ); + } + return CRPS_FROM_ECDF.value(); + }; + xt::xtensor<double, 5> get_QS() { if (!QS.has_value()) diff --git a/include/evalhyd/detail/probabilist/intervals.hpp b/include/evalhyd/detail/probabilist/intervals.hpp index 0a2f4c2..c36a235 100644 --- a/include/evalhyd/detail/probabilist/intervals.hpp +++ b/include/evalhyd/detail/probabilist/intervals.hpp @@ -296,7 +296,8 @@ namespace evalhyd std::size_t n_itv, std::size_t n_msk, std::size_t n_exp - ) { + ) + { // initialise output variable xt::xtensor<double, 5> METRIC = xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_itv}); diff --git a/include/evalhyd/detail/probabilist/quantiles.hpp b/include/evalhyd/detail/probabilist/quantiles.hpp index a25ad61..88bd528 100644 --- a/include/evalhyd/detail/probabilist/quantiles.hpp +++ b/include/evalhyd/detail/probabilist/quantiles.hpp @@ -186,7 +186,7 @@ namespace evalhyd /// Compute the continuous rank probability score based on the /// integration over the quantile scores (CRPS_FROM_QS). /// - /// \param crps + /// \param crps_from_qs /// CRPS for each time step. /// shape: (sites, lead times, time) /// \param t_msk @@ -207,7 +207,7 @@ namespace evalhyd /// CRPS. /// shape: (sites, lead times, subsets, samples) inline xt::xtensor<double, 4> calc_CRPS_FROM_QS( - const xt::xtensor<double, 3>& crps, + const xt::xtensor<double, 3>& crps_from_qs, const xt::xtensor<bool, 4>& t_msk, const std::vector<xt::xkeep_slice<int>>& b_exp, std::size_t n_sit, @@ -227,7 +227,7 @@ namespace evalhyd // (using NaN workaround until reducers work on masked_view) auto crps_masked = xt::where( xt::view(t_msk, xt::all(), xt::all(), m, xt::all()), - crps, + crps_from_qs, NAN ); diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp index 95e6f3d..e3eeb7b 100644 --- a/include/evalhyd/evalp.hpp +++ b/include/evalhyd/evalp.hpp @@ -225,6 +225,7 @@ namespace evalhyd utils::check_metrics( metrics, {"BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", "CRPS_FROM_BS", + "CRPS_FROM_ECDF", "QS", "CRPS_FROM_QS", "POD", "POFD", "FAR", "CSI", "ROCSS", "RANK_HIST", "DS", "AS", @@ -450,6 +451,12 @@ namespace evalhyd uncertainty::summarise_p(evaluator.get_CRPS_FROM_BS(), summary) ); } + else if ( metric == "CRPS_FROM_ECDF" ) + { + r.emplace_back( + uncertainty::summarise_p(evaluator.get_CRPS_FROM_ECDF(), summary) + ); + } else if ( metric == "QS" ) { r.emplace_back( diff --git a/tests/expected/evalp/CRPS_FROM_ECDF.csv b/tests/expected/evalp/CRPS_FROM_ECDF.csv new file mode 100644 index 0000000..13832cc --- /dev/null +++ b/tests/expected/evalp/CRPS_FROM_ECDF.csv @@ -0,0 +1 @@ +271.9578705197483 diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp index f332450..fd8f24e 100644 --- a/tests/test_probabilist.cpp +++ b/tests/test_probabilist.cpp @@ -31,6 +31,7 @@ using namespace xt::placeholders; // required for `_` to work std::vector<std::string> all_metrics_p = { "BS", "BSS", "BS_CRD", "BS_LBD", "REL_DIAG", "CRPS_FROM_BS", + "CRPS_FROM_ECDF", "QS", "CRPS_FROM_QS", "POD", "POFD", "FAR", "CSI", "ROCSS", "RANK_HIST", "DS", "AS", @@ -116,6 +117,37 @@ TEST(ProbabilistTests, TestBrier) } } +TEST(ProbabilistTests, TestCDF) +{ + // read in data + xt::xtensor<double, 1> observed; + xt::xtensor<double, 2> predicted; + std::tie(observed, predicted) = load_data_p(); + + // read in expected results + auto expected = load_expected_p(); + + // compute scores + std::vector<std::string> metrics = {"CRPS_FROM_ECDF"}; + + std::vector<xt::xarray<double>> results = + evalhyd::evalp( + // shape: (sites [1], time [t]) + xt::eval(xt::view(observed, xt::newaxis(), xt::all())), + // shape: (sites [1], lead times [1], members [m], time [t]) + xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())), + metrics + ); + + // check results + for (std::size_t m = 0; m < metrics.size(); m++) + { + EXPECT_TRUE(xt::all(xt::isclose( + results[m], expected[metrics[m]], 1e-05, 1e-08, true + ))) << "Failure for (" << metrics[m] << ")"; + } +} + TEST(ProbabilistTests, TestQuantiles) { // read in data -- GitLab