add ranks-based probabilistic metrics (REL_DIAG, DS, AS)

b90350d2 · Thibault Hallouin · 3c9406ae · b90350d2 · b90350d2 · b90350d2
Commit b90350d2 authored 2 years ago by Thibault Hallouin
Hide whitespace changes
Inline Side-by-side

Showing

with 528 additions and 1 deletion
+528 -1
--- a/include/evalhyd/detail/probabilist/evaluator.hpp
+++ b/include/evalhyd/detail/probabilist/evaluator.hpp
@@ -15,6 +15,7 @@
 #include "brier.hpp"
 #include "quantiles.hpp"
 #include "contingency.hpp"
+#include "ranks.hpp"


 namespace evalhyd
@@ -57,6 +58,9 @@ namespace evalhyd
            xtl::xoptional<xt::xtensor<double, 5>, bool> ct_b;
            xtl::xoptional<xt::xtensor<double, 5>, bool> ct_c;
            xtl::xoptional<xt::xtensor<double, 5>, bool> ct_d;
+            // > Ranks-based
+            xtl::xoptional<xt::xtensor<double, 3>, bool> r_k;
+            xtl::xoptional<xt::xtensor<double, 5>, bool> o_j;

            // members for intermediate evaluation metrics
            // (i.e. before the reduction along the temporal axis)
@@ -86,6 +90,10 @@ namespace evalhyd
            xtl::xoptional<xt::xtensor<double, 6>, bool> FAR;
            xtl::xoptional<xt::xtensor<double, 6>, bool> CSI;
            xtl::xoptional<xt::xtensor<double, 5>, bool> ROCSS;
+            // > Ranks-based
+            xtl::xoptional<xt::xtensor<double, 6>, bool> REL_DIAG;
+            xtl::xoptional<xt::xtensor<double, 4>, bool> DS;
+            xtl::xoptional<xt::xtensor<double, 4>, bool> AS;

            // methods to get optional parameters
            auto get_q_thr()
@@ -242,6 +250,29 @@ namespace evalhyd
                return ct_d.value();
            };

+            xt::xtensor<double, 3> get_r_k()
+            {
+                if (!r_k.has_value())
+                {
+                    r_k = elements::calc_r_k(
+                            q_obs, get_q_qnt(), n_mbr
+                    );
+                }
+                return r_k.value();
+            };
+
+            xt::xtensor<double, 5> get_o_j()
+            {
+                if (!o_j.has_value())
+                {
+                    o_j = elements::calc_o_j(
+                            get_r_k(), t_msk, b_exp,
+                            n_sit, n_ldt, n_mbr, n_msk, n_exp
+                    );
+                }
+                return o_j.value();
+            };
+
            // methods to compute intermediate metrics
            xt::xtensor<double, 4> get_bs()
            {
@@ -511,6 +542,42 @@ namespace evalhyd
                }
                return ROCSS.value();
            };
+
+            xt::xtensor<double, 6> get_REL_DIAG()
+            {
+                if (!REL_DIAG.has_value())
+                {
+                    REL_DIAG = metrics::calc_REL_DIAG(
+                            get_o_j(), t_msk, b_exp,
+                            n_sit, n_ldt, n_mbr, n_msk, n_exp
+                    );
+                }
+                return REL_DIAG.value();
+            };
+
+            xt::xtensor<double, 4> get_DS()
+            {
+                if (!DS.has_value())
+                {
+                    DS = metrics::calc_DS(
+                            get_o_j(), t_msk, b_exp,
+                            n_sit, n_ldt, n_mbr, n_msk, n_exp
+                    );
+                }
+                return DS.value();
+            };
+
+            xt::xtensor<double, 4> get_AS()
+            {
+                if (!AS.has_value())
+                {
+                    AS = metrics::calc_AS(
+                            get_r_k(), t_msk, b_exp,
+                            n_sit, n_ldt, n_mbr, n_msk, n_exp
+                    );
+                }
+                return AS.value();
+            };
        };
    }
 }

--- a/include/evalhyd/detail/probabilist/ranks.hpp
+++ b/include/evalhyd/detail/probabilist/ranks.hpp
+// Copyright (c) 2023, INRAE.
+// Distributed under the terms of the GPL-3 Licence.
+// The full licence is in the file LICENCE, distributed with this software.
+
+#ifndef EVALHYD_PROBABILIST_RANKS_HPP
+#define EVALHYD_PROBABILIST_RANKS_HPP
+
+#include <xtensor/xtensor.hpp>
+#include <xtensor/xview.hpp>
+#include <xtensor/xindex_view.hpp>
+#include <xtensor/xsort.hpp>
+#include <xtensor/xrandom.hpp>
+
+
+namespace evalhyd
+{
+    namespace probabilist
+    {
+        namespace elements
+        {
+            /// Compute the position of the observations amongst the ensemble
+            /// member predictions (i.e. their ranks).
+            ///
+            /// \param q_obs
+            ///     Streamflow observations.
+            ///     shape: (sites, time)
+            /// \param q_qnt
+            ///     Streamflow quantiles.
+            ///     shape: (sites, lead times, quantiles, time)
+            /// \param n_mbr
+            ///     Number of ensemble members.
+            /// \return
+            ///     Ranks of streamflow observations.
+            ///     shape: (sites, lead times, time)
+            template <class XD2, class XD4>
+            inline xt::xtensor<double, 3> calc_r_k(
+                    const XD2& q_obs,
+                    const XD4& q_qnt,
+                    std::size_t n_mbr
+            )
+            {
+                xt::xtensor<double, 3> ranks = xt::zeros<double>(
+                        {q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)}
+                );
+                xt::view(ranks, xt::all()) = NAN;
+
+                xt::xtensor<double, 3> min_ranks = xt::zeros<double>(
+                        {q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)}
+                );
+                xt::view(min_ranks, xt::all()) = NAN;
+
+                xt::xtensor<double, 3> max_ranks = xt::zeros<double>(
+                        {q_qnt.shape(0), q_qnt.shape(1), q_qnt.shape(3)}
+                );
+                xt::view(max_ranks, xt::all()) = NAN;
+                
+                for (int m = 0; m < n_mbr; m++)
+                {
+                    // strictly below a member and no rank yet
+                    xt::view(ranks, xt::all()) = xt::where(
+                            (xt::view(q_obs, xt::all(), xt::newaxis(), xt::all())
+                             < xt::view(q_qnt, xt::all(), xt::all(), m, xt::all()))
+                            &&
+                            xt::isnan(ranks),
+                            m,
+                            ranks
+                    );
+
+                    // first time tied with a member
+                    xt::view(min_ranks, xt::all()) = xt::where(
+                            xt::equal(xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()),
+                                      xt::view(q_qnt, xt::all(), xt::all(), m, xt::all()))
+                            &&
+                            xt::isnan(min_ranks),
+                            m,
+                            min_ranks
+                    );
+
+                    // again tied with a member
+                    xt::view(max_ranks, xt::all()) = xt::where(
+                            xt::equal(xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()),
+                                      xt::view(q_qnt, xt::all(), xt::all(), m, xt::all()))
+                            &&
+                            !xt::isnan(min_ranks),
+                            m + 1,
+                            max_ranks
+                    );
+                }
+
+                // above last member
+                xt::view(ranks, xt::all()) = xt::where(
+                        xt::view(q_obs, xt::all(), xt::newaxis(), xt::all())
+                        > xt::view(q_qnt, xt::all(), xt::all(), n_mbr - 1, xt::all()),
+                        n_mbr,
+                        ranks
+                );
+
+                // for ties, take random rank between min and max
+                xt::view(ranks, xt::all()) = xt::where(
+                        !xt::isnan(min_ranks),
+                        min_ranks
+                        + xt::round((max_ranks - max_ranks + 1)
+                                    * xt::random::rand<double>(ranks.shape())),
+                        ranks
+                );
+
+                return ranks;
+            }
+
+            /// Compute the number of observations for all possible rank values.
+            ///
+            /// \param r_k
+            ///     Ranks of streamflow observations.
+            ///     shape: (sites, lead times, time)
+            /// \param t_msk
+            ///     Temporal subsets of the whole record.
+            ///     shape: (sites, lead times, subsets, time)
+            /// \param b_exp
+            ///     Boostrap samples.
+            ///     shape: (samples, time slice)
+            /// \param n_sit
+            ///     Number of sites.
+            /// \param n_ldt
+            ///     Number of lead times.
+            /// \param n_mbr
+            ///     Number of ensemble members.
+            /// \param n_msk
+            ///     Number of temporal subsets.
+            /// \param n_exp
+            ///     Number of bootstrap samples.
+            /// \return
+            ///     Tallies of streamflow observations for all possible ranks.
+            ///     shape: (sites, lead times, subsets, samples, ranks)
+            inline xt::xtensor<double, 5> calc_o_j(
+                    const xt::xtensor<double, 3>& r_k,
+                    const xt::xtensor<bool, 4>& t_msk,
+                    const std::vector<xt::xkeep_slice<int>>& b_exp,
+                    std::size_t n_sit,
+                    std::size_t n_ldt,
+                    std::size_t n_mbr,
+                    std::size_t n_msk,
+                    std::size_t n_exp
+            )
+            {
+                // initialise output variable
+                xt::xtensor<double, 5> o_j =
+                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_mbr + 1});
+
+                // compute variable one mask at a time to minimise memory imprint
+                for (std::size_t m = 0; m < n_msk; m++)
+                {
+                    // apply the mask
+                    // (using NaN workaround until reducers work on masked_view)
+                    auto r_k_masked = xt::where(
+                            xt::view(t_msk, xt::all(), xt::all(), m, xt::all()),
+                            r_k,
+                            NAN
+                    );
+
+                    // compute variable one sample at a time
+                    for (std::size_t e = 0; e < n_exp; e++)
+                    {
+                        // apply the bootstrap sampling
+                        auto r_k_masked_sampled =
+                                xt::view(r_k_masked, xt::all(), xt::all(),
+                                         b_exp[e]);
+
+                        for (int j = 0; j < n_mbr + 1; j++)
+                        {
+                            // compute the observed relative frequency
+                            // $o_j = \sum_{k \in M_j} r_k$
+                            xt::view(o_j, xt::all(), xt::all(), m, e, j) =
+                                    xt::nansum(
+                                            xt::equal(r_k_masked_sampled, j),
+                                            -1
+                                    );
+                        }
+                    }
+                }
+
+                return o_j;
+            }
+        }
+
+        namespace metrics
+        {
+            /// Compute the reliability diagram X and Y axes.
+            ///
+            /// \param o_j
+            ///     Tallies of streamflow observations for all possible ranks.
+            ///     shape: (sites, lead times, subsets, samples, ranks)
+            /// \param t_msk
+            ///     Temporal subsets of the whole record.
+            ///     shape: (sites, lead times, subsets, time)
+            /// \param b_exp
+            ///     Boostrap samples.
+            ///     shape: (samples, time slice)
+            /// \param n_sit
+            ///     Number of sites.
+            /// \param n_ldt
+            ///     Number of lead times.
+            /// \param n_mbr
+            ///     Number of ensemble members.
+            /// \param n_msk
+            ///     Number of temporal subsets.
+            /// \param n_exp
+            ///     Number of bootstrap samples.
+            /// \return
+            ///     X and Y axes of the reliability diagram.
+            ///     shape: (sites, lead times, subsets, samples, ranks, axes)
+            inline xt::xtensor<double, 6> calc_REL_DIAG(
+                    const xt::xtensor<double, 5>& o_j,
+                    const xt::xtensor<bool, 4>& t_msk,
+                    const std::vector<xt::xkeep_slice<int>>& b_exp,
+                    std::size_t n_sit,
+                    std::size_t n_ldt,
+                    std::size_t n_mbr,
+                    std::size_t n_msk,
+                    std::size_t n_exp
+            )
+            {
+                // initialise output variable
+                xt::xtensor<double, 6> REL_DIAG =
+                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp,
+                                           n_mbr + 1, std::size_t {2}});
+
+                // compute variable one mask at a time to minimise memory imprint
+                for (std::size_t m = 0; m < n_msk; m++)
+                {
+                    // compute variable one sample at a time
+                    for (std::size_t e = 0; e < n_exp; e++)
+                    {
+                        // apply the bootstrap sampling
+                        auto t_msk_sampled =
+                                xt::view(t_msk, xt::all(), xt::all(),
+                                         m, b_exp[e]);
+
+                        // calculate length of subset
+                        auto l = xt::sum(t_msk_sampled, -1);
+
+                        // compute the observed relative frequency
+                        // $\bar{o_j} = \frac{1}{n} \sum_{k \in M_j} r_k$
+                        xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(), 0) =
+                                xt::cumsum(
+                                        xt::view(o_j, xt::all(), xt::all(),
+                                                 m, e, xt::all())
+                                        / l,
+                                        -1
+                                );
+
+                        // compute the forecast probability $y_i$
+                        xt::view(REL_DIAG, xt::all(), xt::all(), m, e, xt::all(), 1) =
+                                xt::arange<double>(double(n_mbr + 1)) / n_mbr;
+                    }
+                }
+
+                return REL_DIAG;
+            }
+
+            /// Compute the Delta score.
+            ///
+            /// \param o_j
+            ///     Tallies of streamflow observations for all possible ranks.
+            ///     shape: (sites, lead times, subsets, samples, ranks)
+            /// \param t_msk
+            ///     Temporal subsets of the whole record.
+            ///     shape: (sites, lead times, subsets, time)
+            /// \param b_exp
+            ///     Boostrap samples.
+            ///     shape: (samples, time slice)
+            /// \param n_sit
+            ///     Number of sites.
+            /// \param n_ldt
+            ///     Number of lead times.
+            /// \param n_mbr
+            ///     Number of ensemble members.
+            /// \param n_msk
+            ///     Number of temporal subsets.
+            /// \param n_exp
+            ///     Number of bootstrap samples.
+            /// \return
+            ///     Delta scores.
+            ///     shape: (sites, lead times, subsets, samples)
+            inline xt::xtensor<double, 4> calc_DS(
+                    const xt::xtensor<double, 5>& o_j,
+                    const xt::xtensor<bool, 4>& t_msk,
+                    const std::vector<xt::xkeep_slice<int>>& b_exp,
+                    std::size_t n_sit,
+                    std::size_t n_ldt,
+                    std::size_t n_mbr,
+                    std::size_t n_msk,
+                    std::size_t n_exp
+            )
+            {
+                // initialise output variable
+                xt::xtensor<double, 4> DS =
+                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});
+
+                // compute variable one mask at a time to minimise memory imprint
+                for (std::size_t m = 0; m < n_msk; m++)
+                {
+                    // compute variable one sample at a time
+                    for (std::size_t e = 0; e < n_exp; e++)
+                    {
+                        // apply the bootstrap sampling
+                        auto t_msk_sampled =
+                                xt::view(t_msk, xt::all(), xt::all(),
+                                         m, b_exp[e]);
+
+                        // calculate length of subset
+                        auto l = xt::sum(t_msk_sampled, -1);
+
+                        // compute the Delta score
+                        // $\bar{o_j} = \frac{1}{n} \sum_{k \in M_j} r_k$
+                        xt::view(DS, xt::all(), xt::all(), m, e) =
+                                xt::nansum(
+                                        xt::square(
+                                                xt::view(o_j, xt::all(), xt::all(), m, e, xt::all())
+                                                - (l / (n_mbr + 1))
+                                        ),
+                                        -1
+                                ) / (l * n_mbr / (n_mbr + 1));
+                    }
+                }
+
+                return DS;
+            }
+
+            /// Compute the Alpha score.
+            ///
+            /// \param r_k
+            ///     Ranks of streamflow observations.
+            ///     shape: (sites, lead times, time)
+            /// \param t_msk
+            ///     Temporal subsets of the whole record.
+            ///     shape: (sites, lead times, subsets, time)
+            /// \param b_exp
+            ///     Boostrap samples.
+            ///     shape: (samples, time slice)
+            /// \param n_sit
+            ///     Number of sites.
+            /// \param n_ldt
+            ///     Number of lead times.
+            /// \param n_mbr
+            ///     Number of ensemble members.
+            /// \param n_msk
+            ///     Number of temporal subsets.
+            /// \param n_exp
+            ///     Number of bootstrap samples.
+            /// \return
+            ///     Alpha scores.
+            ///     shape: (sites, lead times, subsets, samples)
+            inline xt::xtensor<double, 4> calc_AS(
+                    const xt::xtensor<double, 3>& r_k,
+                    const xt::xtensor<bool, 4>& t_msk,
+                    const std::vector<xt::xkeep_slice<int>>& b_exp,
+                    std::size_t n_sit,
+                    std::size_t n_ldt,
+                    std::size_t n_mbr,
+                    std::size_t n_msk,
+                    std::size_t n_exp
+            )
+            {
+                // initialise output variable
+                xt::xtensor<double, 4> AS =
+                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp});
+
+                // compute one site and one leadtime at a time because of
+                // potential NaN (of varying numbers across sites/lead times)
+                // in the ranks that are not put at the end with `xt::sort`
+                // (unlike `numpy.sort`) which prevent from an easy conversion
+                // from rank to probability
+                for (std::size_t s = 0; s < n_sit; s++)
+                {
+                    for (std::size_t l = 0; l < n_ldt; l++)
+                    {
+                        // compute variable one mask at a time to minimise memory imprint
+                        for (std::size_t m = 0; m < n_msk; m++)
+                        {
+                            // apply the mask
+                            // (using NaN workaround until reducers work on masked_view)
+                            auto r_k_masked = xt::where(
+                                    xt::view(t_msk, s, l, m, xt::all()),
+                                    xt::view(r_k, s, l, xt::all()),
+                                    NAN
+                            );
+
+                            // compute variable one sample at a time
+                            for (std::size_t e = 0; e < n_exp; e++)
+                            {
+                                // apply the bootstrap sampling
+                                auto r_k_masked_sampled =
+                                        xt::view(r_k_masked, b_exp[e]);
+
+                                // notations below follow Renard et al. (2010)
+                                // https://doi.org/10.1029/2009WR008328
+
+                                // compute observed p values
+                                // $p_{x(i)}$
+                                auto p_x_i = xt::sort(
+                                        xt::eval(
+                                                // filter out NaNs
+                                                xt::filter(
+                                                        r_k_masked_sampled,
+                                                        !xt::isnan(r_k_masked_sampled)
+                                                )
+                                                / n_mbr
+                                        )
+                                );
+
+                                // calculate length of realisations
+                                // $N_x$
+                                auto N_x = p_x_i.size();
+
+                                // compute theoretical p values
+                                // $p_{x(i)}^{(th)}$
+                                auto p_x_i_th =
+                                        xt::arange<double>(double(N_x)) / (N_x - 1);
+
+                                // compute area between the predictive curve and
+                                // the 1:1 line in the Q-Q plot
+                                // $\alpha'_x$
+                                auto alpha_prime_x = xt::nanmean(
+                                        xt::abs(p_x_i - p_x_i_th)
+                                );
+
+                                // compute the alpha score
+                                // $\alpha_x = 1 - 2 \alpha'_x$
+                                xt::view(AS, s, l, m, e) = 1 - 2 * alpha_prime_x;
+                            }
+                        }
+                    }
+                }
+
+                return AS;
+            }
+        }
+    }
+}
+
+#endif //EVALHYD_PROBABILIST_RANKS_HPP
\ No newline at end of file
--- a/include/evalhyd/evalp.hpp
+++ b/include/evalhyd/evalp.hpp
@@ -198,7 +198,8 @@ namespace evalhyd
                metrics,
                {"BS", "BSS", "BS_CRD", "BS_LBD",
                 "QS", "CRPS",
-                 "POD", "POFD", "FAR", "CSI", "ROCSS"}
+                 "POD", "POFD", "FAR", "CSI", "ROCSS",
+                 "REL_DIAG", "DS", "AS"}
        );

        // check optional parameters
@@ -436,6 +437,24 @@ namespace evalhyd
                        uncertainty::summarise(evaluator.get_ROCSS(), summary)
                );
            }
+            else if ( metric == "REL_DIAG" )
+            {
+                r.emplace_back(
+                        uncertainty::summarise(evaluator.get_REL_DIAG(), summary)
+                );
+            }
+            else if ( metric == "DS" )
+            {
+                r.emplace_back(
+                        uncertainty::summarise(evaluator.get_DS(), summary)
+                );
+            }
+            else if ( metric == "AS" )
+            {
+                r.emplace_back(
+                        uncertainty::summarise(evaluator.get_AS(), summary)
+                );
+            }
        }

        return r;