From f9c98efdab88ea99a94df06a3b42025a5c60f319 Mon Sep 17 00:00:00 2001
From: Thibault Hallouin <thibault.hallouin@inrae.fr>
Date: Thu, 15 Jun 2023 11:43:41 +0200
Subject: [PATCH] remove WSS and AWI

resolves https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-cpp/-/issues/7
---
 changelog.rst                                 |   6 +
 .../evalhyd/detail/probabilist/evaluator.hpp  |  39 ---
 .../evalhyd/detail/probabilist/intervals.hpp  | 230 ------------------
 include/evalhyd/evalp.hpp                     |  14 +-
 tests/expected/evalp/AWI.csv                  |   1 -
 tests/expected/evalp/WSS.csv                  |   1 -
 tests/test_probabilist.cpp                    |   6 +-
 7 files changed, 10 insertions(+), 287 deletions(-)
 delete mode 100644 tests/expected/evalp/AWI.csv
 delete mode 100644 tests/expected/evalp/WSS.csv

diff --git a/changelog.rst b/changelog.rst
index 2716fa5..0ba34c1 100644
--- a/changelog.rst
+++ b/changelog.rst
@@ -5,6 +5,12 @@ latest
 
 Yet to be versioned and released. Only available from *dev* branch until then.
 
+.. rubric:: Scope changes
+
+* remove `"WSS"` and `"AWI"` as probabilistic evaluation metric because of the
+  arbitrary nature of using the sample climatology as reference/benchmark
+  (`#7-CPP <https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-cpp/-/issues/7>`_)
+
 .. rubric:: Bug fixes
 
 * fix irrelevant rank check failure when passing arrays (and not tensors)
diff --git a/include/evalhyd/detail/probabilist/evaluator.hpp b/include/evalhyd/detail/probabilist/evaluator.hpp
index 6293ea2..976145c 100644
--- a/include/evalhyd/detail/probabilist/evaluator.hpp
+++ b/include/evalhyd/detail/probabilist/evaluator.hpp
@@ -76,7 +76,6 @@ namespace evalhyd
             xtl::xoptional<xt::xtensor<double, 5>, bool> itv_bnds;
             xtl::xoptional<xt::xtensor<double, 4>, bool> obs_in_itv;
             xtl::xoptional<xt::xtensor<double, 4>, bool> itv_width;
-            xtl::xoptional<xt::xtensor<double, 6>, bool> clim_bnds;
 
             // members for intermediate evaluation metrics
             // (i.e. before the reduction along the temporal axis)
@@ -124,9 +123,7 @@ namespace evalhyd
             xtl::xoptional<xt::xtensor<double, 5>, bool> CR;
             xtl::xoptional<xt::xtensor<double, 5>, bool> AW;
             xtl::xoptional<xt::xtensor<double, 5>, bool> AWN;
-            xtl::xoptional<xt::xtensor<double, 5>, bool> AWI;
             xtl::xoptional<xt::xtensor<double, 5>, bool> WS;
-            xtl::xoptional<xt::xtensor<double, 5>, bool> WSS;
             // > Multi-variate
             xtl::xoptional<xt::xtensor<double, 4>, bool> ES;
 
@@ -367,19 +364,6 @@ namespace evalhyd
                 return itv_width.value();
             };
 
-
-            xt::xtensor<double, 6> get_clim_bnds()
-            {
-                if (!clim_bnds.has_value())
-                {
-                    clim_bnds = elements::calc_clim_bnds(
-                            q_obs, get_c_lvl(), t_msk, b_exp,
-                            n_sit, n_ldt, n_itv, n_msk, n_exp
-                    );
-                }
-                return clim_bnds.value();
-            };
-
             // methods to compute intermediate metrics
             xt::xtensor<double, 4> get_bs()
             {
@@ -798,17 +782,6 @@ namespace evalhyd
                 return AWN.value();
             };
 
-            xt::xtensor<double, 5> get_AWI()
-            {
-                if (!AWI.has_value())
-                {
-                    AWI = metrics::calc_AWI(
-                            get_AW(), get_clim_bnds()
-                    );
-                }
-                return AWI.value();
-            };
-
             xt::xtensor<double, 5> get_WS()
             {
                 if (!WS.has_value())
@@ -821,18 +794,6 @@ namespace evalhyd
                 return WS.value();
             };
 
-            xt::xtensor<double, 5> get_WSS()
-            {
-                if (!WSS.has_value())
-                {
-                    WSS = metrics::calc_WSS(
-                            q_obs, get_c_lvl(), get_clim_bnds(), get_WS(),
-                            t_msk, b_exp, n_sit, n_ldt, n_itv, n_msk, n_exp
-                    );
-                }
-                return WSS.value();
-            };
-
             xt::xtensor<double, 4> get_ES()
             {
                 if (!ES.has_value())
diff --git a/include/evalhyd/detail/probabilist/intervals.hpp b/include/evalhyd/detail/probabilist/intervals.hpp
index c36a235..adbd487 100644
--- a/include/evalhyd/detail/probabilist/intervals.hpp
+++ b/include/evalhyd/detail/probabilist/intervals.hpp
@@ -123,119 +123,6 @@ namespace evalhyd
 
                 return (u - l);
             }
-
-            /// Compute the bounds of the climatology corresponding to the
-            /// confidence levels.
-            ///
-            /// \param q_obs
-            ///     Streamflow predictions.
-            ///     shape: (sites, time)
-            /// \param c_lvl
-            ///     Confidence levels for the predictive intervals.
-            ///     shape: (intervals,)
-            /// \param t_msk
-            ///     Temporal subsets of the whole record.
-            ///     shape: (sites, lead times, subsets, time)
-            /// \param b_exp
-            ///     Boostrap samples.
-            ///     shape: (samples, time slice)
-            /// \param n_sit
-            ///     Number of sites.
-            /// \param n_ldt
-            ///     Number of lead times.
-            /// \param n_itv
-            ///     Number of predictive intervals.
-            /// \param n_msk
-            ///     Number of temporal subsets.
-            /// \param n_exp
-            ///     Number of bootstrap samples.
-            /// \return
-            ///     Climatology bounds.
-            ///     shape: (sites, lead times, subsets, samples, intervals, bounds)
-            template <class XD2>
-            inline xt::xtensor<double, 6> calc_clim_bnds(
-                    const XD2& q_obs,
-                    const xt::xtensor<double, 1>& c_lvl,
-                    const xt::xtensor<bool, 4>& t_msk,
-                    const std::vector<xt::xkeep_slice<int>>& b_exp,
-                    std::size_t n_sit,
-                    std::size_t n_ldt,
-                    std::size_t n_itv,
-                    std::size_t n_msk,
-                    std::size_t n_exp
-            )
-            {
-                // compute "climatology" average width
-                xt::xtensor<double, 6> clim_bnds =
-                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp,
-                                           n_itv, std::size_t {2}});
-
-                // determine quantiles forming the predictive intervals
-                // from the confidence levels
-                xt::xtensor<double, 2> quantiles =
-                        xt::zeros<double>({n_itv, std::size_t {2}});
-                xt::col(quantiles, 0) = 0.5 - c_lvl / 200.;
-                xt::col(quantiles, 1) = 0.5 + c_lvl / 200.;
-
-                // compute variable one mask at a time to minimise memory imprint
-                for (std::size_t m = 0; m < n_msk; m++)
-                {
-                    // apply the mask
-                    // (using NaN workaround until reducers work on masked_view)
-                    auto q_obs_masked = xt::where(
-                            xt::view(t_msk, xt::all(), xt::all(), m, xt::all()),
-                            xt::view(q_obs, xt::all(), xt::newaxis(), xt::all()),
-                            NAN
-                    );
-
-                    // compute variable one sample at a time
-                    for (std::size_t e = 0; e < n_exp; e++)
-                    {
-                        // apply the bootstrap sampling
-                        auto q_obs_masked_sampled =
-                                xt::view(q_obs_masked, xt::all(), xt::all(), b_exp[e]);
-
-                        // compute "climatology" interval
-                        for (std::size_t s = 0; s < n_sit; s++)
-                        {
-                            for (std::size_t l = 0; l < n_ldt; l++)
-                            {
-                                for (std::size_t i = 0; i < n_itv; i++)
-                                {
-                                    auto obs = xt::view(q_obs_masked_sampled,
-                                                        s, l, xt::all());
-                                    auto obs_filtered =
-                                            xt::filter(obs, !xt::isnan(obs));
-
-                                    if (obs_filtered.size() > 0)
-                                    {
-                                        // lower bound
-                                        xt::view(clim_bnds, s, l, m, e, i, 0) =
-                                                xt::quantile(
-                                                        obs_filtered,
-                                                        {quantiles(i, 0)}
-                                                )();
-                                        // upper bound
-                                        xt::view(clim_bnds, s, l, m, e, i, 1) =
-                                                xt::quantile(
-                                                        obs_filtered,
-                                                        {quantiles(i, 1)}
-                                                )();
-                                    }
-                                    else
-                                    {
-                                        xt::view(clim_bnds, s, l, m, e, i, xt::all()) =
-                                                NAN;
-                                    }
-
-                                }
-                            }
-                        }
-                    }
-                }
-
-                return clim_bnds;
-            }
         }
 
         namespace intermediate
@@ -487,34 +374,6 @@ namespace evalhyd
                                  - std::numeric_limits<double>::infinity());
             }
 
-            /// Compute the Average Width Index (AWI).
-            ///
-            /// \param AW
-            ///     Average widths.
-            ///     shape: (sites, lead times, subsets, samples, intervals)
-            /// \param clim_bnds
-            ///     Climatology bounds.
-            ///     shape: (sites, lead times, subsets, samples, intervals, bounds)
-            /// \return
-            ///     Average width indices.
-            ///     shape: (sites, lead times, subsets, samples, intervals)
-            inline xt::xtensor<double, 5> calc_AWI(
-                    const xt::xtensor<double, 5>& AW,
-                    const xt::xtensor<double, 6>& clim_bnds
-            )
-            {
-                // compute "climatology" average width
-                auto AW_clim =
-                        xt::view(clim_bnds, xt::all(), xt::all(), xt::all(),
-                                 xt::all(), xt::all(), 1)
-                        - xt::view(clim_bnds, xt::all(), xt::all(), xt::all(),
-                                   xt::all(), xt::all(), 0);
-
-                return xt::where(AW_clim > 0,
-                                 1 - (AW / AW_clim),
-                                 - std::numeric_limits<double>::infinity());
-            }
-
             /// Compute the Winkler scores (WS), also known as interval score.
             ///
             /// \param ws
@@ -554,95 +413,6 @@ namespace evalhyd
                         ws, t_msk, b_exp, n_sit, n_ldt, n_itv, n_msk, n_exp
                 );
             }
-
-            /// Compute the Winkler skill scores (WSS).
-            ///
-            /// \param q_obs
-            ///     Streamflow predictions.
-            ///     shape: (sites, time)
-            /// \param c_lvl
-            ///     Confidence levels for the predictive intervals.
-            ///     shape: (intervals,)
-            /// \param clim_bnds
-            ///     Climatology bounds.
-            ///     shape: (sites, lead times, subsets, samples, intervals, bounds)
-            /// \param WS
-            ///     Winkler scores.
-            ///     shape: (sites, lead times, subsets, samples, intervals)
-            /// \param t_msk
-            ///     Temporal subsets of the whole record.
-            ///     shape: (sites, lead times, subsets, time)
-            /// \param b_exp
-            ///     Boostrap samples.
-            ///     shape: (samples, time slice)
-            /// \param n_sit
-            ///     Number of sites.
-            /// \param n_ldt
-            ///     Number of lead times.
-            /// \param n_itv
-            ///     Number of predictive intervals.
-            /// \param n_msk
-            ///     Number of temporal subsets.
-            /// \param n_exp
-            ///     Number of bootstrap samples.
-            /// \return
-            ///     Winkler skill scores.
-            ///     shape: (sites, lead times, subsets, samples, intervals)
-            template <class XD2>
-            inline xt::xtensor<double, 5> calc_WSS(
-                    const XD2& q_obs,
-                    const xt::xtensor<double, 1>& c_lvl,
-                    const xt::xtensor<double, 6>& clim_bnds,
-                    const xt::xtensor<double, 5>& WS,
-                    const xt::xtensor<bool, 4>& t_msk,
-                    const std::vector<xt::xkeep_slice<int>>& b_exp,
-                    std::size_t n_sit,
-                    std::size_t n_ldt,
-                    std::size_t n_itv,
-                    std::size_t n_msk,
-                    std::size_t n_exp
-            )
-            {
-                // compute "climatology" Winkler score
-                xt::xtensor<double, 5> WS_clim =
-                        xt::zeros<double>({n_sit, n_ldt, n_msk, n_exp, n_itv});
-
-                for (std::size_t l = 0; l < n_ldt; l++)
-                {
-                    for (std::size_t m = 0; m < n_msk; m++)
-                    {
-                        for (std::size_t e = 0; e < n_exp; e++)
-                        {
-                            auto ws_clim = intermediate::calc_ws(
-                                    q_obs, c_lvl,
-                                    xt::view(clim_bnds, xt::all(), l, xt::newaxis(),
-                                             m, e, xt::all(), xt::all(),
-                                             xt::newaxis())
-                            );
-
-                            // apply the mask
-                            // (using NaN workaround until reducers work on masked_view)
-                            auto ws_clim_masked = xt::where(
-                                    xt::view(t_msk, xt::all(), l, m, xt::newaxis(), xt::all()),
-                                    xt::view(ws_clim, xt::all(), l, xt::all(), xt::all()),
-                                    NAN
-                            );
-
-                            // apply the bootstrap sampling
-                            auto ws_clim_masked_sampled =
-                                    xt::view(ws_clim_masked, xt::all(), xt::all(), b_exp[e]);
-
-                            xt::view(WS_clim, xt::all(), l, m, e, xt::all()) =
-                                    xt::nanmean(ws_clim_masked_sampled, -1);
-                        }
-                    }
-                }
-
-                // compute the Winkler skill score
-                return xt::where(WS_clim > 0,
-                                 1 - (WS / WS_clim),
-                                 - std::numeric_limits<double>::infinity());
-            }
         }
     }
 }
diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp
index c79f47d..c044120 100644
--- a/include/evalhyd/evalp.hpp
+++ b/include/evalhyd/evalp.hpp
@@ -240,7 +240,7 @@ namespace evalhyd
                  "QS", "CRPS_FROM_QS",
                  "POD", "POFD", "FAR", "CSI", "ROCSS",
                  "RANK_HIST", "DS", "AS",
-                 "CR", "AW", "AWN", "AWI", "WS", "WSS",
+                 "CR", "AW", "AWN", "WS",
                  "ES"}
         );
 
@@ -547,24 +547,12 @@ namespace evalhyd
                         uncertainty::summarise_p(evaluator.get_AWN(), summary)
                 );
             }
-            else if ( metric == "AWI" )
-            {
-                r.emplace_back(
-                        uncertainty::summarise_p(evaluator.get_AWI(), summary)
-                );
-            }
             else if ( metric == "WS" )
             {
                 r.emplace_back(
                         uncertainty::summarise_p(evaluator.get_WS(), summary)
                 );
             }
-            else if ( metric == "WSS" )
-            {
-                r.emplace_back(
-                        uncertainty::summarise_p(evaluator.get_WSS(), summary)
-                );
-            }
             else if ( metric == "ES" )
             {
                 r.emplace_back(
diff --git a/tests/expected/evalp/AWI.csv b/tests/expected/evalp/AWI.csv
deleted file mode 100644
index 8b3b7e7..0000000
--- a/tests/expected/evalp/AWI.csv
+++ /dev/null
@@ -1 +0,0 @@
-0.9821120161733,0.9880951944476
diff --git a/tests/expected/evalp/WSS.csv b/tests/expected/evalp/WSS.csv
deleted file mode 100644
index fd929da..0000000
--- a/tests/expected/evalp/WSS.csv
+++ /dev/null
@@ -1 +0,0 @@
-0.6621887740287,0.4360388849930
diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp
index 65b16cd..f4e53f7 100644
--- a/tests/test_probabilist.cpp
+++ b/tests/test_probabilist.cpp
@@ -35,7 +35,7 @@ std::vector<std::string> all_metrics_p = {
         "QS", "CRPS_FROM_QS",
         "POD", "POFD", "FAR", "CSI", "ROCSS",
         "RANK_HIST", "DS", "AS",
-        "CR", "AW", "AWN", "AWI", "WS", "WSS",
+        "CR", "AW", "AWN", "WS",
         "ES"
 };
 
@@ -263,7 +263,7 @@ TEST(ProbabilistTests, TestIntervals)
     auto expected = load_expected_p();
 
     // compute scores
-    std::vector<std::string> metrics = {"CR", "AW", "AWN", "AWI", "WS", "WSS"};
+    std::vector<std::string> metrics = {"CR", "AW", "AWN", "WS"};
 
     std::vector<xt::xarray<double>> results =
             evalhyd::evalp(
@@ -271,7 +271,7 @@ TEST(ProbabilistTests, TestIntervals)
                     xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
                     // shape: (sites [1], lead times [1], members [m], time [t])
                     xt::eval(xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all())),
-                    {"CR", "AW", "AWN", "AWI", "WS", "WSS"},
+                    {"CR", "AW", "AWN", "WS"},
                     xt::xtensor<double, 2>({}),
                     "",  // events
                     {30., 80.}  // c_lvl
-- 
GitLab