From 9167a047d7388e9632be87179728270fdfe4132c Mon Sep 17 00:00:00 2001
From: Thibault Hallouin <thibault.hallouin@inrae.fr>
Date: Wed, 19 Apr 2023 13:40:37 +0200
Subject: [PATCH] fix bug with masking conditions when asking for bounds on
 quantiles

e.g. "q_prd_median{>quantile0.3,<quantile0.7}" was not working
because the string was too long. The regexes were also quite
complicated, so this revises the approach and simplifies them.

To overcome the length limitation with C++ regex, "quantile" now
needs to be specified as the shorter "qtl"
(e.g. "q_prd_median{>qtl0.3,<qtl0.7}").
---
 include/evalhyd/detail/masks.hpp | 113 ++++++++++++++-----------------
 1 file changed, 52 insertions(+), 61 deletions(-)

diff --git a/include/evalhyd/detail/masks.hpp b/include/evalhyd/detail/masks.hpp
index 60b94f2..8a2fa4d 100644
--- a/include/evalhyd/detail/masks.hpp
+++ b/include/evalhyd/detail/masks.hpp
@@ -35,7 +35,7 @@ namespace evalhyd
             // observed or predicted (median or mean for probabilist) streamflow
             // e.g. q{>9.} q{<9} q{>=99.0} q{<=99} q{>9,<99} q{==9} q{!=9}
             std::regex exp_q (
-                    R"((q_obs|q_prd_median|q_prd_mean)\{((([><!=]?=?(mean|median|quantile[0-9]+\.?[0-9]*|[0-9]+\.?[0-9]*)),*)+)\})"
+                    R"((q_obs|q_prd_median|q_prd_mean)\{(((<|>|<=|>=|==|!=)(mean,?|median,?|qtl[0-1]\.[0-9]+,?|[0-9]+\.?[0-9]*,?))+)\})"
             );
 
             for (std::sregex_iterator i =
@@ -51,7 +51,7 @@ namespace evalhyd
                 std::vector<std::vector<std::string>> conditions;
 
                 // pattern supported to specify masking conditions based on streamflow
-                std::regex ex (R"(([><!=]?=?)(mean|median|quantile|[0-9]+\.?[0-9]*)([0-9]+\.?[0-9]*)?)");
+                std::regex ex (R"((<|>|<=|>=|==|!=)(mean|median|qtl[0-1]\.[0-9]+|[0-9]+\.?[0-9]*))");
 
                 for (std::sregex_iterator j =
                         std::sregex_iterator(str.begin(), str.end(), ex);
@@ -59,35 +59,22 @@ namespace evalhyd
                 {
                     const std::smatch & mt = *j;
 
-                    // check that operator is provided and is supported
-                    std::set<std::string> supported_op =
-                            {"<", ">", "<=", ">=", "!=", "=="};
-                    if (mt[1].str().empty())
+                    if ((mt[2].str() == "median")
+                        || (mt[2].str() == "mean"))
                     {
-                        throw std::runtime_error(
-                                "missing operator for streamflow masking condition"
-                        );
+                        conditions.push_back({mt[1].str(), mt[2].str(), ""});
                     }
-                    else if (supported_op.find(mt[1]) != supported_op.end())
+                    else if ((mt[2].str().length() >= 3)
+                             && (mt[2].str().substr(0, 3) == "qtl"))
                     {
-                        if ((mt[2].str() == "median")
-                            || (mt[2].str() == "mean")
-                            || (mt[2].str() == "quantile"))
-                        {
-                            conditions.push_back({mt[1].str(), mt[2].str(), mt[3].str()});
-                        }
-                        else
-                        {
-                            // it is a simple numerical value, swap last two
-                            conditions.push_back({mt[1].str(), mt[3].str(), mt[2].str()});
-                        }
+                        conditions.push_back(
+                            {mt[1].str(), "qtl", mt[2].str().substr(3)}
+                        );
                     }
                     else
                     {
-                        throw std::runtime_error(
-                                "invalid operator for streamflow masking "
-                                "condition: " + mt[1].str()
-                        );
+                        // it is a simple numerical value
+                        conditions.push_back({mt[1].str(), "", mt[2].str()});
                     }
                 }
 
@@ -105,7 +92,7 @@ namespace evalhyd
 
             // pattern supported to specify conditions to generate masks on time index
             // e.g. t{0:10} t{0:10,20:30} t{0,1,2,3} t{0:10,30,40,50} t{:}
-            std::regex exp_t (R"(([t])\{(((([0-9]+|[:]):?[0-9]*),*)+)\})");
+            std::regex exp_t (R"((t)\{(:|([0-9]+:[0-9]+,?|[0-9]+,?)+)\})");
 
             for (std::sregex_iterator i =
                     std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_t);
@@ -119,40 +106,46 @@ namespace evalhyd
                 // process masking conditions on time index
                 std::vector<std::vector<std::string>> condition;
 
-                // pattern supported to specify masking conditions based on time index
-                std::regex e (R"(([0-9]+|[:]):?([0-9]*))");
-
-                for (std::sregex_iterator j =
-                        std::sregex_iterator(s.begin(), s.end(), e);
-                     j != std::sregex_iterator(); j++)
+                // check whether it is all indices (i.e. t{:})
+                if (s == ":")
+                {
+                    condition.emplace_back();
+                }
+                else
                 {
-                    const std::smatch & m = *j;
+                    // pattern supported to specify masking conditions based on time index
+                    std::regex e (R"([0-9]+:[0-9]+|[0-9]+)");
 
-                    // check whether it is all indices, a range of indices, or an index
-                    if (m[1] == ":")
+                    for (std::sregex_iterator j =
+                            std::sregex_iterator(s.begin(), s.end(), e);
+                         j != std::sregex_iterator(); j++)
                     {
-                        // it is all indices (i.e. t{:}) so keep everything
-                        condition.emplace_back();
-                    }
-                    else if (m[2].str().empty())
-                    {
-                        // it is an index (i.e. t{#})
-                        condition.push_back({m[1].str()});
-                    }
-                    else
-                    {
-                        // it is a range of indices (i.e. t{#:#})
-                        // generate sequence of integer indices from range
-                        std::vector<int> vi(std::stoi(m[2].str())
-                                            - std::stoi(m[1].str()));
-                        std::iota(vi.begin(), vi.end(), std::stoi(m[1].str()));
-                        // convert to sequence of integer indices to string indices
-                        std::vector<std::string> vs;
-                        std::transform(std::begin(vi), std::end(vi),
-                                       std::back_inserter(vs),
-                                       [](int d) { return std::to_string(d); });
-
-                        condition.push_back(vs);
+                        const std::smatch & m = *j;
+
+                        // check whether it is a range of indices, or an index
+                        if (m[0].str().find(":") != std::string::npos)
+                        {
+                            // it is a range of indices (i.e. t{#:#})
+                            std::string s_ = m[0].str();
+                            std::string beg = s_.substr(0, s_.find(":"));
+                            std::string end = s_.substr(s_.find(":") + 1);
+
+                            // generate sequence of integer indices from range
+                            std::vector<int> vi(std::stoi(end) - std::stoi(beg));
+                            std::iota(vi.begin(), vi.end(), std::stoi(beg));
+                            // convert sequence of integer indices to string indices
+                            std::vector<std::string> vs;
+                            std::transform(std::begin(vi), std::end(vi),
+                                           std::back_inserter(vs),
+                                           [](int d) { return std::to_string(d); });
+
+                            condition.push_back(vs);
+                        }
+                        else
+                        {
+                            // it is an index (i.e. t{#})
+                            condition.push_back({m[0].str()});
+                        }
                     }
                 }
 
@@ -232,8 +225,6 @@ namespace evalhyd
                     auto q = get_q();
 
                     // define lambda function to precompute mean/median/quantile
-
-
                     auto get_val =
                             [&](const std::string& str, const std::string& num)
                     {
@@ -249,7 +240,7 @@ namespace evalhyd
                         {
                             return xt::mean(q)();
                         }
-                        else  // (str == "quantile")
+                        else  // (str == "qtl")
                         {
                             return xt::quantile(q, {std::stod(num)})();
                         }
@@ -266,7 +257,7 @@ namespace evalhyd
                     if (cond.size() == 2)
                     {
                         opr1 = cond[0][0];
-                        val1= get_val(cond[0][1], cond[0][2]);
+                        val1 = get_val(cond[0][1], cond[0][2]);
                         opr2 = cond[1][0];
                         val2 = get_val(cond[1][1], cond[1][2]);
 
-- 
GitLab