From 9167a047d7388e9632be87179728270fdfe4132c Mon Sep 17 00:00:00 2001 From: Thibault Hallouin <thibault.hallouin@inrae.fr> Date: Wed, 19 Apr 2023 13:40:37 +0200 Subject: [PATCH] fix bug with masking conditions when asking for bounds on quantiles e.g. "q_prd_median{>quantile0.3,<quantile0.7}" was not working because the string was too long, but the regexes were also quite complicated, so this revises the approach and simplifies the regexes; however, to overcome the length limitation with C++ regex, "quantile" now needs to be specified as "qtl" to be shorter --- include/evalhyd/detail/masks.hpp | 113 ++++++++++++++----------------- 1 file changed, 52 insertions(+), 61 deletions(-) diff --git a/include/evalhyd/detail/masks.hpp b/include/evalhyd/detail/masks.hpp index 60b94f2..8a2fa4d 100644 --- a/include/evalhyd/detail/masks.hpp +++ b/include/evalhyd/detail/masks.hpp @@ -35,7 +35,7 @@ namespace evalhyd // observed or predicted (median or mean for probabilist) streamflow // e.g. q{>9.} q{<9} q{>=99.0} q{<=99} q{>9,<99} q{==9} q{!=9} std::regex exp_q ( - R"((q_obs|q_prd_median|q_prd_mean)\{((([><!=]?=?(mean|median|quantile[0-9]+\.?[0-9]*|[0-9]+\.?[0-9]*)),*)+)\})" + R"((q_obs|q_prd_median|q_prd_mean)\{(((<|>|<=|>=|==|!=)(mean,?|median,?|qtl[0-1]\.[0-9]+,?|[0-9]+\.?[0-9]*,?))+)\})" ); for (std::sregex_iterator i = @@ -51,7 +51,7 @@ namespace evalhyd std::vector<std::vector<std::string>> conditions; // pattern supported to specify masking conditions based on streamflow - std::regex ex (R"(([><!=]?=?)(mean|median|quantile|[0-9]+\.?[0-9]*)([0-9]+\.?[0-9]*)?)"); + std::regex ex (R"((<|>|<=|>=|==|!=)(mean|median|qtl[0-1]\.[0-9]+|[0-9]+\.?[0-9]*))"); for (std::sregex_iterator j = std::sregex_iterator(str.begin(), str.end(), ex); @@ -59,35 +59,22 @@ namespace evalhyd { const std::smatch & mt = *j; - // check that operator is provided and is supported - std::set<std::string> supported_op = - {"<", ">", "<=", ">=", "!=", "=="}; - if (mt[1].str().empty()) + if ((mt[2].str() == "median") + 
|| (mt[2].str() == "mean")) { - throw std::runtime_error( - "missing operator for streamflow masking condition" - ); + conditions.push_back({mt[1].str(), mt[2].str(), ""}); } - else if (supported_op.find(mt[1]) != supported_op.end()) + else if ((mt[2].str().length() >= 3) + && (mt[2].str().substr(0, 3) == "qtl")) { - if ((mt[2].str() == "median") - || (mt[2].str() == "mean") - || (mt[2].str() == "quantile")) - { - conditions.push_back({mt[1].str(), mt[2].str(), mt[3].str()}); - } - else - { - // it is a simple numerical value, swap last two - conditions.push_back({mt[1].str(), mt[3].str(), mt[2].str()}); - } + conditions.push_back( + {mt[1].str(), "qtl", mt[2].str().substr(3)} + ); } else { - throw std::runtime_error( - "invalid operator for streamflow masking " - "condition: " + mt[1].str() - ); + // it is a simple numerical value + conditions.push_back({mt[1].str(), "", mt[2].str()}); } } @@ -105,7 +92,7 @@ namespace evalhyd // pattern supported to specify conditions to generate masks on time index // e.g. t{0:10} t{0:10,20:30} t{0,1,2,3} t{0:10,30,40,50} t{:} - std::regex exp_t (R"(([t])\{(((([0-9]+|[:]):?[0-9]*),*)+)\})"); + std::regex exp_t (R"((t)\{(:|([0-9]+:[0-9]+,?|[0-9]+,?)+)\})"); for (std::sregex_iterator i = std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_t); @@ -119,40 +106,46 @@ namespace evalhyd // process masking conditions on time index std::vector<std::vector<std::string>> condition; - // pattern supported to specify masking conditions based on time index - std::regex e (R"(([0-9]+|[:]):?([0-9]*))"); - - for (std::sregex_iterator j = - std::sregex_iterator(s.begin(), s.end(), e); - j != std::sregex_iterator(); j++) + // check whether it is all indices (i.e. 
t{:}) + if (s == ":") + { + condition.emplace_back(); + } + else { - const std::smatch & m = *j; + // pattern supported to specify masking conditions based on time index + std::regex e (R"([0-9]+:[0-9]+|[0-9]+)"); - // check whether it is all indices, a range of indices, or an index - if (m[1] == ":") + for (std::sregex_iterator j = + std::sregex_iterator(s.begin(), s.end(), e); + j != std::sregex_iterator(); j++) { - // it is all indices (i.e. t{:}) so keep everything - condition.emplace_back(); - } - else if (m[2].str().empty()) - { - // it is an index (i.e. t{#}) - condition.push_back({m[1].str()}); - } - else - { - // it is a range of indices (i.e. t{#:#}) - // generate sequence of integer indices from range - std::vector<int> vi(std::stoi(m[2].str()) - - std::stoi(m[1].str())); - std::iota(vi.begin(), vi.end(), std::stoi(m[1].str())); - // convert to sequence of integer indices to string indices - std::vector<std::string> vs; - std::transform(std::begin(vi), std::end(vi), - std::back_inserter(vs), - [](int d) { return std::to_string(d); }); - - condition.push_back(vs); + const std::smatch & m = *j; + + // check whether it is a range of indices, or an index + if (m[0].str().find(":") != std::string::npos) + { + // it is a range of indices (i.e. t{#:#}) + std::string s_ = m[0].str(); + std::string beg = s_.substr(0, s_.find(":")); + std::string end = s_.substr(s_.find(":") + 1); + + // generate sequence of integer indices from range + std::vector<int> vi(std::stoi(end) - std::stoi(beg)); + std::iota(vi.begin(), vi.end(), std::stoi(beg)); + // convert to sequence of integer indices to string indices + std::vector<std::string> vs; + std::transform(std::begin(vi), std::end(vi), + std::back_inserter(vs), + [](int d) { return std::to_string(d); }); + + condition.push_back(vs); + } + else + { + // it is an index (i.e. 
t{#}) + condition.push_back({m[0].str()}); + } } } @@ -232,8 +225,6 @@ namespace evalhyd auto q = get_q(); // define lambda function to precompute mean/median/quantile - - auto get_val = [&](const std::string& str, const std::string& num) { @@ -249,7 +240,7 @@ namespace evalhyd { return xt::mean(q)(); } - else // (str == "quantile") + else // (str == "qtl") { return xt::quantile(q, {std::stod(num)})(); } @@ -266,7 +257,7 @@ namespace evalhyd if (cond.size() == 2) { opr1 = cond[0][0]; - val1= get_val(cond[0][1], cond[0][2]); + val1 = get_val(cond[0][1], cond[0][2]); opr2 = cond[1][0]; val2 = get_val(cond[1][1], cond[1][2]); -- GitLab