Commit 9167a047 authored by Thibault Hallouin
Browse files

fix bug with masking conditions when asking for bounds on quantiles

e.g. "q_prd_median{>quantile0.3,<quantile0.7}" was not working
because the string was too long, but the regexes were also quite
complicated, so this revises the approach and simplifies the regexes

but to overcome the length limitation of C++ regex, "quantile" now
needs to be specified as the shorter "qtl"
No related merge requests found
Pipeline #46392 passed with stage
in 3 minutes and 58 seconds
Showing with 52 additions and 61 deletions
+52 -61
...@@ -35,7 +35,7 @@ namespace evalhyd ...@@ -35,7 +35,7 @@ namespace evalhyd
// observed or predicted (median or mean for probabilist) streamflow // observed or predicted (median or mean for probabilist) streamflow
// e.g. q{>9.} q{<9} q{>=99.0} q{<=99} q{>9,<99} q{==9} q{!=9} // e.g. q{>9.} q{<9} q{>=99.0} q{<=99} q{>9,<99} q{==9} q{!=9}
std::regex exp_q ( std::regex exp_q (
R"((q_obs|q_prd_median|q_prd_mean)\{((([><!=]?=?(mean|median|quantile[0-9]+\.?[0-9]*|[0-9]+\.?[0-9]*)),*)+)\})" R"((q_obs|q_prd_median|q_prd_mean)\{(((<|>|<=|>=|==|!=)(mean,?|median,?|qtl[0-1]\.[0-9]+,?|[0-9]+\.?[0-9]*,?))+)\})"
); );
for (std::sregex_iterator i = for (std::sregex_iterator i =
...@@ -51,7 +51,7 @@ namespace evalhyd ...@@ -51,7 +51,7 @@ namespace evalhyd
std::vector<std::vector<std::string>> conditions; std::vector<std::vector<std::string>> conditions;
// pattern supported to specify masking conditions based on streamflow // pattern supported to specify masking conditions based on streamflow
std::regex ex (R"(([><!=]?=?)(mean|median|quantile|[0-9]+\.?[0-9]*)([0-9]+\.?[0-9]*)?)"); std::regex ex (R"((<|>|<=|>=|==|!=)(mean|median|qtl[0-1]\.[0-9]+|[0-9]+\.?[0-9]*))");
for (std::sregex_iterator j = for (std::sregex_iterator j =
std::sregex_iterator(str.begin(), str.end(), ex); std::sregex_iterator(str.begin(), str.end(), ex);
...@@ -59,35 +59,22 @@ namespace evalhyd ...@@ -59,35 +59,22 @@ namespace evalhyd
{ {
const std::smatch & mt = *j; const std::smatch & mt = *j;
// check that operator is provided and is supported if ((mt[2].str() == "median")
std::set<std::string> supported_op = || (mt[2].str() == "mean"))
{"<", ">", "<=", ">=", "!=", "=="};
if (mt[1].str().empty())
{ {
throw std::runtime_error( conditions.push_back({mt[1].str(), mt[2].str(), ""});
"missing operator for streamflow masking condition"
);
} }
else if (supported_op.find(mt[1]) != supported_op.end()) else if ((mt[2].str().length() >= 3)
&& (mt[2].str().substr(0, 3) == "qtl"))
{ {
if ((mt[2].str() == "median") conditions.push_back(
|| (mt[2].str() == "mean") {mt[1].str(), "qtl", mt[2].str().substr(3)}
|| (mt[2].str() == "quantile")) );
{
conditions.push_back({mt[1].str(), mt[2].str(), mt[3].str()});
}
else
{
// it is a simple numerical value, swap last two
conditions.push_back({mt[1].str(), mt[3].str(), mt[2].str()});
}
} }
else else
{ {
throw std::runtime_error( // it is a simple numerical value
"invalid operator for streamflow masking " conditions.push_back({mt[1].str(), "", mt[2].str()});
"condition: " + mt[1].str()
);
} }
} }
...@@ -105,7 +92,7 @@ namespace evalhyd ...@@ -105,7 +92,7 @@ namespace evalhyd
// pattern supported to specify conditions to generate masks on time index // pattern supported to specify conditions to generate masks on time index
// e.g. t{0:10} t{0:10,20:30} t{0,1,2,3} t{0:10,30,40,50} t{:} // e.g. t{0:10} t{0:10,20:30} t{0,1,2,3} t{0:10,30,40,50} t{:}
std::regex exp_t (R"(([t])\{(((([0-9]+|[:]):?[0-9]*),*)+)\})"); std::regex exp_t (R"((t)\{(:|([0-9]+:[0-9]+,?|[0-9]+,?)+)\})");
for (std::sregex_iterator i = for (std::sregex_iterator i =
std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_t); std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_t);
...@@ -119,40 +106,46 @@ namespace evalhyd ...@@ -119,40 +106,46 @@ namespace evalhyd
// process masking conditions on time index // process masking conditions on time index
std::vector<std::vector<std::string>> condition; std::vector<std::vector<std::string>> condition;
// pattern supported to specify masking conditions based on time index // check whether it is all indices (i.e. t{:})
std::regex e (R"(([0-9]+|[:]):?([0-9]*))"); if (s == ":")
{
for (std::sregex_iterator j = condition.emplace_back();
std::sregex_iterator(s.begin(), s.end(), e); }
j != std::sregex_iterator(); j++) else
{ {
const std::smatch & m = *j; // pattern supported to specify masking conditions based on time index
std::regex e (R"([0-9]+:[0-9]+|[0-9]+)");
// check whether it is all indices, a range of indices, or an index for (std::sregex_iterator j =
if (m[1] == ":") std::sregex_iterator(s.begin(), s.end(), e);
j != std::sregex_iterator(); j++)
{ {
// it is all indices (i.e. t{:}) so keep everything const std::smatch & m = *j;
condition.emplace_back();
} // check whether it is a range of indices, or an index
else if (m[2].str().empty()) if (m[0].str().find(":") != std::string::npos)
{ {
// it is an index (i.e. t{#}) // it is a range of indices (i.e. t{#:#})
condition.push_back({m[1].str()}); std::string s_ = m[0].str();
} std::string beg = s_.substr(0, s_.find(":"));
else std::string end = s_.substr(s_.find(":") + 1);
{
// it is a range of indices (i.e. t{#:#}) // generate sequence of integer indices from range
// generate sequence of integer indices from range std::vector<int> vi(std::stoi(end) - std::stoi(beg));
std::vector<int> vi(std::stoi(m[2].str()) std::iota(vi.begin(), vi.end(), std::stoi(beg));
- std::stoi(m[1].str())); // convert to sequence of integer indices to string indices
std::iota(vi.begin(), vi.end(), std::stoi(m[1].str())); std::vector<std::string> vs;
// convert to sequence of integer indices to string indices std::transform(std::begin(vi), std::end(vi),
std::vector<std::string> vs; std::back_inserter(vs),
std::transform(std::begin(vi), std::end(vi), [](int d) { return std::to_string(d); });
std::back_inserter(vs),
[](int d) { return std::to_string(d); }); condition.push_back(vs);
}
condition.push_back(vs); else
{
// it is an index (i.e. t{#})
condition.push_back({m[0].str()});
}
} }
} }
...@@ -232,8 +225,6 @@ namespace evalhyd ...@@ -232,8 +225,6 @@ namespace evalhyd
auto q = get_q(); auto q = get_q();
// define lambda function to precompute mean/median/quantile // define lambda function to precompute mean/median/quantile
auto get_val = auto get_val =
[&](const std::string& str, const std::string& num) [&](const std::string& str, const std::string& num)
{ {
...@@ -249,7 +240,7 @@ namespace evalhyd ...@@ -249,7 +240,7 @@ namespace evalhyd
{ {
return xt::mean(q)(); return xt::mean(q)();
} }
else // (str == "quantile") else // (str == "qtl")
{ {
return xt::quantile(q, {std::stod(num)})(); return xt::quantile(q, {std::stod(num)})();
} }
...@@ -266,7 +257,7 @@ namespace evalhyd ...@@ -266,7 +257,7 @@ namespace evalhyd
if (cond.size() == 2) if (cond.size() == 2)
{ {
opr1 = cond[0][0]; opr1 = cond[0][0];
val1= get_val(cond[0][1], cond[0][2]); val1 = get_val(cond[0][1], cond[0][2]);
opr2 = cond[1][0]; opr2 = cond[1][0];
val2 = get_val(cond[1][1], cond[1][2]); val2 = get_val(cond[1][1], cond[1][2]);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment