Commit 9167a047 authored by Thibault Hallouin
Browse files

fix bug with masking conditions when asking for bounds on quantiles

e.g. "q_prd_median{>quantile0.3,<quantile0.7}" was not working
because the string was too long, but the regexes were also quite
complicated, so this revises the approach and simplifies the regexes

but to overcome the length limitation of C++ regex, "quantile" now
needs to be specified as "qtl" to be shorter
No related merge requests found
Pipeline #46392 passed with stage
in 3 minutes and 58 seconds
Showing with 52 additions and 61 deletions
+52 -61
......@@ -35,7 +35,7 @@ namespace evalhyd
// observed or predicted (median or mean for probabilist) streamflow
// e.g. q{>9.} q{<9} q{>=99.0} q{<=99} q{>9,<99} q{==9} q{!=9}
std::regex exp_q (
R"((q_obs|q_prd_median|q_prd_mean)\{((([><!=]?=?(mean|median|quantile[0-9]+\.?[0-9]*|[0-9]+\.?[0-9]*)),*)+)\})"
R"((q_obs|q_prd_median|q_prd_mean)\{(((<|>|<=|>=|==|!=)(mean,?|median,?|qtl[0-1]\.[0-9]+,?|[0-9]+\.?[0-9]*,?))+)\})"
);
for (std::sregex_iterator i =
......@@ -51,7 +51,7 @@ namespace evalhyd
std::vector<std::vector<std::string>> conditions;
// pattern supported to specify masking conditions based on streamflow
std::regex ex (R"(([><!=]?=?)(mean|median|quantile|[0-9]+\.?[0-9]*)([0-9]+\.?[0-9]*)?)");
std::regex ex (R"((<|>|<=|>=|==|!=)(mean|median|qtl[0-1]\.[0-9]+|[0-9]+\.?[0-9]*))");
for (std::sregex_iterator j =
std::sregex_iterator(str.begin(), str.end(), ex);
......@@ -59,35 +59,22 @@ namespace evalhyd
{
const std::smatch & mt = *j;
// check that operator is provided and is supported
std::set<std::string> supported_op =
{"<", ">", "<=", ">=", "!=", "=="};
if (mt[1].str().empty())
if ((mt[2].str() == "median")
|| (mt[2].str() == "mean"))
{
throw std::runtime_error(
"missing operator for streamflow masking condition"
);
conditions.push_back({mt[1].str(), mt[2].str(), ""});
}
else if (supported_op.find(mt[1]) != supported_op.end())
else if ((mt[2].str().length() >= 3)
&& (mt[2].str().substr(0, 3) == "qtl"))
{
if ((mt[2].str() == "median")
|| (mt[2].str() == "mean")
|| (mt[2].str() == "quantile"))
{
conditions.push_back({mt[1].str(), mt[2].str(), mt[3].str()});
}
else
{
// it is a simple numerical value, swap last two
conditions.push_back({mt[1].str(), mt[3].str(), mt[2].str()});
}
conditions.push_back(
{mt[1].str(), "qtl", mt[2].str().substr(3)}
);
}
else
{
throw std::runtime_error(
"invalid operator for streamflow masking "
"condition: " + mt[1].str()
);
// it is a simple numerical value
conditions.push_back({mt[1].str(), "", mt[2].str()});
}
}
......@@ -105,7 +92,7 @@ namespace evalhyd
// pattern supported to specify conditions to generate masks on time index
// e.g. t{0:10} t{0:10,20:30} t{0,1,2,3} t{0:10,30,40,50} t{:}
std::regex exp_t (R"(([t])\{(((([0-9]+|[:]):?[0-9]*),*)+)\})");
std::regex exp_t (R"((t)\{(:|([0-9]+:[0-9]+,?|[0-9]+,?)+)\})");
for (std::sregex_iterator i =
std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_t);
......@@ -119,40 +106,46 @@ namespace evalhyd
// process masking conditions on time index
std::vector<std::vector<std::string>> condition;
// pattern supported to specify masking conditions based on time index
std::regex e (R"(([0-9]+|[:]):?([0-9]*))");
for (std::sregex_iterator j =
std::sregex_iterator(s.begin(), s.end(), e);
j != std::sregex_iterator(); j++)
// check whether it is all indices (i.e. t{:})
if (s == ":")
{
condition.emplace_back();
}
else
{
const std::smatch & m = *j;
// pattern supported to specify masking conditions based on time index
std::regex e (R"([0-9]+:[0-9]+|[0-9]+)");
// check whether it is all indices, a range of indices, or an index
if (m[1] == ":")
for (std::sregex_iterator j =
std::sregex_iterator(s.begin(), s.end(), e);
j != std::sregex_iterator(); j++)
{
// it is all indices (i.e. t{:}) so keep everything
condition.emplace_back();
}
else if (m[2].str().empty())
{
// it is an index (i.e. t{#})
condition.push_back({m[1].str()});
}
else
{
// it is a range of indices (i.e. t{#:#})
// generate sequence of integer indices from range
std::vector<int> vi(std::stoi(m[2].str())
- std::stoi(m[1].str()));
std::iota(vi.begin(), vi.end(), std::stoi(m[1].str()));
// convert to sequence of integer indices to string indices
std::vector<std::string> vs;
std::transform(std::begin(vi), std::end(vi),
std::back_inserter(vs),
[](int d) { return std::to_string(d); });
condition.push_back(vs);
const std::smatch & m = *j;
// check whether it is a range of indices, or an index
if (m[0].str().find(":") != std::string::npos)
{
// it is a range of indices (i.e. t{#:#})
std::string s_ = m[0].str();
std::string beg = s_.substr(0, s_.find(":"));
std::string end = s_.substr(s_.find(":") + 1);
// generate sequence of integer indices from range
std::vector<int> vi(std::stoi(end) - std::stoi(beg));
std::iota(vi.begin(), vi.end(), std::stoi(beg));
// convert to sequence of integer indices to string indices
std::vector<std::string> vs;
std::transform(std::begin(vi), std::end(vi),
std::back_inserter(vs),
[](int d) { return std::to_string(d); });
condition.push_back(vs);
}
else
{
// it is an index (i.e. t{#})
condition.push_back({m[0].str()});
}
}
}
......@@ -232,8 +225,6 @@ namespace evalhyd
auto q = get_q();
// define lambda function to precompute mean/median/quantile
auto get_val =
[&](const std::string& str, const std::string& num)
{
......@@ -249,7 +240,7 @@ namespace evalhyd
{
return xt::mean(q)();
}
else // (str == "quantile")
else // (str == "qtl")
{
return xt::quantile(q, {std::stod(num)})();
}
......@@ -266,7 +257,7 @@ namespace evalhyd
if (cond.size() == 2)
{
opr1 = cond[0][0];
val1= get_val(cond[0][1], cond[0][2]);
val1 = get_val(cond[0][1], cond[0][2]);
opr2 = cond[1][0];
val2 = get_val(cond[1][1], cond[1][2]);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment