An error occurred while loading the file. Please try again.
-
Thibault Hallouin authored
Unlike 3de89046, this also deals with missing predictions. This effectively pairwise remove those time steps where either observations or predictions are missing data. This does so without creating copies of the input data, via the use of xfunctions stored as class members. The unit test for missing data is updated to include NAN also in the predictions (with a varying number of NAN across the 5 time series).
fadb5fa9
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#ifndef EVALHYD_MASKS_HPP
#define EVALHYD_MASKS_HPP
#include <map>
#include <set>
#include <vector>
#include <array>
#include <string>
#include <regex>
#include <stdexcept>
#include <xtensor/xexpression.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xsort.hpp>
#include <xtensor/xindex_view.hpp>
typedef std::map<std::string, std::vector<std::vector<std::string>>> msk_tree;
namespace evalhyd
{
namespace masks
{
/// Function to parse a string containing masking conditions.
inline msk_tree parse_masking_conditions(std::string msk_str)
{
msk_tree subset;
// pattern supported to specify conditions to generate masks on
// observed or predicted (median or mean for probabilist) streamflow
// e.g. q{>9.} q{<9} q{>=99.0} q{<=99} q{>9,<99} q{==9} q{!=9}
std::regex exp_q (
R"((q_obs|q_prd_median|q_prd_mean)\{(((<|>|<=|>=|==|!=)(mean,?|median,?|qtl(0|1)\.(0|1|2|3|4|5|6|7|8|9)+,?|(0|1|2|3|4|5|6|7|8|9)+\.?(0|1|2|3|4|5|6|7|8|9)*,?))+)\})"
// NOTE: this should be `R"((q_obs|q_prd_median|q_prd_mean)\{(((<|>|<=|>=|==|!=)(mean,?|median,?|qtl[0-1]\.[0-9]+,?|[0-9]+\.?[0-9]*,?))+)\})"`
// but there is a bug in the building chain for R packages
// https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-r/-/issues/6
);
for (std::sregex_iterator i =
std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_q);
i != std::sregex_iterator(); i++)
{
const std::smatch & mtc = *i;
std::string var = mtc[1];
std::string str = mtc[2];
// process masking conditions on streamflow
std::vector<std::vector<std::string>> conditions;
// pattern supported to specify masking conditions based on streamflow
std::regex ex (
R"((<|>|<=|>=|==|!=)(mean|median|qtl(0|1)\.(0|1|2|3|4|5|6|7|8|9)+|(0|1|2|3|4|5|6|7|8|9)+\.?(0|1|2|3|4|5|6|7|8|9)*))"
// NOTE: this should be `R"((<|>|<=|>=|==|!=)(mean|median|qtl[0-1]\.[0-9]+|[0-9]+\.?[0-9]*))"`
// but there is a bug in the building chain for R packages
// https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-r/-/issues/6
);
for (std::sregex_iterator j =
std::sregex_iterator(str.begin(), str.end(), ex);
j != std::sregex_iterator(); j++)
{
const std::smatch & mt = *j;
if ((mt[2].str() == "median")
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
|| (mt[2].str() == "mean"))
{
conditions.push_back({mt[1].str(), mt[2].str(), ""});
}
else if ((mt[2].str().length() >= 3)
&& (mt[2].str().substr(0, 3) == "qtl"))
{
conditions.push_back(
{mt[1].str(), "qtl", mt[2].str().substr(3)}
);
}
else
{
// it is a simple numerical value
conditions.push_back({mt[1].str(), "", mt[2].str()});
}
}
// check that a maximum of two conditions were provided
if (conditions.size() > 2)
{
throw std::runtime_error(
"no more than two streamflow masking conditions "
"can be provided"
);
}
subset[var] = conditions;
}
// pattern supported to specify conditions to generate masks on time index
// e.g. t{0:10} t{0:10,20:30} t{0,1,2,3} t{0:10,30,40,50} t{:}
std::regex exp_t (
R"((t)\{(:|((0|1|2|3|4|5|6|7|8|9)+:(0|1|2|3|4|5|6|7|8|9)+,?|(0|1|2|3|4|5|6|7|8|9)+,?)+)\})"
// NOTE: this should be `R"((t)\{(:|([0-9]+:[0-9]+,?|[0-9]+,?)+)\})"`
// but there is a bug in the building chain for R packages
// https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-r/-/issues/6
);
for (std::sregex_iterator i =
std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_t);
i != std::sregex_iterator(); i++)
{
const std::smatch & mtc = *i;
std::string var = mtc[1];
std::string s = mtc[2];
// process masking conditions on time index
std::vector<std::vector<std::string>> condition;
// check whether it is all indices (i.e. t{:})
if (s == ":")
{
condition.emplace_back();
}
else
{
// pattern supported to specify masking conditions based on time index
std::regex e (
R"((0|1|2|3|4|5|6|7|8|9)+:(0|1|2|3|4|5|6|7|8|9)+|(0|1|2|3|4|5|6|7|8|9)+)"
// NOTE: this should be `R"([0-9]+:[0-9]+|[0-9]+)"`
// but there is a bug in the building chain for R packages
// https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-r/-/issues/6
);
for (std::sregex_iterator j =
std::sregex_iterator(s.begin(), s.end(), e);
j != std::sregex_iterator(); j++)
{
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
const std::smatch & m = *j;
// check whether it is a range of indices, or an index
if (m[0].str().find(':') != std::string::npos)
{
// it is a range of indices (i.e. t{#:#})
std::string s_ = m[0].str();
std::string beg = s_.substr(0, s_.find(':'));
std::string end = s_.substr(s_.find(':') + 1);
// generate sequence of integer indices from range
std::vector<int> vi(std::stoi(end) - std::stoi(beg));
std::iota(vi.begin(), vi.end(), std::stoi(beg));
// convert to sequence of integer indices to string indices
std::vector<std::string> vs;
std::transform(std::begin(vi), std::end(vi),
std::back_inserter(vs),
[](int d) { return std::to_string(d); });
condition.push_back(vs);
}
else
{
// it is an index (i.e. t{#})
condition.push_back({m[0].str()});
}
}
}
subset[var] = condition;
}
return subset;
}
/// Function to generate temporal mask based on masking conditions
template<class X1, class X2>
inline xt::xtensor<bool, 1> generate_mask_from_conditions(
const std::array<char, 32>& msk_char_arr,
const X1& q_obs,
const X2& q_prd
)
{
// parse string to identify masking conditions
std::string msk_str(msk_char_arr.begin(), msk_char_arr.end());
msk_tree subset = parse_masking_conditions(msk_str);
// check if conditions were found in parsing
if (subset.empty())
{
throw std::runtime_error(
"no valid condition found to generate mask(s)"
);
}
// initialise a boolean expression for the masks
xt::xtensor<bool, 1> t_msk = xt::zeros<bool>(q_obs.shape());
// populate the masks given the conditions
for (const auto & var_cond : subset)
{
auto var = var_cond.first;
auto cond = var_cond.second;
// condition on streamflow
if ((var == "q_obs") || (var == "q_prd_median")
|| (var == "q_prd_mean"))
{
// preprocess streamflow depending on kind
auto get_q = [&]() {
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
if (var == "q_obs")
{
return xt::xtensor<double, 1>(q_obs);
}
else if (var == "q_prd_median")
{
if (q_prd.shape(0) == 1)
{
throw std::runtime_error(
"condition on streamflow predictions "
"not allowed for generating masks"
);
}
xt::xtensor<double, 1> q_prd_median =
xt::median(q_prd, 0);
return q_prd_median;
}
else
{
// i.e. (var == "q_prd_mean")
if (q_prd.shape(0) == 1)
{
throw std::runtime_error(
"condition on streamflow predictions "
"not allowed for generating masks"
);
}
xt::xtensor<double, 1> q_prd_mean =
xt::mean(q_prd, 0);
return q_prd_mean;
}
};
auto q = get_q();
// define lambda function to precompute mean/median/quantile
auto get_val =
[&](const std::string& str, const std::string& num)
{
if (str.empty()) // it is a simple numerical value
{
return std::stod(num);
}
else
{
auto q_filtered = xt::filter(q, !xt::isnan(q));
if (q_filtered.size() > 0)
{
if (str == "median")
{
return xt::median(q_filtered);
}
else if (str == "mean")
{
return xt::mean(q_filtered)();
}
else // (str == "qtl")
{
return xt::quantile(q_filtered, {std::stod(num)})();
}
}
else
{
return double(NAN);
}
}
};
// preprocess conditions to identify special cases
// within/without
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
bool within = false;
bool without = false;
std::string opr1, opr2;
double val1, val2;
if (cond.size() == 2)
{
opr1 = cond[0][0];
val1 = get_val(cond[0][1], cond[0][2]);
opr2 = cond[1][0];
val2 = get_val(cond[1][1], cond[1][2]);
if ((opr1 == "<") || (opr1 == "<="))
{
if ((opr2 == ">") || (opr2 == ">="))
{
if (val2 > val1)
{
without = true;
}
else
{
within = true;
}
}
}
else if ((opr1 == ">") || (opr1 == ">="))
{
if ((opr2 == "<") || (opr2 == "<="))
{
if (val2 > val1)
{
within = true;
}
else
{
without = true;
}
}
}
}
// process conditions, starting with special cases
// within/without
if (within)
{
if ((opr1 == "<") && (opr2 == ">"))
{
t_msk = xt::where((q < val1) && (q > val2),
1, t_msk);
}
else if ((opr1 == "<=") && (opr2 == ">"))
{
t_msk = xt::where((q <= val1) && (q > val2),
1, t_msk);
}
else if ((opr1 == "<") && (opr2 == ">="))
{
t_msk = xt::where((q < val1) && (q >= val2),
1, t_msk);
}
else if ((opr1 == "<=") && (opr2 == ">="))
{
t_msk = xt::where((q <= val1) && (q >= val2),
1, t_msk);
}
if ((opr2 == "<") && (opr1 == ">"))
{
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
t_msk = xt::where((q < val2) && (q > val1),
1, t_msk);
}
else if ((opr2 == "<=") && (opr1 == ">"))
{
t_msk = xt::where((q <= val2) && (q > val1),
1, t_msk);
}
else if ((opr2 == "<") && (opr1 == ">="))
{
t_msk = xt::where((q < val2) && (q >= val1),
1, t_msk);
}
else if ((opr2 == "<=") && (opr1 == ">="))
{
t_msk = xt::where((q <= val2) && (q >= val1),
1, t_msk);
}
}
else if (without)
{
if ((opr1 == "<") && (opr2 == ">"))
{
t_msk = xt::where((q < val1) || (q > val2),
1, t_msk);
}
else if ((opr1 == "<=") && (opr2 == ">"))
{
t_msk = xt::where((q <= val1) || (q > val2),
1, t_msk);
}
else if ((opr1 == "<") && (opr2 == ">="))
{
t_msk = xt::where((q < val1) || (q >= val2),
1, t_msk);
}
else if ((opr1 == "<=") && (opr2 == ">="))
{
t_msk = xt::where((q <= val1) && (q >= val2),
1, t_msk);
}
if ((opr2 == "<") && (opr1 == ">"))
{
t_msk = xt::where((q < val2) || (q > val1),
1, t_msk);
}
else if ((opr2 == "<=") && (opr1 == ">"))
{
t_msk = xt::where((q <= val2) || (q > val1),
1, t_msk);
}
else if ((opr2 == "<") && (opr1 == ">="))
{
t_msk = xt::where((q < val2) || (q >= val1),
1, t_msk);
}
else if ((opr2 == "<=") && (opr1 == ">="))
{
t_msk = xt::where((q <= val2) || (q >= val1),
1, t_msk);
}
}
else
{
for (const auto & opr_val : cond)
{
auto opr = opr_val[0];
double val = get_val(opr_val[1], opr_val[2]);
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
// apply masking condition to given subset
if (opr == "<")
{
t_msk = xt::where(
q < val, 1, t_msk
);
}
else if (opr == ">")
{
t_msk = xt::where(
q > val, 1, t_msk
);
}
else if (opr == "<=")
{
t_msk = xt::where(
q <= val, 1, t_msk
);
}
else if (opr == ">=")
{
t_msk = xt::where(
q >= val, 1, t_msk
);
}
else if (opr == "==")
{
t_msk = xt::where(
xt::equal(q, val), 1, t_msk
);
}
else if (opr == "!=")
{
t_msk = xt::where(
xt::not_equal(q, val), 1, t_msk
);
}
}
}
}
// condition on time index
else if (var == "t")
{
for (const auto & sequence : cond)
{
if (sequence.empty())
{
// i.e. t{:}
xt::view(t_msk, xt::all()) = 1;
}
else
{
// convert string indices to integer indices
std::vector<int> vi;
std::transform(std::begin(sequence),
std::end(sequence),
std::back_inserter(vi),
[](const std::string& s)
{ return std::stoi(s); });
// apply masked indices to given subset
xt::index_view(t_msk, vi) = 1;
}
}
}
}
return t_msk;
}
}
}
491492493
#endif //EVALHYD_MASKS_HPP