masks.hpp 16.37 KiB
#ifndef EVALHYD_MASKS_HPP
#define EVALHYD_MASKS_HPP
#include <map>
#include <set>
#include <vector>
#include <array>
#include <string>
#include <regex>
#include <stdexcept>
#include <xtensor/xexpression.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xsort.hpp>
#include <xtensor/xindex_view.hpp>
#include "maths.hpp"
namespace eh = evalhyd;
typedef std::map<std::string, std::vector<std::vector<std::string>>> msk_tree;
namespace evalhyd
    namespace masks
        /// Function to parse a string containing masking conditions.
        inline msk_tree parse_masking_conditions(std::string msk_str)
            msk_tree subset;
            // pattern supported to specify conditions to generate masks on
            // observed or predicted (median or mean for probabilist) streamflow
            // e.g. q{>9.} q{<9} q{>=99.0} q{<=99} q{>9,<99} q{==9} q{!=9}
            std::regex exp_q (
                    R"((q_obs|q_prd_median|q_prd_mean)\{((([><!=]?=?(mean|median|quantile[0-9]+\.?[0-9]*|[0-9]+\.?[0-9]*)),*)+)\})"
            for (std::sregex_iterator i =
                    std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_q);
                 i != std::sregex_iterator(); i++)
                const std::smatch & mtc = *i;
                std::string var = mtc[1];
                std::string str = mtc[2];
                // process masking conditions on streamflow
                std::vector<std::vector<std::string>> conditions;
                // pattern supported to specify masking conditions based on streamflow
                std::regex ex (R"(([><!=]?=?)(mean|median|quantile|[0-9]+\.?[0-9]*)([0-9]+\.?[0-9]*)?)");
                for (std::sregex_iterator j =
                        std::sregex_iterator(str.begin(), str.end(), ex);
                     j != std::sregex_iterator(); j++)
                    const std::smatch & mt = *j;
                    // check that operator is provided and is supported
                    std::set<std::string> supported_op =
                            {"<", ">", "<=", ">=", "!=", "=="};
                    if (mt[1].str().empty())
                        throw std::runtime_error(
                                "missing operator for streamflow masking condition"
                    else if (supported_op.find(mt[1]) != supported_op.end())
                        if ((mt[2].str() == "median")
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
|| (mt[2].str() == "mean") || (mt[2].str() == "quantile")) conditions.push_back({mt[1].str(), mt[2].str(), mt[3].str()}); else // it is a simple numerical value, swap last two conditions.push_back({mt[1].str(), mt[3].str(), mt[2].str()}); } else throw std::runtime_error( "invalid operator for streamflow masking " "condition: " + mt[1].str() ); } // check that a maximum of two conditions were provided if (conditions.size() > 2) throw std::runtime_error( "no more than two streamflow masking conditions " "can be provided" ); subset[var] = conditions; } // pattern supported to specify conditions to generate masks on time index // e.g. t{0:10} t{0:10,20:30} t{0,1,2,3} t{0:10,30,40,50} t{:} std::regex exp_t (R"(([t])\{(((([0-9]+|[:]):?[0-9]*),*)+)\})"); for (std::sregex_iterator i = std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_t); i != std::sregex_iterator(); i++) { const std::smatch & mtc = *i; std::string var = mtc[1]; std::string s = mtc[2]; // process masking conditions on time index std::vector<std::vector<std::string>> condition; // pattern supported to specify masking conditions based on time index std::regex e (R"(([0-9]+|[:]):?([0-9]*))"); for (std::sregex_iterator j = std::sregex_iterator(s.begin(), s.end(), e); j != std::sregex_iterator(); j++) { const std::smatch & m = *j; // check whether it is all indices, a range of indices, or an index if (m[1] == ":") // it is all indices (i.e. t{:}) so keep everything condition.emplace_back(); else if (m[2].str().empty()) // it is an index (i.e. t{#}) condition.push_back({m[1].str()}); else { // it is a range of indices (i.e. t{#:#}) // generate sequence of integer indices from range std::vector<int> vi(std::stoi(m[2].str()) - std::stoi(m[1].str())); std::iota(vi.begin(), vi.end(), std::stoi(m[1].str())); // convert to sequence of integer indices to string indices std::vector<std::string> vs; std::transform(std::begin(vi), std::end(vi), std::back_inserter(vs), [](int d) { return std::to_string(d); }); condition.push_back(vs);
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
} } subset[var] = condition; } return subset; } /// Function to generate temporal mask based on masking conditions inline xt::xtensor<bool, 1> generate_mask_from_conditions( const std::array<char, 32>& msk_char_arr, const xt::xtensor<double, 1>& q_obs, const xt::xtensor<double, 2>& q_prd = {} ) { // parse string to identify masking conditions std::string msk_str(msk_char_arr.begin(), msk_char_arr.end()); msk_tree subset = parse_masking_conditions(msk_str); // check if conditions were found in parsing if (subset.empty()) throw std::runtime_error( "no valid condition found to generate mask(s)" ); // initialise a boolean expression for the masks xt::xtensor<bool, 1> t_msk = xt::zeros<bool>(q_obs.shape()); // populate the masks given the conditions for (const auto & var_cond : subset) { auto var = var_cond.first; auto cond = var_cond.second; // condition on streamflow if ((var == "q_obs") || (var == "q_prd_median") || (var == "q_prd_mean")) { // preprocess streamflow depending on kind auto get_q = [&]() { if (var == "q_obs") { return q_obs; } else if (var == "q_prd_median") { if (q_prd.size() < 1) throw std::runtime_error( "condition on streamflow predictions " "not allowed for generating masks" ); xt::xtensor<double, 1> q_prd_median = xt::median(q_prd, 0); return q_prd_median; } else { // i.e. (var == "q_prd_mean") if (q_prd.size() < 1) throw std::runtime_error( "condition on streamflow predictions " "not allowed for generating masks" ); xt::xtensor<double, 1> q_prd_mean = xt::mean(q_prd, 0); return q_prd_mean; } }; auto q = get_q(); // define lambda function to precompute mean/median/quantile auto get_val = [&](const std::string& str, const std::string& num) { if (str.empty())
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
// it is a simple numerical value return std::stod(num); else if (str == "median") return xt::median(q); else if (str == "mean") return xt::mean(q)(); else // (str == "quantile") return eh::maths::quantile(q, std::stod(num)); }; // preprocess conditions to identify special cases // within/without bool within = false; bool without = false; std::string opr1, opr2; double val1, val2; if (cond.size() == 2) { opr1 = cond[0][0]; val1= get_val(cond[0][1], cond[0][2]); opr2 = cond[1][0]; val2 = get_val(cond[1][1], cond[1][2]); if ((opr1 == "<") || (opr1 == "<=")) { if ((opr2 == ">") || (opr2 == ">=")) { if (val2 > val1) without = true; else { within = true; } } } else if ((opr1 == ">") || (opr1 == ">=")) { if ((opr2 == "<") || (opr2 == "<=")) { if (val2 > val1) within = true; else { without = true; } } } } // process conditions, starting with special cases // within/without if (within) { if ((opr1 == "<") && (opr2 == ">")) t_msk = xt::where((q < val1) & (q > val2), 1, t_msk); else if ((opr1 == "<=") && (opr2 == ">")) t_msk = xt::where((q <= val1) & (q > val2), 1, t_msk); else if ((opr1 == "<") && (opr2 == ">=")) t_msk = xt::where((q < val1) & (q >= val2), 1, t_msk); else if ((opr1 == "<=") && (opr2 == ">=")) t_msk = xt::where((q <= val1) & (q >= val2), 1, t_msk); if ((opr2 == "<") && (opr1 == ">")) t_msk = xt::where((q < val2) & (q > val1), 1, t_msk); else if ((opr2 == "<=") && (opr1 == ">")) t_msk = xt::where((q <= val2) & (q > val1), 1, t_msk); else if ((opr2 == "<") && (opr1 == ">=")) t_msk = xt::where((q < val2) & (q >= val1),
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
1, t_msk); else if ((opr2 == "<=") && (opr1 == ">=")) t_msk = xt::where((q <= val2) & (q >= val1), 1, t_msk); } else if (without) { if ((opr1 == "<") && (opr2 == ">")) t_msk = xt::where((q < val1) | (q > val2), 1, t_msk); else if ((opr1 == "<=") && (opr2 == ">")) t_msk = xt::where((q <= val1) | (q > val2), 1, t_msk); else if ((opr1 == "<") && (opr2 == ">=")) t_msk = xt::where((q < val1) | (q >= val2), 1, t_msk); else if ((opr1 == "<=") && (opr2 == ">=")) t_msk = xt::where((q <= val1) & (q >= val2), 1, t_msk); if ((opr2 == "<") && (opr1 == ">")) t_msk = xt::where((q < val2) | (q > val1), 1, t_msk); else if ((opr2 == "<=") && (opr1 == ">")) t_msk = xt::where((q <= val2) | (q > val1), 1, t_msk); else if ((opr2 == "<") && (opr1 == ">=")) t_msk = xt::where((q < val2) | (q >= val1), 1, t_msk); else if ((opr2 == "<=") && (opr1 == ">=")) t_msk = xt::where((q <= val2) | (q >= val1), 1, t_msk); } else { for (const auto & opr_val : cond) { auto opr = opr_val[0]; double val = get_val(opr_val[1], opr_val[2]); // apply masking condition to given subset if (opr == "<") t_msk = xt::where( q < val, 1, t_msk ); else if (opr == ">") t_msk = xt::where( q > val, 1, t_msk ); else if (opr == "<=") t_msk = xt::where( q <= val, 1, t_msk ); else if (opr == ">=") t_msk = xt::where( q >= val, 1, t_msk ); else if (opr == "==") t_msk = xt::where( xt::equal(q, val), 1, t_msk ); else if (opr == "!=") t_msk = xt::where( xt::not_equal(q, val), 1, t_msk ); } } } // condition on time index else if (var == "t")
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
{ for (const auto & sequence : cond) { if (sequence.empty()) // i.e. t{:} xt::view(t_msk, xt::all()) = 1; else { // convert string indices to integer indices std::vector<int> vi; std::transform(std::begin(sequence), std::end(sequence), std::back_inserter(vi), [](const std::string& s) { return std::stoi(s); }); // apply masked indices to given subset xt::index_view(t_msk, vi) = 1; } } } } return t_msk; } } } #endif //EVALHYD_MASKS_HPP