diff --git a/src/masks.hpp b/src/masks.hpp index c61bb0fb520889350c4e1924e43630fe0070527c..d3ce0e696bce233c694bdae3833aa5be5497a55b 100644 --- a/src/masks.hpp +++ b/src/masks.hpp @@ -15,6 +15,10 @@ #include <xtensor/xsort.hpp> #include <xtensor/xindex_view.hpp> +#include "maths.hpp" + +namespace eh = evalhyd; + typedef std::map<std::string, std::vector<std::vector<std::string>>> msk_tree; namespace evalhyd @@ -30,7 +34,7 @@ namespace evalhyd // observed or predicted (median or mean for probabilist) streamflow // e.g. q{>9.} q{<9} q{>=99.0} q{<=99} q{>9,<99} q{==9} q{!=9} std::regex exp_q ( - R"((q_obs|q_prd_median|q_prd_mean)\{((([><!=]?=?[0-9]+\.?[0-9]*),*)+)\})" + R"((q_obs|q_prd_median|q_prd_mean)\{((([><!=]?=?(mean|median|quantile[0-9]+\.?[0-9]*|[0-9]+\.?[0-9]*)),*)+)\})" ); for (std::sregex_iterator i = @@ -40,33 +44,41 @@ namespace evalhyd const std::smatch & mtc = *i; std::string var = mtc[1]; - std::string s = mtc[2]; + std::string str = mtc[2]; // process masking conditions on streamflow std::vector<std::vector<std::string>> conditions; // pattern supported to specify masking conditions based on streamflow - std::regex e (R"(([><!=]?=?)([0-9]+\.?[0-9]*))"); + std::regex ex (R"(([><!=]?=?)(mean|median|quantile|[0-9]+\.?[0-9]*)([0-9]+\.?[0-9]*)?)"); for (std::sregex_iterator j = - std::sregex_iterator(s.begin(), s.end(), e); + std::sregex_iterator(str.begin(), str.end(), ex); j != std::sregex_iterator(); j++) { - const std::smatch & m = *j; + const std::smatch & mt = *j; // check that operator is provided and is supported std::set<std::string> supported_op = {"<", ">", "<=", ">=", "!=", "=="}; - if (supported_op.find(m[1]) != supported_op.end()) - conditions.push_back({m[1].str(), m[2].str()}); - else if (m[1].str().empty()) + if (mt[1].str().empty()) throw std::runtime_error( "missing operator for streamflow masking condition" ); + else if (supported_op.find(mt[1]) != supported_op.end()) + { + if ((mt[2].str() == "median") + or (mt[2].str() == "mean") + or (mt[2].str() == "quantile")) + conditions.push_back({mt[1].str(), mt[2].str(), mt[3].str()}); + else + // it is a simple numerical value, swap last two + conditions.push_back({mt[1].str(), mt[3].str(), mt[2].str()}); + } else throw std::runtime_error( "invalid operator for streamflow masking " - "condition: " + m[1].str() + "condition: " + mt[1].str() ); } @@ -193,6 +205,19 @@ namespace evalhyd }; auto q = get_q(); + // define lambda function to precompute mean/median/quantile + auto get_val = [&](const std::string& str, const std::string& num) { + if (str.empty()) + // it is a simple numerical value + return std::stod(num); + else if (str == "median") + return xt::median(q); + else if (str == "mean") + return xt::mean(q)(); + else // (str == "quantile") + return eh::maths::quantile(q, std::stod(num)); + }; + // preprocess conditions to identify special cases // within/without bool within = false; @@ -204,9 +229,9 @@ namespace evalhyd if (cond.size() == 2) { opr1 = cond[0][0]; - val1 = std::stod(cond[0][1]); + val1= get_val(cond[0][1], cond[0][2]); opr2 = cond[1][0]; - val2 = std::stod(cond[1][1]); + val2 = get_val(cond[1][1], cond[1][2]); if ((opr1 == "<") or (opr1 == "<=")) { @@ -291,9 +316,7 @@ namespace evalhyd for (const auto & opr_val : cond) { auto opr = opr_val[0]; - - // convert comparison value from string to double - double val = std::stod(opr_val[1]); + double val = get_val(opr_val[1], opr_val[2]); // apply masking condition to given subset if (opr == "<") diff --git a/src/maths.hpp b/src/maths.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f0613fbbf2aefd843816a480692bd17d87b6ac74 --- /dev/null +++ b/src/maths.hpp @@ -0,0 +1,65 @@ +#ifndef EVALHYD_MATHS_HPP +#define EVALHYD_MATHS_HPP + +#include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xsort.hpp> +#include <xtensor/xbuilder.hpp> +#include <xtensor/xutils.hpp> + +#include <cmath> + +namespace evalhyd +{ + namespace maths + { + inline double quantile(const xt::xtensor<double, 1>& t, double p) + { + std::size_t n = t.size(); + + // compute virtual index + auto virtual_idx = (double(n) - 1) * p; + + if (std::fmod(virtual_idx, 1) == 0) + // virtual index is an integer + { + std::size_t i = {static_cast<unsigned long long>(virtual_idx)}; + std::array<std::size_t, 1> kth = {i}; + auto values = xt::partition(t, kth); + return xt::mean(xt::view(values, xt::range(i, i + 1)))(); + } + else + // virtual index is not an integer + { + // determine indices before and after virtual index + std::size_t prv_idx = std::floor(virtual_idx); + std::size_t nxt_idx = prv_idx + 1; + + // deal with edge cases + if (virtual_idx > double(n) - 1) + { + prv_idx = n - 1; + nxt_idx = n - 1; + } + if (virtual_idx < 0) + { + prv_idx = 0; + nxt_idx = 0; + } + + std::array<std::size_t, 2> kth = {prv_idx, nxt_idx}; + auto values = xt::partition(t, kth); + auto vw = xt::view(values, xt::range(prv_idx, nxt_idx + 1)); + + // perform linear interpolation to determine quantile + auto gamma = virtual_idx - double(prv_idx); + auto prv = xt::amin(vw); + auto nxt = xt::amax(vw); + + return (prv + (nxt - prv) * gamma)(); + } + } + } +} + +#endif //EVALHYD_MATHS_HPP