masks.hpp 20.45 KiB
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#ifndef EVALHYD_MASKS_HPP
#define EVALHYD_MASKS_HPP
#include <map>
#include <set>
#include <vector>
#include <array>
#include <string>
#include <regex>
#include <stdexcept>
#include <xtensor/xexpression.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xsort.hpp>
#include <xtensor/xindex_view.hpp>
/// Parsed masking conditions, keyed by the name of the variable they apply
/// to (e.g. "q_obs", "t"). Each key maps to a list of conditions, and each
/// condition is itself a list of string tokens.
using msk_tree = std::map<
        std::string,
        std::vector<std::vector<std::string>>
>;
namespace evalhyd
    namespace masks
        /// Function to parse a string containing masking conditions.
        inline msk_tree parse_masking_conditions(std::string msk_str)
            msk_tree subset;
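            // pattern supported to specify conditions to generate masks on
            // observed or predicted (median or mean for probabilistic) streamflow,
            // where the variable is one of q_obs, q_prd_median, or q_prd_mean
            // e.g. q_obs{>9.} q_obs{<9} q_obs{>=99.0} q_obs{<=99} q_obs{>9,<99}
            //      q_obs{==9} q_obs{!=9} q_obs{>median} q_obs{>=qtl0.7}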
            std::regex exp_q (
                    R"((q_obs|q_prd_median|q_prd_mean)\{(((<|>|<=|>=|==|!=)(mean,?|median,?|qtl(0|1)\.(0|1|2|3|4|5|6|7|8|9)+,?|(0|1|2|3|4|5|6|7|8|9)+\.?(0|1|2|3|4|5|6|7|8|9)*,?))+)\})"
                    // NOTE: this should be `R"((q_obs|q_prd_median|q_prd_mean)\{(((<|>|<=|>=|==|!=)(mean,?|median,?|qtl[0-1]\.[0-9]+,?|[0-9]+\.?[0-9]*,?))+)\})"`
                    //       but there is a bug in the building chain for R packages
                    //       https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-r/-/issues/6
            for (std::sregex_iterator i =
                    std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_q);
                 i != std::sregex_iterator(); i++)
                const std::smatch & mtc = *i;
                std::string var = mtc[1];
                std::string str = mtc[2];
                // process masking conditions on streamflow
                std::vector<std::vector<std::string>> conditions;
                // pattern supported to specify masking conditions based on streamflow
                std::regex ex (
                        R"((<|>|<=|>=|==|!=)(mean|median|qtl(0|1)\.(0|1|2|3|4|5|6|7|8|9)+|(0|1|2|3|4|5|6|7|8|9)+\.?(0|1|2|3|4|5|6|7|8|9)*))"
                        // NOTE: this should be `R"((<|>|<=|>=|==|!=)(mean|median|qtl[0-1]\.[0-9]+|[0-9]+\.?[0-9]*))"`
                        //       but there is a bug in the building chain for R packages
                        //       https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-r/-/issues/6
                for (std::sregex_iterator j =
                        std::sregex_iterator(str.begin(), str.end(), ex);
                     j != std::sregex_iterator(); j++)
                    const std::smatch & mt = *j;
                    if ((mt[2].str() == "median")
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
|| (mt[2].str() == "mean")) { conditions.push_back({mt[1].str(), mt[2].str(), ""}); } else if ((mt[2].str().length() >= 3) && (mt[2].str().substr(0, 3) == "qtl")) { conditions.push_back( {mt[1].str(), "qtl", mt[2].str().substr(3)} ); } else { // it is a simple numerical value conditions.push_back({mt[1].str(), "", mt[2].str()}); } } // check that a maximum of two conditions were provided if (conditions.size() > 2) { throw std::runtime_error( "no more than two streamflow masking conditions " "can be provided" ); } subset[var] = conditions; } // pattern supported to specify conditions to generate masks on time index // e.g. t{0:10} t{0:10,20:30} t{0,1,2,3} t{0:10,30,40,50} t{:} std::regex exp_t ( R"((t)\{(:|((0|1|2|3|4|5|6|7|8|9)+:(0|1|2|3|4|5|6|7|8|9)+,?|(0|1|2|3|4|5|6|7|8|9)+,?)+)\})" // NOTE: this should be `R"((t)\{(:|([0-9]+:[0-9]+,?|[0-9]+,?)+)\})"` // but there is a bug in the building chain for R packages // https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-r/-/issues/6 ); for (std::sregex_iterator i = std::sregex_iterator(msk_str.begin(), msk_str.end(), exp_t); i != std::sregex_iterator(); i++) { const std::smatch & mtc = *i; std::string var = mtc[1]; std::string s = mtc[2]; // process masking conditions on time index std::vector<std::vector<std::string>> condition; // check whether it is all indices (i.e. t{:}) if (s == ":") { condition.emplace_back(); } else { // pattern supported to specify masking conditions based on time index std::regex e ( R"((0|1|2|3|4|5|6|7|8|9)+:(0|1|2|3|4|5|6|7|8|9)+|(0|1|2|3|4|5|6|7|8|9)+)" // NOTE: this should be `R"([0-9]+:[0-9]+|[0-9]+)"` // but there is a bug in the building chain for R packages // https://gitlab.irstea.fr/HYCAR-Hydro/evalhyd/evalhyd-r/-/issues/6 ); for (std::sregex_iterator j = std::sregex_iterator(s.begin(), s.end(), e); j != std::sregex_iterator(); j++) {