evald.cpp 9.10 KiB
#include <unordered_map>
#include <vector>
#include <array>
#include <stdexcept>
#include <xtensor/xexpression.hpp>
#include <xtensor/xarray.hpp>
#include <xtensor/xscalar.hpp>
#include "evalhyd/evald.hpp"
#include "utils.hpp"
#include "masks.hpp"
#include "maths.hpp"
#include "uncertainty.hpp"
#include "determinist/evaluator.hpp"
namespace eh = evalhyd;
namespace evalhyd
    std::vector<xt::xarray<double>> evald(
            const xt::xtensor<double, 2>& q_obs,
            const xt::xtensor<double, 2>& q_prd,
            const std::vector<std::string>& metrics,
            const std::string& transform,
            const double exponent,
            double epsilon,
            const xt::xtensor<bool, 2>& t_msk,
            const xt::xtensor<std::array<char, 32>, 1>& m_cdt,
            const std::unordered_map<std::string, int>& bootstrap,
            const std::vector<std::string>& dts
        // check that the metrics to be computed are valid
        utils::check_metrics(
                metrics,
                {"RMSE", "NSE", "KGE", "KGEPRIME"}
        // check that optional parameters are valid
        eh::utils::check_bootstrap(bootstrap);
        // check that data dimensions are compatible
        // > time
        if (q_obs.shape(1) != q_prd.shape(1))
            throw std::runtime_error(
                    "observations and predictions feature different "
                    "temporal lengths"
        if (t_msk.size() > 0)
            if (q_obs.shape(1) != t_msk.shape(1))
                throw std::runtime_error(
                        "observations and masks feature different "
                        "temporal lengths"
        if (!dts.empty())
            if (q_obs.shape(1) != dts.size())
                throw std::runtime_error(
                        "observations and datetimes feature different "
                        "temporal lengths"
        // > series
        if (q_obs.shape(0) != 1)
            throw std::runtime_error(
                    "observations contain more than one time series"
        // retrieve dimensions
        std::size_t n_tim = q_obs.shape(1);
        std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(0) :
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
(m_cdt.size() > 0 ? m_cdt.shape(0) : 1); // initialise a mask if none provided // (corresponding to no temporal subset) auto gen_msk = [&]() { // if t_msk provided, it takes priority if (t_msk.size() > 0) return t_msk; // else if m_cdt provided, use them to generate t_msk else if (m_cdt.size() > 0) { xt::xtensor<bool, 2> c_msk = xt::zeros<bool>({n_msk, n_tim}); for (int m = 0; m < n_msk; m++) xt::view(c_msk, m) = eh::masks::generate_mask_from_conditions( m_cdt[0], xt::view(q_obs, 0), q_prd ); return c_msk; } // if neither t_msk nor m_cdt provided, generate dummy mask else return xt::xtensor<bool, 2>{xt::ones<bool>({std::size_t{1}, n_tim})}; }; auto msk = gen_msk(); // apply streamflow transformation if requested auto q_transform = [&](const xt::xtensor<double, 2>& q) { if ( transform == "none" || (transform == "pow" && exponent == 1)) { return q; } else if ( transform == "sqrt" ) { return xt::eval(xt::sqrt(q)); } else if ( transform == "inv" ) { if ( epsilon == -9 ) // determine an epsilon value to avoid zero divide epsilon = xt::mean(q_obs)() * 0.01; return xt::eval(1. / (q + epsilon)); } else if ( transform == "log" ) { if ( epsilon == -9 ) // determine an epsilon value to avoid log zero epsilon = xt::mean(q_obs)() * 0.01; return xt::eval(xt::log(q + epsilon)); } else if ( transform == "pow" ) { if ( exponent < 0 ) { if ( epsilon == -9 ) // determine an epsilon value to avoid zero divide epsilon = xt::mean(q_obs)() * 0.01; return xt::eval(xt::pow(q + epsilon, exponent)); } else { return xt::eval(xt::pow(q, exponent)); } }
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
else { throw std::runtime_error( "invalid streamflow transformation: " + transform ); } }; auto obs = q_transform(q_obs); auto prd = q_transform(q_prd); // generate bootstrap experiment if requested std::vector<xt::xkeep_slice<int>> exp; auto n_samples = bootstrap.find("n_samples")->second; auto len_sample = bootstrap.find("len_sample")->second; if ((n_samples != -9) && (len_sample != -9)) { if (dts.empty()) throw std::runtime_error( "bootstrap requested but datetimes not provided" ); exp = eh::uncertainty::bootstrap( dts, n_samples, len_sample ); } else { // if no bootstrap requested, generate one sample // containing all the time indices once xt::xtensor<int, 1> all = xt::arange(n_tim); exp.push_back(xt::keep(all)); } // instantiate determinist evaluator eh::determinist::Evaluator evaluator(obs, prd, msk, exp); // declare maps for memoisation purposes std::unordered_map<std::string, std::vector<std::string>> elt; std::unordered_map<std::string, std::vector<std::string>> dep; // register potentially recurring computation elt across metrics elt["RMSE"] = {"quad_err"}; elt["NSE"] = {"mean_obs", "quad_obs", "quad_err"}; elt["KGE"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", "r_pearson", "alpha", "bias"}; elt["KGEPRIME"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", "r_pearson", "alpha", "bias"}; // register nested metrics (i.e. metric dependent on another metric) // TODO // determine required elt/dep to be pre-computed std::vector<std::string> req_elt; std::vector<std::string> req_dep; eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); // pre-compute required elt for ( const auto& element : req_elt ) { if ( element == "mean_obs" ) evaluator.calc_mean_obs(); else if ( element == "mean_prd" ) evaluator.calc_mean_prd(); else if ( element == "quad_err" ) evaluator.calc_quad_err(); else if ( element == "quad_obs" ) evaluator.calc_quad_obs(); else if ( element == "quad_prd" )
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
evaluator.calc_quad_prd(); else if ( element == "r_pearson" ) evaluator.calc_r_pearson(); else if ( element == "alpha" ) evaluator.calc_alpha(); else if ( element == "bias" ) evaluator.calc_bias(); } // pre-compute required dep for ( const auto& dependency : req_dep ) { // TODO } // retrieve or compute requested metrics std::vector<xt::xarray<double>> r; auto summary = bootstrap.find("summary")->second; for ( const auto& metric : metrics ) { if ( metric == "RMSE" ) { if (std::find(req_dep.begin(), req_dep.end(), metric) == req_dep.end()) evaluator.calc_RMSE(); r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary)); } else if ( metric == "NSE" ) { if (std::find(req_dep.begin(), req_dep.end(), metric) == req_dep.end()) evaluator.calc_NSE(); r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary)); } else if ( metric == "KGE" ) { if (std::find(req_dep.begin(), req_dep.end(), metric) == req_dep.end()) evaluator.calc_KGE(); r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary)); } else if ( metric == "KGEPRIME" ) { if (std::find(req_dep.begin(), req_dep.end(), metric) == req_dep.end()) evaluator.calc_KGEPRIME(); r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary)); } } return r; } }