diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp index 7ae018e12ae57a0b5b337c0f8d78898b37895fa7..34a879f00cdd235fd14f78297e70ee4bec84a0fe 100644 --- a/include/evalhyd/evald.hpp +++ b/include/evalhyd/evald.hpp @@ -1,21 +1,12 @@ -#ifndef EVALHYD_DETERMINIST_HPP -#define EVALHYD_DETERMINIST_HPP +#ifndef EVALHYD_EVALD_HPP +#define EVALHYD_EVALD_HPP #include <unordered_map> #include <vector> -#include <array> -#include <stdexcept> + #include <xtensor/xexpression.hpp> #include <xtensor/xarray.hpp> -#include <xtensor/xscalar.hpp> - -#include "../../src/utils.hpp" -#include "../../src/masks.hpp" -#include "../../src/maths.hpp" -#include "../../src/uncertainty.hpp" -#include "../../src/determinist/evaluator.hpp" -namespace eh = evalhyd; namespace evalhyd { @@ -158,239 +149,7 @@ namespace evalhyd const std::unordered_map<std::string, int>& bootstrap = {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, const std::vector<std::string>& dts = {} - ) - { - // check that the metrics to be computed are valid - utils::check_metrics( - metrics, - {"RMSE", "NSE", "KGE", "KGEPRIME"} - ); - - // check that optional parameters are valid - eh::utils::check_bootstrap(bootstrap); - - // check that data dimensions are compatible - // > time - if (q_obs.shape(1) != q_prd.shape(1)) - throw std::runtime_error( - "observations and predictions feature different " - "temporal lengths" - ); - if (t_msk.size() > 0) - if (q_obs.shape(1) != t_msk.shape(1)) - throw std::runtime_error( - "observations and masks feature different " - "temporal lengths" - ); - if (!dts.empty()) - if (q_obs.shape(1) != dts.size()) - throw std::runtime_error( - "observations and datetimes feature different " - "temporal lengths" - ); - // > series - if (q_obs.shape(0) != 1) - throw std::runtime_error( - "observations contain more than one time series" - ); - - // retrieve dimensions - std::size_t n_tim = q_obs.shape(1); - std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(0) : - (m_cdt.size() > 0 ? m_cdt.shape(0) : 1); - - // initialise a mask if none provided - // (corresponding to no temporal subset) - auto gen_msk = [&]() { - // if t_msk provided, it takes priority - if (t_msk.size() > 0) - return t_msk; - // else if m_cdt provided, use them to generate t_msk - else if (m_cdt.size() > 0) - { - xt::xtensor<bool, 2> c_msk = xt::zeros<bool>({n_msk, n_tim}); - - for (int m = 0; m < n_msk; m++) - xt::view(c_msk, m) = - eh::masks::generate_mask_from_conditions( - m_cdt[0], xt::view(q_obs, 0), q_prd - ); - - return c_msk; - } - // if neither t_msk nor m_cdt provided, generate dummy mask - else - return xt::xtensor<bool, 2>{xt::ones<bool>({std::size_t{1}, n_tim})}; - }; - - auto msk = gen_msk(); - - // apply streamflow transformation if requested - auto q_transform = [&](const xt::xtensor<double, 2>& q) - { - if ( transform == "none" || (transform == "pow" && exponent == 1)) - { - return q; - } - else if ( transform == "sqrt" ) - { - return xt::eval(xt::sqrt(q)); - } - else if ( transform == "inv" ) - { - if ( epsilon == -9 ) - // determine an epsilon value to avoid zero divide - epsilon = xt::mean(q_obs)() * 0.01; - - return xt::eval(1. / (q + epsilon)); - } - else if ( transform == "log" ) - { - if ( epsilon == -9 ) - // determine an epsilon value to avoid log zero - epsilon = xt::mean(q_obs)() * 0.01; - - return xt::eval(xt::log(q + epsilon)); - } - else if ( transform == "pow" ) - { - if ( exponent < 0 ) - { - if ( epsilon == -9 ) - // determine an epsilon value to avoid zero divide - epsilon = xt::mean(q_obs)() * 0.01; - - return xt::eval(xt::pow(q + epsilon, exponent)); - } - else - { - return xt::eval(xt::pow(q, exponent)); - } - } - else - { - throw std::runtime_error( - "invalid streamflow transformation: " + transform - ); - } - }; - - auto obs = q_transform(q_obs); - auto prd = q_transform(q_prd); - - // generate bootstrap experiment if requested - std::vector<xt::xkeep_slice<int>> exp; - auto n_samples = bootstrap.find("n_samples")->second; - auto len_sample = bootstrap.find("len_sample")->second; - if ((n_samples != -9) and (len_sample != -9)) - { - if (dts.empty()) - throw std::runtime_error( - "bootstrap requested but datetimes not provided" - ); - - exp = eh::uncertainty::bootstrap( - dts, n_samples, len_sample - ); - } - else - { - // if no bootstrap requested, generate one sample - // containing all the time indices once - xt::xtensor<int, 1> all = xt::arange(n_tim); - exp.push_back(xt::keep(all)); - } - - // instantiate determinist evaluator - eh::determinist::Evaluator evaluator(obs, prd, msk, exp); - - // declare maps for memoisation purposes - std::unordered_map<std::string, std::vector<std::string>> elt; - std::unordered_map<std::string, std::vector<std::string>> dep; - - // register potentially recurring computation elt across metrics - elt["RMSE"] = {"quad_err"}; - elt["NSE"] = {"mean_obs", "quad_obs", "quad_err"}; - elt["KGE"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", - "r_pearson", "alpha", "bias"}; - elt["KGEPRIME"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", - "r_pearson", "alpha", "bias"}; - - // register nested metrics (i.e. metric dependent on another metric) - // TODO - - // determine required elt/dep to be pre-computed - std::vector<std::string> req_elt; - std::vector<std::string> req_dep; - - eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); - - // pre-compute required elt - for ( const auto& element : req_elt ) - { - if ( element == "mean_obs" ) - evaluator.calc_mean_obs(); - else if ( element == "mean_prd" ) - evaluator.calc_mean_prd(); - else if ( element == "quad_err" ) - evaluator.calc_quad_err(); - else if ( element == "quad_obs" ) - evaluator.calc_quad_obs(); - else if ( element == "quad_prd" ) - evaluator.calc_quad_prd(); - else if ( element == "r_pearson" ) - evaluator.calc_r_pearson(); - else if ( element == "alpha" ) - evaluator.calc_alpha(); - else if ( element == "bias" ) - evaluator.calc_bias(); - } - - // pre-compute required dep - for ( const auto& dependency : req_dep ) - { - // TODO - } - - // retrieve or compute requested metrics - std::vector<xt::xarray<double>> r; - - auto summary = bootstrap.find("summary")->second; - - for ( const auto& metric : metrics ) - { - if ( metric == "RMSE" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_RMSE(); - r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary)); - } - else if ( metric == "NSE" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_NSE(); - r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary)); - } - else if ( metric == "KGE" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_KGE(); - r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary)); - } - else if ( metric == "KGEPRIME" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_KGEPRIME(); - r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary)); - } - } - - return r; - } + ); } -#endif //EVALHYD_DETERMINIST_HPP +#endif //EVALHYD_EVALD_HPP diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp index 5d4c1f00328c0c0edbbe00fc785f007ab50b2f46..6fbce8f1bf4bbbcd6dcc9cf3de92e0bd9c1e93b8 100644 --- a/include/evalhyd/evalp.hpp +++ b/include/evalhyd/evalp.hpp @@ -1,22 +1,12 @@ -#ifndef EVALHYD_PROBABILIST_HPP -#define EVALHYD_PROBABILIST_HPP +#ifndef EVALHYD_EVALP_HPP +#define EVALHYD_EVALP_HPP -#include <utility> #include <unordered_map> #include <vector> -#include <array> -#include <stdexcept> + #include <xtensor/xtensor.hpp> #include <xtensor/xarray.hpp> -#include <xtensor/xview.hpp> - -#include "../../src/utils.hpp" -#include "../../src/masks.hpp" -#include "../../src/maths.hpp" -#include "../../src/uncertainty.hpp" -#include "../../src/probabilist/evaluator.h" -namespace eh = evalhyd; namespace evalhyd { @@ -135,262 +125,7 @@ namespace evalhyd const std::unordered_map<std::string, int>& bootstrap = {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, const std::vector<std::string>& dts = {} - ) - { - // check that the metrics to be computed are valid - eh::utils::check_metrics( - metrics, - {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"} - ); - - // check that optional parameters are given as arguments - eh::utils::evalp::check_optionals(metrics, q_thr); - eh::utils::check_bootstrap(bootstrap); - - // check that data dimensions are compatible - // > time - if (q_obs.shape(1) != q_prd.shape(3)) - throw std::runtime_error( - "observations and predictions feature different " - "temporal lengths" - ); - if (t_msk.size() > 0) - if (q_obs.shape(1) != t_msk.shape(3)) - throw std::runtime_error( - "observations and masks feature different " - "temporal lengths" - ); - if (!dts.empty()) - if (q_obs.shape(1) != dts.size()) - throw std::runtime_error( - "observations and datetimes feature different " - "temporal lengths" - ); - // > leadtimes - if (t_msk.size() > 0) - if (q_prd.shape(1) != t_msk.shape(1)) - throw std::runtime_error( - "predictions and temporal masks feature different " - "numbers of lead times" - ); - // > sites - if (q_obs.shape(0) != q_prd.shape(0)) - throw std::runtime_error( - "observations and predictions feature different " - "numbers of sites" - ); - if (t_msk.size() > 0) - if (q_obs.shape(0) != t_msk.shape(0)) - throw std::runtime_error( - "observations and temporal masks feature different " - "numbers of sites" - ); - if (m_cdt.size() > 0) - if (q_obs.shape(0) != m_cdt.shape(0)) - throw std::runtime_error( - "observations and masking conditions feature different " - "numbers of sites" - ); - - // retrieve dimensions - std::size_t n_sit = q_prd.shape(0); - std::size_t n_ltm = q_prd.shape(1); - std::size_t n_mbr = q_prd.shape(2); - std::size_t n_tim = q_prd.shape(3); - std::size_t n_thr = q_thr.shape(1); - std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(2) : - (m_cdt.size() > 0 ? m_cdt.shape(1) : 1); - std::size_t n_exp = bootstrap.find("n_samples")->second == -9 ? 1: - bootstrap.find("n_samples")->second; - - // register metrics number of dimensions - std::unordered_map<std::string, std::vector<std::size_t>> dim; - - dim["BS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; - dim["BSS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; - dim["BS_CRD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; - dim["BS_LBD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; - dim["QS"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr}; - dim["CRPS"] = {n_sit, n_ltm, n_msk, n_exp}; - - // declare maps for memoisation purposes - std::unordered_map<std::string, std::vector<std::string>> elt; - std::unordered_map<std::string, std::vector<std::string>> dep; - - // register potentially recurring computation elements across metrics - elt["bs"] = {"o_k", "y_k"}; - elt["BSS"] = {"o_k", "bar_o"}; - elt["BS_CRD"] = {"o_k", "bar_o", "y_k"}; - elt["BS_LBD"] = {"o_k", "y_k"}; - elt["qs"] = {"q_qnt"}; - - // register nested metrics (i.e. metric dependent on another metric) - dep["BS"] = {"bs"}; - dep["BSS"] = {"bs"}; - dep["QS"] = {"qs"}; - dep["CRPS"] = {"qs", "crps"}; - - // determine required elt/dep to be pre-computed - std::vector<std::string> req_elt; - std::vector<std::string> req_dep; - - eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); - - // generate masks from conditions if provided - auto gen_msk = [&]() { - xt::xtensor<bool, 4> c_msk = xt::zeros<bool>({n_sit, n_ltm, n_msk, n_tim}); - if (m_cdt.size() > 0) - for (int s = 0; s < n_sit; s++) - for (int l = 0; l < n_ltm; l++) - for (int m = 0; m < n_msk; m++) - xt::view(c_msk, s, l, m) = - eh::masks::generate_mask_from_conditions( - xt::view(m_cdt, s, m), - xt::view(q_obs, s), - xt::view(q_prd, s, l) - ); - return c_msk; - }; - const xt::xtensor<bool, 4> c_msk = gen_msk(); - - // generate bootstrap experiment if requested - std::vector<xt::xkeep_slice<int>> b_exp; - auto n_samples = bootstrap.find("n_samples")->second; - auto len_sample = bootstrap.find("len_sample")->second; - if ((n_samples != -9) and (len_sample != -9)) - { - if (dts.empty()) - throw std::runtime_error( - "bootstrap requested but datetimes not provided" - ); - - b_exp = eh::uncertainty::bootstrap( - dts, n_samples, len_sample - ); - } - else - { - // if no bootstrap requested, generate one sample - // containing all the time indices once - xt::xtensor<int, 1> all = xt::arange(n_tim); - b_exp.push_back(xt::keep(all)); - } - - // initialise data structure for outputs - std::vector<xt::xarray<double>> r; - for (const auto& metric : metrics) - r.emplace_back(xt::zeros<double>(dim[metric])); - - auto summary = bootstrap.find("summary")->second; - - // compute variables one site at a time to minimise memory imprint - for (int s = 0; s < n_sit; s++) - // compute variables one lead time at a time to minimise memory imprint - for (int l = 0; l < n_ltm; l++) - { - // instantiate probabilist evaluator - const auto q_obs_v = xt::view(q_obs, s, xt::all()); - const auto q_prd_v = xt::view(q_prd, s, l, xt::all(), xt::all()); - const auto q_thr_v = xt::view(q_thr, s, xt::all()); - const auto t_msk_v = - t_msk.size() > 0 ? - xt::view(t_msk, s, l, xt::all(), xt::all()) : - (m_cdt.size() > 0 ? - xt::view(c_msk, s, l, xt::all(), xt::all()) : - xt::view(t_msk, s, l, xt::all(), xt::all())); - - eh::probabilist::Evaluator evaluator( - q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp - ); - - // pre-compute required elt - for (const auto& element : req_elt) - { - if ( element == "o_k" ) - evaluator.calc_o_k(); - else if ( element == "bar_o" ) - evaluator.calc_bar_o(); - else if ( element == "y_k" ) - evaluator.calc_y_k(); - else if ( element == "q_qnt" ) - evaluator.calc_q_qnt(); - } - - // pre-compute required dep - for (const auto& dependency : req_dep) - { - if ( dependency == "bs" ) - evaluator.calc_bs(); - else if ( dependency == "qs" ) - evaluator.calc_qs(); - else if ( dependency == "crps" ) - evaluator.calc_crps(); - } - - // retrieve or compute requested metrics - for (int m = 0; m < metrics.size(); m++) - { - const auto& metric = metrics[m]; - - if ( metric == "BS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BS(); - // (sites, lead times, subsets, samples, thresholds) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BS, summary); - } - else if ( metric == "BSS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BSS(); - // (sites, lead times, subsets, samples, thresholds) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BSS, summary); - } - else if ( metric == "BS_CRD" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BS_CRD(); - // (sites, lead times, subsets, samples, thresholds, components) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BS_CRD, summary); - } - else if ( metric == "BS_LBD" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BS_LBD(); - // (sites, lead times, subsets, samples, thresholds, components) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BS_LBD, summary); - } - else if ( metric == "QS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_QS(); - // (sites, lead times, subsets, samples, quantiles) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.QS, summary); - } - else if ( metric == "CRPS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_CRPS(); - // (sites, lead times, subsets, samples) - xt::view(r[m], s, l, xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.CRPS, summary); - } - } - } - - return r; - } + ); } -#endif //EVALHYD_PROBABILIST_HPP +#endif //EVALHYD_EVALP_HPP diff --git a/src/determinist/evald.cpp b/src/determinist/evald.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b39dd1484fc9ad2c48e76802d19a96c9174fcf51 --- /dev/null +++ b/src/determinist/evald.cpp @@ -0,0 +1,264 @@ +#include <unordered_map> +#include <vector> +#include <array> +#include <stdexcept> +#include <xtensor/xexpression.hpp> +#include <xtensor/xarray.hpp> +#include <xtensor/xscalar.hpp> + +#include "../utils.hpp" +#include "../masks.hpp" +#include "../maths.hpp" +#include "../uncertainty.hpp" +#include "evaluator.hpp" + +namespace eh = evalhyd; + +namespace evalhyd +{ + std::vector<xt::xarray<double>> evald( + const xt::xtensor<double, 2>& q_obs, + const xt::xtensor<double, 2>& q_prd, + const std::vector<std::string>& metrics, + const std::string& transform = "none", + const double exponent = 1, + double epsilon = -9, + const xt::xtensor<bool, 2>& t_msk = {}, + const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {}, + const std::unordered_map<std::string, int>& bootstrap = + {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, + const std::vector<std::string>& dts = {} + ) + { + // check that the metrics to be computed are valid + utils::check_metrics( + metrics, + {"RMSE", "NSE", "KGE", "KGEPRIME"} + ); + + // check that optional parameters are valid + eh::utils::check_bootstrap(bootstrap); + + // check that data dimensions are compatible + // > time + if (q_obs.shape(1) != q_prd.shape(1)) + throw std::runtime_error( + "observations and predictions feature different " + "temporal lengths" + ); + if (t_msk.size() > 0) + if (q_obs.shape(1) != t_msk.shape(1)) + throw std::runtime_error( + "observations and masks feature different " + "temporal lengths" + ); + if (!dts.empty()) + if (q_obs.shape(1) != dts.size()) + throw std::runtime_error( + "observations and datetimes feature different " + "temporal lengths" + ); + // > series + if (q_obs.shape(0) != 1) + throw std::runtime_error( + "observations contain more than one time series" + ); + + // retrieve dimensions + std::size_t n_tim = q_obs.shape(1); + std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(0) : + (m_cdt.size() > 0 ? m_cdt.shape(0) : 1); + + // initialise a mask if none provided + // (corresponding to no temporal subset) + auto gen_msk = [&]() { + // if t_msk provided, it takes priority + if (t_msk.size() > 0) + return t_msk; + // else if m_cdt provided, use them to generate t_msk + else if (m_cdt.size() > 0) + { + xt::xtensor<bool, 2> c_msk = xt::zeros<bool>({n_msk, n_tim}); + + for (int m = 0; m < n_msk; m++) + xt::view(c_msk, m) = + eh::masks::generate_mask_from_conditions( + m_cdt[0], xt::view(q_obs, 0), q_prd + ); + + return c_msk; + } + // if neither t_msk nor m_cdt provided, generate dummy mask + else + return xt::xtensor<bool, 2>{xt::ones<bool>({std::size_t{1}, n_tim})}; + }; + + auto msk = gen_msk(); + + // apply streamflow transformation if requested + auto q_transform = [&](const xt::xtensor<double, 2>& q) + { + if ( transform == "none" || (transform == "pow" && exponent == 1)) + { + return q; + } + else if ( transform == "sqrt" ) + { + return xt::eval(xt::sqrt(q)); + } + else if ( transform == "inv" ) + { + if ( epsilon == -9 ) + // determine an epsilon value to avoid zero divide + epsilon = xt::mean(q_obs)() * 0.01; + + return xt::eval(1. / (q + epsilon)); + } + else if ( transform == "log" ) + { + if ( epsilon == -9 ) + // determine an epsilon value to avoid log zero + epsilon = xt::mean(q_obs)() * 0.01; + + return xt::eval(xt::log(q + epsilon)); + } + else if ( transform == "pow" ) + { + if ( exponent < 0 ) + { + if ( epsilon == -9 ) + // determine an epsilon value to avoid zero divide + epsilon = xt::mean(q_obs)() * 0.01; + + return xt::eval(xt::pow(q + epsilon, exponent)); + } + else + { + return xt::eval(xt::pow(q, exponent)); + } + } + else + { + throw std::runtime_error( + "invalid streamflow transformation: " + transform + ); + } + }; + + auto obs = q_transform(q_obs); + auto prd = q_transform(q_prd); + + // generate bootstrap experiment if requested + std::vector<xt::xkeep_slice<int>> exp; + auto n_samples = bootstrap.find("n_samples")->second; + auto len_sample = bootstrap.find("len_sample")->second; + if ((n_samples != -9) and (len_sample != -9)) + { + if (dts.empty()) + throw std::runtime_error( + "bootstrap requested but datetimes not provided" + ); + + exp = eh::uncertainty::bootstrap( + dts, n_samples, len_sample + ); + } + else + { + // if no bootstrap requested, generate one sample + // containing all the time indices once + xt::xtensor<int, 1> all = xt::arange(n_tim); + exp.push_back(xt::keep(all)); + } + + // instantiate determinist evaluator + eh::determinist::Evaluator evaluator(obs, prd, msk, exp); + + // declare maps for memoisation purposes + std::unordered_map<std::string, std::vector<std::string>> elt; + std::unordered_map<std::string, std::vector<std::string>> dep; + + // register potentially recurring computation elt across metrics + elt["RMSE"] = {"quad_err"}; + elt["NSE"] = {"mean_obs", "quad_obs", "quad_err"}; + elt["KGE"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", + "r_pearson", "alpha", "bias"}; + elt["KGEPRIME"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", + "r_pearson", "alpha", "bias"}; + + // register nested metrics (i.e. metric dependent on another metric) + // TODO + + // determine required elt/dep to be pre-computed + std::vector<std::string> req_elt; + std::vector<std::string> req_dep; + + eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); + + // pre-compute required elt + for ( const auto& element : req_elt ) + { + if ( element == "mean_obs" ) + evaluator.calc_mean_obs(); + else if ( element == "mean_prd" ) + evaluator.calc_mean_prd(); + else if ( element == "quad_err" ) + evaluator.calc_quad_err(); + else if ( element == "quad_obs" ) + evaluator.calc_quad_obs(); + else if ( element == "quad_prd" ) + evaluator.calc_quad_prd(); + else if ( element == "r_pearson" ) + evaluator.calc_r_pearson(); + else if ( element == "alpha" ) + evaluator.calc_alpha(); + else if ( element == "bias" ) + evaluator.calc_bias(); + } + + // pre-compute required dep + for ( const auto& dependency : req_dep ) + { + // TODO + } + + // retrieve or compute requested metrics + std::vector<xt::xarray<double>> r; + + auto summary = bootstrap.find("summary")->second; + + for ( const auto& metric : metrics ) + { + if ( metric == "RMSE" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_RMSE(); + r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary)); + } + else if ( metric == "NSE" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_NSE(); + r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary)); + } + else if ( metric == "KGE" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_KGE(); + r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary)); + } + else if ( metric == "KGEPRIME" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_KGEPRIME(); + r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary)); + } + } + + return r; + } +} diff --git a/src/probabilist/evalp.cpp b/src/probabilist/evalp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..71f2e3800655bb836b1c7a8aaa2be7997df719a6 --- /dev/null +++ b/src/probabilist/evalp.cpp @@ -0,0 +1,286 @@ +#include <utility> +#include <unordered_map> +#include <vector> +#include <array> +#include <stdexcept> +#include <xtensor/xtensor.hpp> +#include <xtensor/xarray.hpp> +#include <xtensor/xview.hpp> + +#include "../utils.hpp" +#include "../masks.hpp" +#include "../maths.hpp" +#include "../uncertainty.hpp" +#include "evaluator.h" + +namespace eh = evalhyd; + +namespace evalhyd +{ + std::vector<xt::xarray<double>> evalp( + const xt::xtensor<double, 2>& q_obs, + const xt::xtensor<double, 4>& q_prd, + const std::vector<std::string>& metrics, + const xt::xtensor<double, 2>& q_thr = {}, + const xt::xtensor<bool, 4>& t_msk = {}, + const xt::xtensor<std::array<char, 32>, 2>& m_cdt = {}, + const std::unordered_map<std::string, int>& bootstrap = + {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, + const std::vector<std::string>& dts = {} + ) + { + // check that the metrics to be computed are valid + eh::utils::check_metrics( + metrics, + {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"} + ); + + // check that optional parameters are given as arguments + eh::utils::evalp::check_optionals(metrics, q_thr); + eh::utils::check_bootstrap(bootstrap); + + // check that data dimensions are compatible + // > time + if (q_obs.shape(1) != q_prd.shape(3)) + throw std::runtime_error( + "observations and predictions feature different " + "temporal lengths" + ); + if (t_msk.size() > 0) + if (q_obs.shape(1) != t_msk.shape(3)) + throw std::runtime_error( + "observations and masks feature different " + "temporal lengths" + ); + if (!dts.empty()) + if (q_obs.shape(1) != dts.size()) + throw std::runtime_error( + "observations and datetimes feature different " + "temporal lengths" + ); + // > leadtimes + if (t_msk.size() > 0) + if (q_prd.shape(1) != t_msk.shape(1)) + throw std::runtime_error( + "predictions and temporal masks feature different " + "numbers of lead times" + ); + // > sites + if (q_obs.shape(0) != q_prd.shape(0)) + throw std::runtime_error( + "observations and predictions feature different " + "numbers of sites" + ); + if (t_msk.size() > 0) + if (q_obs.shape(0) != t_msk.shape(0)) + throw std::runtime_error( + "observations and temporal masks feature different " + "numbers of sites" + ); + if (m_cdt.size() > 0) + if (q_obs.shape(0) != m_cdt.shape(0)) + throw std::runtime_error( + "observations and masking conditions feature different " + "numbers of sites" + ); + + // retrieve dimensions + std::size_t n_sit = q_prd.shape(0); + std::size_t n_ltm = q_prd.shape(1); + std::size_t n_mbr = q_prd.shape(2); + std::size_t n_tim = q_prd.shape(3); + std::size_t n_thr = q_thr.shape(1); + std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(2) : + (m_cdt.size() > 0 ? m_cdt.shape(1) : 1); + std::size_t n_exp = bootstrap.find("n_samples")->second == -9 ? 1: + bootstrap.find("n_samples")->second; + + // register metrics number of dimensions + std::unordered_map<std::string, std::vector<std::size_t>> dim; + + dim["BS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; + dim["BSS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; + dim["BS_CRD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; + dim["BS_LBD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; + dim["QS"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr}; + dim["CRPS"] = {n_sit, n_ltm, n_msk, n_exp}; + + // declare maps for memoisation purposes + std::unordered_map<std::string, std::vector<std::string>> elt; + std::unordered_map<std::string, std::vector<std::string>> dep; + + // register potentially recurring computation elements across metrics + elt["bs"] = {"o_k", "y_k"}; + elt["BSS"] = {"o_k", "bar_o"}; + elt["BS_CRD"] = {"o_k", "bar_o", "y_k"}; + elt["BS_LBD"] = {"o_k", "y_k"}; + elt["qs"] = {"q_qnt"}; + + // register nested metrics (i.e. metric dependent on another metric) + dep["BS"] = {"bs"}; + dep["BSS"] = {"bs"}; + dep["QS"] = {"qs"}; + dep["CRPS"] = {"qs", "crps"}; + + // determine required elt/dep to be pre-computed + std::vector<std::string> req_elt; + std::vector<std::string> req_dep; + + eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); + + // generate masks from conditions if provided + auto gen_msk = [&]() { + xt::xtensor<bool, 4> c_msk = xt::zeros<bool>({n_sit, n_ltm, n_msk, n_tim}); + if (m_cdt.size() > 0) + for (int s = 0; s < n_sit; s++) + for (int l = 0; l < n_ltm; l++) + for (int m = 0; m < n_msk; m++) + xt::view(c_msk, s, l, m) = + eh::masks::generate_mask_from_conditions( + xt::view(m_cdt, s, m), + xt::view(q_obs, s), + xt::view(q_prd, s, l) + ); + return c_msk; + }; + const xt::xtensor<bool, 4> c_msk = gen_msk(); + + // generate bootstrap experiment if requested + std::vector<xt::xkeep_slice<int>> b_exp; + auto n_samples = bootstrap.find("n_samples")->second; + auto len_sample = bootstrap.find("len_sample")->second; + if ((n_samples != -9) and (len_sample != -9)) + { + if (dts.empty()) + throw std::runtime_error( + "bootstrap requested but datetimes not provided" + ); + + b_exp = eh::uncertainty::bootstrap( + dts, n_samples, len_sample + ); + } + else + { + // if no bootstrap requested, generate one sample + // containing all the time indices once + xt::xtensor<int, 1> all = xt::arange(n_tim); + b_exp.push_back(xt::keep(all)); + } + + // initialise data structure for outputs + std::vector<xt::xarray<double>> r; + for (const auto& metric : metrics) + r.emplace_back(xt::zeros<double>(dim[metric])); + + auto summary = bootstrap.find("summary")->second; + + // compute variables one site at a time to minimise memory imprint + for (int s = 0; s < n_sit; s++) + // compute variables one lead time at a time to minimise memory imprint + for (int l = 0; l < n_ltm; l++) + { + // instantiate probabilist evaluator + const auto q_obs_v = xt::view(q_obs, s, xt::all()); + const auto q_prd_v = xt::view(q_prd, s, l, xt::all(), xt::all()); + const auto q_thr_v = xt::view(q_thr, s, xt::all()); + const auto t_msk_v = + t_msk.size() > 0 ? + xt::view(t_msk, s, l, xt::all(), xt::all()) : + (m_cdt.size() > 0 ? + xt::view(c_msk, s, l, xt::all(), xt::all()) : + xt::view(t_msk, s, l, xt::all(), xt::all())); + + eh::probabilist::Evaluator evaluator( + q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp + ); + + // pre-compute required elt + for (const auto& element : req_elt) + { + if ( element == "o_k" ) + evaluator.calc_o_k(); + else if ( element == "bar_o" ) + evaluator.calc_bar_o(); + else if ( element == "y_k" ) + evaluator.calc_y_k(); + else if ( element == "q_qnt" ) + evaluator.calc_q_qnt(); + } + + // pre-compute required dep + for (const auto& dependency : req_dep) + { + if ( dependency == "bs" ) + evaluator.calc_bs(); + else if ( dependency == "qs" ) + evaluator.calc_qs(); + else if ( dependency == "crps" ) + evaluator.calc_crps(); + } + + // retrieve or compute requested metrics + for (int m = 0; m < metrics.size(); m++) + { + const auto& metric = metrics[m]; + + if ( metric == "BS" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_BS(); + // (sites, lead times, subsets, samples, thresholds) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.BS, summary); + } + else if ( metric == "BSS" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_BSS(); + // (sites, lead times, subsets, samples, thresholds) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.BSS, summary); + } + else if ( metric == "BS_CRD" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_BS_CRD(); + // (sites, lead times, subsets, samples, thresholds, components) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.BS_CRD, summary); + } + else if ( metric == "BS_LBD" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_BS_LBD(); + // (sites, lead times, subsets, samples, thresholds, components) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.BS_LBD, summary); + } + else if ( metric == "QS" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_QS(); + // (sites, lead times, subsets, samples, quantiles) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.QS, summary); + } + else if ( metric == "CRPS" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_CRPS(); + // (sites, lead times, subsets, samples) + xt::view(r[m], s, l, xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.CRPS, summary); + } + } + } + + return r; + } +}