diff --git a/.gitignore b/.gitignore index 574f8a042a29ad9c822aa5fd84011298f7faa388..ff217764ac1a24209bc3aa4bf0614ea45936aaa9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,5 @@ # CMake directories -cmake-build-debug/ -tests/cmake-build-debug/ - -# GoogleTest directories -tests/googletest-subbuild/ +build/ # macOS specific .DS_Store diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e995a411660356041b782fc53bbcd9ee94d8a978..b6d25f1ae05d8da040d4d6bbfa1c1569a6d9ac8f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,31 +1,16 @@ -image: gcc - -variables: - GIT_SUBMODULE_STRATEGY: recursive +image: mambaorg/micromamba stages: - - build - - test - -default: - before_script: - - apt update && apt -y install make autoconf cmake + - build_and_test build: - stage: build - script: - - mkdir build - - cmake -S tests -B build - - cmake --build build - artifacts: - expire_in: 2 hrs - paths: - - build/ - -test: - stage: test + stage: build_and_test script: - - cd build - - ctest . -V - dependencies: - - build + # install dependencies + - micromamba install --yes --file environment-dev.yml + # configure evalhyd + - cmake -B build -D CMAKE_BUILD_TYPE=Release -D CMAKE_PREFIX_PATH="$CONDA_PREFIX" + # compile evalhyd + - cmake --build build --parallel + # run tests + - ./build/tests/evalhyd_tests diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 38ab18289c7cd2adc6e3c066f26153694ce39ed5..0000000000000000000000000000000000000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "deps/xtensor"] - path = deps/xtensor - url = https://github.com/xtensor-stack/xtensor -[submodule "deps/xtl"] - path = deps/xtl - url = https://github.com/xtensor-stack/xtl diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..049111a0050fff22a616b9a0abcc172e95a02797 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,122 @@ +cmake_minimum_required(VERSION 3.15) + +project( + EvalHyd + LANGUAGES CXX + VERSION 0.0.1 + DESCRIPTION "Utility to evaluate streamflow predictions" +) + +# ------------------------------------------------------------------------------ +# dependencies +# ------------------------------------------------------------------------------ + +find_package(xtensor REQUIRED) +message(STATUS "Found xtensor: ${xtensor_INCLUDE_DIRS}/xtensor") + +# ------------------------------------------------------------------------------ +# build +# ------------------------------------------------------------------------------ + +# define evalhyd library +add_library( + evalhyd + src/determinist/evald.cpp + src/probabilist/evalp.cpp + src/probabilist/evaluator_brier.cpp + src/probabilist/evaluator_elements.cpp + src/probabilist/evaluator_quantiles.cpp +) + +add_library(EvalHyd::evalhyd ALIAS evalhyd) + +set_target_properties( + evalhyd + PROPERTIES + VISIBILITY_INLINES_HIDDEN ON +) + +target_include_directories( + evalhyd + PUBLIC + $<INSTALL_INTERFACE:include> + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src +) + +target_link_libraries( + evalhyd + PUBLIC + xtensor +) + +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options( + evalhyd + PRIVATE + "/bigobj" + ) +endif() + +target_compile_features( + evalhyd + PUBLIC + cxx_std_14 +) + +# test suite +OPTION(EVALHYD_BUILD_TEST "configure and compile tests" ON) + +if(EVALHYD_BUILD_TEST) + add_subdirectory(tests) +endif() + +# ------------------------------------------------------------------------------ +# installation +# ------------------------------------------------------------------------------ + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +# install library file (.a/.so) +install( + TARGETS evalhyd + EXPORT evalhyd-targets +) + +# install headers +install( + DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/evalhyd" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" + FILES_MATCHING PATTERN "*.hpp" +) + +# generate target file +export( + EXPORT evalhyd-targets + FILE "${CMAKE_CURRENT_BINARY_DIR}/EvalHydTargets.cmake" +) + +# install target file +install( + EXPORT evalhyd-targets + FILE "EvalHydTargets.cmake" + NAMESPACE EvalHyd:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/EvalHyd +) + +# generate config file +configure_package_config_file( + EvalHydConfig.cmake.in + "${CMAKE_CURRENT_BINARY_DIR}/EvalHydConfig.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/EvalHyd +) + +# install files +install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/EvalHydConfig.cmake" + DESTINATION + "${CMAKE_INSTALL_LIBDIR}/cmake/EvalHyd" +) diff --git a/EvalHydConfig.cmake.in b/EvalHydConfig.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..ab97daed8af49db28f7fc159b741f6c3d641781f --- /dev/null +++ b/EvalHydConfig.cmake.in @@ -0,0 +1,10 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +# find public dependencies +find_dependency(xtensor @xtensor_VERSION@) + +if(NOT TARGET EvalHyd::evalhyd) + include("${CMAKE_CURRENT_LIST_DIR}/EvalHydTargets.cmake") +endif() diff --git a/README.md b/README.md index 6b752f94168a6811b7332a3d5360b5db85425256..6108d5ec014cf07c14be3751894cbfa446f6be09 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,26 @@ Utility to evaluate deterministic and probabilistic streamflow predictions Documentation: https://hycar-hydro.gitlab.irstea.page/evalhyd/evalhyd-docs/cpp + +## How to build + +Configure project in debug mode finding libraries in conda environment: +```shell +cmake -B build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_PREFIX_PATH="$CONDA_PREFIX" +``` + +Compile with: +```shell +cmake --build build/ --parallel 2 +``` + +Run tests with: +```shell +./build/tests/evalhyd_tests +``` + +## How to install + +```shell +cmake --install build/ --prefix <path> +``` diff --git a/deps/xtensor b/deps/xtensor deleted file mode 160000 index 7c5078b9b584e10b681c96414224d7957b9472e9..0000000000000000000000000000000000000000 --- a/deps/xtensor +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7c5078b9b584e10b681c96414224d7957b9472e9 diff --git a/deps/xtl b/deps/xtl deleted file mode 160000 index e697c91e2a3ac571d120d2b093fb3b250d060a7d..0000000000000000000000000000000000000000 --- a/deps/xtl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e697c91e2a3ac571d120d2b093fb3b250d060a7d diff --git a/environment-dev.yml b/environment-dev.yml new file mode 100644 index 0000000000000000000000000000000000000000..deaaf2428e75d27e30fe3e9fda2f490ad4f6ed81 --- /dev/null +++ b/environment-dev.yml @@ -0,0 +1,14 @@ +channels: + - conda-forge +dependencies: + # Build dependencies + - cxx-compiler + - c-compiler + - cmake + - make + # Host dependencies + - xtl + - xtensor + # Test dependencies + - gtest + - gmock diff --git a/include/evalhyd/evald.hpp b/include/evalhyd/evald.hpp index 7ae018e12ae57a0b5b337c0f8d78898b37895fa7..45ff6591af894998e42063bf2a6b534db476f237 100644 --- a/include/evalhyd/evald.hpp +++ b/include/evalhyd/evald.hpp @@ -1,21 +1,12 @@ -#ifndef EVALHYD_DETERMINIST_HPP -#define EVALHYD_DETERMINIST_HPP +#ifndef EVALHYD_EVALD_HPP +#define EVALHYD_EVALD_HPP #include <unordered_map> #include <vector> -#include <array> -#include <stdexcept> -#include <xtensor/xexpression.hpp> -#include <xtensor/xarray.hpp> -#include <xtensor/xscalar.hpp> -#include "../../src/utils.hpp" -#include "../../src/masks.hpp" -#include "../../src/maths.hpp" -#include "../../src/uncertainty.hpp" -#include "../../src/determinist/evaluator.hpp" +#include <xtensor/xtensor.hpp> +#include <xtensor/xarray.hpp> -namespace eh = evalhyd; namespace evalhyd { @@ -158,239 +149,7 @@ namespace evalhyd const std::unordered_map<std::string, int>& bootstrap = {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, const std::vector<std::string>& dts = {} - ) - { - // check that the metrics to be computed are valid - utils::check_metrics( - metrics, - {"RMSE", "NSE", "KGE", "KGEPRIME"} - ); - - // check that optional parameters are valid - eh::utils::check_bootstrap(bootstrap); - - // check that data dimensions are compatible - // > time - if (q_obs.shape(1) != q_prd.shape(1)) - throw std::runtime_error( - "observations and predictions feature different " - "temporal lengths" - ); - if (t_msk.size() > 0) - if (q_obs.shape(1) != t_msk.shape(1)) - throw std::runtime_error( - "observations and masks feature different " - "temporal lengths" - ); - if (!dts.empty()) - if (q_obs.shape(1) != dts.size()) - throw std::runtime_error( - "observations and datetimes feature different " - "temporal lengths" - ); - // > series - if (q_obs.shape(0) != 1) - throw std::runtime_error( - "observations contain more than one time series" - ); - - // retrieve dimensions - std::size_t n_tim = q_obs.shape(1); - std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(0) : - (m_cdt.size() > 0 ? m_cdt.shape(0) : 1); - - // initialise a mask if none provided - // (corresponding to no temporal subset) - auto gen_msk = [&]() { - // if t_msk provided, it takes priority - if (t_msk.size() > 0) - return t_msk; - // else if m_cdt provided, use them to generate t_msk - else if (m_cdt.size() > 0) - { - xt::xtensor<bool, 2> c_msk = xt::zeros<bool>({n_msk, n_tim}); - - for (int m = 0; m < n_msk; m++) - xt::view(c_msk, m) = - eh::masks::generate_mask_from_conditions( - m_cdt[0], xt::view(q_obs, 0), q_prd - ); - - return c_msk; - } - // if neither t_msk nor m_cdt provided, generate dummy mask - else - return xt::xtensor<bool, 2>{xt::ones<bool>({std::size_t{1}, n_tim})}; - }; - - auto msk = gen_msk(); - - // apply streamflow transformation if requested - auto q_transform = [&](const xt::xtensor<double, 2>& q) - { - if ( transform == "none" || (transform == "pow" && exponent == 1)) - { - return q; - } - else if ( transform == "sqrt" ) - { - return xt::eval(xt::sqrt(q)); - } - else if ( transform == "inv" ) - { - if ( epsilon == -9 ) - // determine an epsilon value to avoid zero divide - epsilon = xt::mean(q_obs)() * 0.01; - - return xt::eval(1. / (q + epsilon)); - } - else if ( transform == "log" ) - { - if ( epsilon == -9 ) - // determine an epsilon value to avoid log zero - epsilon = xt::mean(q_obs)() * 0.01; - - return xt::eval(xt::log(q + epsilon)); - } - else if ( transform == "pow" ) - { - if ( exponent < 0 ) - { - if ( epsilon == -9 ) - // determine an epsilon value to avoid zero divide - epsilon = xt::mean(q_obs)() * 0.01; - - return xt::eval(xt::pow(q + epsilon, exponent)); - } - else - { - return xt::eval(xt::pow(q, exponent)); - } - } - else - { - throw std::runtime_error( - "invalid streamflow transformation: " + transform - ); - } - }; - - auto obs = q_transform(q_obs); - auto prd = q_transform(q_prd); - - // generate bootstrap experiment if requested - std::vector<xt::xkeep_slice<int>> exp; - auto n_samples = bootstrap.find("n_samples")->second; - auto len_sample = bootstrap.find("len_sample")->second; - if ((n_samples != -9) and (len_sample != -9)) - { - if (dts.empty()) - throw std::runtime_error( - "bootstrap requested but datetimes not provided" - ); - - exp = eh::uncertainty::bootstrap( - dts, n_samples, len_sample - ); - } - else - { - // if no bootstrap requested, generate one sample - // containing all the time indices once - xt::xtensor<int, 1> all = xt::arange(n_tim); - exp.push_back(xt::keep(all)); - } - - // instantiate determinist evaluator - eh::determinist::Evaluator evaluator(obs, prd, msk, exp); - - // declare maps for memoisation purposes - std::unordered_map<std::string, std::vector<std::string>> elt; - std::unordered_map<std::string, std::vector<std::string>> dep; - - // register potentially recurring computation elt across metrics - elt["RMSE"] = {"quad_err"}; - elt["NSE"] = {"mean_obs", "quad_obs", "quad_err"}; - elt["KGE"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", - "r_pearson", "alpha", "bias"}; - elt["KGEPRIME"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", - "r_pearson", "alpha", "bias"}; - - // register nested metrics (i.e. metric dependent on another metric) - // TODO - - // determine required elt/dep to be pre-computed - std::vector<std::string> req_elt; - std::vector<std::string> req_dep; - - eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); - - // pre-compute required elt - for ( const auto& element : req_elt ) - { - if ( element == "mean_obs" ) - evaluator.calc_mean_obs(); - else if ( element == "mean_prd" ) - evaluator.calc_mean_prd(); - else if ( element == "quad_err" ) - evaluator.calc_quad_err(); - else if ( element == "quad_obs" ) - evaluator.calc_quad_obs(); - else if ( element == "quad_prd" ) - evaluator.calc_quad_prd(); - else if ( element == "r_pearson" ) - evaluator.calc_r_pearson(); - else if ( element == "alpha" ) - evaluator.calc_alpha(); - else if ( element == "bias" ) - evaluator.calc_bias(); - } - - // pre-compute required dep - for ( const auto& dependency : req_dep ) - { - // TODO - } - - // retrieve or compute requested metrics - std::vector<xt::xarray<double>> r; - - auto summary = bootstrap.find("summary")->second; - - for ( const auto& metric : metrics ) - { - if ( metric == "RMSE" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_RMSE(); - r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary)); - } - else if ( metric == "NSE" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_NSE(); - r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary)); - } - else if ( metric == "KGE" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_KGE(); - r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary)); - } - else if ( metric == "KGEPRIME" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_KGEPRIME(); - r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary)); - } - } - - return r; - } + ); } -#endif //EVALHYD_DETERMINIST_HPP +#endif //EVALHYD_EVALD_HPP diff --git a/include/evalhyd/evalp.hpp b/include/evalhyd/evalp.hpp index 5d4c1f00328c0c0edbbe00fc785f007ab50b2f46..6fbce8f1bf4bbbcd6dcc9cf3de92e0bd9c1e93b8 100644 --- a/include/evalhyd/evalp.hpp +++ b/include/evalhyd/evalp.hpp @@ -1,22 +1,12 @@ -#ifndef EVALHYD_PROBABILIST_HPP -#define EVALHYD_PROBABILIST_HPP +#ifndef EVALHYD_EVALP_HPP +#define EVALHYD_EVALP_HPP -#include <utility> #include <unordered_map> #include <vector> -#include <array> -#include <stdexcept> + #include <xtensor/xtensor.hpp> #include <xtensor/xarray.hpp> -#include <xtensor/xview.hpp> - -#include "../../src/utils.hpp" -#include "../../src/masks.hpp" -#include "../../src/maths.hpp" -#include "../../src/uncertainty.hpp" -#include "../../src/probabilist/evaluator.h" -namespace eh = evalhyd; namespace evalhyd { @@ -135,262 +125,7 @@ namespace evalhyd const std::unordered_map<std::string, int>& bootstrap = {{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}}, const std::vector<std::string>& dts = {} - ) - { - // check that the metrics to be computed are valid - eh::utils::check_metrics( - metrics, - {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"} - ); - - // check that optional parameters are given as arguments - eh::utils::evalp::check_optionals(metrics, q_thr); - eh::utils::check_bootstrap(bootstrap); - - // check that data dimensions are compatible - // > time - if (q_obs.shape(1) != q_prd.shape(3)) - throw std::runtime_error( - "observations and predictions feature different " - "temporal lengths" - ); - if (t_msk.size() > 0) - if (q_obs.shape(1) != t_msk.shape(3)) - throw std::runtime_error( - "observations and masks feature different " - "temporal lengths" - ); - if (!dts.empty()) - if (q_obs.shape(1) != dts.size()) - throw std::runtime_error( - "observations and datetimes feature different " - "temporal lengths" - ); - // > leadtimes - if (t_msk.size() > 0) - if (q_prd.shape(1) != t_msk.shape(1)) - throw std::runtime_error( - "predictions and temporal masks feature different " - "numbers of lead times" - ); - // > sites - if (q_obs.shape(0) != q_prd.shape(0)) - throw std::runtime_error( - "observations and predictions feature different " - "numbers of sites" - ); - if (t_msk.size() > 0) - if (q_obs.shape(0) != t_msk.shape(0)) - throw std::runtime_error( - "observations and temporal masks feature different " - "numbers of sites" - ); - if (m_cdt.size() > 0) - if (q_obs.shape(0) != m_cdt.shape(0)) - throw std::runtime_error( - "observations and masking conditions feature different " - "numbers of sites" - ); - - // retrieve dimensions - std::size_t n_sit = q_prd.shape(0); - std::size_t n_ltm = q_prd.shape(1); - std::size_t n_mbr = q_prd.shape(2); - std::size_t n_tim = q_prd.shape(3); - std::size_t n_thr = q_thr.shape(1); - std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(2) : - (m_cdt.size() > 0 ? m_cdt.shape(1) : 1); - std::size_t n_exp = bootstrap.find("n_samples")->second == -9 ? 1: - bootstrap.find("n_samples")->second; - - // register metrics number of dimensions - std::unordered_map<std::string, std::vector<std::size_t>> dim; - - dim["BS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; - dim["BSS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; - dim["BS_CRD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; - dim["BS_LBD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; - dim["QS"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr}; - dim["CRPS"] = {n_sit, n_ltm, n_msk, n_exp}; - - // declare maps for memoisation purposes - std::unordered_map<std::string, std::vector<std::string>> elt; - std::unordered_map<std::string, std::vector<std::string>> dep; - - // register potentially recurring computation elements across metrics - elt["bs"] = {"o_k", "y_k"}; - elt["BSS"] = {"o_k", "bar_o"}; - elt["BS_CRD"] = {"o_k", "bar_o", "y_k"}; - elt["BS_LBD"] = {"o_k", "y_k"}; - elt["qs"] = {"q_qnt"}; - - // register nested metrics (i.e. metric dependent on another metric) - dep["BS"] = {"bs"}; - dep["BSS"] = {"bs"}; - dep["QS"] = {"qs"}; - dep["CRPS"] = {"qs", "crps"}; - - // determine required elt/dep to be pre-computed - std::vector<std::string> req_elt; - std::vector<std::string> req_dep; - - eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); - - // generate masks from conditions if provided - auto gen_msk = [&]() { - xt::xtensor<bool, 4> c_msk = xt::zeros<bool>({n_sit, n_ltm, n_msk, n_tim}); - if (m_cdt.size() > 0) - for (int s = 0; s < n_sit; s++) - for (int l = 0; l < n_ltm; l++) - for (int m = 0; m < n_msk; m++) - xt::view(c_msk, s, l, m) = - eh::masks::generate_mask_from_conditions( - xt::view(m_cdt, s, m), - xt::view(q_obs, s), - xt::view(q_prd, s, l) - ); - return c_msk; - }; - const xt::xtensor<bool, 4> c_msk = gen_msk(); - - // generate bootstrap experiment if requested - std::vector<xt::xkeep_slice<int>> b_exp; - auto n_samples = bootstrap.find("n_samples")->second; - auto len_sample = bootstrap.find("len_sample")->second; - if ((n_samples != -9) and (len_sample != -9)) - { - if (dts.empty()) - throw std::runtime_error( - "bootstrap requested but datetimes not provided" - ); - - b_exp = eh::uncertainty::bootstrap( - dts, n_samples, len_sample - ); - } - else - { - // if no bootstrap requested, generate one sample - // containing all the time indices once - xt::xtensor<int, 1> all = xt::arange(n_tim); - b_exp.push_back(xt::keep(all)); - } - - // initialise data structure for outputs - std::vector<xt::xarray<double>> r; - for (const auto& metric : metrics) - r.emplace_back(xt::zeros<double>(dim[metric])); - - auto summary = bootstrap.find("summary")->second; - - // compute variables one site at a time to minimise memory imprint - for (int s = 0; s < n_sit; s++) - // compute variables one lead time at a time to minimise memory imprint - for (int l = 0; l < n_ltm; l++) - { - // instantiate probabilist evaluator - const auto q_obs_v = xt::view(q_obs, s, xt::all()); - const auto q_prd_v = xt::view(q_prd, s, l, xt::all(), xt::all()); - const auto q_thr_v = xt::view(q_thr, s, xt::all()); - const auto t_msk_v = - t_msk.size() > 0 ? - xt::view(t_msk, s, l, xt::all(), xt::all()) : - (m_cdt.size() > 0 ? - xt::view(c_msk, s, l, xt::all(), xt::all()) : - xt::view(t_msk, s, l, xt::all(), xt::all())); - - eh::probabilist::Evaluator evaluator( - q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp - ); - - // pre-compute required elt - for (const auto& element : req_elt) - { - if ( element == "o_k" ) - evaluator.calc_o_k(); - else if ( element == "bar_o" ) - evaluator.calc_bar_o(); - else if ( element == "y_k" ) - evaluator.calc_y_k(); - else if ( element == "q_qnt" ) - evaluator.calc_q_qnt(); - } - - // pre-compute required dep - for (const auto& dependency : req_dep) - { - if ( dependency == "bs" ) - evaluator.calc_bs(); - else if ( dependency == "qs" ) - evaluator.calc_qs(); - else if ( dependency == "crps" ) - evaluator.calc_crps(); - } - - // retrieve or compute requested metrics - for (int m = 0; m < metrics.size(); m++) - { - const auto& metric = metrics[m]; - - if ( metric == "BS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BS(); - // (sites, lead times, subsets, samples, thresholds) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BS, summary); - } - else if ( metric == "BSS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BSS(); - // (sites, lead times, subsets, samples, thresholds) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BSS, summary); - } - else if ( metric == "BS_CRD" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BS_CRD(); - // (sites, lead times, subsets, samples, thresholds, components) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BS_CRD, summary); - } - else if ( metric == "BS_LBD" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_BS_LBD(); - // (sites, lead times, subsets, samples, thresholds, components) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.BS_LBD, summary); - } - else if ( metric == "QS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_QS(); - // (sites, lead times, subsets, samples, quantiles) - xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.QS, summary); - } - else if ( metric == "CRPS" ) - { - if (std::find(req_dep.begin(), req_dep.end(), metric) - == req_dep.end()) - evaluator.calc_CRPS(); - // (sites, lead times, subsets, samples) - xt::view(r[m], s, l, xt::all(), xt::all()) = - eh::uncertainty::summarise(evaluator.CRPS, summary); - } - } - } - - return r; - } + ); } -#endif //EVALHYD_PROBABILIST_HPP +#endif //EVALHYD_EVALP_HPP diff --git a/src/determinist/evald.cpp b/src/determinist/evald.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0ce84b6f9d6649c0f95b0ff8ac978552cc3c2df1 --- /dev/null +++ b/src/determinist/evald.cpp @@ -0,0 +1,265 @@ +#include <unordered_map> +#include <vector> +#include <array> +#include <stdexcept> +#include <xtensor/xexpression.hpp> +#include <xtensor/xarray.hpp> +#include <xtensor/xscalar.hpp> + +#include "evalhyd/evald.hpp" + +#include "utils.hpp" +#include "masks.hpp" +#include "maths.hpp" +#include "uncertainty.hpp" +#include "determinist/evaluator.hpp" + +namespace eh = evalhyd; + +namespace evalhyd +{ + std::vector<xt::xarray<double>> evald( + const xt::xtensor<double, 2>& q_obs, + const xt::xtensor<double, 2>& q_prd, + const std::vector<std::string>& metrics, + const std::string& transform, + const double exponent, + double epsilon, + const xt::xtensor<bool, 2>& t_msk, + const xt::xtensor<std::array<char, 32>, 1>& m_cdt, + const std::unordered_map<std::string, int>& bootstrap, + const std::vector<std::string>& dts + ) + { + // check that the metrics to be computed are valid + utils::check_metrics( + metrics, + {"RMSE", "NSE", "KGE", "KGEPRIME"} + ); + + // check that optional parameters are valid + eh::utils::check_bootstrap(bootstrap); + + // check that data dimensions are compatible + // > time + if (q_obs.shape(1) != q_prd.shape(1)) + throw std::runtime_error( + "observations and predictions feature different " + "temporal lengths" + ); + if (t_msk.size() > 0) + if (q_obs.shape(1) != t_msk.shape(1)) + throw std::runtime_error( + "observations and masks feature different " + "temporal lengths" + ); + if (!dts.empty()) + if (q_obs.shape(1) != dts.size()) + throw std::runtime_error( + "observations and datetimes feature different " + "temporal lengths" + ); + // > series + if (q_obs.shape(0) != 1) + throw std::runtime_error( + "observations contain more than one time series" + ); + + // retrieve dimensions + std::size_t n_tim = q_obs.shape(1); + std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(0) : + (m_cdt.size() > 0 ? m_cdt.shape(0) : 1); + + // initialise a mask if none provided + // (corresponding to no temporal subset) + auto gen_msk = [&]() { + // if t_msk provided, it takes priority + if (t_msk.size() > 0) + return t_msk; + // else if m_cdt provided, use them to generate t_msk + else if (m_cdt.size() > 0) + { + xt::xtensor<bool, 2> c_msk = xt::zeros<bool>({n_msk, n_tim}); + + for (int m = 0; m < n_msk; m++) + xt::view(c_msk, m) = + eh::masks::generate_mask_from_conditions( + m_cdt[0], xt::view(q_obs, 0), q_prd + ); + + return c_msk; + } + // if neither t_msk nor m_cdt provided, generate dummy mask + else + return xt::xtensor<bool, 2>{xt::ones<bool>({std::size_t{1}, n_tim})}; + }; + + auto msk = gen_msk(); + + // apply streamflow transformation if requested + auto q_transform = [&](const xt::xtensor<double, 2>& q) + { + if ( transform == "none" || (transform == "pow" && exponent == 1)) + { + return q; + } + else if ( transform == "sqrt" ) + { + return xt::eval(xt::sqrt(q)); + } + else if ( transform == "inv" ) + { + if ( epsilon == -9 ) + // determine an epsilon value to avoid zero divide + epsilon = xt::mean(q_obs)() * 0.01; + + return xt::eval(1. / (q + epsilon)); + } + else if ( transform == "log" ) + { + if ( epsilon == -9 ) + // determine an epsilon value to avoid log zero + epsilon = xt::mean(q_obs)() * 0.01; + + return xt::eval(xt::log(q + epsilon)); + } + else if ( transform == "pow" ) + { + if ( exponent < 0 ) + { + if ( epsilon == -9 ) + // determine an epsilon value to avoid zero divide + epsilon = xt::mean(q_obs)() * 0.01; + + return xt::eval(xt::pow(q + epsilon, exponent)); + } + else + { + return xt::eval(xt::pow(q, exponent)); + } + } + else + { + throw std::runtime_error( + "invalid streamflow transformation: " + transform + ); + } + }; + + auto obs = q_transform(q_obs); + auto prd = q_transform(q_prd); + + // generate bootstrap experiment if requested + std::vector<xt::xkeep_slice<int>> exp; + auto n_samples = bootstrap.find("n_samples")->second; + auto len_sample = bootstrap.find("len_sample")->second; + if ((n_samples != -9) && (len_sample != -9)) + { + if (dts.empty()) + throw std::runtime_error( + "bootstrap requested but datetimes not provided" + ); + + exp = eh::uncertainty::bootstrap( + dts, n_samples, len_sample + ); + } + else + { + // if no bootstrap requested, generate one sample + // containing all the time indices once + xt::xtensor<int, 1> all = xt::arange(n_tim); + exp.push_back(xt::keep(all)); + } + + // instantiate determinist evaluator + eh::determinist::Evaluator evaluator(obs, prd, msk, exp); + + // declare maps for memoisation purposes + std::unordered_map<std::string, std::vector<std::string>> elt; + std::unordered_map<std::string, std::vector<std::string>> dep; + + // register potentially recurring computation elt across metrics + elt["RMSE"] = {"quad_err"}; + elt["NSE"] = {"mean_obs", "quad_obs", "quad_err"}; + elt["KGE"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", + "r_pearson", "alpha", "bias"}; + elt["KGEPRIME"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd", + "r_pearson", "alpha", "bias"}; + + // register nested metrics (i.e. metric dependent on another metric) + // TODO + + // determine required elt/dep to be pre-computed + std::vector<std::string> req_elt; + std::vector<std::string> req_dep; + + eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); + + // pre-compute required elt + for ( const auto& element : req_elt ) + { + if ( element == "mean_obs" ) + evaluator.calc_mean_obs(); + else if ( element == "mean_prd" ) + evaluator.calc_mean_prd(); + else if ( element == "quad_err" ) + evaluator.calc_quad_err(); + else if ( element == "quad_obs" ) + evaluator.calc_quad_obs(); + else if ( element == "quad_prd" ) + evaluator.calc_quad_prd(); + else if ( element == "r_pearson" ) + evaluator.calc_r_pearson(); + else if ( element == "alpha" ) + evaluator.calc_alpha(); + else if ( element == "bias" ) + evaluator.calc_bias(); + } + + // pre-compute required dep + for ( const auto& dependency : req_dep ) + { + // TODO + } + + // retrieve or compute requested metrics + std::vector<xt::xarray<double>> r; + + auto summary = bootstrap.find("summary")->second; + + for ( const auto& metric : metrics ) + { + if ( metric == "RMSE" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_RMSE(); + r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary)); + } + else if ( metric == "NSE" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_NSE(); + r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary)); + } + else if ( metric == "KGE" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_KGE(); + r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary)); + } + else if ( metric == "KGEPRIME" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_KGEPRIME(); + r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary)); + } + } + + return r; + } +} diff --git a/src/masks.hpp b/src/masks.hpp index d3ce0e696bce233c694bdae3833aa5be5497a55b..d4a1c6bd8c9e09013bb4c23b15165f3ea67ee5a1 100644 --- a/src/masks.hpp +++ b/src/masks.hpp @@ -68,8 +68,8 @@ namespace evalhyd else if (supported_op.find(mt[1]) != supported_op.end()) { if ((mt[2].str() == "median") - or (mt[2].str() == "mean") - or (mt[2].str() == "quantile")) + || (mt[2].str() == "mean") + || (mt[2].str() == "quantile")) conditions.push_back({mt[1].str(), mt[2].str(), mt[3].str()}); else // it is a simple numerical value, swap last two @@ -174,8 +174,8 @@ namespace evalhyd auto cond = var_cond.second; // condition on streamflow - if ((var == "q_obs") or (var == "q_prd_median") - or (var == "q_prd_mean")) + if ((var == "q_obs") || (var == "q_prd_median") + || (var == "q_prd_mean")) { // preprocess streamflow depending on kind auto get_q = [&]() { @@ -233,18 +233,18 @@ namespace evalhyd opr2 = cond[1][0]; val2 = get_val(cond[1][1], cond[1][2]); - if ((opr1 == "<") or (opr1 == "<=")) + if ((opr1 == "<") || (opr1 == "<=")) { - if ((opr2 == ">") or (opr2 == ">=")) + if ((opr2 == ">") || (opr2 == ">=")) { if (val2 > val1) without = true; else { within = true; } } } - else if ((opr1 == ">") or (opr1 == ">=")) + else if ((opr1 == ">") || (opr1 == ">=")) { - if ((opr2 == "<") or (opr2 == "<=")) + if ((opr2 == "<") || (opr2 == "<=")) { if (val2 > val1) within = true; @@ -257,57 +257,57 @@ namespace evalhyd // within/without if (within) { - if ((opr1 == "<") and (opr2 == ">")) + if ((opr1 == "<") && (opr2 == ">")) t_msk = xt::where((q < val1) & (q > val2), 1, t_msk); - else if ((opr1 == "<=") and (opr2 == ">")) + else if ((opr1 == "<=") && (opr2 == ">")) t_msk = xt::where((q <= val1) & (q > val2), 1, t_msk); - else if ((opr1 == "<") and (opr2 == ">=")) + else if ((opr1 == "<") && (opr2 == ">=")) t_msk = xt::where((q < val1) & (q >= val2), 1, t_msk); - else if ((opr1 == "<=") and (opr2 == ">=")) + else if ((opr1 == "<=") && (opr2 == ">=")) t_msk = xt::where((q <= val1) & (q >= val2), 1, t_msk); - if ((opr2 == "<") and (opr1 == ">")) + if ((opr2 == "<") && (opr1 == ">")) t_msk = xt::where((q < val2) & (q > val1), 1, t_msk); - else if ((opr2 == "<=") and (opr1 == ">")) + else if ((opr2 == "<=") && (opr1 == ">")) t_msk = xt::where((q <= val2) & (q > val1), 1, t_msk); - else if ((opr2 == "<") and (opr1 == ">=")) + else if ((opr2 == "<") && (opr1 == ">=")) t_msk = xt::where((q < val2) & (q >= val1), 1, t_msk); - else if ((opr2 == "<=") and (opr1 == ">=")) + else if ((opr2 == "<=") && (opr1 == ">=")) t_msk = xt::where((q <= val2) & (q >= val1), 1, t_msk); } else if (without) { - if ((opr1 == "<") and (opr2 == ">")) + if ((opr1 == "<") && (opr2 == ">")) t_msk = xt::where((q < val1) | (q > val2), 1, t_msk); - else if ((opr1 == "<=") and (opr2 == ">")) + else if ((opr1 == "<=") && (opr2 == ">")) t_msk = xt::where((q <= val1) | (q > val2), 1, t_msk); - else if ((opr1 == "<") and (opr2 == ">=")) + else if ((opr1 == "<") && (opr2 == ">=")) t_msk = xt::where((q < val1) | (q >= val2), 1, t_msk); - else if ((opr1 == "<=") and (opr2 == ">=")) + else if ((opr1 == "<=") && (opr2 == ">=")) t_msk = xt::where((q <= val1) & (q >= val2), 1, t_msk); - if ((opr2 == "<") and (opr1 == ">")) + if ((opr2 == "<") && (opr1 == ">")) t_msk = xt::where((q < val2) | (q > val1), 1, t_msk); - else if ((opr2 == "<=") and (opr1 == ">")) + else if ((opr2 == "<=") && (opr1 == ">")) t_msk = xt::where((q <= val2) | (q > val1), 1, t_msk); - else if ((opr2 == "<") and (opr1 == ">=")) + else if ((opr2 == "<") && (opr1 == ">=")) t_msk = xt::where((q < val2) | (q >= val1), 1, t_msk); - else if ((opr2 == "<=") and (opr1 == ">=")) + else if ((opr2 == "<=") && (opr1 == ">=")) t_msk = xt::where((q <= val2) | (q >= val1), 1, t_msk); } diff --git a/src/probabilist/evalp.cpp b/src/probabilist/evalp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..48f709d6feef3bfd83b848b726ab1154d48c030d --- /dev/null +++ b/src/probabilist/evalp.cpp @@ -0,0 +1,286 @@ +#include <utility> +#include <unordered_map> +#include <vector> +#include <array> +#include <stdexcept> +#include <xtensor/xtensor.hpp> +#include <xtensor/xarray.hpp> +#include <xtensor/xview.hpp> + +#include "evalhyd/evalp.hpp" +#include "utils.hpp" +#include "masks.hpp" +#include "maths.hpp" +#include "uncertainty.hpp" +#include "probabilist/evaluator.hpp" + +namespace eh = evalhyd; + +namespace evalhyd +{ + std::vector<xt::xarray<double>> evalp( + const xt::xtensor<double, 2>& q_obs, + const xt::xtensor<double, 4>& q_prd, + const std::vector<std::string>& metrics, + const xt::xtensor<double, 2>& q_thr, + const xt::xtensor<bool, 4>& t_msk, + const xt::xtensor<std::array<char, 32>, 2>& m_cdt, + const std::unordered_map<std::string, int>& bootstrap, + const std::vector<std::string>& dts + ) + { + // check that the metrics to be computed are valid + eh::utils::check_metrics( + metrics, + {"BS", "BSS", "BS_CRD", "BS_LBD", "QS", "CRPS"} + ); + + // check that optional parameters are given as arguments + eh::utils::evalp::check_optionals(metrics, q_thr); + eh::utils::check_bootstrap(bootstrap); + + // check that data dimensions are compatible + // > time + if (q_obs.shape(1) != q_prd.shape(3)) + throw std::runtime_error( + "observations and predictions feature different " + "temporal lengths" + ); + if (t_msk.size() > 0) + if (q_obs.shape(1) != t_msk.shape(3)) + throw std::runtime_error( + "observations and masks feature different " + "temporal lengths" + ); + if (!dts.empty()) + if (q_obs.shape(1) != dts.size()) + throw std::runtime_error( + "observations and datetimes feature different " + "temporal lengths" + ); + // > leadtimes + if (t_msk.size() > 0) + if (q_prd.shape(1) != t_msk.shape(1)) + throw std::runtime_error( + "predictions and temporal masks feature different " + "numbers of lead times" + ); + // > sites + if (q_obs.shape(0) != q_prd.shape(0)) + throw std::runtime_error( + "observations and predictions feature different " + "numbers of sites" + ); + if (t_msk.size() > 0) + if (q_obs.shape(0) != t_msk.shape(0)) + throw std::runtime_error( + "observations and temporal masks feature different " + "numbers of sites" + ); + if (m_cdt.size() > 0) + if (q_obs.shape(0) != m_cdt.shape(0)) + throw std::runtime_error( + "observations and masking conditions feature different " + "numbers of sites" + ); + + // retrieve dimensions + std::size_t n_sit = q_prd.shape(0); + std::size_t n_ltm = q_prd.shape(1); + std::size_t n_mbr = q_prd.shape(2); + std::size_t n_tim = q_prd.shape(3); + std::size_t n_thr = q_thr.shape(1); + std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(2) : + (m_cdt.size() > 0 ? m_cdt.shape(1) : 1); + std::size_t n_exp = bootstrap.find("n_samples")->second == -9 ? 1: + bootstrap.find("n_samples")->second; + + // register metrics number of dimensions + std::unordered_map<std::string, std::vector<std::size_t>> dim; + + dim["BS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; + dim["BSS"] = {n_sit, n_ltm, n_msk, n_exp, n_thr}; + dim["BS_CRD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; + dim["BS_LBD"] = {n_sit, n_ltm, n_msk, n_exp, n_thr, 3}; + dim["QS"] = {n_sit, n_ltm, n_msk, n_exp, n_mbr}; + dim["CRPS"] = {n_sit, n_ltm, n_msk, n_exp}; + + // declare maps for memoisation purposes + std::unordered_map<std::string, std::vector<std::string>> elt; + std::unordered_map<std::string, std::vector<std::string>> dep; + + // register potentially recurring computation elements across metrics + elt["bs"] = {"o_k", "y_k"}; + elt["BSS"] = {"o_k", "bar_o"}; + elt["BS_CRD"] = {"o_k", "bar_o", "y_k"}; + elt["BS_LBD"] = {"o_k", "y_k"}; + elt["qs"] = {"q_qnt"}; + + // register nested metrics (i.e. metric dependent on another metric) + dep["BS"] = {"bs"}; + dep["BSS"] = {"bs"}; + dep["QS"] = {"qs"}; + dep["CRPS"] = {"qs", "crps"}; + + // determine required elt/dep to be pre-computed + std::vector<std::string> req_elt; + std::vector<std::string> req_dep; + + eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep); + + // generate masks from conditions if provided + auto gen_msk = [&]() { + xt::xtensor<bool, 4> c_msk = xt::zeros<bool>({n_sit, n_ltm, n_msk, n_tim}); + if (m_cdt.size() > 0) + for (int s = 0; s < n_sit; s++) + for (int l = 0; l < n_ltm; l++) + for (int m = 0; m < n_msk; m++) + xt::view(c_msk, s, l, m) = + eh::masks::generate_mask_from_conditions( + xt::view(m_cdt, s, m), + xt::view(q_obs, s), + xt::view(q_prd, s, l) + ); + return c_msk; + }; + const xt::xtensor<bool, 4> c_msk = gen_msk(); + + // generate bootstrap experiment if requested + std::vector<xt::xkeep_slice<int>> b_exp; + auto n_samples = bootstrap.find("n_samples")->second; + auto len_sample = bootstrap.find("len_sample")->second; + if ((n_samples != -9) && (len_sample != -9)) + { + if (dts.empty()) + throw std::runtime_error( + "bootstrap requested but datetimes not provided" + ); + + b_exp = eh::uncertainty::bootstrap( + dts, n_samples, len_sample + ); + } + else + { + // if no bootstrap requested, generate one sample + // containing all the time indices once + xt::xtensor<int, 1> all = xt::arange(n_tim); + b_exp.push_back(xt::keep(all)); + } + + // initialise data structure for outputs + std::vector<xt::xarray<double>> r; + for (const auto& metric : metrics) + r.emplace_back(xt::zeros<double>(dim[metric])); + + auto summary = bootstrap.find("summary")->second; + + // compute variables one site at a time to minimise memory imprint + for (int s = 0; s < n_sit; s++) + // compute variables one lead time at a time to minimise memory imprint + for (int l = 0; l < n_ltm; l++) + { + // instantiate probabilist evaluator + const auto q_obs_v = xt::view(q_obs, s, xt::all()); + const auto q_prd_v = xt::view(q_prd, s, l, xt::all(), xt::all()); + const auto q_thr_v = xt::view(q_thr, s, xt::all()); + const auto t_msk_v = + t_msk.size() > 0 ? + xt::view(t_msk, s, l, xt::all(), xt::all()) : + (m_cdt.size() > 0 ? + xt::view(c_msk, s, l, xt::all(), xt::all()) : + xt::view(t_msk, s, l, xt::all(), xt::all())); + + eh::probabilist::Evaluator evaluator( + q_obs_v, q_prd_v, q_thr_v, t_msk_v, b_exp + ); + + // pre-compute required elt + for (const auto& element : req_elt) + { + if ( element == "o_k" ) + evaluator.calc_o_k(); + else if ( element == "bar_o" ) + evaluator.calc_bar_o(); + else if ( element == "y_k" ) + evaluator.calc_y_k(); + else if ( element == "q_qnt" ) + evaluator.calc_q_qnt(); + } + + // pre-compute required dep + for (const auto& dependency : req_dep) + { + if ( dependency == "bs" ) + evaluator.calc_bs(); + else if ( dependency == "qs" ) + evaluator.calc_qs(); + else if ( dependency == "crps" ) + evaluator.calc_crps(); + } + + // retrieve or compute requested metrics + for (int m = 0; m < metrics.size(); m++) + { + const auto& metric = metrics[m]; + + if ( metric == "BS" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_BS(); + // (sites, lead times, subsets, samples, thresholds) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.BS, summary); + } + else if ( metric == "BSS" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_BSS(); + // (sites, lead times, subsets, samples, thresholds) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.BSS, summary); + } + else if ( metric == "BS_CRD" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_BS_CRD(); + // (sites, lead times, subsets, samples, thresholds, components) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.BS_CRD, summary); + } + else if ( metric == "BS_LBD" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_BS_LBD(); + // (sites, lead times, subsets, samples, thresholds, components) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.BS_LBD, summary); + } + else if ( metric == "QS" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_QS(); + // (sites, lead times, subsets, samples, quantiles) + xt::view(r[m], s, l, xt::all(), xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.QS, summary); + } + else if ( metric == "CRPS" ) + { + if (std::find(req_dep.begin(), req_dep.end(), metric) + == req_dep.end()) + evaluator.calc_CRPS(); + // (sites, lead times, subsets, samples) + xt::view(r[m], s, l, xt::all(), xt::all()) = + eh::uncertainty::summarise(evaluator.CRPS, summary); + } + } + } + + return r; + } +} diff --git a/src/probabilist/evaluator.h b/src/probabilist/evaluator.hpp similarity index 100% rename from src/probabilist/evaluator.h rename to src/probabilist/evaluator.hpp diff --git a/src/probabilist/evaluator_brier.cpp b/src/probabilist/evaluator_brier.cpp index bbadf845336f9ca2acaee93cc68b0ebac9965d0a..2528aef8fb811add2845b19bc1bb5709081c9c10 100644 --- a/src/probabilist/evaluator_brier.cpp +++ b/src/probabilist/evaluator_brier.cpp @@ -3,7 +3,7 @@ #include <xtensor/xmasked_view.hpp> #include <xtensor/xoperation.hpp> -#include "evaluator.h" +#include "probabilist/evaluator.hpp" namespace eh = evalhyd; diff --git a/src/probabilist/evaluator_elements.cpp b/src/probabilist/evaluator_elements.cpp index e09b17987d408f61f766f8cca7baec2eacc3e89e..bdce6c1b729a600343ac2bea710caea604a7658c 100644 --- a/src/probabilist/evaluator_elements.cpp +++ b/src/probabilist/evaluator_elements.cpp @@ -2,7 +2,7 @@ #include <xtensor/xview.hpp> #include <xtensor/xsort.hpp> -#include "evaluator.h" +#include "probabilist/evaluator.hpp" namespace evalhyd { diff --git a/src/probabilist/evaluator_quantiles.cpp b/src/probabilist/evaluator_quantiles.cpp index bd580e7fe0582c761a108c152ab54cc8297c73b3..c6f37e23a716251c046fe35d3c11e7c5a6b46f73 100644 --- a/src/probabilist/evaluator_quantiles.cpp +++ b/src/probabilist/evaluator_quantiles.cpp @@ -3,7 +3,7 @@ #include <xtensor/xview.hpp> #include <xtensor/xoperation.hpp> -#include "evaluator.h" +#include "probabilist/evaluator.hpp" namespace eh = evalhyd; diff --git a/src/uncertainty.hpp b/src/uncertainty.hpp index 168f5a1886d231ec04c98105f6556ee0f9e5cc87..fd26b45e17c868d9ad49b0498862a283522a5aa1 100644 --- a/src/uncertainty.hpp +++ b/src/uncertainty.hpp @@ -89,7 +89,7 @@ namespace evalhyd // check that year is complete (without a rigorous leap year check) int n_days = xt::sum(wdw)(); - if ((n_days != 365) and (n_days != 366)) { + if ((n_days != 365) && (n_days != 366)) { throw std::runtime_error( "year starting in " + std::to_string(y) + " is incomplete" diff --git a/src/utils.hpp b/src/utils.hpp index 60761d26e1e694d7cfde6483d6bb93788573df6c..7cc02a56f2a43732d098064eddc825042bdae24e 100644 --- a/src/utils.hpp +++ b/src/utils.hpp @@ -119,7 +119,7 @@ namespace evalhyd ); auto s = bootstrap.find("summary")->second; // TODO: change upper bound when mean+stddev and quantiles implemented - if ((s < 0) or (s > 0)) + if ((s < 0) || (s > 0)) throw std::runtime_error( "invalid value for bootstrap summary" ); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e2a033a3bbeed109c9ab2ed59b0e262a7e7428b8..eb0ffa74d796b7820d5e722453d3549175276e2c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,52 +1,43 @@ -cmake_minimum_required(VERSION 3.18) -project(evalhyd_tests) +cmake_minimum_required(VERSION 3.15) -set(CMAKE_CXX_STANDARD 14) - -# GOOGLETEST CONFIG ------------------------------------------------------------ -include(FetchContent) -FetchContent_Declare( - googletest - URL https://github.com/google/googletest/archive/e2239ee6043f73722e7aa812a459f54a28552929.zip -) -# For Windows: Prevent overriding the parent project's compiler/linker settings -set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) -FetchContent_MakeAvailable(googletest) - -# EVALHYD CONFIG --------------------------------------------------------------- -include_directories("../deps/xtl/include") -include_directories("../deps/xtensor/include") - -include_directories(../include) -include_directories(../src) - -SET(GCC_EVALHYD_COMPILE_FLAGS "-O3") -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_EVALHYD_COMPILE_FLAGS}") - -# TEST SUITE ------------------------------------------------------------------- -include_directories(data) +# ------------------------------------------------------------------------------ +# dependencies +# ------------------------------------------------------------------------------ +find_package(GTest REQUIRED) +# ------------------------------------------------------------------------------ +# build +# ------------------------------------------------------------------------------ add_executable( evalhyd_tests test_determinist.cpp - ../include/evalhyd/evald.hpp - ../src/determinist/evaluator.hpp - ../src/utils.hpp - ../src/masks.hpp test_probabilist.cpp - ../include/evalhyd/evalp.hpp - ../src/probabilist/evaluator.h - ../src/probabilist/evaluator_brier.cpp - ../src/probabilist/evaluator_quantiles.cpp - ../src/probabilist/evaluator_elements.cpp test_uncertainty.cpp - ../src/uncertainty.hpp ) -target_link_libraries(evalhyd_tests gtest gtest_main) -enable_testing() -add_test( - NAME evalhyd_tests - COMMAND evalhyd_tests - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} +set_target_properties( + evalhyd_tests + PROPERTIES + VISIBILITY_INLINES_HIDDEN ON + CXX_VISIBILITY_PRESET hidden +) + +target_include_directories( + evalhyd_tests + PRIVATE + ${CMAKE_SOURCE_DIR}/src +) + +target_compile_definitions( + evalhyd_tests + PRIVATE + EVALHYD_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}/data" +) + +target_link_libraries( + evalhyd_tests + PRIVATE + EvalHyd::evalhyd + GTest::GTest + GTest::Main ) diff --git a/tests/test_determinist.cpp b/tests/test_determinist.cpp index 0000cbbeb210aaffe9ad3ea72721ea4bc0b3c6f1..14358994aa579e404e4eb767f9dc004da37103b6 100644 --- a/tests/test_determinist.cpp +++ b/tests/test_determinist.cpp @@ -3,6 +3,7 @@ #include <tuple> #include <array> #include <gtest/gtest.h> + #include <xtensor/xtensor.hpp> #include <xtensor/xview.hpp> #include <xtensor/xmanipulation.hpp> @@ -10,17 +11,21 @@ #include "evalhyd/evald.hpp" +#ifndef EVALHYD_DATA_DIR +#error "need to define data directory" +#endif + using namespace xt::placeholders; // required for `_` to work std::tuple<xt::xtensor<double, 2>, xt::xtensor<double, 2>> load_data_d() { // read in data std::ifstream ifs; - ifs.open("./data/q_obs.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_obs.csv"); xt::xtensor<double, 2> observed = xt::load_csv<int>(ifs); ifs.close(); - ifs.open("./data/q_prd.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_prd.csv"); xt::xtensor<double, 2> predicted = xt::view( xt::load_csv<double>(ifs), xt::range(0, 5), xt::all() ); @@ -180,8 +185,8 @@ TEST(DeterministTests, TestMaskingConditions) // conditions on streamflow values _________________________________________ // compute scores using masking conditions on streamflow to subset whole record - xt::xtensor<std::array<char, 32>, 1> q_conditions ={ - {"q_obs{<2000,>3000}"} + xt::xtensor<std::array<char, 32>, 1> q_conditions = { + std::array<char, 32> {"q_obs{<2000,>3000}"} }; std::vector<xt::xarray<double>> metrics_q_conditioned = @@ -212,7 +217,7 @@ TEST(DeterministTests, TestMaskingConditions) // compute scores using masking conditions on streamflow to subset whole record xt::xtensor<std::array<char, 32>, 1> q_conditions_ ={ - {"q_obs{>=mean}"} + std::array<char, 32> {"q_obs{>=mean}"} }; double mean = xt::mean(observed, {1})(); @@ -245,7 +250,7 @@ TEST(DeterministTests, TestMaskingConditions) // compute scores using masking conditions on time indices to subset whole record xt::xtensor<std::array<char, 32>, 1> t_conditions = { - {"t{0,1,2,3,4,5:97,97,98,99}"} + std::array<char, 32> {"t{0,1,2,3,4,5:97,97,98,99}"} }; std::vector<xt::xarray<double>> metrics_t_conditioned = @@ -329,16 +334,16 @@ TEST(DeterministTests, TestBootstrap) // read in data std::ifstream ifs; - ifs.open("./data/q_obs_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1)); ifs.close(); std::vector<std::string> datetimes (x_dts.begin(), x_dts.end()); - ifs.open("./data/q_obs_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1)); ifs.close(); - ifs.open("./data/q_prd_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv"); xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1); ifs.close(); @@ -347,7 +352,7 @@ TEST(DeterministTests, TestBootstrap) {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; std::vector<xt::xarray<double>> metrics_bts = - eh::evald( + evalhyd::evald( xt::view(observed, xt::newaxis(), xt::all()), predicted, metrics, @@ -372,7 +377,7 @@ TEST(DeterministTests, TestBootstrap) xt::concatenate(xt::xtuple(predicted, predicted, predicted), 1); std::vector<xt::xarray<double>> metrics_rep = - eh::evald( + evalhyd::evald( xt::view(observed_x3, xt::newaxis(), xt::all()), predicted_x3, metrics diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp index 4583adea463bd1c9834a3b9978408909d196ed87..7f72a7fa6bcb9871299c8b54a0e79a862d5fc9c9 100644 --- a/tests/test_probabilist.cpp +++ b/tests/test_probabilist.cpp @@ -4,23 +4,28 @@ #include <array> #include <gtest/gtest.h> #include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> +#include <xtensor/xsort.hpp> #include <xtensor/xmanipulation.hpp> #include <xtensor/xcsv.hpp> #include "evalhyd/evalp.hpp" -using namespace xt::placeholders; // required for `_` to work +#ifndef EVALHYD_DATA_DIR +#error "need to define data directory" +#endif +using namespace xt::placeholders; // required for `_` to work std::tuple<xt::xtensor<double, 1>, xt::xtensor<double, 2>> load_data_p() { // read in data std::ifstream ifs; - ifs.open("./data/q_obs.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_obs.csv"); xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<int>(ifs)); ifs.close(); - ifs.open("./data/q_prd.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_prd.csv"); xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs); ifs.close(); @@ -196,7 +201,7 @@ TEST(ProbabilistTests, TestMaskingConditions) // compute scores using masking conditions on streamflow to subset whole record xt::xtensor<std::array<char, 32>, 2> q_conditions = { - {{"q_obs{<2000,>3000}"}} + {std::array<char, 32> {"q_obs{<2000,>3000}"}} }; std::vector<xt::xarray<double>> metrics_q_conditioned = @@ -229,7 +234,7 @@ TEST(ProbabilistTests, TestMaskingConditions) // compute scores using masking conditions on streamflow to subset whole record xt::xtensor<std::array<char, 32>, 2> q_conditions_ = { - {{"q_prd_mean{>=median}"}} + {std::array<char, 32> {"q_prd_mean{>=median}"}} }; auto q_prd_mean = xt::mean(predicted, {0}, xt::keep_dims); @@ -265,7 +270,7 @@ TEST(ProbabilistTests, TestMaskingConditions) // compute scores using masking conditions on time indices to subset whole record xt::xtensor<std::array<char, 32>, 2> t_conditions = { - {{"t{0,1,2,3,4,5:97,97,98,99}"}} + {std::array<char, 32> {"t{0,1,2,3,4,5:97,97,98,99}"}} }; std::vector<xt::xarray<double>> metrics_t_conditioned = @@ -318,7 +323,7 @@ TEST(ProbabilistTests, TestMissingData) {{ 4.7, 4.3, NAN, 2.7, 4.1 }}; std::vector<xt::xarray<double>> metrics_nan = - eh::evalp( + evalhyd::evalp( observed_nan, forecast_nan, metrics, @@ -337,7 +342,7 @@ TEST(ProbabilistTests, TestMissingData) {{ 4.7, 4.3, 2.7 }}; std::vector<xt::xarray<double>> metrics_pp1 = - eh::evalp( + evalhyd::evalp( observed_pp1, forecast_pp1, metrics, @@ -355,7 +360,7 @@ TEST(ProbabilistTests, TestMissingData) {{ 4.3, 2.7, 4.1 }}; std::vector<xt::xarray<double>> metrics_pp2 = - eh::evalp( + evalhyd::evalp( observed_pp2, forecast_pp2, metrics, @@ -392,16 +397,16 @@ TEST(ProbabilistTests, TestBootstrap) // read in data std::ifstream ifs; - ifs.open("./data/q_obs_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); xt::xtensor<std::string, 1> x_dts = xt::squeeze(xt::load_csv<std::string>(ifs, ',', 0, 1)); ifs.close(); std::vector<std::string> datetimes (x_dts.begin(), x_dts.end()); - ifs.open("./data/q_obs_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_obs_1yr.csv"); xt::xtensor<double, 1> observed = xt::squeeze(xt::load_csv<double>(ifs, ',', 1)); ifs.close(); - ifs.open("./data/q_prd_1yr.csv"); + ifs.open(EVALHYD_DATA_DIR "/q_prd_1yr.csv"); xt::xtensor<double, 2> predicted = xt::load_csv<double>(ifs, ',', 1); ifs.close(); @@ -410,7 +415,7 @@ TEST(ProbabilistTests, TestBootstrap) {{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}}; std::vector<xt::xarray<double>> metrics_bts = - eh::evalp( + evalhyd::evalp( xt::view(observed, xt::newaxis(), xt::all()), xt::view(predicted, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), metrics, @@ -433,7 +438,7 @@ TEST(ProbabilistTests, TestBootstrap) xt::concatenate(xt::xtuple(predicted, predicted, predicted), 1); std::vector<xt::xarray<double>> metrics_rep = - eh::evalp( + evalhyd::evalp( xt::view(observed_x3, xt::newaxis(), xt::all()), xt::view(predicted_x3, xt::newaxis(), xt::newaxis(), xt::all(), xt::all()), metrics, diff --git a/tests/test_uncertainty.cpp b/tests/test_uncertainty.cpp index 091e74a35e56d4100344c9f3f28d9aab3ffefbbf..1bfb312240310b6ec5fe3849b65127644394c9e6 100644 --- a/tests/test_uncertainty.cpp +++ b/tests/test_uncertainty.cpp @@ -1,5 +1,6 @@ #include <unordered_map> #include <gtest/gtest.h> + #include <xtensor/xtensor.hpp> #include <xtensor/xrandom.hpp> #include <xtensor/xio.hpp>