Commit 80e0a7ff authored by Thibault Hallouin's avatar Thibault Hallouin
Browse files

bring back xexpression in evald signature

1 merge request!3release v0.1.0
Pipeline #42703 passed with stage
in 2 minutes and 26 seconds
Showing with 276 additions and 294 deletions
+276 -294
......@@ -21,7 +21,6 @@ message(STATUS "Found xtensor: ${xtensor_INCLUDE_DIRS}/xtensor")
# define evalhyd library
add_library(
evalhyd
src/determinist/evald.cpp
src/probabilist/evalp.cpp
src/probabilist/evaluator_brier.cpp
src/probabilist/evaluator_elements.cpp
......
......@@ -3,10 +3,18 @@
#include <unordered_map>
#include <vector>
#include <optional>
#include <xtensor/xexpression.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xarray.hpp>
#include "utils.hpp"
#include "masks.hpp"
#include "maths.hpp"
#include "uncertainty.hpp"
#include "determinist/evaluator.hpp"
namespace evalhyd
{
......@@ -137,19 +145,257 @@ namespace evalhyd
/// evalhyd::evald(obs, prd, {"NSE"}, "none", 1, -9, msk);
///
/// \endrst
template <class D2, class B2>
std::vector<xt::xarray<double>> evald(
const xt::xtensor<double, 2>& q_obs,
const xt::xtensor<double, 2>& q_prd,
const xt::xexpression<D2>& q_obs,
const xt::xexpression<D2>& q_prd,
const std::vector<std::string>& metrics,
const std::string& transform = "none",
double exponent = 1,
double epsilon = -9,
const xt::xtensor<bool, 2>& t_msk = {},
const xt::xexpression<B2>& t_msk = B2({}),
const xt::xtensor<std::array<char, 32>, 1>& m_cdt = {},
const std::unordered_map<std::string, int>& bootstrap =
{{"n_samples", -9}, {"len_sample", -9}, {"summary", 0}},
const std::vector<std::string>& dts = {}
);
)
{
const D2& q_obs_ = q_obs.derived_cast();
const D2& q_prd_ = q_prd.derived_cast();
const B2& t_msk_ = t_msk.derived_cast();
// check that the metrics to be computed are valid
utils::check_metrics(
metrics,
{"RMSE", "NSE", "KGE", "KGEPRIME"}
);
// check that optional parameters are valid
eh::utils::check_bootstrap(bootstrap);
// check that data dimensions are compatible
// > time
if (q_obs_.shape(1) != q_prd_.shape(1))
throw std::runtime_error(
"observations and predictions feature different "
"temporal lengths"
);
if (t_msk_.size() > 0)
if (q_obs_.shape(1) != t_msk_.shape(1))
throw std::runtime_error(
"observations and masks feature different "
"temporal lengths"
);
if (!dts.empty())
if (q_obs_.shape(1) != dts.size())
throw std::runtime_error(
"observations and datetimes feature different "
"temporal lengths"
);
// > series
if (q_obs_.shape(0) != 1)
throw std::runtime_error(
"observations contain more than one time series"
);
// retrieve dimensions
std::size_t n_tim = q_obs_.shape(1);
std::size_t n_msk = t_msk_.size() > 0 ? t_msk_.shape(0) :
(m_cdt.size() > 0 ? m_cdt.shape(0) : 1);
// initialise a mask if none provided
// (corresponding to no temporal subset)
auto gen_msk = [&]() {
// if t_msk provided, it takes priority
if (t_msk_.size() > 0)
return t_msk_;
// else if m_cdt provided, use them to generate t_msk
else if (m_cdt.size() > 0)
{
xt::xtensor<bool, 2> c_msk = xt::zeros<bool>({n_msk, n_tim});
for (int m = 0; m < n_msk; m++)
xt::view(c_msk, m) =
eh::masks::generate_mask_from_conditions(
m_cdt[0], xt::view(q_obs_, 0), q_prd_
);
return c_msk;
}
// if neither t_msk nor m_cdt provided, generate dummy mask
else
return xt::xtensor<bool, 2>{xt::ones<bool>({std::size_t{1}, n_tim})};
};
auto msk = gen_msk();
// apply streamflow transformation if requested
auto q_transform = [&](const D2& q)
{
if ( transform == "none" || (transform == "pow" && exponent == 1))
{
return q;
}
else if ( transform == "sqrt" )
{
return xt::eval(xt::sqrt(q));
}
else if ( transform == "inv" )
{
if ( epsilon == -9 )
// determine an epsilon value to avoid zero divide
epsilon = xt::mean(q_obs_)() * 0.01;
return xt::eval(1. / (q + epsilon));
}
else if ( transform == "log" )
{
if ( epsilon == -9 )
// determine an epsilon value to avoid log zero
epsilon = xt::mean(q_obs_)() * 0.01;
return xt::eval(xt::log(q + epsilon));
}
else if ( transform == "pow" )
{
if ( exponent < 0 )
{
if ( epsilon == -9 )
// determine an epsilon value to avoid zero divide
epsilon = xt::mean(q_obs_)() * 0.01;
return xt::eval(xt::pow(q + epsilon, exponent));
}
else
{
return xt::eval(xt::pow(q, exponent));
}
}
else
{
throw std::runtime_error(
"invalid streamflow transformation: " + transform
);
}
};
auto obs = q_transform(q_obs_);
auto prd = q_transform(q_prd_);
// generate bootstrap experiment if requested
std::vector<xt::xkeep_slice<int>> exp;
auto n_samples = bootstrap.find("n_samples")->second;
auto len_sample = bootstrap.find("len_sample")->second;
if ((n_samples != -9) && (len_sample != -9))
{
if (dts.empty())
throw std::runtime_error(
"bootstrap requested but datetimes not provided"
);
exp = eh::uncertainty::bootstrap(
dts, n_samples, len_sample
);
}
else
{
// if no bootstrap requested, generate one sample
// containing all the time indices once
xt::xtensor<int, 1> all = xt::arange(n_tim);
exp.push_back(xt::keep(all));
}
// instantiate determinist evaluator
eh::determinist::Evaluator evaluator(obs, prd, msk, exp);
// declare maps for memoisation purposes
std::unordered_map<std::string, std::vector<std::string>> elt;
std::unordered_map<std::string, std::vector<std::string>> dep;
// register potentially recurring computation elt across metrics
elt["RMSE"] = {"quad_err"};
elt["NSE"] = {"mean_obs", "quad_obs", "quad_err"};
elt["KGE"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd",
"r_pearson", "alpha", "bias"};
elt["KGEPRIME"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd",
"r_pearson", "alpha", "bias"};
// register nested metrics (i.e. metric dependent on another metric)
// TODO
// determine required elt/dep to be pre-computed
std::vector<std::string> req_elt;
std::vector<std::string> req_dep;
eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep);
// pre-compute required elt
for ( const auto& element : req_elt )
{
if ( element == "mean_obs" )
evaluator.calc_mean_obs();
else if ( element == "mean_prd" )
evaluator.calc_mean_prd();
else if ( element == "quad_err" )
evaluator.calc_quad_err();
else if ( element == "quad_obs" )
evaluator.calc_quad_obs();
else if ( element == "quad_prd" )
evaluator.calc_quad_prd();
else if ( element == "r_pearson" )
evaluator.calc_r_pearson();
else if ( element == "alpha" )
evaluator.calc_alpha();
else if ( element == "bias" )
evaluator.calc_bias();
}
// pre-compute required dep
for ( const auto& dependency : req_dep )
{
// TODO
}
// retrieve or compute requested metrics
std::vector<xt::xarray<double>> r;
auto summary = bootstrap.find("summary")->second;
for ( const auto& metric : metrics )
{
if ( metric == "RMSE" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_RMSE();
r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary));
}
else if ( metric == "NSE" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_NSE();
r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary));
}
else if ( metric == "KGE" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_KGE();
r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary));
}
else if ( metric == "KGEPRIME" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_KGEPRIME();
r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary));
}
}
return r;
};
}
#endif //EVALHYD_EVALD_HPP
#include <unordered_map>
#include <vector>
#include <array>
#include <stdexcept>
#include <xtensor/xexpression.hpp>
#include <xtensor/xarray.hpp>
#include <xtensor/xscalar.hpp>
#include "evalhyd/evald.hpp"
#include "utils.hpp"
#include "masks.hpp"
#include "maths.hpp"
#include "uncertainty.hpp"
#include "determinist/evaluator.hpp"
namespace eh = evalhyd;
namespace evalhyd
{
std::vector<xt::xarray<double>> evald(
const xt::xtensor<double, 2>& q_obs,
const xt::xtensor<double, 2>& q_prd,
const std::vector<std::string>& metrics,
const std::string& transform,
double exponent,
double epsilon,
const xt::xtensor<bool, 2>& t_msk,
const xt::xtensor<std::array<char, 32>, 1>& m_cdt,
const std::unordered_map<std::string, int>& bootstrap,
const std::vector<std::string>& dts
)
{
// check that the metrics to be computed are valid
utils::check_metrics(
metrics,
{"RMSE", "NSE", "KGE", "KGEPRIME"}
);
// check that optional parameters are valid
eh::utils::check_bootstrap(bootstrap);
// check that data dimensions are compatible
// > time
if (q_obs.shape(1) != q_prd.shape(1))
throw std::runtime_error(
"observations and predictions feature different "
"temporal lengths"
);
if (t_msk.size() > 0)
if (q_obs.shape(1) != t_msk.shape(1))
throw std::runtime_error(
"observations and masks feature different "
"temporal lengths"
);
if (!dts.empty())
if (q_obs.shape(1) != dts.size())
throw std::runtime_error(
"observations and datetimes feature different "
"temporal lengths"
);
// > series
if (q_obs.shape(0) != 1)
throw std::runtime_error(
"observations contain more than one time series"
);
// retrieve dimensions
std::size_t n_tim = q_obs.shape(1);
std::size_t n_msk = t_msk.size() > 0 ? t_msk.shape(0) :
(m_cdt.size() > 0 ? m_cdt.shape(0) : 1);
// initialise a mask if none provided
// (corresponding to no temporal subset)
auto gen_msk = [&]() {
// if t_msk provided, it takes priority
if (t_msk.size() > 0)
return t_msk;
// else if m_cdt provided, use them to generate t_msk
else if (m_cdt.size() > 0)
{
xt::xtensor<bool, 2> c_msk = xt::zeros<bool>({n_msk, n_tim});
for (int m = 0; m < n_msk; m++)
xt::view(c_msk, m) =
eh::masks::generate_mask_from_conditions(
m_cdt[0], xt::view(q_obs, 0), q_prd
);
return c_msk;
}
// if neither t_msk nor m_cdt provided, generate dummy mask
else
return xt::xtensor<bool, 2>{xt::ones<bool>({std::size_t{1}, n_tim})};
};
auto msk = gen_msk();
// apply streamflow transformation if requested
auto q_transform = [&](const xt::xtensor<double, 2>& q)
{
if ( transform == "none" || (transform == "pow" && exponent == 1))
{
return q;
}
else if ( transform == "sqrt" )
{
return xt::eval(xt::sqrt(q));
}
else if ( transform == "inv" )
{
if ( epsilon == -9 )
// determine an epsilon value to avoid zero divide
epsilon = xt::mean(q_obs)() * 0.01;
return xt::eval(1. / (q + epsilon));
}
else if ( transform == "log" )
{
if ( epsilon == -9 )
// determine an epsilon value to avoid log zero
epsilon = xt::mean(q_obs)() * 0.01;
return xt::eval(xt::log(q + epsilon));
}
else if ( transform == "pow" )
{
if ( exponent < 0 )
{
if ( epsilon == -9 )
// determine an epsilon value to avoid zero divide
epsilon = xt::mean(q_obs)() * 0.01;
return xt::eval(xt::pow(q + epsilon, exponent));
}
else
{
return xt::eval(xt::pow(q, exponent));
}
}
else
{
throw std::runtime_error(
"invalid streamflow transformation: " + transform
);
}
};
auto obs = q_transform(q_obs);
auto prd = q_transform(q_prd);
// generate bootstrap experiment if requested
std::vector<xt::xkeep_slice<int>> exp;
auto n_samples = bootstrap.find("n_samples")->second;
auto len_sample = bootstrap.find("len_sample")->second;
if ((n_samples != -9) && (len_sample != -9))
{
if (dts.empty())
throw std::runtime_error(
"bootstrap requested but datetimes not provided"
);
exp = eh::uncertainty::bootstrap(
dts, n_samples, len_sample
);
}
else
{
// if no bootstrap requested, generate one sample
// containing all the time indices once
xt::xtensor<int, 1> all = xt::arange(n_tim);
exp.push_back(xt::keep(all));
}
// instantiate determinist evaluator
eh::determinist::Evaluator evaluator(obs, prd, msk, exp);
// declare maps for memoisation purposes
std::unordered_map<std::string, std::vector<std::string>> elt;
std::unordered_map<std::string, std::vector<std::string>> dep;
// register potentially recurring computation elt across metrics
elt["RMSE"] = {"quad_err"};
elt["NSE"] = {"mean_obs", "quad_obs", "quad_err"};
elt["KGE"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd",
"r_pearson", "alpha", "bias"};
elt["KGEPRIME"] = {"mean_obs", "mean_prd", "quad_obs", "quad_prd",
"r_pearson", "alpha", "bias"};
// register nested metrics (i.e. metric dependent on another metric)
// TODO
// determine required elt/dep to be pre-computed
std::vector<std::string> req_elt;
std::vector<std::string> req_dep;
eh::utils::find_requirements(metrics, elt, dep, req_elt, req_dep);
// pre-compute required elt
for ( const auto& element : req_elt )
{
if ( element == "mean_obs" )
evaluator.calc_mean_obs();
else if ( element == "mean_prd" )
evaluator.calc_mean_prd();
else if ( element == "quad_err" )
evaluator.calc_quad_err();
else if ( element == "quad_obs" )
evaluator.calc_quad_obs();
else if ( element == "quad_prd" )
evaluator.calc_quad_prd();
else if ( element == "r_pearson" )
evaluator.calc_r_pearson();
else if ( element == "alpha" )
evaluator.calc_alpha();
else if ( element == "bias" )
evaluator.calc_bias();
}
// pre-compute required dep
for ( const auto& dependency : req_dep )
{
// TODO
}
// retrieve or compute requested metrics
std::vector<xt::xarray<double>> r;
auto summary = bootstrap.find("summary")->second;
for ( const auto& metric : metrics )
{
if ( metric == "RMSE" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_RMSE();
r.emplace_back(eh::uncertainty::summarise(evaluator.RMSE, summary));
}
else if ( metric == "NSE" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_NSE();
r.emplace_back(eh::uncertainty::summarise(evaluator.NSE, summary));
}
else if ( metric == "KGE" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_KGE();
r.emplace_back(eh::uncertainty::summarise(evaluator.KGE, summary));
}
else if ( metric == "KGEPRIME" )
{
if (std::find(req_dep.begin(), req_dep.end(), metric)
== req_dep.end())
evaluator.calc_KGEPRIME();
r.emplace_back(eh::uncertainty::summarise(evaluator.KGEPRIME, summary));
}
}
return r;
}
}
......@@ -43,7 +43,7 @@ TEST(DeterministTests, TestMetrics)
// compute scores (with 2D tensors)
std::vector<xt::xarray<double>> metrics =
evalhyd::evald(
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(
observed, predicted, {"RMSE", "NSE", "KGE", "KGEPRIME"}
);
......@@ -90,7 +90,7 @@ TEST(DeterministTests, TestTransform)
// compute and check results on square-rooted streamflow series
std::vector<xt::xarray<double>> metrics =
evalhyd::evald(observed, predicted, {"NSE"}, "sqrt");
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(observed, predicted, {"NSE"}, "sqrt");
xt::xtensor<double, 3> nse_sqrt =
{{{0.882817}},
......@@ -101,7 +101,7 @@ TEST(DeterministTests, TestTransform)
EXPECT_TRUE(xt::allclose(metrics[0], nse_sqrt));
// compute and check results on inverted streamflow series
metrics = evalhyd::evald(observed, predicted, {"NSE"}, "inv");
metrics = evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(observed, predicted, {"NSE"}, "inv");
xt::xtensor<double, 3> nse_inv =
{{{0.737323}},
......@@ -112,7 +112,7 @@ TEST(DeterministTests, TestTransform)
EXPECT_TRUE(xt::allclose(metrics[0], nse_inv));
// compute and check results on square-rooted streamflow series
metrics = evalhyd::evald(observed, predicted, {"NSE"}, "log");
metrics = evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(observed, predicted, {"NSE"}, "log");
xt::xtensor<double, 3> nse_log =
{{{0.893344}},
......@@ -123,7 +123,7 @@ TEST(DeterministTests, TestTransform)
EXPECT_TRUE(xt::allclose(metrics[0], nse_log));
// compute and check results on power-transformed streamflow series
metrics = evalhyd::evald(observed, predicted, {"NSE"}, "pow", 0.2);
metrics = evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(observed, predicted, {"NSE"}, "pow", 0.2);
xt::xtensor<double, 3> nse_pow =
{{{0.899207}},
......@@ -143,7 +143,7 @@ TEST(DeterministTests, TestMasks)
std::tie(observed, predicted) = load_data_d();
// generate temporal subset by dropping 20 first time steps
xt::xtensor<double, 2> masks =
xt::xtensor<bool, 2> masks =
xt::ones<bool>({std::size_t {1}, std::size_t {observed.size()}});
xt::view(masks, 0, xt::range(0, 20)) = 0;
......@@ -152,14 +152,14 @@ TEST(DeterministTests, TestMasks)
{"RMSE", "NSE", "KGE", "KGEPRIME"};
std::vector<xt::xarray<double>> metrics_masked =
evalhyd::evald(observed, predicted, metrics, "none", 1, -9, masks);
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(observed, predicted, metrics, "none", 1, -9, masks);
// compute scores on pre-computed subset of whole record
xt::xtensor<double, 2> obs = xt::view(observed, xt::all(), xt::range(20, _));
xt::xtensor<double, 2> prd = xt::view(predicted, xt::all(), xt::range(20, _));
std::vector<xt::xarray<double>> metrics_subset =
evalhyd::evald(obs, prd, metrics);
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(obs, prd, metrics);
// check results are identical
for (int m = 0; m < metrics.size(); m++)
......@@ -197,8 +197,8 @@ TEST(DeterministTests, TestMaskingConditions)
// compute scores using "NaN-ed" time indices where conditions on streamflow met
std::vector<xt::xarray<double>> metrics_q_preconditioned =
evalhyd::evald(
xt::where((observed < 2000) | (observed > 3000), observed, NAN),
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(
xt::eval(xt::where((observed < 2000) | (observed > 3000), observed, NAN)),
predicted,
metrics
);
......@@ -230,8 +230,8 @@ TEST(DeterministTests, TestMaskingConditions)
// compute scores using "NaN-ed" time indices where conditions on streamflow met
std::vector<xt::xarray<double>> metrics_q_preconditioned_ =
evalhyd::evald(
xt::where(observed >= mean, observed, NAN),
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(
xt::eval(xt::where(observed >= mean, observed, NAN)),
predicted,
metrics
);
......@@ -261,9 +261,9 @@ TEST(DeterministTests, TestMaskingConditions)
// compute scores on already subset time series
std::vector<xt::xarray<double>> metrics_t_subset =
evalhyd::evald(
xt::view(observed, xt::all(), xt::range(0, 100)),
xt::view(predicted, xt::all(), xt::range(0, 100)),
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(
xt::eval(xt::view(observed, xt::all(), xt::range(0, 100))),
xt::eval(xt::view(predicted, xt::all(), xt::range(0, 100))),
metrics
);
......@@ -299,7 +299,7 @@ TEST(DeterministTests, TestMissingData)
// compute metrics with observations containing NaN values
std::vector<xt::xarray<double>> metrics_nan =
evalhyd::evald(observed, predicted, metrics);
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(observed, predicted, metrics);
for (int m = 0; m < metrics.size(); m++) {
for (int p = 0; p < predicted.shape(0); p++) {
......@@ -311,9 +311,11 @@ TEST(DeterministTests, TestMissingData)
xt::view(predicted, p, xt::range(20+(3*(p+1)), _));
std::vector<xt::xarray<double>> metrics_sbs =
evalhyd::evald(xt::view(obs, xt::newaxis(), xt::all()),
xt::view(prd, xt::newaxis(), xt::all()),
{metrics[m]});
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(
xt::eval(xt::view(obs, xt::newaxis(), xt::all())),
xt::eval(xt::view(prd, xt::newaxis(), xt::all())),
{metrics[m]}
);
// compare to check results are the same
EXPECT_TRUE(
......@@ -352,14 +354,14 @@ TEST(DeterministTests, TestBootstrap)
{{"n_samples", 10}, {"len_sample", 3}, {"summary", 0}};
std::vector<xt::xarray<double>> metrics_bts =
evalhyd::evald(
xt::view(observed, xt::newaxis(), xt::all()),
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(
xt::eval(xt::view(observed, xt::newaxis(), xt::all())),
predicted,
metrics,
"none", // transform
1, // exponent
-9, // epsilon
{}, // t_msk
xt::xtensor<bool, 2>({}), // t_msk
{}, // m_cdt
bootstrap,
datetimes
......@@ -377,8 +379,8 @@ TEST(DeterministTests, TestBootstrap)
xt::concatenate(xt::xtuple(predicted, predicted, predicted), 1);
std::vector<xt::xarray<double>> metrics_rep =
evalhyd::evald(
xt::view(observed_x3, xt::newaxis(), xt::all()),
evalhyd::evald<xt::xtensor<double, 2>, xt::xtensor<bool, 2>>(
xt::eval(xt::view(observed_x3, xt::newaxis(), xt::all())),
predicted_x3,
metrics
);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment