diff --git a/include/evalhyd/probabilist.hpp b/include/evalhyd/probabilist.hpp index 98c68a911648c4413d79176e346104b88fe703e0..9dbfe7d013268dd457fd96d82dbec43bbd0a7e8c 100644 --- a/include/evalhyd/probabilist.hpp +++ b/include/evalhyd/probabilist.hpp @@ -28,13 +28,23 @@ namespace evalhyd /// \param [in] q_thr (optional): /// 1D array of streamflow exceedance threshold(s). /// shape: (thresholds,) + /// \param [in] t_msk (optional): + /// 2D array of booleans representing temporal mask(s) used to work on + /// temporal subsets of the whole streamflow time series (where + /// True/False is used for the time steps to include/discard in a + /// given subset. If not provided, no subset is performed and only + /// one set of metrics is returned corresponding to the whole time + /// series. If provided, as many sets of metrics are returned as they + /// are masks provided. + /// shape: (1+, time) /// \return /// Vector of 2D array of metrics for each threshold. - std::vector<xt::xtensor<double, 2>> evalp( + std::vector<xt::xtensor<double, 3>> evalp( const xt::xtensor<double, 2>& q_obs, const xt::xtensor<double, 2>& q_prd, const std::vector<std::string>& metrics, - const xt::xtensor<double, 1>& q_thr = {} + const xt::xtensor<double, 1>& q_thr = {}, + const xt::xtensor<bool, 2>& t_msk = {} ) { // check that the metrics to be computed are valid @@ -47,7 +57,7 @@ namespace evalhyd eh::utils::check_optionals(metrics, q_thr); // instantiate probabilist evaluator - eh::probabilist::Evaluator evaluator(q_obs, q_prd, q_thr); + eh::probabilist::Evaluator evaluator(q_obs, q_prd, q_thr, t_msk); // declare maps for memoisation purposes std::unordered_map<std::string, std::vector<std::string>> elt; @@ -55,14 +65,14 @@ namespace evalhyd // register potentially recurring computation elements across metrics elt["bs"] = {"o_k", "y_k"}; - elt["bss"] = {"o_k", "bar_o"}; + elt["BSS"] = {"o_k", "bar_o"}; elt["BS_CRD"] = {"o_k", "bar_o", "y_k"}; elt["BS_LBD"] = {"o_k", "y_k"}; elt["qs"] = {"q_qnt"}; // register nested metrics (i.e. metric dependent on another metric) dep["BS"] = {"bs"}; - dep["BSS"] = {"bs", "bss"}; + dep["BSS"] = {"bs"}; dep["QS"] = {"qs"}; dep["CRPS"] = {"qs", "crps"}; @@ -90,8 +100,6 @@ namespace evalhyd { if ( dependency == "bs" ) evaluator.calc_bs(); - else if ( dependency == "bss" ) - evaluator.calc_bss(); else if ( dependency == "qs" ) evaluator.calc_qs(); else if ( dependency == "crps" ) @@ -99,7 +107,7 @@ namespace evalhyd } // retrieve or compute requested metrics - std::vector<xt::xtensor<double, 2>> r; + std::vector<xt::xtensor<double, 3>> r; for (const auto& metric : metrics) { diff --git a/include/evalhyd/probabilist/evaluator.h b/include/evalhyd/probabilist/evaluator.h index ccd60af7ec42280a9ded16b4fec52249beda84c9..43e27f49cf14054d770c506ed364ff59fe61ab34 100644 --- a/include/evalhyd/probabilist/evaluator.h +++ b/include/evalhyd/probabilist/evaluator.h @@ -14,10 +14,11 @@ namespace evalhyd const xt::xtensor<double, 2>& q_obs; const xt::xtensor<double, 2>& q_prd; const xt::xtensor<double, 1>& q_thr; + xt::xtensor<bool, 2> t_msk; // members for computational elements xt::xtensor<double, 2> o_k; - xt::xtensor<double, 1> bar_o; + xt::xtensor<double, 2> bar_o; xt::xtensor<double, 2> y_k; xt::xtensor<double, 2> q_qnt; @@ -25,23 +26,27 @@ namespace evalhyd // constructor method Evaluator(const xt::xtensor<double, 2>& obs, const xt::xtensor<double, 2>& prd, - const xt::xtensor<double, 1>& thr) : - q_obs{obs}, q_prd{prd}, q_thr{thr} {}; + const xt::xtensor<double, 1>& thr, + const xt::xtensor<bool, 2>& msk) : + q_obs{obs}, q_prd{prd}, q_thr{thr}, t_msk(msk) + { + if (msk.size() < 1) + t_msk = xt::ones<bool>(obs.shape()); + }; // members for intermediate evaluation metrics // (i.e. before the reduction along the temporal axis) xt::xtensor<double, 2> bs; - xt::xtensor<double, 2> bss; xt::xtensor<double, 2> qs; xt::xtensor<double, 2> crps; // members for evaluation metrics - xt::xtensor<double, 2> BS; - xt::xtensor<double, 2> BS_CRD; - xt::xtensor<double, 2> BS_LBD; - xt::xtensor<double, 2> BSS; - xt::xtensor<double, 2> QS; - xt::xtensor<double, 2> CRPS; + xt::xtensor<double, 3> BS; + xt::xtensor<double, 3> BS_CRD; + xt::xtensor<double, 3> BS_LBD; + xt::xtensor<double, 3> BSS; + xt::xtensor<double, 3> QS; + xt::xtensor<double, 3> CRPS; // methods to compute derived data void calc_q_qnt(); @@ -53,7 +58,6 @@ namespace evalhyd // methods to compute intermediate metrics void calc_bs(); - void calc_bss(); void calc_qs(); void calc_crps(); diff --git a/include/evalhyd/probabilist/evaluator_brier.cpp b/include/evalhyd/probabilist/evaluator_brier.cpp index dfe611a842e29ec50764aa2e2b1da80bc58bcc42..069b74b710b5c8968cf8ec988947d56fcad6ec44 100644 --- a/include/evalhyd/probabilist/evaluator_brier.cpp +++ b/include/evalhyd/probabilist/evaluator_brier.cpp @@ -37,17 +37,37 @@ namespace evalhyd // Compute the Brier score (BS). // + // \require t_msk: + // Temporal subsets of the whole record. + // shape: (subsets, time) // \require bs: - // 2D array of Brier score for each threshold for each time step. + // Brier score for each threshold for each time step. // shape: (thresholds, time) // \assign BS: - // 2D array of Brier score for each threshold. - // shape: (thresholds, 1) + // Brier score for each subset and for each threshold. + // shape: (subsets, thresholds, 1) void Evaluator::calc_BS() { - // calculate the mean over the time steps - // $BS = \frac{1}{n} \sum_{k=1}^{n} (o_k - y_k)^2$ - BS = xt::mean(bs, -1, xt::keep_dims); + // define some dimensions + std::size_t n_msk = t_msk.shape(0); + std::size_t n_thr = bs.shape(0); + + // initialise output variable + // shape: (subsets, thresholds, 1) + BS = xt::zeros<double>({n_msk, n_thr, std::size_t {1}}); + + // compute variable one mask at a time to minimise memory imprint + for (int m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto bs_masked = xt::where(xt::row(t_msk, m), bs, NAN); + + // calculate the mean over the time steps + // $BS = \frac{1}{n} \sum_{k=1}^{n} (o_k - y_k)^2$ + xt::view(BS, m, xt::all(), xt::all()) = + xt::nanmean(bs_masked, -1, xt::keep_dims); + } } // Compute the calibration-refinement decomposition of the Brier score @@ -56,18 +76,24 @@ namespace evalhyd // BS = reliability - resolution + uncertainty // // \require q_thr: - // 1D array of streamflow exceedance threshold(s). + // Streamflow exceedance threshold(s). // shape: (thresholds,) + // \require t_msk: + // Temporal subsets of the whole record. + // shape: (subsets, time) // \require o_k: // Observed event outcome. // shape: (thresholds, time) // \require y_k: // Event probability forecast. // shape: (thresholds, time) + // \require bar_o: + // Mean event observed outcome. + // shape: (subsets, thresholds) // \assign BS_CRD: - // 2D array of Brier score components (reliability, resolution, - // uncertainty) for each threshold. - // shape: (thresholds, components) + // Brier score components (reliability, resolution, uncertainty) + // for each subset and for each threshold. + // shape: (subsets, thresholds, components) void Evaluator::calc_BS_CRD() { // declare internal variables @@ -79,66 +105,80 @@ namespace evalhyd xt::xtensor<double, 1> y_i; // define some dimensions - std::size_t n = o_k.shape(1); std::size_t n_thr = o_k.shape(0); std::size_t n_mbr = q_prd.shape(0); + std::size_t n_msk = t_msk.shape(0); std::size_t n_cmp = 3; - // initialise output variable - // shape: (components, thresholds) - BS_CRD = xt::zeros<double>({n_thr, n_cmp}); - // compute range of forecast values $y_i$ y_i = xt::arange<double>(double(n_mbr + 1)) / n_mbr; - // compute mask to subsample time steps belonging to same forecast bin - // (where bins are defined as the range of forecast values) - msk_bins = xt::equal( - y_k, xt::view(y_i, xt::all(), xt::newaxis(), xt::newaxis()) - ); - - // compute number of forecasts in each forecast bin $N_i$ - N_i = xt::sum(msk_bins, -1); - - // compute subsample relative frequency - // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$ - bar_o_i = xt::where( - N_i > 0, - xt::sum( - xt::where( - msk_bins, - xt::view(o_k, xt::newaxis(), - xt::all(), xt::all()), - 0. - ), - -1 - ) / N_i, - 0. - ); - - // calculate reliability = - // $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$ - xt::col(BS_CRD, 0) = - xt::sum( - xt::square( - xt::view(y_i, xt::all(), xt::newaxis()) - - bar_o_i - ) * N_i, - 0 - ) / n; - - // calculate resolution = - // $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$ - xt::col(BS_CRD, 1) = - xt::sum( - xt::square( - bar_o_i - bar_o - ) * N_i, - 0 - ) / n; - - // calculate uncertainty = $\bar{o} (1 - \bar{o})$ - xt::col(BS_CRD, 2) = bar_o * (1 - bar_o); + // initialise output variable + // shape: (subsets, thresholds, components) + BS_CRD = xt::zeros<double>({n_msk, n_thr, n_cmp}); + + // compute variable one mask at a time to minimise memory imprint + for (int m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN); + auto y_k_masked = xt::where(xt::row(t_msk, m), y_k, NAN); + + // calculate length of subset + auto l = xt::sum(xt::row(t_msk, m)); + + // compute mask to subsample time steps belonging to same forecast bin + // (where bins are defined as the range of forecast values) + msk_bins = xt::equal( + y_k_masked, + xt::view(y_i, xt::all(), xt::newaxis(), xt::newaxis()) + ); + + // compute number of forecasts in each forecast bin $N_i$ + N_i = xt::nansum(msk_bins, -1); + + // compute subsample relative frequency + // $\bar{o_i} = \frac{1}{N_i} \sum_{k \in N_i} o_k$ + bar_o_i = xt::where( + N_i > 0, + xt::nansum( + xt::where( + msk_bins, + xt::view(o_k_masked, xt::newaxis(), + xt::all(), xt::all()), + 0. + ), + -1 + ) / N_i, + 0. + ); + + // calculate reliability = + // $\frac{1}{n} \sum_{i=1}^{I} N_i (y_i - \bar{o_i})^2$ + xt::view(BS_CRD, m, xt::all(), 0) = + xt::nansum( + xt::square( + xt::view(y_i, xt::all(), xt::newaxis()) + - bar_o_i + ) * N_i, + 0 + ) / l; + + // calculate resolution = + // $\frac{1}{n} \sum_{i=1}^{I} N_i (\bar{o_i} - \bar{o})^2$ + xt::view(BS_CRD, m, xt::all(), 1) = + xt::nansum( + xt::square( + bar_o_i - xt::row(bar_o, m) + ) * N_i, + 0 + ) / l; + + // calculate uncertainty = $\bar{o} (1 - \bar{o})$ + xt::view(BS_CRD, m, xt::all(), 2) = + xt::row(bar_o, m) * (1 - xt::row(bar_o, m)); + } } // Compute the likelihood-base rate decomposition of the Brier score @@ -146,6 +186,9 @@ namespace evalhyd // // BS = type 2 bias - discrimination + sharpness // + // \require t_msk: + // Temporal subsets of the whole record. + // shape: (subsets, time) // \require o_k: // Observed event outcome. // shape: (thresholds, time) @@ -153,9 +196,9 @@ namespace evalhyd // Event probability forecast. // shape: (thresholds, time) // \return BS_LBD: - // 2D array of Brier score components (type 2 bias, discrimination, - // sharpness) for each threshold. - // shape: (thresholds, components) + // Brier score components (type 2 bias, discrimination, sharpness) + // for each subset and for each threshold. + // shape: (subsets, thresholds, components) void Evaluator::calc_BS_LBD() { // declare internal variables @@ -169,124 +212,148 @@ namespace evalhyd xt::xtensor<double, 1> o_j; // define some dimensions - std::size_t n = o_k.shape(1); std::size_t n_thr = o_k.shape(0); + std::size_t n_msk = t_msk.shape(0); std::size_t n_cmp = 3; - // declare and initialise output variable - // shape: (components, thresholds) - BS_LBD = xt::zeros<double>({n_thr, n_cmp}); - // set the range of observed values $o_j$ o_j = {0., 1.}; - // compute mask to subsample time steps belonging to same observation bin - // (where bins are defined as the range of forecast values) - msk_bins = xt::equal( - o_k, xt::view(o_j, xt::all(), xt::newaxis(), xt::newaxis()) - ); - - // compute number of observations in each observation bin $M_j$ - M_j = xt::sum(msk_bins, -1); - - // compute subsample relative frequency - // $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$ - bar_y_j = xt::where( - M_j > 0, - xt::sum( - xt::where( - msk_bins, - xt::view(y_k, xt::newaxis(), - xt::all(), xt::all()), - 0. - ), - -1 - ) / M_j, - 0. - ); - - // compute mean "climatology" forecast probability - // $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$ - bar_y = xt::mean(y_k, -1); - - // calculate type 2 bias = - // $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$ - xt::col(BS_LBD, 0) = - xt::sum( - xt::square( - xt::view(o_j, xt::all(), xt::newaxis()) - - bar_y_j - ) * M_j, - 0 - ) / n; - - // calculate discrimination = - // $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$ - xt::col(BS_LBD, 1) = - xt::sum( - xt::square( - bar_y_j - bar_y - ) * M_j, - 0 - ) / n; - - // calculate sharpness = - // $\frac{1}{n} \sum_{k=1}^{n} (\bar{y_k} - \bar{y})^2$ - xt::col(BS_LBD, 2) = - xt::sum( - xt::square( - y_k - - xt::view(bar_y, xt::all(), xt::newaxis()) - ), - -1 - ) / n; + // declare and initialise output variable + // shape: (subsets, thresholds, components) + BS_LBD = xt::zeros<double>({n_msk, n_thr, n_cmp}); + + // compute variable one mask at a time to minimise memory imprint + for (int m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN); + auto y_k_masked = xt::where(xt::row(t_msk, m), y_k, NAN); + + // calculate length of subset + auto l = xt::sum(xt::row(t_msk, m)); + + // compute mask to subsample time steps belonging to same observation bin + // (where bins are defined as the range of forecast values) + msk_bins = xt::equal( + o_k_masked, + xt::view(o_j, xt::all(), xt::newaxis(), xt::newaxis()) + ); + + // compute number of observations in each observation bin $M_j$ + M_j = xt::nansum(msk_bins, -1); + + // compute subsample relative frequency + // $\bar{y_j} = \frac{1}{M_j} \sum_{k \in M_j} y_k$ + bar_y_j = xt::where( + M_j > 0, + xt::nansum( + xt::where( + msk_bins, + xt::view( + y_k_masked, + xt::newaxis(), xt::all(), xt::all() + ), + 0. + ), + -1 + ) / M_j, + 0. + ); + + // compute mean "climatology" forecast probability + // $\bar{y} = \frac{1}{n} \sum_{k=1}^{n} y_k$ + bar_y = xt::nanmean(y_k_masked, -1); + + // calculate type 2 bias = + // $\frac{1}{n} \sum_{j=1}^{J} M_j (o_j - \bar{y_j})^2$ + xt::view(BS_LBD, m, xt::all(), 0) = + xt::nansum( + xt::square( + xt::view(o_j, xt::all(), xt::newaxis()) + - bar_y_j + ) * M_j, + 0 + ) / l; + + // calculate discrimination = + // $\frac{1}{n} \sum_{j=1}^{J} M_j (\bar{y_j} - \bar{y})^2$ + xt::view(BS_LBD, m, xt::all(), 1) = + xt::nansum( + xt::square( + bar_y_j - bar_y + ) * M_j, + 0 + ) / l; + + // calculate sharpness = + // $\frac{1}{n} \sum_{k=1}^{n} (\bar{y_k} - \bar{y})^2$ + xt::view(BS_LBD, m, xt::all(), 2) = + xt::nansum( + xt::square( + y_k_masked - + xt::view(bar_y, xt::all(), xt::newaxis()) + ), + -1 + ) / l; + } } - // Compute the Brier skill score for each time step. + // Compute the Brier skill score (BSS). // + // \require t_msk: + // Temporal subsets of the whole record. + // shape: (subsets, time) // \require o_k: // Observed event outcome. // shape: (thresholds, time) // \require bar_o: // Mean event observed outcome. - // shape: (thresholds,) + // shape: (subsets, thresholds) // \require bs: // Brier scores for each time step. // shape: (thresholds, time) - // \assign bss: - // 2D array of Brier skill score for each threshold for each time step. - // shape: (thresholds, time) - void Evaluator::calc_bss() - { - // calculate reference Brier score(s) - // $bs_{ref} = \frac{1}{n} \sum_{k=1}^{n} (o_k - \bar{o})^2$ - xt::xtensor<double, 2> bs_ref = - xt::mean( - xt::square( - o_k - - xt::view(bar_o, xt::all(), xt::newaxis()) - ), - -1, xt::keep_dims - ); - - // return computed Brier skill score(s) - // $bss = 1 - \frac{bs}{bs_{ref}} - bss = 1 - (bs / bs_ref); - } - - // Compute the Brier skill score (BSS). - // - // \require bss: - // 2D array of Brier skill score for each threshold for each time step. - // shape: (thresholds, time) // \assign BSS: - // 2D array of Brier skill score for each threshold. - // shape: (thresholds, 1) + // Brier skill score for each subset and for each threshold. + // shape: (subsets, thresholds, 1) void Evaluator::calc_BSS() { - // calculate the mean over the time steps - // $BSS = \frac{1}{n} \sum_{k=1}^{n} bss$ - BSS = xt::mean(bss, -1, xt::keep_dims); + // define some dimensions + std::size_t n_thr = o_k.shape(0); + std::size_t n_msk = t_msk.shape(0); + + // declare and initialise output variable + // shape: (subsets, thresholds, components) + BSS = xt::zeros<double>({n_msk, n_thr, std::size_t {1}}); + + // compute variable one mask at a time to minimise memory imprint + for (int m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto o_k_masked = xt::where(xt::row(t_msk, m), o_k, NAN); + auto bs_masked = xt::where(xt::row(t_msk, m), bs, NAN); + + // calculate reference Brier score(s) + // $bs_{ref} = \frac{1}{n} \sum_{k=1}^{n} (o_k - \bar{o})^2$ + xt::xtensor<double, 2> bs_ref = + xt::nanmean( + xt::square( + o_k_masked - + xt::view( + xt::row(bar_o, m), + xt::all(), xt::newaxis() + ) + ), + -1, xt::keep_dims + ); + + // compute Brier skill score(s) + // $BSS = \frac{1}{n} \sum_{k=1}^{n} 1 - \frac{bs}{bs_{ref}} + xt::view(BSS, m, xt::all(), xt::all()) = + xt::nanmean(1 - (bs_masked / bs_ref), -1, xt::keep_dims); + } } } } diff --git a/include/evalhyd/probabilist/evaluator_elements.cpp b/include/evalhyd/probabilist/evaluator_elements.cpp index 1618d8f0fa194bdd6a22a2e25490aa5fe6674239..49e98845635433987aa2a29c5c918ba98e7deac8 100644 --- a/include/evalhyd/probabilist/evaluator_elements.cpp +++ b/include/evalhyd/probabilist/evaluator_elements.cpp @@ -1,4 +1,5 @@ #include <xtensor/xmath.hpp> +#include <xtensor/xview.hpp> #include <xtensor/xsort.hpp> #include "evaluator.h" @@ -10,13 +11,13 @@ namespace evalhyd // Determine observed realisation of threshold(s) exceedance. // // \require q_obs: - // 2D array of streamflow observations. + // Streamflow observations. // shape: (1, time) // \require q_thr: - // 1D array of streamflow exceedance threshold(s). + // Streamflow exceedance threshold(s). // shape: (thresholds,) // \assign o_k: - // 2D array of event observed outcome. + // Event observed outcome. // shape: (thresholds, time) void Evaluator::calc_o_k() { @@ -27,28 +28,38 @@ namespace evalhyd // Determine mean observed realisation of threshold(s) exceedance. // // \require o_k: - // 2D array of event observed outcome. + // Event observed outcome. // shape: (thresholds, time) + // \require t_msk: + // Temporal subsets of the whole record. + // shape: (subsets, time) // \assign bar_o: - // 1D array of mean event observed outcome. - // shape: (thresholds,) + // Mean event observed outcome. + // shape: (subsets, thresholds) void Evaluator::calc_bar_o() { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto o_k_masked = xt::where( + xt::view(t_msk, xt::all(), xt::newaxis(), xt::all()), + o_k, NAN + ); + // compute mean "climatology" relative frequency of the event // $\bar{o} = \frac{1}{n} \sum_{k=1}^{n} o_k$ - bar_o = xt::mean(o_k, -1); + bar_o = xt::nanmean(o_k_masked, -1); } // Determine forecast probability of threshold(s) exceedance to occur. // // \require q_prd: - // 2D array of streamflow predictions. + // Streamflow predictions. // shape: (members, time) // \require q_thr: - // 1D array of streamflow exceedance threshold(s). + // Streamflow exceedance threshold(s). // shape: (thresholds,) // \assign y_k: - // 2D array of event probability forecast. + // Event probability forecast. // shape: (thresholds, time) void Evaluator::calc_y_k() { @@ -72,10 +83,10 @@ namespace evalhyd // Compute the forecast quantiles from the ensemble members. // // \require q_prd: - // 2D array of streamflow predictions. + // Streamflow predictions. // shape: (members, time) // \assign q_qnt: - // 2D array of streamflow forecast quantiles. + // Streamflow forecast quantiles. // shape: (quantiles, time) void Evaluator::calc_q_qnt() { diff --git a/include/evalhyd/probabilist/evaluator_quantiles.cpp b/include/evalhyd/probabilist/evaluator_quantiles.cpp index 89b54ce453207c03655c24cce6520eeb7a0878ce..7170732732fce65d595f13f8398758f08e234c6b 100644 --- a/include/evalhyd/probabilist/evaluator_quantiles.cpp +++ b/include/evalhyd/probabilist/evaluator_quantiles.cpp @@ -14,13 +14,13 @@ namespace evalhyd // Compute the quantile scores for each time step. // // \require q_obs: - // 2D array of streamflow observations. + // Streamflow observations. // shape: (1, time) // \require q_qnt: - // 2D array of streamflow quantiles. + // Streamflow quantiles. // shape: (quantiles, time) // \assign qs: - // 2D array of the quantile scores for each time step. + // Quantile scores for each time step. // shape: (quantiles, time) void Evaluator::calc_qs() { @@ -45,17 +45,37 @@ namespace evalhyd // Compute the quantile score (QS). // + // \require t_msk: + // Temporal subsets of the whole record. + // shape: (subsets, time) // \require qs: - // 2D array of quantile scores for each time step. + // Quantile scores for each time step. // shape: (quantiles, time) // \assign QS: - // 2D array of quantile scores. - // shape: (quantiles, 1) + // Quantile scores. + // shape: (subsets, quantiles, 1) void Evaluator::calc_QS() { - // calculate the mean over the time steps - // $QS = \frac{1}{n} \sum_{k=1}^{n} qs$ - QS = xt::mean(qs, -1, xt::keep_dims); + // define some dimensions + std::size_t n_msk = t_msk.shape(0); + std::size_t n_qnt = qs.shape(0); + + // initialise output variable + // shape: (subsets, thresholds, 1) + QS = xt::zeros<double>({n_msk, n_qnt, std::size_t {1}}); + + // compute variable one mask at a time to minimise memory imprint + for (int m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto qs_masked = xt::where(xt::row(t_msk, m), qs, NAN); + + // calculate the mean over the time steps + // $QS = \frac{1}{n} \sum_{k=1}^{n} qs$ + xt::view(QS, m, xt::all(), xt::all()) = + xt::nanmean(qs_masked, -1, xt::keep_dims); + } } // Compute the continuous rank probability score(s) based @@ -67,10 +87,10 @@ namespace evalhyd // integration to be accurate. // // \require qs: - // 2D array of quantile scores for each time step. + // Quantile scores for each time step. // shape: (quantiles, time) // \assign crps: - // 2D array of CRPS for each time step. + // CRPS for each time step. // shape: (1, time) void Evaluator::calc_crps() { @@ -88,17 +108,36 @@ namespace evalhyd // Compute the continuous rank probability score (CRPS) based // on quantile scores. // + // \require t_msk: + // Temporal subsets of the whole record. + // shape: (subsets, time) // \require crps: - // 2D array of CRPS for each time step. + // CRPS for each time step. // shape: (quantiles, time) // \assign CRPS: - // 2D array containing the CRPS. - // shape: (1, 1) + // CRPS. + // shape: (subsets, 1, 1) void Evaluator::calc_CRPS() { - // calculate the mean over the time steps - // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$ - CRPS = xt::mean(crps, -1, xt::keep_dims); + // define some dimensions + std::size_t n_msk = t_msk.shape(0); + + // initialise output variable + // shape: (subsets, thresholds, 1) + CRPS = xt::zeros<double>({n_msk, std::size_t {1}, std::size_t {1}}); + + // compute variable one mask at a time to minimise memory imprint + for (int m = 0; m < n_msk; m++) + { + // apply the mask + // (using NaN workaround until reducers work on masked_view) + auto crps_masked = xt::where(xt::row(t_msk, m), crps, NAN); + + // calculate the mean over the time steps + // $CRPS = \frac{1}{n} \sum_{k=1}^{n} crps$ + xt::view(CRPS, m, xt::all(), xt::all()) = + xt::nanmean(crps_masked, -1, xt::keep_dims); + } } } } diff --git a/include/evalhyd/probabilist/evaluator_utils.cpp b/include/evalhyd/probabilist/evaluator_utils.cpp index 720ad75ee7fe0d60e0640351dc561370f9b12278..923cc3aef2ce64446d56188104e4e14861a9b74e 100644 --- a/include/evalhyd/probabilist/evaluator_utils.cpp +++ b/include/evalhyd/probabilist/evaluator_utils.cpp @@ -8,13 +8,13 @@ namespace evalhyd // Determine whether flows are greater than given threshold(s). // // \param q: - // Array of streamflow data. + // Streamflow data. // shape: (1+, time) // \param thr: - // Array of streamflow thresholds. + // Streamflow thresholds. // shape: (thresholds,) // \return - // Array of boolean-like threshold(s) exceedance. + // Boolean-like threshold(s) exceedance. // shape: (thresholds, 1+, time) xt::xtensor<double, 3> is_above_threshold( const xt::xtensor<double, 2> &q, @@ -27,13 +27,13 @@ namespace evalhyd // Determine whether flows are strictly lower than given threshold(s) // // \param q: - // Array of streamflow data. + // Streamflow data. // shape: (1+, time) // \param thr: - // Array of streamflow thresholds. + // Streamflow thresholds. // shape: (thresholds,) // \return - // Array of boolean-like threshold(s) exceedance. + // Boolean-like threshold(s) exceedance. // shape: (thresholds, 1+, time) xt::xtensor<double, 3> is_below_threshold( const xt::xtensor<double, 2> &q, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b162b136d4972424621654608e11f60befafb464..9b62835030aae524e69ccbb76143e57334732dfc 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -19,6 +19,9 @@ include_directories("../deps/xtensor/include") include_directories(../include) +SET(GCC_EVALHYD_COMPILE_FLAGS "-O3") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_EVALHYD_COMPILE_FLAGS}") + # TEST SUITE ------------------------------------------------------------------- include_directories(data) diff --git a/tests/test_probabilist.cpp b/tests/test_probabilist.cpp index bfb3c457de71ba6cb2a0119eabc211200c8413d2..7bc1734953d9aba500179fdc6610e4e5229d4498 100644 --- a/tests/test_probabilist.cpp +++ b/tests/test_probabilist.cpp @@ -22,7 +22,7 @@ TEST(ProbabilistTests, TestBrier) { // compute scores xt::xtensor<double, 1> thresholds = {690, 534, 445}; - std::vector<xt::xtensor<double, 2>> metrics = + std::vector<xt::xtensor<double, 3>> metrics = evalhyd::evalp( observed, predicted, {"BS", "BSS", "BS_CRD", "BS_LBD"}, thresholds @@ -30,30 +30,30 @@ TEST(ProbabilistTests, TestBrier) { // check results // Brier scores - xt::xtensor<double, 2> bs = - {{0.10615136}, - {0.07395622}, - {0.08669186}}; + xt::xtensor<double, 3> bs = + {{{0.10615136}, + {0.07395622}, + {0.08669186}}}; EXPECT_TRUE(xt::allclose(metrics[0], bs)); // Brier skill scores - xt::xtensor<double, 2> bss = - {{0.5705594}, - {0.6661165}, - {0.5635126}}; + xt::xtensor<double, 3> bss = + {{{0.5705594}, + {0.6661165}, + {0.5635126}}}; EXPECT_TRUE(xt::allclose(metrics[1], bss)); // Brier calibration-refinement decompositions - xt::xtensor<double, 2> bs_crd = - {{0.011411758, 0.1524456, 0.2471852}, - {0.005532413, 0.1530793, 0.2215031}, - {0.010139431, 0.1220601, 0.1986125}}; + xt::xtensor<double, 3> bs_crd = + {{{0.011411758, 0.1524456, 0.2471852}, + {0.005532413, 0.1530793, 0.2215031}, + {0.010139431, 0.1220601, 0.1986125}}}; EXPECT_TRUE(xt::allclose(metrics[2], bs_crd)); - // Brier likelihood-based decompositions - xt::xtensor<double, 2> bs_lbd = - {{0.012159881, 0.1506234, 0.2446149}, - {0.008031746, 0.1473869, 0.2133114}, - {0.017191279, 0.1048221, 0.1743227}}; + // Brier likelihood-base rate decompositions + xt::xtensor<double, 3> bs_lbd = + {{{0.012159881, 0.1506234, 0.2446149}, + {0.008031746, 0.1473869, 0.2133114}, + {0.017191279, 0.1048221, 0.1743227}}}; EXPECT_TRUE(xt::allclose(metrics[3], bs_lbd)); }