An error occurred while loading the file. Please try again.
-
Thibault Hallouin authoreda81fd6b6
// Copyright (c) 2023, INRAE.
// Distributed under the terms of the GPL-3 Licence.
// The full licence is in the file LICENCE, distributed with this software.
#ifndef EVALHYD_UNCERTAINTY_HPP
#define EVALHYD_UNCERTAINTY_HPP
#include <string>
#include <vector>
#include <array>
#include <ctime>
#include <chrono>
#include <iomanip>
#include <stdexcept>
#include <xtensor/xtensor.hpp>
#include <xtensor/xadapt.hpp>
#include <xtensor/xrandom.hpp>
#include <xtensor/xio.hpp>
#include "maths.hpp"
typedef std::chrono::time_point<
std::chrono::system_clock, std::chrono::minutes
> tp_minutes;
namespace evalhyd
{
namespace uncertainty
{
inline auto bootstrap(
const std::vector<std::string>& datetimes,
int n_samples, int len_sample
)
{
// convert string to time_point (via tm)
std::vector<std::tm> v_tm;
std::vector<tp_minutes> v_timepoints;
for (auto const& str: datetimes)
{
// convert string to tm
std::tm tm = {};
std::istringstream ss(str);
ss >> std::get_time(&tm, "%Y-%m-%d %H:%M:%S");
if (ss.fail())
{
throw std::runtime_error("datetime string parsing failed");
}
tm.tm_year += 400; // add 400y to avoid dates prior 1970
// while preserving leap year pattern
v_tm.push_back(tm);
// convert tm to time_point
auto tp = std::chrono::system_clock::from_time_t(std::mktime(&tm));
v_timepoints.push_back(
std::chrono::time_point_cast<std::chrono::minutes>(tp)
);
}
// adapt vector into xtensor
xt::xtensor<tp_minutes, 1> x_timepoints = xt::adapt(v_timepoints);
// check constant time interval
auto ti = x_timepoints[1] - x_timepoints[0];
for (std::size_t t = 1; t < x_timepoints.size() - 1; t++)
{
if (x_timepoints[t + 1] - x_timepoints[t] != ti)
{
throw std::runtime_error(
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
"time interval not constant across datetimes"
);
}
}
// identify start and end years for period
int start_yr = v_tm.front().tm_year + 1900;
int end_yr = v_tm.back().tm_year + 1900;
// assume start of year block as start of time series
std::tm start_hy = v_tm.front();
xt::xtensor<int, 1> year_blocks = xt::zeros<int>({v_tm.size()});
for (int y = start_yr; y < end_yr; y++)
{
// define window for year blocks
start_hy.tm_year = y - 1900;
auto start = std::chrono::system_clock::from_time_t(
std::mktime(&start_hy)
);
start_hy.tm_year += 1;
auto end = std::chrono::system_clock::from_time_t(
std::mktime(&start_hy)
);
xt::xtensor<bool, 1> wdw =
(x_timepoints >= start) && (x_timepoints < end);
// check that year is complete (without a rigorous leap year check)
int n_days = xt::sum(wdw)();
if ((n_days != 365) && (n_days != 366))
{
throw std::runtime_error(
"year starting in " + std::to_string(y)
+ " is incomplete"
);
}
// determine corresponding year block for each time step
year_blocks = xt::where(wdw, y, year_blocks);
}
// check that time series ends on the last day of a year block
if (year_blocks(year_blocks.size() - 1) == 0)
{
throw std::runtime_error(
"final day of final year not equal to first day of "
"first year minus one time step"
);
}
// generate bootstrapping experiment
xt::xtensor<int, 2> experiment = xt::random::randint(
{n_samples, len_sample}, start_yr, end_yr
);
std::vector<xt::xkeep_slice<int>> samples;
// compute metrics for each sample
for (int s = 0; s < n_samples; s++)
{
// select bootstrapped years
auto exp = xt::view(experiment, s);
auto i0 = xt::flatten_indices(
xt::argwhere(xt::equal(year_blocks, exp(0)))
);
auto i1 = xt::flatten_indices(
xt::argwhere(xt::equal(year_blocks, exp(1)))
);
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
xt::xtensor<int, 1> idx = xt::concatenate(xt::xtuple(i0, i1), 0);
for (std::size_t p = 2; p < exp.size(); p++)
{
auto i = xt::flatten_indices(
xt::argwhere(xt::equal(year_blocks, exp(p)))
);
idx = xt::concatenate(xt::xtuple(idx, i), 0);
}
samples.push_back(xt::keep(idx));
}
return samples;
}
inline auto summarise(const xt::xarray<double>& values, int summary)
{
// TODO: wait for xt::quantile to be available
// or implement it internally for n-dim expressions
// summary 2: series of quantiles across samples
if (summary == 2)
{
// // adjust last axis size (from samples to number of statistics)
// auto s = values.shape();
// s.pop_back();
// s.push_back(7);
// xt::xarray<double> v = xt::zeros<double>(s);
// // quantiles
// xt::view(v, xt::all()) =
// xt::quantile(values, {0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95}, -1);
// return v;
return values;
}
// TODO: wait for xt::stddev to be fixed for rtensor
// or implement it internally for n-dim expressions
// summary 1: mean and standard deviation across samples
else if (summary == 1)
{
// // adjust last axis size (from samples to number of statistics)
// auto s = values.shape();
// s.pop_back();
// s.push_back(2);
// xt::xarray<double> v = xt::zeros<double>(s);
// // mean
// xt::strided_view(s, xt::ellipsis(), 0) = xt::mean(values, -1);
// // standard deviation
// xt::strided_view(s, xt::ellipsis(), 1) = xt::stddev(values, -1);
// return v;
return values;
}
// summary 0: raw (keep all samples)
else
{
return values;
}
}
}
}
#endif //EVALHYD_UNCERTAINTY_HPP