Commit abb5a04f authored by Dorchies David's avatar Dorchies David
Browse files

fix(SeriesAggreg): force mean calculation for regime

- refactor SeriesAggreg.data.frame
- add default value for FunConvert

Refs #41
parent 538bf8fc
SeriesAggreg.data.frame <- function(TabSeries, Format, ConvertFun, TimeFormat = NULL, NewTimeFormat = NULL,
YearFirstMonth = 1, TimeLag = 0, ...) {
SeriesAggreg.data.frame <- function(TabSeries,
Format,
ConvertFun = .AggregConvertFun(names(TabSeries)[-1]),
TimeFormat = NULL,
NewTimeFormat = NULL,
YearFirstMonth = 1,
TimeLag = 0,
...) {
## Arguments checks
if (!is.null(TimeFormat)) {
warning("deprecated 'TimeFormat' argument", call. = FALSE)
......@@ -8,13 +13,13 @@ SeriesAggreg.data.frame <- function(TabSeries, Format, ConvertFun, TimeFormat =
if (missing(Format)) {
Format <- getSeriesAggregFormat(NewTimeFormat)
} else if (!is.null(NewTimeFormat)) {
warning("deprecated 'NewTimeFormat' argument: 'Format' argument is used instead",
warning("deprecated 'NewTimeFormat' argument: 'Format' argument is used instead",
call. = FALSE)
}
if (is.null(Format)) {
stop("argument 'Format' is missing")
}
## check TabSeries
if (!is.data.frame(TabSeries)) {
stop("'TabSeries' must be a data.frame containing the dates and data to be aggregated")
......@@ -30,28 +35,37 @@ SeriesAggreg.data.frame <- function(TabSeries, Format, ConvertFun, TimeFormat =
TabSeries[[1L]] <- as.POSIXct(TabSeries[[1L]])
}
## check TabSeries other columns (boolean converted to numeric)
apply(TabSeries[, -1L, drop = FALSE], MARGIN = 2, FUN = function(iCol) {
if (!is.numeric(iCol)) {
stop("'TabSeries' columns (other than the first one) must be of numeric class")
apply(
TabSeries[, -1L, drop = FALSE],
MARGIN = 2,
FUN = function(iCol) {
if (!is.numeric(iCol)) {
stop("'TabSeries' columns (other than the first one) must be of numeric class")
}
}
})
)
## check Format
listFormat <- c("%Y%m%d", "%Y%m", "%Y", "%m", "%d")
Format <- gsub(pattern = "[[:punct:]]+", replacement = "%", Format)
Format <-
gsub(pattern = "[[:punct:]]+", replacement = "%", Format)
Format <- match.arg(Format, choices = listFormat)
## check ConvertFun
listConvertFun <- c("sum", "mean")
ConvertFun <- listConvertFun[match(ConvertFun, listConvertFun)]
listConvertFun <- list(sum = sum, mean = mean)
ConvertFun <-
names(listConvertFun)[match(ConvertFun, names(listConvertFun))]
if (anyNA(ConvertFun)) {
stop("'ConvertFun' should be a one of 'sum' or 'mean'")
}
if (length(ConvertFun) != (ncol(TabSeries) - 1)) {
stop(
sprintf("'ConvertFun' must be of length %i (ncol(TabSeries)-1)", ncol(TabSeries) - 1)
)
stop(sprintf(
"'ConvertFun' must be of length %i (ncol(TabSeries)-1)",
ncol(TabSeries) - 1
))
}
## check YearFirstMonth
msgYearFirstMonth <- "'YearFirstMonth' should be a single vector of numeric value between 1 and 12"
msgYearFirstMonth <-
"'YearFirstMonth' should be a single vector of numeric value between 1 and 12"
YearFirstMonth <- match(YearFirstMonth, 1:12)
if (anyNA(YearFirstMonth)) {
stop(msgYearFirstMonth)
......@@ -63,7 +77,8 @@ SeriesAggreg.data.frame <- function(TabSeries, Format, ConvertFun, TimeFormat =
warning("'YearFirstMonth' is ignored because Format != '%Y'")
}
## check TimeLag
msgTimeLag <- "'TimeLag' should be a single vector of a positive numeric value"
msgTimeLag <-
"'TimeLag' should be a single vector of a positive numeric value"
if (!is.vector(TimeLag)) {
stop(msgTimeLag)
}
......@@ -82,13 +97,24 @@ SeriesAggreg.data.frame <- function(TabSeries, Format, ConvertFun, TimeFormat =
TabSeries2 <- TabSeries0
if (!Format %in% c("%d", "%m")) {
start <- sprintf("%i-01-01 00:00:00", as.numeric(format(TabSeries2$DatesR[1L], format = "%Y"))-1)
stop <- sprintf("%i-12-31 00:00:00", as.numeric(format(TabSeries2$DatesR[nrow(TabSeries2)], format = "%Y"))+1)
by <- ifelse(grepl("hours", format(diff(TabSeries$DatesR[1:2]))), yes = "hours", no = "days")
fakeTs <- data.frame(DatesR = seq(from = as.POSIXct(start, tz = "UTC"),
to = as.POSIXct(stop , tz = "UTC"),
by = by) + TimeLag)
TabSeries2 <- merge(fakeTs, TabSeries2, by = "DatesR", all.x = TRUE)
start <-
sprintf("%i-01-01 00:00:00", as.numeric(format(TabSeries2$DatesR[1L], format = "%Y")) -
1)
stop <-
sprintf("%i-12-31 00:00:00", as.numeric(format(TabSeries2$DatesR[nrow(TabSeries2)], format = "%Y")) +
1)
by <-
ifelse(grepl("hours", format(diff(
TabSeries$DatesR[1:2]
))), yes = "hours", no = "days")
fakeTs <-
data.frame(DatesR = seq(
from = as.POSIXct(start, tz = "UTC"),
to = as.POSIXct(stop , tz = "UTC"),
by = by
) + TimeLag)
TabSeries2 <-
merge(fakeTs, TabSeries2, by = "DatesR", all.x = TRUE)
}
TabSeries2$DatesRini <- TabSeries2$DatesR %in% TabSeries0$DatesR
......@@ -99,7 +125,9 @@ SeriesAggreg.data.frame <- function(TabSeries, Format, ConvertFun, TimeFormat =
# Compute aggregation
TabSeries2$Selec <- !duplicated(TabSeries2$Selec2)
if (all(TabSeries2$Selec)) {
warning("the requested time 'Format' is the same as the one in 'TabSeries'. No time-step conversion was performed")
warning(
"the requested time 'Format' is the same as the one in 'TabSeries'. No time-step conversion was performed"
)
return(TabSeries)
}
if (Format == "%Y") {
......@@ -108,7 +136,8 @@ SeriesAggreg.data.frame <- function(TabSeries, Format, ConvertFun, TimeFormat =
spF2 <- "%Y-%m"
TabSeries2$Selec1 <- format(TabSeries2$DatesR, spF1)
TabSeries2$Selec2 <- format(TabSeries2$DatesR, spF2)
TabSeries2$Selec <- !duplicated(TabSeries2$Selec2) & TabSeries2$Selec1 == yfm
TabSeries2$Selec <-
!duplicated(TabSeries2$Selec2) & TabSeries2$Selec1 == yfm
}
TabSeries2$Fac2 <- cumsum(TabSeries2$Selec)
} else {
......@@ -119,27 +148,34 @@ SeriesAggreg.data.frame <- function(TabSeries, Format, ConvertFun, TimeFormat =
}
TabSeries2$Fac2 <- TabSeries2$Selec2
TabSeries2$Selec <- !duplicated(TabSeries2$Selec2)
}
if ("mean" %in% ConvertFun) {
colTsAggregMean <- c("Fac2", colnames(TabSeries)[-1L][ConvertFun == "mean"])
tsAggregMean <- aggregate(. ~ Fac2, data = TabSeries2[, colTsAggregMean], FUN = mean, na.action = na.pass)
} else {
tsAggregMean <- data.frame(a = NA, b = NA)
}
if ("sum" %in% ConvertFun) {
colTsAggregSum <- c("Fac2", colnames(TabSeries)[-1L][ConvertFun == "sum"])
tsAggregSum <- aggregate(. ~ Fac2, data = TabSeries2[, colTsAggregSum], FUN = sum, na.action = na.pass)
} else {
tsAggregSum <- data.frame(a = NA, b = NA)
}
tsAggreg <- cbind(tsAggregMean, tsAggregSum)
ConvertFun <- rep("mean", ncol(TabSeries) - 1)
}
#browser()
listTsAggreg <- lapply(names(listConvertFun), function(x) {
if (any(ConvertFun == x)) {
colTsAggreg <-
c("Fac2", colnames(TabSeries)[-1L][ConvertFun == x])
aggregate(. ~ Fac2,
data = TabSeries2[, colTsAggreg],
FUN = listConvertFun[[x]],
na.action = na.pass)
} else {
NULL
}
})
listTsAggreg <- listTsAggreg[!sapply(listTsAggreg, is.null)]
tsAggreg <- do.call(cbind, listTsAggreg)
tsAggreg <- tsAggreg[, !duplicated(colnames(tsAggreg))]
tsAggreg <- merge(tsAggreg, TabSeries2[, c("Fac2", "DatesR", "DatesRini", "Selec")], by = "Fac2", all.x = TRUE, all.y = FALSE)
tsAggreg <-
merge(
tsAggreg,
TabSeries2[, c("Fac2", "DatesR", "DatesRini", "Selec")],
by = "Fac2",
all.x = TRUE,
all.y = FALSE
)
tsAggreg <- tsAggreg[tsAggreg$Selec & tsAggreg$DatesRini, ]
tsAggreg <- tsAggreg[, colnames(TabSeries0)]
return(tsAggreg)
}
......@@ -29,8 +29,13 @@ Warning: on the aggregated outputs, the dates correspond to the beginning of the
\usage{
\method{SeriesAggreg}{data.frame}(TabSeries,
Format, ConvertFun, TimeFormat = NULL, NewTimeFormat = NULL,
YearFirstMonth = 1, TimeLag = 0, \dots)
Format,
ConvertFun = .AggregConvertFun(names(TabSeries)[-1]),
TimeFormat = NULL,
NewTimeFormat = NULL,
YearFirstMonth = 1,
TimeLag = 0,
\dots)
\method{SeriesAggreg}{list}(TabSeries,
Format,
......@@ -56,7 +61,7 @@ YearFirstMonth = 1, TimeLag = 0, \dots)
\item{NewTimeFormat}{(deprecated) [character] output time step format (i.e. \code{"hourly"}, \code{"daily"}, \code{"monthly"} or \code{"yearly"}). Use the \code{TabSeries} argument instead}
\item{ConvertFun}{[character] names of aggregation functions (e.g. for P[mm], T[degC], Q[mm]: \code{ConvertFun = c("sum", "mean", "sum"}))}
\item{ConvertFun}{[character] names of aggregation functions (e.g. for P[mm], T[degC], Q[mm]: \code{ConvertFun = c("sum", "mean", "sum"})) (Default: use the name of the column or "mean" for regime calculation)}
\item{YearFirstMonth}{(optional) [numeric] integer used when \code{Format = "\%Y"} to set when the starting month of the year (e.g. 01 for calendar year or 09 for hydrological year starting in September)}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment