diff --git a/R/SeriesAggreg.R b/R/SeriesAggreg.R index 4a819fb9b633a549827ec59dc1ecbcf3b9e24a2a..599c412865b62b5c48a60df7f744e72ad19ec79a 100644 --- a/R/SeriesAggreg.R +++ b/R/SeriesAggreg.R @@ -1,112 +1,112 @@ -SeriesAggreg <- function(TabSeries,TimeFormat,NewTimeFormat,ConvertFun,YearFirstMonth=01, verbose = TRUE){ - - +SeriesAggreg_new <- function(TabSeries, TimeFormat, NewTimeFormat, ConvertFun, YearFirstMonth = 01, TimeLag = 0, verbose = TRUE){ + + ##_Arguments_check - - ##check_TabSeries - if(is.null(TabSeries) ){ stop("TabSeries must be a dataframe containing the dates and data to be converted \n"); return(NULL); } - if(!is.data.frame(TabSeries)){ stop("TabSeries must be a dataframe containing the dates and data to be converted \n"); return(NULL); } - if(ncol(TabSeries)<2){ stop("TabSeries must contain at least two columns (including the coulmn of dates \n"); return(NULL); } - if("POSIXlt" %in% class(TabSeries[,1]) == FALSE & "POSIXct" %in% class(TabSeries[,1]) == FALSE){ stop("TabSeries first column must be a vector of class POSIXlt or POSIXct \n"); return(NULL); } - for(iCol in 2:ncol(TabSeries)){ - if(!is.numeric(TabSeries[,iCol])){ stop("TabSeries columns (other than the first one) be of numeric class \n"); return(NULL); } } - ##check_TimeFormat - if(is.null( TimeFormat)){ stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - if(!is.vector( TimeFormat)){ stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - if(!is.character(TimeFormat)){ stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - if(length(TimeFormat)!=1 ){ stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - if(TimeFormat %in% c("hourly","daily","monthly","yearly")==FALSE){ - stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - ##check_NewTimeFormat - if(is.null( NewTimeFormat)){ stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - if(!is.vector( NewTimeFormat)){ stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - if(!is.character(NewTimeFormat)){ stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - if(length(NewTimeFormat)!=1 ){ stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - if(NewTimeFormat %in% c("hourly","daily","monthly","yearly")==FALSE){ - stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } - ##check_ConvertFun - if(is.null( ConvertFun)){ stop("ConvertFun must be a vector of character \n"); return(NULL); } - if(!is.vector( ConvertFun)){ stop("ConvertFun must be a vector of character \n"); return(NULL); } - if(!is.character(ConvertFun)){ stop("ConvertFun must be a vector of character \n"); return(NULL); } - if(length(ConvertFun)!=(ncol(TabSeries)-1)){ stop(paste("ConvertFun must be of length ",ncol(TabSeries)-1," (length=ncol(TabSeries)-1) \n",sep="")); return(NULL); } - if(sum(ConvertFun %in% c("sum","mean")==FALSE)!=0){ stop("ConvertFun elements must be either 'sum' or 'mean' \n"); return(NULL); } - ##check_YearFirstMonth - if(is.null( YearFirstMonth)){ stop("YearFirstMonth must be an integer between 1 and 12 \n"); return(NULL); } - if(!is.vector( YearFirstMonth)){ stop("YearFirstMonth must be an integer between 1 and 12 \n"); return(NULL); } - if(!is.numeric(YearFirstMonth)){ stop("YearFirstMonth must be an integer between 1 and 12 \n"); return(NULL); } - YearFirstMonth <- as.integer(YearFirstMonth); - if(length(YearFirstMonth)!=1){ stop(paste("YearFirstMonth must be only one integer between 1 and 12 \n",sep="")); return(NULL); } - if(YearFirstMonth %in% (1:12) == FALSE){ stop(paste("YearFirstMonth must be only one integer between 1 and 12 \n",sep="")); return(NULL); } - ##check_DatesR_integrity - if(TimeFormat=="hourly" ){ by <- "hours" ; } - if(TimeFormat=="daily" ){ by <- "days" ; } - if(TimeFormat=="monthly"){ by <- "months"; } - if(TimeFormat=="yearly" ){ by <- "years" ; } - TmpDatesR <- seq(from=TabSeries[1,1],to=tail(TabSeries[,1],1),by=by) - if(!identical(TabSeries[,1],TmpDatesR)){ stop("Problem detected in TabSeries dates vector (in comparison with seq(from=TabSeries[1,1],to=tail(TabSeries[,1],1))) \n"); return(NULL); } - ##check_conversion_direction - if((TimeFormat == "daily" & NewTimeFormat %in% c("hourly") ) | - (TimeFormat == "monthly" & NewTimeFormat %in% c("hourly","daily") ) | - (TimeFormat == "yearly" & NewTimeFormat %in% c("hourly","daily","monthly"))){ - stop("only time aggregation can be performed \n"); return(NULL); } - ##check_if_conversion_not_needed - if((TimeFormat == "hourly" & NewTimeFormat=="hourly" ) | - (TimeFormat == "daily" & NewTimeFormat=="daily" ) | - (TimeFormat == "monthly" & NewTimeFormat=="monthly") | - (TimeFormat == "yearly" & NewTimeFormat=="yearly" )){ - if(verbose){ warning("\t The old and new format are identical \n\t -> no time-step conversion was performed \n"); return(TabSeries); } } - - + + ##check_TabSeries + if(is.null(TabSeries) ){ stop("TabSeries must be a dataframe containing the dates and data to be converted \n"); return(NULL); } + if(!is.data.frame(TabSeries)){ stop("TabSeries must be a dataframe containing the dates and data to be converted \n"); return(NULL); } + if(ncol(TabSeries)<2){ stop("TabSeries must contain at least two columns (including the coulmn of dates \n"); return(NULL); } + if("POSIXlt" %in% class(TabSeries[,1]) == FALSE & "POSIXct" %in% class(TabSeries[,1]) == FALSE){ stop("TabSeries first column must be a vector of class POSIXlt or POSIXct \n"); return(NULL); } + for(iCol in 2:ncol(TabSeries)){ + if(!is.numeric(TabSeries[,iCol])){ stop("TabSeries columns (other than the first one) be of numeric class \n"); return(NULL); } } + ##check_TimeFormat + if(is.null( TimeFormat)){ stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + if(!is.vector( TimeFormat)){ stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + if(!is.character(TimeFormat)){ stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + if(length(TimeFormat)!=1 ){ stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + if(TimeFormat %in% c("hourly","daily","monthly","yearly")==FALSE){ + stop("TimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + ##check_NewTimeFormat + if(is.null( NewTimeFormat)){ stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + if(!is.vector( NewTimeFormat)){ stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + if(!is.character(NewTimeFormat)){ stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + if(length(NewTimeFormat)!=1 ){ stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + if(NewTimeFormat %in% c("hourly","daily","monthly","yearly")==FALSE){ + stop("NewTimeFormat must be 'hourly', 'daily', 'monthly' or 'yearly' \n"); return(NULL); } + ##check_ConvertFun + if(is.null( ConvertFun)){ stop("ConvertFun must be a vector of character \n"); return(NULL); } + if(!is.vector( ConvertFun)){ stop("ConvertFun must be a vector of character \n"); return(NULL); } + if(!is.character(ConvertFun)){ stop("ConvertFun must be a vector of character \n"); return(NULL); } + if(length(ConvertFun)!=(ncol(TabSeries)-1)){ stop(paste("ConvertFun must be of length ",ncol(TabSeries)-1," (length=ncol(TabSeries)-1) \n",sep="")); return(NULL); } + if(sum(ConvertFun %in% c("sum","mean")==FALSE)!=0){ stop("ConvertFun elements must be either 'sum' or 'mean' \n"); return(NULL); } + ##check_YearFirstMonth + if(is.null( YearFirstMonth)){ stop("YearFirstMonth must be an integer between 1 and 12 \n"); return(NULL); } + if(!is.vector( YearFirstMonth)){ stop("YearFirstMonth must be an integer between 1 and 12 \n"); return(NULL); } + if(!is.numeric(YearFirstMonth)){ stop("YearFirstMonth must be an integer between 1 and 12 \n"); return(NULL); } + YearFirstMonth <- as.integer(YearFirstMonth); + if(length(YearFirstMonth)!=1){ stop(paste("YearFirstMonth must be only one integer between 1 and 12 \n",sep="")); return(NULL); } + if(YearFirstMonth %in% (1:12) == FALSE){ stop(paste("YearFirstMonth must be only one integer between 1 and 12 \n",sep="")); return(NULL); } + ##check_DatesR_integrity + if(TimeFormat=="hourly" ){ by <- "hours" ; } + if(TimeFormat=="daily" ){ by <- "days" ; } + if(TimeFormat=="monthly"){ by <- "months"; } + if(TimeFormat=="yearly" ){ by <- "years" ; } + TmpDatesR <- seq(from=TabSeries[1,1],to=tail(TabSeries[,1],1),by=by) + if(!identical(TabSeries[,1],TmpDatesR)){ stop("Problem detected in TabSeries dates vector (in comparison with seq(from=TabSeries[1,1],to=tail(TabSeries[,1],1))) \n"); return(NULL); } + ##check_conversion_direction + if((TimeFormat == "daily" & NewTimeFormat %in% c("hourly") ) | + (TimeFormat == "monthly" & NewTimeFormat %in% c("hourly","daily") ) | + (TimeFormat == "yearly" & NewTimeFormat %in% c("hourly","daily","monthly"))){ + stop("only time aggregation can be performed \n"); return(NULL); } + ##check_if_conversion_not_needed + if((TimeFormat == "hourly" & NewTimeFormat=="hourly" ) | + (TimeFormat == "daily" & NewTimeFormat=="daily" ) | + (TimeFormat == "monthly" & NewTimeFormat=="monthly") | + (TimeFormat == "yearly" & NewTimeFormat=="yearly" )){ + if(verbose){ warning("\t The old and new format are identical \n\t -> no time-step conversion was performed \n"); return(TabSeries); } } + + ##_Time_step_conversion - - ##_Handle_conventional_difference_between_hourly_series_and_others - TmpDatesR <- TabSeries[,1]; - if(TimeFormat=="hourly"){ TmpDatesR <- TmpDatesR - 60*60; } - Hmax <- "00"; if(TimeFormat=="hourly"){ Hmax <- "23" } - - ##_Identify_the_part_of_the_series_to_be_aggregated - NDaysInMonth <- list("31",c("28","29"),"31","30","31","30","31","31","30","31","30","31") - YearLastMonth <- YearFirstMonth+11; if(YearLastMonth>12){ YearLastMonth <- YearLastMonth-12; } - YearFirstMonthTxt <- formatC(YearFirstMonth,format="d",width=2,flag="0") - YearLastMonthTxt <- formatC(YearLastMonth,format="d",width=2,flag="0") - if(NewTimeFormat=="daily" ){ Ind1 <- which(format(TmpDatesR, "%H")=="00"); - Ind2 <- which(format(TmpDatesR, "%H")==Hmax); - if(Ind2[1]<Ind1[1]){ Ind2 <- Ind2[2:length(Ind2)]; } - if(tail(Ind1,1)>tail(Ind2,1)){ Ind1 <- Ind1[1:(length(Ind1)-1)]; } - ### Aggr <- NULL; iii <- 0; for(kkk in 1:length(Ind1)){ - ### iii <- iii+1; Aggr <- c(Aggr,rep(iii,length(Ind1[kkk]:Ind2[kkk]))); } - Aggr <- as.numeric(format(TmpDatesR[min(Ind1):max(Ind2)],"%Y%m%d")); ### more efficient - NewDatesR <- data.frame(seq(from=TmpDatesR[min(Ind1)],to=TmpDatesR[max(Ind2)],by="days")) - } - if(NewTimeFormat=="monthly"){ Ind1 <- which(format(TmpDatesR, "%d%H")=="0100"); - Ind2 <- which(format(TmpDatesR,"%m%d%H") %in% paste(c("0131","0228","0229","0331","0430","0531","0630","0731","0831","0930","1031","1130","1231"),Hmax,sep="")); - Ind2[1:(length(Ind2)-1)][diff(Ind2)==1] <- NA; Ind2 <- Ind2[!is.na(Ind2)]; ### to keep only feb 29 if both feb 28 and feb 29 exists - if(Ind2[1]<Ind1[1]){ Ind2 <- Ind2[2:length(Ind2)]; } - if(tail(Ind1,1)>tail(Ind2,1)){ Ind1 <- Ind1[1:(length(Ind1)-1)]; } - ### Aggr <- NULL; iii <- 0; for(kkk in 1:length(Ind1)){ - ### iii <- iii+1; Aggr <- c(Aggr,rep(iii,length(Ind1[kkk]:Ind2[kkk]))); } - Aggr <- as.numeric(format(TmpDatesR[min(Ind1):max(Ind2)],"%Y%m")); ### more efficient - NewDatesR <- data.frame(seq(from=TmpDatesR[min(Ind1)],to=TmpDatesR[max(Ind2)],by="months")) - } - if(NewTimeFormat=="yearly" ){ Ind1 <- which(format(TmpDatesR,"%m%d%H") %in% paste(YearFirstMonthTxt,"0100",sep="")); - Ind2 <- which(format(TmpDatesR,"%m%d%H") %in% paste(YearLastMonthTxt,NDaysInMonth[[YearLastMonth]],Hmax,sep="")); - Ind2[1:(length(Ind2)-1)][diff(Ind2)==1] <- NA; Ind2 <- Ind2[!is.na(Ind2)]; ### to keep only feb 29 if both feb 28 and feb 29 exists - if(Ind2[1]<Ind1[1]){ Ind2 <- Ind2[2:length(Ind2)]; } - if(tail(Ind1,1)>tail(Ind2,1)){ Ind1 <- Ind1[1:(length(Ind1)-1)]; } - Aggr <- NULL; iii <- 0; for(kkk in 1:length(Ind1)){ - iii <- iii+1; Aggr <- c(Aggr,rep(iii,length(Ind1[kkk]:Ind2[kkk]))); } - ### Aggr <- as.numeric(format(TmpDatesR[min(Ind1):max(Ind2)],"%Y")); ### not working if YearFirstMonth != 01 - NewDatesR <- data.frame(seq(from=TmpDatesR[min(Ind1)],to=TmpDatesR[max(Ind2)],by="years")) - } - ##_Aggreation_and_export - NewTabSeries <- data.frame(NewDatesR) - for(iCol in 2:ncol(TabSeries)){ - AggregData <- aggregate(TabSeries[min(Ind1):max(Ind2),iCol],by=list(Aggr),FUN=ConvertFun[iCol-1],na.rm=F)[,2] - NewTabSeries <- data.frame(NewTabSeries,AggregData) - } - names(NewTabSeries) <- names(TabSeries) - return(NewTabSeries); - - -} - + + ##_Handle_conventional_difference_between_hourly_series_and_others + TmpDatesR <- TabSeries[,1]; + #if(TimeFormat=="hourly"){ TmpDatesR <- TmpDatesR - 60*60; } + TmpDatesR <- TmpDatesR + TimeLag + Hmax <- "00"; if(TimeFormat=="hourly"){ Hmax <- "23" } + + ##_Identify_the_part_of_the_series_to_be_aggregated + NDaysInMonth <- list("31",c("28","29"),"31","30","31","30","31","31","30","31","30","31") + YearLastMonth <- YearFirstMonth+11; if(YearLastMonth>12){ YearLastMonth <- YearLastMonth-12; } + YearFirstMonthTxt <- formatC(YearFirstMonth,format="d",width=2,flag="0") + YearLastMonthTxt <- formatC(YearLastMonth,format="d",width=2,flag="0") + if(NewTimeFormat=="daily" ){ Ind1 <- which(format(TmpDatesR, "%H")=="00"); + Ind2 <- which(format(TmpDatesR, "%H")==Hmax); + if(Ind2[1]<Ind1[1]){ Ind2 <- Ind2[2:length(Ind2)]; } + if(tail(Ind1,1)>tail(Ind2,1)){ Ind1 <- Ind1[1:(length(Ind1)-1)]; } + ### Aggr <- NULL; iii <- 0; for(kkk in 1:length(Ind1)){ + ### iii <- iii+1; Aggr <- c(Aggr,rep(iii,length(Ind1[kkk]:Ind2[kkk]))); } + Aggr <- as.numeric(format(TmpDatesR[min(Ind1):max(Ind2)],"%Y%m%d")); ### more efficient + NewDatesR <- data.frame(seq(from=TmpDatesR[min(Ind1)],to=TmpDatesR[max(Ind2)],by="days")) + } + if(NewTimeFormat=="monthly"){ Ind1 <- which(format(TmpDatesR, "%d%H")=="0100"); + Ind2 <- which(format(TmpDatesR,"%m%d%H") %in% paste(c("0131","0228","0229","0331","0430","0531","0630","0731","0831","0930","1031","1130","1231"),Hmax,sep="")); + Ind2[1:(length(Ind2)-1)][diff(Ind2)==1] <- NA; Ind2 <- Ind2[!is.na(Ind2)]; ### to keep only feb 29 if both feb 28 and feb 29 exists + if(Ind2[1]<Ind1[1]){ Ind2 <- Ind2[2:length(Ind2)]; } + if(tail(Ind1,1)>tail(Ind2,1)){ Ind1 <- Ind1[1:(length(Ind1)-1)]; } + ### Aggr <- NULL; iii <- 0; for(kkk in 1:length(Ind1)){ + ### iii <- iii+1; Aggr <- c(Aggr,rep(iii,length(Ind1[kkk]:Ind2[kkk]))); } + Aggr <- as.numeric(format(TmpDatesR[min(Ind1):max(Ind2)],"%Y%m")); ### more efficient + NewDatesR <- data.frame(seq(from=TmpDatesR[min(Ind1)],to=TmpDatesR[max(Ind2)],by="months")) + } + if(NewTimeFormat=="yearly" ){ Ind1 <- which(format(TmpDatesR,"%m%d%H") %in% paste(YearFirstMonthTxt,"0100",sep="")); + Ind2 <- which(format(TmpDatesR,"%m%d%H") %in% paste(YearLastMonthTxt,NDaysInMonth[[YearLastMonth]],Hmax,sep="")); + Ind2[1:(length(Ind2)-1)][diff(Ind2)==1] <- NA; Ind2 <- Ind2[!is.na(Ind2)]; ### to keep only feb 29 if both feb 28 and feb 29 exists + if(Ind2[1]<Ind1[1]){ Ind2 <- Ind2[2:length(Ind2)]; } + if(tail(Ind1,1)>tail(Ind2,1)){ Ind1 <- Ind1[1:(length(Ind1)-1)]; } + Aggr <- NULL; iii <- 0; for(kkk in 1:length(Ind1)){ + iii <- iii+1; Aggr <- c(Aggr,rep(iii,length(Ind1[kkk]:Ind2[kkk]))); } + ### Aggr <- as.numeric(format(TmpDatesR[min(Ind1):max(Ind2)],"%Y")); ### not working if YearFirstMonth != 01 + NewDatesR <- data.frame(seq(from=TmpDatesR[min(Ind1)],to=TmpDatesR[max(Ind2)],by="years")) + } + ##_Aggreation_and_export + NewTabSeries <- data.frame(NewDatesR) + for(iCol in 2:ncol(TabSeries)){ + AggregData <- aggregate(TabSeries[min(Ind1):max(Ind2),iCol],by=list(Aggr),FUN=ConvertFun[iCol-1],na.rm=F)[,2] + NewTabSeries <- data.frame(NewTabSeries,AggregData) + } + names(NewTabSeries) <- names(TabSeries) + return(NewTabSeries); + + +} \ No newline at end of file diff --git a/man/SeriesAggreg.Rd b/man/SeriesAggreg.Rd index 336981e6113428ed9bb412964d34c1eb016d37b4..41c79f3c86efc42a62816840b4ee6fe6ff24a4b7 100644 --- a/man/SeriesAggreg.Rd +++ b/man/SeriesAggreg.Rd @@ -17,6 +17,8 @@ SeriesAggreg(TabSeries, TimeFormat, NewTimeFormat, ConvertFun, \item{YearFirstMonth}{(optional) [numeric] integer used when NewTimeFormat = "yearly" to set when the starting month of the year (e.g. 01 for calendar year or 09 for hydrological year starting in september)} +\item{TimeLag}{(optional) [numeric] numeric indicating a time lag (in seconds) for the time series aggregation (especially useful to aggegate hourly time series in daily time series)} + \item{verbose}{(optional) [boolean] boolean indicating if the function is run in verbose mode or not, default = FALSE} } \value{ @@ -24,17 +26,18 @@ SeriesAggreg(TabSeries, TimeFormat, NewTimeFormat, ConvertFun, } \description{ Conversion of time series to another time step (aggregation only). \cr -Warning : on the aggregated outputs, the dates correpond to the beginning of the time step \cr +Warning : on the aggregated outputs, the dates correspond to the beginning of the time step \cr (e.g. for daily time-series 01/03/2005 00:00 = value for period 01/03/2005 00:00 - 01/03/2005 23:59) \cr (e.g. for monthly time-series 01/03/2005 00:00 = value for period 01/03/2005 00:00 - 31/03/2005 23:59) \cr (e.g. for yearly time-series 01/03/2005 00:00 = value for period 01/03/2005 00:00 - 28/02/2006 23:59) } \examples{ -## loading catchment data library(airGR) -## preparation of the initial time series data frame at the daily time step +## loading catchment data data(L0123002) + +## preparation of the initial time series data frame at the daily time step TabSeries <- data.frame(BasinObs$DatesR, BasinObs$P, BasinObs$E, BasinObs$T, BasinObs$Qmm) TimeFormat <- "daily"