Newer
Older
#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/format.R
#
# Manages all the format problem of data and info. Mainly problem of
# input and output of the 'StatsAnalysisTrend' package. It also allows
# to join different selections of station and to gets exact period of
# trend analysis.
## 1. INPUT
### 1.1. Preparation
# Prepares the data in order to have a list of a data tibble with
# date, group and flow column and a info tibble with the station code
# and group column to fit the entry of the 'extract.Var' function in
# the 'StatsAnalysisTrend' package
prepare = function(df_data, colnamegroup=NULL) {
colindgroup = which(colnames(df_data) == colnamegroup)
df_data = group_by_at(df_data, colindgroup)
data = tibble(Date=df_data$Date,
group=group_indices(df_data),
info = bind_cols(group=seq(1:nrow(Gkey)),
Gkey)
# Stores data and info tibble as a list that match the entry of
# the 'extract.Var' function
res = list(data=data, info=info)
return (res)
}
### 1.2. Re-preparation
# Re-prepares the data in outing of the 'extract.Var' function in
# the 'StatsAnalysisTrend' package in order to fit again to the
# entry of the same function
reprepare = function(df_XEx, df_Xlist, colnamegroup=NULL) {
# Changes the column name of the results of the
# 'extract.Var' function
nbt = lengths(regmatches(exDate, gregexpr('-', exDate)))
# Converts it to date from a year and a month
df_XEx$Date = paste(df_XEx$Date, '01', sep='-')
# If there is no dash
# Converts it to date from only a year
df_XEx$Date = paste(df_XEx$Date, '01', '01', sep='-')
# If there is more than 2 dashes
df_XEx[-1],
df_Xlist$info[df_XEx$group,
2:ncol(df_Xlist$info)])
df_XlistEx = prepare(df_XEx, colnamegroup=colnamegroup)
return (df_XlistEx)
prepare_date = function(df_XEx, df_Xlist) {
dateStart_group = summarise(group_by(df_Xlist$data, group),
Date = min(Date))
# filter(group_by(df_Xlist$data, group), Date == min(Date))
dateStart_group$Date_julian = NA
date = as.Date(dateStart_group$Date)
origin = as.Date(paste(format(dateStart_group$Date, "%Y"),
'-01-01', sep=''))
for (i in 1:nrow(dateStart_group)) {
dateJultmp = julian(date[i], origin=origin[i])
dateStart_group$Date_julian[i] = dateJultmp
}
dateStart_group$Year = format(dateStart_group$Date, "%Y")
for (group in dateStart_group$group) {
OkdateStart_group = dateStart_group$group == group
Shift = dateStart_group$Date_julian[OkdateStart_group]
year = dateStart_group$Year[OkdateStart_group]
OkXEx_code_year = df_XEx$group1 == group & df_XEx$datetime == year
df_XEx$values[OkXEx_code_year] =
df_XEx$values[OkXEx_code_year] + Shift
OkXEx_code = df_XEx$group1 == group
XEx_code = df_XEx$values[OkXEx_code]
meanXEx_code = mean(XEx_code, na.rm=TRUE)
OkOverStd = dXEx_code >= stdXEx_code*3
OkOverStd[is.na(OkOverStd)] = FALSE
XEx_code[OkOverStd] = XEx_code[OkOverStd] + 365
print(OkOverStd)
# print(group)
# print(df_XEx$datetime[df_XEx$group1 == group][dXEx_code >= stdXEx_code*3])
## 2. OUTPUT
# Cleans the trend results of the function 'Estimate.stats' in the
# 'StatsAnalysisTrend' package. It adds the station code and the
# intercept of the trend to the trend results. Also makes the data
# more presentable.
clean = function (df_Xtrend, df_XEx, df_Xlist) {
# Reprepares the list of data and info in order to be presentable
df_Xlist = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
df_Xlist$data$code[which(df_Xlist$data$group == g)] = df_Xlist$info$code[df_Xlist$info$group == g]
}
df_Xtrend = bind_cols(df_Xtrend,
df_Xlist$info[df_Xtrend$group1,
2:ncol(df_Xlist$info)])
df_Xtrend = get_intercept(df_Xtrend, df_Xlist, unit2day=365.25)
df_Xtrend = relocate(df_Xtrend, intercept, .after=trend)
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# Creates a list of results to return
res = list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info)
return (res)
}
## 3. OTHER
### 3.1. Joining selection
# Joins tibbles of different selection of station as a unique one
join = function (df_data_AG, df_data_IN, df_meta_AG, df_meta_IN) {
# If there is an INRAE and an Agence de l'eau Adour-Garonne selection
if (!is.null(df_data_IN) & !is.null(df_data_AG)) {
# Gets the station in common
common = levels(factor(df_meta_IN[df_meta_IN$code %in% df_meta_AG$code,]$code))
# Gets the Nv station to add
INadd = levels(factor(df_meta_IN[!(df_meta_IN$code %in% df_meta_AG$code),]$code))
# Selects only the IN meta to add
df_meta_INadd = df_meta_IN[df_meta_IN$code %in% INadd,]
# Names the source of the selection
df_meta_AG$source = 'AG'
df_meta_INadd$source = 'IN'
# Joins IN data to AG data
df_meta = full_join(df_meta_AG, df_meta_INadd)
# Selects only the IN data to add
df_data_INadd = df_data_IN[df_data_IN$code %in% INadd,]
# Joins IN meta to AG meta
df_data = full_join(df_data_AG, df_data_INadd)
# If there is just an Agence de l'eau Adour-Garonne selection
} else if (is.null(df_data_IN) & !is.null(df_data_AG)) {
df_meta_AG$source = 'AG'
df_meta = df_meta_AG
df_data = df_data_AG
# If there is just an INRAE selection
} else if (!is.null(df_data_IN) & is.null(df_data_AG)) {
df_meta_IN$source = 'IN'
df_meta = df_meta_IN
df_data = df_data_IN
# If there is no selection
} else {
stop('No data')
}
return (list(data=df_data, meta=df_meta))
}
### 3.2. Period of trend
# Compute the start and the end of the period for a trend analysis
# according to the accessible data
get_period = function (per, df_Xtrend, df_XEx, df_Xlist) {
# Converts results of trend to tibble
df_Xtrend = tibble(df_Xtrend)
# Fix the period start and end of the accessible period to a
# default date
df_Xtrend$period_start = as.Date("1970-01-01")
df_Xtrend$period_end = as.Date("1970-01-01")
# Changes the format of the date variable to date
df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
df_XExtmp = df_Xlisttmp$data
# For all the different group
for (g in df_Xlisttmp$info$group) {
# Gets the analyse data associated to the group
df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,]
# Gets the id in the trend result associated to the group
id = which(df_Xtrend$group1 == g)
# Computes index of the nearest accessible start and end date
OkStart = df_XExtmp_code$Date >= as.Date(per[1])
OkEnd = df_XExtmp_code$Date <= as.Date(per[2])
distStart = abs(df_XExtmp_code$Date[OkStart] - as.Date(per[1]))
distEnd = abs(df_XExtmp_code$Date[OkEnd] - as.Date(per[2]))
iStart = which.min(distStart)
iEnd = which.min(distEnd)
# Stores the start and end of the trend analysis
df_Xtrend$period_start[id] =
as.Date(df_XExtmp_code$Date[iStart])
df_Xtrend$period_end[id] =
as.Date(df_XExtmp_code$Date[iEnd])
}
return (df_Xtrend)