Newer
Older
# \\\
# Copyright 2021-2022 Louis Héraut*1
#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/analyse.R
#
# File that realise all the possible analysis of data.
# This file regroup mainly the functions use to compute the trend
# analysis of hydrologic variables thanks to the Mann-Kendall Test.
# Functions needed for break or gap analysis are also present.
# Usefull library
library(dplyr)
# Compute intercept values of linear trends with first order values
# of trends and the data on which analysis is performed.
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,]
Start = df_Xtrend_code$period_start
End = df_Xtrend_code$period_end
# Extract only the unrepeated dates
UStart = levels(factor(Start))
df_data_code_per =
df_data_code[df_data_code$Date >= Start[i]
& df_data_code$Date <= End[i],]
df_Xtrend_code_per =
df_Xtrend_code[df_Xtrend_code$period_start == Start[i]
& df_Xtrend_code$period_end == End[i],]
id = which(df_Xtrend$group == g
& df_Xtrend$period_start == Start[i]
& df_Xtrend$period_end == End[i])
mu_X = mean(df_data_code_per$Qm3s, na.rm=TRUE)
mu_t = as.numeric(mean(c(Start[i],
End[i]),
na.rm=TRUE)) / unit2day
df_Xtrend$intercept[id] = b
}
}
return (df_Xtrend)
}
# Realise the trend analysis of the average annual flow (QA)
# hydrological variable
df_QAlist = prepare(df_data, colnamegroup=c('code'))
df_QAEx = extract.Var(data.station=df_QAlist,
funct=mean,
timestep='year',
period=per,
pos.datetime=1,
na.rm=TRUE)
df_QAtrend = Estimate.stats(data.extract=df_QAEx,
level=p_thresold)
Imax = I
df_QAlistB = df_QAlist
df_QAExB = df_QAEx
}
df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist)
# Store the trend
df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend)
res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB)
# Realise the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable
df_QMNAlist = prepare(df_data, colnamegroup=c('code'))
df_QMNAEx = extract.Var(data.station=df_QMNAlist,
funct=mean,
period=per,
df_QMNAlist = reprepare(df_QMNAEx,
df_QMNAlist,
colnamegroup=c('code'))
df_QMNAEx = extract.Var(data.station=df_QMNAlist,
funct=min,
period=per,
timestep='year',
pos.datetime=1,
na.rm=TRUE)
df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx,
level=p_thresold)
Imax = I
df_QMNAlistB = df_QMNAlist
df_QMNAExB = df_QMNAEx
}
df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
}
res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)
# Realises the trend analysis of the minimum 10 day average flow
# over the year (VCN10) hydrological variable
get_VCN10trend = function (df_data, df_meta, period, p_thresold) {
df_data_code = tibble(Date=rollmean(df_data_code$Date,
10,
fill=NA),
Qm3s=rollmean(df_data_code$Qm3s,
10,
fill=NA),
code=c)
df_data_roll = bind_rows(df_data_roll, df_data_code)
}
df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))
df_VCN10Ex = extract.Var(data.station=df_VCN10list,
funct=min,
period=per,
timestep='year',
pos.datetime=1,
na.rm=TRUE)
df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
level=p_thresold)
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# Get the associated time interval
I = interval(per[1], per[2])
# If it is the largest interval
if (I > Imax) {
# Store it and the associated data and info
Imax = I
df_VCN10listB = df_VCN10list
df_VCN10ExB = df_VCN10Ex
}
# Specify the period of analyse
df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
df_VCN10list)
# Store the trend
df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
}
# Clean results of trend analyse
res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)
return (res_VCN10trend)
}
### 1.4. VCN10 date
# Realises the trend analysis of the date of the minimum 10 day
# average flow over the year (VCN10) hydrological variable
get_dateVCN10trend = function (df_data, df_meta, period, p_thresold) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Blank tibble to store the data averaged
df_data_roll = tibble()
# For all the code
for (c in Code) {
# Get the data associated to the code
df_data_code = df_data[df_data$code == c,]
# Perform the roll mean of the flow over 10 days
df_data_code = tibble(Date=df_data_code$Date,
Qm3s=rollmean(df_data_code$Qm3s,
10,
fill=NA),
code=c)
# Store the results
df_data_roll = bind_rows(df_data_roll, df_data_code)
}
# Make sure to convert the period to a list
period = as.list(period)
# Set the max interval period as the minimal possible
Imax = 0
# Blank tibble for data to return
df_VCN10trendB = tibble()
# For all periods
for (per in period) {
# Prepare the data to fit the entry of extract.Var
df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))
# Compute the yearly min over the averaged data
df_VCN10Ex = extract.Var(data.station=df_VCN10list,
funct=which.min,
period=per,
timestep='year',
pos.datetime=1)
# Converts index of the VCN10 to the julian date associated
df_VCN10Ex = prepare_date(df_VCN10Ex, df_VCN10list)
# Compute the trend analysis
df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
level=p_thresold)
Imax = I
df_VCN10listB = df_VCN10list
df_VCN10ExB = df_VCN10Ex
}
df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
df_VCN10list)
df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
}
res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)
## 2. OTHER ANALYSES
### 2.1. Break date
# Compute the break date of the flow data by station
get_break = function (df_data, df_meta, p_thresold=0.05) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Blank date break list and associated station code vector
date_break = list()
Code_break = c()
# For all accessible code
for (code in Code) {
# Get the associated data
df_data_code = df_data[df_data$code == code,]
# Remove NA data
df_data_codeNoNA = df_data_code[!is.na(df_data_code$Qm3s),]
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
# Perform the break analysis thanks to the Pettitt test
res_break = pettitt.test(df_data_codeNoNA$Qm3s)
# Extract p value
p_value = res_break$p
# The length of the data analysed
nbreak = res_break$nobs
# Index of the break date
ibreak = res_break$estimate
# If the p value results is under the thresold
if (p_value <= p_thresold) {
# Get the mean of the index break if there is several
ibreak = round(mean(ibreak), 0)
# Store the date break with its associated code
date_break = append(date_break,
df_data_codeNoNA$Date[ibreak])
Code_break = append(Code_break, code)
}
# step1 = mean(df_data_codeNoNA$Qm3s[1:ibreak])
# step2 = mean(df_data_codeNoNA$Qm3s[(ibreak+1):nbreak])
}
# Create a tibble with the break analysis results
df_break = tibble(code=Code_break, Date=as.Date(date_break))
return (df_break)
}
### 2.2. Time gap
# Compute the time gap by station
get_lacune = function (df_data, df_meta) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Create new vector to stock results for cumulative and mean
# time gap by station
tLac = c()
meanLac = c()
# Get rows where there is no NA
NoNA = complete.cases(df_data)
# Get data where there is no NA
df_data_NoNA = df_data[NoNA,]
# For every station
for (code in Code) {
# Get only the data rows for the selected station
df_data_code = df_data[df_data$code==code,]
# Get date for the selected station
Date = df_data_code$Date
# Get time span for the selection station
span = as.numeric(Date[length(Date)] - Date[1])
# Get only the data rows with no NA for the selected station
df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
# Get date for the selected station
Date_NoNA = df_data_NoNA_code$Date
# Compute the time gap
lac = as.numeric(diff(Date_NoNA) - 1)
# Compute the cumulative gap
lac_sum = sum(lac)
# Store the cumulative gap rate
tLac = c(tLac, lac_sum/span)
# Compute the mean gap
lac_mean = mean(lac[lac != 0])
# Store the mean gap
meanLac = c(meanLac, lac_mean)
}
# Compute the cumulative gap rate in pourcent
tLac100 = tLac * 100
# Create tibble for lacune
df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
# Join a tibble
df_meta = full_join(df_meta, df_lac)
return (df_meta)
}