Newer
Older
# \\\
# Copyright 2021-2022 Louis Héraut*1
#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/analyse.R
#
# File that realise all the possible analysis of data.
# This file regroup mainly the functions use to compute the trend
# analysis of hydrologic variables thanks to the Mann-Kendall Test.
# Functions needed for break or gap analysis are also present.
# Usefull library
library(dplyr)
# Compute intercept values of linear trends with first order values
# of trends and the data on which analysis is performed.
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,]
Start = df_Xtrend_code$period_start
End = df_Xtrend_code$period_end
# Extract only the unrepeated dates
UStart = levels(factor(Start))
df_data_code_per =
df_data_code[df_data_code$Date >= Start[i]
& df_data_code$Date <= End[i],]
df_Xtrend_code_per =
df_Xtrend_code[df_Xtrend_code$period_start == Start[i]
& df_Xtrend_code$period_end == End[i],]
id = which(df_Xtrend$group == g
& df_Xtrend$period_start == Start[i]
& df_Xtrend$period_end == End[i])
mu_X = mean(df_data_code_per$Qm3s, na.rm=TRUE)
mu_t = as.numeric(mean(c(Start[i],
End[i]),
na.rm=TRUE)) / unit2day
df_Xtrend$intercept[id] = b
}
}
return (df_Xtrend)
}
# Realise the trend analysis of the average annual flow (QA)
# hydrological variable
df_QAlist = prepare(df_data, colnamegroup=c('code'))
df_QAEx = extract.Var(data.station=df_QAlist,
funct=mean,
timestep='year',
period=per,
pos.datetime=1,
na.rm=TRUE)
df_QAtrend = Estimate.stats(data.extract=df_QAEx,
level=p_thresold)
Imax = I
df_QAlistB = df_QAlist
df_QAExB = df_QAEx
}
df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist)
# Store the trend
df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend)
res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB)
# Realise the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable
df_QMNAlist = prepare(df_data, colnamegroup=c('code'))
df_QMNAEx = extract.Var(data.station=df_QMNAlist,
funct=mean,
period=per,
df_QMNAlist = reprepare(df_QMNAEx,
df_QMNAlist,
colnamegroup=c('code'))
df_QMNAEx = extract.Var(data.station=df_QMNAlist,
funct=min,
period=per,
timestep='year',
pos.datetime=1,
na.rm=TRUE)
df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx,
level=p_thresold)
Imax = I
df_QMNAlistB = df_QMNAlist
df_QMNAExB = df_QMNAEx
}
df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
}
res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)
# Realise the trend analysis of the minimum 10 day average flow over the year (VCN10) hydrological variable
get_VCN10trend = function (df_data, df_meta, period, p_thresold) {
df_data_code = tibble(Date=rollmean(df_data_code$Date,
10,
fill=NA),
Qm3s=rollmean(df_data_code$Qm3s,
10,
fill=NA),
code=c)
df_data_roll = bind_rows(df_data_roll, df_data_code)
}
df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))
df_VCN10Ex = extract.Var(data.station=df_VCN10list,
funct=min,
period=per,
timestep='year',
pos.datetime=1,
na.rm=TRUE)
df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
level=p_thresold)
Imax = I
df_VCN10listB = df_VCN10list
df_VCN10ExB = df_VCN10Ex
}
df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
df_VCN10list)
df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
}
res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)
## 2. OTHER ANALYSES
### 2.1. Break date
# Compute the break date of the flow data by station
get_break = function (df_data, df_meta, p_thresold=0.05) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Blank date break list and associated station code vector
date_break = list()
Code_break = c()
# For all accessible code
for (code in Code) {
# Get the associated data
df_data_code = df_data[df_data$code == code,]
# Remove NA data
df_data_codeNoNA = df_data_code[!is.na(df_data_code$Qm3s),]
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
# Perform the break analysis thanks to the Pettitt test
res_break = pettitt.test(df_data_codeNoNA$Qm3s)
# Extract p value
p_value = res_break$p
# The length of the data analysed
nbreak = res_break$nobs
# Index of the break date
ibreak = res_break$estimate
# If the p value results is under the thresold
if (p_value <= p_thresold) {
# Get the mean of the index break if there is several
ibreak = round(mean(ibreak), 0)
# Store the date break with its associated code
date_break = append(date_break,
df_data_codeNoNA$Date[ibreak])
Code_break = append(Code_break, code)
}
# step1 = mean(df_data_codeNoNA$Qm3s[1:ibreak])
# step2 = mean(df_data_codeNoNA$Qm3s[(ibreak+1):nbreak])
}
# Create a tibble with the break analysis results
df_break = tibble(code=Code_break, Date=as.Date(date_break))
return (df_break)
}
### 2.2. Time gap
# Compute the time gap by station
get_lacune = function (df_data, df_meta) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Create new vector to stock results for cumulative and mean
# time gap by station
tLac = c()
meanLac = c()
# Get rows where there is no NA
NoNA = complete.cases(df_data)
# Get data where there is no NA
df_data_NoNA = df_data[NoNA,]
# For every station
for (code in Code) {
# Get only the data rows for the selected station
df_data_code = df_data[df_data$code==code,]
# Get date for the selected station
Date = df_data_code$Date
# Get time span for the selection station
span = as.numeric(Date[length(Date)] - Date[1])
# Get only the data rows with no NA for the selected station
df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
# Get date for the selected station
Date_NoNA = df_data_NoNA_code$Date
# Compute the time gap
lac = as.numeric(diff(Date_NoNA) - 1)
# Compute the cumulative gap
lac_sum = sum(lac)
# Store the cumulative gap rate
tLac = c(tLac, lac_sum/span)
# Compute the mean gap
lac_mean = mean(lac[lac != 0])
# Store the mean gap
meanLac = c(meanLac, lac_mean)
}
# Compute the cumulative gap rate in pourcent
tLac100 = tLac * 100
# Create tibble for lacune
df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
# Join a tibble
df_meta = full_join(df_meta, df_lac)
return (df_meta)
}