#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/analyse.R
#
# File that performs all the analyses of the data.
# It mainly groups the functions used to compute the trend analysis
# of hydrological variables with the Mann-Kendall test.
# Functions needed for break or gap analysis are also present.
# Useful libraries
library(dplyr)
library(tibble) # tibble()
library(zoo)    # rollmean()
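# Minimal sketch of the trend-analysis pipeline used throughout this
# file. prepare(), extract.Var(), Estimate.stats(), reprepare(),
# prepare_date(), get_period(), clean() and interval() are assumed to
# be provided by the toolbox's trend-analysis dependencies loaded
# elsewhere; df_data is assumed to be a tibble of daily flows with
# 'Date', 'Value' and 'code' columns:
#   df_list = prepare(df_data, colnamegroup=c('code'))
#   df_Ex = extract.Var(data.station=df_list,
#                       funct=mean,
#                       timestep='year',
#                       period=c(as.Date('1968-01-01'),
#                                as.Date('2019-12-31')),
#                       pos.datetime=1,
#                       na.rm=TRUE)
#   df_trend = Estimate.stats(data.extract=df_Ex, level=0.1)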
# Compute the intercept values of the linear trends, from the first
# order coefficient (slope) of the trends and the data on which the
# analysis has been performed
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
    # For all the groups of the analysis
    for (g in levels(factor(df_Xlist$data$group))) {
        df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
        df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,]
        Start = df_Xtrend_code$period_start
        End = df_Xtrend_code$period_end
        # Extract only the unrepeated dates
        UStart = levels(factor(Start))
        UEnd = levels(factor(End))
        # For all the analysed periods
        for (i in 1:max(length(UStart), length(UEnd))) {
            df_data_code_per =
                df_data_code[df_data_code$Date >= Start[i]
                             & df_data_code$Date <= End[i],]
            df_Xtrend_code_per =
                df_Xtrend_code[df_Xtrend_code$period_start == Start[i]
                               & df_Xtrend_code$period_end == End[i],]
            id = which(df_Xtrend$group == g
                       & df_Xtrend$period_start == Start[i]
                       & df_Xtrend$period_end == End[i])
            # Mean of the time and of the data over the period
            mu_t = as.numeric(mean(c(Start[i],
                                     End[i]),
                                   na.rm=TRUE)) / unit2day
            mu_X = mean(df_data_code_per$Value, na.rm=TRUE)
            # Intercept so that the trend line goes through the mean
            # point: b = mu_X - slope * mu_t (the slope is assumed to
            # be stored in the 'trend' column of df_Xtrend)
            b = mu_X - df_Xtrend_code_per$trend * mu_t
            df_Xtrend$intercept[id] = b
        }
    }
    return (df_Xtrend)
}
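# Quick illustration of the intercept relation used above on synthetic
# data (an ordinary least squares slope stands in for the toolbox's
# trend estimate; all names are purely illustrative):
#   t = 1:1000
#   X = 0.5 * t + 2 + rnorm(1000, sd=0.1)
#   a = coef(lm(X ~ t))[2]
#   mean(X) - a * mean(t)   # close to the true intercept of 2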
# Perform the trend analysis of the average annual flow (QA)
# hydrological variable
get_QAtrend = function (df_data, df_meta, period, p_thresold) {
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_QAtrendB = tibble()
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_QAlist = prepare(df_data, colnamegroup=c('code'))
        # Compute the yearly mean over the data
        df_QAEx = extract.Var(data.station=df_QAlist,
                              funct=mean,
                              timestep='year',
                              period=per,
                              pos.datetime=1,
                              na.rm=TRUE)
        df_QAtrend = Estimate.stats(data.extract=df_QAEx,
                                    level=p_thresold)
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval, store it and its data
        if (I > Imax) {
            Imax = I
            df_QAlistB = df_QAlist
            df_QAExB = df_QAEx
        }
        # Specify the period of analysis
        df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist)
        # Store the trend
        df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend)
    }
    # Clean results of the trend analysis
    res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB)
    return (res_QAtrend)
}
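# Example call (a sketch, assuming df_data and df_meta are the daily
# flow data and station metadata built elsewhere in the toolbox, and
# that a period is a pair of dates):
#   res_QAtrend = get_QAtrend(df_data, df_meta,
#                             period=list(c(as.Date('1968-01-01'),
#                                           as.Date('2019-12-31'))),
#                             p_thresold=0.1)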
# Perform the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable
get_QMNAtrend = function (df_data, df_meta, period, p_thresold) {
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_QMNAtrendB = tibble()
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_QMNAlist = prepare(df_data, colnamegroup=c('code'))
        # Compute the monthly mean over the data
        # (the 'year-month' timestep name is an assumption here)
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=mean,
                                period=per,
                                timestep='year-month',
                                pos.datetime=1,
                                na.rm=TRUE)
        # Re-prepare the monthly series for a second extraction
        df_QMNAlist = reprepare(df_QMNAEx,
                                df_QMNAlist,
                                colnamegroup=c('code'))
        # Compute the yearly min over the monthly means
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=min,
                                period=per,
                                timestep='year',
                                pos.datetime=1,
                                na.rm=TRUE)
        df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx,
                                      level=p_thresold)
        # Keep the data of the largest period interval
        I = interval(per[1], per[2])
        if (I > Imax) {
            Imax = I
            df_QMNAlistB = df_QMNAlist
            df_QMNAExB = df_QMNAEx
        }
        # Specify the period of analysis
        df_QMNAtrend = get_period(per, df_QMNAtrend, df_QMNAEx, df_QMNAlist)
        # Store the trend
        df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
    }
    # Clean results of the trend analysis
    res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)
    return (res_QMNAtrend)
}
# Perform the trend analysis of the minimum 10 day average flow
# over the year (VCN10) hydrological variable
get_VCN10trend = function (df_data, df_meta, period, p_thresold) {
    # Get all different stations code
    Code = levels(factor(df_meta$code))
    # Blank tibble to store the data averaged
    df_data_roll = tibble()
    # For all the code
    for (code in Code) {
        # Get the data associated to the code
        df_data_code = df_data[df_data$code == code,]
        # Perform the roll mean of the flow over 10 days
        df_data_roll_code = tibble(Date=df_data_code$Date,
                                   Value=rollmean(df_data_code$Value,
                                                  10,
                                                  fill=NA),
                                   code=code)
        # Store the results
        df_data_roll = bind_rows(df_data_roll, df_data_roll_code)
    }
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_VCN10trendB = tibble()
    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))
        # Compute the yearly min over the averaged data
        df_VCN10Ex = extract.Var(data.station=df_VCN10list,
                                 funct=min,
                                 period=per,
                                 timestep='year',
                                 pos.datetime=1,
                                 na.rm=TRUE)
        # Compute the trend analysis
        df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
                                       level=p_thresold)
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval
        if (I > Imax) {
            # Store it and the associated data and info
            Imax = I
            df_VCN10listB = df_VCN10list
            df_VCN10ExB = df_VCN10Ex
        }
        # Specify the period of analysis
        df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
                                   df_VCN10list)
        # Store the trend
        df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
    }
    # Clean results of the trend analysis
    res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)
    return (res_VCN10trend)
}
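# Note on the rolling mean used above: zoo::rollmean(x, 10, fill=NA)
# returns a centred 10 day moving average padded with NA at both ends,
# so the smoothed series keeps the length of the original one.
# For example, with a window of 3:
#   rollmean(c(1, 2, 3, 4, 5), 3, fill=NA)   # NA 2 3 4 NA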
### 1.4. tINI date
get_tINItrend = function (df_data, df_meta, period, p_thresold) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Get the number of stations
nCode = length(Code)
# Blank tibble to store the data averaged
df_data_roll = tibble()
# For all the code
for (code in Code) {
# Get the data associated to the code
df_data_code = df_data[df_data$code == code,]
# Perform the roll mean of the flow over 10 days
df_data_roll_code = tibble(Date=df_data_code$Date,
Value=rollmean(df_data_code$Value,
10,
fill=NA),
code=code)
# Store the results
df_data_roll = bind_rows(df_data_roll, df_data_roll_code)
}
# Make sure to convert the period to a list
period = as.list(period)
# Set the max interval period as the minimal possible
Imax = 0
# Blank tibble for data to return
df_tINItrendB = tibble()
# For all periods
for (per in period) {
df_tINIEx = tibble()
# Prepare the whole rolled data set, needed below by prepare_date()
# to convert yearly indices back to dates
df_tINIlist = prepare(df_data_roll, colnamegroup=c('code'))
# For all the code
for (k in 1:nCode) {
# Gets the code
code = Code[k]
# Get the data associated to the code
df_data_roll_code = df_data_roll[df_data_roll$code == code,]
# Prepare the data to fit the entry of extract.Var
df_tINIlist_code = prepare(df_data_roll_code,
colnamegroup=c('code'))
per.start = df_meta$start_year[df_meta$code == code]
per.start = paste(sprintf("%02d", per.start), '-01', sep='')
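# For example, a start_year of 9 gives per.start = "09-01", i.e. a
# hydrological year starting on the 1st of September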
# Compute the yearly min over the averaged data
df_tINIEx_code = extract.Var(data.station=df_tINIlist_code,
funct=which.min,
period=per,
per.start=per.start,
timestep='year',
pos.datetime=1)
# Set the group number of the extraction to the station index so
# that it matches the groups of df_tINIlist
df_tINIEx_code$group1 = k
# Store the results
df_tINIEx = bind_rows(df_tINIEx, df_tINIEx_code)
}
# Convert the index of the tINI to the associated julian date
df_tINIEx = prepare_date(df_tINIEx, df_tINIlist)
# Compute the trend analysis
df_tINItrend = Estimate.stats(data.extract=df_tINIEx,
level=p_thresold)
# Get the associated time interval
I = interval(per[1], per[2])
# If it is the largest interval
if (I > Imax) {
# Store it and the associated data and info
Imax = I
df_tINIlistB = df_tINIlist
df_tINIExB = df_tINIEx
}
# Specify the period of analysis
df_tINItrend = get_period(per, df_tINItrend, df_tINIEx,
df_tINIlist)
# Store the trend
df_tINItrendB = bind_rows(df_tINItrendB, df_tINItrend)
}
# Clean results of the trend analysis
res_tINItrend = clean(df_tINItrendB, df_tINIExB, df_tINIlistB)
return (res_tINItrend)
}
# Perform the trend analysis of the date of the minimum 10 day
# average flow over the year (date of the VCN10) hydrological variable
get_tMIDtrend = function (df_data, df_meta, period, p_thresold) {
    # Get all different stations code
    Code = levels(factor(df_meta$code))
    # Blank tibble to store the data averaged
    df_data_roll = tibble()
    # For all the code
    for (code in Code) {
        # Get the data associated to the code
        df_data_code = df_data[df_data$code == code,]
        # Perform the roll mean of the flow over 10 days
        df_data_roll_code = tibble(Date=df_data_code$Date,
                                   Value=rollmean(df_data_code$Value,
                                                  10,
                                                  fill=NA),
                                   code=code)
        # Store the results
        df_data_roll = bind_rows(df_data_roll, df_data_roll_code)
    }
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_tMIDtrendB = tibble()
    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_tMIDlist = prepare(df_data_roll, colnamegroup=c('code'))
        # Compute the yearly index of the min over the averaged data
        df_tMIDEx = extract.Var(data.station=df_tMIDlist,
                                funct=which.min,
                                period=per,
                                timestep='year',
                                pos.datetime=1)
        # Convert the index of the tMID to the associated julian date
        df_tMIDEx = prepare_date(df_tMIDEx, df_tMIDlist)
        # Compute the trend analysis
        df_tMIDtrend = Estimate.stats(data.extract=df_tMIDEx,
                                      level=p_thresold)
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval
        if (I > Imax) {
            # Store it and the associated data and info
            Imax = I
            df_tMIDlistB = df_tMIDlist
            df_tMIDExB = df_tMIDEx
        }
        # Specify the period of analysis
        df_tMIDtrend = get_period(per, df_tMIDtrend, df_tMIDEx,
                                  df_tMIDlist)
        # Store the trend
        df_tMIDtrendB = bind_rows(df_tMIDtrendB, df_tMIDtrend)
    }
    # Clean results of the trend analysis
    res_tMIDtrend = clean(df_tMIDtrendB, df_tMIDExB, df_tMIDlistB)
    return (res_tMIDtrend)
}
### 2.1. Hydrograph
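# Reference monthly hydrographs (one row per reference regime group,
# one column per calendar month) used by get_hydrograph() below to
# classify the hydrological regime of a station from the distance
# between its normalised monthly hydrograph and each reference row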
xref = matrix(
c(0.033, 0.025, 0.033, 0.075, 0.167, 0.217, 0.142,
0.092, 0.067, 0.058, 0.050, 0.042,
0.050, 0.050, 0.058, 0.083, 0.150, 0.167, 0.117,
0.083, 0.058, 0.058, 0.067, 0.058,
0.058, 0.050, 0.100, 0.142, 0.158, 0.092, 0.067,
0.050, 0.042, 0.058, 0.083, 0.100,
0.112, 0.098, 0.101, 0.125, 0.122, 0.072, 0.036,
0.024, 0.039, 0.067, 0.102, 0.102,
0.139, 0.092, 0.082, 0.099, 0.087, 0.039, 0.015,
0.012, 0.036, 0.108, 0.159, 0.131,
0.136, 0.105, 0.091, 0.057, 0.042, 0.034, 0.036,
0.052, 0.068, 0.111, 0.132, 0.136,
0.156, 0.154, 0.117, 0.119, 0.086, 0.044, 0.025,
0.015, 0.025, 0.044, 0.089, 0.127,
0.204, 0.163, 0.118, 0.102, 0.060, 0.030, 0.018,
0.012, 0.023, 0.041, 0.087, 0.143,
0.157, 0.130, 0.119, 0.094, 0.062, 0.042, 0.028,
0.021, 0.035, 0.062, 0.099, 0.150,
0.128, 0.142, 0.122, 0.128, 0.105, 0.065, 0.035,
0.024, 0.031, 0.044, 0.074, 0.101,
0.133, 0.126, 0.111, 0.110, 0.081, 0.056, 0.038,
0.027, 0.042, 0.063, 0.098, 0.117,
0.099, 0.100, 0.101, 0.099, 0.088, 0.078, 0.072,
0.064, 0.064, 0.069, 0.076, 0.089),
ncol=12, byrow=TRUE)
colnames(xref) = seq(1, 12, 1)
row.names(xref) = c('GROUP11', 'GROUP10', 'GROUP9', 'GROUP8',
'GROUP7', 'GROUUK', 'GROUP6', 'GROUP5',
'GROUP4', 'GROUP3', 'GROUP2', 'GROUP1')
# Computes the hydrograph of a station
get_hydrograph = function (df_data, period=NULL, df_meta=NULL) {
# If there is a specified period
if (!is.null(period)) {
# Extracts only the data of this period
df_data = df_data[df_data$Date >= as.Date(period[1])
& df_data$Date <= as.Date(period[2]),]
}
# If there is the metadata
if (!is.null(df_meta)) {
# New column in metadata for hydrological regime
df_meta$regime_hydro = NA
# New column in metadata for the start of the hydrological year
df_meta$start_year = NA
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Otherwise it is just a list of flow from one station
} else {
# Only one code is present
nCode = 1
}
# Blank tibble to store data
df_QM = tibble()
# For all accessible code
for (k in 1:nCode) {
# If there is the metadata
if (!is.null(df_meta)) {
# Gets the code
code = Code[k]
# Get the associated data
df_data_code = df_data[df_data$code == code,]
} else {
# The data are already those of the single station
df_data_code = df_data
}
# Gets a list of the month of the data as numeric
monthData = as.numeric(format(df_data_code$Date, "%m"))
# Blank vector to store the monthly means
QM_code = c()
# For all months
for (i in 1:12) {
# Gets all the flow data associated to the current month
data = df_data_code$Value[monthData == i]
# Averages the data
QM_code[i] = mean(data, na.rm=TRUE)
}
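# Classify the hydrological regime of the station by comparing its
# normalised monthly hydrograph to each reference hydrograph of xref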
regime = 0
classRegime = ""
distance = rep(0, length(xref[,1]))
distancemin = 0
for (j in 1:length(xref[,1])) {
distance[j] = sum((QM_code / mean(QM_code) - xref[j,])^2)
}
regime = 1 + length(xref[,1]) - which.min(distance)
distancemin = distance[which.min(distance)]
if (regime < 7) {
classRegime = "Pluvial"
} else if (regime >= 7 & regime < 10) {
classRegime = "Transition"
} else if (regime >= 10) {
classRegime = "Nival glaciaire"
}
# If there is the metadata
if (!is.null(df_meta)) {
# Creates a temporary tibble to store hydrograph results
df_QMtmp = tibble(QM=QM_code, code=code)
# Stores it
df_QM = bind_rows(df_QM, df_QMtmp)
# Stores result of the hydrological regime
df_meta$regime_hydro[df_meta$code == code] = classRegime
# Compute the month of the max QM for the current station
maxMonth = which.max(QM_code)
# Stores it as the start of the hydrological year
df_meta$start_year[df_meta$code == code] = maxMonth
# Otherwise
} else {
# No tibble needed
df_QM = QM_code
df_meta = classRegime
}
}
# Returns the hydrograph and meta data
return (list(QM=df_QM, meta=df_meta))
}
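# Example call (a sketch, assuming df_data holds daily flows with
# 'Date', 'Value' and 'code' columns and df_meta the station metadata):
#   res = get_hydrograph(df_data,
#                        period=c('1968-01-01', '2019-12-31'),
#                        df_meta=df_meta)
#   res$QM    # monthly mean flows by station
#   res$meta  # metadata completed with 'regime_hydro' and 'start_year'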
### 2.2. Break date
# Compute the break date of the flow data by station
get_break = function (df_data, df_meta, p_thresold=0.05) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Blank date break list and associated station code vector
date_break = list()
Code_break = c()
# For all accessible code
for (code in Code) {
# Get the associated data
df_data_code = df_data[df_data$code == code,]
# Remove NA data
df_data_codeNoNA = df_data_code[!is.na(df_data_code$Value),]
# Compute the break analysis (assuming the Pettitt test from the
# 'trend' package; the package must be installed)
res_break = trend::pettitt.test(df_data_codeNoNA$Value)
# Extract the p value
p_value = res_break$p.value
# The length of the data analysed
nbreak = res_break$nobs
# Index of the break date
ibreak = res_break$estimate
# If the p value is under the threshold
if (p_value <= p_thresold) {
# Get the mean of the break indices if there are several
ibreak = round(mean(ibreak), 0)
# Store the date break with its associated code
date_break = append(date_break,
df_data_codeNoNA$Date[ibreak])
Code_break = append(Code_break, code)
}
# step1 = mean(df_data_codeNoNA$Value[1:ibreak])
# step2 = mean(df_data_codeNoNA$Value[(ibreak+1):nbreak])
}
# Create a tibble with the break analysis results
df_break = tibble(code=Code_break, Date=as.Date(date_break))
return (df_break)
}
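# Example call (a sketch, with the same df_data and df_meta assumptions
# as above and the 'trend' package installed for the Pettitt test):
#   df_break = get_break(df_data, df_meta, p_thresold=0.05)
# df_break then lists the code of each station with a significant
# break and the associated break date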
# Compute the time gap by station
get_lacune = function (df_data, df_meta) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Create new vectors to store the cumulative and mean
# time gap by station
tLac = c()
meanLac = c()
# Get rows where there is no NA
NoNA = complete.cases(df_data)
# Get data where there is no NA
df_data_NoNA = df_data[NoNA,]
# For every station
for (code in Code) {
# Get only the data rows for the selected station
df_data_code = df_data[df_data$code==code,]
# Get date for the selected station
Date = df_data_code$Date
# Get the time span for the selected station
span = as.numeric(Date[length(Date)] - Date[1])
# Get only the data rows with no NA for the selected station
df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
# Get date for the selected station
Date_NoNA = df_data_NoNA_code$Date
# Compute the time gap
lac = as.numeric(diff(Date_NoNA) - 1)
# Compute the cumulative gap
lac_sum = sum(lac)
# Store the cumulative gap rate
tLac = c(tLac, lac_sum/span)
# Compute the mean gap
lac_mean = mean(lac[lac != 0])
# Store the mean gap
meanLac = c(meanLac, lac_mean)
}
# Compute the cumulative gap rate in percent
tLac100 = tLac * 100
# Create a tibble with the gap (lacune) results
df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
# Join the gap results to the station metadata
df_meta = full_join(df_meta, df_lac)
return (df_meta)
}
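# Example call (a sketch, with the same df_data and df_meta assumptions
# as above):
#   df_meta = get_lacune(df_data, df_meta)
# This adds to df_meta the cumulative gap rate in percent (tLac100)
# and the mean gap length in days (meanLac) of each station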