#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/analyse.R
#
# File that performs all the available analyses of the data.
# It mainly gathers the functions used to compute the trend analysis
# of hydrological variables with the Mann-Kendall test.
# Functions needed for break or gap analysis are also present.
# Useful libraries
library(dplyr)      # bind_rows, full_join, tibble
library(zoo)        # rollmean, for the 10 day rolling averages
library(lubridate)  # interval, to compare the lengths of the periods
library(trend)      # pettitt.test, assumed for the break analysis
# The prepare, reprepare, extract.Var, Estimate.stats, get_period,
# prepare_date and clean functions used below are provided elsewhere
# in the toolbox and by its trend analysis dependency.
# Compute the intercept values of the linear trends from the first
# order coefficients (slopes) of the trends and the data on which
# the analysis is performed
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
    # Get all the different groups (stations)
    Group = levels(factor(df_Xlist$data$group))
    # For all the groups
    for (g in Group) {
        # Get the data and the trend results of the group
        df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
        df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,]
        Start = df_Xtrend_code$period_start
        End = df_Xtrend_code$period_end
        # Extract only the unrepeated dates
        UStart = levels(factor(Start))
        UEnd = levels(factor(End))
        # For all the analysis periods
        for (i in 1:max(length(UStart), length(UEnd))) {
            # Data and trend results restricted to the current period
            df_data_code_per =
                df_data_code[df_data_code$Date >= Start[i]
                             & df_data_code$Date <= End[i],]
            df_Xtrend_code_per =
                df_Xtrend_code[df_Xtrend_code$period_start == Start[i]
                               & df_Xtrend_code$period_end == End[i],]
            # Row of the trend tibble to fill
            id = which(df_Xtrend$group == g
                       & df_Xtrend$period_start == Start[i]
                       & df_Xtrend$period_end == End[i])
            # Mean of the variable over the period (values assumed to
            # be stored in the 'Value' column) and mean date in years
            mu_X = mean(df_data_code_per$Value, na.rm=TRUE)
            mu_t = as.numeric(mean(c(Start[i],
                                     End[i]),
                                   na.rm=TRUE)) / unit2day
            # Intercept so that the trend line passes through the mean
            # point (mu_t, mu_X); the slope column is assumed 'trend'
            b = mu_X - mu_t * df_Xtrend_code_per$trend
            df_Xtrend$intercept[id] = b
        }
    }
    return (df_Xtrend)
}
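# Minimal illustration (made-up numbers, not toolbox data) of the
# intercept relation used in get_intercept: for a trend line
# X = a * t + b forced through the mean point, b = mean(X) - a * mean(t),
# with t expressed in years.
if (FALSE) {
    t = as.numeric(seq(as.Date("2000-01-01"), by="year",
                       length.out=10)) / 365.25
    X = 2 * t + rnorm(10, sd=0.1)
    a = 2                       # slope, as given by the trend analysis
    b = mean(X) - a * mean(t)   # intercept stored in df_Xtrend$intercept
}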
# Perform the trend analysis of the average annual flow (QA)
# hydrological variable
get_QAtrend = function (df_data, period, p_thresold) {
    # Make sure the period is a list of periods
    period = as.list(period)
    # Largest time interval seen so far and blank tibble for results
    Imax = 0
    df_QAtrendB = tibble()
    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_QAlist = prepare(df_data, colnamegroup=c('code'))
        # Compute the yearly mean of the flow
        df_QAEx = extract.Var(data.station=df_QAlist,
                              funct=mean,
                              timestep='year',
                              period=per,
                              pos.datetime=1,
                              na.rm=TRUE)
        # Compute the trend analysis
        df_QAtrend = Estimate.stats(data.extract=df_QAEx,
                                    level=p_thresold)
        # Keep the data associated to the largest interval
        I = interval(per[1], per[2])
        if (I > Imax) {
            Imax = I
            df_QAlistB = df_QAlist
            df_QAExB = df_QAEx
        }
        # Specify the period of analysis
        df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist)
        # Store the trend
        df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend)
    }
    # Clean the results of the trend analysis
    res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB)
    return (res_QAtrend)
}
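# Example call, as a sketch only: 'df_data' stands for the daily flow
# tibble of the toolbox and the period bounds below are arbitrary.
if (FALSE) {
    res_QAtrend = get_QAtrend(df_data,
                              period=c("1968-01-01", "2019-12-31"),
                              p_thresold=0.1)
}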
# Perform the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable
get_QMNAtrend = function (df_data, period, p_thresold) {
    # Make sure the period is a list of periods
    period = as.list(period)
    # Largest time interval seen so far and blank tibble for results
    Imax = 0
    df_QMNAtrendB = tibble()
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_QMNAlist = prepare(df_data, colnamegroup=c('code'))
        # Monthly mean of the flow (monthly timestep value assumed)
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=mean,
                                period=per,
                                timestep='year-month',
                                pos.datetime=1,
                                na.rm=TRUE)
        # Re-prepare the monthly means to fit the entry of extract.Var
        df_QMNAlist = reprepare(df_QMNAEx,
                                df_QMNAlist,
                                colnamegroup=c('code'))
        # Yearly minimum of the monthly means
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=min,
                                period=per,
                                timestep='year',
                                pos.datetime=1,
                                na.rm=TRUE)
        # Compute the trend analysis
        df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx,
                                      level=p_thresold)
        # Keep the data associated to the largest interval
        I = interval(per[1], per[2])
        if (I > Imax) {
            Imax = I
            df_QMNAlistB = df_QMNAlist
            df_QMNAExB = df_QMNAEx
        }
        # Specify the period of analysis
        df_QMNAtrend = get_period(per, df_QMNAtrend, df_QMNAEx,
                                  df_QMNAlist)
        # Store the trend
        df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
    }
    res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)
    return (res_QMNAtrend)
}
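# Minimal illustration (made-up data, base R only) of the QMNA
# definition implemented above: the yearly minimum of the monthly
# mean flows.
if (FALSE) {
    Date = seq(as.Date("2000-01-01"), as.Date("2000-12-31"), by="day")
    Value = runif(length(Date), min=1, max=10)
    QM = tapply(Value, format(Date, "%m"), mean)  # 12 monthly means
    min(QM)                                       # QMNA of that year
}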
# Perform the trend analysis of the minimum 10 day average flow
# over the year (VCN10) hydrological variable
get_VCN10trend = function (df_data, df_meta, period, p_thresold) {
    # Get all different stations code
    Code = levels(factor(df_meta$code))
    # Blank tibble to store the rolling-averaged data
    df_data_roll = tibble()
    # For all the code
    for (c in Code) {
        # Get the data associated to the code
        df_data_code = df_data[df_data$code == c,]
        # Perform the rolling mean of the flow over 10 days
        df_data_code = tibble(Date=df_data_code$Date,
                              Value=rollmean(df_data_code$Value,
                                             10,
                                             fill=NA),
                              code=c)
        # Store the results
        df_data_roll = bind_rows(df_data_roll, df_data_code)
    }
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_VCN10trendB = tibble()
    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))
        # Compute the yearly min over the averaged data
        df_VCN10Ex = extract.Var(data.station=df_VCN10list,
                                 funct=min,
                                 period=per,
                                 timestep='year',
                                 pos.datetime=1,
                                 na.rm=TRUE)
        # Compute the trend analysis
        df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
                                       level=p_thresold)
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval
        if (I > Imax) {
            # Store it and the associated data and info
            Imax = I
            df_VCN10listB = df_VCN10list
            df_VCN10ExB = df_VCN10Ex
        }
        # Specify the period of analysis
        df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
                                   df_VCN10list)
        # Store the trend
        df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
    }
    # Clean results of the trend analysis
    res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)
    return (res_VCN10trend)
}
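# Minimal illustration of the 10 day rolling mean used above:
# zoo::rollmean with fill=NA keeps the series length and pads the
# incomplete windows at both ends with NA (centered alignment).
if (FALSE) {
    x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    rollmean(x, 3, fill=NA)   # NA 2 3 4 5 6 7 8 9 NA
}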
### 1.4. tINI date
# get_tINItrend = function (df_data, df_meta, period, p_thresold) {
# # Get all different stations code
# Code = levels(factor(df_meta$code))
# # Blank tibble to store the data averaged
# df_data_roll = tibble()
# # For all the code
# for (c in Code) {
# # Get the data associated to the code
# df_data_code = df_data[df_data$code == c,]
# # Perform the roll mean of the flow over 10 days
# df_data_code = tibble(Date=df_data_code$Date,
# Value=rollmean(df_data_code$Value,
# 10,
# fill=NA),
# code=c)
# # Store the results
# df_data_roll = bind_rows(df_data_roll, df_data_code)
# }
# # Make sure to convert the period to a list
# period = as.list(period)
# # Set the max interval period as the minimal possible
# Imax = 0
# # Blank tibble for data to return
# df_tMIDtrendB = tibble()
# # For all periods
# for (per in period) {
# # Prepare the data to fit the entry of extract.Var
# df_tMIDlist = prepare(df_data_roll, colnamegroup=c('code'))
# # Compute the yearly min over the averaged data
# df_tMIDEx = extract.Var(data.station=df_tMIDlist,
# funct=which.min,
# period=per,
# timestep='year',
# pos.datetime=1)
# # Converts index of the tMID to the julian date associated
# df_tMIDEx = prepare_date(df_tMIDEx, df_tMIDlist)
# # Compute the trend analysis
# df_tMIDtrend = Estimate.stats(data.extract=df_tMIDEx,
# level=p_thresold)
# # Get the associated time interval
# I = interval(per[1], per[2])
# # If it is the largest interval
# if (I > Imax) {
# # Store it and the associated data and info
# Imax = I
# df_tMIDlistB = df_tMIDlist
# df_tMIDExB = df_tMIDEx
# }
# # Specify the period of analyse
# df_tMIDtrend = get_period(per, df_tMIDtrend, df_tMIDEx,
# df_tMIDlist)
# # Store the trend
# df_tMIDtrendB = bind_rows(df_tMIDtrendB, df_tMIDtrend)
# }
# # Clean results of trend analyse
# res_tMIDtrend = clean(df_tMIDtrendB, df_tMIDExB, df_tMIDlistB)
# return (res_tMIDtrend)
# }
### 1.5. tMID date
# Perform the trend analysis of the date of the yearly minimum of
# the 10 day average flow (date of the VCN10)
get_tMIDtrend = function (df_data, df_meta, period, p_thresold) {
    # Get all different stations code
    Code = levels(factor(df_meta$code))
    # Blank tibble to store the data averaged
    df_data_roll = tibble()
    # For all the code
    for (c in Code) {
        # Get the data associated to the code
        df_data_code = df_data[df_data$code == c,]
        # Perform the roll mean of the flow over 10 days
        df_data_code = tibble(Date=df_data_code$Date,
                              Value=rollmean(df_data_code$Value,
                                             10,
                                             fill=NA),
                              code=c)
        # Store the results
        df_data_roll = bind_rows(df_data_roll, df_data_code)
    }
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_tMIDtrendB = tibble()
    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_tMIDlist = prepare(df_data_roll, colnamegroup=c('code'))
        # Compute the index of the yearly min over the averaged data
        df_tMIDEx = extract.Var(data.station=df_tMIDlist,
                                funct=which.min,
                                period=per,
                                timestep='year',
                                pos.datetime=1)
        # Convert the index of the tMID to the associated julian date
        df_tMIDEx = prepare_date(df_tMIDEx, df_tMIDlist)
        # Compute the trend analysis
        df_tMIDtrend = Estimate.stats(data.extract=df_tMIDEx,
                                      level=p_thresold)
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval
        if (I > Imax) {
            # Store it and the associated data and info
            Imax = I
            df_tMIDlistB = df_tMIDlist
            df_tMIDExB = df_tMIDEx
        }
        # Specify the period of analysis
        df_tMIDtrend = get_period(per, df_tMIDtrend, df_tMIDEx,
                                  df_tMIDlist)
        # Store the trend
        df_tMIDtrendB = bind_rows(df_tMIDtrendB, df_tMIDtrend)
    }
    # Clean results of the trend analysis
    res_tMIDtrend = clean(df_tMIDtrendB, df_tMIDExB, df_tMIDlistB)
    return (res_tMIDtrend)
}
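# Minimal illustration (made-up data) of the index-to-date idea used
# above: which.min returns the position of the yearly minimum, which
# prepare_date converts to a julian date; the conversion is shown
# here with base R only.
if (FALSE) {
    Date = seq(as.Date("2000-01-01"), as.Date("2000-12-31"), by="day")
    Value = cos(2*pi*seq_along(Date)/length(Date))  # dummy flow, minimum mid-year
    idx = which.min(Value)
    Date[idx]                            # date of the minimum
    as.numeric(format(Date[idx], "%j"))  # julian day of the minimum
}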
### 2.1. Hydrograph
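# Reference monthly hydrographs used to classify the hydrological
# regime of a station: each row gives, for one regime group, the
# fraction of the annual flow occurring in each month (columns 1 to
# 12), so every row sums to about 1.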
xref = matrix(
c(0.033, 0.025, 0.033, 0.075, 0.167, 0.217, 0.142,
0.092, 0.067, 0.058, 0.050, 0.042,
0.050, 0.050, 0.058, 0.083, 0.150, 0.167, 0.117,
0.083, 0.058, 0.058, 0.067, 0.058,
0.058, 0.050, 0.100, 0.142, 0.158, 0.092, 0.067,
0.050, 0.042, 0.058, 0.083, 0.100,
0.112, 0.098, 0.101, 0.125, 0.122, 0.072, 0.036,
0.024, 0.039, 0.067, 0.102, 0.102,
0.139, 0.092, 0.082, 0.099, 0.087, 0.039, 0.015,
0.012, 0.036, 0.108, 0.159, 0.131,
0.136, 0.105, 0.091, 0.057, 0.042, 0.034, 0.036,
0.052, 0.068, 0.111, 0.132, 0.136,
0.156, 0.154, 0.117, 0.119, 0.086, 0.044, 0.025,
0.015, 0.025, 0.044, 0.089, 0.127,
0.204, 0.163, 0.118, 0.102, 0.060, 0.030, 0.018,
0.012, 0.023, 0.041, 0.087, 0.143,
0.157, 0.130, 0.119, 0.094, 0.062, 0.042, 0.028,
0.021, 0.035, 0.062, 0.099, 0.150,
0.128, 0.142, 0.122, 0.128, 0.105, 0.065, 0.035,
0.024, 0.031, 0.044, 0.074, 0.101,
0.133, 0.126, 0.111, 0.110, 0.081, 0.056, 0.038,
0.027, 0.042, 0.063, 0.098, 0.117,
0.099, 0.100, 0.101, 0.099, 0.088, 0.078, 0.072,
0.064, 0.064, 0.069, 0.076, 0.089),
ncol=12, byrow=TRUE)
colnames(xref) = seq(1, 12, 1)
row.names(xref) = c('GROUP11', 'GROUP10', 'GROUP9', 'GROUP8',
'GROUP7', 'GROUUK', 'GROUP6', 'GROUP5',
'GROUP4', 'GROUP3', 'GROUP2', 'GROUP1')
# Computes the hydrograph of a station
get_hydrograph = function (df_data, period=NULL, df_meta=NULL) {
# If there is a specified period
if (!is.null(period)) {
# Extracts only the data of this period
df_data = df_data[df_data$Date >= as.Date(period[1])
& df_data$Date <= as.Date(period[2]),]
}
# If there is the metadata
if (!is.null(df_meta)) {
# New column in metadata for hydrological regime
df_meta$regime_hydro = NA
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Otherwise it is just a list of flow from one station
} else {
# Only one code is present
nCode = 1
}
# Blank tibble to store data
df_QM = tibble()
# For all accessible code
for (k in 1:nCode) {
# If there is the metadata
if (!is.null(df_meta)) {
# Gets the code
code = Code[k]
# Get the associated data
df_data_code = df_data[df_data$code == code,]
        } else {
            # The data are the flow series of the single station
            df_data_code = df_data
        }
        # Get the month of each observation as a numeric value
        monthData = as.numeric(format(df_data_code$Date, "%m"))
        # Blank vector to store the monthly means
        QM_code = c()
# For all months
for (i in 1:12) {
# Gets all the flow data associated to the current month
data = df_data_code$Value[monthData == i]
# Averages the data
QM_code[i] = mean(data, na.rm=TRUE)
}
        # Classification of the hydrological regime: compare the
        # normalised hydrograph of the station to each reference
        # hydrograph of xref and keep the closest one
        regime = 0
        classRegime = ""
        distance = rep(0, length(xref[,1]))
        distancemin = 0
        # Quadratic distance to each reference hydrograph
        for (j in 1:length(xref[,1])) {
            distance[j] = sum((QM_code / mean(QM_code) - xref[j,])^2)
        }
        # Regime index (the reference rows are stored from the last
        # group down to the first one)
        regime = 1 + length(xref[,1]) - which.min(distance)
        distancemin = distance[which.min(distance)]
        # Convert the regime index to a regime class
        if (regime < 7) {
            classRegime = "Pluvial"
        } else if (regime >= 7 & regime < 10) {
            classRegime = "Transition"
        } else if (regime >= 10) {
            classRegime = "Nival glaciaire"
        }
# If there is the metadata
if (!is.null(df_meta)) {
# Creates a temporary tibble to store hydrograph results
df_QMtmp = tibble(QM=QM_code, code=code)
# Stores it
df_QM = bind_rows(df_QM, df_QMtmp)
# Stores result of the hydrological regime
df_meta$regime_hydro[df_meta$code == code] = classRegime
# Otherwise
} else {
# No tibble needed
df_QM = QM_code
df_meta = classRegime
}
}
# Returns the hydrograph and meta data
return (list(QM=df_QM, meta=df_meta))
}
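# Minimal illustration (made-up monthly means) of the regime
# classification used in get_hydrograph: the hydrograph of the
# station is normalised by its mean and compared to every reference
# hydrograph of xref with a quadratic distance.
if (FALSE) {
    QM_code = c(10, 9, 8, 7, 6, 5, 4, 4, 5, 7, 9, 10)  # dummy monthly means
    distance = apply(xref, 1,
                     function (x) sum((QM_code / mean(QM_code) - x)^2))
    regime = 1 + nrow(xref) - which.min(distance)
    regime
}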
### 2.2. Break date
# Compute the break date of the flow data by station
get_break = function (df_data, df_meta, p_thresold=0.05) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Blank date break list and associated station code vector
date_break = list()
Code_break = c()
# For all accessible code
for (code in Code) {
# Get the associated data
df_data_code = df_data[df_data$code == code,]
# Remove NA data
        df_data_codeNoNA = df_data_code[!is.na(df_data_code$Value),]
        # Perform the break analysis (the Pettitt test from the
        # 'trend' package is assumed here)
        res_break = pettitt.test(df_data_codeNoNA$Value)
        # Extract the p value of the test
        p_value = res_break$p
# The length of the data analysed
nbreak = res_break$nobs
# Index of the break date
ibreak = res_break$estimate
        # If the p value is under the threshold
        if (p_value <= p_thresold) {
            # Take the mean of the break indices if there are several
            ibreak = round(mean(ibreak), 0)
# Store the date break with its associated code
date_break = append(date_break,
df_data_codeNoNA$Date[ibreak])
Code_break = append(Code_break, code)
}
# step1 = mean(df_data_codeNoNA$Value[1:ibreak])
# step2 = mean(df_data_codeNoNA$Value[(ibreak+1):nbreak])
}
# Create a tibble with the break analysis results
df_break = tibble(code=Code_break, Date=as.Date(date_break))
return (df_break)
}
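# Minimal illustration of the Pettitt break test assumed above
# (pettitt.test from the 'trend' package), applied to made-up data
# with a clear mean shift half way through the series.
if (FALSE) {
    x = c(rnorm(50, mean=1), rnorm(50, mean=3))
    res_break = pettitt.test(x)
    res_break$p.value    # significance of the detected break
    res_break$estimate   # index of the most probable break point
}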
# Compute the time gap by station
get_lacune = function (df_data, df_meta) {
# Get all different stations code
Code = levels(factor(df_meta$code))
    # Create new vectors to store the cumulative and mean time gap
    # by station
tLac = c()
meanLac = c()
# Get rows where there is no NA
NoNA = complete.cases(df_data)
# Get data where there is no NA
df_data_NoNA = df_data[NoNA,]
# For every station
for (code in Code) {
# Get only the data rows for the selected station
df_data_code = df_data[df_data$code==code,]
# Get date for the selected station
Date = df_data_code$Date
        # Get the time span for the selected station
span = as.numeric(Date[length(Date)] - Date[1])
# Get only the data rows with no NA for the selected station
df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
# Get date for the selected station
Date_NoNA = df_data_NoNA_code$Date
# Compute the time gap
lac = as.numeric(diff(Date_NoNA) - 1)
# Compute the cumulative gap
lac_sum = sum(lac)
# Store the cumulative gap rate
tLac = c(tLac, lac_sum/span)
# Compute the mean gap
lac_mean = mean(lac[lac != 0])
# Store the mean gap
meanLac = c(meanLac, lac_mean)
}
    # Compute the cumulative gap rate in percent
tLac100 = tLac * 100
# Create tibble for lacune
df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
    # Join the gap results to the metadata tibble
df_meta = full_join(df_meta, df_lac)
return (df_meta)
}
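# Minimal illustration (made-up dates) of the gap computation used in
# get_lacune: consecutive available dates that differ by more than
# one day reveal a gap of diff - 1 days.
if (FALSE) {
    Date_NoNA = as.Date(c("2000-01-01", "2000-01-02",
                          "2000-01-10", "2000-01-11"))
    lac = as.numeric(diff(Date_NoNA) - 1)  # 0 7 0 -> one 7 day gap
    sum(lac)             # cumulative gap
    mean(lac[lac != 0])  # mean gap length
}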