Newer
Older
#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/analyse.R
#
# File that realise all the possible analysis of data.
# This file regroup mainly the functions use to compute the trend
# analysis of hydrologic variables thanks to the Mann-Kendall Test.
# Functions needed for break or gap analysis are also present.
# Usefull library
library(dplyr)
# Compute intercept values of linear trends with first order values
# of trends and the data on which analysis is performed.
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,]
Start = df_Xtrend_code$period_start
End = df_Xtrend_code$period_end
# Extract only the unrepeated dates
UStart = levels(factor(Start))
df_data_code_per =
df_data_code[df_data_code$Date >= Start[i]
& df_data_code$Date <= End[i],]
df_Xtrend_code_per =
df_Xtrend_code[df_Xtrend_code$period_start == Start[i]
& df_Xtrend_code$period_end == End[i],]
id = which(df_Xtrend$group == g
& df_Xtrend$period_start == Start[i]
& df_Xtrend$period_end == End[i])
mu_t = as.numeric(mean(c(Start[i],
End[i]),
na.rm=TRUE)) / unit2day
df_Xtrend$intercept[id] = b
}
}
return (df_Xtrend)
}
# Realise the trend analysis of the average annual flow (QA)
# hydrological variable
df_QAlist = prepare(df_data, colnamegroup=c('code'))
df_QAEx = extract.Var(data.station=df_QAlist,
funct=mean,
timestep='year',
period=per,
pos.datetime=1,
na.rm=TRUE)
df_QAtrend = Estimate.stats(data.extract=df_QAEx,
level=p_thresold)
Imax = I
df_QAlistB = df_QAlist
df_QAExB = df_QAEx
}
df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist)
# Store the trend
df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend)
res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB)
# Realise the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable
df_QMNAlist = prepare(df_data, colnamegroup=c('code'))
df_QMNAEx = extract.Var(data.station=df_QMNAlist,
funct=mean,
period=per,
df_QMNAlist = reprepare(df_QMNAEx,
df_QMNAlist,
colnamegroup=c('code'))
df_QMNAEx = extract.Var(data.station=df_QMNAlist,
funct=min,
period=per,
timestep='year',
pos.datetime=1,
na.rm=TRUE)
df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx,
level=p_thresold)
Imax = I
df_QMNAlistB = df_QMNAlist
df_QMNAExB = df_QMNAEx
}
df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
}
res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)
# Realises the trend analysis of the minimum 10 day average flow
# over the year (VCN10) hydrological variable
get_VCN10trend = function (df_data, df_meta, period, p_thresold) {
df_data_roll_code = tibble(Date=df_data_code$Date,
Value=rollmean(df_data_code$Value,
10,
fill=NA),
code=code)
df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))
df_VCN10Ex = extract.Var(data.station=df_VCN10list,
funct=min,
period=per,
timestep='year',
pos.datetime=1,
na.rm=TRUE)
df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
level=p_thresold)
# Get the associated time interval
I = interval(per[1], per[2])
# If it is the largest interval
if (I > Imax) {
# Store it and the associated data and info
Imax = I
df_VCN10listB = df_VCN10list
df_VCN10ExB = df_VCN10Ex
}
# Specify the period of analyse
df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
df_VCN10list)
# Store the trend
df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
}
# Clean results of trend analyse
res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)
return (res_VCN10trend)
}
get_tINItrend = function (df_data, df_meta, period, p_thresold) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Gets the number of station
nCode = length(Code)
# Blank tibble to store the data averaged
df_data_roll = tibble()
# For all the code
for (code in Code) {
# Get the data associated to the code
df_data_code = df_data[df_data$code == code,]
# Perform the roll mean of the flow over 10 days
df_data_roll_code = tibble(Date=df_data_code$Date,
Value=rollmean(df_data_code$Value,
10,
fill=NA),
code=code)
# Store the results
df_data_roll = bind_rows(df_data_roll, df_data_roll_code)
}
# Make sure to convert the period to a list
period = as.list(period)
# Set the max interval period as the minimal possible
Imax = 0
# Blank tibble for data to return
df_tINItrendB = tibble()
# For all periods
for (per in period) {
df_tINIEx = tibble()
# For all the code
for (k in 1:nCode) {
# Gets the code
code = Code[k]
per.start = df_meta$start_year[df_meta$code == code]
per.start = paste(sprintf("%02d", per.start), '-01', sep='')
# Get the data associated to the code
df_data_roll_code = df_data_roll[df_data_roll$code == code,]
# Get the data associated to the code
df_data_code = df_data[df_data$code == code,]
# Prepare the data to fit the entry of extract.Var
df_QNAlist_code = prepare(df_data_code,
colnamegroup=c('code'))
# Compute the yearly mean over the data
df_QNAEx_code = extract.Var(data.station=df_QNAlist_code,
funct=min,
timestep='year',
period=per,
# Prepare the data to fit the entry of extract.Var
df_tINIlist_code = prepare(df_data_roll_code,
colnamegroup=c('code'))
# Compute the yearly min over the averaged data
df_tINIEx_code = extract.Var(data.station=df_tINIlist_code,
df_tINIlist_code$data$group = k
df_tINIlist_code$info$group = k
# Converts index of the tINI to the julian date associated
df_tINIEx_code = prepare_date(df_tINIEx_code,
df_tINIlist_code,
per.start=per.start)
df_tINIlist$data = bind_rows(df_tINIlist$data,
df_tINIlist_code$data)
df_tINIlist$info = bind_rows(df_tINIlist$info,
df_tINIlist_code$info)
# Compute the trend analysis
df_tINItrend = Estimate.stats(data.extract=df_tINIEx,
level=p_thresold)
# Get the associated time interval
I = interval(per[1], per[2])
# If it is the largest interval
if (I > Imax) {
# Store it and the associated data and info
Imax = I
df_tINIlistB = df_tINIlist
df_tINIExB = df_tINIEx
}
# Specify the period of analyse
df_tINItrend = get_period(per, df_tINItrend, df_tINIEx,
df_tINIlist)
# Store the trend
df_tINItrendB = bind_rows(df_tINItrendB, df_tINItrend)
}
# Clean results of trend analyse
res_tINItrend = clean(df_tINItrendB, df_tINIExB, df_tINIlistB)
return (res_tINItrend)
}
# Realises the trend analysis of the date of the minimum 10 day
# average flow over the year (VCN10) hydrological variable
get_tMIDtrend = function (df_data, df_meta, period, p_thresold) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Blank tibble to store the data averaged
df_data_roll = tibble()
# For all the code
df_data_roll_code = tibble(Date=df_data_code$Date,
Value=rollmean(df_data_code$Value,
10,
fill=NA),
code=code)
}
# Make sure to convert the period to a list
period = as.list(period)
# Set the max interval period as the minimal possible
Imax = 0
# Blank tibble for data to return
# For all periods
for (per in period) {
# Prepare the data to fit the entry of extract.Var
df_tMIDlist = prepare(df_data_roll, colnamegroup=c('code'))
df_tMIDEx = extract.Var(data.station=df_tMIDlist,
funct=which.min,
period=per,
timestep='year',
pos.datetime=1)
# Converts index of the tMID to the julian date associated
df_tMIDEx = prepare_date(df_tMIDEx, df_tMIDlist)
df_tMIDtrend = Estimate.stats(data.extract=df_tMIDEx,
df_tMIDlistB = df_tMIDlist
df_tMIDExB = df_tMIDEx
df_tMIDtrend = get_period(per, df_tMIDtrend, df_tMIDEx,
df_tMIDlist)
df_tMIDtrendB = bind_rows(df_tMIDtrendB, df_tMIDtrend)
res_tMIDtrend = clean(df_tMIDtrendB, df_tMIDExB, df_tMIDlistB)
return (res_tMIDtrend)
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
### 2.1. Hydrograph
xref = matrix(
c(0.033, 0.025, 0.033, 0.075, 0.167, 0.217, 0.142,
0.092, 0.067, 0.058, 0.050, 0.042,
0.050, 0.050, 0.058, 0.083, 0.150, 0.167, 0.117,
0.083, 0.058, 0.058, 0.067, 0.058,
0.058, 0.050, 0.100, 0.142, 0.158, 0.092, 0.067,
0.050, 0.042, 0.058, 0.083, 0.100,
0.112, 0.098, 0.101, 0.125, 0.122, 0.072, 0.036,
0.024, 0.039, 0.067, 0.102, 0.102,
0.139, 0.092, 0.082, 0.099, 0.087, 0.039, 0.015,
0.012, 0.036, 0.108, 0.159, 0.131,
0.136, 0.105, 0.091, 0.057, 0.042, 0.034, 0.036,
0.052, 0.068, 0.111, 0.132, 0.136,
0.156, 0.154, 0.117, 0.119, 0.086, 0.044, 0.025,
0.015, 0.025, 0.044, 0.089, 0.127,
0.204, 0.163, 0.118, 0.102, 0.060, 0.030, 0.018,
0.012, 0.023, 0.041, 0.087, 0.143,
0.157, 0.130, 0.119, 0.094, 0.062, 0.042, 0.028,
0.021, 0.035, 0.062, 0.099, 0.150,
0.128, 0.142, 0.122, 0.128, 0.105, 0.065, 0.035,
0.024, 0.031, 0.044, 0.074, 0.101,
0.133, 0.126, 0.111, 0.110, 0.081, 0.056, 0.038,
0.027, 0.042, 0.063, 0.098, 0.117,
0.099, 0.100, 0.101, 0.099, 0.088, 0.078, 0.072,
0.064, 0.064, 0.069, 0.076, 0.089),
ncol=12, byrow=TRUE)
colnames(xref) = seq(1, 12, 1)
row.names(xref) = c('GROUP11', 'GROUP10', 'GROUP9', 'GROUP8',
'GROUP7', 'GROUUK', 'GROUP6', 'GROUP5',
'GROUP4', 'GROUP3', 'GROUP2', 'GROUP1')
# Computes the hydrograph of a station
get_hydrograph = function (df_data, period=NULL, df_meta=NULL) {
# If there is a specified period
if (!is.null(period)) {
# Extracts only the data of this period
df_data = df_data[df_data$Date >= as.Date(period[1])
& df_data$Date <= as.Date(period[2]),]
}
# If there is the metadata
if (!is.null(df_meta)) {
# New column in metadata for hydrological regime
df_meta$regime_hydro = NA
# New column in metadata for the start of the hydrological year
df_meta$start_year = NA
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Otherwise it is just a list of flow from one station
} else {
# Only one code is present
nCode = 1
}
# Blank tibble to store data
df_QM = tibble()
# For all accessible code
for (k in 1:nCode) {
# If there is the metadata
if (!is.null(df_meta)) {
# Gets the code
code = Code[k]
# Get the associated data
df_data_code = df_data[df_data$code == code,]
} else {
# The data are the date for the current code
df_data_code = df_data
}
# Gets a list of the month of the data as numeric
monthData = as.numeric(format(df_data_code$Date, "%m"))
# Blank list to stock month mean
QM_code = c()
# For all months
for (i in 1:12) {
# Gets all the flow data associated to the current month
data = df_data_code$Value[monthData == i]
# Averages the data
QM_code[i] = mean(data, na.rm=TRUE)
}
regime = 0
classRegime = ""
distance = rep(0, length(xref[,1]))
distancemin = 0
for (j in 1:length(xref[,1])) {
distance[j] = sum((QM_code / mean(QM_code) - xref[j,])^2)
}
regime = 1 + length(xref[,1]) - which.min(distance)
distancemin = distance[which.min(distance)]
if (regime < 7) {
classRegime = "Pluvial"
} else if (regime >= 7 & regime < 10) {
classRegime = "Transition"
} else if (regime >= 10) {
classRegime = "Nival glaciaire"
}
# If there is the metadata
if (!is.null(df_meta)) {
# Creates a temporary tibble to store hydrograph results
df_QMtmp = tibble(QM=QM_code, code=code)
# Stores it
df_QM = bind_rows(df_QM, df_QMtmp)
# Stores result of the hydrological regime
df_meta$regime_hydro[df_meta$code == code] = classRegime
# Stores it as the start of the hydrological year
df_meta$start_year[df_meta$code == code] = maxMonth
# Otherwise
} else {
# No tibble needed
df_QM = QM_code
df_meta = classRegime
}
}
# Returns the hydrograph and meta data
return (list(QM=df_QM, meta=df_meta))
}
### 2.2. Break date
# Compute the break date of the flow data by station
get_break = function (df_data, df_meta, p_thresold=0.05) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Blank date break list and associated station code vector
date_break = list()
Code_break = c()
# For all accessible code
for (code in Code) {
# Get the associated data
df_data_code = df_data[df_data$code == code,]
# Remove NA data
df_data_codeNoNA = df_data_code[!is.na(df_data_code$Value),]
# Extract p value
p_value = res_break$p
# The length of the data analysed
nbreak = res_break$nobs
# Index of the break date
ibreak = res_break$estimate
# If the p value results is under the thresold
if (p_value <= p_thresold) {
# Get the mean of the index break if there is several
ibreak = round(mean(ibreak), 0)
# Store the date break with its associated code
date_break = append(date_break,
df_data_codeNoNA$Date[ibreak])
Code_break = append(Code_break, code)
}
# step1 = mean(df_data_codeNoNA$Value[1:ibreak])
# step2 = mean(df_data_codeNoNA$Value[(ibreak+1):nbreak])
}
# Create a tibble with the break analysis results
df_break = tibble(code=Code_break, Date=as.Date(date_break))
return (df_break)
}
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
# Compute the time gap by station
get_lacune = function (df_data, df_meta) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Create new vector to stock results for cumulative and mean
# time gap by station
tLac = c()
meanLac = c()
# Get rows where there is no NA
NoNA = complete.cases(df_data)
# Get data where there is no NA
df_data_NoNA = df_data[NoNA,]
# For every station
for (code in Code) {
# Get only the data rows for the selected station
df_data_code = df_data[df_data$code==code,]
# Get date for the selected station
Date = df_data_code$Date
# Get time span for the selection station
span = as.numeric(Date[length(Date)] - Date[1])
# Get only the data rows with no NA for the selected station
df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
# Get date for the selected station
Date_NoNA = df_data_NoNA_code$Date
# Compute the time gap
lac = as.numeric(diff(Date_NoNA) - 1)
# Compute the cumulative gap
lac_sum = sum(lac)
# Store the cumulative gap rate
tLac = c(tLac, lac_sum/span)
# Compute the mean gap
lac_mean = mean(lac[lac != 0])
# Store the mean gap
meanLac = c(meanLac, lac_mean)
}
# Compute the cumulative gap rate in pourcent
tLac100 = tLac * 100
# Create tibble for lacune
df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
# Join a tibble
df_meta = full_join(df_meta, df_lac)
return (df_meta)
}