#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/analyse.R
#
# File that carries out all the possible analyses of the data.
# It mainly groups the functions used to compute the trend analysis
# of hydrological variables with the Mann-Kendall test. Functions
# needed for break or gap analysis are also present.


# Useful libraries
library(dplyr)
library(zoo)   # rollmean
library(trend) # pettitt.test (assumed)
# prepare, reprepare, extract.Var, Estimate.stats, as well as the
# helpers missing_data, sampling_data, get_period, clean,
# prepare_date and interval, are expected to be provided by the
# rest of the toolbox and its dependencies.
# Compute the intercept values of the linear trends from the
# first-order (slope) values of the trends and the data on which
# the analysis is performed.
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
    # For all the groups (stations) present in the data
    for (g in levels(factor(df_Xlist$data$group))) {
        # Get the data and the trend results of the group
        df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
        df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,]
        Start = df_Xtrend_code$period_start
        End = df_Xtrend_code$period_end
        # Extract only the unrepeated dates
        UStart = levels(factor(Start))
        UEnd = levels(factor(End))
        # For all the analysed periods of the group
        for (i in 1:max(length(UStart), length(UEnd))) {
            # Data and trend results restricted to the period
            df_data_code_per =
                df_data_code[df_data_code$Date >= Start[i]
                             & df_data_code$Date <= End[i],]
            df_Xtrend_code_per =
                df_Xtrend_code[df_Xtrend_code$period_start == Start[i]
                               & df_Xtrend_code$period_end == End[i],]
            # Row of df_Xtrend to fill
            id = which(df_Xtrend$group == g
                       & df_Xtrend$period_start == Start[i]
                       & df_Xtrend$period_end == End[i])
            # Mean time (converted to years) and mean value over the period
            mu_t = as.numeric(mean(c(Start[i],
                                     End[i]),
                                   na.rm=TRUE)) / unit2day
            mu_X = mean(df_data_code_per$Value, na.rm=TRUE)
            # Intercept of the linear trend: b = mean(X) - slope * mean(t)
            # (the slope column is assumed to be named 'trend')
            b = mu_X - mu_t * df_Xtrend_code_per$trend
            df_Xtrend$intercept[id] = b
        }
    }
    return (df_Xtrend)
}
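# A quick, purely illustrative check of the relation used above
# (intercept = mean(X) - slope * mean(t), with t converted to years):
#   t = seq(0, 3650)                      # time in days
#   X = 0.5 * (t / 365.25) + 2            # linear signal, slope 0.5 per year
#   b = mean(X) - 0.5 * mean(t / 365.25)  # recovers the intercept 2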
# Realise the trend analysis of the average annual flow (QA)
# hydrological variable
get_QAtrend = function (df_data, df_meta, period, alpha, yearLac_day) {
    # Remove incomplete years from the time series
    df_data = missing_data(df_data, df_meta,
                           yearLac_day=yearLac_day,
                           yearStart='01-01')
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_QAtrendB = tibble()
    # For all periods
    for (per in period) {
        df_QAlist = prepare(df_data, colnamegroup=c('code'))
        # Compute the yearly mean over the data
        df_QAEx = extract.Var(data.station=df_QAlist,
                              funct=mean,
                              timestep='year',
                              period=per,
                              pos.datetime=1,
                              na.rm=TRUE)
        # Compute the trend analysis (significance level argument name assumed)
        df_QAtrend = Estimate.stats(data.extract=df_QAEx,
                                    level=alpha)
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval, store it and the associated data
        if (I > Imax) {
            Imax = I
            df_QAlistB = df_QAlist
            df_QAExB = df_QAEx
        }
        # Specify the period of analysis
        df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist)
        # Store the trend
        df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend)
    }
    # Clean results of the trend analysis
    res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB)
    return (res_QAtrend)
}
# Realise the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable
get_QMNAtrend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_day) {
    # Remove incomplete years from the time series
    df_data = missing_data(df_data, df_meta,
                           yearLac_day=yearLac_day, yearStart='01-01')
    # Samples the data
    df_data = sampling_data(df_data, df_meta,
                            sampleSpan=sampleSpan)
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_QMNAtrendB = tibble()
    # For all periods
    for (per in period) {
        df_QMNAlist = prepare(df_data, colnamegroup=c('code'))
        # Compute the monthly mean over the data (monthly timestep name assumed)
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=mean,
                                period=per,
                                timestep='year-month',
                                pos.datetime=1,
                                na.rm=TRUE)
        # Re-prepare the monthly means to fit the entry of extract.Var
        df_QMNAlist = reprepare(df_QMNAEx,
                                df_QMNAlist,
                                colnamegroup=c('code'))
        # Compute the yearly min over the monthly means
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=min,
                                period=per,
                                timestep='year',
                                pos.datetime=1,
                                na.rm=TRUE)
        # Compute the trend analysis (significance level argument name assumed)
        df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx,
                                      level=alpha)
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval, store it and the associated data
        if (I > Imax) {
            Imax = I
            df_QMNAlistB = df_QMNAlist
            df_QMNAExB = df_QMNAEx
        }
        # Specify the period of analysis
        df_QMNAtrend = get_period(per, df_QMNAtrend, df_QMNAEx, df_QMNAlist)
        # Store the trend
        df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
    }
    # Clean results of the trend analysis
    res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)
    return (res_QMNAtrend)
}
# Realises the trend analysis of the minimum 10 day average flow
# over the year (VCN10) hydrological variable
get_VCN10trend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_day) {
    # Remove incomplete years from the time series
    df_data = missing_data(df_data, df_meta,
                           yearLac_day=yearLac_day, yearStart='01-01')
    # Samples the data
    df_data = sampling_data(df_data, df_meta,
                            sampleSpan=sampleSpan)
    # Get all different stations code
    Code = levels(factor(df_meta$code))
    # Blank tibble to store the rolling averaged data
    df_data_roll = tibble()
    # For all the code
    for (code in Code) {
        # Get the data associated to the code
        df_data_code = df_data[df_data$code == code,]
        # Perform the rolling mean of the flow over 10 days
        df_data_roll_code = tibble(Date=df_data_code$Date,
                                   Value=rollmean(df_data_code$Value,
                                                  10,
                                                  fill=NA),
                                   code=code)
        # Store the results
        df_data_roll = bind_rows(df_data_roll, df_data_roll_code)
    }
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_VCN10trendB = tibble()
    # For all periods
    for (per in period) {
        # Prepare the averaged data to fit the entry of extract.Var
        df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))
        # Compute the yearly min over the averaged data
        df_VCN10Ex = extract.Var(data.station=df_VCN10list,
                                 funct=min,
                                 period=per,
                                 timestep='year',
                                 pos.datetime=1,
                                 na.rm=TRUE)
        # Compute the trend analysis (significance level argument name assumed)
        df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
                                       level=alpha)
# Get the associated time interval
I = interval(per[1], per[2])
# If it is the largest interval
if (I > Imax) {
# Store it and the associated data and info
Imax = I
df_VCN10listB = df_VCN10list
df_VCN10ExB = df_VCN10Ex
}
        # Specify the period of analysis
df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
df_VCN10list)
# Store the trend
df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
}
    # Clean results of the trend analysis
res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)
return (res_VCN10trend)
}
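# A minimal sketch of how these trend functions are meant to be
# called (hypothetical inputs: 'df_data' with Date, Value and code
# columns, 'df_meta' with a code column; parameter values are only
# examples):
#   period = list(c("1968-01-01", "2019-12-31"),
#                 c("1988-01-01", "2019-12-31"))
#   res_QAtrend = get_QAtrend(df_data, df_meta, period,
#                             alpha=0.1, yearLac_day=3)
#   res_QMNAtrend = get_QMNAtrend(df_data, df_meta, period,
#                                 alpha=0.1,
#                                 sampleSpan=c('05-01', '11-30'),
#                                 yearLac_day=3)
#   res_VCN10trend = get_VCN10trend(df_data, df_meta, period,
#                                   alpha=0.1,
#                                   sampleSpan=c('05-01', '11-30'),
#                                   yearLac_day=3)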
### 1.4. DEB date
# Returns the index of the first day under a given upper limit
# UpLim, either of the longest period under the limit
# (select_longest=TRUE) or of the first one
which_underfirst = function (L, UpLim, select_longest=TRUE) {
    # Indices of the values under (or equal to) the upper limit
    ID = which(L <= UpLim)
    if (select_longest) {
        # Gaps between successive indices
        dID = diff(ID)
        dID = c(10, dID)
        # A gap different from 1 marks the start of a new period
        IDjump = which(dID != 1)
        Njump = length(IDjump)
        Periods = vector(mode='list', length=Njump)
        Nperiod = c()
        for (i in 1:Njump) {
            idStart = IDjump[i]
            if (i < Njump) {
                idEnd = IDjump[i+1] - 1
            } else {
                idEnd = length(ID)
            }
            period = ID[idStart:idEnd]
            Periods[[i]] = period
            Nperiod = c(Nperiod, length(period))
        }
        # Longest period under the limit
        period_max = Periods[[which.max(Nperiod)]]
        # First day of this longest period
        id = period_max[1]
    } else {
        # First day under the limit
        id = ID[1]
    }
    return (id)
}
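# A small illustrative check of this helper on a toy series:
#   L = c(5, 4, 2, 6, 2, 1, 1, 3)
#   which_underfirst(L, UpLim=2.5)
#   # -> 5, the start of the longest run under 2.5 (positions 5 to 7)
#   which_underfirst(L, UpLim=2.5, select_longest=FALSE)
#   # -> 3, the first position under 2.5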
# Realises the trend analysis of the start date of the low-flow
# period (DEB), taken as the first day under a low-flow threshold
get_DEBtrend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_day, thresold_type='VCN10', select_longest=TRUE) {
# Get all different stations code
Code = levels(factor(df_meta$code))
    # Gets the number of stations
nCode = length(Code)
# Blank tibble to store the data averaged
df_data_roll = tibble()
# For all the code
for (code in Code) {
# Get the data associated to the code
df_data_code = df_data[df_data$code == code,]
# Perform the roll mean of the flow over 10 days
df_data_roll_code = tibble(Date=df_data_code$Date,
Value=rollmean(df_data_code$Value,
10,
fill=NA),
code=code)
# Store the results
df_data_roll = bind_rows(df_data_roll, df_data_roll_code)
}
# Removes incomplete data from time series
df_data = missing_data(df_data,
df_meta=df_meta,
yearLac_day=yearLac_day)
# Samples the data
df_data = sampling_data(df_data,
df_meta=df_meta,
sampleSpan=sampleSpan)
# Removes incomplete data from the averaged time series
df_data_roll = missing_data(df_data_roll,
df_meta=df_meta,
yearLac_day=yearLac_day)
# Samples the data
df_data_roll = sampling_data(df_data_roll,
df_meta=df_meta,
sampleSpan=sampleSpan)
# Make sure to convert the period to a list
period = as.list(period)
# Set the max interval period as the minimal possible
Imax = 0
# Blank tibble for data to return
    df_DEBtrendB = tibble()
    # For all periods
    for (per in period) {
        # Select the data used to compute the low-flow threshold
        # according to the chosen thresold_type
if (thresold_type == 'QNj') {
# Prepare the data to fit the entry of extract.Var
df_QTlist = prepare(df_data,
colnamegroup=c('code'))
} else if (thresold_type == 'VCN10') {
# Prepare the data to fit the entry of extract.Var
df_QTlist = prepare(df_data_roll,
colnamegroup=c('code'))
}
        # Compute the yearly min over the selected data
        df_QTEx = extract.Var(data.station=df_QTlist,
                              funct=min,
                              timestep='year',
                              period=per,
                              pos.datetime=1,
                              na.rm=TRUE)
        # For each station (group1), the threshold is taken as the
        # maximum of the yearly minima
        df_QT = summarise(group_by(df_QTEx, group1),
                          values=max(values, na.rm=TRUE))
        # Renames the columns of the threshold tibble
        colnames(df_QT) = c('group', 'Thresold')
        # Adds the station info and drops the group column
        df_QT = full_join(df_QT, df_QTlist$info, by='group')
        df_QT = df_QT[-1]
# print(df_QT)
        # Blank containers to gather the DEB extraction of all stations
        df_DEBEx = tibble()
        df_DEBlist = list(data=tibble(), info=tibble())
# For all the code
for (k in 1:nCode) {
# Gets the code
code = Code[k]
# Get the data associated to the code
df_data_code = df_data[df_data$code == code,]
# Get the averaged data associated to the code
df_data_roll_code = df_data_roll[df_data_roll$code == code,]
            # Prepare the averaged data to fit the entry of extract.Var
            df_DEBlist_code = prepare(df_data_roll_code,
                                      colnamegroup=c('code'))
            # Get the threshold associated to the code
            QT_code = df_QT$Thresold[df_QT$code == code]
            # Compute, for each year, the first day under the threshold
            # (period/timestep arguments assumed as in the other calls)
            df_DEBEx_code = extract.Var(data.station=df_DEBlist_code,
                                        funct=which_underfirst,
                                        period=per,
                                        timestep='year',
                                        pos.datetime=1,
                                        UpLim=QT_code,
                                        select_longest=select_longest)
df_DEBEx_code$group1 = k
df_DEBlist_code$data$group = k
df_DEBlist_code$info$group = k
# Converts index of the DEB to the julian date associated
df_DEBEx_code = prepare_date(df_DEBEx_code,
df_DEBlist_code)
df_DEBEx = bind_rows(df_DEBEx, df_DEBEx_code)
df_DEBlist$data = bind_rows(df_DEBlist$data,
df_DEBlist_code$data)
df_DEBlist$info = bind_rows(df_DEBlist$info,
df_DEBlist_code$info)
        }
        # Compute the trend analysis (significance level argument name assumed)
        df_DEBtrend = Estimate.stats(data.extract=df_DEBEx,
                                     level=alpha)
# Get the associated time interval
I = interval(per[1], per[2])
# If it is the largest interval
if (I > Imax) {
# Store it and the associated data and info
Imax = I
df_DEBlistB = df_DEBlist
df_DEBExB = df_DEBEx
        }
        # Specify the period of analysis
        df_DEBtrend = get_period(per, df_DEBtrend, df_DEBEx,
                                 df_DEBlist)
        # Store the trend
        df_DEBtrendB = bind_rows(df_DEBtrendB, df_DEBtrend)
    }
    # Clean results of the trend analysis
    res_DEBtrend = clean(df_DEBtrendB, df_DEBExB, df_DEBlistB)
    return (res_DEBtrend)
}
# Realises the trend analysis of the date of the minimum 10 day
# average flow over the year (CEN, the date of the VCN10)
get_CENtrend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_day) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Blank tibble to store the data averaged
df_data_roll = tibble()
    # For all the code
    for (code in Code) {
        # Get the data associated to the code
        df_data_code = df_data[df_data$code == code,]
        # Perform the rolling mean of the flow over 10 days
        df_data_roll_code = tibble(Date=df_data_code$Date,
                                   Value=rollmean(df_data_code$Value,
                                                  10,
                                                  fill=NA),
                                   code=code)
        # Store the results
        df_data_roll = bind_rows(df_data_roll, df_data_roll_code)
    }
df_data_roll = missing_data(df_data_roll, df_meta,
yearLac_day=yearLac_day,
yearStart='01-01')
    # Samples the data
    df_data_roll = sampling_data(df_data_roll, df_meta,
                                 sampleSpan=sampleSpan)
# Make sure to convert the period to a list
period = as.list(period)
# Set the max interval period as the minimal possible
Imax = 0
    # Blank tibble for data to return
    df_CENtrendB = tibble()
# For all periods
for (per in period) {
# Prepare the data to fit the entry of extract.Var
df_CENlist = prepare(df_data_roll, colnamegroup=c('code'))
df_CENEx = extract.Var(data.station=df_CENlist,
funct=which.min,
period=per,
timestep='year',
pos.datetime=1)
# Converts index of the CEN to the julian date associated
df_CENEx = prepare_date(df_CENEx, df_CENlist)
        # Compute the trend analysis (significance level argument name assumed)
        df_CENtrend = Estimate.stats(data.extract=df_CENEx,
                                     level=alpha)
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval, store it and the associated data
        if (I > Imax) {
            Imax = I
            df_CENlistB = df_CENlist
            df_CENExB = df_CENEx
        }
        # Specify the period of analysis
df_CENtrend = get_period(per, df_CENtrend, df_CENEx,
df_CENlist)
        # Store the trend
        df_CENtrendB = bind_rows(df_CENtrendB, df_CENtrend)
    }
    # Clean results of the trend analysis
    res_CENtrend = clean(df_CENtrendB, df_CENExB, df_CENlistB)
    return (res_CENtrend)
}
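# A minimal usage sketch for the low-flow timing analyses
# (hypothetical inputs and parameter values, as above):
#   res_DEBtrend = get_DEBtrend(df_data, df_meta, period,
#                               alpha=0.1,
#                               sampleSpan=c('05-01', '11-30'),
#                               yearLac_day=3,
#                               thresold_type='VCN10')
#   res_CENtrend = get_CENtrend(df_data, df_meta, period,
#                               alpha=0.1,
#                               sampleSpan=c('05-01', '11-30'),
#                               yearLac_day=3)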
0.133, 0.126, 0.111, 0.110, 0.081, 0.056, 0.038,
0.017, 0.008, 0.017, 0.042, 0.108, 0.183, 0.200,
0.175, 0.117, 0.067, 0.042, 0.025),
ncol=12, byrow=TRUE)
colnames(xref) = seq(1, 12, 1)
row.names(xref) = c('GROUP1', 'GROUP2', 'GROUP3', 'GROUP4',
'GROUP5', 'GROUP6', 'GROUP7', 'GROUP8',
'GROUP9', 'GROUP10', 'GROUP11', 'GROUP12')
# Computes the hydrograph of a station
get_hydrograph = function (df_data, period=NULL, df_meta=NULL) {
# If there is a specified period
if (!is.null(period)) {
# Extracts only the data of this period
df_data = df_data[df_data$Date >= as.Date(period[1])
& df_data$Date <= as.Date(period[2]),]
}
# If there is the metadata
if (!is.null(df_meta)) {
# New column in metadata for hydrological regime
df_meta$regime_hydro = NA
# New column in metadata for the start of the hydrological year
df_meta$start_year = NA
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Otherwise it is just a list of flow from one station
} else {
# Only one code is present
nCode = 1
}
# Blank tibble to store data
df_QM = tibble()
# For all accessible code
for (k in 1:nCode) {
# If there is the metadata
if (!is.null(df_meta)) {
# Gets the code
code = Code[k]
# Get the associated data
df_data_code = df_data[df_data$code == code,]
} else {
            # The data are those of the single station
df_data_code = df_data
}
# Gets a list of the month of the data as numeric
monthData = as.numeric(format(df_data_code$Date, "%m"))
        # Blank vector to store the monthly means
QM_code = c()
# For all months
for (i in 1:12) {
# Gets all the flow data associated to the current month
data = df_data_code$Value[monthData == i]
# Averages the data
QM_code[i] = mean(data, na.rm=TRUE)
}
regime = 0
classRegime = ""
distance = rep(0, length(xref[,1]))
distancemin = 0
for (j in 1:length(xref[,1])) {
distance[j] = sum((QM_code / mean(QM_code) - xref[j,])^2)
}
regime = 1 + length(xref[,1]) - which.min(distance)
distancemin = distance[which.min(distance)]
if (regime < 7) {
classRegime = "Pluvial"
} else if (regime >= 7 & regime < 10) {
classRegime = "Transition"
        } else if (regime >= 10) {
            # (class label assumed for the snow and glacier fed regimes)
            classRegime = "Nival glaciaire"
        }
        # Month of maximum mean flow, used as the start of the
        # hydrological year
        maxMonth = which.max(QM_code)
        # If there is the metadata
if (!is.null(df_meta)) {
# Creates a temporary tibble to store hydrograph results
df_QMtmp = tibble(QM=QM_code, code=code)
# Stores it
df_QM = bind_rows(df_QM, df_QMtmp)
# Stores result of the hydrological regime
df_meta$regime_hydro[df_meta$code == code] = classRegime
# Stores it as the start of the hydrological year
df_meta$start_year[df_meta$code == code] = maxMonth
# Otherwise
} else {
# No tibble needed
df_QM = QM_code
df_meta = classRegime
}
        }
    }
    # Returns the hydrograph and the metadata
    return (list(QM=df_QM, meta=df_meta))
}
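# A minimal usage sketch (hypothetical inputs, as above):
#   res = get_hydrograph(df_data,
#                        period=c("1968-01-01", "2019-12-31"),
#                        df_meta=df_meta)
#   res$QM    # monthly mean flows by station
#   res$meta  # metadata with the added regime_hydro and start_year columns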
### 2.2. Break date

# Compute the break date of the flow data by station thanks to the
# Pettitt test (function name and default threshold assumed)
get_break = function (df_data, df_meta, p_thresold=0.05) {
# Get all different stations code
Code = levels(factor(df_meta$code))
# Number of stations
nCode = length(Code)
# Blank date break list and associated station code vector
date_break = list()
Code_break = c()
# For all accessible code
for (code in Code) {
# Get the associated data
df_data_code = df_data[df_data$code == code,]
# Remove NA data
df_data_codeNoNA = df_data_code[!is.na(df_data_code$Value),]
        # Compute the break analysis (the Pettitt test from the
        # 'trend' package is assumed here)
        res_break = pettitt.test(df_data_codeNoNA$Value)
        # Extract the p value
        p_value = res_break$p.value
# The length of the data analysed
nbreak = res_break$nobs
# Index of the break date
ibreak = res_break$estimate
        # If the p value is under the threshold
        if (p_value <= p_thresold) {
            # Get the mean of the break index if there are several
            ibreak = round(mean(ibreak), 0)
# Store the date break with its associated code
date_break = append(date_break,
df_data_codeNoNA$Date[ibreak])
Code_break = append(Code_break, code)
}
# step1 = mean(df_data_codeNoNA$Value[1:ibreak])
# step2 = mean(df_data_codeNoNA$Value[(ibreak+1):nbreak])
}
# Create a tibble with the break analysis results
df_break = tibble(code=Code_break, Date=as.Date(date_break))
return (df_break)
}
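# A small sketch of the underlying break detection on a synthetic
# series with a step change (assuming pettitt.test from the 'trend'
# package, as used above):
#   library(trend)
#   set.seed(1)
#   X = c(rnorm(50, mean=1), rnorm(50, mean=3))
#   res = pettitt.test(X)
#   res$p.value   # very small: a break is detected
#   res$estimate  # estimated break index, close to 50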
# Compute the time gap by station
get_lacune = function (df_data, df_meta) {
# Get all different stations code
Code = levels(factor(df_meta$code))
    # Create new vectors to store the cumulative and mean
    # time gap by station
tLac = c()
meanLac = c()
# Get rows where there is no NA
NoNA = complete.cases(df_data)
# Get data where there is no NA
df_data_NoNA = df_data[NoNA,]
# For every station
for (code in Code) {
# Get only the data rows for the selected station
df_data_code = df_data[df_data$code==code,]
# Get date for the selected station
Date = df_data_code$Date
        # Get the time span for the selected station
span = as.numeric(Date[length(Date)] - Date[1])
# Get only the data rows with no NA for the selected station
df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
# Get date for the selected station
Date_NoNA = df_data_NoNA_code$Date
# Compute the time gap
lac = as.numeric(diff(Date_NoNA) - 1)
# Compute the cumulative gap
lac_sum = sum(lac)
# Store the cumulative gap rate
tLac = c(tLac, lac_sum/span)
# Compute the mean gap
lac_mean = mean(lac[lac != 0])
# Store the mean gap
meanLac = c(meanLac, lac_mean)
}
    # Compute the cumulative gap rate in percent
    tLac100 = tLac * 100
    # Create a tibble for the gaps (lacunes)
    df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
    # Join it to the metadata tibble
    df_meta = full_join(df_meta, df_lac, by='code')
    return (df_meta)
}
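# A quick illustration of the gap computation used above
# (illustrative dates only):
#   Date_NoNA = as.Date(c("2000-01-01", "2000-01-02",
#                         "2000-01-05", "2000-01-06"))
#   lac = as.numeric(diff(Date_NoNA) - 1)  # 0 2 0 : one 2-day gap
#   sum(lac)                               # cumulative gap of 2 days
#   mean(lac[lac != 0])                    # mean gap of 2 days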