# \\\
# Copyright 2021-2022 Louis Héraut*1
#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/analyse.R
#
# File that performs all the possible analyses of the data.
# It mainly groups the functions used to compute the trend analysis
# of hydrological variables thanks to the Mann-Kendall test.
# Functions needed for break or gap analysis are also present.
# Useful libraries
library(dplyr)
library(zoo) # rollmean
library(StatsAnalysisTrend)
library(lubridate)
library(trend)
# Sourcing R file
source('processing/format.R', encoding='latin1')
## 1. TREND ANALYSIS _________________________________________________
### 1.0. Intercept of trend __________________________________________
# Compute intercept values of linear trends with first order values
# of trends and the data on which analysis is performed.
# Computes the intercept values of linear trends, given the first
# order coefficients (slopes) of the trends and the data on which the
# trend analysis was performed.
# - df_Xtrend: trend results table (needs 'group', 'period_start',
#   'period_end' and 'trend' columns)
# - df_Xlist: list with 'data' (needs 'group', 'Date', 'Value') and
#   'info' (needs 'group'), as produced by the preparation step
# - unit2day: number of days in the time unit of the trend slope
#   (365.25 converts a per-year slope to the per-day date axis)
# Returns df_Xtrend with a new 'intercept' column filled in.
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
    # Create a column in the trend results full of NA
    df_Xtrend$intercept = NA
    # For all the different groups
    for (g in df_Xlist$info$group) {
        # Get the data and trend values linked to this group
        df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
        df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,]
        # Get the time start and end of the different periods
        Start = df_Xtrend_code$period_start
        End = df_Xtrend_code$period_end
        # Extract only the unrepeated dates
        UStart = levels(factor(Start))
        UEnd = levels(factor(End))
        # Get the number of different periods of trend analysis
        nPeriod = max(length(UStart), length(UEnd))
        # NOTE(review): the loop indexes Start[i]/End[i] (rows of the
        # group) rather than the unique values UStart/UEnd computed
        # above — equivalent only when rows come one per period, in
        # order; confirm against the upstream trend table layout.
        for (i in seq_len(nPeriod)) {
            # Get data and trend rows associated to the period
            df_data_code_per =
                df_data_code[df_data_code$Date >= Start[i]
                             & df_data_code$Date <= End[i],]
            df_Xtrend_code_per =
                df_Xtrend_code[df_Xtrend_code$period_start == Start[i]
                               & df_Xtrend_code$period_end == End[i],]
            # Row of the full trend table for this group and period
            id = which(df_Xtrend$group == g
                       & df_Xtrend$period_start == Start[i]
                       & df_Xtrend$period_end == End[i])
            # Mean of the flow and of the time over the period
            mu_X = mean(df_data_code_per$Value, na.rm=TRUE)
            mu_t = as.numeric(mean(c(Start[i],
                                     End[i]),
                                   na.rm=TRUE)) / unit2day
            # Intercept of the trend: b = mean(X) - mean(t) * slope
            b = mu_X - mu_t * df_Xtrend_code_per$trend
            # And store it
            df_Xtrend$intercept[id] = b
        }
    }
    return (df_Xtrend)
}
### 1.1. QA __________________________________________________________
# Realise the trend analysis of the average annual flow (QA)
# hydrological variable
# Performs the trend analysis of the average annual flow (QA)
# hydrological variable.
# - df_data / df_meta: daily flow data and station metadata
# - period: one period c(start, end) or a list of such periods
# - alpha: significance level passed to Estimate.stats
# - dayLac_lim / yearNA_lim: completeness limits for missing_data
# - df_flag: local data corrections; df_mod: modification log
# Returns list(data=..., mod=..., analyse=...) with the corrected
# data, the modification log and the cleaned trend results.
get_QAtrend = function (df_data, df_meta, period, alpha, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) {
    # Apply local corrections if needed
    correction <- flag_data(df_data, df_meta,
                            df_flag=df_flag,
                            df_mod=df_mod)
    df_data <- correction$data
    df_mod <- correction$mod
    # Drop incomplete data from the time series
    filtering <- missing_data(df_data, df_meta,
                              dayLac_lim=dayLac_lim,
                              yearNA_lim=yearNA_lim,
                              df_mod=df_mod)
    df_data <- filtering$data
    df_mod <- filtering$mod
    # The period argument is always handled as a list
    period <- as.list(period)
    # Largest time interval seen so far (minimal possible to start)
    interval_max <- 0
    # Accumulator for the trend results of every period
    df_trend_all <- tibble()
    for (p in period) {
        # Shape the data to fit the entry of extract.Var
        df_list <- prepare(df_data, colnamegroup=c('code'))
        # Yearly mean of the flow
        df_extract <- extract.Var(data.station=df_list,
                                  funct=mean,
                                  timestep='year',
                                  period=p,
                                  pos.datetime=1,
                                  na.rm=TRUE)
        # Trend analysis itself
        df_trend <- Estimate.stats(data.extract=df_extract,
                                   level=alpha,
                                   dep.option='AR1')
        # Time interval associated to this period
        I <- interval(p[1], p[2])
        # Keep the extraction of the largest interval for the final
        # clean step
        if (I > interval_max) {
            interval_max <- I
            df_list_best <- df_list
            df_extract_best <- df_extract
        }
        # Tag the results with the period of analysis
        df_trend <- get_period(p, df_trend, df_extract, df_list)
        # Accumulate the trend results
        df_trend_all <- bind_rows(df_trend_all, df_trend)
    }
    # Clean the results of the trend analysis
    cleaned <- clean(df_trend_all, df_extract_best, df_list_best)
    return (list(data=df_data, mod=df_mod, analyse=cleaned))
}
### 1.2. QMNA ________________________________________________________
# Realise the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable
# Performs the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable.
# - df_data / df_meta: daily flow data and station metadata
# - period: one period c(start, end) or a list of such periods
# - alpha: significance level passed to Estimate.stats
# - sampleSpan: sampling span forwarded to sampling_data
# - dayLac_lim / yearNA_lim: completeness limits for missing_data
# - df_flag: local data corrections; df_mod: modification log
# Returns list(data=..., mod=..., analyse=...) with the corrected
# data, the modification log and the cleaned trend results.
get_QMNAtrend = function (df_data, df_meta, period, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) {
    # Local corrections if needed
    res = flag_data(df_data, df_meta,
                    df_flag=df_flag,
                    df_mod=df_mod)
    df_data = res$data
    df_mod = res$mod
    # Removes incomplete data from the time series
    res = missing_data(df_data, df_meta,
                       dayLac_lim=dayLac_lim,
                       yearNA_lim=yearNA_lim,
                       df_mod=df_mod)
    df_data = res$data
    df_mod = res$mod
    # Samples the data
    res = sampling_data(df_data, df_meta,
                        sampleSpan=sampleSpan,
                        df_mod=df_mod)
    df_data = res$data
    df_mod = res$mod
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for the trend results to return
    df_QMNAtrendB = tibble()
    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_QMNAlist = prepare(df_data, colnamegroup=c('code'))
        # Compute the monthly mean over the data
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=mean,
                                period=per,
                                timestep='year-month',
                                per.start="01",
                                pos.datetime=1,
                                na.rm=TRUE)
        # Re-prepare the monthly means to fit the entry of extract.Var
        df_QMNAlist = reprepare(df_QMNAEx,
                                df_QMNAlist,
                                colnamegroup=c('code'))
        # Compute the yearly min over the monthly means
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=min,
                                period=per,
                                timestep='year',
                                pos.datetime=1,
                                na.rm=TRUE)
        # Compute the trend analysis
        df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx,
                                      level=alpha,
                                      dep.option='AR1')
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval, store the associated data
        # and info (used by the final clean step)
        if (I > Imax) {
            Imax = I
            df_QMNAlistB = df_QMNAlist
            df_QMNAExB = df_QMNAEx
        }
        # Specify the period of analysis
        df_QMNAtrend = get_period(per, df_QMNAtrend,
                                  df_QMNAEx,
                                  df_QMNAlist)
        # Store the trend
        df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
    }
    # Clean the results of the trend analysis
    res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)
    res = list(data=df_data, mod=df_mod, analyse=res_QMNAtrend)
    return (res)
}
### 1.3. VCN10 _______________________________________________________
# Computes the rolling mean of the flow over 'nroll' days for each
# station code.
# - df_data: daily data ('Date', 'Value', 'code' columns)
# - Code: vector of station codes to process
# - nroll: width (in days) of the rolling mean window
# - df_mod: if not NULL, a modification log to append to
# Returns the averaged data directly, or list(data=..., mod=...)
# when a modification log is provided.
rollmean_code = function (df_data, Code, nroll=10, df_mod=NULL) {
    # Blank tibble to store the averaged data
    df_data_roll = tibble()
    # For all the codes
    for (code in Code) {
        # Get the data associated to the code
        df_data_code = df_data[df_data$code == code,]
        # Perform the rolling mean of the flow over 'nroll' days
        # (fix: 'nroll' was previously ignored and 10 was hard-coded)
        df_data_roll_code = tibble(Date=df_data_code$Date,
                                   Value=rollmean(df_data_code$Value,
                                                  nroll,
                                                  fill=NA),
                                   code=code)
        # Store the results
        df_data_roll = bind_rows(df_data_roll, df_data_roll_code)
        # Log the modification for this station if requested
        if (!is.null(df_mod)) {
            df_mod = add_mod(df_mod, code,
                             type='Rolling average',
                             fun_name='rollmean',
                             comment=paste0('Rolling average of ',
                                            nroll,
                                            ' day over all the data'))
        }
    }
    if (!is.null(df_mod)) {
        # Fix: return the averaged data, not the original input
        # (previously 'data' held the untouched df_data, so the
        # rolling mean was silently discarded by callers)
        res = list(data=df_data_roll, mod=df_mod)
        return (res)
    } else {
        return (df_data_roll)
    }
}
# Realises the trend analysis of the minimum 10 day average flow
# over the year (VCN10) hydrological variable
# Performs the trend analysis of the minimum 10 day average flow
# over the year (VCN10) hydrological variable.
# Same parameters and return shape as the other get_*trend
# functions; the returned 'data' is the 10-day rolling average.
get_VCN10trend = function (df_data, df_meta, period, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) {
    # Get all different station codes
    Code = levels(factor(df_meta$code))
    # Local corrections if needed
    res = flag_data(df_data, df_meta,
                    df_flag=df_flag,
                    df_mod=df_mod)
    df_data = res$data
    df_mod = res$mod
    # Computes the rolling average by 10 days over the data
    # NOTE(review): check that the 'data' slot returned by
    # rollmean_code really contains the averaged series
    res = rollmean_code(df_data, Code, 10, df_mod=df_mod)
    df_data_roll = res$data
    df_mod = res$mod
    # Removes incomplete data from the time series
    res = missing_data(df_data_roll, df_meta,
                       dayLac_lim=dayLac_lim,
                       yearNA_lim=yearNA_lim,
                       df_mod=df_mod)
    df_data_roll = res$data
    df_mod = res$mod
    # Samples the data
    res = sampling_data(df_data_roll, df_meta,
                        sampleSpan=sampleSpan,
                        df_mod=df_mod)
    df_data_roll = res$data
    df_mod = res$mod
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for the trend results to return
    df_VCN10trendB = tibble()
    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))
        # Compute the yearly min over the averaged data
        df_VCN10Ex = extract.Var(data.station=df_VCN10list,
                                 funct=min,
                                 period=per,
                                 timestep='year',
                                 pos.datetime=1,
                                 na.rm=TRUE)
        # Compute the trend analysis
        df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
                                       level=alpha,
                                       dep.option='AR1')
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval, store the associated data
        # and info (used by the final clean step)
        if (I > Imax) {
            Imax = I
            df_VCN10listB = df_VCN10list
            df_VCN10ExB = df_VCN10Ex
        }
        # Specify the period of analysis
        df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
                                   df_VCN10list)
        # Store the trend
        df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
    }
    # Clean the results of the trend analysis
    res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)
    res = list(data=df_data_roll, mod=df_mod,
               analyse=res_VCN10trend)
    return (res)
}
### 1.4. tDEB date ___________________________________________________
# Returns the index of the first value of 'L' under the 'UpLim'
# threshold. If 'select_longest' is TRUE, returns the start index of
# the longest run of consecutive values under the threshold (ties
# resolved in favour of the earliest run); otherwise the first value
# under the threshold. Returns NA when no value is under the
# threshold.
which_underfirst = function (L, UpLim, select_longest=TRUE) {
    # Indices of all values under the threshold
    ID = which(L <= UpLim)
    # No value under the threshold: nothing to select
    if (length(ID) == 0) {
        return (NA)
    }
    if (select_longest) {
        # Gaps between successive indices; the leading sentinel
        # (any value != 1 works) marks the start of the first run
        dID = diff(ID)
        dID = c(10, dID)
        # Positions (within ID) where a new consecutive run starts
        IDjump = which(dID != 1)
        Njump = length(IDjump)
        # Collect each run and its length (preallocated)
        Periods = vector(mode='list', length=Njump)
        Nperiod = integer(Njump)
        for (i in seq_len(Njump)) {
            idStart = IDjump[i]
            if (i < Njump) {
                idEnd = IDjump[i+1] - 1
            } else {
                idEnd = length(ID)
            }
            period = ID[idStart:idEnd]
            Periods[[i]] = period
            Nperiod[i] = length(period)
        }
        # Start index of the longest run (first one on ties)
        period_max = Periods[[which.max(Nperiod)]]
        id = period_max[1]
    } else {
        # Simply the first index under the threshold
        id = ID[1]
    }
    return (id)
}
# Performs the trend analysis of the tDEB date (start date of the
# low-flow period) hydrological variable.
# - thresold_type: 'VCN10' (threshold built from the 10 day average
#   flow) or 'QNj' (threshold built from the daily flow)
# - select_longest: forwarded to which_underfirst to pick the start
#   of the longest period under the threshold
# Other parameters and the return shape follow the other get_*trend
# functions; the returned 'data' is the 10-day rolling average.
get_tDEBtrend = function (df_data, df_meta, period, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, thresold_type='VCN10', select_longest=TRUE, df_mod=tibble()) {
    # Get all different station codes
    Code = levels(factor(df_meta$code))
    # Gets the number of stations
    nCode = length(Code)
    # Local corrections if needed
    res = flag_data(df_data, df_meta,
                    df_flag=df_flag,
                    df_mod=df_mod)
    df_data = res$data
    df_mod = res$mod
    # Computes the rolling average by 10 days over the data
    res = rollmean_code(df_data, Code, 10, df_mod=df_mod)
    df_data_roll = res$data
    df_mod = res$mod
    # Removes incomplete data from the daily time series
    # NOTE(review): unlike the calls below, the result is assigned
    # directly (not via res$data) — confirm missing_data returns the
    # data itself when no df_mod is given
    df_data = missing_data(df_data,
                           df_meta=df_meta,
                           dayLac_lim=dayLac_lim,
                           yearNA_lim=yearNA_lim)
    # Samples the daily data
    df_data = sampling_data(df_data,
                            df_meta=df_meta,
                            sampleSpan=sampleSpan)
    # Removes incomplete data from the averaged time series
    res = missing_data(df_data_roll,
                       df_meta=df_meta,
                       dayLac_lim=dayLac_lim,
                       yearNA_lim=yearNA_lim,
                       df_mod=df_mod)
    df_data_roll = res$data
    df_mod = res$mod
    # Samples the averaged data
    res = sampling_data(df_data_roll,
                        df_meta=df_meta,
                        sampleSpan=sampleSpan,
                        df_mod=df_mod)
    df_data_roll = res$data
    df_mod = res$mod
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for the trend results to return
    df_tDEBtrendB = tibble()
    # For all periods
    for (per in period) {
        if (thresold_type == 'QNj') {
            # Threshold from the daily data
            df_QTlist = prepare(df_data,
                                colnamegroup=c('code'))
        } else if (thresold_type == 'VCN10') {
            # Threshold from the averaged data
            df_QTlist = prepare(df_data_roll,
                                colnamegroup=c('code'))
        }
        # Compute the yearly min over the data
        df_QTEx = extract.Var(data.station=df_QTlist,
                              funct=min,
                              timestep='year',
                              period=per,
                              pos.datetime=1,
                              na.rm=TRUE)
        # Per-station threshold: the max of the yearly minima
        df_QT = summarise(group_by(df_QTEx, group1),
                          values=max(values, na.rm=TRUE))
        # Renames the columns of the threshold table
        colnames(df_QT) = c('group', 'Thresold')
        df_QT = full_join(df_QT, df_QTlist$info, by='group')
        df_QT = df_QT[-1]
        # print(df_QT)
        # Accumulators for the per-station extractions
        df_tDEBEx = tibble()
        df_tDEBlist = list(data=tibble(), info=tibble())
        # For all the codes
        for (k in 1:nCode) {
            # Gets the code
            code = Code[k]
            # Get the data associated to the code (unused below)
            df_data_code = df_data[df_data$code == code,]
            # Get the averaged data associated to the code
            df_data_roll_code = df_data_roll[df_data_roll$code == code,]
            # Prepare the data to fit the entry of extract.Var
            df_tDEBlist_code = prepare(df_data_roll_code,
                                       colnamegroup=c('code'))
            # Threshold for this station
            QT_code = df_QT$Thresold[df_QT$code == code]
            # Index of the first day under the threshold each year
            df_tDEBEx_code = extract.Var(data.station=df_tDEBlist_code,
                                         funct=which_underfirst,
                                         period=per,
                                         timestep='year',
                                         pos.datetime=1,
                                         UpLim=QT_code,
                                         select_longest=select_longest)
            # Re-tag the results with the station index
            df_tDEBEx_code$group1 = k
            df_tDEBlist_code$data$group = k
            df_tDEBlist_code$info$group = k
            # Converts the index of the tDEB to the julian date
            df_tDEBEx_code = prepare_date(df_tDEBEx_code,
                                          df_tDEBlist_code)
            # Store the results
            df_tDEBEx = bind_rows(df_tDEBEx, df_tDEBEx_code)
            df_tDEBlist$data = bind_rows(df_tDEBlist$data,
                                         df_tDEBlist_code$data)
            df_tDEBlist$info = bind_rows(df_tDEBlist$info,
                                         df_tDEBlist_code$info)
        }
        # Compute the trend analysis
        df_tDEBtrend = Estimate.stats(data.extract=df_tDEBEx,
                                      level=alpha,
                                      dep.option='AR1')
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval, store the associated data
        # and info (used by the final clean step)
        if (I > Imax) {
            Imax = I
            df_tDEBlistB = df_tDEBlist
            df_tDEBExB = df_tDEBEx
        }
        # Specify the period of analysis
        df_tDEBtrend = get_period(per, df_tDEBtrend, df_tDEBEx,
                                  df_tDEBlist)
        # Store the trend
        df_tDEBtrendB = bind_rows(df_tDEBtrendB, df_tDEBtrend)
    }
    # Clean the results of the trend analysis
    res_tDEBtrend = clean(df_tDEBtrendB, df_tDEBExB, df_tDEBlistB)
    res = list(data=df_data_roll, mod=df_mod,
               analyse=res_tDEBtrend)
    return (res)
}
### 1.5. tCEN date ___________________________________________________
# Realises the trend analysis of the date of the minimum 10 day
# average flow over the year (VCN10) hydrological variable
# Performs the trend analysis of the date of the minimum 10 day
# average flow over the year (tCEN, the date of the VCN10).
# Same parameters and return shape as the other get_*trend
# functions; the returned 'data' is the 10-day rolling average.
get_tCENtrend = function (df_data, df_meta, period, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) {
    # Get all different station codes
    Code = levels(factor(df_meta$code))
    # Local corrections if needed
    res = flag_data(df_data, df_meta,
                    df_flag=df_flag,
                    df_mod=df_mod)
    df_data = res$data
    df_mod = res$mod
    # Computes the rolling average by 10 days over the data
    # (removed dead code: df_data_roll was initialised to a blank
    # tibble that was always overwritten right here)
    res = rollmean_code(df_data, Code, 10, df_mod=df_mod)
    df_data_roll = res$data
    df_mod = res$mod
    # Removes incomplete data from the time series
    res = missing_data(df_data_roll, df_meta,
                       dayLac_lim=dayLac_lim,
                       yearNA_lim=yearNA_lim,
                       df_mod=df_mod)
    df_data_roll = res$data
    df_mod = res$mod
    # Samples the data
    res = sampling_data(df_data_roll, df_meta,
                        sampleSpan=sampleSpan,
                        df_mod=df_mod)
    df_data_roll = res$data
    df_mod = res$mod
    # Make sure to convert the period to a list
    period = as.list(period)
    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for the trend results to return
    df_tCENtrendB = tibble()
    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_tCENlist = prepare(df_data_roll, colnamegroup=c('code'))
        # Index of the yearly min over the averaged data
        df_tCENEx = extract.Var(data.station=df_tCENlist,
                                funct=which.min,
                                period=per,
                                timestep='year',
                                pos.datetime=1)
        # Converts the index of the tCEN to the julian date
        df_tCENEx = prepare_date(df_tCENEx, df_tCENlist)
        # Compute the trend analysis
        df_tCENtrend = Estimate.stats(data.extract=df_tCENEx,
                                      level=alpha,
                                      dep.option='AR1')
        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval, store the associated data
        # and info (used by the final clean step)
        if (I > Imax) {
            Imax = I
            df_tCENlistB = df_tCENlist
            df_tCENExB = df_tCENEx
        }
        # Specify the period of analysis
        df_tCENtrend = get_period(per, df_tCENtrend, df_tCENEx,
                                  df_tCENlist)
        # Store the trend
        df_tCENtrendB = bind_rows(df_tCENtrendB, df_tCENtrend)
    }
    # Clean the results of the trend analysis
    res_tCENtrend = clean(df_tCENtrendB, df_tCENExB, df_tCENlistB)
    res = list(data=df_data_roll, mod=df_mod,
               analyse=res_tCENtrend)
    return (res)
}
## 2. OTHER ANALYSES _________________________________________________
### 2.1. Hydrograph __________________________________________________
# Reference hydrograph shapes: monthly fractions of the annual flow
# (columns = months 1 to 12) for 12 regime groups. Per the
# classification in get_hydrograph, rows 1-6 are 'Pluvial',
# rows 7-9 'Transition' and rows 10-12 'Nival Glaciaire'.
xref = matrix(
    c(0.099, 0.100, 0.101, 0.099, 0.088, 0.078, 0.072,
      0.064, 0.064, 0.069, 0.076, 0.089,
      0.133, 0.126, 0.111, 0.110, 0.081, 0.056, 0.038,
      0.027, 0.042, 0.063, 0.098, 0.117,
      0.128, 0.142, 0.122, 0.128, 0.105, 0.065, 0.035,
      0.024, 0.031, 0.044, 0.074, 0.101,
      0.157, 0.130, 0.119, 0.094, 0.062, 0.042, 0.028,
      0.021, 0.035, 0.062, 0.099, 0.150,
      0.204, 0.163, 0.118, 0.102, 0.060, 0.030, 0.018,
      0.012, 0.023, 0.041, 0.087, 0.143,
      0.156, 0.154, 0.117, 0.119, 0.086, 0.044, 0.025,
      0.015, 0.025, 0.044, 0.089, 0.127,
      0.139, 0.092, 0.082, 0.099, 0.087, 0.039, 0.015,
      0.012, 0.036, 0.108, 0.159, 0.131,
      0.112, 0.098, 0.101, 0.125, 0.122, 0.072, 0.036,
      0.024, 0.039, 0.067, 0.102, 0.102,
      0.058, 0.050, 0.100, 0.142, 0.158, 0.092, 0.067,
      0.050, 0.042, 0.058, 0.083, 0.100,
      0.050, 0.050, 0.058, 0.083, 0.150, 0.167, 0.117,
      0.083, 0.058, 0.058, 0.067, 0.058,
      0.033, 0.025, 0.033, 0.075, 0.167, 0.217, 0.142,
      0.092, 0.067, 0.058, 0.050, 0.042,
      0.017, 0.008, 0.017, 0.042, 0.108, 0.183, 0.200,
      0.175, 0.117, 0.067, 0.042, 0.025),
    ncol=12, byrow=TRUE)
# Columns are the month numbers, rows the regime group names
colnames(xref) = seq(1, 12, 1)
row.names(xref) = c('GROUP1', 'GROUP2', 'GROUP3', 'GROUP4',
                    'GROUP5', 'GROUP6', 'GROUP7', 'GROUP8',
                    'GROUP9', 'GROUP10', 'GROUP11', 'GROUP12')
# Computes the hydrograph of a station
# Computes the hydrograph (monthly mean flow) of stations and
# classifies their hydrological regime against the 'xref' reference
# shapes.
# - df_data: daily flow data ('Date', 'Value' and, with metadata,
#   'code' columns)
# - period: optional c(start, end) to restrict the data
# - df_meta: optional station metadata; when provided, one hydrograph
#   per station is computed and the regime class and hydrological
#   year start are written into the metadata
# Returns list(QM=..., meta=...): monthly means and metadata (or,
# without metadata, the monthly means vector and the regime class).
get_hydrograph = function (df_data, period=NULL, df_meta=NULL) {
    # If there is a specified period, extract only those data
    if (!is.null(period)) {
        df_data = df_data[df_data$Date >= as.Date(period[1])
                          & df_data$Date <= as.Date(period[2]),]
    }
    # If there is metadata, one hydrograph per station is computed
    if (!is.null(df_meta)) {
        # New columns for the regime and hydrological year start
        df_meta$regime_hydro = NA
        df_meta$start_year = NA
        # Get all the different station codes
        Code = levels(factor(df_meta$code))
        # Number of stations
        nCode = length(Code)
    # Otherwise it is just a list of flow from one station
    } else {
        # Only one code is present
        nCode = 1
    }
    # Blank tibble to store the monthly means
    df_QM = tibble()
    # For all accessible codes
    for (k in seq_len(nCode)) {
        if (!is.null(df_meta)) {
            # Gets the code and the associated data
            code = Code[k]
            df_data_code = df_data[df_data$code == code,]
        } else {
            # The data is directly the flow of the current station
            df_data_code = df_data
        }
        # Month of each observation as numeric
        monthData = as.numeric(format(df_data_code$Date, "%m"))
        # Monthly mean flow (preallocated, one value per month)
        QM_code = numeric(12)
        for (i in 1:12) {
            # Average all the flow data of the current month
            data = df_data_code$Value[monthData == i]
            QM_code[i] = mean(data, na.rm=TRUE)
        }
        # Squared distance of the normalised hydrograph to each
        # reference regime shape
        distance = numeric(nrow(xref))
        for (j in seq_len(nrow(xref))) {
            distance[j] = sum((QM_code / mean(QM_code) - xref[j,])^2)
        }
        # The closest reference shape gives the regime class
        regime = which.min(distance)
        if (regime < 7) {
            classRegime = "Pluvial"
        } else if (regime >= 7 & regime < 10) {
            classRegime = "Transition"
        } else if (regime >= 10) {
            classRegime = "Nival Glaciaire"
        }
        if (!is.null(df_meta)) {
            # Store the hydrograph of this station
            df_QMtmp = tibble(QM=QM_code, code=code)
            df_QM = bind_rows(df_QM, df_QMtmp)
            # Store the regime class in the metadata
            df_meta$regime_hydro[df_meta$code == code] = classRegime
            # Month of the maximal monthly mean, taken as the start
            # of the hydrological year
            maxMonth = which.max(QM_code)
            df_meta$start_year[df_meta$code == code] = maxMonth
        } else {
            # Without metadata, return the raw vector and the class
            df_QM = QM_code
            df_meta = classRegime
        }
    }
    # Returns the hydrograph and metadata
    return (list(QM=df_QM, meta=df_meta))
}
### 2.2. Break date __________________________________________________
# Compute the break date of the flow data by station
# Computes the break date of the flow data by station, using the
# Pettitt homogeneity test.
# - df_data: daily flow data ('Date', 'Value', 'code')
# - df_meta: station metadata (used for the list of codes)
# - alpha: significance level under which a break is retained
# Returns a tibble with one row per station showing a significant
# break ('code' and break 'Date').
get_break = function (df_data, df_meta, alpha=0.05) {
    # Get all different station codes
    Code = levels(factor(df_meta$code))
    # Number of stations (currently unused)
    nCode = length(Code)
    # Blank break-date list and associated station code vector
    date_break = list()
    Code_break = c()
    # For all accessible codes
    for (code in Code) {
        # Get the associated data
        df_data_code = df_data[df_data$code == code,]
        # Remove NA data
        df_data_codeNoNA = df_data_code[!is.na(df_data_code$Value),]
        # Perform the break analysis thanks to the Pettitt test
        res_break = pettitt.test(df_data_codeNoNA$Value)
        # Extract the p value
        p_value = res_break$p
        # The length of the data analysed (used by the commented
        # step means below)
        nbreak = res_break$nobs
        # Index of the break date
        ibreak = res_break$estimate
        # If the p value is under the threshold, the break is kept
        if (p_value <= alpha) {
            # Take the mean of the break indices if there are several
            ibreak = round(mean(ibreak), 0)
            # Store the break date with its associated code
            date_break = append(date_break,
                                df_data_codeNoNA$Date[ibreak])
            Code_break = append(Code_break, code)
        }
        # step1 = mean(df_data_codeNoNA$Value[1:ibreak])
        # step2 = mean(df_data_codeNoNA$Value[(ibreak+1):nbreak])
    }
    # Create a tibble with the break analysis results
    df_break = tibble(code=Code_break, Date=as.Date(date_break))
    return (df_break)
}
### 2.3. Time gap ____________________________________________________
# Compute the time gap by station
# Computes the time gaps ("lacunes") of the flow data by station:
# the cumulative gap rate (in percent of the station time span) and
# the mean length of the non-zero gaps, both joined into the
# station metadata.
get_lacune = function (df_data, df_meta) {
    # All the different station codes
    Code = levels(factor(df_meta$code))
    nCode = length(Code)
    # Per-station cumulative gap rate and mean gap length
    tLac = numeric(nCode)
    meanLac = numeric(nCode)
    # Keep only the rows without any NA
    df_data_NoNA = df_data[complete.cases(df_data),]
    # For every station
    for (k in seq_len(nCode)) {
        code = Code[k]
        # Dates available for this station
        Date = df_data[df_data$code == code,]$Date
        # Time span covered by this station
        span = as.numeric(Date[length(Date)] - Date[1])
        # Dates with complete data for this station
        Date_NoNA = df_data_NoNA[df_data_NoNA$code == code,]$Date
        # Gap length between successive complete dates
        lac = as.numeric(diff(Date_NoNA) - 1)
        # Cumulative gap rate over the station span
        tLac[k] = sum(lac) / span
        # Mean length of the non-zero gaps
        meanLac[k] = mean(lac[lac != 0])
    }
    # Cumulative gap rate in percent
    tLac100 = tLac * 100
    # Gather the gap results and join them to the metadata
    df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
    df_meta = full_join(df_meta, df_lac)
    return (df_meta)
}
### 2.4. Compute square root of data _________________________________
# Returns a tibble with the square root of the flow values, keeping
# the dates and station codes untouched.
compute_sqrt = function (df_data) {
    return (tibble(Date=df_data$Date,
                   Value=sqrt(df_data$Value),
                   code=df_data$code))
}
### 2.5. Criticism of data ___________________________________________
# Registers a criticism (data quality remark) about one or several
# stations in the criticism tibble.
# - df_critique: tibble of existing criticisms to append to
# - Code: station code(s), or 'all' to target every station of
#   df_meta (df_meta is then required)
# - author / level / variable / type / comment: criticism fields
# - start_date / end_date: period concerned (end defaults to start)
# - resdir: if not NULL, the updated tibble is also written to disk
# Returns the updated criticism tibble.
add_critique = function (df_critique, Code, author, level, start_date, variable, type, comment='', end_date=NULL, df_meta=NULL, resdir=NULL) {
    # 'all' needs the metadata to enumerate the station codes.
    # The length check keeps the 'if' condition scalar when Code is
    # a vector of codes (a vector condition errors in R >= 4.2).
    if (length(Code) == 1 && Code == 'all' && is.null(df_meta)) {
        Code = NA # error: cannot expand 'all' without metadata
    } else if (length(Code) == 1 && Code == 'all' && !is.null(df_meta)) {
        # Get all the different station codes
        Code = levels(factor(df_meta$code))
    }
    # A missing end date means a one-day criticism
    if (is.null(end_date)) {
        end_date = start_date
    }
    # Build the new criticism row(s), one per code
    df_tmp = tibble(code=Code, author=author, level=level,
                    start_date=start_date, end_date=end_date,
                    variable=variable, type=type,
                    comment=comment)
    df_critique = bind_rows(df_critique, df_tmp)
    nc = nrow(df_critique)
    print('Criticism registered')
    # Show the last rows as feedback (at most three; guard against
    # tables with fewer than three rows)
    print(df_critique[max(1, nc-2):nc,])
    # Optionally persist the criticisms
    if (!is.null(resdir)) {
        write_critique(df_critique, resdir)
    }
    return (df_critique)
}
# df_critique = add_critique(df_critique, resdir=resdir, Code='', author='louis', level=, start_date=, end_date=NA, variable='', type='', comment='')