Newer
Older
# \\\
# Copyright 2021-2022 Louis Héraut*1
#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/analyse.R
#
# File that realise all the possible analysis of data.
# This file regroup mainly the functions use to compute the trend
# analysis of hydrologic variables thanks to the Mann-Kendall Test.
# Functions needed for break or gap analysis are also present.
# Usefull library
library(dplyr)
# Compute the time gap by station
# Get all different stations code
# Create new vector to stock results for cumulative and mean
# time gap by station
tLac = c()
meanLac = c()
# Get rows where there is no NA
NoNA = complete.cases(df_data)
# Get data where there is no NA
df_data_NoNA = df_data[NoNA,]
# For every station
# Get only the data rows for the selected station
df_data_code = df_data[df_data$code==code,]
# Get date for the selected station
Date = df_data_code$Date
# Get time span for the selection station
span = as.numeric(Date[length(Date)] - Date[1])
# Get only the data rows with no NA for the selected station
df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
# Get date for the selected station
Date_NoNA = df_data_NoNA_code$Date
# Compute the time gap
lac = as.numeric(diff(Date_NoNA) - 1)
# Compute the cumulative gap
lac_sum = sum(lac)
# Store the cumulative gap rate
tLac = c(tLac, lac_sum/span)
# Compute the mean gap
lac_mean = mean(lac[lac != 0])
# Store the mean gap
}
# Compute the cumulative gap rate in pourcent
tLac100 = tLac * 100
df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
# Join a tibble
df_meta = full_join(df_meta, df_lac)
return (df_meta)
# Compute intercept values of linear trends with first order values
# of trends and the data on which analysis is performed.
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,]
Start = df_Xtrend_code$period_start
End = df_Xtrend_code$period_end
# Extract only the unrepeated dates
UStart = levels(factor(Start))
df_data_code_per =
df_data_code[df_data_code$Date >= Start[i]
& df_data_code$Date <= End[i],]
df_Xtrend_code_per =
df_Xtrend_code[df_Xtrend_code$period_start == Start[i]
& df_Xtrend_code$period_end == End[i],]
id = which(df_Xtrend$group == g
& df_Xtrend$period_start == Start[i]
& df_Xtrend$period_end == End[i])
mu_X = mean(df_data_code_per$Qm3s, na.rm=TRUE)
mu_t = as.numeric(mean(c(Start[i],
End[i]),
na.rm=TRUE)) / unit2day
df_Xtrend$intercept[id] = b
}
}
return (df_Xtrend)
}
# Compute the start and the end of the period for a trend analysis
# according to the accessible data
get_period = function (per, df_Xtrend, df_XEx, df_Xlist) {
# Fix the period start and end of the accessible period to a
# default date
df_Xtrend$period_start = as.Date("1970-01-01")
df_Xtrend$period_end = as.Date("1970-01-01")
df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
df_XExtmp = df_Xlisttmp$data
# Get the id in the trend result associated to the group
id = which(df_Xtrend$group1 == g)
# Compute index of the nearest accessible start and end date
iStart = which.min(abs(df_XExtmp_code$Date
- as.Date(per[1])))
iEnd = which.min(abs(df_XExtmp_code$Date
- as.Date(per[2])))
df_Xtrend$period_start[id] =
as.Date(df_XExtmp_code$Date[iStart])
df_Xtrend$period_end[id] =
as.Date(df_XExtmp_code$Date[iEnd])
get_break = function (df_data, df_meta, p_thresold=0.05) {
# Get all different stations code
Code = levels(factor(df_meta$code))
df_data_codeNoNA = df_data_code[!is.na(df_data_code$Qm3s),]
res_break = pettitt.test(df_data_codeNoNA$Qm3s)
# Get the mean of the index break if there is several
ibreak = round(mean(ibreak), 0)
# Store the date break with its associated code
date_break = append(date_break,
df_data_codeNoNA$Date[ibreak])
Code_break = append(Code_break, code)
}
# step1 = mean(df_data_codeNoNA$Qm3s[1:ibreak])
# step2 = mean(df_data_codeNoNA$Qm3s[(ibreak+1):nbreak])
df_break = tibble(code=Code_break, Date=as.Date(date_break))
# Realise the trend analysis of the average annual flow (QA)
# hydrological variable
df_QAlist = prepare(df_data, colnamegroup=c('code'))
df_QAEx = extract.Var(data.station=df_QAlist,
funct=mean,
timestep='year',
period=per,
pos.datetime=1,
na.rm=TRUE)
df_QAtrend = Estimate.stats(data.extract=df_QAEx,
level=p_thresold)
Imax = I
df_QAlistB = df_QAlist
df_QAExB = df_QAEx
}
df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist)
# Store the trend
df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend)
res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB)
# Realise the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable
period = as.list(period)
Imax = 0
df_QMNAtrendB = tibble()
for (per in period) {
df_QMNAlist = prepare(df_data, colnamegroup=c('code'))
df_QMNAEx = extract.Var(data.station=df_QMNAlist,
funct=mean,
period=per,
df_QMNAlist = reprepare(df_QMNAEx,
df_QMNAlist,
colnamegroup=c('code'))
df_QMNAEx = extract.Var(data.station=df_QMNAlist,
funct=min,
period=per,
timestep='year',
pos.datetime=1,
na.rm=TRUE)
df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx,
level=p_thresold)
I = interval(per[1], per[2])
if (I > Imax) {
Imax = I
df_QMNAlistB = df_QMNAlist
df_QMNAExB = df_QMNAEx
}
df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
}
res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)
# Realise the trend analysis of the minimum 10 day average flow over the year (VCN10) hydrological variable
get_VCN10trend = function (df_data, df_meta, period, p_thresold) {
df_data_roll = tibble()
for (c in Code) {
df_data_code = df_data[df_data$code == c,]
df_data_code = tibble(Date=rollmean(df_data_code$Date,
10,
fill=NA),
Qm3s=rollmean(df_data_code$Qm3s,
10,
fill=NA),
code=c)
df_data_roll = bind_rows(df_data_roll, df_data_code)
}
period = as.list(period)
Imax = 0
df_VCN10trendB = tibble()
for (per in period) {
df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))
df_VCN10Ex = extract.Var(data.station=df_VCN10list,
funct=min,
period=per,
timestep='year',
pos.datetime=1,
na.rm=TRUE)
df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
level=p_thresold)
I = interval(per[1], per[2])
if (I > Imax) {
Imax = I
df_VCN10listB = df_VCN10list
df_VCN10ExB = df_VCN10Ex
}
df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
df_VCN10list)
df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
}
res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)