analyse.R 15.57 KiB
# \\\
# Copyright 2021-2022 Louis Héraut*1
# *1   INRAE, France
#      louis.heraut@inrae.fr
# This file is part of ash R toolbox.
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# ash R toolbox is distributed in the hope that it will be useful, but 
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with ash R toolbox.  If not, see <https://www.gnu.org/licenses/>.
# ///
# processing/analyse.R
# File that realise all the possible analysis of data.
# This file regroup mainly the functions use to compute the trend
# analysis of hydrologic variables thanks to the Mann-Kendall Test.
# Functions needed for break or gap analysis are also present.
# Usefull library
library(dplyr)
library(zoo)
library(StatsAnalysisTrend)
library(lubridate)
library(trend)
# Sourcing R file
source('processing/format.R', encoding='latin1')
## 1. TREND ANALYSIS
### 1.0. Intercept of trend
# Compute intercept values of linear trends with first order values
# of trends and the data on which analysis is performed.
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
    # Create a column in trend full of NA
    df_Xtrend$intercept = NA
    # For all different group
    for (g in df_Xlist$info$group) {
        # Get the data and trend value linked to this group
        df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
        df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,]
        # Get the time start and end of the different periods
        Start = df_Xtrend_code$period_start
        End = df_Xtrend_code$period_end
        # Extract only the unrepeated dates
        UStart = levels(factor(Start))
        UEnd = levels(factor(End))
        # Get the number of different periods of trend analysis
        nPeriod = max(length(UStart), length(UEnd))
        # For each of these perdiods
        for (i in 1:nPeriod) {
            # Get data and trend associated to the period
            df_data_code_per = 
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
df_data_code[df_data_code$Date >= Start[i] & df_data_code$Date <= End[i],] df_Xtrend_code_per = df_Xtrend_code[df_Xtrend_code$period_start == Start[i] & df_Xtrend_code$period_end == End[i],] # Get the group associated to this period id = which(df_Xtrend$group == g & df_Xtrend$period_start == Start[i] & df_Xtrend$period_end == End[i]) # Compute mean of flow and time period mu_X = mean(df_data_code_per$Qm3s, na.rm=TRUE) mu_t = as.numeric(mean(c(Start[i], End[i]), na.rm=TRUE)) / unit2day # Get the intercept of the trend b = mu_X - mu_t * df_Xtrend_code_per$trend # And store it df_Xtrend$intercept[id] = b } } return (df_Xtrend) } ### 1.1. QA # Realise the trend analysis of the average annual flow (QA) # hydrological variable get_QAtrend = function (df_data, period, p_thresold) { # Make sure to convert the period to a list period = as.list(period) # Set the max interval period as the minimal possible Imax = 0 # Blank tibble for data to return df_QAtrendB = tibble() # For all periods for (per in period) { # Prepare the data to fit the entry of extract.Var df_QAlist = prepare(df_data, colnamegroup=c('code')) # Compute the yearly mean over the data df_QAEx = extract.Var(data.station=df_QAlist, funct=mean, timestep='year', period=per, pos.datetime=1, na.rm=TRUE) # Compute the trend analysis df_QAtrend = Estimate.stats(data.extract=df_QAEx, level=p_thresold) # Get the associated time interval I = interval(per[1], per[2]) # If it is the largest interval if (I > Imax) { # Store it and the associated data and info Imax = I df_QAlistB = df_QAlist df_QAExB = df_QAEx } # Specify the period of analyse df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist) # Store the trend df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend) }
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
# Clean results of trend analyse res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB) return (res_QAtrend) } ### 1.2. QMNA # Realise the trend analysis of the monthly minimum flow in the # year (QMNA) hydrological variable get_QMNAtrend = function (df_data, period, p_thresold) { # Make sure to convert the period to a list period = as.list(period) # Set the max interval period as the minimal possible Imax = 0 # Blank tibble for data to return df_QMNAtrendB = tibble() # For all periods for (per in period) { # Prepare the data to fit the entry of extract.Var df_QMNAlist = prepare(df_data, colnamegroup=c('code')) # Compute the montly mean over the data df_QMNAEx = extract.Var(data.station=df_QMNAlist, funct=mean, period=per, timestep='year-month', per.start="01", pos.datetime=1, na.rm=TRUE) # Rerepare the data to fit the entry of extract.Var df_QMNAlist = reprepare(df_QMNAEx, df_QMNAlist, colnamegroup=c('code')) # Compute the yearly min over the data df_QMNAEx = extract.Var(data.station=df_QMNAlist, funct=min, period=per, timestep='year', pos.datetime=1, na.rm=TRUE) # Compute the trend analysis df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx, level=p_thresold) # Get the associated time interval I = interval(per[1], per[2]) # If it is the largest interval if (I > Imax) { # Store it and the associated data and info Imax = I df_QMNAlistB = df_QMNAlist df_QMNAExB = df_QMNAEx } # Specify the period of analyse df_QMNAtrend = get_period(per, df_QMNAtrend, df_QMNAEx, df_QMNAlist) # Store the trend df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend) } # Clean results of trend analyse res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB) return (res_QMNAtrend) } ### 1.3. VCN10 # Realises the trend analysis of the minimum 10 day average flow # over the year (VCN10) hydrological variable
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
get_VCN10trend = function (df_data, df_meta, period, p_thresold) { # Get all different stations code Code = levels(factor(df_meta$code)) # Blank tibble to store the data averaged df_data_roll = tibble() # For all the code for (c in Code) { # Get the data associated to the code df_data_code = df_data[df_data$code == c,] # Perform the roll mean of the flow over 10 days df_data_code = tibble(Date=rollmean(df_data_code$Date, 10, fill=NA), Qm3s=rollmean(df_data_code$Qm3s, 10, fill=NA), code=c) # Store the results df_data_roll = bind_rows(df_data_roll, df_data_code) } # Make sure to convert the period to a list period = as.list(period) # Set the max interval period as the minimal possible Imax = 0 # Blank tibble for data to return df_VCN10trendB = tibble() # For all periods for (per in period) { # Prepare the data to fit the entry of extract.Var df_VCN10list = prepare(df_data_roll, colnamegroup=c('code')) # Compute the yearly min over the averaged data df_VCN10Ex = extract.Var(data.station=df_VCN10list, funct=min, period=per, timestep='year', pos.datetime=1, na.rm=TRUE) # Compute the trend analysis df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex, level=p_thresold) # Get the associated time interval I = interval(per[1], per[2]) # If it is the largest interval if (I > Imax) { # Store it and the associated data and info Imax = I df_VCN10listB = df_VCN10list df_VCN10ExB = df_VCN10Ex } # Specify the period of analyse df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex, df_VCN10list) # Store the trend df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend) } # Clean results of trend analyse res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB) return (res_VCN10trend) } ### 1.4. VCN10 date # Realises the trend analysis of the date of the minimum 10 day # average flow over the year (VCN10) hydrological variable
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
get_dateVCN10trend = function (df_data, df_meta, period, p_thresold) { # Get all different stations code Code = levels(factor(df_meta$code)) # Blank tibble to store the data averaged df_data_roll = tibble() # For all the code for (c in Code) { # Get the data associated to the code df_data_code = df_data[df_data$code == c,] # Perform the roll mean of the flow over 10 days df_data_code = tibble(Date=df_data_code$Date, Qm3s=rollmean(df_data_code$Qm3s, 10, fill=NA), code=c) # Store the results df_data_roll = bind_rows(df_data_roll, df_data_code) } # Make sure to convert the period to a list period = as.list(period) # Set the max interval period as the minimal possible Imax = 0 # Blank tibble for data to return df_VCN10trendB = tibble() # For all periods for (per in period) { # Prepare the data to fit the entry of extract.Var df_VCN10list = prepare(df_data_roll, colnamegroup=c('code')) # Compute the yearly min over the averaged data df_VCN10Ex = extract.Var(data.station=df_VCN10list, funct=which.min, period=per, timestep='year', pos.datetime=1) # Converts index of the VCN10 to the julian date associated df_VCN10Ex = prepare_date(df_VCN10Ex, df_VCN10list) # Compute the trend analysis df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex, level=p_thresold) # Get the associated time interval I = interval(per[1], per[2]) # If it is the largest interval if (I > Imax) { # Store it and the associated data and info Imax = I df_VCN10listB = df_VCN10list df_VCN10ExB = df_VCN10Ex } # Specify the period of analyse df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex, df_VCN10list) # Store the trend df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend) } # Clean results of trend analyse res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB) return (res_VCN10trend) } ## 2. OTHER ANALYSES ### 2.1. Break date
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
# Compute the break date of the flow data by station get_break = function (df_data, df_meta, p_thresold=0.05) { # Get all different stations code Code = levels(factor(df_meta$code)) # Number of stations nCode = length(Code) # Blank date break list and associated station code vector date_break = list() Code_break = c() # For all accessible code for (code in Code) { # Get the associated data df_data_code = df_data[df_data$code == code,] # Remove NA data df_data_codeNoNA = df_data_code[!is.na(df_data_code$Qm3s),] # Perform the break analysis thanks to the Pettitt test res_break = pettitt.test(df_data_codeNoNA$Qm3s) # Extract p value p_value = res_break$p # The length of the data analysed nbreak = res_break$nobs # Index of the break date ibreak = res_break$estimate # If the p value results is under the thresold if (p_value <= p_thresold) { # Get the mean of the index break if there is several ibreak = round(mean(ibreak), 0) # Store the date break with its associated code date_break = append(date_break, df_data_codeNoNA$Date[ibreak]) Code_break = append(Code_break, code) } # step1 = mean(df_data_codeNoNA$Qm3s[1:ibreak]) # step2 = mean(df_data_codeNoNA$Qm3s[(ibreak+1):nbreak]) } # Create a tibble with the break analysis results df_break = tibble(code=Code_break, Date=as.Date(date_break)) return (df_break) } ### 2.2. Time gap # Compute the time gap by station get_lacune = function (df_data, df_meta) { # Get all different stations code Code = levels(factor(df_meta$code)) # Create new vector to stock results for cumulative and mean # time gap by station tLac = c() meanLac = c() # Get rows where there is no NA NoNA = complete.cases(df_data) # Get data where there is no NA df_data_NoNA = df_data[NoNA,] # For every station for (code in Code) { # Get only the data rows for the selected station df_data_code = df_data[df_data$code==code,] # Get date for the selected station Date = df_data_code$Date # Get time span for the selection station
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
span = as.numeric(Date[length(Date)] - Date[1]) # Get only the data rows with no NA for the selected station df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,] # Get date for the selected station Date_NoNA = df_data_NoNA_code$Date # Compute the time gap lac = as.numeric(diff(Date_NoNA) - 1) # Compute the cumulative gap lac_sum = sum(lac) # Store the cumulative gap rate tLac = c(tLac, lac_sum/span) # Compute the mean gap lac_mean = mean(lac[lac != 0]) # Store the mean gap meanLac = c(meanLac, lac_mean) } # Compute the cumulative gap rate in pourcent tLac100 = tLac * 100 # Create tibble for lacune df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac) # Join a tibble df_meta = full_join(df_meta, df_lac) return (df_meta) }