# \\\
# Copyright 2021-2022 Louis Héraut*1
#
# *1   INRAE, France
#      louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but 
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox.  If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/analyse.R
#
# File that performs all the available analyses of the data.
# It mainly gathers the functions used to compute the trend
# analysis of hydrological variables with the Mann-Kendall test.
# Functions needed for break or gap analysis are also present.


# Useful libraries
library(dplyr)
library(zoo)
library(StatsAnalysisTrend)
library(lubridate)
library(trend)

# Sourcing R file
source('processing/format.R', encoding='latin1')


# Compute the time gap by station
#
# df_data: table with at least the columns 'code' and 'Date'; a row
#     counts as missing as soon as one of its columns is NA
# df_meta: table with a 'code' column listing the stations
#
# Returns df_meta joined with two new columns:
#   tLac100: cumulative gap as a percentage of the span between the
#       first and the last row of the station
#   meanLac: mean length of the non-zero gaps (NaN if there is none)
# Both are NA for a station without any row in df_data.
# NOTE(review): gaps are expressed in the unit of diff(Date), i.e.
# days if 'Date' is of class Date, and rows are assumed to be sorted
# by date within a station - confirm against the caller.
get_lacune = function (df_data, df_meta) {

    # Get all different stations code
    Code = levels(factor(df_meta$code))
    nCode = length(Code)

    # Preallocate the cumulative gap rate and the mean gap by
    # station so that they always stay aligned with Code
    tLac = rep(NA_real_, nCode)
    meanLac = rep(NA_real_, nCode)

    # Get data where there is no NA
    df_data_NoNA = df_data[complete.cases(df_data),]

    # For every station
    for (i in seq_len(nCode)) {
        code = Code[i]

        # Get only the data rows for the selected station. which()
        # discards the rows whose code is NA
        df_data_code = df_data[which(df_data$code == code),]

        # A station without data keeps NA for both indicators
        if (nrow(df_data_code) == 0) {
            next
        }

        # Get time span for the selected station
        Date = df_data_code$Date
        span = as.numeric(Date[length(Date)] - Date[1])

        # Get the dates with no NA for the selected station
        Date_NoNA = df_data_NoNA$Date[df_data_NoNA$code == code]

        # Compute the time gap: two consecutive time steps give 0
        lac = as.numeric(diff(Date_NoNA) - 1)

        # Store the cumulative gap rate
        tLac[i] = sum(lac) / span
        # Store the mean of the actual (non-zero) gaps
        meanLac[i] = mean(lac[lac != 0])
    }

    # Compute the cumulative gap rate in percent
    tLac100 = tLac * 100

    # Create tibble for lacune
    df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
    # Join it to the meta data (on their common columns)
    df_meta = full_join(df_meta, df_lac)
    return (df_meta)
}
# Compute intercept values of linear trends with first order values
# of trends and the data on which analysis is performed.
#
# df_Xtrend: trend results with the columns 'group', 'period_start',
#     'period_end' and 'trend'
# df_Xlist: list with a 'data' table (columns 'group', 'Date' and
#     'Qm3s') and an 'info' table (column 'group')
# unit2day: number of days in one time unit of the trend, used to
#     convert the mean date of a period from days to that unit
#
# Returns df_Xtrend with a new 'intercept' column, left to NA for the
# rows that match no group of df_Xlist$info.
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {

    # Create a column in trend full of NA
    df_Xtrend$intercept = NA

    # For all different group
    for (g in df_Xlist$info$group) {
        # Get the data linked to this group
        df_data_code = df_Xlist$data[df_Xlist$data$group == g,]
        # Get the rows of the trend results linked to this group
        idGroup = which(df_Xtrend$group == g)

        # Get the time start and end of the different periods
        Start = df_Xtrend$period_start[idGroup]
        End = df_Xtrend$period_end[idGroup]

        # Keep each (start, end) pair only once. Starts and ends
        # have to be deduplicated together, otherwise a period can
        # be processed twice while another one is never reached
        isFirst = !duplicated(data.frame(Start, End))
        UStart = Start[isFirst]
        UEnd = End[isFirst]

        # For each of these periods
        for (i in seq_along(UStart)) {
            # Get data associated to the period
            df_data_code_per =
                df_data_code[df_data_code$Date >= UStart[i]
                             & df_data_code$Date <= UEnd[i],]

            # Get the trend rows associated to this group and period
            id = idGroup[which(Start == UStart[i] & End == UEnd[i])]

            # Compute mean of flow and time period
            mu_X = mean(df_data_code_per$Qm3s, na.rm=TRUE)
            mu_t = as.numeric(mean(c(UStart[i],
                                     UEnd[i]),
                                   na.rm=TRUE)) / unit2day

            # Get the intercept of the trend and store it: the line
            # goes through the point (mu_t, mu_X)
            df_Xtrend$intercept[id] = mu_X - mu_t * df_Xtrend$trend[id]
        }
    }
    return (df_Xtrend)
}
# Compute the start and the end of the period for a trend analysis
# according to the accessible data
#
# per: vector whose first two elements are the wanted start and end
#     of the period, in a format accepted by as.Date()
# df_Xtrend: results of the trend analysis, with a 'group1' column
# df_XEx: extracted variable, given to reprepare()
# df_Xlist: prepared data list, given to reprepare()
#
# Returns df_Xtrend as a tibble with the Date columns 'period_start'
# and 'period_end'. Rows whose 'group1' matches no group keep the
# default date 1970-01-01.
get_period = function (per, df_Xtrend, df_XEx, df_Xlist) {

    # Convert results of trend to tibble
    df_Xtrend = tibble(df_Xtrend)

    # Fix the period start and end of the accessible period to a
    # default date
    df_Xtrend$period_start = as.Date("1970-01-01")
    df_Xtrend$period_end = as.Date("1970-01-01")

    # Rebuild the data list from the extracted variable
    # NOTE(review): reprepare() comes from processing/format.R; its
    # 'data' element is expected to hold 'group' and 'Date' columns
    df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
    df_XExtmp = df_Xlisttmp$data

    # Wanted limits of the period
    perStart = as.Date(per[1])
    perEnd = as.Date(per[2])

    # For all the different group
    for (g in df_Xlisttmp$info$group) {
        # Get the dates of the analysed data associated to the group
        Date_code = df_XExtmp$Date[df_XExtmp$group == g]
        # Get the id in the trend result associated to the group
        id = which(df_Xtrend$group1 == g)

        # Compute index of the nearest accessible start and end date
        iStart = which.min(abs(Date_code - perStart))
        iEnd = which.min(abs(Date_code - perEnd))

        # Store the start and end of the trend analysis
        df_Xtrend$period_start[id] = as.Date(Date_code[iStart])
        df_Xtrend$period_end[id] = as.Date(Date_code[iEnd])
    }
    return (df_Xtrend)
}
# Compute the break date of the flow data by station
#
# df_data: table with the columns 'code', 'Date' and 'Qm3s'
# df_meta: table with a 'code' column listing the stations
# p_thresold: p value under which (or at which) a break is kept
#
# Returns a tibble with one row per station for which a break is
# detected: 'code' is the station and 'Date' the date of the break.
get_break = function (df_data, df_meta, p_thresold=0.05) {

    # Get all different stations code
    Code = levels(factor(df_meta$code))
    # Number of stations
    nCode = length(Code)

    # Blank break dates and flag of the stations that have a break.
    # The dates are kept in a Date vector: appending them to a list
    # drops their class and as.Date() cannot convert a list back
    date_break = rep(as.Date(NA), nCode)
    isBreak = rep(FALSE, nCode)

    # For all accessible code
    for (i in seq_len(nCode)) {
        code = Code[i]

        # Get the associated data
        df_data_code = df_data[df_data$code == code,]
        # Remove NA data
        df_data_codeNoNA = df_data_code[!is.na(df_data_code$Qm3s),]

        # Perform the break analysis thanks to the Pettitt test
        res_break = pettitt.test(df_data_codeNoNA$Qm3s)
        # Extract p value
        p_value = res_break$p.value
        # Index of the break date
        ibreak = res_break$estimate

        # If the p value results is under the thresold
        if (p_value <= p_thresold) {
            # Get the mean of the index break if there is several
            ibreak = round(mean(ibreak), 0)
            # Store the date break and flag the station
            date_break[i] = as.Date(df_data_codeNoNA$Date[ibreak])
            isBreak[i] = TRUE
        }

        # step1 = mean(df_data_codeNoNA$Qm3s[1:ibreak])
        # step2 = mean(df_data_codeNoNA$Qm3s[(ibreak+1):res_break$nobs])
    }

    # Create a tibble with the break analysis results
    df_break = tibble(code=Code[isBreak], Date=date_break[isBreak])
    return (df_break)
}
# Realise the trend analysis of the average annual flow (QA)
# hydrological variable
#
# df_data: flow data, given to prepare() with 'code' as group column
# period: list of periods, each one being a vector with the start
#     and the end of the period
# p_thresold: level given to Estimate.stats()
#
# Returns the output of clean() applied to the trends of all periods
# and to the extracted data and data list of the longest period.
get_QAtrend = function (df_data, period, p_thresold) {

    # Make sure to convert the period to a list
    period = as.list(period)

    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_QAtrendB = tibble()
    # Data and info of the longest period, unknown for now
    df_QAlistB = NULL
    df_QAExB = NULL

    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_QAlist = prepare(df_data, colnamegroup=c('code'))

        # Compute the QA over the data
        df_QAEx = extract.Var(data.station=df_QAlist,
                              funct=mean,
                              timestep='year',
                              period=per,
                              pos.datetime=1,
                              na.rm=TRUE)

        # Compute the trend analysis
        df_QAtrend = Estimate.stats(data.extract=df_QAEx,
                                    level=p_thresold)

        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval
        if (I > Imax) {
            # Store it and the associated data and info
            Imax = I
            df_QAlistB = df_QAlist
            df_QAExB = df_QAEx
        }

        # Specify the period of analyse
        df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist)
        # Store the trend
        df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend)
    }

    # Without any period of positive length there is nothing to
    # clean: fail with an explicit message
    if (is.null(df_QAExB)) {
        stop("get_QAtrend: 'period' holds no period of positive length",
             call.=FALSE)
    }

    # Clean results of trend analyse
    res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB)
    return (res_QAtrend)
}
# Realise the trend analysis of the monthly minimum flow in the
# year (QMNA) hydrological variable
#
# df_data: flow data, given to prepare() with 'code' as group column
# period: list of periods, each one being a vector with the start
#     and the end of the period
# p_thresold: level given to Estimate.stats()
#
# Returns the output of clean() applied to the trends of all periods
# and to the extracted data and data list of the longest period.
get_QMNAtrend = function (df_data, period, p_thresold) {

    # Make sure to convert the period to a list
    period = as.list(period)

    # Set the max interval period as the minimal possible
    Imax = 0
    # Blank tibble for data to return
    df_QMNAtrendB = tibble()
    # Data and info of the longest period, unknown for now
    df_QMNAlistB = NULL
    df_QMNAExB = NULL

    # For all periods
    for (per in period) {
        # Prepare the data to fit the entry of extract.Var
        df_QMNAlist = prepare(df_data, colnamegroup=c('code'))

        # Compute the mean flow with a 'year-month' time step
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=mean,
                                period=per,
                                timestep='year-month',
                                per.start="01",
                                pos.datetime=1,
                                na.rm=TRUE)

        # Rebuild the data list from these means so that they can be
        # given back to extract.Var
        df_QMNAlist = reprepare(df_QMNAEx,
                                df_QMNAlist,
                                colnamegroup=c('code'))

        # Compute the minimum of these means with a 'year' time step
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=min,
                                period=per,
                                timestep='year',
                                pos.datetime=1,
                                na.rm=TRUE)

        # Compute the trend analysis
        df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx,
                                      level=p_thresold)

        # Get the associated time interval
        I = interval(per[1], per[2])
        # If it is the largest interval
        if (I > Imax) {
            # Store it and the associated data and info
            Imax = I
            df_QMNAlistB = df_QMNAlist
            df_QMNAExB = df_QMNAEx
        }

        # Specify the period of analyse
        df_QMNAtrend = get_period(per, df_QMNAtrend,
                                  df_QMNAEx,
                                  df_QMNAlist)
        # Store the trend
        df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
    }

    # Without any period of positive length there is nothing to
    # clean: fail with an explicit message
    if (is.null(df_QMNAExB)) {
        stop("get_QMNAtrend: 'period' holds no period of positive length",
             call.=FALSE)
    }

    # Clean results of trend analyse
    res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)
    return (res_QMNAtrend)
}
# Realise the trend analysis of the minimum 10 day average flow over the year (VCN10) hydrological variable
Heraut Louis's avatar
Heraut Louis committed
get_VCN10trend = function (df_data, df_meta, period, p_thresold) {

    # Get all different stations code
Heraut Louis's avatar
Heraut Louis committed
    Code = levels(factor(df_meta$code))
    df_data_roll = tibble()

    for (c in Code) {
        df_data_code = df_data[df_data$code == c,]
        
        df_data_code = tibble(Date=rollmean(df_data_code$Date,
                                            10,
                                            fill=NA),
                              Qm3s=rollmean(df_data_code$Qm3s, 
                                            10,
                                            fill=NA),
                              code=c)

        df_data_roll = bind_rows(df_data_roll, df_data_code)
    }

Heraut Louis's avatar
Heraut Louis committed
    period = as.list(period)
    
    Imax = 0
    df_VCN10trendB = tibble()
    
    for (per in period) {
        
        df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))

        df_VCN10Ex = extract.Var(data.station=df_VCN10list,
                                 funct=min,
                                 period=per,
                                 timestep='year',
                                 pos.datetime=1,
                                 na.rm=TRUE)

Heraut Louis's avatar
Heraut Louis committed
        df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex,
                                      level=p_thresold)
Heraut Louis's avatar
Heraut Louis committed

        I = interval(per[1], per[2])
        if (I > Imax) {
            Imax = I
            df_VCN10listB = df_VCN10list
            df_VCN10ExB = df_VCN10Ex
        }

Heraut Louis's avatar
Heraut Louis committed
        df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex,
                                   df_VCN10list)

Heraut Louis's avatar
Heraut Louis committed
        df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
    }
Heraut Louis's avatar
Heraut Louis committed
    res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)
Heraut Louis's avatar
Heraut Louis committed