analyse.R

# Usefull library
library(dplyr)
library(zoo)
library(StatsAnalysisTrend)

# Sourcing R file
source('processing/format.R', encoding='latin1')


# Compute the time gap by station
get_lacune = function (df_data, df_info) {
    
    # Get all different stations code
    Code = levels(factor(df_info$code))
    
    # Create new vector to stock results for cumulative time gap by station
    tLac = c()
    # Create new vector to stock results for mean time gap by station
    meanLac = c()

    # Get rows where there is no NA
    NoNA = complete.cases(df_data)
    # Get data where there is no NA
    df_data_NoNA = df_data[NoNA,]

    # For every station
    for (code in Code) {
        
        # Get only the data rows for the selected station
        df_data_code = df_data[df_data$code==code,]
        # Get date for the selected station
        Date = df_data_code$Date
        # Get time span for the selection station
        span = as.numeric(Date[length(Date)] - Date[1])
        
        # Get only the data rows with no NA for the selected station
        df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
        # Get date for the selected station
        Date_NoNA = df_data_NoNA_code$Date
        
        # Compute the time gap
        lac = as.numeric(diff(Date_NoNA) - 1)

        # Compute the cumulative gap
        lac_sum = sum(lac)
        # Store the cumulative gap rate
        tLac = c(tLac, lac_sum/span)

        # Compute the mean gap
        lac_mean = mean(lac[lac != 0])
        # Store the mean gap
        meanLac = c(meanLac, lac_mean)
        
    }
    
    # Compute the cumulative gap rate in pourcent
    tLac100 = tLac * 100
    
    # Create a tibble
    df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
    
    return (df_lac)
}


get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {

    intercept = c()
    # Group = levels(factor())
    for (g in df_Xlist$info$group) {
        df_data_code = df_Xlist$data[df_Xlist$data$group == g,]

        trend = df_Xtrend$trend[df_Xtrend$group == g]
        mu_X = mean(df_data_code$Qm3s, na.rm=TRUE)

        mu_t = as.numeric(mean(df_data_code$Date,
                               na.rm=TRUE))/unit2day
        
        b = mu_X - mu_t * trend

        intercept = append(intercept, b)
    }
    return (intercept)
}


get_QAtrend = function (df_data, period) {
    # AVERAGE ANNUAL FLOW : QA #
    
    period = as.list(period)
    
    Imax = 0
    df_QAtrendB = tibble()

    for (per in period){
               
        df_QAlist = prepare(df_data, colnamegroup=c('code'))

        df_QAEx = extract.Var(data.station=df_QAlist,
                              funct=mean,
                              timestep='year',
                              period=per,
                              pos.datetime=1,
                              na.rm=TRUE)

        df_QAtrend = Estimate.stats(data.extract=df_QAEx)

        I = interval(per[1], per[2])
        if (I > Imax) {
            Imax = I
            df_QAlistB = df_QAlist
            df_QAExB = df_QAEx
        }

        df_QAtrend = bind_cols(df_QAtrend,
                               tibble(period_start=as.Date(per[1])),
                               tibble(period_end=as.Date(per[2])))
        df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend)

    }
    
    res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB)

    return (res_QAtrend)
}

get_QMNAtrend = function (df_data, period) {
    # MONTHLY MINIMUM FLOW IN THE YEAR : QMNA #

    period = as.list(period)
    
    Imax = 0
    df_QMNAtrendB = tibble()

    for (per in period) {

        df_QMNAlist = prepare(df_data, colnamegroup=c('code'))
        
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=mean,
                                period=per,
                                timestep='month',
                                pos.datetime=1,
                                na.rm=TRUE)
        
        df_QMNAlist = reprepare(df_QMNAEx, df_QMNAlist, colnamegroup=c('code'))
        
        df_QMNAEx = extract.Var(data.station=df_QMNAlist,
                                funct=min,
                                period=per,
                                timestep='year',
                                pos.datetime=1,
                                na.rm=TRUE)
        
        df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx)
        
        I = interval(per[1], per[2])
        if (I > Imax) {
            Imax = I
            df_QMNAlistB = df_QMNAlist
            df_QMNAExB = df_QMNAEx
        }

        df_QMNAtrend = bind_cols(df_QMNAtrend,
                               tibble(period_start=as.Date(per[1])),
                               tibble(period_end=as.Date(per[2])))
        df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend)
    }
    
    
    res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB)

    return (res_QMNAtrend)
}


get_VCN10trend = function (df_data, df_meta, period) {
    # MINIMUM 10 DAY AVERAGE FLOW OVER THE YEAR : VCN10 #

    # Get all different stations code
    Code = levels(factor(df_meta$code))

    df_data_roll = tibble()

    for (c in Code) {
        df_data_code = df_data[df_data$code == c,]
        
        df_data_code = tibble(Date=rollmean(df_data_code$Date,
                                            10,
                                            fill=NA),
                              Qm3s=rollmean(df_data_code$Qm3s, 
                                            10,
                                            fill=NA),
                              code=c)

        df_data_roll = bind_rows(df_data_roll, df_data_code)
    }

    period = as.list(period)
    
    Imax = 0
    df_VCN10trendB = tibble()
    
    for (per in period) {
        
        df_VCN10list = prepare(df_data_roll, colnamegroup=c('code'))

        df_VCN10Ex = extract.Var(data.station=df_VCN10list,
                                 funct=min,
                                 period=per,
                                 timestep='year',
                                 pos.datetime=1,
                                 na.rm=TRUE)

        df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex)

        I = interval(per[1], per[2])
        if (I > Imax) {
            Imax = I
            df_VCN10listB = df_VCN10list
            df_VCN10ExB = df_VCN10Ex
        }

        df_VCN10trend = bind_cols(df_VCN10trend,
                                  tibble(period_start=as.Date(per[1])),
                                  tibble(period_end=as.Date(per[2])))
        df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend)
    }
    
    res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB)

    return (res_VCN10trend)
}