analyse.R 9.70 KiB
# Usefull library
library(dplyr)
library(zoo)
library(StatsAnalysisTrend)
library(lubridate)
library(trend)
# Sourcing R file
source('processing/format.R', encoding='latin1')
# Compute the time gap by station
get_lacune = function (df_data, df_info) {
    # Get all different stations code
    Code = levels(factor(df_info$code))
    # Create new vector to stock results for cumulative time gap by station
    tLac = c()
    # Create new vector to stock results for mean time gap by station
    meanLac = c()
    # Get rows where there is no NA
    NoNA = complete.cases(df_data)
    # Get data where there is no NA
    df_data_NoNA = df_data[NoNA,]
    # For every station
    for (code in Code) {
        # Get only the data rows for the selected station
        df_data_code = df_data[df_data$code==code,]
        # Get date for the selected station
        Date = df_data_code$Date
        # Get time span for the selection station
        span = as.numeric(Date[length(Date)] - Date[1])
        # Get only the data rows with no NA for the selected station
        df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
        # Get date for the selected station
        Date_NoNA = df_data_NoNA_code$Date
        # Compute the time gap
        lac = as.numeric(diff(Date_NoNA) - 1)
        # Compute the cumulative gap
        lac_sum = sum(lac)
        # Store the cumulative gap rate
        tLac = c(tLac, lac_sum/span)
        # Compute the mean gap
        lac_mean = mean(lac[lac != 0])
        # Store the mean gap
        meanLac = c(meanLac, lac_mean)
    # Compute the cumulative gap rate in pourcent
    tLac100 = tLac * 100
    # Create a tibble
    df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
    return (df_lac)
get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) {
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
df_Xtrend$intercept = NA for (g in df_Xlist$info$group) { df_data_code = df_Xlist$data[df_Xlist$data$group == g,] df_Xtrend_code = df_Xtrend[df_Xtrend$group == g,] Start = df_Xtrend_code$period_start UStart = levels(factor(Start)) End = df_Xtrend_code$period_end UEnd = levels(factor(End)) nPeriod = max(length(UStart), length(UEnd)) for (i in 1:nPeriod) { df_data_code_per = df_data_code[df_data_code$Date >= Start[i] & df_data_code$Date <= End[i],] df_Xtrend_code_per = df_Xtrend_code[df_Xtrend_code$period_start == Start[i] & df_Xtrend_code$period_end == End[i],] id = which(df_Xtrend$group == g & df_Xtrend$period_start == Start[i] & df_Xtrend$period_end == End[i]) mu_X = mean(df_data_code_per$Qm3s, na.rm=TRUE) mu_t = as.numeric(mean(c(Start[i], End[i]), na.rm=TRUE)) / unit2day b = mu_X - mu_t * df_Xtrend_code_per$trend df_Xtrend$intercept[id] = b } } return (df_Xtrend) } get_period = function (per, df_Xtrend, df_XEx, df_Xlist) { df_Xtrend = tibble(df_Xtrend) df_Xtrend$period_start = as.Date("1970-01-01") df_Xtrend$period_end = as.Date("1970-01-01") df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code')) df_XExtmp = df_Xlisttmp$data for (g in df_Xlisttmp$info$group) { df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,] iStart = which.min(abs(df_XExtmp_code$Date - as.Date(per[1]))) iEnd = which.min(abs(df_XExtmp_code$Date - as.Date(per[2]))) id = which(df_Xtrend$group1 == g) df_Xtrend$period_start[id] = as.Date(df_XExtmp_code$Date[iStart]) df_Xtrend$period_end[id] = as.Date(df_XExtmp_code$Date[iEnd]) } return (df_Xtrend)
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
} get_break = function (df_data, df_meta, p_thresold=0.05) { # Get all different stations code Code = levels(factor(df_meta$code)) nCode = length(Code) date_break = list() Code_break = c() for (code in Code) { df_data_code = df_data[df_data$code == code,] df_data_codeNoNA = df_data_code[!is.na(df_data_code$Qm3s),] res_break = pettitt.test(df_data_codeNoNA$Qm3s) p_value = res_break$p nbreak = res_break$nobs ibreak = res_break$estimate if (length(ibreak) > 1) { ibreak = ibreak[1] } # step1 = mean(df_data_codeNoNA$Qm3s[1:ibreak]) # step2 = mean(df_data_codeNoNA$Qm3s[(ibreak+1):nbreak]) if (p_value <= p_thresold) { date_break = append(date_break, df_data_codeNoNA$Date[ibreak]) Code_break = append(Code_break, code) } } df_break = tibble(code=Code_break, Date=as.Date(date_break)) return (df_break) } get_QAtrend = function (df_data, period, p_thresold) { # AVERAGE ANNUAL FLOW : QA # period = as.list(period) Imax = 0 df_QAtrendB = tibble() for (per in period) { df_QAlist = prepare(df_data, colnamegroup=c('code')) df_QAEx = extract.Var(data.station=df_QAlist, funct=mean, timestep='year', period=per, pos.datetime=1, na.rm=TRUE) df_QAtrend = Estimate.stats(data.extract=df_QAEx, level=p_thresold) I = interval(per[1], per[2]) if (I > Imax) { Imax = I df_QAlistB = df_QAlist df_QAExB = df_QAEx }
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
df_QAtrend = get_period(per, df_QAtrend, df_QAEx, df_QAlist) df_QAtrendB = bind_rows(df_QAtrendB, df_QAtrend) } res_QAtrend = clean(df_QAtrendB, df_QAExB, df_QAlistB) return (res_QAtrend) } get_QMNAtrend = function (df_data, period, p_thresold) { # MONTHLY MINIMUM FLOW IN THE YEAR : QMNA # period = as.list(period) Imax = 0 df_QMNAtrendB = tibble() for (per in period) { df_QMNAlist = prepare(df_data, colnamegroup=c('code')) # df_QMNAEx = extract.Var(data.station=df_QMNAlist, # funct=mean, # period=per, # timestep='month', # pos.datetime=1, # na.rm=TRUE) df_QMNAEx = extract.Var(data.station=df_QMNAlist, funct=mean, period=per, timestep='year-month', per.start="01", pos.datetime=1, na.rm=TRUE) df_QMNAlist = reprepare(df_QMNAEx, df_QMNAlist, colnamegroup=c('code')) df_QMNAEx = extract.Var(data.station=df_QMNAlist, funct=min, period=per, timestep='year', pos.datetime=1, na.rm=TRUE) df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx, level=p_thresold) I = interval(per[1], per[2]) if (I > Imax) { Imax = I df_QMNAlistB = df_QMNAlist df_QMNAExB = df_QMNAEx } df_QMNAtrend = get_period(per, df_QMNAtrend, df_QMNAEx, df_QMNAlist) df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend) } res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB) return (res_QMNAtrend)
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
} get_VCN10trend = function (df_data, df_meta, period, p_thresold) { # MINIMUM 10 DAY AVERAGE FLOW OVER THE YEAR : VCN10 # # Get all different stations code Code = levels(factor(df_meta$code)) df_data_roll = tibble() for (c in Code) { df_data_code = df_data[df_data$code == c,] df_data_code = tibble(Date=rollmean(df_data_code$Date, 10, fill=NA), Qm3s=rollmean(df_data_code$Qm3s, 10, fill=NA), code=c) df_data_roll = bind_rows(df_data_roll, df_data_code) } period = as.list(period) Imax = 0 df_VCN10trendB = tibble() for (per in period) { df_VCN10list = prepare(df_data_roll, colnamegroup=c('code')) df_VCN10Ex = extract.Var(data.station=df_VCN10list, funct=min, period=per, timestep='year', pos.datetime=1, na.rm=TRUE) df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex, level=p_thresold) I = interval(per[1], per[2]) if (I > Imax) { Imax = I df_VCN10listB = df_VCN10list df_VCN10ExB = df_VCN10Ex } df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex, df_VCN10list) df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend) } res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB) return (res_VCN10trend) }