diff --git a/processing/analyse.R b/processing/analyse.R index 51a782973d866ff3734ab9f35b84e8cdd53f5343..74e606ccf6c09b605a862c0a0e41943d67c6cd26 100644 --- a/processing/analyse.R +++ b/processing/analyse.R @@ -40,60 +40,8 @@ library(trend) source('processing/format.R', encoding='latin1') -# Compute the time gap by station -get_lacune = function (df_data, df_meta) { - - # Get all different stations code - Code = levels(factor(df_meta$code)) - - # Create new vector to stock results for cumulative and mean - # time gap by station - tLac = c() - meanLac = c() - - # Get rows where there is no NA - NoNA = complete.cases(df_data) - # Get data where there is no NA - df_data_NoNA = df_data[NoNA,] - - # For every station - for (code in Code) { - # Get only the data rows for the selected station - df_data_code = df_data[df_data$code==code,] - # Get date for the selected station - Date = df_data_code$Date - # Get time span for the selection station - span = as.numeric(Date[length(Date)] - Date[1]) - - # Get only the data rows with no NA for the selected station - df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,] - # Get date for the selected station - Date_NoNA = df_data_NoNA_code$Date - - # Compute the time gap - lac = as.numeric(diff(Date_NoNA) - 1) - - # Compute the cumulative gap - lac_sum = sum(lac) - # Store the cumulative gap rate - tLac = c(tLac, lac_sum/span) - - # Compute the mean gap - lac_mean = mean(lac[lac != 0]) - # Store the mean gap - meanLac = c(meanLac, lac_mean) - } - - # Compute the cumulative gap rate in pourcent - tLac100 = tLac * 100 - # Create tibble for lacune - df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac) - # Join a tibble - df_meta = full_join(df_meta, df_lac) - return (df_meta) -} - - +## 1. TREND ANALYSIS +### 1.0. Intercept of trend # Compute intercept values of linear trends with first order values # of trends and the data on which analysis is performed. get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) { @@ -146,92 +94,7 @@ get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) { return (df_Xtrend) } - -# Compute the start and the end of the period for a trend analysis -# according to the accessible data -get_period = function (per, df_Xtrend, df_XEx, df_Xlist) { - - # Convert results of trend to tibble - df_Xtrend = tibble(df_Xtrend) - # Fix the period start and end of the accessible period to a - # default date - df_Xtrend$period_start = as.Date("1970-01-01") - df_Xtrend$period_end = as.Date("1970-01-01") - - # Change the format of the date variable to date - df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code')) - df_XExtmp = df_Xlisttmp$data - - # For all the different group - for (g in df_Xlisttmp$info$group) { - # Get the analyse data associated to the group - df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,] - # Get the id in the trend result associated to the group - id = which(df_Xtrend$group1 == g) - - # Compute index of the nearest accessible start and end date - iStart = which.min(abs(df_XExtmp_code$Date - - as.Date(per[1]))) - iEnd = which.min(abs(df_XExtmp_code$Date - - as.Date(per[2]))) - - # Store the start and end of the trend analysis - df_Xtrend$period_start[id] = - as.Date(df_XExtmp_code$Date[iStart]) - df_Xtrend$period_end[id] = - as.Date(df_XExtmp_code$Date[iEnd]) - } - return (df_Xtrend) -} - - -# Compute the break date of the flow data by station -get_break = function (df_data, df_meta, p_thresold=0.05) { - - # Get all different stations code - Code = levels(factor(df_meta$code)) - # Number of stations - nCode = length(Code) - - # Blank date break list and associated station code vector - date_break = list() - Code_break = c() - - # For all accessible code - for (code in Code) { - # Get the associated data - df_data_code = df_data[df_data$code == code,] - # Remove NA data - df_data_codeNoNA = df_data_code[!is.na(df_data_code$Qm3s),] - - # Perform the break analysis thanks to the Pettitt test - res_break = pettitt.test(df_data_codeNoNA$Qm3s) - - # Extract p value - p_value = res_break$p - # The length of the data analysed - nbreak = res_break$nobs - # Index of the break date - ibreak = res_break$estimate - - # If the p value results is under the thresold - if (p_value <= p_thresold) { - # Get the mean of the index break if there is several - ibreak = round(mean(ibreak), 0) - # Store the date break with its associated code - date_break = append(date_break, - df_data_codeNoNA$Date[ibreak]) - Code_break = append(Code_break, code) - } - # step1 = mean(df_data_codeNoNA$Qm3s[1:ibreak]) - # step2 = mean(df_data_codeNoNA$Qm3s[(ibreak+1):nbreak]) - } - # Create a tibble with the break analysis results - df_break = tibble(code=Code_break, Date=as.Date(date_break)) - return (df_break) -} - - +### 1.1. QA # Realise the trend analysis of the average annual flow (QA) # hydrological variable get_QAtrend = function (df_data, period, p_thresold) { @@ -249,7 +112,7 @@ get_QAtrend = function (df_data, period, p_thresold) { # Prepare the data to fit the entry of extract.Var df_QAlist = prepare(df_data, colnamegroup=c('code')) - # Compute the QA over the data + # Compute the yearly mean over the data df_QAEx = extract.Var(data.station=df_QAlist, funct=mean, timestep='year', @@ -280,20 +143,24 @@ get_QAtrend = function (df_data, period, p_thresold) { return (res_QAtrend) } - +### 1.2. QMNA # Realise the trend analysis of the monthly minimum flow in the # year (QMNA) hydrological variable get_QMNAtrend = function (df_data, period, p_thresold) { + # Make sure to convert the period to a list period = as.list(period) + # Set the max interval period as the minimal possible Imax = 0 + # Blank tibble for data to return df_QMNAtrendB = tibble() + # For all periods for (per in period) { - + # Prepare the data to fit the entry of extract.Var df_QMNAlist = prepare(df_data, colnamegroup=c('code')) - + # Compute the montly mean over the data df_QMNAEx = extract.Var(data.station=df_QMNAlist, funct=mean, period=per, @@ -301,52 +168,57 @@ get_QMNAtrend = function (df_data, period, p_thresold) { per.start="01", pos.datetime=1, na.rm=TRUE) - + # Rerepare the data to fit the entry of extract.Var df_QMNAlist = reprepare(df_QMNAEx, df_QMNAlist, colnamegroup=c('code')) - + # Compute the yearly min over the data df_QMNAEx = extract.Var(data.station=df_QMNAlist, funct=min, period=per, timestep='year', pos.datetime=1, na.rm=TRUE) - + # Compute the trend analysis df_QMNAtrend = Estimate.stats(data.extract=df_QMNAEx, level=p_thresold) - + + # Get the associated time interval I = interval(per[1], per[2]) + # If it is the largest interval if (I > Imax) { + # Store it and the associated data and info Imax = I df_QMNAlistB = df_QMNAlist df_QMNAExB = df_QMNAEx } - + + # Specify the period of analyse df_QMNAtrend = get_period(per, df_QMNAtrend, df_QMNAEx, df_QMNAlist) - + # Store the trend df_QMNAtrendB = bind_rows(df_QMNAtrendB, df_QMNAtrend) } - + # Clean results of trend analyse res_QMNAtrend = clean(df_QMNAtrendB, df_QMNAExB, df_QMNAlistB) - return (res_QMNAtrend) } - +### 1.3. VCN10 # Realise the trend analysis of the minimum 10 day average flow over the year (VCN10) hydrological variable get_VCN10trend = function (df_data, df_meta, period, p_thresold) { # Get all different stations code Code = levels(factor(df_meta$code)) - + # Blank tibble to store the data averaged df_data_roll = tibble() + # For all the code for (c in Code) { + # Get the data associated to the code df_data_code = df_data[df_data$code == c,] - + # Perform the roll mean of the flow over 10 days df_data_code = tibble(Date=rollmean(df_data_code$Date, 10, fill=NA), @@ -354,47 +226,153 @@ get_VCN10trend = function (df_data, df_meta, period, p_thresold) { 10, fill=NA), code=c) - + # Store the results df_data_roll = bind_rows(df_data_roll, df_data_code) } + # Make sure to convert the period to a list period = as.list(period) - + # Set the max interval period as the minimal possible Imax = 0 + # Blank tibble for data to return df_VCN10trendB = tibble() - + + # For all periods for (per in period) { - + # Prepare the data to fit the entry of extract.Var df_VCN10list = prepare(df_data_roll, colnamegroup=c('code')) - + # Compute the yearly min over the averaged data df_VCN10Ex = extract.Var(data.station=df_VCN10list, funct=min, period=per, timestep='year', pos.datetime=1, na.rm=TRUE) - + # Compute the trend analysis df_VCN10trend = Estimate.stats(data.extract=df_VCN10Ex, level=p_thresold) + # Get the associated time interval I = interval(per[1], per[2]) + # If it is the largest interval if (I > Imax) { + # Store it and the associated data and info Imax = I df_VCN10listB = df_VCN10list df_VCN10ExB = df_VCN10Ex } + # Specify the period of analyse df_VCN10trend = get_period(per, df_VCN10trend, df_VCN10Ex, df_VCN10list) - + # Store the trend df_VCN10trendB = bind_rows(df_VCN10trendB, df_VCN10trend) } - + # Clean results of trend analyse res_VCN10trend = clean(df_VCN10trendB, df_VCN10ExB, df_VCN10listB) - return (res_VCN10trend) } +## 2. OTHER ANALYSES +### 2.1. Break date +# Compute the break date of the flow data by station +get_break = function (df_data, df_meta, p_thresold=0.05) { + + # Get all different stations code + Code = levels(factor(df_meta$code)) + # Number of stations + nCode = length(Code) + + # Blank date break list and associated station code vector + date_break = list() + Code_break = c() + + # For all accessible code + for (code in Code) { + # Get the associated data + df_data_code = df_data[df_data$code == code,] + # Remove NA data + df_data_codeNoNA = df_data_code[!is.na(df_data_code$Qm3s),] + # Perform the break analysis thanks to the Pettitt test + res_break = pettitt.test(df_data_codeNoNA$Qm3s) + + # Extract p value + p_value = res_break$p + # The length of the data analysed + nbreak = res_break$nobs + # Index of the break date + ibreak = res_break$estimate + + # If the p value results is under the thresold + if (p_value <= p_thresold) { + # Get the mean of the index break if there is several + ibreak = round(mean(ibreak), 0) + # Store the date break with its associated code + date_break = append(date_break, + df_data_codeNoNA$Date[ibreak]) + Code_break = append(Code_break, code) + } + # step1 = mean(df_data_codeNoNA$Qm3s[1:ibreak]) + # step2 = mean(df_data_codeNoNA$Qm3s[(ibreak+1):nbreak]) + } + # Create a tibble with the break analysis results + df_break = tibble(code=Code_break, Date=as.Date(date_break)) + return (df_break) +} + +### 2.2. Time gap +# Compute the time gap by station +get_lacune = function (df_data, df_meta) { + + # Get all different stations code + Code = levels(factor(df_meta$code)) + + # Create new vector to stock results for cumulative and mean + # time gap by station + tLac = c() + meanLac = c() + + # Get rows where there is no NA + NoNA = complete.cases(df_data) + # Get data where there is no NA + df_data_NoNA = df_data[NoNA,] + + # For every station + for (code in Code) { + # Get only the data rows for the selected station + df_data_code = df_data[df_data$code==code,] + # Get date for the selected station + Date = df_data_code$Date + # Get time span for the selection station + span = as.numeric(Date[length(Date)] - Date[1]) + + # Get only the data rows with no NA for the selected station + df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,] + # Get date for the selected station + Date_NoNA = df_data_NoNA_code$Date + + # Compute the time gap + lac = as.numeric(diff(Date_NoNA) - 1) + + # Compute the cumulative gap + lac_sum = sum(lac) + # Store the cumulative gap rate + tLac = c(tLac, lac_sum/span) + + # Compute the mean gap + lac_mean = mean(lac[lac != 0]) + # Store the mean gap + meanLac = c(meanLac, lac_mean) + } + + # Compute the cumulative gap rate in pourcent + tLac100 = tLac * 100 + # Create tibble for lacune + df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac) + # Join a tibble + df_meta = full_join(df_meta, df_lac) + return (df_meta) +} diff --git a/processing/format.R b/processing/format.R index 4c4cbeacf94c711dda18088506a01485aadd0500..27b7e43727a722fe20da5cc50a955d3b881b7e6a 100644 --- a/processing/format.R +++ b/processing/format.R @@ -71,6 +71,44 @@ join = function (df_data_AG, df_data_NV, df_meta_AG, df_meta_NV) { } +# Compute the start and the end of the period for a trend analysis +# according to the accessible data +get_period = function (per, df_Xtrend, df_XEx, df_Xlist) { + + # Convert results of trend to tibble + df_Xtrend = tibble(df_Xtrend) + # Fix the period start and end of the accessible period to a + # default date + df_Xtrend$period_start = as.Date("1970-01-01") + df_Xtrend$period_end = as.Date("1970-01-01") + + # Change the format of the date variable to date + df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code')) + df_XExtmp = df_Xlisttmp$data + + # For all the different group + for (g in df_Xlisttmp$info$group) { + # Get the analyse data associated to the group + df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,] + # Get the id in the trend result associated to the group + id = which(df_Xtrend$group1 == g) + + # Compute index of the nearest accessible start and end date + iStart = which.min(abs(df_XExtmp_code$Date + - as.Date(per[1]))) + iEnd = which.min(abs(df_XExtmp_code$Date + - as.Date(per[2]))) + + # Store the start and end of the trend analysis + df_Xtrend$period_start[id] = + as.Date(df_XExtmp_code$Date[iStart]) + df_Xtrend$period_end[id] = + as.Date(df_XExtmp_code$Date[iEnd]) + } + return (df_Xtrend) +} + + # Prepare the data in order to have a list of a data tibble with date, group and flow column and a info tibble with the station code and group column to fit the entry of the 'StatsAnalysisTrend' package prepare = function(df_data, colnamegroup=NULL) {