format.R 7.99 KiB
# \\\
# Copyright 2021-2022 Louis Héraut*1
# *1   INRAE, France
#      louis.heraut@inrae.fr
# This file is part of ash R toolbox.
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# ash R toolbox is distributed in the hope that it will be useful, but 
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with ash R toolbox.  If not, see <https://www.gnu.org/licenses/>.
# ///
# processing/format.R
# Manages all the format problem of data and info. Mainly problem of
# input and output of the 'StatsAnalysisTrend' package. It also allows
# to join different selections of station and to gets exact period of
# trend analysis.
# Usefull library
library(dplyr)
## 1. INPUT
### 1.1. Preparation
# Prepares the data in order to have a list of a data tibble with
# date, group and flow column and a info tibble with the station code
# and group column to fit the entry of the 'extract.Var' function in
# the 'StatsAnalysisTrend' package
prepare = function(df_data, colnamegroup=NULL) {
    # Forces the column name to group to be a vector 
    colnamegroup = c(colnamegroup)
    # Converts it to index of the column to group
    colindgroup = which(colnames(df_data) == colnamegroup)
    # Groups the data by those indexes
    df_data = group_by_at(df_data, colindgroup)
    # Creates a new tibble of data with a group column
    data = tibble(Date=df_data$Date, 
                  group=group_indices(df_data),
                  Qm3s=df_data$Qm3s)
    # Gets the different value of the group
    Gkey = group_keys(df_data)
    # Creates a new tibble of info of the group
    info = bind_cols(group=seq(1:nrow(Gkey)),
                     Gkey)
    # Stores data and info tibble as a list that match the entry of
    # the 'extract.Var' function
    res = list(data=data, info=info)
    return (res)
### 1.2. Re-preparation
# Re-prepares the data in outing of the 'extract.Var' function in
# the 'StatsAnalysisTrend' package in order to fit again to the
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
# entry of the same function reprepare = function(df_XEx, df_Xlist, colnamegroup=NULL) { # Changes the column name of the results of the # 'extract.Var' function colnames(df_XEx) = c('Date', 'group', 'Qm3s') # Converts Date column as character df_XEx$Date = as.character(df_XEx$Date) # Takes the first date as example exDate = df_XEx$Date[1] # Finds the number of dash in the date nbt = lengths(regmatches(exDate, gregexpr('-', exDate))) # If there is only one dash if (nbt == 1) { # Converts it to date from a year and a month df_XEx$Date = paste(df_XEx$Date, '01', sep='-') # If there is no dash } else if (nbt == 0) { # Converts it to date from only a year df_XEx$Date = paste(df_XEx$Date, '01', '01', sep='-') # If there is more than 2 dashes } else if (nbt != 2) { # This is not a classical date stop('erreur of date format') } # Recreates the outing of the 'extract.Var' function nicer df_XEx = bind_cols(Date=as.Date(df_XEx$Date, format="%Y-%m-%d"), df_XEx[-1], df_Xlist$info[df_XEx$group, 2:ncol(df_Xlist$info)]) # Prepares the nicer outing df_XlistEx = prepare(df_XEx, colnamegroup=colnamegroup) return (df_XlistEx) } ## 2. OUTPUT # Cleans the trend results of the function 'Estimate.stats' in the # 'StatsAnalysisTrend' package. It adds the station code and the # intercept of the trend to the trend results. Also makes the data # more presentable. clean = function (df_Xtrend, df_XEx, df_Xlist) { # Reprepares the list of data and info in order to be presentable df_Xlist = reprepare(df_XEx, df_Xlist, colnamegroup=c('code')) # Adds a column of station code df_Xlist$data$code = NA # For all the group for (g in df_Xlist$info$group) { # Adds the station code corresponding to each group info df_Xlist$data$code[which(df_Xlist$data$group == g)] = df_Xlist$info$code[df_Xlist$info$group == g] } # Adds the info to trend tibble df_Xtrend = bind_cols(df_Xtrend, df_Xlist$info[df_Xtrend$group1, 2:ncol(df_Xlist$info)]) # Renames the column of group of trend results colnames(df_Xtrend)[1] = 'group' # Adds the intercept value of trend df_Xtrend = get_intercept(df_Xtrend, df_Xlist, unit2day=365.25) # Changes the position of the intercept column df_Xtrend = relocate(df_Xtrend, intercept, .after=trend) # Creates a list of results to return res = list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info)
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
return (res) } ## 3. OTHER ### 3.1. Joining selection # Joins tibbles of different selection of station as a unique one join = function (df_data_AG, df_data_IN, df_meta_AG, df_meta_IN) { # If there is an INRAE and an Agence de l'eau Adour-Garonne selection if (!is.null(df_data_IN) & !is.null(df_data_AG)) { # Gets the station in common common = levels(factor(df_meta_IN[df_meta_IN$code %in% df_meta_AG$code,]$code)) # Gets the Nv station to add INadd = levels(factor(df_meta_IN[!(df_meta_IN$code %in% df_meta_AG$code),]$code)) # Selects only the IN meta to add df_meta_INadd = df_meta_IN[df_meta_IN$code %in% INadd,] # Names the source of the selection df_meta_AG$source = 'AG' df_meta_INadd$source = 'IN' # Joins IN data to AG data df_meta = full_join(df_meta_AG, df_meta_INadd) # Selects only the IN data to add df_data_INadd = df_data_IN[df_data_IN$code %in% INadd,] # Joins IN meta to AG meta df_data = full_join(df_data_AG, df_data_INadd) # If there is just an Agence de l'eau Adour-Garonne selection } else if (is.null(df_data_IN) & !is.null(df_data_AG)) { df_meta_AG$source = 'AG' df_meta = df_meta_AG df_data = df_data_AG # If there is just an INRAE selection } else if (!is.null(df_data_IN) & is.null(df_data_AG)) { df_meta_IN$source = 'IN' df_meta = df_meta_IN df_data = df_data_IN # If there is no selection } else { stop('No data') } return (list(data=df_data, meta=df_meta)) } ### 3.2. Period of trend # Compute the start and the end of the period for a trend analysis # according to the accessible data get_period = function (per, df_Xtrend, df_XEx, df_Xlist) { # Converts results of trend to tibble df_Xtrend = tibble(df_Xtrend) # Fix the period start and end of the accessible period to a # default date df_Xtrend$period_start = as.Date("1970-01-01") df_Xtrend$period_end = as.Date("1970-01-01") # Changes the format of the date variable to date df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code')) df_XExtmp = df_Xlisttmp$data # For all the different group for (g in df_Xlisttmp$info$group) { # Gets the analyse data associated to the group
211212213214215216217218219220221222223224225226227228229
df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,] # Gets the id in the trend result associated to the group id = which(df_Xtrend$group1 == g) # Computes index of the nearest accessible start and end date iStart = which.min(abs(df_XExtmp_code$Date - as.Date(per[1]))) iEnd = which.min(abs(df_XExtmp_code$Date - as.Date(per[2]))) # Stores the start and end of the trend analysis df_Xtrend$period_start[id] = as.Date(df_XExtmp_code$Date[iStart]) df_Xtrend$period_end[id] = as.Date(df_XExtmp_code$Date[iEnd]) } return (df_Xtrend) }