# \\\
# Copyright 2021-2022 Louis H�raut*1
#
# *1   INRAE, France
#      louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but 
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox.  If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/format.R
#
# 


# Usefull library
library(dplyr)


join = function (df_data_AG, df_data_NV, df_meta_AG, df_meta_NV) {

    if (!is.null(df_data_NV) & !is.null(df_data_AG)) {

        # Get the station in common
        common = levels(factor(df_meta_NV[df_meta_NV$code %in% df_meta_AG$code,]$code)) 
        # Get the Nv station to add
        NVadd = levels(factor(df_meta_NV[!(df_meta_NV$code %in% df_meta_AG$code),]$code))

        # Select only the NV meta to add
        df_meta_NVadd = df_meta_NV[df_meta_NV$code %in% NVadd,]

        df_meta_AG$source = 'AG'
        df_meta_NVadd$source = 'NV'
        
        # Join NV data to AG data
        df_meta = full_join(df_meta_AG, df_meta_NVadd)

        # Select only the NV data to add
        df_data_NVadd = df_data_NV[df_data_NV$code %in% NVadd,]
        # Join NV meta to AG meta
        df_data = full_join(df_data_AG, df_data_NVadd)

    } else if (is.null(df_data_NV) & !is.null(df_data_AG)) {
        df_meta_AG$source = 'AG'
        df_meta = df_meta_AG
        df_data = df_data_AG

    } else if (!is.null(df_data_NV) & is.null(df_data_AG)) {
        df_meta_NV$source = 'NV'
        df_meta = df_meta_NV
        df_data = df_data_NV

    } else {
        stop('No data')
    }
    
    return (list(data=df_data, meta=df_meta))
}


# Compute the start and the end of the period for a trend analysis
# according to the accessible data 
get_period = function (per, df_Xtrend, df_XEx, df_Xlist) {

    # Convert results of trend to tibble
    df_Xtrend = tibble(df_Xtrend)
    # Fix the period start and end of the accessible period to a
    # default date
    df_Xtrend$period_start = as.Date("1970-01-01")
    df_Xtrend$period_end = as.Date("1970-01-01")

    # Change the format of the date variable to date
    df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
    df_XExtmp = df_Xlisttmp$data

    # For all the different group
    for (g in df_Xlisttmp$info$group) {
        # Get the analyse data associated to the group
        df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,]
        # Get the id in the trend result associated to the group
        id = which(df_Xtrend$group1 == g)

        # Compute index of the nearest accessible start and end date
        iStart = which.min(abs(df_XExtmp_code$Date
                               - as.Date(per[1])))
        iEnd = which.min(abs(df_XExtmp_code$Date 
                             - as.Date(per[2])))

        # Store the start and end of the trend analysis
        df_Xtrend$period_start[id] =
            as.Date(df_XExtmp_code$Date[iStart])
        df_Xtrend$period_end[id] =
            as.Date(df_XExtmp_code$Date[iEnd])
    }
    return (df_Xtrend)
}


# Prepare the data in order to have a list of a data tibble with date, group and flow column and a info tibble with the station code and group column to fit the entry of the 'StatsAnalysisTrend' package
prepare = function(df_data, colnamegroup=NULL) {
            
    colnamegroup = c(colnamegroup)
    colindgroup = which(colnames(df_data) == colnamegroup)
    df_data = group_by_at(df_data, colindgroup)

    data = tibble(Date=df_data$Date, 
                  group=group_indices(df_data),
                  Qm3s=df_data$Qm3s) 
    Gkey = group_keys(df_data)

    info = bind_cols(group=seq(1:nrow(Gkey)),
                     Gkey)
    return (list(data=data, info=info))
}


reprepare = function(df_XEx, df_Xlist, colnamegroup=NULL) {

    colnames(df_XEx) = c('Date', 'group', 'Qm3s')

    df_XEx$Date = as.character(df_XEx$Date)
    exDate = df_XEx$Date[1]
    nbt = lengths(regmatches(exDate, gregexpr('-', exDate)))
    
    if (nbt == 1) {
         df_XEx$Date = paste(df_XEx$Date, '01', sep='-')
    } else if (nbt == 0) {
         df_XEx$Date = paste(df_XEx$Date, '01', '01', sep='-')
    } else if (nbt != 2) {
        stop('erreur of date format')
    }

    df_XEx = bind_cols(Date=as.Date(df_XEx$Date,
                                       format="%Y-%m-%d"),
                       df_XEx[-1],
                       df_Xlist$info[df_XEx$group,
                                     2:ncol(df_Xlist$info)])

    df_XlistEx = prepare(df_XEx, colnamegroup=colnamegroup)
    return (df_XlistEx)
}


clean = function (df_Xtrend, df_XEx, df_Xlist) {

    df_Xlist = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))

    # print(df_Xlist)

    df_Xlist$data$code = NA
    for (g in df_Xlist$info$group) {
        df_Xlist$data$code[which(df_Xlist$data$group == g)] = df_Xlist$info$code[df_Xlist$info$group == g]
    }
    
    # df_Xlist$data = df_Xlist$data[, !names(df_Xlist$data) == "group")]

    df_Xtrend = bind_cols(df_Xtrend,
                          df_Xlist$info[df_Xtrend$group1,
                                       2:ncol(df_Xlist$info)])

    colnames(df_Xtrend)[1] = 'group'

    df_Xtrend = get_intercept(df_Xtrend, df_Xlist, unit2day=365.25)

    # df_Xtrend$intercept = intercept
    df_Xtrend = relocate(df_Xtrend, intercept, .after=trend)

    return (list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info))
}