analyse.R

# Usefull library
# library(StatsAnalysisTrend)
library(dplyr)


# Compute the time gap by station
get_lacune = function (df_data, df_info) {
    
    # Get all different stations code
    Code = levels(factor(df_info$code))
    
    # Create new vector to stock results for cumulative time gap by station
    tLac = c()
    # Create new vector to stock results for mean time gap by station
    meanLac = c()

    # Get rows where there is no NA
    NoNA = complete.cases(df_data)
    # Get data where there is no NA
    df_data_NoNA = df_data[NoNA,]

    # For every station
    for (code in Code) {
        
        # Get only the data rows for the selected station
        df_data_code = df_data[df_data$code==code,]
        # Get date for the selected station
        Date = df_data_code$Date
        # Get time span for the selection station
        span = as.numeric(Date[length(Date)] - Date[1])
        
        # Get only the data rows with no NA for the selected station
        df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,]
        # Get date for the selected station
        Date_NoNA = df_data_NoNA_code$Date
        
        # Compute the time gap
        lac = as.numeric(diff(Date_NoNA) - 1)

        # Compute the cumulative gap
        lac_sum = sum(lac)
        # Store the cumulative gap rate
        tLac = c(tLac, lac_sum/span)

        # Compute the mean gap
        lac_mean = mean(lac[lac != 0])
        # Store the mean gap
        meanLac = c(meanLac, lac_mean)
        
    }
    
    # Compute the cumulative gap rate in pourcent
    tLac100 = tLac * 100
    
    # Create a tibble
    df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
    
    return (df_lac)
}