# Useful library
library(dplyr)


# Compute the time gap by station
#
# For every station code found in 'df_info', measures how much of the
# station record is missing. A row of 'df_data' counts as missing when
# at least one of its columns is NA (see complete.cases()).
#
# Arguments:
#   df_data  data frame with a 'code' column (station code), a 'Date'
#            column and the measured values. Rows are used in the order
#            given. NOTE(review): the "- 1" applied to the date
#            differences presumes rows sorted by date with a regular
#            step of one time unit (e.g. daily 'Date' values) - confirm
#            against the caller.
#   df_info  data frame with a 'code' column listing the stations.
#
# Value:
#   A tibble with one row per distinct station code of 'df_info'
#   (in the order given by levels(factor(...))) and the columns:
#     code     station code, as character
#     tLac100  cumulative gap divided by the time span between the
#              first and last row of the station, in percent
#     meanLac  mean length of the non-zero gaps; NaN when the station
#              has no gap
#   Both numeric columns are NA for a station that has no row in
#   'df_data'.
#
# Gaps are measured between consecutive complete rows only, so missing
# rows located before the first or after the last complete row of a
# station are not counted.
get_lacune = function (df_data, df_info) {

    # Get all different stations code
    Code = levels(factor(df_info$code))
    nCode = length(Code)

    # Preallocate the results so that every station keeps its own slot,
    # even when nothing can be computed for it
    # Cumulative time gap rate by station
    tLac = rep(NA_real_, nCode)
    # Mean time gap by station
    meanLac = rep(NA_real_, nCode)

    # Flag rows where there is no NA
    NoNA = complete.cases(df_data)

    # For every station
    for (i in seq_len(nCode)) {

        # Get the row indices of the selected station. which() drops
        # the rows whose code is NA instead of turning them into
        # all-NA rows
        id = which(df_data$code == Code[i])

        # Station without any data: keep NA for both indicators
        if (length(id) == 0) {
            next
        }

        # Get date for the selected station
        Date = df_data$Date[id]

        # Get time span for the selected station
        span = as.numeric(Date[length(Date)] - Date[1])

        # Get date of the rows with no NA for the selected station
        Date_NoNA = Date[NoNA[id]]

        # Compute the time gap: consecutive rows are one step apart,
        # hence the "- 1" to get a gap of 0 when nothing is missing
        lac = as.numeric(diff(Date_NoNA) - 1)

        # Store the cumulative gap rate
        tLac[i] = sum(lac) / span

        # Store the mean gap, ignoring the pairs of rows without gap
        meanLac[i] = mean(lac[lac != 0])
    }

    # Compute the cumulative gap rate in percent
    tLac100 = tLac * 100

    # Create a tibble
    df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac)
    return (df_lac)
}