From 709cea162a434d62630f98f5baf8659ba3efdd6e Mon Sep 17 00:00:00 2001 From: "louis.heraut" <louis.heraut@inrae.fr> Date: Thu, 6 Jan 2022 21:57:56 +0100 Subject: [PATCH] Commenting --- processing/extract.R | 107 ++++++++++--------- processing/format.R | 246 +++++++++++++++++++++++++------------------ 2 files changed, 206 insertions(+), 147 deletions(-) diff --git a/processing/extract.R b/processing/extract.R index 9db9688..522fa81 100644 --- a/processing/extract.R +++ b/processing/extract.R @@ -23,7 +23,12 @@ # # processing/extract.R # -# +# Regroups functions to generation of generical station selection +# according to pre-existing directory of data or specific other +# selection format as '.docx' file from Agence de l'eau Adour-Garonne. +# Also useful to extract the data and the metadata present in the +# Banque Hydro files from a selection of station. Manages also +# shapefiles loading. # Usefull library @@ -121,6 +126,8 @@ iRegHydro = c('D'='Affluents du Rhin', '4'='RĂ©union') +## 2. SELECTION +### 2.1. Creation of selection # Create a txt file that resume all the station data files present # in a filedir create_selection = function (computer_data_path, filedir, outname) { @@ -152,14 +159,13 @@ create_selection = function (computer_data_path, filedir, outname) { write.table(df_file, outfile, sep=";", col.names=TRUE, quote=FALSE) return (NULL) } - # Example # create_selection( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", # "France207", # "nival_selection.txt") - +### 2.2. Agence de l'eau Adour-Garonne selection # Gets the selection of station from the 'Liste-station_RRSE.docx' file get_selection_AG = function (computer_data_path, listdir, listname, cnames=c('code','station', 'BV_km2', @@ -205,7 +211,6 @@ get_selection_AG = function (computer_data_path, listdir, listname, ok=selec) return (df_selec) } - # Example # df_selec_AG = get_selection_AG( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", @@ -220,7 +225,7 @@ get_selection_AG = function (computer_data_path, listdir, listname, # c_num=c('BV_km2', # 'longueur_serie')) - +### 2.3. INRAE selection # Gets the selection of station from the selection txt file generated # by the 'create_selection' function get_selection_IN = function (computer_data_path, listdir, listname) { @@ -245,6 +250,8 @@ get_selection_IN = function (computer_data_path, listdir, listname) { # "nival_selection.txt") +## 3. EXTRACTION +### 3.1. Extraction of metadata # Extraction of metadata of stations extract_meta = function (computer_data_path, filedir, filename, verbose=TRUE) { @@ -311,47 +318,54 @@ extract_meta = function (computer_data_path, filedir, filename, metatxt = c(readLines(file_path, n=41, encoding="UTF-8")) # Create a tibble with all the metadata needed + # (IN for INRAE data and BH for BH data) df_meta = - tibble(code=trimws(substr(metatxt[11], 38, - nchar(metatxt[11]))), - nom=trimws(substr(metatxt[12], 39, - nchar(metatxt[12]))), - territoire=trimws(substr(metatxt[13], 39, - nchar(metatxt[13]))), - - gestionnaire=trimws(substr(metatxt[7], 60, - nchar(metatxt[7]))), - - L93X_m_IN=as.numeric(substr(metatxt[16], 65, 77)), - L93X_m_BH=as.numeric(substr(metatxt[16], 38, 50)), - - L93Y_m_IN=as.numeric(substr(metatxt[16], 79, 90)), - L93Y_m_BH=as.numeric(substr(metatxt[16], 52, 63)), - - surface_km2_IN=as.numeric(substr(metatxt[19], 52, 63)), - surface_km2_BH=as.numeric(substr(metatxt[19], 38, 50)), - - altitude_m_IN=as.numeric(substr(metatxt[20], 52, 63)), - altitude_m_BH=as.numeric(substr(metatxt[20], 38, 50)), - - debut=substr(metatxt[25], 38, 50), - fin=substr(metatxt[25], 52, 63), - - statut=iStatut[trimws(substr(metatxt[26], 38, 50))], - finalite=iFinalite[trimws(substr(metatxt[26], 52, - 56))], - type=iType[trimws(substr(metatxt[26], 58, 58))], - influence=iInfluence[trimws(substr(metatxt[26], 60, - 60))], - debit=iDebit[trimws(substr(metatxt[26], 62, 62))], - QBE=iQBE[trimws(substr(metatxt[26], 72, 72))], - QME=iQME[trimws(substr(metatxt[26], 74, 74))], - QHE=iQHE[trimws(substr(metatxt[26], 76, 76))], - file_path=file_path, - ) - + tibble( + # Station code + code=trimws(substr(metatxt[11], 38, + nchar(metatxt[11]))), + # Station name + nom=trimws(substr(metatxt[12], 39, + nchar(metatxt[12]))), + # Territory + territoire=trimws(substr(metatxt[13], 39, + nchar(metatxt[13]))), + # Administrator + gestionnaire=trimws(substr(metatxt[7], 60, + nchar(metatxt[7]))), + # Lambert 93 localisation + L93X_m_IN=as.numeric(substr(metatxt[16], 65, 77)), + L93X_m_BH=as.numeric(substr(metatxt[16], 38, 50)), + L93Y_m_IN=as.numeric(substr(metatxt[16], 79, 90)), + L93Y_m_BH=as.numeric(substr(metatxt[16], 52, 63)), + + # Surface + surface_km2_IN=as.numeric(substr(metatxt[19], 52, 63)), + surface_km2_BH=as.numeric(substr(metatxt[19], 38, 50)), + + # Elevation + altitude_m_IN=as.numeric(substr(metatxt[20], 52, 63)), + altitude_m_BH=as.numeric(substr(metatxt[20], 38, 50)), + + # Start and end of the data + debut=substr(metatxt[25], 38, 50), + fin=substr(metatxt[25], 52, 63), + + # Different other info about the flow quality and type + statut=iStatut[trimws(substr(metatxt[26], 38, 50))], + finalite=iFinalite[trimws(substr(metatxt[26], 52, 56))], + type=iType[trimws(substr(metatxt[26], 58, 58))], + influence=iInfluence[trimws(substr(metatxt[26], 60, 60))], + debit=iDebit[trimws(substr(metatxt[26], 62, 62))], + QBE=iQBE[trimws(substr(metatxt[26], 72, 72))], + QME=iQME[trimws(substr(metatxt[26], 74, 74))], + QHE=iQHE[trimws(substr(metatxt[26], 76, 76))], + + # The path to the data file of BH + file_path=file_path) + + # Adding of the hydrological region df_meta$region_hydro = iRegHydro[substr(df_meta$code, 1, 1)] - return (df_meta) } else { @@ -359,14 +373,13 @@ extract_meta = function (computer_data_path, filedir, filename, return (NULL) } } - # Example # df_meta = extract_meta( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", # "BanqueHydro_Export2021", # c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt')) - +### 3.2. Extraction of data # Extraction of data extract_data = function (computer_data_path, filedir, filename, verbose=TRUE) { @@ -454,7 +467,6 @@ extract_data = function (computer_data_path, filedir, filename, return (NULL) } } - # Example # df_data = extract_data( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", @@ -462,6 +474,7 @@ extract_data = function (computer_data_path, filedir, filename, # c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt')) +## 4. SHAPEFILE MANAGEMENT # Generates a list of shapefiles to draw a hydrological map of # the France ini_shapefile = function (computer_data_path, fr_shpdir, fr_shpname, bs_shpdir, bs_shpname, rv_shpdir, rv_shpname, riv=TRUE) { diff --git a/processing/format.R b/processing/format.R index 27b7e43..46d0fe7 100644 --- a/processing/format.R +++ b/processing/format.R @@ -23,160 +23,206 @@ # # processing/format.R # -# +# Manages all the format problem of data and info. Mainly problem of +# input and output of the 'StatsAnalysisTrend' package. It also allows +# to join different selections of station and to gets exact period of +# trend analysis. # Usefull library library(dplyr) -join = function (df_data_AG, df_data_NV, df_meta_AG, df_meta_NV) { - - if (!is.null(df_data_NV) & !is.null(df_data_AG)) { - - # Get the station in common - common = levels(factor(df_meta_NV[df_meta_NV$code %in% df_meta_AG$code,]$code)) - # Get the Nv station to add - NVadd = levels(factor(df_meta_NV[!(df_meta_NV$code %in% df_meta_AG$code),]$code)) - - # Select only the NV meta to add - df_meta_NVadd = df_meta_NV[df_meta_NV$code %in% NVadd,] - - df_meta_AG$source = 'AG' - df_meta_NVadd$source = 'NV' - - # Join NV data to AG data - df_meta = full_join(df_meta_AG, df_meta_NVadd) - - # Select only the NV data to add - df_data_NVadd = df_data_NV[df_data_NV$code %in% NVadd,] - # Join NV meta to AG meta - df_data = full_join(df_data_AG, df_data_NVadd) - - } else if (is.null(df_data_NV) & !is.null(df_data_AG)) { - df_meta_AG$source = 'AG' - df_meta = df_meta_AG - df_data = df_data_AG - - } else if (!is.null(df_data_NV) & is.null(df_data_AG)) { - df_meta_NV$source = 'NV' - df_meta = df_meta_NV - df_data = df_data_NV - - } else { - stop('No data') - } - - return (list(data=df_data, meta=df_meta)) -} - - -# Compute the start and the end of the period for a trend analysis -# according to the accessible data -get_period = function (per, df_Xtrend, df_XEx, df_Xlist) { - - # Convert results of trend to tibble - df_Xtrend = tibble(df_Xtrend) - # Fix the period start and end of the accessible period to a - # default date - df_Xtrend$period_start = as.Date("1970-01-01") - df_Xtrend$period_end = as.Date("1970-01-01") - - # Change the format of the date variable to date - df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code')) - df_XExtmp = df_Xlisttmp$data - - # For all the different group - for (g in df_Xlisttmp$info$group) { - # Get the analyse data associated to the group - df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,] - # Get the id in the trend result associated to the group - id = which(df_Xtrend$group1 == g) - - # Compute index of the nearest accessible start and end date - iStart = which.min(abs(df_XExtmp_code$Date - - as.Date(per[1]))) - iEnd = which.min(abs(df_XExtmp_code$Date - - as.Date(per[2]))) - - # Store the start and end of the trend analysis - df_Xtrend$period_start[id] = - as.Date(df_XExtmp_code$Date[iStart]) - df_Xtrend$period_end[id] = - as.Date(df_XExtmp_code$Date[iEnd]) - } - return (df_Xtrend) -} - - -# Prepare the data in order to have a list of a data tibble with date, group and flow column and a info tibble with the station code and group column to fit the entry of the 'StatsAnalysisTrend' package +## 1. INPUT +### 1.1. Preparation +# Prepares the data in order to have a list of a data tibble with +# date, group and flow column and a info tibble with the station code +# and group column to fit the entry of the 'extract.Var' function in +# the 'StatsAnalysisTrend' package prepare = function(df_data, colnamegroup=NULL) { - + + # Forces the column name to group to be a vector colnamegroup = c(colnamegroup) + # Converts it to index of the column to group colindgroup = which(colnames(df_data) == colnamegroup) + # Groups the data by those indexes df_data = group_by_at(df_data, colindgroup) + # Creates a new tibble of data with a group column data = tibble(Date=df_data$Date, group=group_indices(df_data), - Qm3s=df_data$Qm3s) + Qm3s=df_data$Qm3s) + + # Gets the different value of the group Gkey = group_keys(df_data) - + # Creates a new tibble of info of the group info = bind_cols(group=seq(1:nrow(Gkey)), Gkey) - return (list(data=data, info=info)) -} + # Stores data and info tibble as a list that match the entry of + # the 'extract.Var' function + res = list(data=data, info=info) + return (res) +} +### 1.2. Re-preparation +# Re-prepares the data in outing of the 'extract.Var' function in +# the 'StatsAnalysisTrend' package in order to fit again to the +# entry of the same function reprepare = function(df_XEx, df_Xlist, colnamegroup=NULL) { + # Changes the column name of the results of the + # 'extract.Var' function colnames(df_XEx) = c('Date', 'group', 'Qm3s') + # Converts Date column as character df_XEx$Date = as.character(df_XEx$Date) + # Takes the first date as example exDate = df_XEx$Date[1] + # Finds the number of dash in the date nbt = lengths(regmatches(exDate, gregexpr('-', exDate))) - + + # If there is only one dash if (nbt == 1) { - df_XEx$Date = paste(df_XEx$Date, '01', sep='-') + # Converts it to date from a year and a month + df_XEx$Date = paste(df_XEx$Date, '01', sep='-') + # If there is no dash } else if (nbt == 0) { - df_XEx$Date = paste(df_XEx$Date, '01', '01', sep='-') + # Converts it to date from only a year + df_XEx$Date = paste(df_XEx$Date, '01', '01', sep='-') + # If there is more than 2 dashes } else if (nbt != 2) { + This is not a classical date stop('erreur of date format') } - + # Recreates the outing of the 'extract.Var' function nicer df_XEx = bind_cols(Date=as.Date(df_XEx$Date, format="%Y-%m-%d"), df_XEx[-1], df_Xlist$info[df_XEx$group, 2:ncol(df_Xlist$info)]) - + # Prepares the nicer outing df_XlistEx = prepare(df_XEx, colnamegroup=colnamegroup) return (df_XlistEx) } +## 2. OUTPUT +# Cleans the trend results of the function 'Estimate.stats' in the +# 'StatsAnalysisTrend' package. It adds the station code and the +# intercept of the trend to the trend results. Also makes the data +# more presentable. clean = function (df_Xtrend, df_XEx, df_Xlist) { + # Reprepares the list of data and info in order to be presentable df_Xlist = reprepare(df_XEx, df_Xlist, colnamegroup=c('code')) - # print(df_Xlist) - + # Adds a column of station code df_Xlist$data$code = NA + # For all the group for (g in df_Xlist$info$group) { + # Adds the station code corresponding to each group info df_Xlist$data$code[which(df_Xlist$data$group == g)] = df_Xlist$info$code[df_Xlist$info$group == g] } - - # df_Xlist$data = df_Xlist$data[, !names(df_Xlist$data) == "group")] + # Adds the info to trend tibble df_Xtrend = bind_cols(df_Xtrend, df_Xlist$info[df_Xtrend$group1, 2:ncol(df_Xlist$info)]) - + # Renames the column of group of trend results colnames(df_Xtrend)[1] = 'group' - + # Adds the intercept value of trend df_Xtrend = get_intercept(df_Xtrend, df_Xlist, unit2day=365.25) - - # df_Xtrend$intercept = intercept + # Changes the position of the intercept column df_Xtrend = relocate(df_Xtrend, intercept, .after=trend) - return (list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info)) + # Creates a list of results to return + res = list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info) + return (res) +} + + +## 3. OTHER +### 3.1. Joining selection +# Joins tibbles of different selection of station as a unique one +join = function (df_data_AG, df_data_IN, df_meta_AG, df_meta_IN) { + + # If there is an INRAE and an Agence de l'eau Adour-Garonne selection + if (!is.null(df_data_IN) & !is.null(df_data_AG)) { + + # Gets the station in common + common = levels(factor(df_meta_IN[df_meta_IN$code %in% df_meta_AG$code,]$code)) + # Gets the Nv station to add + INadd = levels(factor(df_meta_IN[!(df_meta_IN$code %in% df_meta_AG$code),]$code)) + + # Selects only the IN meta to add + df_meta_INadd = df_meta_IN[df_meta_IN$code %in% INadd,] + + # Names the source of the selection + df_meta_AG$source = 'AG' + df_meta_INadd$source = 'IN' + + # Joins IN data to AG data + df_meta = full_join(df_meta_AG, df_meta_INadd) + + # Selects only the IN data to add + df_data_INadd = df_data_IN[df_data_IN$code %in% INadd,] + # Joins IN meta to AG meta + df_data = full_join(df_data_AG, df_data_INadd) + + # If there is just an Agence de l'eau Adour-Garonne selection + } else if (is.null(df_data_IN) & !is.null(df_data_AG)) { + df_meta_AG$source = 'AG' + df_meta = df_meta_AG + df_data = df_data_AG + + # If there is just an INRAE selection + } else if (!is.null(df_data_IN) & is.null(df_data_AG)) { + df_meta_IN$source = 'IN' + df_meta = df_meta_IN + df_data = df_data_IN + + # If there is no selection + } else { + stop('No data') + } + return (list(data=df_data, meta=df_meta)) +} + +### 3.2. Period of trend +# Compute the start and the end of the period for a trend analysis +# according to the accessible data +get_period = function (per, df_Xtrend, df_XEx, df_Xlist) { + + # Converts results of trend to tibble + df_Xtrend = tibble(df_Xtrend) + # Fix the period start and end of the accessible period to a + # default date + df_Xtrend$period_start = as.Date("1970-01-01") + df_Xtrend$period_end = as.Date("1970-01-01") + + # Changes the format of the date variable to date + df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code')) + df_XExtmp = df_Xlisttmp$data + + # For all the different group + for (g in df_Xlisttmp$info$group) { + # Gets the analyse data associated to the group + df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,] + # Gets the id in the trend result associated to the group + id = which(df_Xtrend$group1 == g) + + # Computes index of the nearest accessible start and end date + iStart = which.min(abs(df_XExtmp_code$Date + - as.Date(per[1]))) + iEnd = which.min(abs(df_XExtmp_code$Date + - as.Date(per[2]))) + + # Stores the start and end of the trend analysis + df_Xtrend$period_start[id] = + as.Date(df_XExtmp_code$Date[iStart]) + df_Xtrend$period_end[id] = + as.Date(df_XExtmp_code$Date[iEnd]) + } + return (df_Xtrend) } -- GitLab