From 709cea162a434d62630f98f5baf8659ba3efdd6e Mon Sep 17 00:00:00 2001
From: "louis.heraut" <louis.heraut@inrae.fr>
Date: Thu, 6 Jan 2022 21:57:56 +0100
Subject: [PATCH] Commenting

---
 processing/extract.R | 107 ++++++++++---------
 processing/format.R  | 246 +++++++++++++++++++++++++------------------
 2 files changed, 206 insertions(+), 147 deletions(-)

diff --git a/processing/extract.R b/processing/extract.R
index 9db9688..522fa81 100644
--- a/processing/extract.R
+++ b/processing/extract.R
@@ -23,7 +23,12 @@
 #
 # processing/extract.R
 #
-# 
+# Regroups functions to generation of generical station selection
+# according to pre-existing directory of data or specific other
+# selection format as '.docx' file from Agence de l'eau Adour-Garonne.
+# Also useful to extract the data and the metadata present in the
+# Banque Hydro files from a selection of station. Manages also
+# shapefiles loading.
 
 
 # Usefull library
@@ -121,6 +126,8 @@ iRegHydro = c('D'='Affluents du Rhin',
               '4'='Réunion')
 
 
+## 2. SELECTION
+### 2.1. Creation of selection
 # Create a txt file that resume all the station data files present
 # in a filedir
 create_selection = function (computer_data_path, filedir, outname) {
@@ -152,14 +159,13 @@ create_selection = function (computer_data_path, filedir, outname) {
     write.table(df_file, outfile, sep=";", col.names=TRUE, quote=FALSE)
     return (NULL)
 }
-
 # Example
 # create_selection(
 #     "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
 #     "France207",
 #     "nival_selection.txt")
 
-
+### 2.2. Agence de l'eau Adour-Garonne selection
 # Gets the selection of station from the 'Liste-station_RRSE.docx' file
 get_selection_AG = function (computer_data_path, listdir, listname,
                              cnames=c('code','station', 'BV_km2',
@@ -205,7 +211,6 @@ get_selection_AG = function (computer_data_path, listdir, listname,
                          ok=selec)
     return (df_selec)
 }
-
 # Example
 # df_selec_AG = get_selection_AG(
     # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
@@ -220,7 +225,7 @@ get_selection_AG = function (computer_data_path, listdir, listname,
     # c_num=c('BV_km2',
             # 'longueur_serie'))
 
-
+### 2.3. INRAE selection
 # Gets the selection of station from the selection txt file generated
 # by the 'create_selection' function
 get_selection_IN = function (computer_data_path, listdir, listname) {
@@ -245,6 +250,8 @@ get_selection_IN = function (computer_data_path, listdir, listname) {
     # "nival_selection.txt")
 
 
+## 3. EXTRACTION
+### 3.1. Extraction of metadata
 # Extraction of metadata of stations
 extract_meta = function (computer_data_path, filedir, filename,
                          verbose=TRUE) {
@@ -311,47 +318,54 @@ extract_meta = function (computer_data_path, filedir, filename,
         metatxt = c(readLines(file_path, n=41, encoding="UTF-8"))
         
         # Create a tibble with all the metadata needed
+        # (IN for INRAE data and BH for BH data)
         df_meta =
-            tibble(code=trimws(substr(metatxt[11], 38,
-                                      nchar(metatxt[11]))),
-                   nom=trimws(substr(metatxt[12], 39,
-                                     nchar(metatxt[12]))),
-                   territoire=trimws(substr(metatxt[13], 39,
-                                            nchar(metatxt[13]))),
-
-                   gestionnaire=trimws(substr(metatxt[7], 60,
-                                              nchar(metatxt[7]))),
-                   
-                   L93X_m_IN=as.numeric(substr(metatxt[16], 65, 77)),
-                   L93X_m_BH=as.numeric(substr(metatxt[16], 38, 50)),
-
-                   L93Y_m_IN=as.numeric(substr(metatxt[16], 79, 90)),
-                   L93Y_m_BH=as.numeric(substr(metatxt[16], 52, 63)),
-
-                   surface_km2_IN=as.numeric(substr(metatxt[19], 52, 63)),
-                   surface_km2_BH=as.numeric(substr(metatxt[19], 38, 50)),
-
-                   altitude_m_IN=as.numeric(substr(metatxt[20], 52, 63)),
-                   altitude_m_BH=as.numeric(substr(metatxt[20], 38, 50)),
-
-                   debut=substr(metatxt[25], 38, 50),
-                   fin=substr(metatxt[25], 52, 63),
-                   
-                   statut=iStatut[trimws(substr(metatxt[26], 38, 50))],
-                   finalite=iFinalite[trimws(substr(metatxt[26], 52,
-                                                    56))],
-                   type=iType[trimws(substr(metatxt[26], 58, 58))],
-                   influence=iInfluence[trimws(substr(metatxt[26], 60,
-                                                      60))],
-                   debit=iDebit[trimws(substr(metatxt[26], 62, 62))],
-                   QBE=iQBE[trimws(substr(metatxt[26], 72, 72))],
-                   QME=iQME[trimws(substr(metatxt[26], 74, 74))],
-                   QHE=iQHE[trimws(substr(metatxt[26], 76, 76))],
-                   file_path=file_path,
-                   )
-
+            tibble(
+                # Station code
+                code=trimws(substr(metatxt[11], 38,
+                                   nchar(metatxt[11]))),
+                # Station name
+                nom=trimws(substr(metatxt[12], 39,
+                                  nchar(metatxt[12]))),
+                # Territory
+                territoire=trimws(substr(metatxt[13], 39,
+                                         nchar(metatxt[13]))),
+                # Administrator
+                gestionnaire=trimws(substr(metatxt[7], 60,
+                                           nchar(metatxt[7]))),
+                # Lambert 93 localisation
+                L93X_m_IN=as.numeric(substr(metatxt[16], 65, 77)),
+                L93X_m_BH=as.numeric(substr(metatxt[16], 38, 50)),
+                L93Y_m_IN=as.numeric(substr(metatxt[16], 79, 90)),
+                L93Y_m_BH=as.numeric(substr(metatxt[16], 52, 63)),
+
+                # Surface
+                surface_km2_IN=as.numeric(substr(metatxt[19], 52, 63)),
+                surface_km2_BH=as.numeric(substr(metatxt[19], 38, 50)),
+
+                # Elevation
+                altitude_m_IN=as.numeric(substr(metatxt[20], 52, 63)),
+                altitude_m_BH=as.numeric(substr(metatxt[20], 38, 50)),
+
+                # Start and end of the data
+                debut=substr(metatxt[25], 38, 50),
+                fin=substr(metatxt[25], 52, 63),
+
+                # Different other info about the flow quality and type
+                statut=iStatut[trimws(substr(metatxt[26], 38, 50))],
+                finalite=iFinalite[trimws(substr(metatxt[26], 52, 56))],
+                type=iType[trimws(substr(metatxt[26], 58, 58))],
+                influence=iInfluence[trimws(substr(metatxt[26], 60, 60))],
+                debit=iDebit[trimws(substr(metatxt[26], 62, 62))],
+                QBE=iQBE[trimws(substr(metatxt[26], 72, 72))],
+                QME=iQME[trimws(substr(metatxt[26], 74, 74))],
+                QHE=iQHE[trimws(substr(metatxt[26], 76, 76))],
+
+                # The path to the data file of BH
+                file_path=file_path)
+        
+        # Adding of the hydrological region
         df_meta$region_hydro = iRegHydro[substr(df_meta$code, 1, 1)]
-
         return (df_meta)
 
     } else {
@@ -359,14 +373,13 @@ extract_meta = function (computer_data_path, filedir, filename,
         return (NULL)
     }
 }
-
 # Example
 # df_meta = extract_meta(
 #     "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
 #     "BanqueHydro_Export2021",
 #     c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
 
-
+### 3.2. Extraction of data
 # Extraction of data
 extract_data = function (computer_data_path, filedir, filename,
                          verbose=TRUE) {
@@ -454,7 +467,6 @@ extract_data = function (computer_data_path, filedir, filename,
         return (NULL)
     }
 }
-
 # Example
 # df_data = extract_data(
 #     "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
@@ -462,6 +474,7 @@ extract_data = function (computer_data_path, filedir, filename,
 #     c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
 
 
+## 4. SHAPEFILE MANAGEMENT
 # Generates a list of shapefiles to draw a hydrological map of
 # the France
 ini_shapefile = function (computer_data_path, fr_shpdir, fr_shpname, bs_shpdir, bs_shpname, rv_shpdir, rv_shpname, riv=TRUE) {
diff --git a/processing/format.R b/processing/format.R
index 27b7e43..46d0fe7 100644
--- a/processing/format.R
+++ b/processing/format.R
@@ -23,160 +23,206 @@
 #
 # processing/format.R
 #
-# 
+# Manages all the format problem of data and info. Mainly problem of
+# input and output of the 'StatsAnalysisTrend' package. It also allows
+# to join different selections of station and to gets exact period of
+# trend analysis.
 
 
 # Usefull library
 library(dplyr)
 
 
-join = function (df_data_AG, df_data_NV, df_meta_AG, df_meta_NV) {
-
-    if (!is.null(df_data_NV) & !is.null(df_data_AG)) {
-
-        # Get the station in common
-        common = levels(factor(df_meta_NV[df_meta_NV$code %in% df_meta_AG$code,]$code)) 
-        # Get the Nv station to add
-        NVadd = levels(factor(df_meta_NV[!(df_meta_NV$code %in% df_meta_AG$code),]$code))
-
-        # Select only the NV meta to add
-        df_meta_NVadd = df_meta_NV[df_meta_NV$code %in% NVadd,]
-
-        df_meta_AG$source = 'AG'
-        df_meta_NVadd$source = 'NV'
-        
-        # Join NV data to AG data
-        df_meta = full_join(df_meta_AG, df_meta_NVadd)
-
-        # Select only the NV data to add
-        df_data_NVadd = df_data_NV[df_data_NV$code %in% NVadd,]
-        # Join NV meta to AG meta
-        df_data = full_join(df_data_AG, df_data_NVadd)
-
-    } else if (is.null(df_data_NV) & !is.null(df_data_AG)) {
-        df_meta_AG$source = 'AG'
-        df_meta = df_meta_AG
-        df_data = df_data_AG
-
-    } else if (!is.null(df_data_NV) & is.null(df_data_AG)) {
-        df_meta_NV$source = 'NV'
-        df_meta = df_meta_NV
-        df_data = df_data_NV
-
-    } else {
-        stop('No data')
-    }
-    
-    return (list(data=df_data, meta=df_meta))
-}
-
-
-# Compute the start and the end of the period for a trend analysis
-# according to the accessible data 
-get_period = function (per, df_Xtrend, df_XEx, df_Xlist) {
-
-    # Convert results of trend to tibble
-    df_Xtrend = tibble(df_Xtrend)
-    # Fix the period start and end of the accessible period to a
-    # default date
-    df_Xtrend$period_start = as.Date("1970-01-01")
-    df_Xtrend$period_end = as.Date("1970-01-01")
-
-    # Change the format of the date variable to date
-    df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
-    df_XExtmp = df_Xlisttmp$data
-
-    # For all the different group
-    for (g in df_Xlisttmp$info$group) {
-        # Get the analyse data associated to the group
-        df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,]
-        # Get the id in the trend result associated to the group
-        id = which(df_Xtrend$group1 == g)
-
-        # Compute index of the nearest accessible start and end date
-        iStart = which.min(abs(df_XExtmp_code$Date
-                               - as.Date(per[1])))
-        iEnd = which.min(abs(df_XExtmp_code$Date 
-                             - as.Date(per[2])))
-
-        # Store the start and end of the trend analysis
-        df_Xtrend$period_start[id] =
-            as.Date(df_XExtmp_code$Date[iStart])
-        df_Xtrend$period_end[id] =
-            as.Date(df_XExtmp_code$Date[iEnd])
-    }
-    return (df_Xtrend)
-}
-
-
-# Prepare the data in order to have a list of a data tibble with date, group and flow column and a info tibble with the station code and group column to fit the entry of the 'StatsAnalysisTrend' package
+## 1. INPUT
+### 1.1. Preparation
+# Prepares the data in order to have a list of a data tibble with
+# date, group and flow column and a info tibble with the station code
+# and group column to fit the entry of the 'extract.Var' function in
+# the 'StatsAnalysisTrend' package
 prepare = function(df_data, colnamegroup=NULL) {
-            
+    
+    # Forces the column name to group to be a vector 
     colnamegroup = c(colnamegroup)
+    # Converts it to index of the column to group
     colindgroup = which(colnames(df_data) == colnamegroup)
+    # Groups the data by those indexes
     df_data = group_by_at(df_data, colindgroup)
 
+    # Creates a new tibble of data with a group column
     data = tibble(Date=df_data$Date, 
                   group=group_indices(df_data),
-                  Qm3s=df_data$Qm3s) 
+                  Qm3s=df_data$Qm3s)
+    
+    # Gets the different value of the group
     Gkey = group_keys(df_data)
-
+    # Creates a new tibble of info of the group
     info = bind_cols(group=seq(1:nrow(Gkey)),
                      Gkey)
-    return (list(data=data, info=info))
-}
 
+    # Stores data and info tibble as a list that match the entry of
+    # the 'extract.Var' function
+    res = list(data=data, info=info)
+    return (res)
+}
 
+### 1.2. Re-preparation
+# Re-prepares the data in outing of the 'extract.Var' function in
+# the 'StatsAnalysisTrend' package in order to fit again to the
+# entry of the same function
 reprepare = function(df_XEx, df_Xlist, colnamegroup=NULL) {
 
+    # Changes the column name of the results of the
+    # 'extract.Var' function
     colnames(df_XEx) = c('Date', 'group', 'Qm3s')
 
+    # Converts Date column as character
     df_XEx$Date = as.character(df_XEx$Date)
+    # Takes the first date as example
     exDate = df_XEx$Date[1]
+    # Finds the number of dash in the date
     nbt = lengths(regmatches(exDate, gregexpr('-', exDate)))
-    
+
+    # If there is only one dash
     if (nbt == 1) {
-         df_XEx$Date = paste(df_XEx$Date, '01', sep='-')
+        # Converts it to date from a year and a month
+        df_XEx$Date = paste(df_XEx$Date, '01', sep='-')
+    # If there is no dash
     } else if (nbt == 0) {
-         df_XEx$Date = paste(df_XEx$Date, '01', '01', sep='-')
+        # Converts it to date from only a year
+        df_XEx$Date = paste(df_XEx$Date, '01', '01', sep='-')
+    # If there is more than 2 dashes
     } else if (nbt != 2) {
+        This is not a classical date
         stop('erreur of date format')
     }
-
+    # Recreates the outing of the 'extract.Var' function nicer
     df_XEx = bind_cols(Date=as.Date(df_XEx$Date,
                                        format="%Y-%m-%d"),
                        df_XEx[-1],
                        df_Xlist$info[df_XEx$group,
                                      2:ncol(df_Xlist$info)])
-
+    # Prepares the nicer outing
     df_XlistEx = prepare(df_XEx, colnamegroup=colnamegroup)
     return (df_XlistEx)
 }
 
 
+## 2. OUTPUT
+# Cleans the trend results of the function 'Estimate.stats' in the
+# 'StatsAnalysisTrend' package. It adds the station code and the
+# intercept of the trend to the trend results. Also makes the data
+# more presentable.
 clean = function (df_Xtrend, df_XEx, df_Xlist) {
 
+    # Reprepares the list of data and info in order to be presentable
     df_Xlist = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
 
-    # print(df_Xlist)
-
+    # Adds a column of station code
     df_Xlist$data$code = NA
+    # For all the group
     for (g in df_Xlist$info$group) {
+        # Adds the station code corresponding to each group info
         df_Xlist$data$code[which(df_Xlist$data$group == g)] = df_Xlist$info$code[df_Xlist$info$group == g]
     }
-    
-    # df_Xlist$data = df_Xlist$data[, !names(df_Xlist$data) == "group")]
 
+    # Adds the info to trend tibble
     df_Xtrend = bind_cols(df_Xtrend,
                           df_Xlist$info[df_Xtrend$group1,
                                        2:ncol(df_Xlist$info)])
-
+    # Renames the column of group of trend results
     colnames(df_Xtrend)[1] = 'group'
-
+    # Adds the intercept value of trend
     df_Xtrend = get_intercept(df_Xtrend, df_Xlist, unit2day=365.25)
-
-    # df_Xtrend$intercept = intercept
+    # Changes the position of the intercept column
     df_Xtrend = relocate(df_Xtrend, intercept, .after=trend)
 
-    return (list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info))
+    # Creates a list of results to return
+    res = list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info)
+    return (res)
+}
+
+
+## 3. OTHER
+### 3.1. Joining selection
+# Joins tibbles of different selection of station as a unique one
+join = function (df_data_AG, df_data_IN, df_meta_AG, df_meta_IN) {
+
+    # If there is an INRAE and an Agence de l'eau Adour-Garonne selection
+    if (!is.null(df_data_IN) & !is.null(df_data_AG)) {
+
+        # Gets the station in common
+        common = levels(factor(df_meta_IN[df_meta_IN$code %in% df_meta_AG$code,]$code)) 
+        # Gets the Nv station to add
+        INadd = levels(factor(df_meta_IN[!(df_meta_IN$code %in% df_meta_AG$code),]$code))
+
+        # Selects only the IN meta to add
+        df_meta_INadd = df_meta_IN[df_meta_IN$code %in% INadd,]
+
+        # Names the source of the selection
+        df_meta_AG$source = 'AG'
+        df_meta_INadd$source = 'IN'
+        
+        # Joins IN data to AG data
+        df_meta = full_join(df_meta_AG, df_meta_INadd)
+
+        # Selects only the IN data to add
+        df_data_INadd = df_data_IN[df_data_IN$code %in% INadd,]
+        # Joins IN meta to AG meta
+        df_data = full_join(df_data_AG, df_data_INadd)
+
+    # If there is just an Agence de l'eau Adour-Garonne selection
+    } else if (is.null(df_data_IN) & !is.null(df_data_AG)) {
+        df_meta_AG$source = 'AG'
+        df_meta = df_meta_AG
+        df_data = df_data_AG
+        
+    # If there is just an INRAE selection
+    } else if (!is.null(df_data_IN) & is.null(df_data_AG)) {
+        df_meta_IN$source = 'IN'
+        df_meta = df_meta_IN
+        df_data = df_data_IN
+
+    # If there is no selection
+    } else {
+        stop('No data')
+    }
+    return (list(data=df_data, meta=df_meta))
+}
+
+### 3.2. Period of trend
+# Compute the start and the end of the period for a trend analysis
+# according to the accessible data 
+get_period = function (per, df_Xtrend, df_XEx, df_Xlist) {
+
+    # Converts results of trend to tibble
+    df_Xtrend = tibble(df_Xtrend)
+    # Fix the period start and end of the accessible period to a
+    # default date
+    df_Xtrend$period_start = as.Date("1970-01-01")
+    df_Xtrend$period_end = as.Date("1970-01-01")
+
+    # Changes the format of the date variable to date
+    df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
+    df_XExtmp = df_Xlisttmp$data
+
+    # For all the different group
+    for (g in df_Xlisttmp$info$group) {
+        # Gets the analyse data associated to the group
+        df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,]
+        # Gets the id in the trend result associated to the group
+        id = which(df_Xtrend$group1 == g)
+
+        # Computes index of the nearest accessible start and end date
+        iStart = which.min(abs(df_XExtmp_code$Date
+                               - as.Date(per[1])))
+        iEnd = which.min(abs(df_XExtmp_code$Date 
+                             - as.Date(per[2])))
+
+        # Stores the start and end of the trend analysis
+        df_Xtrend$period_start[id] =
+            as.Date(df_XExtmp_code$Date[iStart])
+        df_Xtrend$period_end[id] =
+            as.Date(df_XExtmp_code$Date[iEnd])
+    }
+    return (df_Xtrend)
 }
-- 
GitLab