From b6a9422d25b8811344b84fc384abb837cecc959e Mon Sep 17 00:00:00 2001 From: "louis.heraut" <louis.heraut@inrae.fr> Date: Tue, 16 Nov 2021 10:39:33 +0100 Subject: [PATCH] Extract NV and BH --- processing/{extract.R => extractBH.R} | 49 ++++---- processing/extractNV.R | 156 ++++++++++++++++++++++++++ script.R | 17 +-- 3 files changed, 191 insertions(+), 31 deletions(-) rename processing/{extract.R => extractBH.R} (88%) create mode 100644 processing/extractNV.R diff --git a/processing/extract.R b/processing/extractBH.R similarity index 88% rename from processing/extract.R rename to processing/extractBH.R index a636b77..e2e45a6 100644 --- a/processing/extract.R +++ b/processing/extractBH.R @@ -52,7 +52,7 @@ iQHE = c('0'='qualité hautes eaux inconnue', # Get the selection of data from the 'Liste-station_RRSE' file and the BanqueHydro directory get_selection = function (computer_data_path, listdir, listname, cnames=c('code','station', 'BV_km2', 'axe_principal_concerne', 'longueur_serie', 'commentaires', 'choix'), - cisnum=c('BV_km2', 'longueur_serie')) { + c_num=c('BV_km2', 'longueur_serie')) { # Get the file path to the data list_path = file.path(computer_data_path, listdir, listname) @@ -66,28 +66,31 @@ get_selection = function (computer_data_path, listdir, listname, splits <- lapply(splits, function(x) x$text) # Combine columns back together in wide format - df_list <- bind_cols(splits) + df_selec <- bind_cols(splits) - df_list = df_list[-1,] + df_selec = df_selec[-1,] - names(df_list) = cnames + names(df_selec) = cnames - for (c in cisnum) { - df_list$c = as.numeric(sub(",", ".", - pull(df_list, c))) + for (c in c_num) { + df_selec$c = as.numeric(sub(",", ".", + pull(df_selec, c))) } - df_selec = df_list[df_list$choix == 'A garder' | df_list$choix == 'Ajout',] + selec = (df_selec$choix == 'A garder' | df_selec$choix == 'Ajout') df_selec = bind_cols(df_selec, - filename=paste(df_selec$code, '_HYDRO_QJM.txt', sep='')) + filename=paste(df_selec$code, '_HYDRO_QJM.txt', sep=''), + ok=selec + ) + return (df_selec) } # Example # df_selec = get_selection( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", -# "liste_station", +# "", # "Liste-station_RRSE.docx", # cnames=c('code','station', # 'BV_km2', @@ -95,12 +98,12 @@ get_selection = function (computer_data_path, listdir, listname, # 'longueur_serie', # 'commentaires', # 'choix'), -# cisnum=c('BV_km2', +# c_num=c('BV_km2', # 'longueur_serie')) # Extraction of information -extract_info = function (computer_data_path, filedir, filename, verbose=TRUE) { +extractBH_info = function (computer_data_path, filedir, filename, verbose=TRUE) { # Convert the filename in vector filename = c(filename) @@ -139,9 +142,9 @@ extract_info = function (computer_data_path, filedir, filename, verbose=TRUE) { # Concatenate by raw data frames created by this function when filename correspond to only one filename df_info = rbind(df_info, - extract_info(computer_data_path, - filedir, - f)) + extractBH_info(computer_data_path, + filedir, + f)) } # Set the rownames by default (to avoid strange numbering) @@ -173,7 +176,7 @@ extract_info = function (computer_data_path, filedir, filename, verbose=TRUE) { territoire=trimws(substr(infotxt[13], 39, nchar(infotxt[13]))), L93X=as.numeric(substr(infotxt[16], 38, 50)), L93Y=as.numeric(substr(infotxt[16], 52, 63)), - surface=as.numeric(substr(infotxt[19], 38, 50)), + surface_km2=as.numeric(substr(infotxt[19], 38, 50)), statut=iStatut[trimws(substr(infotxt[26], 38, 50))], finalite=iFinalite[trimws(substr(infotxt[26], 52, 56))], type=iType[trimws(substr(infotxt[26], 58, 58))], @@ -194,14 +197,14 @@ extract_info = function (computer_data_path, filedir, filename, verbose=TRUE) { } # Example -# df_info = extract_info( +# df_info = extractBH_info( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", # '', # c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt')) # Extraction of data -extract_data = function (computer_data_path, filedir, filename, verbose=TRUE) { +extractBH_data = function (computer_data_path, filedir, filename, verbose=TRUE) { # Convert the filename in vector filename = c(filename) @@ -239,9 +242,9 @@ extract_data = function (computer_data_path, filedir, filename, verbose=TRUE) { # Concatenate by raw data frames created by this function when filename correspond to only one filename df_data = rbind(df_data, - extract_data(computer_data_path, - filedir, - f)) + extractBH_data(computer_data_path, + filedir, + f)) } # Set the rownames by default (to avoid strange numbering) @@ -270,7 +273,7 @@ extract_data = function (computer_data_path, filedir, filename, verbose=TRUE) { skip=41) # Extract all the information for the station - df_info = extract_info(computer_data_path, filedir, filename, verbose=FALSE) + df_info = extractBH_info(computer_data_path, filedir, filename, verbose=FALSE) # Get the code of the station code = df_info$code # Create a tibble with the date as Date class and the code of the station @@ -289,7 +292,7 @@ extract_data = function (computer_data_path, filedir, filename, verbose=TRUE) { } # Example -# df_data = extract_data( +# df_data = extractBH_data( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", # '', # c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt')) diff --git a/processing/extractNV.R b/processing/extractNV.R new file mode 100644 index 0000000..4bb8d7c --- /dev/null +++ b/processing/extractNV.R @@ -0,0 +1,156 @@ +# Usefull library +library(tools) +library(dplyr) + + +# Extraction of information +extractNVlist_info = function (computer_data_path, filedir, listdir, listname, verbose=TRUE) { + + # Print information if asked + if (verbose) { + print(paste("extraction of info for file :", listname)) + } + + # Get the file path to the data + list_path = file.path(computer_data_path, listdir, listname) + + # Extract the data as a data frame + df_info = read.table(list_path, + header=TRUE) + + # Create a filelist to store all the filename + codelist = c() + # Get all the filename in the data directory selected + filelist_tmp = list.files(file.path(computer_data_path, + filedir)) + + # For all the filename in the directory selected + for (f in filelist_tmp) { + # If the filename extention is 'txt' + if (file_ext(f) == 'txt') { + # Store the filename in the filelist + codelist = c(codelist, gsub('.txt', '', f)) + } + } + + exist = df_info$CODE %in% codelist + missing = codelist[!(codelist %in% df_info$CODE)] + print(paste('station missing :', missing)) + + df_info = df_info[exist,] + + # Create a tibble with all the information needed + df_info = + tibble(code=as.character(df_info$CODE), + nom=as.character(df_info$NOM), + L93X=df_info$X_L2E, + L93Y=df_info$Y_L2E, + surface_km2=df_info$S_km2, + altitude_m=df_info$Alt, + file_path=paste(df_info$CODE, '.txt', sep='') + ) + + return (df_info) +} + +# Example +# df_info = extractNVlist_info( +# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", +# 'France207', +# '', +# 'liste_bv_principaux_global.txt') + + +# Extraction of data +extractNV_data = function (computer_data_path, filedir, filename, verbose=TRUE) { + + # Convert the filename in vector + filename = c(filename) + + # If the filename is 'all' or regroup more than one filename + if (all(filename == 'all') | length(filename) > 1) { + + # If the filename is 'all' + if (all(filename == 'all')) { + # Create a filelist to store all the filename + filelist = c() + # Get all the filename in the data directory selected + filelist_tmp = list.files(file.path(computer_data_path, + filedir)) + + # For all the filename in the directory selected + for (f in filelist_tmp) { + # If the filename extention is 'txt' + if (file_ext(f) == 'txt') { + # Store the filename in the filelist + filelist = c(filelist, f) + } + } + # If the filename regroup more than one filename + } else if (length(filename > 1)) { + # The filelist correspond to the filename + filelist = filename + } + + # Create a blank data frame + df_data = data.frame() + + # For all the file in the filelist + for (f in filelist) { + + # Concatenate by raw data frames created by this function when filename correspond to only one filename + df_data = rbind(df_data, + extractNV_data(computer_data_path, + filedir, + f)) + } + + # Set the rownames by default (to avoid strange numbering) + rownames(df_data) = NULL + return (df_data) + } + + # Get the filename from the vector + filename = filename[1] + + # Print information if asked + if (verbose) { + print(paste("extraction of data for file :", filename)) + } + + # Get the file path to the data + file_path = file.path(computer_data_path, filedir, filename) + + if (file.exists(file_path)) { + + # Extract the data as a data frame + df_data = read.table(file_path, + header=FALSE, + skip=1, + na.strings=c('-1', '-99.000')) + + # Create a tibble with the date as Date class and the code of the station + date = paste(df_data[,1], + df_data[,2], + df_data[,3], + sep='-') + + df_data = tibble(Date=as.Date(as.character(date), + format="%Y-%m-%d"), + Qm3s=df_data[,4], + QCode=df_data[,5], + code=gsub('.txt', '', filename)) + + return (df_data) + + } else { + print(paste('filename', file_path, 'do not exist')) + return (NULL) + } +} + +# Example +# df_data = extractNV_data( + # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", + # 'France207', + # c('O0015310.txt', 'Q0214010.txt', 'P0115020.txt')) diff --git a/script.R b/script.R index 37994b7..ab8f9e0 100644 --- a/script.R +++ b/script.R @@ -46,7 +46,8 @@ BHdatadir = setwd(computer_work_path) # Sourcing R file -source('processing/extract.R') +source('processing/extractBH.R') +source('processing/extractNV.R') source('processing/analyse.R') source('plotting/panel.R') @@ -80,21 +81,21 @@ if (is.character(BHlistdir) & is.character(BHlistname) & is.character(BHdatadir) 'longueur_serie', 'commentaires', 'choix'), - cisnum=c('BV_km2', + c_num=c('BV_km2', 'longueur_serie')) BHfiledir = BHdatadir - BHfilename = df_selec$filename + BHfilename = df_selec[df_selec$ok,]$filename } # Extract information about selected stations -df_info = extract_info(computer_data_path, BHfiledir, BHfilename) +df_info_BH = extractBH_info(computer_data_path, BHfiledir, BHfilename) # Extract data about selected stations -df_data = extract_data(computer_data_path, BHfiledir, BHfilename) +df_data_BH = extractBH_data(computer_data_path, BHfiledir, BHfilename) # Plot time panel of debit by stations -panel(df_data, df_info, figdir, BHfiledir) -# panel(df_data, df_info, figdir, BHfiledir, is_sqrt=TRUE) +panel(df_data_BH, df_info_BH, figdir, BHfiledir) +# panel(df_data_BH, df_info_BH, figdir, BHfiledir, is_sqrt=TRUE) # Compute gap parameters for stations -df_lac = get_lacune(df_data, df_info) +df_lac = get_lacune(df_data_BH, df_info_BH) -- GitLab