Commit abaf448d authored by Heraut Louis's avatar Heraut Louis
Browse files

Add of selection file

parent 513e64e3
No related merge requests found
Showing with 126 additions and 17 deletions
+126 -17
# Useful libraries
library(tools)
library(dplyr)
library(officer)
# General information on station
......@@ -48,8 +49,76 @@ iQHE = c('0'='qualité hautes eaux inconnue',
'2'='qualité hautes eaux douteuse')
# Get the selection of stations from the 'Liste-station_RRSE' .docx file.
#
# Reads the table cells of the Word document located at
# file.path(computer_data_path, listdir, listname), rebuilds them as a wide
# data frame, keeps only the stations flagged 'A garder' or 'Ajout' in the
# 'choix' column and adds the name of the matching BanqueHydro data file.
#
# Arguments:
#   computer_data_path  root directory of the data
#   listdir             sub-directory (relative to computer_data_path) that
#                       holds the list file
#   listname            name of the .docx list file
#   cnames              column names to give to the table, in table order;
#                       must contain 'code' and 'choix'
#   cisnum              names (taken from cnames) of the columns to convert
#                       to numeric; a decimal comma is accepted
#
# Returns:
#   A data frame with the columns cnames plus a 'filename' column equal to
#   '<code>_HYDRO_QJM.txt', restricted to the selected stations.
get_selection = function (computer_data_path, listdir, listname,
                          cnames=c('code','station', 'BV_km2', 'axe_principal_concerne', 'longueur_serie', 'commentaires', 'choix'),
                          cisnum=c('BV_km2', 'longueur_serie')) {

    # Get the file path to the data
    list_path = file.path(computer_data_path, listdir, listname)

    # Read the document and keep only the non-header table cells
    sample_data = read_docx(list_path)
    content = docx_summary(sample_data)
    table_cells = content %>% filter(content_type == "table cell")
    table_data = table_cells %>%
        filter(!is_header) %>%
        select(row_id, cell_id, text)

    # Split the cells by column index and keep only their text
    splits = split(table_data, table_data$cell_id)
    splits = lapply(splits, function(x) x$text)

    # Combine columns back together in wide format
    df_list = bind_cols(splits)
    # Drop the first row (presumably the title row of the table, which is
    # not flagged as a header by docx_summary -- TODO confirm)
    df_list = df_list[-1,]

    # Fail early with a clear message if the table layout is unexpected
    if (ncol(df_list) != length(cnames)) {
        stop("The table in '", list_path, "' has ", ncol(df_list),
             " columns but ", length(cnames), " column names were given",
             call.=FALSE)
    }
    names(df_list) = cnames

    # Convert the requested columns to numeric. The column has to be
    # addressed with [[ ]]: 'df_list$col' would create a column literally
    # named 'col' instead of using the value of the loop variable.
    for (col in cisnum) {
        df_list[[col]] = as.numeric(sub(",", ".", df_list[[col]]))
    }

    # Keep only the selected stations (%in% never returns NA, so rows
    # with a missing choice are dropped instead of yielding NA rows)
    df_selec = df_list[df_list$choix %in% c('A garder', 'Ajout'),]

    # Add the name of the BanqueHydro file associated to each station
    df_selec = bind_cols(df_selec,
                         filename=paste0(df_selec$code, '_HYDRO_QJM.txt'))
    return (df_selec)
}
# Example
# df_selec = get_selection(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "liste_station",
# "Liste-station_RRSE.docx",
# cnames=c('code','station',
# 'BV_km2',
# 'axe_principal_concerne',
# 'longueur_serie',
# 'commentaires',
# 'choix'),
# cisnum=c('BV_km2',
# 'longueur_serie'))
# Extraction of information
extract_info = function (data_path, filedir, filename, verbose=TRUE) {
extract_info = function (computer_data_path, filedir, filename, verbose=TRUE) {
# Convert the filename in vector
filename = c(filename)
......@@ -62,7 +131,7 @@ extract_info = function (data_path, filedir, filename, verbose=TRUE) {
# Create a filelist to store all the filename
filelist = c()
# Get all the filename in the data directory selected
filelist_tmp = list.files(file.path(data_path,
filelist_tmp = list.files(file.path(computer_data_path,
filedir))
# For all the filename in the directory selected
......@@ -88,7 +157,7 @@ extract_info = function (data_path, filedir, filename, verbose=TRUE) {
# Concatenate by row the data frames created by this function when filename corresponds to a single file
df_info = rbind(df_info,
extract_info(data_path,
extract_info(computer_data_path,
filedir,
f))
}
......@@ -107,7 +176,7 @@ extract_info = function (data_path, filedir, filename, verbose=TRUE) {
}
# Get the file path to the data
file_path = file.path(data_path, filedir, filename)
file_path = file.path(computer_data_path, filedir, filename)
# Extract all the header
infotxt = c(readLines(file_path, n=41))
......@@ -142,7 +211,7 @@ extract_info = function (data_path, filedir, filename, verbose=TRUE) {
# Extraction of data
extract_data = function (data_path, filedir, filename, verbose=TRUE) {
extract_data = function (computer_data_path, filedir, filename, verbose=TRUE) {
# Convert the filename in vector
filename = c(filename)
......@@ -155,7 +224,7 @@ extract_data = function (data_path, filedir, filename, verbose=TRUE) {
# Create a filelist to store all the filename
filelist = c()
# Get all the filename in the data directory selected
filelist_tmp = list.files(file.path(data_path,
filelist_tmp = list.files(file.path(computer_data_path,
filedir))
# For all the filename in the directory selected
......@@ -180,7 +249,7 @@ extract_data = function (data_path, filedir, filename, verbose=TRUE) {
# Concatenate by row the data frames created by this function when filename corresponds to a single file
df_data = rbind(df_data,
extract_data(data_path,
extract_data(computer_data_path,
filedir,
f))
}
......@@ -199,8 +268,9 @@ extract_data = function (data_path, filedir, filename, verbose=TRUE) {
}
# Get the file path to the data
file_path = file.path(data_path, filedir, filename)
file_path = file.path(computer_data_path, filedir, filename)
# Extract the data as a data frame
df_data = read.table(file_path,
header=TRUE,
na.strings=c(' -99', ' -99.000'),
......@@ -208,7 +278,7 @@ extract_data = function (data_path, filedir, filename, verbose=TRUE) {
skip=41)[,1:2]
# Extract all the information for the station
df_info = extract_info(data_path, filedir, filename, verbose=FALSE)
df_info = extract_info(computer_data_path, filedir, filename, verbose=FALSE)
# Get the code of the station
code = df_info$code
# Create a tibble with the date as Date class and the code of the station
......
......@@ -2,18 +2,39 @@
### TO MODIFY ###
# User settings: edit the values below to match the local machine and the
# stations to analyse.
# NOTE(review): several variables are assigned twice in a row below; this
# looks like the old and new lines of a diff that were interleaved. As
# written, the last assignment of each variable is the one that takes effect.

# Path to the data
computer_data_path = #"/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data"
"C:\\Users\\louis.heraut\\Documents\\CDD_stationnarite\\data"
computer_data_path =
"/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data"
# "C:\\Users\\louis.heraut\\Documents\\CDD_stationnarite\\data"
# Work path
computer_work_path = #"/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/ASH"
"C:\\Users\\louis.heraut\\Documents\\CDD_stationnarite\\ASH"
computer_work_path =
"/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/ASH"
# "C:\\Users\\louis.heraut\\Documents\\CDD_stationnarite\\ASH"
# Manual selection
# Directory (relative to computer_data_path) and file name(s) of the data
# that will be analysed
filedir = "test"
filename = #c("H5920011_HYDRO_QJM.txt", "K4470010_HYDRO_QJM.txt")
"all"
filedir =
FALSE
# "test"
# "BanqueHydro_Export2021"
filename =
FALSE
# c("H5920011_HYDRO_QJM.txt", "K4470010_HYDRO_QJM.txt")
# "all"
# Or list selection
# Directory (relative to computer_data_path) and name of the list file of
# stations that will be analysed. When listdir, listname and BHdir are all
# character strings, the list selection is used and overrides filedir and
# filename; set one of them to FALSE to keep the manual selection.
listdir =
# FALSE
""
listname =
"Liste-station_RRSE.docx"
# FALSE
# Directory that replaces filedir when the list selection is used
BHdir =
"BanqueHydro_Export2021"
# FALSE
# selecdir = "RRSE_selection"
##################
......@@ -44,6 +65,24 @@ if (!(file.exists(figdir))) {
print(paste('figdir :', figdir))
# Get only the selected stations from a station list file.
# The list selection is enabled when listdir, listname and BHdir are all
# character strings (any of them set to FALSE disables it). In that case the
# manual selection is overridden: the data directory becomes BHdir and the
# files to read are the ones associated to the selected stations.
# '&&' is used because the condition of an 'if' must be a single logical
# value; it also short-circuits as soon as one test fails.
if (is.character(listdir) && is.character(listname) && is.character(BHdir)) {
    df_selec = get_selection(computer_data_path,
                             listdir,
                             listname,
                             cnames=c('code',
                                      'station',
                                      'BV_km2',
                                      'axe_principal_concerne',
                                      'longueur_serie',
                                      'commentaires',
                                      'choix'),
                             cisnum=c('BV_km2',
                                      'longueur_serie'))
    filedir = BHdir
    filename = df_selec$filename
}
# Extract information about selected stations
df_info = extract_info(computer_data_path, filedir, filename)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment