# Author: Heraut Louis (commit 3e1edf46)
# Useful libraries
library(tools)
library(dplyr)
library(officer)
# General metadata on station
#
# Lookup tables translating the one-character codes found in the header
# of a station file into French labels. Each table is a named character
# vector: the names are the codes, the values are the labels.
# 'extract_meta' indexes these tables with substrings of line 26 of the
# file header.

# Status of the station
iStatut = c('0'='inconnu',
            '1'='station avec signification hydrologique',
            '2'='station sans signification hydrologique',
            '3'="station d'essai")

# Purpose of the station
iFinalite = c('0'='inconnue',
              '1'="hydrométrie générale",
              '2'='alerte de crue',
              '3'="hydrométrie générale et alerte de crue",
              '4'="gestion d'ouvrage",
              '5'='police des eaux',
              '6'="suivi d'étiage",
              '7'='bassin expérimental',
              '8'='drainage')

# Type of the station
iType = c('0'='inconnu',
          '1'='une échelle',
          '2'='deux échelles, station mère',
          '3'='deux échelles, station fille',
          '4'='débits mesurés',
          '5'='virtuelle')

# Influence on the station
iInfluence = c('0'='inconnue',
               '1'='nulle ou faible',
               '2'='en étiage seulement',
               '3'='forte en toute saison')

# Kind of discharge
iDebit = c('0'='reconstitué',
           '1'="réel (prise en compte de l'eau rajoutée ou retirée du bassin selon aménagements)",
           '2'='naturel')

# Quality of the low flows
iQBE = c('0'='qualité basses eaux inconnue',
         '1'='qualité basses eaux bonne',
         '2'='qualité basses eaux douteuse')

# Quality of the mean flows
iQME = c('0'='qualité moyennes eaux inconnue',
         '1'='qualité moyennes eaux bonne',
         '2'='qualité moyennes eaux douteuse')

# Quality of the high flows
iQHE = c('0'='qualité hautes eaux inconnue',
         '1'='qualité hautes eaux bonne',
         '2'='qualité hautes eaux douteuse')
# Hydrological regions. Named character vector whose names are the
# one-character region keys and whose values are the region labels.
# 'extract_meta' indexes it with the first character of the station
# code. Several keys deliberately share the same label ('K', 'L' and 'M'
# for the Loire, '5' to '9' for Guyane).
iRegHydro = c('D'='Affluents du Rhin',
              'E'="Fleuves côtiers de l'Artois-Picardie",
              'A'='Rhin',
              'B'='Meuse',
              'F'='Seine aval (Marne incluse)',
              'G'='Fleuves côtiers haut normands',
              'H'='Seine amont',
              'I'='Fleuves côtiers bas normands',
              'J'='Bretagne',
              'K'='Loire',
              'L'='Loire',
              'M'='Loire',
              'N'='Fleuves côtiers au sud de la Loire',
              'O'='Garonne',
              'P'='Dordogne',
              'Q'='Adour',
              'R'='Charente',
              'S'="Fleuves côtiers de l'Adour-Garonne",
              'U'='Saône',
              'V'='Rhône',
              'W'='Isère',
              'X'='Durance',
              'Y'='Fleuves côtiers du Rhône-Méditerranée et Corse',
              'Z'='Îles',
              '1'='Guadeloupe',
              '2'='Martinique',
              '5'='Guyane',
              '6'='Guyane',
              '7'='Guyane',
              '8'='Guyane',
              '9'='Guyane',
              '4'='Réunion')
# Create a selection file listing every '.txt' file of a directory.
#
# Arguments:
#   computer_data_path : root directory of the data
#   filedir            : sub-directory of 'computer_data_path' to scan
#   outname            : name of the file written in 'computer_data_path'
#
# The file written is a ';' separated table with the columns 'code'
# (the file name without its extension), 'filename' (the code followed
# by '_HYDRO_QJM.txt') and 'ok' (always TRUE).
#
# Returns NULL: the function is only called for its side effect.
create_selection = function (computer_data_path, filedir, outname) {

    outfile = file.path(computer_data_path, outname)
    dir_path = file.path(computer_data_path, filedir)

    # Keep only the files with a 'txt' extension
    filelist = list.files(dir_path)
    txtlist = filelist[file_ext(filelist) == 'txt']

    # Strip the extension only. The former gsub('.txt', '', f) was an
    # unanchored regular expression in which '.' matches any character,
    # so it could also remove text in the middle of a name.
    codelist = file_path_sans_ext(txtlist)

    # sprintf() and rep() give zero-length results for an empty
    # 'codelist', whereas paste() would produce a single bogus
    # '_HYDRO_QJM.txt' row when no '.txt' file is found.
    df_file = tibble(code=codelist,
                     filename=sprintf('%s_HYDRO_QJM.txt', codelist),
                     ok=rep(TRUE, length(codelist)))

    write.table(df_file, outfile, sep=";", col.names=TRUE, quote=FALSE)
    return (NULL)
}
# Example
# create_selection(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "France207",
# "nival_selection.txt")
# Get the selection of stations from the table of a '.docx' list file
# (for instance 'Liste-station_RRSE.docx').
#
# Arguments:
#   computer_data_path : root directory of the data
#   listdir            : sub-directory holding the list file
#   listname           : name of the '.docx' list file
#   cnames             : names given to the columns of the table, in
#                        order
#   c_num              : names (taken from 'cnames') of the columns to
#                        convert to numeric; a decimal comma is accepted
#
# Returns the table as a tibble with two extra columns: 'filename' (the
# station code followed by '_HYDRO_QJM.txt') and 'ok' (TRUE when the
# 'choix' column is 'A garder' or 'Ajout').
get_selection_AG = function (computer_data_path, listdir, listname,
                             cnames=c('code','station', 'BV_km2', 'axe_principal_concerne', 'longueur_serie', 'commentaires', 'choix'),
                             c_num=c('BV_km2', 'longueur_serie')) {

    # Get the file path to the data
    list_path = file.path(computer_data_path, listdir, listname)

    # Read the document and keep the non-header cells of its tables
    sample_data = read_docx(list_path)
    content = docx_summary(sample_data)
    table_cells = content %>% filter(content_type == "table cell")
    table_data = table_cells %>%
        filter(!is_header) %>%
        select(row_id, cell_id, text)

    # Split the cells into one text vector per column
    splits = split(table_data, table_data$cell_id)
    splits = lapply(splits, function(x) x$text)

    # Combine the columns back together in wide format
    df_selec = bind_cols(splits)
    # Drop the first row (presumably the title row of the table, which
    # is not flagged as a header -- TODO confirm)
    df_selec = df_selec[-1,]

    # Change the column names
    if (ncol(df_selec) != length(cnames)) {
        stop("the table has ", ncol(df_selec), " columns but ",
             length(cnames), " names were given in 'cnames'")
    }
    names(df_selec) = cnames

    # Convert the requested columns to numeric. The column has to be
    # addressed with [[ ]]: 'df_selec$c' would create a column literally
    # named 'c' and leave the requested columns as text.
    for (col in c_num) {
        df_selec[[col]] = as.numeric(sub(",", ".", df_selec[[col]],
                                         fixed=TRUE))
    }

    # Stations kept in the selection
    selec = (df_selec$choix == 'A garder' | df_selec$choix == 'Ajout')

    df_selec = bind_cols(df_selec,
                         filename=paste(df_selec$code, '_HYDRO_QJM.txt', sep=''),
                         ok=selec)
    return (df_selec)
}
# Example
# NOTE(review): unlike the other examples of this file, this call is not
# commented out. It is executed every time the file is sourced and reads
# the list file from the hard-coded path below -- confirm that this is
# intended.
df_selec_AG = get_selection_AG(
    computer_data_path="/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
    listdir="",
    listname="Liste-station_RRSE.docx",
    cnames=c('code', 'station', 'BV_km2', 'axe_principal_concerne',
             'longueur_serie', 'commentaires', 'choix'),
    c_num=c('BV_km2', 'longueur_serie'))
# Read a ';' separated selection file with the columns 'code',
# 'filename' and 'ok'.
#
# Arguments:
#   computer_data_path : root directory of the data
#   listdir            : sub-directory holding the selection file
#   listname           : name of the selection file
#
# Returns a tibble with the columns 'code' and 'filename' converted to
# character, and 'ok' kept as read.
get_selection_NV = function (computer_data_path, listdir, listname) {

    # Read the selection file, its first line being the header
    tab = read.table(file.path(computer_data_path, listdir, listname),
                     header=TRUE,
                     encoding='UTF-8',
                     sep=';')

    # Keep only the three expected columns
    tibble(code=as.character(tab$code),
           filename=as.character(tab$filename),
           ok=tab$ok)
}
# Example
# df_selec_NV = get_selection_NV(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "",
# "nival_selection.txt")
# Extraction of metadata
#
# Read the metadata stored in the 41-line header of one or several
# station files.
#
# Arguments:
#   computer_data_path : root directory of the data
#   filedir            : sub-directory holding the station files
#   filename           : a file name, a vector of file names, or 'all'
#                        to process every '.txt' file of the directory
#   verbose            : if TRUE, print the name of each file processed
#
# Returns, for a single file, a one-row tibble of metadata, or NULL
# (after printing a message) when the file does not exist. For several
# files, the rows of every file found are bound together.
extract_meta = function (computer_data_path, filedir, filename, verbose=TRUE) {

    # Convert the filename in vector
    filename = c(filename)
    is_all = all(filename == 'all')

    # If the filename is 'all' or regroups more than one filename
    if (is_all || length(filename) > 1) {

        if (is_all) {
            # Keep every file of the directory with a 'txt' extension
            filelist_tmp = list.files(file.path(computer_data_path,
                                                filedir))
            filelist = filelist_tmp[file_ext(filelist_tmp) == 'txt']
        } else {
            # The filelist corresponds to the filename
            filelist = filename
        }

        # Call this function on each file, forwarding 'verbose' so that
        # verbose=FALSE also silences the per-file messages
        meta_list = lapply(filelist, function (f) {
            extract_meta(computer_data_path, filedir, f,
                         verbose=verbose)
        })

        # Bind the results by row in a single call. The leading blank
        # data frame keeps the result defined when no file was
        # processed; NULL results (missing files) are ignored by rbind.
        df_meta = do.call(rbind, c(list(data.frame()), meta_list))

        # Set the rownames by default (to avoid strange numbering)
        rownames(df_meta) = NULL
        return (df_meta)
    }

    # Get the filename from the vector
    filename = filename[1]

    # Print the file being processed if asked
    if (verbose) {
        print(paste("extraction of BH meta for file :", filename))
    }

    # Get the file path to the data
    file_path = file.path(computer_data_path, filedir, filename)

    # A path ending with '/' is a directory (empty filename), not a file
    if (!file.exists(file_path) || endsWith(file_path, '/')) {
        print(paste('filename', file_path, 'do not exist'))
        return (NULL)
    }

    # Extract all the header
    metatxt = readLines(file_path, n=41, encoding="UTF-8")
    # Line of the header holding the one-character station codes
    flags = metatxt[26]

    # Create a tibble with all the metadata needed, read at fixed
    # character positions of the header lines
    df_meta =
        tibble(code=trimws(substr(metatxt[11], 38, nchar(metatxt[11]))),
               nom=trimws(substr(metatxt[12], 39, nchar(metatxt[12]))),
               territoire=trimws(substr(metatxt[13], 39, nchar(metatxt[13]))),
               L93X=as.numeric(substr(metatxt[16], 38, 50)),
               L93Y=as.numeric(substr(metatxt[16], 52, 63)),
               surface_km2=as.numeric(substr(metatxt[19], 38, 50)),
               statut=iStatut[trimws(substr(flags, 38, 50))],
               finalite=iFinalite[trimws(substr(flags, 52, 56))],
               type=iType[trimws(substr(flags, 58, 58))],
               influence=iInfluence[trimws(substr(flags, 60, 60))],
               debit=iDebit[trimws(substr(flags, 62, 62))],
               QBE=iQBE[trimws(substr(flags, 72, 72))],
               QME=iQME[trimws(substr(flags, 74, 74))],
               QHE=iQHE[trimws(substr(flags, 76, 76))],
               file_path=file_path)

    # The hydrological region is given by the first character of the code
    df_meta$region_hydro = iRegHydro[substr(df_meta$code, 1, 1)]
    return (df_meta)
}
# Example
# df_meta = extract_meta(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# '',
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
# Extraction of data
#
# Read the data table of one or several station files.
#
# Arguments:
#   computer_data_path : root directory of the data
#   filedir            : sub-directory holding the station files
#   filename           : a file name, a vector of file names, or 'all'
#                        to process every '.txt' file of the directory
#   verbose            : if TRUE, print the name of each file processed
#
# Returns, for a single file, a tibble with the columns 'Date' (Date
# class), 'Qm3s' (the 'Qls' column multiplied by 1E-3), the columns of
# the file after the first two, and 'code' (the station code read from
# the header); or NULL (after printing a message) when the file does not
# exist. For several files, the rows of every file found are bound
# together.
extract_data = function (computer_data_path, filedir, filename, verbose=TRUE) {

    # Convert the filename in vector
    filename = c(filename)
    is_all = all(filename == 'all')

    # If the filename is 'all' or regroups more than one filename
    if (is_all || length(filename) > 1) {

        if (is_all) {
            # Keep every file of the directory with a 'txt' extension
            filelist_tmp = list.files(file.path(computer_data_path,
                                                filedir))
            filelist = filelist_tmp[file_ext(filelist_tmp) == 'txt']
        } else {
            # The filelist corresponds to the filename
            filelist = filename
        }

        # Call this function on each file, forwarding 'verbose' so that
        # verbose=FALSE also silences the per-file messages
        data_list = lapply(filelist, function (f) {
            extract_data(computer_data_path, filedir, f,
                         verbose=verbose)
        })

        # Bind the results by row in a single call. The leading blank
        # data frame keeps the result defined when no file was
        # processed; NULL results (missing files) are ignored by rbind.
        df_data = do.call(rbind, c(list(data.frame()), data_list))

        # Set the rownames by default (to avoid strange numbering)
        rownames(df_data) = NULL
        return (df_data)
    }

    # Get the filename from the vector
    filename = filename[1]

    # Print the file being processed if asked
    if (verbose) {
        print(paste("extraction of BH data for file :", filename))
    }

    # Get the file path to the data
    file_path = file.path(computer_data_path, filedir, filename)

    # A path ending with '/' is a directory (empty filename), not a file
    if (!file.exists(file_path) || endsWith(file_path, '/')) {
        print(paste('filename', file_path, 'do not exist'))
        return (NULL)
    }

    # Extract the data as a data frame, skipping the 41-line header and
    # reading the ' -99' and ' -99.000' entries as missing values
    df_raw = read.table(file_path,
                        header=TRUE,
                        na.strings=c(' -99', ' -99.000'),
                        sep=';',
                        skip=41)

    # Extract all the metadata for the station
    df_meta = extract_meta(computer_data_path, filedir, filename,
                           verbose=FALSE)
    # Get the code of the station
    code = df_meta$code

    # Create a tibble with the date as Date class, the discharge
    # rescaled by 1E-3, the remaining columns of the file (all but the
    # first two) and the code of the station
    df_data = tibble(Date=as.Date(as.character(df_raw$Date),
                                  format="%Y%m%d"),
                     Qm3s=df_raw$Qls * 1E-3,
                     df_raw[-(1:2)],
                     code=code)
    return (df_data)
}
# Example
# df_data = extract_data(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# '',
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))