An error occurred while loading the file. Please try again.
-
Heraut Louis authored4e58b039
# \\\
# Copyright 2021-2022 Louis Héraut*1
#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/extract.R
#
# Regroups functions to generation of generical station selection
# according to pre-existing directory of data or specific other
# selection format as '.docx' file from Agence de l'eau Adour-Garonne.
# Also useful to extract the data and the metadata present in the
# Banque Hydro files from a selection of station. Manages also
# shapefiles loading.
# Usefull library
library(tools)
library(dplyr)
library(officer)
### 1. GENERAL METADATA ON STATION ___________________________________
# Status of the station
iStatut = c('0'='inconnu',
'1'='station avec signification hydrologique',
'2'='station sans signification hydrologique',
'3'="station d'essai")
# Goal
iFinalite = c('0'='inconnue',
'1'="hydrométrie générale",
'2'='alerte de crue',
'3'="hydrométrie générale et alerte de crue",
'4'="gestion d'ouvrage",
'5'='police des eaux',
'6'="suivi d'étiage",
'7'='bassin expérimental',
'8'='drainage')
# Type of measure
iType = c('0'='inconnu',
'1'='une échelle',
'2'='deux échelles, station mère',
'3'='deux échelles, station fille',
'4'='débits mesurés',
'5'='virtuelle')
# Influence of the flow
iInfluence = c('0'='inconnue',
'1'='nulle ou faible',
'2'='en étiage seulement',
'3'='forte en toute saison')
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
# Type of flow
iDebit = c('0'='reconstitué',
'1'=paste("réel (prise en compte de l'eau rajoutée ",
"ou retirée du bassin selon aménagements)",
sep=''),
'2'='naturel')
# Quality of low water flow
iQBE = c('0'='qualité basses eaux inconnue',
'1'='qualité basses eaux bonne',
'2'='qualité basses eaux douteuse')
# Quality of mean water flow
iQME = c('0'='qualité moyennes eaux inconnue',
'1'='qualité moyennes eaux bonne',
'2'='qualité moyennes eaux douteuse')
# Quality of high water flow
iQHE = c('0'='qualité hautes eaux inconnue',
'1'='qualité hautes eaux bonne',
'2'='qualité hautes eaux douteuse')
# Hydrological region
iRegHydro = c('D'='Affluents du Rhin',
'E'="Fleuves côtiers de l'Artois-Picardie",
'A'='Rhin',
'B'='Meuse',
'F'='Seine aval (Marne incluse)',
'G'='Fleuves côtiers haut normands',
'H'='Seine amont',
'I'='Fleuves côtiers bas normands',
'J'='Bretagne',
'K'='Loire',
'L'='Loire',
'M'='Loire',
'N'='Fleuves côtiers au sud de la Loire',
'O0'='Garonne',
'O1'='Garonne',
'O2'='Garonne',
'O3'='Tarn-Aveyron',
'O4'='Tarn-Aveyron',
'O5'='Tarn-Aveyron',
'O6'='Tarn-Aveyron',
'O7'='Lot',
'O8'='Lot',
'O9'='Lot',
'P'='Dordogne',
'Q'='Adour',
'R'='Charente',
'S'="Fleuves côtiers de l'Adour-Garonne",
'U'='Saône',
'V'='Rhône',
'W'='Isère',
'X'='Durance',
'Y'='Fleuves côtiers du Rhône-Méditérannée et Corse',
'Z'='Îles',
'1'='Guadeloupe',
'2'='Martinique',
'5'='Guyane',
'6'='Guyane',
'7'='Guyane',
'8'='Guyane',
'9'='Guyane',
'4'='Réunion')
## 2. SELECTION ______________________________________________________
### 2.1. Creation of selection _______________________________________
# Create a txt file that resume all the station data files present
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
# in a filedir
create_selection = function (computer_data_path, filedir, outname, optname='_HYDRO_QJM') {
# Out file for store results
outfile = file.path(computer_data_path, outname)
# Path to find the directory of desired codes
dir_path = file.path(computer_data_path, filedir)
# Create a filelist of all the filename in the above directory
filelist_tmp = list.files(dir_path)
# Create a filelist to store all station codes
codelist = c()
# For all the filename in the file list
for (f in filelist_tmp) {
# If the filename is a 'txt' file
if (file_ext(f) == 'txt') {
# Extracts the station code
code = gsub("[^[:alnum:] ].*$", '', f)
# Then the station code is stored
codelist = c(codelist, code)
}
}
# Create a tibble to store the data to write
df_file = tibble(code=codelist,
filename=paste(codelist, optname, '.txt',
sep=''),
ok=TRUE)
# Write the data in a txt file
write.table(df_file, outfile, sep=";", col.names=TRUE, quote=FALSE)
# Returns that it is done with the path
print('Done')
print(paste('path : ', outfile, sep=''))
print('example of file : ')
print(head(df_file))
}
# Example
# create_selection(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "France207",
# "INRAE_selection.txt")
### 2.2. Agence de l'eau Adour-Garonne selection _____________________
# Gets the selection of station from the 'Liste-station_RRSE.docx' file
get_selection_AEAG = function (computer_data_path, listdir, listname,
cnames=c('code','station', 'BV_km2',
'axe_principal_concerne',
'longueur_serie', 'commentaires',
'choix'),
c_num=c('BV_km2', 'longueur_serie')) {
# Gets the file path to the data
list_path = file.path(computer_data_path, listdir, listname)
# Reads and formats the docx file
sample_data = read_docx(list_path)
content = docx_summary(sample_data)
table_cells = content %>% filter(content_type == "table cell")
table_data = table_cells %>% filter(!is_header) %>% select(row_id,
cell_id,
text)
# Splits data into individual columns
splits = split(table_data, table_data$cell_id)
splits = lapply(splits, function(x) x$text)
# Combines columns back together in wide format
df_selec = bind_cols(splits)
# Removes the first line
df_selec = df_selec[-1,]
# Changes the columns name
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
names(df_selec) = cnames
# For all the numerical column
for (c in c_num) {
# Convert them as numeric
df_selec$c = as.numeric(sub(",", ".",
pull(df_selec, c)))
}
# Perfoms the selection according to the column of choice
selec = (df_selec$choix == 'A garder' | df_selec$choix == 'Ajout')
# Stores it in the tibble of selection
df_selec = bind_cols(df_selec,
filename=paste(df_selec$code, '_HYDRO_QJM.txt',
sep=''),
ok=selec)
return (df_selec)
}
# Example
# df_selec_AEAG = get_selection_AEAG(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "",
# "Liste-station_RRSE.docx",
# cnames=c('code','station',
# 'BV_km2',
# 'axe_principal_concerne',
# 'longueur_serie',
# 'commentaires',
# 'choix'),
# c_num=c('BV_km2',
# 'longueur_serie'))
### 2.3. INRAE selection _____________________________________________
# Gets the selection of station from the selection txt file generated
# by the 'create_selection' function
get_selection_INRAE = function (computer_data_path, listdir, listname) {
# Gets the file path to the data
list_path = file.path(computer_data_path, listdir, listname)
# Extracts the data as a data frame
df_selec = read.table(list_path,
header=TRUE,
encoding='UTF-8',
sep=';')
# Stores it in the tibble of selection
df_selec = tibble(code=as.character(df_selec$code),
filename=as.character(df_selec$filename),
ok=df_selec$ok)
return (df_selec)
}
# Example
# df_selec_INRAE = get_selection_INRAE(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "",
# "INRAE_selection.txt")
## 3. EXTRACTION _____________________________________________________
### 3.1. Extraction of metadata
# Extraction of metadata of stations
extract_meta = function (computer_data_path, filedir, filename,
verbose=TRUE) {
# Convert the filename in vector
filename = c(filename)
# If the filename is 'all' or regroup more than one filename
if (all(filename == 'all') | length(filename) > 1) {
# If the filename is 'all'
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
if (all(filename == 'all')) {
# Create a filelist to store all the filename
filelist = c()
# Get all the filename in the data directory selected
filelist_tmp = list.files(file.path(computer_data_path,
filedir))
# For all the filename in the directory selected
for (f in filelist_tmp) {
# If the filename extention is 'txt'
if (file_ext(f) == 'txt') {
# Store the filename in the filelist
filelist = c(filelist, f)
}
}
# If the filename regroup more than one filename
} else if (length(filename > 1)) {
# The filelist correspond to the filename
filelist = filename
}
# Create a blank data frame
df_meta = data.frame()
# For all the file in the filelist
for (f in filelist) {
# Concatenate by raw data frames created by this function
# when filename correspond to only one filename
df_meta = rbind(df_meta,
extract_meta(computer_data_path,
filedir,
f))
}
# Set the rownames by default (to avoid strange numbering)
rownames(df_meta) = NULL
return (df_meta)
}
# Get the filename from the vector
filename = filename[1]
# Print metadata if asked
if (verbose) {
print(paste("extraction of BH meta for file :", filename))
}
# Get the file path to the data
file_path = file.path(computer_data_path, filedir, filename)
if (file.exists(file_path) & substr(file_path, nchar(file_path),
nchar(file_path)) != '/') {
# Extract all the header
metatxt = c(readLines(file_path, n=41, encoding="UTF-8"))
# Create a tibble with all the metadata needed
# (IN for INRAE data and BH for BH data)
df_meta =
tibble(
# Station code
code=trimws(substr(metatxt[11], 38,
nchar(metatxt[11]))),
# Station name
nom=trimws(substr(metatxt[12], 39,
nchar(metatxt[12]))),
# Territory
territoire=trimws(substr(metatxt[13], 39,
nchar(metatxt[13]))),
# Administrator
gestionnaire=trimws(substr(metatxt[7], 60,
nchar(metatxt[7]))),
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
# Lambert 93 localisation
L93X_m_IN=as.numeric(substr(metatxt[16], 65, 77)),
L93X_m_BH=as.numeric(substr(metatxt[16], 38, 50)),
L93Y_m_IN=as.numeric(substr(metatxt[16], 79, 90)),
L93Y_m_BH=as.numeric(substr(metatxt[16], 52, 63)),
# Surface
surface_km2_IN=as.numeric(substr(metatxt[19], 52, 63)),
surface_km2_BH=as.numeric(substr(metatxt[19], 38, 50)),
# Elevation
altitude_m_IN=as.numeric(substr(metatxt[20], 52, 63)),
altitude_m_BH=as.numeric(substr(metatxt[20], 38, 50)),
# Start and end of the data
debut=substr(metatxt[25], 38, 50),
fin=substr(metatxt[25], 52, 63),
# Different other info about the flow quality and type
statut=iStatut[trimws(substr(metatxt[26], 38, 50))],
finalite=iFinalite[trimws(substr(metatxt[26], 52, 56))],
type=iType[trimws(substr(metatxt[26], 58, 58))],
influence=iInfluence[trimws(substr(metatxt[26], 60, 60))],
debit=iDebit[trimws(substr(metatxt[26], 62, 62))],
QBE=iQBE[trimws(substr(metatxt[26], 72, 72))],
QME=iQME[trimws(substr(metatxt[26], 74, 74))],
QHE=iQHE[trimws(substr(metatxt[26], 76, 76))],
# The path to the data file of BH
file_path=file_path)
Ltmp = names(iRegHydro)[nchar(names(iRegHydro)) == 2]
Ltmp = substr(Ltmp, 1, 1)
infoSecteur = rle(sort(Ltmp))$values
oneL = substr(df_meta$code, 1, 1)
twoL = substr(df_meta$code, 1, 2)
RH = c()
for (i in 1:length(oneL)) {
if (oneL[i] %in% infoSecteur) {
RHtmp = iRegHydro[twoL[i]]
} else {
RHtmp = iRegHydro[oneL[i]]
}
RH = c(RH, RHtmp)
}
# Adding of the hydrological region
df_meta$region_hydro = RH
return (df_meta)
} else {
print(paste('filename', file_path, 'do not exist'))
return (NULL)
}
}
# Example
# df_meta = extract_meta(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "BanqueHydro_Export2021",
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
### 3.2. Extraction of data __________________________________________
# Extraction of data
extract_data = function (computer_data_path, filedir, filename,
verbose=TRUE) {
# Convert the filename in vector
filename = c(filename)
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
# If the filename is 'all' or regroup more than one filename
if (all(filename == 'all') | length(filename) > 1) {
# If the filename is 'all'
if (all(filename == 'all')) {
# Create a filelist to store all the filename
filelist = c()
# Get all the filename in the data directory selected
filelist_tmp = list.files(file.path(computer_data_path,
filedir))
# For all the filename in the directory selected
for (f in filelist_tmp) {
# If the filename extention is 'txt'
if (file_ext(f) == 'txt') {
# Store the filename in the filelist
filelist = c(filelist, f)
}
}
# If the filename regroup more than one filename
} else if (length(filename > 1)) {
# The filelist correspond to the filename
filelist = filename
}
# Create a blank data frame
df_data = data.frame()
# For all the file in the filelist
for (f in filelist) {
# Concatenate by raw data frames created by this function
# when filename correspond to only one filename
df_data = rbind(df_data,
extract_data(computer_data_path,
filedir,
f))
}
# Set the rownames by default (to avoid strange numbering)
rownames(df_data) = NULL
return (df_data)
}
# Get the filename from the vector
filename = filename[1]
# Print metadata if asked
if (verbose) {
print(paste("extraction of BH data for file :", filename))
}
# Get the file path to the data
file_path = file.path(computer_data_path, filedir, filename)
if (file.exists(file_path) & substr(file_path, nchar(file_path),
nchar(file_path)) != '/') {
# Extract the data as a data frame
df_data = read.table(file_path,
header=TRUE,
na.strings=c(' -99', ' -99.000'),
sep=';',
skip=41)
# Extract all the metadata for the station
df_meta = extract_meta(computer_data_path, filedir, filename,
verbose=FALSE)
# Get the code of the station
code = df_meta$code
# Create a tibble with the date as Date class and the code
# of the station
df_data = tibble(Date=as.Date(as.character(df_data$Date),
format="%Y%m%d"),
491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
Value=df_data$Qls * 1E-3,
Qmmj=df_data$Qmmj,
val_H=as.character(df_data$val_H),
val_I=as.character(df_data$val_I),
code=code)
return (df_data)
} else {
print(paste('filename', file_path, 'do not exist'))
return (NULL)
}
}
# Example
# df_data = extract_data(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# '',
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
## 4. SHAPEFILE MANAGEMENT ___________________________________________
# Generates a list of shapefiles to draw a hydrological map of
# the France
ini_shapefile = function (resources_path, fr_shpdir, fr_shpname, bs_shpdir, bs_shpname, sbs_shpdir, sbs_shpname, rv_shpdir, rv_shpname, show_river=TRUE) {
# Path for shapefile
fr_shppath = file.path(resources_path, fr_shpdir, fr_shpname)
bs_shppath = file.path(resources_path, bs_shpdir, bs_shpname)
sbs_shppath = file.path(resources_path, sbs_shpdir, sbs_shpname)
rv_shppath = file.path(resources_path, rv_shpdir, rv_shpname)
# France
fr_spdf = readOGR(dsn=fr_shppath, verbose=FALSE)
proj4string(fr_spdf) = CRS("+proj=longlat +ellps=WGS84")
# Transformation in Lambert93
france = spTransform(fr_spdf, CRS("+init=epsg:2154"))
df_france = tibble(fortify(france))
# Hydrological basin
bassin = readOGR(dsn=bs_shppath, verbose=FALSE)
df_bassin = tibble(fortify(bassin))
# Hydrological basin
subbassin = readOGR(dsn=sbs_shppath, verbose=FALSE)
df_subbassin = tibble(fortify(subbassin))
# If the river shapefile needs to be load
if (show_river) {
# Hydrographic network
river = readOGR(dsn=rv_shppath, verbose=FALSE) ### trop long ###
river = river[which(river$Classe == 1),]
df_river = tibble(fortify(river))
} else {
df_river = NULL
}
return (list(france=df_france, bassin=df_bassin, subbassin=df_subbassin, river=df_river))
}