Newer
Older
# \\\
# Copyright 2021-2022 Louis Hraut*1
#
# *1 INRAE, France
# louis.heraut@inrae.fr
#
# This file is part of ash R toolbox.
#
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ash R toolbox. If not, see <https://www.gnu.org/licenses/>.
# ///
#
#
# processing/extract.R
#
#
iStatut = c('0'='inconnu',
'1'='station avec signification hydrologique',
'2'='station sans signification hydrologique',
'3'="station d'essai")
'3'="hydromtrie gnrale et alerte de crue",
'6'="suivi d'tiage",
'7'='bassin exprimental',
'1'='une chelle',
'2'='deux chelles, station mre',
'3'='deux chelles, station fille',
'4'='dbits mesurs',
'1'=paste("rel (prise en compte de l'eau rajoute ",
"ou retire du bassin selon amnagements)",
sep=''),
iQBE = c('0'='qualit basses eaux inconnue',
'1'='qualit basses eaux bonne',
'2'='qualit basses eaux douteuse')
iQME = c('0'='qualit moyennes eaux inconnue',
'1'='qualit moyennes eaux bonne',
'2'='qualit moyennes eaux douteuse')
iQHE = c('0'='qualit hautes eaux inconnue',
'1'='qualit hautes eaux bonne',
'2'='qualit hautes eaux douteuse')
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
iRegHydro = c('D'='Affluents du Rhin',
'E'="Fleuves ctiers de l'Artois-Picardie",
'A'='Rhin',
'B'='Meuse',
'F'='Seine aval (Marne incluse)',
'G'='Fleuves ctiers haut normands',
'H'='Seine amont',
'I'='Fleuves ctiers bas normands',
'J'='Bretagne',
'K'='Loire',
'L'='Loire',
'M'='Loire',
'N'='Fleuves ctiers au sud de la Loire',
'O'='Garonne',
'P'='Dordogne',
'Q'='Adour',
'R'='Charente',
'S'="Fleuves ctiers de l'Adour-Garonne",
'U'='Sane',
'V'='Rhne',
'W'='Isre',
'X'='Durance',
'Y'='Fleuves ctiers du Rhne-Mditranne et Corse',
'Z'='les',
'1'='Guadeloupe',
'2'='Martinique',
'5'='Guyane',
'6'='Guyane',
'7'='Guyane',
'8'='Guyane',
'9'='Guyane',
'4'='Runion')
# Create a txt file that resume all the station data files present
# in a filedir
create_selection = function (computer_data_path, filedir, outname) {
outfile = file.path(computer_data_path, outname)
# Path to find the directory of desired codes
dir_path = file.path(computer_data_path, filedir)
# Create a filelist of all the filename in the above directory
filelist_tmp = list.files(dir_path)
# Create a filelist to store all station codes
codelist = c()
# For all the filename in the file list
codelist = c(codelist, gsub('.txt', '', f))
}
}
df_file = tibble(code=codelist,
filename=paste(codelist,
'_HYDRO_QJM.txt', sep=''),
ok=TRUE)
write.table(df_file, outfile, sep=";", col.names=TRUE, quote=FALSE)
return (NULL)
}
# Example
# create_selection(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "France207",
# "nival_selection.txt")
# Gets the selection of station from the 'Liste-station_RRSE.docx' file
get_selection_AG = function (computer_data_path, listdir, listname,
cnames=c('code','station', 'BV_km2',
'axe_principal_concerne',
'longueur_serie', 'commentaires',
'choix'),
c_num=c('BV_km2', 'longueur_serie')) {
list_path = file.path(computer_data_path, listdir, listname)
sample_data = read_docx(list_path)
content = docx_summary(sample_data)
table_cells = content %>% filter(content_type == "table cell")
table_data = table_cells %>% filter(!is_header) %>% select(row_id,
cell_id,
text)
# Splits data into individual columns
splits = split(table_data, table_data$cell_id)
splits = lapply(splits, function(x) x$text)
# Combines columns back together in wide format
df_selec = bind_cols(splits)
# Removes the first line
df_selec$c = as.numeric(sub(",", ".",
pull(df_selec, c)))
selec = (df_selec$choix == 'A garder' | df_selec$choix == 'Ajout')
filename=paste(df_selec$code, '_HYDRO_QJM.txt',
sep=''),
ok=selec)
# df_selec_AG = get_selection_AG(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "",
# "Liste-station_RRSE.docx",
# cnames=c('code','station',
# 'BV_km2',
# 'axe_principal_concerne',
# 'longueur_serie',
# 'commentaires',
# 'choix'),
# c_num=c('BV_km2',
# 'longueur_serie'))
# Gets the selection of station from the selection txt file generated
# by the 'create_selection' function
get_selection_IN = function (computer_data_path, listdir, listname) {
list_path = file.path(computer_data_path, listdir, listname)
df_selec = read.table(list_path,
header=TRUE,
encoding='UTF-8',
df_selec = tibble(code=as.character(df_selec$code),
filename=as.character(df_selec$filename),
ok=df_selec$ok)
return (df_selec)
}
# Example
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "",
# "nival_selection.txt")
# Extraction of metadata of stations
extract_meta = function (computer_data_path, filedir, filename,
verbose=TRUE) {
# Convert the filename in vector
# If the filename is 'all' or regroup more than one filename
# Create a filelist to store all the filename
# Get all the filename in the data directory selected
filelist_tmp = list.files(file.path(computer_data_path,
# For all the filename in the directory selected
# If the filename extention is 'txt'
# Store the filename in the filelist
# The filelist correspond to the filename
}
# Create a blank data frame
# For all the file in the filelist
# Concatenate by raw data frames created by this function
# when filename correspond to only one filename
extract_meta(computer_data_path,
# Set the rownames by default (to avoid strange numbering)
# Get the filename from the vector
filename = filename[1]
print(paste("extraction of BH meta for file :", filename))
file_path = file.path(computer_data_path, filedir, filename)
if (file.exists(file_path) & substr(file_path, nchar(file_path),
nchar(file_path)) != '/') {
metatxt = c(readLines(file_path, n=41, encoding="UTF-8"))
# Create a tibble with all the metadata needed
df_meta =
tibble(code=trimws(substr(metatxt[11], 38,
nchar(metatxt[11]))),
nom=trimws(substr(metatxt[12], 39,
nchar(metatxt[12]))),
territoire=trimws(substr(metatxt[13], 39,
nchar(metatxt[13]))),
gestionnaire=trimws(substr(metatxt[7], 60,
nchar(metatxt[7]))),
L93X_m_IN=as.numeric(substr(metatxt[16], 65, 77)),
L93X_m_BH=as.numeric(substr(metatxt[16], 38, 50)),
L93Y_m_IN=as.numeric(substr(metatxt[16], 79, 90)),
L93Y_m_BH=as.numeric(substr(metatxt[16], 52, 63)),
surface_km2_IN=as.numeric(substr(metatxt[19], 52, 63)),
surface_km2_BH=as.numeric(substr(metatxt[19], 38, 50)),
altitude_m_IN=as.numeric(substr(metatxt[20], 52, 63)),
altitude_m_BH=as.numeric(substr(metatxt[20], 38, 50)),
debut=substr(metatxt[25], 38, 50),
fin=substr(metatxt[25], 52, 63),
influence=iInfluence[trimws(substr(metatxt[26], 60,
60))],
debit=iDebit[trimws(substr(metatxt[26], 62, 62))],
QBE=iQBE[trimws(substr(metatxt[26], 72, 72))],
QME=iQME[trimws(substr(metatxt[26], 74, 74))],
QHE=iQHE[trimws(substr(metatxt[26], 76, 76))],
df_meta$region_hydro = iRegHydro[substr(df_meta$code, 1, 1)]
} else {
print(paste('filename', file_path, 'do not exist'))
return (NULL)
}
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "BanqueHydro_Export2021",
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
extract_data = function (computer_data_path, filedir, filename,
verbose=TRUE) {
# If the filename is 'all' or regroup more than one filename
# Create a filelist to store all the filename
# Get all the filename in the data directory selected
filelist_tmp = list.files(file.path(computer_data_path,
# For all the filename in the directory selected
# If the filename extention is 'txt'
# Store the filename in the filelist
# For all the file in the filelist
# Concatenate by raw data frames created by this function
# when filename correspond to only one filename
extract_data(computer_data_path,
# Set the rownames by default (to avoid strange numbering)
# Get the filename from the vector
print(paste("extraction of BH data for file :", filename))
file_path = file.path(computer_data_path, filedir, filename)
if (file.exists(file_path) & substr(file_path, nchar(file_path),
nchar(file_path)) != '/') {
# Extract the data as a data frame
df_data = read.table(file_path,
header=TRUE,
na.strings=c(' -99', ' -99.000'),
sep=';',
df_meta = extract_meta(computer_data_path, filedir, filename,
verbose=FALSE)
# Create a tibble with the date as Date class and the code
# of the station
df_data = tibble(Date=as.Date(as.character(df_data$Date),
format="%Y%m%d"),
Qm3s=df_data$Qls * 1E-3,
df_data[-1:-2],
code=code)
return (df_data)
} else {
print(paste('filename', file_path, 'do not exist'))
return (NULL)
}
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# '',
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
# Generates a list of shapefiles to draw a hydrological map of
# the France
ini_shapefile = function (computer_data_path, fr_shpdir, fr_shpname, bs_shpdir, bs_shpname, rv_shpdir, rv_shpname, riv=TRUE) {
# Path for shapefile
fr_shppath = file.path(computer_data_path, fr_shpdir, fr_shpname)
bs_shppath = file.path(computer_data_path, bs_shpdir, bs_shpname)
rv_shppath = file.path(computer_data_path, rv_shpdir, rv_shpname)
# France
fr_spdf = readOGR(dsn=fr_shppath, verbose=FALSE)
proj4string(fr_spdf) = CRS("+proj=longlat +ellps=WGS84")
# Transformation in Lambert93
france = spTransform(fr_spdf, CRS("+init=epsg:2154"))
df_france = tibble(fortify(france))
bassin = readOGR(dsn=bs_shppath, verbose=FALSE)
df_bassin = tibble(fortify(bassin))
river = readOGR(dsn=rv_shppath, verbose=FALSE) ### trop long ###
river = river[which(river$Classe == 1),]
df_river = tibble(fortify(river))
} else {
df_river = NULL
}
return (list(france=df_france, bassin=df_bassin, river=df_river))
}