Newer
Older
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# General information on station
#
# Lookup tables used to decode the station header. Each vector is indexed
# by the code read from the file (as a character string) and yields the
# matching French label.

# Labels for the 'statut' code
iStatut <- c(
  "0" = "inconnu",
  "1" = "station avec signification hydrologique",
  "2" = "station sans signification hydrologique",
  "3" = "station d'essai"
)

# Labels for the 'finalite' code
iFinalite <- c(
  "0" = "inconnue",
  "1" = "hydrométrie générale",
  "2" = "alerte de crue",
  "3" = "hydrométrie générale et alerte de crue",
  "4" = "gestion d'ouvrage",
  "5" = "police des eaux",
  "6" = "suivi d'étiage",
  "7" = "bassin expérimental",
  "8" = "drainage"
)

# Labels for the 'type' code
iType <- c(
  "0" = "inconnu",
  "1" = "une échelle",
  "2" = "deux échelles, station mère",
  "3" = "deux échelles, station fille",
  "4" = "débits mesurés",
  "5" = "virtuelle"
)

# Labels for the 'influence' code
iInfluence <- c(
  "0" = "inconnue",
  "1" = "nulle ou faible",
  "2" = "en étiage seulement",
  "3" = "forte en toute saison"
)

# Labels for the 'debit' code
iDebit <- c(
  "0" = "reconstitué",
  "1" = "réel (prise en compte de l'eau rajoutée ou retirée du bassin selon aménagements)",
  "2" = "naturel"
)

# Labels for the low-flow quality code (QBE)
iQBE <- c(
  "0" = "qualité basses eaux inconnue",
  "1" = "qualité basses eaux bonne",
  "2" = "qualité basses eaux douteuse"
)

# Labels for the mean-flow quality code (QME)
iQME <- c(
  "0" = "qualité moyennes eaux inconnue",
  "1" = "qualité moyennes eaux bonne",
  "2" = "qualité moyennes eaux douteuse"
)

# Labels for the high-flow quality code (QHE)
iQHE <- c(
  "0" = "qualité hautes eaux inconnue",
  "1" = "qualité hautes eaux bonne",
  "2" = "qualité hautes eaux douteuse"
)
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Get the selection of data from the 'Liste-station_RRSE' file and the BanqueHydro directory
#
# Reads the table contained in a Word (.docx) document and returns the rows
# selected for the study.
#
# Arguments:
#   computer_data_path  root directory of the data
#   listdir             sub-directory (under the root) holding the document
#   listname            file name of the .docx document
#   cnames              names given to the extracted columns, in order
#   cisnum              names of the columns (among 'cnames') that must be
#                       converted to numeric; a decimal comma is accepted
#
# Returns:
#   A tibble with the rows whose 'choix' column is 'A garder' or 'Ajout',
#   plus a 'filename' column built as '<code>_HYDRO_QJM.txt'.
get_selection <- function (computer_data_path, listdir, listname,
                           cnames=c('code','station', 'BV_km2', 'axe_principal_concerne', 'longueur_serie', 'commentaires', 'choix'),
                           cisnum=c('BV_km2', 'longueur_serie')) {

    # Get the file path to the data
    list_path <- file.path(computer_data_path, listdir, listname)

    # Read the document and keep only the non-header table cells
    sample_data <- read_docx(list_path)
    content <- docx_summary(sample_data)
    table_data <- content %>%
        filter(content_type == "table cell") %>%
        filter(!is_header) %>%
        select(row_id, cell_id, text)

    # Split data into individual columns (one vector of texts per cell_id)
    splits <- split(table_data, table_data$cell_id)
    splits <- lapply(splits, function(x) x$text)

    # Combine columns back together in wide format
    df_list <- bind_cols(splits)
    # Drop the first extracted row
    # NOTE(review): presumably a title row not flagged as header — confirm
    df_list <- df_list[-1, ]
    names(df_list) <- cnames

    # Convert the numeric columns. The column must be addressed with [[ ]]:
    # 'df$name' always uses the literal name written after the '$', so the
    # previous 'df_list$c' wrote into a column called "c" and left the
    # requested columns as text.
    for (colname in cisnum) {
        df_list[[colname]] <- as.numeric(sub(",", ".", df_list[[colname]]))
    }

    # Keep only the stations marked as selected. %in% never yields NA, so
    # rows with a missing 'choix' are simply dropped.
    keep <- df_list$choix %in% c('A garder', 'Ajout')
    df_selec <- df_list[keep, ]

    # Name of the data file of each station. The suffix is repeated to the
    # number of rows so that an empty selection yields an empty column
    # instead of a single recycled string.
    df_selec$filename <- paste0(df_selec$code,
                                rep('_HYDRO_QJM.txt', nrow(df_selec)))

    return (df_selec)
}
# Example
# df_selec = get_selection(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "liste_station",
# "Liste-station_RRSE.docx",
# cnames=c('code','station',
# 'BV_km2',
# 'axe_principal_concerne',
# 'longueur_serie',
# 'commentaires',
# 'choix'),
# cisnum=c('BV_km2',
# 'longueur_serie'))
# Extract the station metadata found in the header of a data file.
# Arguments: the root data path, the sub-directory, the file name(s),
# and 'verbose' (a progress message is printed when TRUE).
extract_info = function (computer_data_path, filedir, filename, verbose=TRUE) {
# Convert the filename into a vector
# If the filename is 'all' or groups more than one file name
# Create a file list to store all the file names
# Get all the file names in the selected data directory
filelist_tmp = list.files(file.path(computer_data_path,
# For every file name in the selected directory
# If the file name extension is 'txt'
# Store the file name in the file list
# If the filename groups more than one file name
# The file list corresponds to the filename
}
# Create a blank data frame
# For every file in the file list
# Concatenate by row the data frames created by this function when filename corresponds to only one file name
# Set the row names to the default (to avoid strange numbering)
# Keep only the first file name of the vector
filename = filename[1]
# Print information if asked
if (verbose) {
print(paste("extraction of info for file :", filename))
}
# Full path of the file to read
file_path = file.path(computer_data_path, filedir, filename)
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# Only parse the file when it is present on disk
if (file.exists(file_path)) {
# Extract the whole header (the first 41 lines of the file)
infotxt = c(readLines(file_path, n=41))
# Create a tibble with all the information needed.
# Every field is cut out of a given header line at fixed character
# positions; text fields are stripped of surrounding whitespace.
#   lines 11-13 : station code, name and territory
#   line 16     : L93X / L93Y coordinates
#                 NOTE(review): presumably Lambert-93 — confirm
#   line 19     : surface
#   line 26     : coded fields, translated into labels through the
#                 iStatut, iFinalite, iType, iInfluence, iDebit, iQBE,
#                 iQME and iQHE lookup vectors
df_info =
tibble(code=trimws(substr(infotxt[11], 38, nchar(infotxt[11]))),
nom=trimws(substr(infotxt[12], 39, nchar(infotxt[12]))),
territoire=trimws(substr(infotxt[13], 39, nchar(infotxt[13]))),
L93X=as.numeric(substr(infotxt[16], 38, 50)),
L93Y=as.numeric(substr(infotxt[16], 52, 63)),
surface=as.numeric(substr(infotxt[19], 38, 50)),
statut=iStatut[trimws(substr(infotxt[26], 38, 50))],
finalite=iFinalite[trimws(substr(infotxt[26], 52, 56))],
type=iType[trimws(substr(infotxt[26], 58, 58))],
influence=iInfluence[trimws(substr(infotxt[26], 60, 60))],
debit=iDebit[trimws(substr(infotxt[26], 62, 62))],
QBE=iQBE[trimws(substr(infotxt[26], 72, 72))],
QME=iQME[trimws(substr(infotxt[26], 74, 74))],
QHE=iQHE[trimws(substr(infotxt[26], 76, 76))],
# Keep the path of the file the information was read from
file_path=file_path
)
return (df_info)
} else {
# Missing file: report it and return NULL instead of a tibble
print(paste('filename', file_path, 'do not exist'))
return (NULL)
}
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# '',
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
# Extract the time series stored in a data file.
# Arguments: the root data path, the sub-directory, the file name(s),
# and 'verbose' (a progress message is printed when TRUE).
# Returns a tibble with the columns Date, Qm3s, the remaining columns of
# the file and the station code, or NULL when the file does not exist.
extract_data = function (computer_data_path, filedir, filename, verbose=TRUE) {
# If the filename is 'all' or groups more than one file name
# Create a file list to store all the file names
# Get all the file names in the selected data directory
filelist_tmp = list.files(file.path(computer_data_path,
# For every file name in the selected directory
# If the file name extension is 'txt'
# Store the file name in the file list
# If the filename groups more than one file name
# The file list corresponds to the filename
# For every file in the file list
# Concatenate by row the data frames created by this function when filename corresponds to only one file name
# Set the row names to the default (to avoid strange numbering)
# Get the filename from the vector
# Print information if asked
if (verbose) {
print(paste("extraction of data for file :", filename))
}
# Full path of the file to read
file_path = file.path(computer_data_path, filedir, filename)
# Only parse the file when it is present on disk
if (file.exists(file_path)) {
# Extract the data as a data frame: ';'-separated values with a header
# row; the strings ' -99' and ' -99.000' are read as NA
df_data = read.table(file_path,
header=TRUE,
na.strings=c(' -99', ' -99.000'),
sep=';',
# Extract all the information for the station
# (verbose=FALSE so that no second progress message is printed)
df_info = extract_info(computer_data_path, filedir, filename, verbose=FALSE)
# Get the code of the station
code = df_info$code
# Create a tibble with the date as Date class and the code of the station.
# 'Date' is parsed from its %Y%m%d text form; 'Qls' is scaled by 1E-3
# and stored as 'Qm3s'
# NOTE(review): presumably a L/s to m3/s conversion — confirm.
# df_data[-1:-2] keeps every column except the first two.
df_data = tibble(Date=as.Date(as.character(df_data$Date),
format="%Y%m%d"),
Qm3s=df_data$Qls * 1E-3,
df_data[-1:-2],
code=code)
return (df_data)
} else {
# Missing file: report it and return NULL instead of a tibble
print(paste('filename', file_path, 'do not exist'))
return (NULL)
}
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# '',
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))