Newer
Older
iStatut = c('0'='inconnu',
'1'='station avec signification hydrologique',
'2'='station sans signification hydrologique',
'3'="station d'essai")
iFinalite = c('0'='inconnue',
'3'="hydromtrie gnrale et alerte de crue",
'6'="suivi d'tiage",
'7'='bassin exprimental',
'1'='une chelle',
'2'='deux chelles, station mre',
'3'='deux chelles, station fille',
'4'='dbits mesurs',
'5'='virtuelle')
iInfluence = c('0'='inconnue',
'1'='nulle ou faible',
iDebit = c('0'='reconstitu',
'1'="rel (prise en compte de l'eau rajoute ou retire du bassin selon amnagements)",
iQBE = c('0'='qualit basses eaux inconnue',
'1'='qualit basses eaux bonne',
'2'='qualit basses eaux douteuse')
iQME = c('0'='qualit moyennes eaux inconnue',
'1'='qualit moyennes eaux bonne',
'2'='qualit moyennes eaux douteuse')
iQHE = c('0'='qualit hautes eaux inconnue',
'1'='qualit hautes eaux bonne',
'2'='qualit hautes eaux douteuse')
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
iRegHydro = c('D'='Affluents du Rhin',
'E'="Fleuves ctiers de l'Artois-Picardie",
'A'='Rhin',
'B'='Meuse',
'F'='Seine aval (Marne incluse)',
'G'='Fleuves ctiers haut normands',
'H'='Seine amont',
'I'='Fleuves ctiers bas normands',
'J'='Bretagne',
'K'='Loire',
'L'='Loire',
'M'='Loire',
'N'='Fleuves ctiers au sud de la Loire',
'O'='Garonne',
'P'='Dordogne',
'Q'='Adour',
'R'='Charente',
'S'="Fleuves ctiers de l'Adour-Garonne",
'U'='Sane',
'V'='Rhne',
'W'='Isre',
'X'='Durance',
'Y'='Fleuves ctiers du Rhne-Mditranne et Corse',
'Z'='les',
'1'='Guadeloupe',
'2'='Martinique',
'5'='Guyane',
'6'='Guyane',
'7'='Guyane',
'8'='Guyane',
'9'='Guyane',
'4'='Runion')
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
create_selection = function (computer_data_path, filedir, outname) {
outfile = file.path(computer_data_path, outname)
codelist = c()
dir_path = file.path(computer_data_path, filedir)
filelist_tmp = list.files(dir_path)
for (f in filelist_tmp) {
if (file_ext(f) == 'txt') {
codelist = c(codelist, gsub('.txt', '', f))
}
}
df_file = tibble(code=codelist,
filename=paste(codelist,
'_HYDRO_QJM.txt', sep=''),
ok=TRUE)
write.table(df_file, outfile, sep=";", col.names=TRUE, quote=FALSE)
return (NULL)
}
# Example
# create_selection(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "France207",
# "nival_selection.txt")
# Get the selection of data from the 'Liste-station_RRSE' file and the BanqueHydro directory
get_selection_AG = function (computer_data_path, listdir, listname,
cnames=c('code','station', 'BV_km2', 'axe_principal_concerne', 'longueur_serie', 'commentaires', 'choix'),
c_num=c('BV_km2', 'longueur_serie')) {
# Get the file path to the data
list_path = file.path(computer_data_path, listdir, listname)
sample_data = read_docx(list_path)
content = docx_summary(sample_data)
table_cells <- content %>% filter(content_type == "table cell")
table_data <- table_cells %>% filter(!is_header) %>% select(row_id, cell_id, text)
# Split data into individual columns
splits <- split(table_data, table_data$cell_id)
splits <- lapply(splits, function(x) x$text)
# Combine columns back together in wide format
for (c in c_num) {
df_selec$c = as.numeric(sub(",", ".",
pull(df_selec, c)))
selec = (df_selec$choix == 'A garder' | df_selec$choix == 'Ajout')
filename=paste(df_selec$code, '_HYDRO_QJM.txt', sep=''),
ok=selec
)
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
df_selec_AG = get_selection_AG(
"/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
"",
"Liste-station_RRSE.docx",
cnames=c('code','station',
'BV_km2',
'axe_principal_concerne',
'longueur_serie',
'commentaires',
'choix'),
c_num=c('BV_km2',
'longueur_serie'))
get_selection_NV = function (computer_data_path, listdir, listname) {
# Get the file path to the data
list_path = file.path(computer_data_path, listdir, listname)
# Extract the data as a data frame
df_selec = read.table(list_path,
header=TRUE,
encoding='UTF-8',
sep=';',
)
df_selec = tibble(code=as.character(df_selec$code),
filename=as.character(df_selec$filename),
ok=df_selec$ok)
return (df_selec)
}
# Example
# df_selec_NV = get_selection_NV(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "",
# "nival_selection.txt")
extract_meta = function (computer_data_path, filedir, filename, verbose=TRUE) {
# Convert the filename in vector
# If the filename is 'all' or regroup more than one filename
# Create a filelist to store all the filename
# Get all the filename in the data directory selected
filelist_tmp = list.files(file.path(computer_data_path,
# For all the filename in the directory selected
# If the filename extention is 'txt'
# Store the filename in the filelist
# If the filename regroup more than one filename
# The filelist correspond to the filename
}
# Create a blank data frame
# For all the file in the filelist
# Concatenate by raw data frames created by this function when filename correspond to only one filename
extract_meta(computer_data_path,
# Set the rownames by default (to avoid strange numbering)
# Get the filename from the vector
filename = filename[1]
print(paste("extraction of BH meta for file :", filename))
file_path = file.path(computer_data_path, filedir, filename)
if (file.exists(file_path) & substr(file_path, nchar(file_path), nchar(file_path)) != '/') {
metatxt = c(readLines(file_path, n=41, encoding="UTF-8"))
# Create a tibble with all the metadata needed
df_meta =
tibble(code=trimws(substr(metatxt[11], 38, nchar(metatxt[11]))),
nom=trimws(substr(metatxt[12], 39, nchar(metatxt[12]))),
territoire=trimws(substr(metatxt[13], 39, nchar(metatxt[13]))),
L93X=as.numeric(substr(metatxt[16], 38, 50)),
L93Y=as.numeric(substr(metatxt[16], 52, 63)),
surface_km2=as.numeric(substr(metatxt[19], 38, 50)),
statut=iStatut[trimws(substr(metatxt[26], 38, 50))],
finalite=iFinalite[trimws(substr(metatxt[26], 52, 56))],
type=iType[trimws(substr(metatxt[26], 58, 58))],
influence=iInfluence[trimws(substr(metatxt[26], 60, 60))],
debit=iDebit[trimws(substr(metatxt[26], 62, 62))],
QBE=iQBE[trimws(substr(metatxt[26], 72, 72))],
QME=iQME[trimws(substr(metatxt[26], 74, 74))],
QHE=iQHE[trimws(substr(metatxt[26], 76, 76))],
df_meta$region_hydro = iRegHydro[substr(df_meta$code, 1, 1)]
} else {
print(paste('filename', file_path, 'do not exist'))
return (NULL)
}
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# '',
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
extract_data = function (computer_data_path, filedir, filename, verbose=TRUE) {
# If the filename is 'all' or regroup more than one filename
# Create a filelist to store all the filename
# Get all the filename in the data directory selected
filelist_tmp = list.files(file.path(computer_data_path,
# For all the filename in the directory selected
# If the filename extention is 'txt'
# Store the filename in the filelist
# If the filename regroup more than one filename
# The filelist correspond to the filename
# For all the file in the filelist
# Concatenate by raw data frames created by this function when filename correspond to only one filename
extract_data(computer_data_path,
# Set the rownames by default (to avoid strange numbering)
# Get the filename from the vector
print(paste("extraction of BH data for file :", filename))
file_path = file.path(computer_data_path, filedir, filename)
if (file.exists(file_path) & substr(file_path, nchar(file_path), nchar(file_path)) != '/') {
# Extract the data as a data frame
df_data = read.table(file_path,
header=TRUE,
na.strings=c(' -99', ' -99.000'),
sep=';',
df_meta = extract_meta(computer_data_path, filedir, filename, verbose=FALSE)
# Create a tibble with the date as Date class and the code of the station
df_data = tibble(Date=as.Date(as.character(df_data$Date),
format="%Y%m%d"),
Qm3s=df_data$Qls * 1E-3,
df_data[-1:-2],
code=code)
return (df_data)
} else {
print(paste('filename', file_path, 'do not exist'))
return (NULL)
}
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# '',
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))