Commit 709cea16 authored by Heraut Louis's avatar Heraut Louis
Browse files

Commenting

parent a739e525
No related merge requests found
Showing with 206 additions and 147 deletions
+206 -147
......@@ -23,7 +23,12 @@
#
# processing/extract.R
#
#
# Regroups functions to generation of generical station selection
# according to pre-existing directory of data or specific other
# selection format as '.docx' file from Agence de l'eau Adour-Garonne.
# Also useful to extract the data and the metadata present in the
# Banque Hydro files from a selection of station. Manages also
# shapefiles loading.
# Usefull library
......@@ -121,6 +126,8 @@ iRegHydro = c('D'='Affluents du Rhin',
'4'='Réunion')
## 2. SELECTION
### 2.1. Creation of selection
# Create a txt file that resume all the station data files present
# in a filedir
create_selection = function (computer_data_path, filedir, outname) {
......@@ -152,14 +159,13 @@ create_selection = function (computer_data_path, filedir, outname) {
write.table(df_file, outfile, sep=";", col.names=TRUE, quote=FALSE)
return (NULL)
}
# Example
# create_selection(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "France207",
# "nival_selection.txt")
### 2.2. Agence de l'eau Adour-Garonne selection
# Gets the selection of station from the 'Liste-station_RRSE.docx' file
get_selection_AG = function (computer_data_path, listdir, listname,
cnames=c('code','station', 'BV_km2',
......@@ -205,7 +211,6 @@ get_selection_AG = function (computer_data_path, listdir, listname,
ok=selec)
return (df_selec)
}
# Example
# df_selec_AG = get_selection_AG(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
......@@ -220,7 +225,7 @@ get_selection_AG = function (computer_data_path, listdir, listname,
# c_num=c('BV_km2',
# 'longueur_serie'))
### 2.3. INRAE selection
# Gets the selection of station from the selection txt file generated
# by the 'create_selection' function
get_selection_IN = function (computer_data_path, listdir, listname) {
......@@ -245,6 +250,8 @@ get_selection_IN = function (computer_data_path, listdir, listname) {
# "nival_selection.txt")
## 3. EXTRACTION
### 3.1. Extraction of metadata
# Extraction of metadata of stations
extract_meta = function (computer_data_path, filedir, filename,
verbose=TRUE) {
......@@ -311,47 +318,54 @@ extract_meta = function (computer_data_path, filedir, filename,
metatxt = c(readLines(file_path, n=41, encoding="UTF-8"))
# Create a tibble with all the metadata needed
# (IN for INRAE data and BH for BH data)
df_meta =
tibble(code=trimws(substr(metatxt[11], 38,
nchar(metatxt[11]))),
nom=trimws(substr(metatxt[12], 39,
nchar(metatxt[12]))),
territoire=trimws(substr(metatxt[13], 39,
nchar(metatxt[13]))),
gestionnaire=trimws(substr(metatxt[7], 60,
nchar(metatxt[7]))),
L93X_m_IN=as.numeric(substr(metatxt[16], 65, 77)),
L93X_m_BH=as.numeric(substr(metatxt[16], 38, 50)),
L93Y_m_IN=as.numeric(substr(metatxt[16], 79, 90)),
L93Y_m_BH=as.numeric(substr(metatxt[16], 52, 63)),
surface_km2_IN=as.numeric(substr(metatxt[19], 52, 63)),
surface_km2_BH=as.numeric(substr(metatxt[19], 38, 50)),
altitude_m_IN=as.numeric(substr(metatxt[20], 52, 63)),
altitude_m_BH=as.numeric(substr(metatxt[20], 38, 50)),
debut=substr(metatxt[25], 38, 50),
fin=substr(metatxt[25], 52, 63),
statut=iStatut[trimws(substr(metatxt[26], 38, 50))],
finalite=iFinalite[trimws(substr(metatxt[26], 52,
56))],
type=iType[trimws(substr(metatxt[26], 58, 58))],
influence=iInfluence[trimws(substr(metatxt[26], 60,
60))],
debit=iDebit[trimws(substr(metatxt[26], 62, 62))],
QBE=iQBE[trimws(substr(metatxt[26], 72, 72))],
QME=iQME[trimws(substr(metatxt[26], 74, 74))],
QHE=iQHE[trimws(substr(metatxt[26], 76, 76))],
file_path=file_path,
)
tibble(
# Station code
code=trimws(substr(metatxt[11], 38,
nchar(metatxt[11]))),
# Station name
nom=trimws(substr(metatxt[12], 39,
nchar(metatxt[12]))),
# Territory
territoire=trimws(substr(metatxt[13], 39,
nchar(metatxt[13]))),
# Administrator
gestionnaire=trimws(substr(metatxt[7], 60,
nchar(metatxt[7]))),
# Lambert 93 localisation
L93X_m_IN=as.numeric(substr(metatxt[16], 65, 77)),
L93X_m_BH=as.numeric(substr(metatxt[16], 38, 50)),
L93Y_m_IN=as.numeric(substr(metatxt[16], 79, 90)),
L93Y_m_BH=as.numeric(substr(metatxt[16], 52, 63)),
# Surface
surface_km2_IN=as.numeric(substr(metatxt[19], 52, 63)),
surface_km2_BH=as.numeric(substr(metatxt[19], 38, 50)),
# Elevation
altitude_m_IN=as.numeric(substr(metatxt[20], 52, 63)),
altitude_m_BH=as.numeric(substr(metatxt[20], 38, 50)),
# Start and end of the data
debut=substr(metatxt[25], 38, 50),
fin=substr(metatxt[25], 52, 63),
# Different other info about the flow quality and type
statut=iStatut[trimws(substr(metatxt[26], 38, 50))],
finalite=iFinalite[trimws(substr(metatxt[26], 52, 56))],
type=iType[trimws(substr(metatxt[26], 58, 58))],
influence=iInfluence[trimws(substr(metatxt[26], 60, 60))],
debit=iDebit[trimws(substr(metatxt[26], 62, 62))],
QBE=iQBE[trimws(substr(metatxt[26], 72, 72))],
QME=iQME[trimws(substr(metatxt[26], 74, 74))],
QHE=iQHE[trimws(substr(metatxt[26], 76, 76))],
# The path to the data file of BH
file_path=file_path)
# Adding of the hydrological region
df_meta$region_hydro = iRegHydro[substr(df_meta$code, 1, 1)]
return (df_meta)
} else {
......@@ -359,14 +373,13 @@ extract_meta = function (computer_data_path, filedir, filename,
return (NULL)
}
}
# Example
# df_meta = extract_meta(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
# "BanqueHydro_Export2021",
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
### 3.2. Extraction of data
# Extraction of data
extract_data = function (computer_data_path, filedir, filename,
verbose=TRUE) {
......@@ -454,7 +467,6 @@ extract_data = function (computer_data_path, filedir, filename,
return (NULL)
}
}
# Example
# df_data = extract_data(
# "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
......@@ -462,6 +474,7 @@ extract_data = function (computer_data_path, filedir, filename,
# c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
## 4. SHAPEFILE MANAGEMENT
# Generates a list of shapefiles to draw a hydrological map of
# the France
ini_shapefile = function (computer_data_path, fr_shpdir, fr_shpname, bs_shpdir, bs_shpname, rv_shpdir, rv_shpname, riv=TRUE) {
......
......@@ -23,160 +23,206 @@
#
# processing/format.R
#
#
# Manages all the format problem of data and info. Mainly problem of
# input and output of the 'StatsAnalysisTrend' package. It also allows
# to join different selections of station and to gets exact period of
# trend analysis.
# Usefull library
library(dplyr)
join = function (df_data_AG, df_data_NV, df_meta_AG, df_meta_NV) {
if (!is.null(df_data_NV) & !is.null(df_data_AG)) {
# Get the station in common
common = levels(factor(df_meta_NV[df_meta_NV$code %in% df_meta_AG$code,]$code))
# Get the Nv station to add
NVadd = levels(factor(df_meta_NV[!(df_meta_NV$code %in% df_meta_AG$code),]$code))
# Select only the NV meta to add
df_meta_NVadd = df_meta_NV[df_meta_NV$code %in% NVadd,]
df_meta_AG$source = 'AG'
df_meta_NVadd$source = 'NV'
# Join NV data to AG data
df_meta = full_join(df_meta_AG, df_meta_NVadd)
# Select only the NV data to add
df_data_NVadd = df_data_NV[df_data_NV$code %in% NVadd,]
# Join NV meta to AG meta
df_data = full_join(df_data_AG, df_data_NVadd)
} else if (is.null(df_data_NV) & !is.null(df_data_AG)) {
df_meta_AG$source = 'AG'
df_meta = df_meta_AG
df_data = df_data_AG
} else if (!is.null(df_data_NV) & is.null(df_data_AG)) {
df_meta_NV$source = 'NV'
df_meta = df_meta_NV
df_data = df_data_NV
} else {
stop('No data')
}
return (list(data=df_data, meta=df_meta))
}
# Compute the start and the end of the period for a trend analysis
# according to the accessible data
get_period = function (per, df_Xtrend, df_XEx, df_Xlist) {
# Convert results of trend to tibble
df_Xtrend = tibble(df_Xtrend)
# Fix the period start and end of the accessible period to a
# default date
df_Xtrend$period_start = as.Date("1970-01-01")
df_Xtrend$period_end = as.Date("1970-01-01")
# Change the format of the date variable to date
df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
df_XExtmp = df_Xlisttmp$data
# For all the different group
for (g in df_Xlisttmp$info$group) {
# Get the analyse data associated to the group
df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,]
# Get the id in the trend result associated to the group
id = which(df_Xtrend$group1 == g)
# Compute index of the nearest accessible start and end date
iStart = which.min(abs(df_XExtmp_code$Date
- as.Date(per[1])))
iEnd = which.min(abs(df_XExtmp_code$Date
- as.Date(per[2])))
# Store the start and end of the trend analysis
df_Xtrend$period_start[id] =
as.Date(df_XExtmp_code$Date[iStart])
df_Xtrend$period_end[id] =
as.Date(df_XExtmp_code$Date[iEnd])
}
return (df_Xtrend)
}
# Prepare the data in order to have a list of a data tibble with date, group and flow column and a info tibble with the station code and group column to fit the entry of the 'StatsAnalysisTrend' package
## 1. INPUT
### 1.1. Preparation
# Prepares the data in order to have a list of a data tibble with
# date, group and flow column and a info tibble with the station code
# and group column to fit the entry of the 'extract.Var' function in
# the 'StatsAnalysisTrend' package
prepare = function(df_data, colnamegroup=NULL) {
# Forces the column name to group to be a vector
colnamegroup = c(colnamegroup)
# Converts it to index of the column to group
colindgroup = which(colnames(df_data) == colnamegroup)
# Groups the data by those indexes
df_data = group_by_at(df_data, colindgroup)
# Creates a new tibble of data with a group column
data = tibble(Date=df_data$Date,
group=group_indices(df_data),
Qm3s=df_data$Qm3s)
Qm3s=df_data$Qm3s)
# Gets the different value of the group
Gkey = group_keys(df_data)
# Creates a new tibble of info of the group
info = bind_cols(group=seq(1:nrow(Gkey)),
Gkey)
return (list(data=data, info=info))
}
# Stores data and info tibble as a list that match the entry of
# the 'extract.Var' function
res = list(data=data, info=info)
return (res)
}
### 1.2. Re-preparation
# Re-prepares the data in outing of the 'extract.Var' function in
# the 'StatsAnalysisTrend' package in order to fit again to the
# entry of the same function
reprepare = function(df_XEx, df_Xlist, colnamegroup=NULL) {
# Changes the column name of the results of the
# 'extract.Var' function
colnames(df_XEx) = c('Date', 'group', 'Qm3s')
# Converts Date column as character
df_XEx$Date = as.character(df_XEx$Date)
# Takes the first date as example
exDate = df_XEx$Date[1]
# Finds the number of dash in the date
nbt = lengths(regmatches(exDate, gregexpr('-', exDate)))
# If there is only one dash
if (nbt == 1) {
df_XEx$Date = paste(df_XEx$Date, '01', sep='-')
# Converts it to date from a year and a month
df_XEx$Date = paste(df_XEx$Date, '01', sep='-')
# If there is no dash
} else if (nbt == 0) {
df_XEx$Date = paste(df_XEx$Date, '01', '01', sep='-')
# Converts it to date from only a year
df_XEx$Date = paste(df_XEx$Date, '01', '01', sep='-')
# If there is more than 2 dashes
} else if (nbt != 2) {
This is not a classical date
stop('erreur of date format')
}
# Recreates the outing of the 'extract.Var' function nicer
df_XEx = bind_cols(Date=as.Date(df_XEx$Date,
format="%Y-%m-%d"),
df_XEx[-1],
df_Xlist$info[df_XEx$group,
2:ncol(df_Xlist$info)])
# Prepares the nicer outing
df_XlistEx = prepare(df_XEx, colnamegroup=colnamegroup)
return (df_XlistEx)
}
## 2. OUTPUT
# Cleans the trend results of the function 'Estimate.stats' in the
# 'StatsAnalysisTrend' package. It adds the station code and the
# intercept of the trend to the trend results. Also makes the data
# more presentable.
clean = function (df_Xtrend, df_XEx, df_Xlist) {
# Reprepares the list of data and info in order to be presentable
df_Xlist = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
# print(df_Xlist)
# Adds a column of station code
df_Xlist$data$code = NA
# For all the group
for (g in df_Xlist$info$group) {
# Adds the station code corresponding to each group info
df_Xlist$data$code[which(df_Xlist$data$group == g)] = df_Xlist$info$code[df_Xlist$info$group == g]
}
# df_Xlist$data = df_Xlist$data[, !names(df_Xlist$data) == "group")]
# Adds the info to trend tibble
df_Xtrend = bind_cols(df_Xtrend,
df_Xlist$info[df_Xtrend$group1,
2:ncol(df_Xlist$info)])
# Renames the column of group of trend results
colnames(df_Xtrend)[1] = 'group'
# Adds the intercept value of trend
df_Xtrend = get_intercept(df_Xtrend, df_Xlist, unit2day=365.25)
# df_Xtrend$intercept = intercept
# Changes the position of the intercept column
df_Xtrend = relocate(df_Xtrend, intercept, .after=trend)
return (list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info))
# Creates a list of results to return
res = list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info)
return (res)
}
## 3. OTHER
### 3.1. Joining selection
# Joins tibbles of different selection of station as a unique one
join = function (df_data_AG, df_data_IN, df_meta_AG, df_meta_IN) {
# If there is an INRAE and an Agence de l'eau Adour-Garonne selection
if (!is.null(df_data_IN) & !is.null(df_data_AG)) {
# Gets the station in common
common = levels(factor(df_meta_IN[df_meta_IN$code %in% df_meta_AG$code,]$code))
# Gets the Nv station to add
INadd = levels(factor(df_meta_IN[!(df_meta_IN$code %in% df_meta_AG$code),]$code))
# Selects only the IN meta to add
df_meta_INadd = df_meta_IN[df_meta_IN$code %in% INadd,]
# Names the source of the selection
df_meta_AG$source = 'AG'
df_meta_INadd$source = 'IN'
# Joins IN data to AG data
df_meta = full_join(df_meta_AG, df_meta_INadd)
# Selects only the IN data to add
df_data_INadd = df_data_IN[df_data_IN$code %in% INadd,]
# Joins IN meta to AG meta
df_data = full_join(df_data_AG, df_data_INadd)
# If there is just an Agence de l'eau Adour-Garonne selection
} else if (is.null(df_data_IN) & !is.null(df_data_AG)) {
df_meta_AG$source = 'AG'
df_meta = df_meta_AG
df_data = df_data_AG
# If there is just an INRAE selection
} else if (!is.null(df_data_IN) & is.null(df_data_AG)) {
df_meta_IN$source = 'IN'
df_meta = df_meta_IN
df_data = df_data_IN
# If there is no selection
} else {
stop('No data')
}
return (list(data=df_data, meta=df_meta))
}
### 3.2. Period of trend
# Compute the start and the end of the period for a trend analysis
# according to the accessible data
get_period = function (per, df_Xtrend, df_XEx, df_Xlist) {
# Converts results of trend to tibble
df_Xtrend = tibble(df_Xtrend)
# Fix the period start and end of the accessible period to a
# default date
df_Xtrend$period_start = as.Date("1970-01-01")
df_Xtrend$period_end = as.Date("1970-01-01")
# Changes the format of the date variable to date
df_Xlisttmp = reprepare(df_XEx, df_Xlist, colnamegroup=c('code'))
df_XExtmp = df_Xlisttmp$data
# For all the different group
for (g in df_Xlisttmp$info$group) {
# Gets the analyse data associated to the group
df_XExtmp_code = df_XExtmp[df_XExtmp$group == g,]
# Gets the id in the trend result associated to the group
id = which(df_Xtrend$group1 == g)
# Computes index of the nearest accessible start and end date
iStart = which.min(abs(df_XExtmp_code$Date
- as.Date(per[1])))
iEnd = which.min(abs(df_XExtmp_code$Date
- as.Date(per[2])))
# Stores the start and end of the trend analysis
df_Xtrend$period_start[id] =
as.Date(df_XExtmp_code$Date[iStart])
df_Xtrend$period_end[id] =
as.Date(df_XExtmp_code$Date[iEnd])
}
return (df_Xtrend)
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment