diff --git a/processing/analyse.R b/processing/analyse.R index 5977ded39318d64477ed4fb5a3799142611132f6..604116b2b5fc61bf2f159944cef091d488607fc8 100644 --- a/processing/analyse.R +++ b/processing/analyse.R @@ -157,11 +157,11 @@ get_QMNAtrend = function (df_data, period) { } -get_VCN10trend = function (df_data, df_info, period) { +get_VCN10trend = function (df_data, df_meta, period) { # MINIMUM 10 DAY AVERAGE FLOW OVER THE YEAR : VCN10 # # Get all different stations code - Code = levels(factor(df_info$code)) + Code = levels(factor(df_meta$code)) df_data_roll = tibble() diff --git a/processing/extractBH.R b/processing/extractBH.R index 09ea390447fabbe5d2efc24fd513c338ce601981..a55d9f89c5ebfe95756a257e36608650e745702f 100644 --- a/processing/extractBH.R +++ b/processing/extractBH.R @@ -4,7 +4,7 @@ library(dplyr) library(officer) -# General information on station +# General metadata on station iStatut = c('0'='inconnu', '1'='station avec signification hydrologique', '2'='station sans signification hydrologique', @@ -102,8 +102,8 @@ get_selection = function (computer_data_path, listdir, listname, # 'longueur_serie')) -# Extraction of information -extractBH_info = function (computer_data_path, filedir, filename, verbose=TRUE) { +# Extraction of metadata +extractBH_meta = function (computer_data_path, filedir, filename, verbose=TRUE) { # Convert the filename in vector filename = c(filename) @@ -135,27 +135,27 @@ extractBH_info = function (computer_data_path, filedir, filename, verbose=TRUE) } # Create a blank data frame - df_info = data.frame() + df_meta = data.frame() # For all the file in the filelist for (f in filelist) { # Concatenate by raw data frames created by this function when filename correspond to only one filename - df_info = rbind(df_info, - extractBH_info(computer_data_path, + df_meta = rbind(df_meta, + extractBH_meta(computer_data_path, filedir, f)) } # Set the rownames by default (to avoid strange numbering) - rownames(df_info) = NULL - return (df_info) + rownames(df_meta) = NULL + return (df_meta) } # Get the filename from the vector filename = filename[1] - # Print information if asked + # Print metadata if asked if (verbose) { print(paste("extraction of BH info for file :", filename)) } @@ -167,29 +167,29 @@ extractBH_info = function (computer_data_path, filedir, filename, verbose=TRUE) if (file.exists(file_path) & substr(file_path, nchar(file_path), nchar(file_path)) != '/') { # Extract all the header - infotxt = c(readLines(file_path, n=41)) + metatxt = c(readLines(file_path, n=41)) - # Create a tibble with all the information needed - df_info = - tibble(code=trimws(substr(infotxt[11], 38, nchar(infotxt[11]))), - nom=trimws(substr(infotxt[12], 39, nchar(infotxt[12]))), - territoire=trimws(substr(infotxt[13], 39, nchar(infotxt[13]))), - L93X=as.numeric(substr(infotxt[16], 38, 50)), - L93Y=as.numeric(substr(infotxt[16], 52, 63)), - surface_km2=as.numeric(substr(infotxt[19], 38, 50)), - statut=iStatut[trimws(substr(infotxt[26], 38, 50))], - finalite=iFinalite[trimws(substr(infotxt[26], 52, 56))], - type=iType[trimws(substr(infotxt[26], 58, 58))], - influence=iInfluence[trimws(substr(infotxt[26], 60, 60))], - debit=iDebit[trimws(substr(infotxt[26], 62, 62))], - QBE=iQBE[trimws(substr(infotxt[26], 72, 72))], - QME=iQME[trimws(substr(infotxt[26], 74, 74))], - QHE=iQHE[trimws(substr(infotxt[26], 76, 76))], + # Create a tibble with all the metadata needed + df_meta = + tibble(code=trimws(substr(metatxt[11], 38, nchar(metatxt[11]))), + nom=trimws(substr(metatxt[12], 39, nchar(metatxt[12]))), + territoire=trimws(substr(metatxt[13], 39, nchar(metatxt[13]))), + L93X=as.numeric(substr(metatxt[16], 38, 50)), + L93Y=as.numeric(substr(metatxt[16], 52, 63)), + surface_km2=as.numeric(substr(metatxt[19], 38, 50)), + statut=iStatut[trimws(substr(metatxt[26], 38, 50))], + finalite=iFinalite[trimws(substr(metatxt[26], 52, 56))], + type=iType[trimws(substr(metatxt[26], 58, 58))], + influence=iInfluence[trimws(substr(metatxt[26], 60, 60))], + debit=iDebit[trimws(substr(metatxt[26], 62, 62))], + QBE=iQBE[trimws(substr(metatxt[26], 72, 72))], + QME=iQME[trimws(substr(metatxt[26], 74, 74))], + QHE=iQHE[trimws(substr(metatxt[26], 76, 76))], file_path=file_path, source='BH' ) - return (df_info) + return (df_meta) } else { print(paste('filename', file_path, 'do not exist')) @@ -198,7 +198,7 @@ extractBH_info = function (computer_data_path, filedir, filename, verbose=TRUE) } # Example -# df_info = extractBH_info( +# df_meta = extractBH_meta( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", # '', # c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt')) @@ -256,7 +256,7 @@ extractBH_data = function (computer_data_path, filedir, filename, verbose=TRUE) # Get the filename from the vector filename = filename[1] - # Print information if asked + # Print metadata if asked if (verbose) { print(paste("extraction of BH data for file :", filename)) } @@ -273,10 +273,10 @@ extractBH_data = function (computer_data_path, filedir, filename, verbose=TRUE) sep=';', skip=41) - # Extract all the information for the station - df_info = extractBH_info(computer_data_path, filedir, filename, verbose=FALSE) + # Extract all the metadata for the station + df_meta = extractBH_meta(computer_data_path, filedir, filename, verbose=FALSE) # Get the code of the station - code = df_info$code + code = df_meta$code # Create a tibble with the date as Date class and the code of the station df_data = tibble(Date=as.Date(as.character(df_data$Date), format="%Y%m%d"), diff --git a/processing/extractNV.R b/processing/extractNV.R index babdf908a572eb9f45e37e72305976738ea07c04..3f9ffe9af3d75d1bf652cb4d21f77318905fe728 100644 --- a/processing/extractNV.R +++ b/processing/extractNV.R @@ -3,12 +3,12 @@ library(tools) library(dplyr) -# Extraction of information -extractNVlist_info = function (computer_data_path, filedir, listdir, listname, verbose=TRUE) { +# Extraction of metadata +extractNVlist_meta = function (computer_data_path, filedir, listdir, listname, verbose=TRUE) { - # Print information if asked + # Print metadata if asked if (verbose) { - print(paste("extraction of NV info for file :", listname)) + print(paste("extraction of NV meta for file :", listname)) } # Get the file path to the list of station @@ -17,7 +17,7 @@ extractNVlist_info = function (computer_data_path, filedir, listdir, listname, v if (file.exists(list_path) & substr(list_path, nchar(list_path), nchar(list_path)) != '/') { # Extract the data as a data frame - df_info = read.table(list_path, + df_meta = read.table(list_path, header=TRUE) # Create a filelist to store all the filename @@ -37,44 +37,44 @@ extractNVlist_info = function (computer_data_path, filedir, listdir, listname, v } } - exist = df_info$CODE %in% codelist - missing = codelist[!(codelist %in% df_info$CODE)] - print(paste('missing station info for :', missing)) + exist = df_meta$CODE %in% codelist + missing = codelist[!(codelist %in% df_meta$CODE)] + print(paste('missing station meta for :', missing)) - df_info = df_info[exist,] - - # Create a tibble with all the information needed - df_info = - tibble(code=as.character(df_info$CODE), - nom=as.character(df_info$NOM), - L93X=df_info$X_L2E, - L93Y=df_info$Y_L2E, - surface_km2=df_info$S_km2, - altitude_m=df_info$Alt, + df_meta = df_meta[exist,] + + # Create a tibble with all the metadata needed + df_meta = + tibble(code=as.character(df_meta$CODE), + nom=as.character(df_meta$NOM), + L93X=df_meta$X_L2E, + L93Y=df_meta$Y_L2E, + surface_km2=df_meta$S_km2, + altitude_m=df_meta$Alt, file_path=file.path(dir_path, - paste(df_info$CODE, '.txt', sep='')), + paste(df_meta$CODE, '.txt', sep='')), source='NV' ) - df_info = bind_rows(df_info, + df_meta = bind_rows(df_meta, data.frame(code=missing, file_path=file.path(dir_path, paste(missing, '.txt', sep='')))) - df_info = df_info[order(df_info$code),] + df_meta = df_meta[order(df_meta$code),] } else { print(paste('filename', list_path, 'do not exist')) return (NULL) } - return (df_info) + return (df_meta) } # Example -# df_info = extractNVlist_info( +# df_meta = extractNVlist_meta( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", # 'France207', # '', @@ -133,7 +133,7 @@ extractNV_data = function (computer_data_path, filedir, filename, verbose=TRUE) # Get the filename from the vector filename = filename[1] - # Print information if asked + # Print metadata if asked if (verbose) { print(paste("extraction of NV data for file :", filename)) } diff --git a/processing/format.R b/processing/format.R index 28f9c7ee1559188d301f60da7292aab255012e40..d5ddb597bbc57defd69b6f285fd73774eff0f36f 100644 --- a/processing/format.R +++ b/processing/format.R @@ -1,35 +1,35 @@ library(dplyr) -join = function (df_data_BH, df_data_NV, df_info_BH, df_info_NV) { +join = function (df_data_BH, df_data_NV, df_meta_BH, df_meta_NV) { if (!is.null(df_data_NV) & !is.null(df_data_BH)) { # Get the station in common - common = levels(factor(df_info_NV[df_info_NV$code %in% df_info_BH$code,]$code)) + common = levels(factor(df_meta_NV[df_meta_NV$code %in% df_meta_BH$code,]$code)) # Get the Nv station to add - NVadd = levels(factor(df_info_NV[!(df_info_NV$code %in% df_info_BH$code),]$code)) + NVadd = levels(factor(df_meta_NV[!(df_meta_NV$code %in% df_meta_BH$code),]$code)) - # Select only the NV info to add - df_info_NVadd = df_info_NV[df_info_NV$code %in% NVadd,] + # Select only the NV meta to add + df_meta_NVadd = df_meta_NV[df_meta_NV$code %in% NVadd,] # Join NV data to BH data - df_info = full_join(df_info_BH, df_info_NVadd, by=c("code", "nom", "L93X", "L93Y", "surface_km2", "file_path")) + df_meta = full_join(df_meta_BH, df_meta_NVadd, by=c("code", "nom", "L93X", "L93Y", "surface_km2", "file_path")) # Select only the NV data to add df_data_NVadd = df_data_NV[df_data_NV$code %in% NVadd,] - # Join NV info to BH info + # Join NV meta to BH meta df_data = full_join(df_data_BH, df_data_NVadd, by=c("Date", "Qm3s", "code")) } else if (is.null(df_data_NV) & !is.null(df_data_BH)) { - df_info = df_info_BH + df_meta = df_meta_BH df_data = df_data_BH } else if (!is.null(df_data_NV) & is.null(df_data_BH)) { - df_info = df_info_NV + df_meta = df_meta_NV df_data = df_data_NV } else { stop('No data') } - return (list(data=df_data, info=df_info)) + return (list(data=df_data, meta=df_meta)) } diff --git a/script.R b/script.R index 1ff1e56a7e90409833f7f1f4286ef0d267b26ea0..386ec84aa7da64e46a696a815d96ebedf18cf2a1 100644 --- a/script.R +++ b/script.R @@ -48,7 +48,7 @@ NVfilename = # "all" -# Path to the list file of information about station that will be analysed +# Path to the list file of metadata about station that will be analysed NVlistdir = "" @@ -115,16 +115,16 @@ if (BHlistname != ""){ BHfilename = df_selec[df_selec$ok,]$filename } -# Extract information about selected stations -df_info_BH = extractBH_info(computer_data_path, BHfiledir, BHfilename) +# Extract metadata about selected stations +df_meta_BH = extractBH_meta(computer_data_path, BHfiledir, BHfilename) # Extract data about selected stations df_data_BH = extractBH_data(computer_data_path, BHfiledir, BHfilename) # NIVALE # -# Extract information about selected stations -df_info_NV = extractNVlist_info(computer_data_path, NVfiledir, NVlistdir, NVlistname) +# Extract metadata about selected stations +df_meta_NV = extractNVlist_meta(computer_data_path, NVfiledir, NVlistdir, NVlistname) ### /!\ missing station info ### # Extract data about selected stations @@ -132,21 +132,21 @@ df_data_NV = extractNV_data(computer_data_path, NVfiledir, NVfilename) # JOIN # -df_join = join(df_data_BH, df_data_NV, df_info_BH, df_info_NV) +df_join = join(df_data_BH, df_data_NV, df_meta_BH, df_meta_NV) df_data = df_join$data -df_info = df_join$info +df_meta = df_join$meta # TIME PANEL # # Plot time panel of debit by stations -# panel(df_data, df_info, figdir, "") -# panel(df_data, df_info, figdir, "", is_sqrt=TRUE) +# panel(df_data, df_meta, figdir, "") +# panel(df_data, df_meta, figdir, "", is_sqrt=TRUE) ### /!\ Removed 185 row(s) containing missing values (geom_path) -> remove NA ### # ANALYSE # # Compute gap parameters for stations -# df_lac = get_lacune(df_data, df_info) +# df_lac = get_lacune(df_data, df_meta) # QA TREND # @@ -156,4 +156,4 @@ res_QAtrend = get_QAtrend(df_data, period) # res_QMNAtrend = get_QMNAtrend(df_data, period) # VCN10 TREND # -# res_VCN10trend = get_VCN10trend(df_data, df_info, period) +res_VCN10trend = get_VCN10trend(df_data, df_meta, period)