# extractNV.R
# Useful libraries
library(tools)
library(dplyr)
# Lookup table giving the name of the hydrological region associated with
# the first character of a station code (used as
# iRegHydro[substr(code, 1, 1)]).
# Accented letters are written as \u escapes so that this file stays pure
# ASCII and the names cannot be damaged by an encoding conversion:
#   \u00f4 = o circumflex, \u00e9 = e acute, \u00e8 = e grave,
#   \u00ce = I circumflex.
iRegHydro <- c('D'='Affluents du Rhin',
               'E'="Fleuves c\u00f4tiers de l'Artois-Picardie",
               'A'='Rhin',
               'B'='Meuse',
               'F'='Seine aval (Marne incluse)',
               'G'='Fleuves c\u00f4tiers haut normands',
               'H'='Seine amont',
               'I'='Fleuves c\u00f4tiers bas normands',
               'J'='Bretagne',
               'K'='Loire',
               'L'='Loire',
               'M'='Loire',
               'N'='Fleuves c\u00f4tiers au sud de la Loire',
               'O'='Garonne',
               'P'='Dordogne',
               'Q'='Adour',
               'R'='Charente',
               'S'="Fleuves c\u00f4tiers de l'Adour-Garonne",
               'U'='Sa\u00f4ne',
               'V'='Rh\u00f4ne',
               'W'='Is\u00e8re',
               'X'='Durance',
               'Y'='Fleuves c\u00f4tiers du Rh\u00f4ne-M\u00e9diterran\u00e9e et Corse',
               'Z'='\u00celes',
               '1'='Guadeloupe',
               '2'='Martinique',
               '5'='Guyane',
               '6'='Guyane',
               '7'='Guyane',
               '8'='Guyane',
               '9'='Guyane',
               '4'='R\u00e9union')
# Extraction of metadata
# Build the metadata table of the NV stations found in a data directory.
#
# Reads the station list `listname` with read.table(), keeps the rows whose
# CODE matches a '.txt' file of `filedir`, and appends one row (code and
# file path only, other columns NA) for every '.txt' file that is absent
# from the list.
#
# Args:
#   computer_data_path: root directory containing `filedir` and `listdir`.
#   filedir: sub-directory holding one '<code>.txt' file per station.
#   listdir: sub-directory holding the station list.
#   listname: file name of the station list; the columns CODE, NOM, X_L2E,
#     Y_L2E, S_km2 and Alt are read from it.
#   verbose: if TRUE, print which list is being read.
#
# Returns:
#   A tibble sorted by `code` with the columns code, nom, L93X, L93Y,
#   surface_km2, altitude_m, file_path, source and region_hydro, or NULL
#   when the station list cannot be found.
extractNVlist_meta <- function (computer_data_path, filedir, listdir, listname, verbose=TRUE) {

    # Print metadata if asked
    if (verbose) {
        print(paste("extraction of NV meta for file :", listname))
    }

    # Get the file path to the list of stations
    list_path <- file.path(computer_data_path, listdir, listname)
    # A path ending with '/' designates a directory (empty `listname`),
    # not a list file
    is_dir_path <- substr(list_path, nchar(list_path), nchar(list_path)) == '/'
    if (!file.exists(list_path) || is_dir_path) {
        print(paste('filename', list_path, 'do not exist'))
        return (NULL)
    }

    # Extract the station list as a data frame
    df_list <- read.table(list_path,
                          header=TRUE,
                          encoding='UTF-8')

    # Station codes for which a '.txt' data file is present
    dir_path <- file.path(computer_data_path, filedir)
    dir_files <- list.files(dir_path)
    txt_files <- dir_files[file_ext(dir_files) == 'txt']
    # Escaped and anchored pattern: only the trailing '.txt' is removed,
    # never a '<any char>txt' sequence inside the code itself
    codelist <- sub('\\.txt$', '', txt_files)

    has_file <- df_list$CODE %in% codelist
    missing_codes <- codelist[!(codelist %in% df_list$CODE)]
    df_list <- df_list[has_file,]

    # Create a tibble with all the metadata needed
    df_meta <- tibble(code=as.character(df_list$CODE),
                      nom=as.character(df_list$NOM),
                      L93X=df_list$X_L2E,
                      L93Y=df_list$Y_L2E,
                      surface_km2=df_list$S_km2,
                      altitude_m=df_list$Alt,
                      file_path=file.path(dir_path,
                                          paste0(df_list$CODE, '.txt')),
                      source='NV')

    # Only report and append when something is actually missing:
    # paste() turns a zero-length vector into "", which would otherwise
    # produce a spurious '.txt' path with no matching code
    if (length(missing_codes) > 0) {
        # Display stations for which metadata are missing
        print(paste('missing station meta for :', missing_codes))
        df_meta <- bind_rows(df_meta,
                             tibble(code=missing_codes,
                                    file_path=file.path(dir_path,
                                                        paste0(missing_codes, '.txt'))))
    }

    df_meta <- df_meta[order(df_meta$code),]
    # The first character of the code identifies the hydrological region
    df_meta$region_hydro <- iRegHydro[substr(df_meta$code, 1, 1)]
    return (df_meta)
}

# Example
# df_meta = extractNVlist_meta(
#     "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
#     'France207',
#     '',
#     'liste_bv_principaux_global.txt')


# Extraction of data
#
# Read the discharge data of one or several NV station files.
#
# Args:
#   computer_data_path: root directory containing `filedir`.
#   filedir: sub-directory holding the '<code>.txt' data files.
#   filename: a single file name, a vector of file names, or 'all' to read
#     every '.txt' file of `filedir`.
#   verbose: if TRUE, print the name of each file being read.
#
# Returns:
#   For a single file: a tibble with the columns Date (built from the
#   first three columns of the file as year-month-day), Qm3s (4th column),
#   QCode (5th column) and code (file name without '.txt'), or NULL when
#   the file cannot be found.
#   For several files or 'all': the row-wise concatenation of the
#   single-file results (files that cannot be found contribute no row).
extractNV_data <- function (computer_data_path, filedir, filename, verbose=TRUE) {

    # Convert the filename in vector
    filename <- c(filename)
    is_all <- all(filename == 'all')

    # If the filename is 'all' or regroups more than one filename
    if (is_all || length(filename) > 1) {

        if (is_all) {
            # Keep every '.txt' file of the data directory
            dir_files <- list.files(file.path(computer_data_path, filedir))
            filelist <- dir_files[file_ext(dir_files) == 'txt']
        } else {
            # The filelist corresponds to the filenames given
            filelist <- filename
        }

        # Read each file with the single-file branch of this function,
        # forwarding `verbose` so that verbose=FALSE is honoured
        tables <- lapply(filelist, function (f) {
            extractNV_data(computer_data_path, filedir, f, verbose=verbose)
        })

        # Concatenate by rows in one call; the leading empty data frame
        # guarantees a data frame even when no file could be read
        df_data <- do.call(rbind, c(list(data.frame()), tables))
        # Set the rownames by default (to avoid strange numbering)
        rownames(df_data) <- NULL
        return (df_data)
    }

    # Get the filename from the vector
    filename <- filename[1]

    # Print metadata if asked
    if (verbose) {
        print(paste("extraction of NV data for file :", filename))
    }

    # Get the file path to the data
    file_path <- file.path(computer_data_path, filedir, filename)
    # A path ending with '/' designates a directory (empty `filename`),
    # not a data file
    is_dir_path <- substr(file_path, nchar(file_path), nchar(file_path)) == '/'
    if (!file.exists(file_path) || is_dir_path) {
        print(paste('filename', file_path, 'do not exist'))
        return (NULL)
    }

    # Extract the data as a data frame; the first line is skipped and
    # '-1' / '-99.000' are read as NA
    df_raw <- read.table(file_path,
                         header=FALSE,
                         skip=1,
                         na.strings=c('-1', '-99.000'))

    # Create a tibble with the date as Date class and the code of the station
    date <- paste(df_raw[,1], df_raw[,2], df_raw[,3], sep='-')
    df_data <- tibble(Date=as.Date(date, format="%Y-%m-%d"),
                      Qm3s=df_raw[,4],
                      QCode=df_raw[,5],
                      code=sub('\\.txt$', '', filename))
    return (df_data)
}
# Example
# df_data = extractNV_data(
#     "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
#     'France207',
#     c('O0015310.txt', 'Q0214010.txt', 'P0115020.txt'))