extract.R 7.73 KB
Newer Older
# Useful libraries
louis_heraut's avatar
louis_heraut committed
library(tools)
Heraut Louis's avatar
Heraut Louis committed
library(dplyr)
louis_heraut's avatar
louis_heraut committed

Heraut Louis's avatar
Heraut Louis committed
# Lookup tables for the general information on a station.
# Each table is a named character vector: the names are the one-character
# codes read from a file header by extract_info, the values are the
# corresponding labels. They are indexed by code, e.g. iStatut["1"].

# Status of the station
iStatut = c(
    "0" = "inconnu",
    "1" = "station avec signification hydrologique",
    "2" = "station sans signification hydrologique",
    "3" = "station d'essai"
)

# Purpose of the station
iFinalite = c(
    "0" = "inconnue",
    "1" = "hydrométrie générale",
    "2" = "alerte de crue",
    "3" = "hydrométrie générale et alerte de crue",
    "4" = "gestion d'ouvrage",
    "5" = "police des eaux",
    "6" = "suivi d'étiage",
    "7" = "bassin expérimental",
    "8" = "drainage"
)

# Type of the station
iType = c(
    "0" = "inconnu",
    "1" = "une échelle",
    "2" = "deux échelles, station mère",
    "3" = "deux échelles, station fille",
    "4" = "débits mesurés",
    "5" = "virtuelle"
)

# Influence on the station
iInfluence = c(
    "0" = "inconnue",
    "1" = "nulle ou faible",
    "2" = "en étiage seulement",
    "3" = "forte en toute saison"
)

# Nature of the discharge
iDebit = c(
    "0" = "reconstitué",
    "1" = "réel (prise en compte de l'eau rajoutée ou retirée du bassin selon aménagements)",
    "2" = "naturel"
)

# Quality of the low water measurements
iQBE = c(
    "0" = "qualité basses eaux inconnue",
    "1" = "qualité basses eaux bonne",
    "2" = "qualité basses eaux douteuse"
)

# Quality of the mean water measurements
iQME = c(
    "0" = "qualité moyennes eaux inconnue",
    "1" = "qualité moyennes eaux bonne",
    "2" = "qualité moyennes eaux douteuse"
)

# Quality of the high water measurements
iQHE = c(
    "0" = "qualité hautes eaux inconnue",
    "1" = "qualité hautes eaux bonne",
    "2" = "qualité hautes eaux douteuse"
)
louis_heraut's avatar
louis_heraut committed


Heraut Louis's avatar
Heraut Louis committed
# Extraction of information
# Extract the station information stored in the header of one or several
# data files.
#
# Arguments:
#   data_path  root directory of the data
#   filedir    sub-directory of 'data_path' that contains the files
#   filename   a single file name, a vector of file names, or 'all' to
#              process every file of the directory whose extension is 'txt'
#   verbose    if TRUE, print the name of each file being processed
#
# Value:
#   For a single file, a one-row tibble with the columns code, nom,
#   territoire, L93X, L93Y, surface, statut, finalite, type, influence,
#   debit, QBE, QME, QHE and file_path. For several files, the row-wise
#   concatenation of these tibbles (an empty data frame when there is no
#   file to process).
extract_info = function (data_path, filedir, filename, verbose=TRUE) {

    # Convert the filename in vector
    filename = c(filename)

    # If the filename is 'all' or regroups more than one filename
    if (all(filename == 'all') || length(filename) > 1) {

        if (all(filename == 'all')) {
            # Keep only the 'txt' files of the selected data directory
            filelist = list.files(file.path(data_path, filedir))
            filelist = filelist[file_ext(filelist) == 'txt']
        } else {
            # The filelist corresponds to the filenames given
            filelist = filename
        }

        # Run this function on each file separately. The 'verbose' flag is
        # forwarded so that verbose=FALSE also silences the per-file calls.
        parts = lapply(filelist, function (f) {
            extract_info(data_path, filedir, f, verbose=verbose)
        })

        # Concatenate by row in a single call. The leading empty data frame
        # keeps the result a data frame even when there is no file.
        df_info = do.call(rbind, c(list(data.frame()), unname(parts)))

        # Set the rownames by default (to avoid strange numbering)
        rownames(df_info) = NULL
        return (df_info)
    }

    # Get the filename from the vector
    filename = filename[1]

    # Print information if asked
    if (verbose) {
        print(paste("extraction of info for file :", filename))
    }

    # Get the file path to the data
    file_path = file.path(data_path, filedir, filename)

    # Read the first 41 lines of the file, which hold the header
    infotxt = readLines(file_path, n=41)

    # Fixed-width slice of the header line 'i', from character 'start' to
    # character 'stop' (end of the line by default)
    field = function (i, start, stop=nchar(infotxt[i])) {
        substr(infotxt[i], start, stop)
    }

    # Create a tibble with all the information needed. The one-character
    # codes of line 26 are translated with the lookup tables defined at the
    # top of this file; a code missing from a table gives NA.
    df_info =
        tibble(code=trimws(field(11, 38)),
               nom=trimws(field(12, 39)),
               territoire=trimws(field(13, 39)),
               L93X=as.numeric(field(16, 38, 50)),
               L93Y=as.numeric(field(16, 52, 63)),
               surface=as.numeric(field(19, 38, 50)),
               statut=iStatut[trimws(field(26, 38, 50))],
               finalite=iFinalite[trimws(field(26, 52, 56))],
               type=iType[trimws(field(26, 58, 58))],
               influence=iInfluence[trimws(field(26, 60, 60))],
               debit=iDebit[trimws(field(26, 62, 62))],
               QBE=iQBE[trimws(field(26, 72, 72))],
               QME=iQME[trimws(field(26, 74, 74))],
               QHE=iQHE[trimws(field(26, 76, 76))],
               file_path=file_path
               )

    return (df_info)
}

Heraut Louis's avatar
Heraut Louis committed
# df_info = extract_info(
Heraut Louis's avatar
Heraut Louis committed
    # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
    # '',
    # c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))
Heraut Louis's avatar
Heraut Louis committed
# Extraction of data
# Extract the time series stored in one or several data files.
#
# Arguments:
#   data_path  root directory of the data
#   filedir    sub-directory of 'data_path' that contains the files
#   filename   a single file name, a vector of file names, or 'all' to
#              process every file of the directory whose extension is 'txt'
#   verbose    if TRUE, print the name of each file being processed
#
# Value:
#   For a single file, a tibble with a 'Date' column of class Date, the
#   second column of the file and a 'code' column holding the station code
#   returned by extract_info. For several files, the row-wise concatenation
#   of these tibbles (an empty data frame when there is no file to process).
extract_data = function (data_path, filedir, filename, verbose=TRUE) {

    # Convert the filename in vector
    filename = c(filename)

    # If the filename is 'all' or regroups more than one filename
    if (all(filename == 'all') || length(filename) > 1) {

        if (all(filename == 'all')) {
            # Keep only the 'txt' files of the selected data directory
            filelist = list.files(file.path(data_path, filedir))
            filelist = filelist[file_ext(filelist) == 'txt']
        } else {
            # The filelist corresponds to the filenames given
            filelist = filename
        }

        # Run this function on each file separately. The 'verbose' flag is
        # forwarded so that verbose=FALSE also silences the per-file calls.
        parts = lapply(filelist, function (f) {
            extract_data(data_path, filedir, f, verbose=verbose)
        })

        # Concatenate by row in a single call. The leading empty data frame
        # keeps the result a data frame even when there is no file.
        df_data = do.call(rbind, c(list(data.frame()), unname(parts)))

        # Set the rownames by default (to avoid strange numbering)
        rownames(df_data) = NULL
        return (df_data)
    }

    # Get the filename from the vector
    filename = filename[1]

    # Print information if asked
    if (verbose) {
        print(paste("extraction of data for file :", filename))
    }

    # Get the file path to the data
    file_path = file.path(data_path, filedir, filename)

    # Read the ';' separated table located after the 41 header lines and
    # keep its first two columns. The listed strings are read as NA.
    df_data = read.table(file_path,
                         header=TRUE,
                         na.strings=c('     -99', ' -99.000'),
                         sep=';',
                         skip=41)[,1:2]

    # Extract all the information for the station
    df_info = extract_info(data_path, filedir, filename, verbose=FALSE)
    # Get the code of the station
    code = df_info$code

    # Create a tibble with the date as Date class, the remaining column of
    # the table and the code of the station
    df_data = tibble(Date=as.Date(as.character(df_data$Date),
                                  format="%Y%m%d"),
                     df_data[-1],
                     code=code)

    return (df_data)
}

Heraut Louis's avatar
Heraut Louis committed
# df_data = extract_data(
Heraut Louis's avatar
Heraut Louis committed
#     "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data",
#     '',
#     c('H5920011_HYDRO_QJM.txt', 'K4470010_HYDRO_QJM.txt'))