diff --git a/README.txt b/README.txt index 9a49439cae4be631e3f5c22da4a8636fb61af9e4..a933a7a1db643e0b27e7763437f10dbbb7a01c7c 100644 --- a/README.txt +++ b/README.txt @@ -1,7 +1 @@ - - -tools - -install_github("https://github.com/benRenard/BFunk") - -install.packages("ggplot2") +ASH package diff --git a/plotting/panel.R b/plotting/panel.R index 0501dcec5c183b407dd7ad274c6afb972fae2edb..79a330b60550e03cf6e1518396aa80e03b87eca1 100644 --- a/plotting/panel.R +++ b/plotting/panel.R @@ -1,25 +1,45 @@ +# Usefull library library(ggplot2) +width = 30 +height = 14 +dpi = 100 + # Time panel panel = function (df_data, df_info, figdir, filedir, span=Inf) { - + # If there is not a dedicated figure directory it creats one # outdir = file.path(figdir, filedir, paste('span_', as.character(span), '_years', sep='')) - outdir = file.path(figdir, filedir, '_years', sep='') + outdir = file.path(figdir, filedir, sep='') if (!(file.exists(outdir))) { dir.create(outdir) } - # Create name for the figure file - outfile = paste('Panel_', df_info$code, '.png', sep='') - - plot = - ggplot(df_data, aes(x=Date, y=Qls)) + - geom_line() + # Get all different stations code + Code = levels(factor(df_info$code)) + for (code in Code) { + # Create name for the figure file + outfile = paste('Panel_', code, '.pdf', sep='') + print(paste("Plotting for sation :", code)) + + df_data_code = df_data[df_data$code==code,] + + # Plot + plot = + ggplot(df_data_code, aes(x=Date, y=Qls)) + + geom_line() + + # Save the plot + ggsave(path=outdir, + filename=outfile, + plot=plot, + width=width, + height=height, + dpi=dpi, + units='cm') + } - - return (plot) } diff --git a/processing/analyse.R b/processing/analyse.R new file mode 100644 index 0000000000000000000000000000000000000000..b7441a6443d7a9a3c3687a8c264a4de90094e428 --- /dev/null +++ b/processing/analyse.R @@ -0,0 +1,59 @@ +# Usefull library +# library(StatsAnalysisTrend) +library(dplyr) + + +# Compute the time gap by station +get_lacune = function (df_data, df_info) { + + # Get all different stations code + Code = levels(factor(df_info$code)) + + # Create new vector to stock results for cumulative time gap by station + tLac = c() + # Create new vector to stock results for mean time gap by station + meanLac = c() + + # Get rows where there is no NA + NoNA = complete.cases(df_data) + # Get data where there is no NA + df_data_NoNA = df_data[NoNA,] + + # For every station + for (code in Code) { + + # Get only the data rows for the selected station + df_data_code = df_data[df_data$code==code,] + # Get date for the selected station + Date = df_data_code$Date + # Get time span for the selection station + span = as.numeric(Date[length(Date)] - Date[1]) + + # Get only the data rows with no NA for the selected station + df_data_NoNA_code = df_data_NoNA[df_data_NoNA$code==code,] + # Get date for the selected station + Date_NoNA = df_data_NoNA_code$Date + + # Compute the time gap + lac = as.numeric(diff(Date_NoNA) - 1) + + # Compute the cumulative gap + lac_sum = sum(lac) + # Store the cumulative gap rate + tLac = c(tLac, lac_sum/span) + + # Compute the mean gap + lac_mean = mean(lac[lac != 0]) + # Store the mean gap + meanLac = c(meanLac, lac_mean) + + } + + # Compute the cumulative gap rate in pourcent + tLac100 = tLac * 100 + + # Create a tibble + df_lac = tibble(code=Code, tLac100=tLac100, meanLac=meanLac) + + return (df_lac) +} diff --git a/processing/extract.R b/processing/extract.R index 1bac2919aad5a921d7f4d5f2c6df81a51236249f..7da776274a2630065e214c97dd579782aa1b66de 100644 --- a/processing/extract.R +++ b/processing/extract.R @@ -1,6 +1,7 @@ +# Usefull library library(tools) library(dplyr) -# library(readr) + # General information on station iStatut = c('0'='inconnu', @@ -48,40 +49,70 @@ iQHE = c('0'='qualité hautes eaux inconnue', # Extraction of information -extract_info = function (data_path, filedir, filename) { - +extract_info = function (data_path, filedir, filename, verbose=TRUE) { + + # Convert the filename in vector filename = c(filename) + # If the filename is 'all' or regroup more than one filename if (all(filename == 'all') | length(filename) > 1) { + + # If the filename is 'all' if (all(filename == 'all')) { + # Create a filelist to store all the filename filelist = c() + # Get all the filename in the data directory selected filelist_tmp = list.files(file.path(data_path, filedir)) + + # For all the filename in the directory selected for (f in filelist_tmp) { + # If the filename extention is 'txt' if (file_ext(f) == 'txt') { + # Store the filename in the filelist filelist = c(filelist, f) } } + + # If the filename regroup more than one filename } else if (length(filename > 1)) { + # The filelist correspond to the filename filelist = filename - } + } + + # Create a blank data frame df_info = data.frame() + + # For all the file in the filelist for (f in filelist) { + + # Concatenate by raw data frames created by this function when filename correspond to only one filename df_info = rbind(df_info, extract_info(data_path, filedir, - f) - ) + f)) } + + # Set the rownames by default (to avoid strange numbering) rownames(df_info) = NULL return (df_info) } - filename = filename[1] + # Get the filename from the vector + filename = filename[1] + + # Print information if asked + if (verbose) { + print(paste("extraction of info for file :", filename)) + } + # Get the file path to the data file_path = file.path(data_path, filedir, filename) + + # Extract all the header infotxt = c(readLines(file_path, n=41)) - + + # Create a tibble with all the information needed df_info = tibble(code=trimws(substr(infotxt[11], 38, nchar(infotxt[11]))), nom=trimws(substr(infotxt[12], 39, nchar(infotxt[12]))), @@ -103,6 +134,7 @@ extract_info = function (data_path, filedir, filename) { return (df_info) } +# Example # df_info = extract_info( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", # '', @@ -110,37 +142,63 @@ extract_info = function (data_path, filedir, filename) { # Extraction of data -extract_data = function (data_path, filedir, filename) { +extract_data = function (data_path, filedir, filename, verbose=TRUE) { + # Convert the filename in vector filename = c(filename) + # If the filename is 'all' or regroup more than one filename if (all(filename == 'all') | length(filename) > 1) { + + # If the filename is 'all' if (all(filename == 'all')) { + # Create a filelist to store all the filename filelist = c() + # Get all the filename in the data directory selected filelist_tmp = list.files(file.path(data_path, filedir)) + + # For all the filename in the directory selected for (f in filelist_tmp) { + # If the filename extention is 'txt' if (file_ext(f) == 'txt') { + # Store the filename in the filelist filelist = c(filelist, f) } } + # If the filename regroup more than one filename } else if (length(filename > 1)) { + # The filelist correspond to the filename filelist = filename } + + # Create a blank data frame df_data = data.frame() + + # For all the file in the filelist for (f in filelist) { + + # Concatenate by raw data frames created by this function when filename correspond to only one filename df_data = rbind(df_data, extract_data(data_path, filedir, - f) - ) + f)) } + + # Set the rownames by default (to avoid strange numbering) rownames(df_data) = NULL return (df_data) } + # Get the filename from the vector filename = filename[1] + + # Print information if asked + if (verbose) { + print(paste("extraction of data for file :", filename)) + } + # Get the file path to the data file_path = file.path(data_path, filedir, filename) df_data = read.table(file_path, @@ -149,9 +207,11 @@ extract_data = function (data_path, filedir, filename) { sep=';', skip=41)[,1:2] - df_info = extract_info(data_path, filedir, filename) + # Extract all the information for the station + df_info = extract_info(data_path, filedir, filename, verbose=FALSE) + # Get the code of the station code = df_info$code - + # Create a tibble with the date as Date class and the code of the station df_data = tibble(Date=as.Date(as.character(df_data$Date), format="%Y%m%d"), df_data[-1], @@ -160,6 +220,7 @@ extract_data = function (data_path, filedir, filename) { return (df_data) } +# Example # df_data = extract_data( # "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/data", # '', diff --git a/processing/stationnarite.R b/processing/stationnarite.R deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/script.R b/script.R index f45b8da05e93c1b3355f46ed3dffe97f6c5dc7a6..f29310455f9c9791849222530efce12336409ea4 100644 --- a/script.R +++ b/script.R @@ -9,7 +9,8 @@ computer_work_path = "/home/louis/Documents/bouleau/INRAE/CDD_stationnarite/ASH" filedir = "" -filename = "H5920011_HYDRO_QJM.txt" +filename = c("H5920011_HYDRO_QJM.txt", "K4470010_HYDRO_QJM.txt") +# filename = "all" ################## @@ -19,6 +20,7 @@ setwd(computer_work_path) # Sourcing R file source('processing/extract.R') +source('processing/analyse.R') source('plotting/panel.R') # Usefull library @@ -33,9 +35,14 @@ figdir = file.path(computer_work_path, 'figures') print(paste('figdir :', figdir)) +# Extract information about selected stations df_info = extract_info(computer_data_path, filedir, filename) + +# Extract data about selected stations df_data = extract_data(computer_data_path, filedir, filename) +# Plot time panel of debit by stations panel(df_data, df_info, figdir, filedir) - +# Compute gap parameters for stations +df_lac = get_lacune(df_data, df_info) diff --git a/script_install.R b/script_install.R new file mode 100644 index 0000000000000000000000000000000000000000..aa217d6a7a079ba9682d5da821511d21c1164d13 --- /dev/null +++ b/script_install.R @@ -0,0 +1,17 @@ + + +install.packages("tools") +install.packages("devtools") +install.packages("dplyr") +install.packages("ggplot2") + +library(devtools) +install_github("https://github.com/benRenard/BFunk") +# install_git("git@gitlab-ssh.irstea.fr:valentin.mansanarez/statistical-analysis-of-trends.git") # SSH +install_git("https://gitlab.irstea.fr/valentin.mansanarez/statistical-analysis-of-trends.git") + + + + + +