break.R 6.05 KiB
# \\\
# Copyright 2021-2022 Louis Hraut*1
# *1   INRAE, France
#      louis.heraut@inrae.fr
# This file is part of ash R toolbox.
# ash R toolbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# ash R toolbox is distributed in the hope that it will be useful, but 
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with ash R toolbox.  If not, see <https://www.gnu.org/licenses/>.
# ///
# plotting/break.R
# Plot tool useful to study date break. 
## 1. BREAK PLOTTING
### 1.1. Histogram 
histogram = function (data_bin, df_meta, figdir='', filedir_opt='') {
    # Get all different stations code
    Code = levels(factor(df_meta$code))
    nCode = length(Code)
    # If there is not a dedicated figure directory it creats one
    outdir = file.path(figdir, filedir_opt, sep='')
    if (!(file.exists(outdir))) {
        dir.create(outdir)
    # Fix the major and minor date break between tick for axis
    datebreak = 10
    dateminbreak = 1
    # Computes histogram by year
    res_hist = hist(data_bin, breaks='years', plot=FALSE)
    # Gets the count by breaks
    counts = res_hist$counts
    # In pourcentage
    counts_pct = counts/nCode * 100
    # Gets the limits of the cells
    breaks = as.Date(res_hist$breaks)
    # Gets the middle of the cells
    mids = as.Date(res_hist$mids)
    # Open a new plot with personal theme
    p = ggplot() + theme_ash +
              # Y grid
        theme(panel.grid.major.y=element_line(color='grey85', size=0.15),
              # Remove y title
              axis.title.y=element_blank()) +
        # Plot bar
        geom_bar(aes(x=mids, y=counts_pct), 
                 stat='identity',
                 fill="#00A3A8") +
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
# X axis scale_x_date(date_breaks=paste(as.character(datebreak), 'year', sep=' '), date_minor_breaks=paste(as.character(dateminbreak), 'year', sep=' '), guide='axis_minor', date_labels="%Y", limits=c(min(data_bin)-years(0), max(data_bin)+years(0)), expand=c(0, 0)) + # Y axis scale_y_continuous(limits=c(0, max(counts_pct)*1.1), expand=c(0, 0)) # Saving of plot ggsave(plot=p, path=outdir, filename=paste('hist_break_date', '.pdf', sep=''), width=10, height=10, units='cm', dpi=100) } ### 1.2. Cumulative cumulative = function (data_bin, df_meta, dyear=10, figdir='', filedir_opt='') { # Get all different stations code Code = levels(factor(df_meta$code)) nCode = length(Code) # If there is not a dedicated figure directory it creats one outdir = file.path(figdir, filedir_opt, sep='') if (!(file.exists(outdir))) { dir.create(outdir) } # Fix the major and minor date break between tick for axis datebreak = 10 dateminbreak = 1 # Computes histogram by year res_hist = hist(data_bin, breaks='years', plot=FALSE) # Gets the count by breaks counts = res_hist$counts # Compute the cumulative sum cumul = cumsum(counts) # In percentage cumul_pct = cumul/nCode * 100 # Gets the limits of the cells breaks = as.Date(res_hist$breaks) # Gets the middle of the cells mids = as.Date(res_hist$mids) # Duplicates start and end value to extend graph mids = c(mids[1] - years(dyear), mids[1] - years(1), mids, mids[length(mids)] + years(dyear)) cumul_pct = c(0, 0, cumul_pct, cumul_pct[length(cumul_pct)]) # Centers the middle date mids = mids + months(6) # Shifts the breaking date to be coherent with the start # of the rupture breaks = breaks + 1 # Remove the last date because it is too much breaks = breaks[-length(breaks)] # Creates a blank datebreak list to plot cumulative graph
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
DB = c() # For all the date break cells for (i in 1:length(breaks)) { # Duplicates the date break for the number of times # it is counts in the histogram DB = c(DB, rep(breaks[i], times=counts[i])) } # Estimates the median q50 = as.Date(quantile(DB, probs=0.5)) + years(1) # Print the median print(paste('mediane :', q50)) # Open a new plot with personal theme p = ggplot() + theme_ash + # Y grid theme(panel.grid.major.y=element_line(color='grey85', size=0.15), # Remove y title axis.title.y=element_blank()) + # Plot line of cumulative sum geom_line(aes(x=mids, y=cumul_pct), color="#00A3A8") + # Plot the median line geom_line(aes(x=c(q50, q50), y=c(0, 100)), color="wheat", lty='dashed') + # X axis scale_x_date(date_breaks=paste(as.character(datebreak), 'year', sep=' '), date_minor_breaks=paste(as.character(dateminbreak), 'year', sep=' '), guide='axis_minor', date_labels="%Y", limits=c(min(mids)-years(0), max(mids)+years(0)), expand=c(0, 0)) + # Y axis scale_y_continuous(limits=c(-1, 101), expand=c(0, 0)) # Saving plot ggsave(plot=p, path=outdir, filename=paste('cumul_break_date', '.pdf', sep=''), width=10, height=10, units='cm', dpi=100) }