break.R 9.83 KiB
# \\\
# Copyright 2021-2022 Louis Héraut*1,
#                     Éric Sauquet*2,
#                     Valentin Mansanarez
# *1   INRAE, France
#      louis.heraut@inrae.fr
# *2   INRAE, France
#      eric.sauquet@inrae.fr
# This file is part of ash R toolbox.
# Ash R toolbox is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
# Ash R toolbox is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with ash R toolbox.
# If not, see <https://www.gnu.org/licenses/>.
# ///
# Rcode/plotting/break.R
# Plot tool useful to study date break. 
## 1. BREAK PLOTTING _________________________________________________
### 1.1. Histogram ___________________________________________________
histogram = function (df_break, df_meta, title='', figdir='', filedir_opt='break_hist') {
    # Get all different stations code
    Code = levels(factor(df_meta$code))
    nCode = length(Code)
    # If there is not a dedicated figure directory it creats one
    outdir = file.path(figdir, filedir_opt, sep='')
    if (!(file.exists(outdir))) {
        dir.create(outdir)
    outdir_pdf = file.path(outdir, 'pdf')
    # Creates it if it does not exist
    if (!(file.exists(outdir_pdf))) {
        dir.create(outdir_pdf)
    outdir_png = file.path(outdir, 'png')
    # Creates it if it does not exist
    if (!(file.exists(outdir_png))) {
        dir.create(outdir_png)
    # Fix the major and minor date break between tick for axis
    datebreak = 10
    dateminbreak = 1
    Date = df_break$Date
    DateOk = df_break$Date[df_break$significant]
    ## All data
    # Computes histogram by year
    res_hist = hist(Date, breaks='years', plot=FALSE)
    # Gets the count by breaks
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
counts = res_hist$counts # In pourcentage counts_pct = counts/nCode * 100 # Gets the limits of the cells breaks = as.Date(res_hist$breaks) # Gets the middle of the cells mids = as.Date(res_hist$mids) ## Significant data # Computes histogram by year res_histOk = hist(DateOk, breaks='years', plot=FALSE) # Gets the count by breaks countsOk = res_histOk$counts # In pourcentage counts_pctOk = countsOk/nCode * 100 # Gets the limits of the cells breaksOk = as.Date(res_histOk$breaks) # Gets the middle of the cells midsOk = as.Date(res_histOk$mids) # Open a new plot with personal theme p = ggplot() + theme_ash + # Y grid theme(panel.grid.major.y=element_line(color='grey85', size=0.15), # Remove y title axis.title.y=element_blank(), # Title plot.title = element_text(face="bold", colour="#00A3A8", size=10, hjust=0, vjust=-0.1)) + # Title ggtitle(title) + # Plot bar for all data geom_bar(aes(x=mids, y=counts_pct), stat='identity', fill="#00A3A8", alpha=0.5) + # Plot bar for significant data geom_bar(aes(x=midsOk, y=counts_pctOk), stat='identity', fill="#00A3A8") + # X axis scale_x_date(date_breaks=paste(as.character(datebreak), 'year', sep=' '), date_minor_breaks=paste(as.character(dateminbreak), 'year', sep=' '), guide='axis_minor', date_labels="%Y", limits=c(min(Date) - years(0), max(Date) + years(0)), expand=c(0, 0)) + # Y axis scale_y_continuous(limits=c(0, max(counts_pct)*1.1), expand=c(0, 0)) # Saving plot ggsave(plot=p, path=outdir_pdf, filename=paste(title, 'hist_break_date', '.pdf', sep=''), width=10, height=10, units='cm', dpi=100) # Saving ggsave(plot=p, path=outdir_png, filename=paste(title, 'hist_break_date', '.png', sep=''),
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
width=10, height=10, units='cm', dpi=300) } ### 1.2. Cumulative __________________________________________________ cumulative = function (df_break, df_meta, title='', dyear=10, figdir='', filedir_opt='break_cumul') { # Get all different stations code Code = levels(factor(df_meta$code)) nCode = length(Code) # If there is not a dedicated figure directory it creats one outdir = file.path(figdir, filedir_opt, sep='') if (!(file.exists(outdir))) { dir.create(outdir) } outdir_pdf = file.path(outdir, 'pdf') # Creates it if it does not exist if (!(file.exists(outdir_pdf))) { dir.create(outdir_pdf) } outdir_png = file.path(outdir, 'png') # Creates it if it does not exist if (!(file.exists(outdir_png))) { dir.create(outdir_png) } # Fix the major and minor date break between tick for axis datebreak = 10 dateminbreak = 1 Date = df_break$Date DateOk = df_break$Date[df_break$significant] ## All data # Computes histogram by year res_hist = hist(Date, breaks='years', plot=FALSE) # Gets the count by breaks counts = res_hist$counts # Compute the cumulative sum cumul = cumsum(counts) # In percentage cumul_pct = cumul/nCode * 100 # Gets the limits of the cells breaks = as.Date(res_hist$breaks) # Gets the middle of the cells midsRaw = as.Date(res_hist$mids) # Duplicates start and end value to extend graph mids = c(midsRaw[1] - years(dyear), midsRaw[1] - years(1), midsRaw, midsRaw[length(midsRaw)] + years(dyear)) cumul_pct = c(0, 0, cumul_pct, cumul_pct[length(cumul_pct)]) # Centers the middle date mids = mids + months(6) # Shifts the breaking date to be coherent with the start # of the rupture breaks = breaks + 1 # Remove the last date because it is too much breaks = breaks[-length(breaks)] ## Significant data # Computes histogram by year res_histOk = hist(DateOk, breaks='years', plot=FALSE) # Gets the count by breaks countsOk = res_histOk$counts
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
# Compute the cumulative sum cumulOk = cumsum(countsOk) # In percentage cumul_pctOk = cumulOk/nCode * 100 # Gets the limits of the cells breaksOk = as.Date(res_histOk$breaks) # Gets the middle of the cells midsOk = as.Date(res_histOk$mids) # Duplicates start and end value to extend graph midsOk = c(midsRaw[1] - years(dyear), midsOk[1] - years(1), midsOk, midsRaw[length(midsRaw)] + years(dyear)) cumul_pctOk = c(0, 0, cumul_pctOk, cumul_pctOk[length(cumul_pctOk)]) # Centers the middle date midsOk = midsOk + months(6) # Shifts the breaking date to be coherent with the start # of the rupture breaksOk = breaksOk + 1 # Remove the last date because it is too much breaksOk = breaksOk[-length(breaksOk)] # Creates a blank datebreak list to plot cumulative graph DBOk = c() # For all the date break cells for (i in 1:length(breaksOk)) { # Duplicates the date break for the number of times # it is counts in the histogram DBOk = c(DBOk, rep(breaksOk[i], times=countsOk[i])) } # Estimates the median q50 = as.Date(quantile(DBOk, probs=0.5)) + years(1) # Print the median print(paste('mediane :', q50)) # Open a new plot with personal theme p = ggplot() + theme_ash + # Y grid theme(panel.grid.major.y=element_line(color='grey85', size=0.15), # Remove y title axis.title.y=element_blank(), # Title plot.title = element_text(face="bold", colour="#00A3A8", size=10, hjust=0, vjust=-0.1)) + # Title ggtitle(title) + # Plot line of cumulative sum for all data geom_line(aes(x=mids, y=cumul_pct), color="#00A3A8", alpha=0.5) + # Plot line of cumulative sum for significant data geom_line(aes(x=midsOk, y=cumul_pctOk), color="#00A3A8") + # Plot the median line geom_line(aes(x=c(q50, q50), y=c(0, max(cumul_pctOk))), color="wheat", lty='dashed') + # X axis scale_x_date(date_breaks=paste(as.character(datebreak), 'year', sep=' '), date_minor_breaks=paste(as.character(dateminbreak),
281282283284285286287288289290291292293294295296297298299300301302303304305306307308
'year', sep=' '), guide='axis_minor', date_labels="%Y", limits=c(min(mids) - years(0), max(mids) + years(0)), expand=c(0, 0)) + # Y axis scale_y_continuous(limits=c(-1, 101), expand=c(0, 0)) if (title != '') { title = paste(title, '_', sep='') } # Saving plot ggsave(plot=p, path=outdir_pdf, filename=paste(title, 'cumul_break_date', '.pdf', sep=''), width=10, height=10, units='cm', dpi=100) # Saving ggsave(plot=p, path=outdir_png, filename=paste(title, 'cumul_break_date', '.png', sep=''), width=10, height=10, units='cm', dpi=300) }