diff --git a/plotting/break.R b/plotting/break.R index dc38c9a83f3bcaf31a6415c5836e322c89e6090d..7daa4bf7cdbdffdc5ed8696ae4527413a6a7a541 100644 --- a/plotting/break.R +++ b/plotting/break.R @@ -23,7 +23,7 @@ # # plotting/break.R # -# +# Plot tool useful to study date break. ## 1. BREAK PLOTTING @@ -40,24 +40,35 @@ histogram = function (data_bin, df_meta, figdir='', filedir_opt='') { dir.create(outdir) } + # Fix the major and minor date break between tick for axis datebreak = 10 dateminbreak = 1 + # Computes histogram by year res_hist = hist(data_bin, breaks='years', plot=FALSE) + # Gets the count by breaks counts = res_hist$counts + # In pourcentage counts_pct = counts/nCode * 100 + # Gets the limits of the cells breaks = as.Date(res_hist$breaks) + # Gets the middle of the cells mids = as.Date(res_hist$mids) - - p = ggplot() + theme_ash + - - theme(panel.grid.major.y=element_line(color='grey85', size=0.15), - axis.title.y=element_blank()) + + # Open a new plot with personal theme + p = ggplot() + theme_ash + + + # Y grid + theme(panel.grid.major.y=element_line(color='grey85', size=0.15), + # Remove y title + axis.title.y=element_blank()) + + + # Plot bar geom_bar(aes(x=mids, y=counts_pct), stat='identity', fill="#00A3A8") + - + + # X axis scale_x_date(date_breaks=paste(as.character(datebreak), 'year', sep=' '), date_minor_breaks=paste(as.character(dateminbreak), @@ -67,18 +78,19 @@ histogram = function (data_bin, df_meta, figdir='', filedir_opt='') { limits=c(min(data_bin)-years(0), max(data_bin)+years(0)), expand=c(0, 0)) + - + + # Y axis scale_y_continuous(limits=c(0, max(counts_pct)*1.1), expand=c(0, 0)) + # Saving of plot ggsave(plot=p, path=outdir, filename=paste('hist_break_date', '.pdf', sep=''), width=10, height=10, units='cm', dpi=100) } - ### 1.2. Cumulative cumulative = function (data_bin, df_meta, dyear=10, figdir='', filedir_opt='') { @@ -92,16 +104,24 @@ cumulative = function (data_bin, df_meta, dyear=10, figdir='', filedir_opt='') { dir.create(outdir) } + # Fix the major and minor date break between tick for axis datebreak = 10 dateminbreak = 1 + # Computes histogram by year res_hist = hist(data_bin, breaks='years', plot=FALSE) + # Gets the count by breaks counts = res_hist$counts + # Compute the cumulative sum cumul = cumsum(counts) + # In percentage cumul_pct = cumul/nCode * 100 + # Gets the limits of the cells breaks = as.Date(res_hist$breaks) + # Gets the middle of the cells mids = as.Date(res_hist$mids) + # Duplicates start and end value to extend graph mids = c(mids[1] - years(dyear), mids[1] - years(1), mids, mids[length(mids)] + years(dyear)) @@ -109,31 +129,44 @@ cumulative = function (data_bin, df_meta, dyear=10, figdir='', filedir_opt='') { cumul_pct, cumul_pct[length(cumul_pct)]) + # Centers the middle date mids = mids + months(6) - + # Shifts the breaking date to be coherent with the start + # of the rupture breaks = breaks + 1 + # Remove the last date because it is too much breaks = breaks[-length(breaks)] + # Creates a blank datebreak list to plot cumulative graph DB = c() + # For all the date break cells for (i in 1:length(breaks)) { + # Duplicates the date break for the number of times + # it is counts in the histogram DB = c(DB, rep(breaks[i], times=counts[i])) } + # Estimates the median q50 = as.Date(quantile(DB, probs=0.5)) + years(1) - + # Print the median print(paste('mediane :', q50)) - - p = ggplot() + theme_ash + - - theme(panel.grid.major.y=element_line(color='grey85', size=0.15), - axis.title.y=element_blank()) + + # Open a new plot with personal theme + p = ggplot() + theme_ash + + + # Y grid + theme(panel.grid.major.y=element_line(color='grey85', size=0.15), + # Remove y title + axis.title.y=element_blank()) + + + # Plot line of cumulative sum geom_line(aes(x=mids, y=cumul_pct), color="#00A3A8") + - + # Plot the median line geom_line(aes(x=c(q50, q50), y=c(0, 100)), color="wheat", lty='dashed') + - + + # X axis scale_x_date(date_breaks=paste(as.character(datebreak), 'year', sep=' '), date_minor_breaks=paste(as.character(dateminbreak), @@ -143,19 +176,14 @@ cumulative = function (data_bin, df_meta, dyear=10, figdir='', filedir_opt='') { limits=c(min(mids)-years(0), max(mids)+years(0)), expand=c(0, 0)) + - + + # Y axis scale_y_continuous(limits=c(-1, 101), expand=c(0, 0)) - + + # Saving plot ggsave(plot=p, path=outdir, filename=paste('cumul_break_date', '.pdf', sep=''), width=10, height=10, units='cm', dpi=100) } - - - - - - -