diff --git a/plotting/datasheet.R b/plotting/datasheet.R index 53d670d9753c848890db0e5ebe3ea19f3ef42007..551f3e2da0edd0d4dfdf75a1a1a66984a7399226 100644 --- a/plotting/datasheet.R +++ b/plotting/datasheet.R @@ -174,7 +174,7 @@ datasheet_panel = function (list_df2plot, df_meta, trend_period, mean_period, ax if (!is.null(time_header)) { # Extracts the data serie corresponding to the code time_header_code = time_header[time_header$code == code,] - + if (is.null(axis_xlim)) { # Gets the limits of the time serie axis_xlim_code = c(min(time_header_code$Date), @@ -182,7 +182,7 @@ datasheet_panel = function (list_df2plot, df_meta, trend_period, mean_period, ax } else { axis_xlim_code = axis_xlim } - + # Gets the time serie plot Htime = time_panel(time_header_code, df_trend_code=NULL, trend_period=trend_period, @@ -193,6 +193,8 @@ datasheet_panel = function (list_df2plot, df_meta, trend_period, mean_period, ax first=TRUE, lim_pct=lim_pct) # Stores it P[[2]] = Htime + } else { + axis_xlim_code = axis_xlim } # Computes the number of column of plot asked on the datasheet @@ -307,7 +309,7 @@ datasheet_panel = function (list_df2plot, df_meta, trend_period, mean_period, ax lim_pct=lim_pct) # Stores the plot - P[[i+nbh]] = p + P[[i+nbh]] = p } if (!is.null(df_page)) { @@ -1064,7 +1066,7 @@ time_panel = function (df_data_code, df_trend_code, var, type, alpha=0.1, colorF ymin=yminR, xmax=xmaxR, ymax=ymaxR), - linetype=0, fill='white', alpha=0.5) + linetype=0, fill='white', alpha=0.3) # Get the character variable for naming the trend colorLine = leg_trend_per$colorLine diff --git a/plotting/layout.R b/plotting/layout.R index b1da9cf972f30815f331e7a7f281e06c0b48919e..1c8dc15a02e7bfc23297ff443a0dbd94b8c6898c 100644 --- a/plotting/layout.R +++ b/plotting/layout.R @@ -126,7 +126,7 @@ contour = void + ## 3. LAYOUT _________________________________________________________ # Generates a PDF that gather datasheets, map and summarize matrix about the trend analyses realised on selected stations layout_panel = function (df_data, df_meta, layout_matrix, - toplot=c('datasheet', 'matrix', 'map'), + what_plot=c('datasheet', 'matrix', 'map'), figdir='', filedir_opt='', filename_opt='', variable='', df_trend=NULL, alpha=0.1, unit2day=365.25, var='', @@ -246,7 +246,7 @@ layout_panel = function (df_data, df_meta, layout_matrix, df_page = tibble(section='Sommaire', subsection=NA, n=1) # If map needs to be plot - if ('map' %in% toplot) { + if ('map' %in% what_plot) { df_page = map_panel(list_df2plot, df_meta, idPer_trend=length(trend_period), @@ -266,7 +266,7 @@ layout_panel = function (df_data, df_meta, layout_matrix, } # If summarize matrix needs to be plot - if ('matrix' %in% toplot) { + if ('matrix' %in% what_plot) { df_page = matrix_panel(list_df2plot, df_meta, trend_period, @@ -286,7 +286,7 @@ layout_panel = function (df_data, df_meta, layout_matrix, } # If datasheets needs to be plot - if ('datasheet' %in% toplot) { + if ('datasheet' %in% what_plot) { df_page = datasheet_panel(list_df2plot, df_meta, trend_period=trend_period, diff --git a/plotting/map.R b/plotting/map.R index 94d89b5f563086fa35e692d31433a89042fa3962..d6261e7696643364224e88ee53dfc4a8bce4adba 100644 --- a/plotting/map.R +++ b/plotting/map.R @@ -738,6 +738,7 @@ map_panel = function (list_df2plot, df_meta, df_shapefile, idPer_trend=1, xValue = c() yValue = c() color = c() + shape = c() # Start X position of the distribution start_hist = 1 @@ -775,6 +776,20 @@ map_panel = function (list_df2plot, df_meta, df_shapefile, idPer_trend=1, times=countsOk[ii])) color = c(color, rep('grey85', times=countsNOk[ii])) + + if (j > 1) { + shape = 21 + } else { + if (midsOk[ii] > 0) { + shapetmp = 25 + } else { + shapetmp = 24 + } + shape = c(shape, rep(shapetmp, + times=countsOk[ii])) + shape = c(shape, rep(21, + times=countsNOk[ii])) + } } # Makes a tibble to plot the distribution @@ -784,7 +799,7 @@ map_panel = function (list_df2plot, df_meta, df_shapefile, idPer_trend=1, # Plots the point of the distribution geom_point(data=plot_value, aes(x=xValue, y=yValue), - shape=21, + shape=shape, color=color, fill=color, stroke=0.4, alpha=1) diff --git a/processing/analyse.R b/processing/analyse.R index 42540eee35309f58ff4a47e32964aeeac1d98696..99f56da5eed68ad7f45005cdcaf09236514f65e0 100644 --- a/processing/analyse.R +++ b/processing/analyse.R @@ -97,11 +97,19 @@ get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) { ### 1.1. QA __________________________________________________________ # Realise the trend analysis of the average annual flow (QA) # hydrological variable -get_QAtrend = function (df_data, df_meta, period, alpha, yearLac_day, df_mod=tibble()) { +get_QAtrend = function (df_data, df_meta, period, alpha, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { + # Local corrections if needed + res = flag_data(df_data, df_meta, + df_flag=df_flag, + df_mod=df_mod) + df_data = res$data + df_mod = res$mod + # Removes incomplete data from time series res = missing_data(df_data, df_meta, - yearLac_day=yearLac_day, + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, df_mod=df_mod) df_data = res$data df_mod = res$mod @@ -156,11 +164,19 @@ get_QAtrend = function (df_data, df_meta, period, alpha, yearLac_day, df_mod=tib ### 1.2. QMNA ________________________________________________________ # Realise the trend analysis of the monthly minimum flow in the # year (QMNA) hydrological variable -get_QMNAtrend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_day, df_mod=tibble()) { +get_QMNAtrend = function (df_data, df_meta, period, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { + # Local corrections if needed + res = flag_data(df_data, df_meta, + df_flag=df_flag, + df_mod=df_mod) + df_data = res$data + df_mod = res$mod + # Removes incomplete data from time series res = missing_data(df_data, df_meta, - yearLac_day=yearLac_day, + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, df_mod=df_mod) df_data = res$data df_mod = res$mod @@ -266,11 +282,18 @@ rollmean_code = function (df_data, Code, nroll=10, df_mod=NULL) { # Realises the trend analysis of the minimum 10 day average flow # over the year (VCN10) hydrological variable -get_VCN10trend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_day, df_mod=tibble()) { +get_VCN10trend = function (df_data, df_meta, period, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { # Get all different stations code Code = levels(factor(df_meta$code)) + # Local corrections if needed + res = flag_data(df_data, df_meta, + df_flag=df_flag, + df_mod=df_mod) + df_data = res$data + df_mod = res$mod + # Computes the rolling average by 10 days over the data res = rollmean_code(df_data, Code, 10, df_mod=df_mod) df_data_roll = res$data @@ -278,7 +301,8 @@ get_VCN10trend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_ # Removes incomplete data from time series res = missing_data(df_data_roll, df_meta, - yearLac_day=yearLac_day, + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, df_mod=df_mod) df_data_roll = res$data df_mod = res$mod @@ -373,12 +397,19 @@ which_underfirst = function (L, UpLim, select_longest=TRUE) { return (id) } -get_tDEBtrend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_day, thresold_type='VCN10', select_longest=TRUE, df_mod=tibble()) { +get_tDEBtrend = function (df_data, df_meta, period, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, thresold_type='VCN10', select_longest=TRUE, df_mod=tibble()) { # Get all different stations code Code = levels(factor(df_meta$code)) # Gets the number of station nCode = length(Code) + + # Local corrections if needed + res = flag_data(df_data, df_meta, + df_flag=df_flag, + df_mod=df_mod) + df_data = res$data + df_mod = res$mod # Computes the rolling average by 10 days over the data res = rollmean_code(df_data, Code, 10, df_mod=df_mod) @@ -388,7 +419,8 @@ get_tDEBtrend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_d # Removes incomplete data from time series df_data = missing_data(df_data, df_meta=df_meta, - yearLac_day=yearLac_day) + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim) # Samples the data df_data = sampling_data(df_data, @@ -398,7 +430,8 @@ get_tDEBtrend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_d # Removes incomplete data from the averaged time series res = missing_data(df_data_roll, df_meta=df_meta, - yearLac_day=yearLac_day, + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, df_mod=df_mod) df_data_roll = res$data df_mod = res$mod @@ -527,21 +560,29 @@ get_tDEBtrend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_d ### 1.5. tCEN date ___________________________________________________ # Realises the trend analysis of the date of the minimum 10 day # average flow over the year (VCN10) hydrological variable -get_tCENtrend = function (df_data, df_meta, period, alpha, sampleSpan, yearLac_day, df_mod=tibble()) { +get_tCENtrend = function (df_data, df_meta, period, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { # Get all different stations code Code = levels(factor(df_meta$code)) # Blank tibble to store the data averaged df_data_roll = tibble() + # Local corrections if needed + res = flag_data(df_data, df_meta, + df_flag=df_flag, + df_mod=df_mod) + df_data = res$data + df_mod = res$mod + # Computes the rolling average by 10 days over the data res = rollmean_code(df_data, Code, 10, df_mod=df_mod) df_data_roll = res$data df_mod = res$mod - + # Removes incomplete data from time series res = missing_data(df_data_roll, df_meta, - yearLac_day=yearLac_day, + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, df_mod=df_mod) df_data_roll = res$data df_mod = res$mod diff --git a/processing/extract.R b/processing/extract.R index d52307a35fe9488f63e7a4ad8bf99ca99124da57..4f51828d01620661016fb1fd105151e3c7b8960c 100644 --- a/processing/extract.R +++ b/processing/extract.R @@ -510,7 +510,7 @@ extract_data = function (computer_data_path, filedir, filename, ## 4. SHAPEFILE MANAGEMENT ___________________________________________ # Generates a list of shapefiles to draw a hydrological map of # the France -ini_shapefile = function (resources_path, fr_shpdir, fr_shpname, bs_shpdir, bs_shpname, sbs_shpdir, sbs_shpname, rv_shpdir, rv_shpname, is_river=TRUE) { +ini_shapefile = function (resources_path, fr_shpdir, fr_shpname, bs_shpdir, bs_shpname, sbs_shpdir, sbs_shpname, rv_shpdir, rv_shpname, show_river=TRUE) { # Path for shapefile fr_shppath = file.path(resources_path, fr_shpdir, fr_shpname) @@ -534,7 +534,7 @@ ini_shapefile = function (resources_path, fr_shpdir, fr_shpname, bs_shpdir, bs_s df_subbassin = tibble(fortify(subbassin)) # If the river shapefile needs to be load - if (is_river) { + if (show_river) { # Hydrographic network river = readOGR(dsn=rv_shppath, verbose=FALSE) ### trop long ### river = river[which(river$Classe == 1),] diff --git a/processing/format.R b/processing/format.R index 88e390b6081094257c0a7d998f5c69f3cf8372f8..7805a11ba1b6a0b29b371ac7b6595140990f2e65 100644 --- a/processing/format.R +++ b/processing/format.R @@ -81,8 +81,54 @@ join_selection = function (df_data_AEAG, df_data_INRAE, df_meta_AEAG, df_meta_IN return (list(data=df_data, meta=df_meta)) } -### 1.2. Manages missing data ________________________________________ -missing_data = function (df_data, df_meta, yearLac_day=3, yearStart='01-01', Code=NULL, df_mod=NULL) { +### 1.2. Local correction of data ____________________________________ +flag_data = function (df_data, df_meta, df_flag, Code=NULL, df_mod=NULL) { + + if (is.null(Code)) { + # Get all different stations code + Code = levels(factor(df_meta$code)) + nCode = length(Code) + } else { + nCode = length(Code) + } + + for (code in Code) { + if (code %in% df_flag$code) { + + df_flag_code = df_flag[df_flag$code == code,] + nbFlag = nrow(df_flag_code) + + for (i in 1:nbFlag) { + newValue = df_flag_code$newValue[i] + date = df_flag_code$Date[i] + OKcor = df_data$code == code & df_data$Date == date + oldValue = df_data$Value[OKcor] + df_data$Value[OKcor] = newValue + + if (!is.null(df_mod)) { + df_mod = + add_mod(df_mod, code, + type='Value correction', + fun_name='Manual new value assignment', + comment=paste('At ', date, + ' the value ', oldValue, + ' becomes ', newValue, + sep='')) + } + } + } + } + + if (!is.null(df_mod)) { + res = list(data=df_data, mod=df_mod) + return (res) + } else { + return (df_data) + } +} + +### 1.3. Manages missing data ________________________________________ +missing_data = function (df_data, df_meta, dayLac_lim=3, yearNA_lim=10, yearStart='01-01', Code=NULL, df_mod=NULL) { if (is.null(Code)) { # Get all different stations code @@ -95,17 +141,45 @@ missing_data = function (df_data, df_meta, yearLac_day=3, yearStart='01-01', Cod for (code in Code) { # Extracts the data corresponding to the code df_data_code = df_data[df_data$code == code,] + + DateNA = df_data_code$Date[is.na(df_data_code$Value)] + dDateNA = diff(DateNA) + if (any(dDateNA != 1)) { + + dDateNA = c(10, dDateNA) + idJump = which(dDateNA != 1) + NJump = length(idJump) - - Date_NA = df_data_code$Date[is.na(df_data_code$Value)] - # print(Date_NA) - # Start_NA - - - + for (i in 1:NJump) { + idStart = idJump[i] + + if (i < NJump) { + idEnd = idJump[i+1] - 1 + } else { + idEnd = length(DateNA) + } + + Start = DateNA[idStart] + End = DateNA[idEnd] + + duration = (End - Start)/365.25 + if (duration >= yearNA_lim) { + df_data_code$Value[df_data_code$Date <= End] = NA + + if (!is.null(df_mod)) { + df_mod = + add_mod(df_mod, code, + type='Missing data management', + fun_name='NA assignment', + comment=paste('From the start of measurements', + ' to ', End, sep='')) + } + } + } + } + DateMD = substr(df_data_code$Date, 6, 10) - idyearStart = which(DateMD == yearStart) if (DateMD[1] != yearStart) { idyearStart = c(1, idyearStart) @@ -139,7 +213,7 @@ missing_data = function (df_data, df_meta, yearLac_day=3, yearStart='01-01', Cod yearLacMiss_pct = nbNA/nbDate * 100 - if (nbNA > yearLac_day) { + if (nbNA > dayLac_lim) { df_data_code_year$Value = NA df_data_code[OkYear,] = df_data_code_year @@ -151,7 +225,7 @@ missing_data = function (df_data, df_meta, yearLac_day=3, yearStart='01-01', Cod ' to ', End, sep='')) } - } else if (nbNA <= yearLac_day & nbNA > 1) { + } else if (nbNA <= dayLac_lim & nbNA > 1) { DateJ = as.numeric(df_data_code_year$Date) Value = df_data_code_year$Value @@ -182,7 +256,7 @@ missing_data = function (df_data, df_meta, yearLac_day=3, yearStart='01-01', Cod } } -### 1.3. Sampling of the data ________________________________________ +### 1.4. Sampling of the data ________________________________________ sampling_data = function (df_data, df_meta, sampleSpan=c('05-01', '11-30'), Code=NULL, df_mod=NULL) { if (is.null(Code)) { diff --git a/script.R b/script.R index 18d5b49edacd6083148266b23c62ad842a68c082..7934621a712344c0b1189fa5a69da915b032b775 100644 --- a/script.R +++ b/script.R @@ -58,13 +58,13 @@ filename = # "" # "all" c( - # "S2235610_HYDRO_QJM.txt", - # "P0885010_HYDRO_QJM.txt" - # "O3006710_HYDRO_QJM.txt", - # "O4704030_HYDRO_QJM.txt" - # "O0384010_HYDRO_QJM.txt", - "O0362510_HYDRO_QJM.txt" - # "Q7002910_HYDRO_QJM.txt" + "S2235610_HYDRO_QJM.txt", + "P0885010_HYDRO_QJM.txt", + "P0364010_HYDRO_QJM.txt", + "O7635010_HYDRO_QJM.txt", + "O3141010_HYDRO_QJM.txt", + "Q6332510_HYDRO_QJM.txt", + "Q7002910_HYDRO_QJM.txt" # "Q0214010_HYDRO_QJM.txt", # "O3035210_HYDRO_QJM.txt", # "O0554010_HYDRO_QJM.txt", @@ -104,7 +104,7 @@ which_layout = # Selection axis_xlim = NULL - # c("2002-01-01", "2003-01-01") + # c("1982-01-01", "1983-01-01") ## ANALYSIS # Time period to analyse @@ -121,15 +121,41 @@ mean_period = list(period1, period2) alpha = 0.1 # Number of missing days per year before remove the year -yearLac_day = 3 +dayLac_lim = 3 + +# Maximum continuously missing years before removing everything +# before it +yearNA_lim = 10 + +# Local corrections of the data +df_flag = tibble( + code=c('O3141010', + 'O7635010', + 'O7635010', + 'O7635010', + 'O7635010' + ), + Date=c('1974-07-04', + '1948-09-06', + '1949-02-08', + '1950-07-20', + '1953-07-22' + ), + newValue=c(9.5, + 4, + 3, + 1, + 3) # /!\ Unit +) # Sampling span of the data sampleSpan = c('05-01', '11-30') - -## MAP # Is the hydrological network needs to be plot -is_river = FALSE +show_river = FALSE + +# If results and data used in the analysis needs to be written +saving = FALSE ############### END OF REGION TO MODIFY (without risk) ############### @@ -289,7 +315,9 @@ if ('analyse' %in% which_layout) { res = get_QAtrend(df_data, df_meta, period=trend_period, alpha=alpha, - yearLac_day=yearLac_day) + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, + df_flag=df_flag) df_QAdata = res$data df_QAmod = res$mod res_QAtrend = res$analyse @@ -299,7 +327,9 @@ if ('analyse' %in% which_layout) { period=trend_period, alpha=alpha, sampleSpan=sampleSpan, - yearLac_day=yearLac_day) + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, + df_flag=df_flag) df_QMNAdata = res$data df_QMNAmod = res$mod res_QMNAtrend = res$analyse @@ -309,7 +339,9 @@ if ('analyse' %in% which_layout) { period=trend_period, alpha=alpha, sampleSpan=sampleSpan, - yearLac_day=yearLac_day) + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, + df_flag=df_flag) df_VCN10data = res$data df_VCN10mod = res$mod res_VCN10trend = res$analyse @@ -321,7 +353,9 @@ if ('analyse' %in% which_layout) { sampleSpan=sampleSpan, thresold_type='VCN10', select_longest=TRUE, - yearLac_day=yearLac_day) + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, + df_flag=df_flag) df_tDEBdata = res$data df_tDEBmod = res$mod res_tDEBtrend = res$analyse @@ -331,7 +365,9 @@ if ('analyse' %in% which_layout) { period=trend_period, alpha=alpha, sampleSpan=sampleSpan, - yearLac_day=yearLac_day) + dayLac_lim=dayLac_lim, + yearNA_lim=yearNA_lim, + df_flag=df_flag) df_tCENdata = res$data df_tCENmod = res$mod res_tCENtrend = res$analyse @@ -350,17 +386,19 @@ if ('analyse' %in% which_layout) { ## 4. SAVING _________________________________________________________ -# for (v in var) { -# df_datatmp = get(paste('df_', v, 'data', sep='')) -# df_modtmp = get(paste('df_', v, 'mod', sep='')) -# res_trendtmp = get(paste('res_', v, 'trend', sep='')) -# # Modified data saving -# write_data(df_datatmp, df_modtmp, resdir, optdir='modified_data', -# filedir=v) -# # Trend analysis saving -# write_analyse(res_trendtmp, resdir, optdir='trend_analyse', -# filedir=v) -# } +if (saving) { + for (v in var) { + df_datatmp = get(paste('df_', v, 'data', sep='')) + df_modtmp = get(paste('df_', v, 'mod', sep='')) + res_trendtmp = get(paste('res_', v, 'trend', sep='')) + # Modified data saving + write_data(df_datatmp, df_modtmp, resdir, optdir='modified_data', + filedir=v) + # Trend analysis saving + write_analyse(res_trendtmp, resdir, optdir='trend_analyse', + filedir=v) + } +} # res_tDEBtrend = read_listofdf(resdir, 'res_tDEBtrend') @@ -370,12 +408,12 @@ df_shapefile = ini_shapefile(resources_path, fr_shpdir, fr_shpname, bs_shpdir, bs_shpname, sbs_shpdir, sbs_shpname, - rv_shpdir, rv_shpname, is_river=is_river) + rv_shpdir, rv_shpname, show_river=show_river) ### 5.1. Simple time panel to criticize station data _________________ # Plot time panel of debit by stations if ('serie' %in% which_layout) { - layout_panel(toplot=c('datasheet'), + layout_panel(what_plot=c('datasheet'), df_meta=df_meta, df_data=list(df_data, df_sqrt), @@ -395,10 +433,10 @@ if ('serie' %in% which_layout) { ### 5.2. Analysis layout _____________________________________________ if ('analyse' %in% which_layout) { - layout_panel(toplot=c( - 'datasheet' + layout_panel(what_plot=c( + # 'datasheet' # 'matrix', - # 'map' + 'map' ), df_meta=df_meta,