From 040d77ddf11d7ea9f80ae1f1c3d8047ae7631ea2 Mon Sep 17 00:00:00 2001 From: "louis.heraut" <louis.heraut@inrae.fr> Date: Fri, 11 Mar 2022 23:57:55 +0100 Subject: [PATCH] read write fst --- Rcode/advanced_settings.R | 10 ++-- Rcode/processing/analyse.R | 54 +++++++++++++-------- Rcode/processing/extract.R | 3 -- Rcode/processing/format.R | 58 ++++++++++------------ Rcode/processing/read_write.R | 10 ++-- Rcode/processing/script_analyse.R | 80 ++++++++++++++++++++----------- main.R | 27 ++++++++--- 7 files changed, 145 insertions(+), 97 deletions(-) diff --git a/Rcode/advanced_settings.R b/Rcode/advanced_settings.R index 8e33d91..e77fe01 100644 --- a/Rcode/advanced_settings.R +++ b/Rcode/advanced_settings.R @@ -154,9 +154,13 @@ yearNA_lim = 10 sampleSpan = c('05-01', '11-30') ### 3.4. Saving option _______________________________________________ -data_saving = TRUE -# meta_saving = TRUE -fast_format = FALSE +saving = c( + 'data' + 'meta', + 'analyse' +) + +fast_format = TRUE ### 3.5. Statistical option __________________________________________ # The risk of the Mann-Kendall trend detection test diff --git a/Rcode/processing/analyse.R b/Rcode/processing/analyse.R index ef2291c..0eb1025 100644 --- a/Rcode/processing/analyse.R +++ b/Rcode/processing/analyse.R @@ -102,12 +102,12 @@ get_intercept = function (df_Xtrend, df_Xlist, unit2day=365.25) { ### 1.1. XA __________________________________________________________ # Realise the trend analysis of the average annual flow (QA) # hydrological variable -get_XAtrend = function (df_data, df_meta, period, per.start, alpha, dayLac_lim, yearNA_lim, funct=mean, df_flag=NULL, df_mod=tibble(), ...) { +get_XAtrend = function (df_data, df_meta, period, perStart, alpha, dayLac_lim, yearNA_lim, funct=mean, df_flag=NULL, df_mod=tibble(), ...) { print(paste0('Computes XA trend of ', as.character(substitute(funct)), ' function with hydrological month start ', - substr(per.start, 1, 2))) + substr(perStart, 1, 2))) if (!is.null(df_flag)) { # Local corrections if needed @@ -122,7 +122,7 @@ get_XAtrend = function (df_data, df_meta, period, per.start, alpha, dayLac_lim, res = missing_data(df_data, df_meta, dayLac_lim=dayLac_lim, yearNA_lim=yearNA_lim, - yearStart=per.start, + perStart=perStart, df_mod=df_mod) df_data = res$data df_mod = res$mod @@ -146,7 +146,7 @@ get_XAtrend = function (df_data, df_meta, period, per.start, alpha, dayLac_lim, funct=funct, timestep='year', period=per, - per.start=per.start, + per.start=perStart, pos.datetime=1, ...) @@ -180,10 +180,10 @@ get_XAtrend = function (df_data, df_meta, period, per.start, alpha, dayLac_lim, ### 1.2. QMNA ________________________________________________________ # Realise the trend analysis of the monthly minimum flow in the # year (QMNA) hydrological variable -get_QMNAtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { +get_QMNAtrend = function (df_data, df_meta, period, perStart, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { print(paste0('Computes QMNA trend with hydrological month start ', - substr(per.start, 1, 2))) + substr(perStart, 1, 2))) # Local corrections if needed res = flag_data(df_data, df_meta, @@ -196,6 +196,7 @@ get_QMNAtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan res = missing_data(df_data, df_meta, dayLac_lim=dayLac_lim, yearNA_lim=yearNA_lim, + perStart=perStart, df_mod=df_mod) df_data = res$data df_mod = res$mod @@ -214,9 +215,10 @@ get_QMNAtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan Imax = 0 # Blank tibble for data to return df_QMNAtrendB = tibble() - + # For all periods for (per in period) { + # Prepare the data to fit the entry of extract.Var df_QMNAlist = prepare(df_data, colnamegroup=c('code')) # Compute the montly mean over the data @@ -235,7 +237,7 @@ get_QMNAtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan df_QMNAEx = extract.Var(data.station=df_QMNAlist, funct=min, period=per, - per.start=per.start, + per.start=perStart, timestep='year', pos.datetime=1, na.rm=TRUE) @@ -302,34 +304,43 @@ rollmean_code = function (df_data, Code, nroll=10, df_mod=NULL) { # Realises the trend analysis of the minimum 10 day average flow # over the year (VCN10) hydrological variable -get_VCN10trend = function (df_data, df_meta, period, per.start, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { +get_VCN10trend = function (df_data, df_meta, period, perStart, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { print(paste0('Computes VCN10 trend with hydrological month start ', - substr(per.start, 1, 2))) + substr(perStart, 1, 2))) # Get all different stations code Code = levels(factor(df_meta$code)) + print('flag') + # Local corrections if needed res = flag_data(df_data, df_meta, df_flag=df_flag, df_mod=df_mod) df_data = res$data df_mod = res$mod + + print('roll') # Computes the rolling average by 10 days over the data res = rollmean_code(df_data, Code, 10, df_mod=df_mod) df_data_roll = res$data df_mod = res$mod + print('miss') + # Removes incomplete data from time series res = missing_data(df_data_roll, df_meta, dayLac_lim=dayLac_lim, yearNA_lim=yearNA_lim, + perStart=perStart, df_mod=df_mod) df_data_roll = res$data df_mod = res$mod + print('sample') + # Samples the data res = sampling_data(df_data_roll, df_meta, sampleSpan=sampleSpan, @@ -337,6 +348,8 @@ get_VCN10trend = function (df_data, df_meta, period, per.start, alpha, sampleSpa df_data_roll = res$data df_mod = res$mod + print('ok') + # Make sure to convert the period to a list period = as.list(period) # Set the max interval period as the minimal possible @@ -352,7 +365,7 @@ get_VCN10trend = function (df_data, df_meta, period, per.start, alpha, sampleSpa df_VCN10Ex = extract.Var(data.station=df_VCN10list, funct=min, period=per, - per.start=per.start, + per.start=perStart, timestep='year', pos.datetime=1, na.rm=TRUE) @@ -421,10 +434,10 @@ which_underfirst = function (L, UpLim, select_longest=TRUE) { return (id) } -get_tDEBtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, thresold_type='VCN10', select_longest=TRUE, df_mod=tibble()) { +get_tDEBtrend = function (df_data, df_meta, period, perStart, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, thresold_type='VCN10', select_longest=TRUE, df_mod=tibble()) { print(paste0('Computes tDEB trend with hydrological month start ', - substr(per.start, 1, 2))) + substr(perStart, 1, 2))) # Get all different stations code Code = levels(factor(df_meta$code)) @@ -447,7 +460,8 @@ get_tDEBtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan df_data = missing_data(df_data, df_meta=df_meta, dayLac_lim=dayLac_lim, - yearNA_lim=yearNA_lim) + yearNA_lim=yearNA_lim, + perStart=perStart) # Samples the data df_data = sampling_data(df_data, @@ -459,6 +473,7 @@ get_tDEBtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan df_meta=df_meta, dayLac_lim=dayLac_lim, yearNA_lim=yearNA_lim, + perStart=perStart, df_mod=df_mod) df_data_roll = res$data df_mod = res$mod @@ -496,7 +511,7 @@ get_tDEBtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan funct=min, timestep='year', period=per, - per.start=per.start, + per.start=perStart, pos.datetime=1, na.rm=TRUE) @@ -533,7 +548,7 @@ get_tDEBtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan df_tDEBEx_code = extract.Var(data.station=df_tDEBlist_code, funct=which_underfirst, period=per, - per.start=per.start, + per.start=perStart, timestep='year', pos.datetime=1, UpLim=QT_code, @@ -589,10 +604,10 @@ get_tDEBtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan ### 1.5. tCEN date ___________________________________________________ # Realises the trend analysis of the date of the minimum 10 day # average flow over the year (VCN10) hydrological variable -get_tCENtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { +get_tCENtrend = function (df_data, df_meta, period, perStart, alpha, sampleSpan, dayLac_lim, yearNA_lim, df_flag, df_mod=tibble()) { print(paste0('Computes tCEN trend with hydrological month start ', - substr(per.start, 1, 2))) + substr(perStart, 1, 2))) # Get all different stations code Code = levels(factor(df_meta$code)) @@ -615,6 +630,7 @@ get_tCENtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan res = missing_data(df_data_roll, df_meta, dayLac_lim=dayLac_lim, yearNA_lim=yearNA_lim, + perStart=perStart, df_mod=df_mod) df_data_roll = res$data df_mod = res$mod @@ -641,7 +657,7 @@ get_tCENtrend = function (df_data, df_meta, period, per.start, alpha, sampleSpan df_tCENEx = extract.Var(data.station=df_tCENlist, funct=which.min, period=per, - per.start=per.start, + per.start=perStart, timestep='year', pos.datetime=1) diff --git a/Rcode/processing/extract.R b/Rcode/processing/extract.R index 6953e74..d6e51b0 100644 --- a/Rcode/processing/extract.R +++ b/Rcode/processing/extract.R @@ -592,9 +592,6 @@ extract_data = function (computer_data_path, filedir, filename, df_data = tibble(Date=as.Date(as.character(df_data$Date), format="%Y%m%d"), Value=df_data$Qls * 1E-3, - Qmmj=df_data$Qmmj, - val_H=as.character(df_data$val_H), - val_I=as.character(df_data$val_I), code=code) return (df_data) diff --git a/Rcode/processing/format.R b/Rcode/processing/format.R index 42c3cb5..ba27251 100644 --- a/Rcode/processing/format.R +++ b/Rcode/processing/format.R @@ -128,7 +128,7 @@ flag_data = function (df_data, df_meta, df_flag, Code=NULL, df_mod=NULL) { } ### 1.3. Manages missing data ________________________________________ -missing_data = function (df_data, df_meta, dayLac_lim=3, yearNA_lim=10, yearStart='01-01', Code=NULL, df_mod=NULL) { +missing_data = function (df_data, df_meta, dayLac_lim=3, yearNA_lim=10, perStart='01-01', Code=NULL, df_mod=NULL) { if (is.null(Code)) { # Get all different stations code @@ -137,7 +137,7 @@ missing_data = function (df_data, df_meta, dayLac_lim=3, yearNA_lim=10, yearStar } else { nCode = length(Code) } - + for (code in Code) { # Extracts the data corresponding to the code df_data_code = df_data[df_data$code == code,] @@ -178,18 +178,19 @@ missing_data = function (df_data, df_meta, dayLac_lim=3, yearNA_lim=10, yearStar } } } - - DateMD = substr(df_data_code$Date, 6, 10) - idyearStart = which(DateMD == yearStart) - if (DateMD[1] != yearStart) { - idyearStart = c(1, idyearStart) + + DateMD = format(df_data_code$Date, "%m-%d") + idperStart = which(DateMD == perStart) + + if (DateMD[1] != perStart) { + idperStart = c(1, idperStart) } - NidyearStart = length(idyearStart) - - for (i in 1:NidyearStart) { - Start = df_data_code$Date[idyearStart[i]] - if (i < NidyearStart) { - End = df_data_code$Date[idyearStart[i+1] - 1] + NidperStart = length(idperStart) + + for (i in 1:NidperStart) { + Start = df_data_code$Date[idperStart[i]] + if (i < NidperStart) { + End = df_data_code$Date[idperStart[i+1] - 1] } else { End = df_data_code$Date[length(df_data_code$Date)] } @@ -198,9 +199,9 @@ missing_data = function (df_data, df_meta, dayLac_lim=3, yearNA_lim=10, yearStar df_data_code_year = df_data_code[OkYear,] StartReal = as.Date(paste(substr(Start, 1, 4), - yearStart, sep='-')) + perStart, sep='-')) EndReal = as.Date(paste(as.numeric(substr(Start, 1, 4)) + 1, - yearStart, sep='-')) + perStart, sep='-')) nbDate = as.numeric(difftime(EndReal, StartReal, units="days")) @@ -270,18 +271,14 @@ sampling_data = function (df_data, df_meta, sampleSpan=c('05-01', '11-30'), Code # 1972 is leap year reference is case of leap year comparison sampleStart = as.Date(paste('1972', sampleSpan[1], sep='-')) sampleEnd = as.Date(paste('1972', sampleSpan[2], sep='-')) - - for (code in Code) { - # Extracts the data corresponding to the code - df_data_code = df_data[df_data$code == code,] - - DateMD = substr(df_data_code$Date, 6, 10) - Date = paste('1972', DateMD, sep='-') - - df_data_code$Value[Date < sampleStart | Date > sampleEnd] = NA + DateMD = format(df_data$Date, "%m-%d") + Date = paste('1972', DateMD, sep='-') + + df_data$Value[Date < sampleStart | Date > sampleEnd] = NA - if (!is.null(df_mod)) { + if (!is.null(df_mod)) { + for (code in Code) { df_mod = add_mod(df_mod, code, type='Seasonal sampling ', fun_name='NA assignment', @@ -289,13 +286,8 @@ sampling_data = function (df_data, df_meta, sampleSpan=c('05-01', '11-30'), Code ' and after ', sampleEnd, sep='')) } - - # Leap year verification - # print(df_data_code[df_data_code$Date > as.Date("1992-02-25"),]) - - df_data[df_data$code == code,] = df_data_code } - + if (!is.null(df_mod)) { res = list(data=df_data, mod=df_mod) return (res) @@ -306,8 +298,6 @@ sampling_data = function (df_data, df_meta, sampleSpan=c('05-01', '11-30'), Code ## 2. DURING TREND ANALYSE ___________________________________________ - - date_correction = function (df_XEx, per) { # Takes the first date as example @@ -555,7 +545,7 @@ clean = function (df_Xtrend, df_XEx, df_Xlist) { df_Xtrend = relocate(df_Xtrend, intercept, .after=trend) # Creates a list of results to return - res = list(trend=df_Xtrend, data=df_Xlist$data, info=df_Xlist$info) + res = list(trend=df_Xtrend, data=df_Xlist$data) return (res) } diff --git a/Rcode/processing/read_write.R b/Rcode/processing/read_write.R index 6e0f310..859bc30 100644 --- a/Rcode/processing/read_write.R +++ b/Rcode/processing/read_write.R @@ -107,23 +107,25 @@ write_critique = function (df_critique, resdir, filename='critique') { # write_critique(df_critique, resdir) ### 1.4. Fast for R __________________________________________________ -write_dataFST = function (df_data, resdir, filedir='fst') { +write_dataFST = function (df_data, resdir, filedir='fst', + filename='data.fst') { outdir = file.path(resdir, filedir) if (!(file.exists(outdir))) { dir.create(outdir, recursive=TRUE) } - outfile = file.path(outdir, 'data.fst') + outfile = file.path(outdir, filename) fst::write_fst(df_data, outfile, compress=0) } -write_metaFST = function (df_meta, resdir, filedir='fst') { +write_metaFST = function (df_meta, resdir, filedir='fst', + filename='meta.fst') { outdir = file.path(resdir, filedir) if (!(file.exists(outdir))) { dir.create(outdir, recursive=TRUE) } - outfile = file.path(outdir, 'meta.fst') + outfile = file.path(outdir, filename) fst::write_fst(df_meta, outfile, compress=0) } diff --git a/Rcode/processing/script_analyse.R b/Rcode/processing/script_analyse.R index e2871a4..1074f03 100644 --- a/Rcode/processing/script_analyse.R +++ b/Rcode/processing/script_analyse.R @@ -63,13 +63,13 @@ if ('station_trend_analyse' %in% to_do) { ) if (fast_format) { - monthAll = matrix(rep(paste0(formatC(1:12, width=2, + perSTARTAll = matrix(rep(paste0(formatC(1:12, width=2, flag=0), '-01'), length(varAll)), nrow=length(varAll), byrow=TRUE) } else { - monthAll = matrix(c('09-01', + perSTARTAll = matrix(c('09-01', '01-01', '01-01', '01-01', @@ -87,17 +87,17 @@ if ('station_trend_analyse' %in% to_do) { type = c(type, typeAll[Ok]) glose = c(glose, gloseAll[Ok]) } - month = matrix(monthAll[varAll %in% to_analyse,], + perSTART = matrix(perSTARTAll[varAll %in% to_analyse,], nrow=length(var)) - nbMonth = ncol(month) + nbPerStart = ncol(perSTART) ### 1.3. Trend analyses ______________________________________________ - for (i in 1:nbMonth) { + for (i in 1:nbPerStart) { # QA trend if ('QA' %in% var) { res = get_XAtrend(df_data, df_meta, period=trend_period, - per.start=month['QA' == var, i], + perStart=perSTART['QA' == var, i], alpha=alpha, dayLac_lim=dayLac_lim, yearNA_lim=yearNA_lim, @@ -113,7 +113,7 @@ if ('station_trend_analyse' %in% to_do) { if ('QMNA' %in% var) { res = get_QMNAtrend(df_data, df_meta, period=trend_period, - per.start=month['QMNA' == var, i], + perStart=perSTART['QMNA' == var, i], alpha=alpha, sampleSpan=sampleSpan, dayLac_lim=dayLac_lim, @@ -128,7 +128,7 @@ if ('station_trend_analyse' %in% to_do) { if ('VCN10' %in% var) { res = get_VCN10trend(df_data, df_meta, period=trend_period, - per.start=month['VCN10' == var, i], + perStart=perSTART['VCN10' == var, i], alpha=alpha, sampleSpan=sampleSpan, dayLac_lim=dayLac_lim, @@ -143,7 +143,7 @@ if ('station_trend_analyse' %in% to_do) { if ('tDEB' %in% var) { res = get_tDEBtrend(df_data, df_meta, period=trend_period, - per.start=month['tDEB' == var, i], + perStart=perSTART['tDEB' == var, i], alpha=alpha, sampleSpan=sampleSpan, thresold_type='VCN10', @@ -160,7 +160,7 @@ if ('station_trend_analyse' %in% to_do) { if ('tCEN' %in% var) { res = get_tCENtrend(df_data, df_meta, period=trend_period, - per.start=month['tCEN' == var, i], + perStart=perSTART['tCEN' == var, i], alpha=alpha, sampleSpan=sampleSpan, dayLac_lim=dayLac_lim, @@ -173,7 +173,7 @@ if ('station_trend_analyse' %in% to_do) { ### 1.3. Saving ______________________________________________________ - if (meta_saving) { + if ('meta' %in% saving) { # Sourcing the R file source(file.path('Rcode', 'processing', 'read_write.R'), encoding='UTF-8') @@ -184,7 +184,7 @@ if ('station_trend_analyse' %in% to_do) { } } - if (data_saving) { + if ('data' %in% saving) { # Sourcing the R file source(file.path('Rcode', 'processing', 'read_write.R'), encoding='UTF-8') @@ -195,24 +195,48 @@ if ('station_trend_analyse' %in% to_do) { df_datatmp = get(paste('df_', v, 'data', sep='')) # Gets the modification file of the data for the variable df_modtmp = get(paste('df_', v, 'mod', sep='')) - # Gets the trend results for the variable - res_trendtmp = get(paste('res_', v, 'trend', sep='')) + monthStart = substr(perSTART[v == var, i], 1, 2) + + # Writes modified data + write_data(df_datatmp, df_modtmp, resdir, + filedir=file.path('modified_data', + v, monthStart)) + if (fast_format) { write_dataFST(df_datatmp, resdir, - filedir=file.path('fst', - month[v == var, i], - v)) - } else { - # Writes modified data - write_data(df_datatmp, df_modtmp, resdir, - filedir=file.path('modified_data', - month[v == var, i], - v)) - # Writes trend analysis results + filedir='fst', + filename=paste0('data_', v, + '_', monthStart, + '.fst')) + } + } + } + + if ('analyse' %in% saving) { + # Sourcing the R file + source(file.path('Rcode', 'processing', 'read_write.R'), + encoding='UTF-8') + + # For all the variable + for (v in var) { + # Gets the trend results for the variable + res_trendtmp = get(paste('res_', v, 'trend', sep='')) + + monthStart = substr(perSTART[v == var, i], 1, 2) + + # Writes trend analysis results write_analyse(res_trendtmp, resdir, filedir=file.path('trend_analyses', - v)) + v, monthStart)) + + if (fast_format) { + write_dataFST(res_trendtmp$data, + resdir, + filedir='fst', + filename=paste0(v, 'Ex_', + monthStart, + '.fst')) } } } @@ -282,7 +306,7 @@ if ('climate_trend_analyse' %in% to_do) { # TA trend res = get_XAtrend(df_data_P, df_climate_meta, period=trend_period, - per.start='09-01', + perStart='09-01', alpha=alpha, dayLac_lim=dayLac_lim, yearNA_lim=yearNA_lim, @@ -295,7 +319,7 @@ if ('climate_trend_analyse' %in% to_do) { # PA trend res = get_XAtrend(df_data_T, df_climate_meta, period=trend_period, - per.start='09-01', + perStart='09-01', alpha=alpha, dayLac_lim=dayLac_lim, yearNA_lim=yearNA_lim, @@ -309,7 +333,7 @@ if ('climate_trend_analyse' %in% to_do) { # ETPA trend res = get_XAtrend(df_data_ETP, df_climate_meta, period=trend_period, - per.start='09-01', + perStart='09-01', alpha=alpha, dayLac_lim=dayLac_lim, yearNA_lim=yearNA_lim, diff --git a/main.R b/main.R index 5d03743..a26bbb8 100644 --- a/main.R +++ b/main.R @@ -101,8 +101,8 @@ filename = to_do = c( 'station_extraction', - 'station_trend_analyse' - # 'station_trend_plot' + 'station_trend_analyse', + 'station_trend_plot' ) ## 4. ANALYSIS PARAMETERS ____________________________________________ @@ -112,7 +112,9 @@ to_do = # - periodSub tends to represent the period with the most accessible # flow data periodAll = c("1800-01-01", "2020-12-31") -periodSub = c("1968-01-01", "2020-12-31") +periodSub = + # NULL + c("1968-01-01", "2020-12-31") # Periods of time to average. More precisely : # - periodRef tends to represent the reference period of the climate @@ -137,9 +139,22 @@ modify_advanced_settings = FALSE # Sets working directory setwd(computer_work_path) -# Creates list of period for analyses -trend_period = list(periodAll, periodSub) -mean_period = list(periodRef, periodCur) +# Creates list of period for trend analysis +trend_period = list() +if (!is.null(periodAll)) { + trend_period = append(trend_period, list(periodAll)) +} +if (!is.null(periodSub)) { + trend_period = append(trend_period, list(periodSub)) +} +# Creates list of period for average analysis +mean_period = list() +if (!is.null(periodRef)) { + mean_period = append(mean_period, list(periodRef)) +} +if (!is.null(periodCur)) { + mean_period = append(mean_period, list(periodCur)) +} # Gets the path to the advanced settings advanced_settings_path = file.path('Rcode', 'advanced_settings.R') -- GitLab