diff --git a/processing/format.R b/processing/format.R index 57d6cc35ab22d1ead1f38d4e7b91e8421bbe47d5..696c8580cb61ebe49f195d126ca2f207fc477de7 100644 --- a/processing/format.R +++ b/processing/format.R @@ -1,23 +1,71 @@ library(dplyr) -prepare = function(df_data, df_info) { - - df_data$Gcode = as.integer(factor(df_data$code)) - df_data = tibble(Date=df_data$Date, - Gcode=df_data$Gcode, - Qm3s=df_data$Qm3s - ) +join = function (df_data_BH, df_data_NV, df_info_BH, df_info_NV) { + + if (!is.null(df_data_NV) & !is.null(df_data_BH)) { + + # Get the station in common + common = levels(factor(df_info_NV[df_info_NV$code %in% df_info_BH$code,]$code)) + # Get the Nv station to add + NVadd = levels(factor(df_info_NV[!(df_info_NV$code %in% df_info_BH$code),]$code)) + + # Select only the NV info to add + df_info_NVadd = df_info_NV[df_info_NV$code %in% NVadd,] + # Join NV data to BH data + df_info = full_join(df_info_BH, df_info_NVadd, by=c("code", "nom", "L93X", "L93Y", "surface_km2", "file_path")) - df_info$Gcode = as.integer(factor(df_info$code)) + # Select only the NV data to add + df_data_NVadd = df_data_NV[df_data_NV$code %in% NVadd,] + # Join NV info to BH info + df_data = full_join(df_data_BH, df_data_NVadd, by=c("Date", "Qm3s", "code")) + } else if (is.null(df_data_NV) & !is.null(df_data_BH)) { + df_info = df_info_BH + df_data = df_data_BH + } else if (!is.null(df_data_NV) & is.null(df_data_BH)) { + df_info = df_info_NV + df_data = df_data_NV + } else { + stop('No data') + } + return (list(data=df_data, info=df_info)) } -clean = function (df_xTrend, df_info) { +prepare = function(df_data, colnamegroup) { + + colnamegroup = c(colnamegroup) + + colindgroup = which(colnames(df_data) == colnamegroup) + + df_data = group_by_at(df_data, colindgroup) + + data = tibble(Date=df_data$Date, + group=group_indices(df_data), + Qm3s=df_data$Qm3s) + + Gkey = group_keys(df_data) + info = bind_cols(group=seq(1:nrow(Gkey)), + Gkey) + + # df_data = tibble(Date=df_data$Date, + # group=as.integer(factor(df_data$code)), + # Qm3s=df_data$Qm3s) + + # df_info$group = as.integer(factor(df_info$code)) + + return (list(data=data, info=info)) +} + + +clean = function (df_xTrend, df_list) { + + df_xTrend = bind_cols(df_xTrend, + df_list$info[df_xTrend$group1, + 2:ncol(df_list$info)]) - print(df_info$Gcode == df_xTrend$group1) - df_xTrend$code = df_info$code[df_info$Gcode == df_xTrend$group1] + colnames(df_xTrend)[1] = 'group' return (df_xTrend) } diff --git a/script.R b/script.R index 6c5468e58c2c346838f859c4c852a73512030f9d..6da417bd7ad163024c92764a1bc31d315046ca68 100644 --- a/script.R +++ b/script.R @@ -16,15 +16,14 @@ computer_work_path = ### BANQUE HYDRO ### # Path to the directory where BH data is stored BHfiledir = - # "test" - "" - # "BanqueHydro_Export2021" + # "" + "BanqueHydro_Export2021" ## Manual selection ## # Name of the file that will be analysed from the BH directory BHfilename = - "" - # c("H5920011_HYDRO_QJM.txt", "K4470010_HYDRO_QJM.txt") + # "" + c("H5920011_HYDRO_QJM.txt")#, "K4470010_HYDRO_QJM.txt") # "all" ## Or list selection ## @@ -40,13 +39,13 @@ BHlistname = ### NIVALE ### # Path to the directory where NV data is stored NVfiledir = - # "" - "France207" + "" + # "France207" # Name of the file that will be analysed from the NV directory NVfilename = - # "" - "all" + "" + # "all" # Path to the list file of information about station that will be analysed @@ -54,14 +53,14 @@ NVlistdir = "" NVlistname = - # "" - "liste_bv_principaux_global.txt" + "" + # "liste_bv_principaux_global.txt" ### TREND ANALYSIS ### # Time period to analyse -period = c("1960-01-01","2020-01-01") - +period = c("1960-01-01","2019-12-31") +# period = c("1960-01-01","2020-01-01") @@ -132,33 +131,11 @@ df_info_NV = extractNVlist_info(computer_data_path, NVfiledir, NVlistdir, NVlist # Extract data about selected stations df_data_NV = extractNV_data(computer_data_path, NVfiledir, NVfilename) + # JOIN # -if (!is.null(df_data_NV) & !is.null(df_data_BH)) { - - # Get the station in common - common = levels(factor(df_info_NV[df_info_NV$code %in% df_info_BH$code,]$code)) - # Get the Nv station to add - NVadd = levels(factor(df_info_NV[!(df_info_NV$code %in% df_info_BH$code),]$code)) - - # Select only the NV info to add - df_info_NVadd = df_info_NV[df_info_NV$code %in% NVadd,] - # Join NV data to BH data - df_info = full_join(df_info_BH, df_info_NVadd, by=c("code", "nom", "L93X", "L93Y", "surface_km2", "file_path")) - - # Select only the NV data to add - df_data_NVadd = df_data_NV[df_data_NV$code %in% NVadd,] - # Join NV info to BH info - df_data = full_join(df_data_BH, df_data_NVadd, by=c("Date", "Qm3s", "code")) - -} else if (is.null(df_data_NV) & !is.null(df_data_BH)) { - df_info = df_info_BH - df_data = df_data_BH -} else if (!is.null(df_data_NV) & is.null(df_data_BH)) { - df_info = df_info_NV - df_data = df_data_NV -} else { - stop('No data') -} +df_join = join(df_data_BH, df_data_NV, df_info_BH, df_info_NV) +df_data = df_join$data +df_info = df_join$info # TIME PANEL # @@ -168,18 +145,51 @@ if (!is.null(df_data_NV) & !is.null(df_data_BH)) { ### /!\ Removed 185 row(s) containing missing values (geom_path) -> remove NA ### +# ANALYSE # # Compute gap parameters for stations # df_lac = get_lacune(df_data, df_info) -df_list = prepare(df_data, df_info) - -df_meanEx = extract.Var(data.station=df_list, - funct=mean, - period=period, - pos.datetime=1, - na.rm=TRUE) -df_meanTrend = Estimate.stats(data.extract=df_meanEx) ### /!\ verify order conservation ### -df_meanTrend = clean(df_meanTrend, df_info) +# AVERAGE ANNUAL FLOW : QA # +df_QAlist = prepare(df_data, colnamegroup=c('code')) + +df_QAEx = extract.Var(data.station=df_QAlist, + funct=mean, + period=period, + pos.datetime=1, + na.rm=TRUE) +df_QATrend = Estimate.stats(data.extract=df_QAEx) +df_QATrend = clean(df_QATrend, df_QAlist) + + +# MONTHLY MINIMUM FLOW IN THE YEAR : QMNA # + +fMNA = function (X) { + # prendre un paquet de 1 ans et faire la moyenne par mois et retourner le minimum des debit + dpm = length(X)/12 + # print(dpm) + # print(length(X)) + monthmean = c() + for (i in 1:12) { + id = round(dpm*(i-1)+1, 0) + iu = round(i*dpm, 0) + monthmean = append(monthmean, mean(X[id:iu], na.rm=TRUE)) + # print(paste('start', id)) + # print(paste('end', iu)) + # print('') + } + # print(monthmean) + return (min(monthmean, na.rm=TRUE)) +} + +df_QMNAlist = prepare(df_data, colnamegroup=c('code')) + +df_QMNAEx = extract.Var(data.station=df_QMNAlist, + funct=fMNA, + period=period, + pos.datetime=1)#, + # na.rm=TRUE) ### /!\ PAS COMPRIS ### +df_QMNATrend = Estimate.stats(data.extract=df_QMNAEx) +df_QMNATrend = clean(df_QMNATrend, df_QMNAlist)