diff --git a/DESCRIPTION b/DESCRIPTION index 78274e5279e73081d90c7b8ef55d20352b56b6ef..9a71a5fe8c24437b7b86091779ab42d0b3c5621a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: floodam.data Title: Collect and Process Data on Flood Damage (French Context) -Version: 0.9.40.0 +Version: 0.9.41.0 Authors@R: c( person(given = "Frédéric", family = "Grelot", @@ -52,7 +52,6 @@ Depends: R (>= 3.4.0) Imports: data.table, - fst, grDevices, httr, kableExtra, diff --git a/NAMESPACE b/NAMESPACE index 0d0969c2da5d18b128e4386ac5b8c676c2c8f9d0..28fd9c733967d5325d1bfebdba903f59ead39fa9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,10 @@ # Generated by roxygen2: do not edit by hand export(adapt.eaip) -export(adapt.geo_sirene) export(adapt_admin_express) export(adapt_ban) export(adapt_gaspar) +export(adapt_geo_sirene) export(adapt_rpg) export(add_journal) export(add_log_info) @@ -32,6 +32,7 @@ export(download_archive) export(download_ban) export(download_bd_topo) export(download_gaspar) +export(download_geo_sirene) export(extract.stat.log) export(extract_building) export(extract_building_agricultural) @@ -42,6 +43,9 @@ export(format_archive) export(format_scope) export(generate_report) export(get_archive) +export(get_base_url) +export(get_date_from_html) +export(get_link_from_html) export(gpkg_from_7z) export(in_layer) export(plot_construction_date) @@ -60,7 +64,9 @@ export(save_archive) export(select_scope) export(send_message) export(shp_from_7z) +export(subset_with_message) export(summarise_dwelling) +export(to_logical) export(update_ban) export(update_state) import(kableExtra) diff --git a/NEWS.md b/NEWS.md index c6e42e822c5c8ece01863fdf58ca338b24efc14c..5a50ef908dbafa5b1b363caa2e1b4a25a8a5865a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,98 @@ +# floodam.data 0.9.41.0 + +## Enhancements + +* `adapt_geo_sirene()` (rename) + * new name for `adapt.geo_sirene` + * process changed to fit with `add_log_info()` and use of what classical + steps should be for all 
data (read with a scheme, select observations, split + by departments if necessary, add eaip information, save data and plot a map) + * documentation updated +* `add_eaip()` (internal) + * messages added to better follow analysis +* `add_log_info()` + * better generation of sub-messages for analysis +* `analyse_archive()` + * new `origin` for "geo-sirene" + * documentation & tests updated +* `apply_scheme()` + * `Date` is used to detect when to apply `as.Date()` (consistency) + * `logical` type is detected and transformed with `to_logical()` +* `download_archive()` + * adaptation to `get_archive()` (removal of parameter `html`, inclusion of + field `local_origin` in `to_do` parameter) + * field `short` in `to_do` may be missing + * documentation updated +* `download_ban()` + * adaptation to `get_link_from_html()` + * introduction of parameter `repository`, different from `origin` (only url) + * simplification of process + * documentation updated (markdown format) +* `download_geo_sirene()` (rename) + * new name for `download.geo_sirene` + * introduction of parameter `repository`, different from `origin` (only url) + * introduction of parameter `scope`, used when `repository` is "data.gouv" + * simplification of process + * documentation updated (markdown format) +* `format_journal()` (internal) + * enhanced format dates coming from `base::Sys.Date()` +* `get_archive()` + * adaptation to `get_link_from_html()` + * simplification and introduction of `local_origin` to deal with relative + links + * documentation updated +* `get_base_url()` (new) + * return the parent url from a url + * documentation & tests added +* `get_date_from_html()` (new) + * find admissible dates in a html page + * documentation & tests added +* `get_link_from_html()` (new) + * find links in a html page + * documentation & tests added +* `read_csv_with_scheme()` + * `Date` is used to detect when to apply `as.Date()` (consistency) + * `logical` type is detected and transformed with `to_logical()` 
+ * preprocessing to remove observations with `NA` in lon\lat before + transformation with `sf::st_as_sf()` + * messages generation added +* `scheme_bd_topo_3` (data) + * complete missing `type_source` +* `scheme_gaspar_azi` (data) + * correct value for `type` and `type_source` +* `scheme_gaspar_catnat` (data) + * correct value for `type` and `type_source` +* `scheme_gaspar_dicrim` (data) + * correct value for `type` and `type_source` +* `scheme_gaspar_pcs` (data) + * correct value for `type` and `type_source` +* `scheme_gaspar_pprn` (data) + * correct value for `type` and `type_source` +* `scheme_gaspar_risq` (data) + * correct value for `type_source` +* `scheme_rpg_1` (data) + * correct value for `type_source` +* `scheme_rpg_1` (data) + * correct value for `type_source` +* `scheme_sirene_2019` (data) + * style of field names "." → "_" + * "longitude" and "latitude" renamed to "lon" and "lat" in `name` field for + consistency and usage in `read_csv_with_scheme()` + * documentation updated +* `scheme_sirene_na` (data) + * style of field names "." 
→ "_" + * documentation updated +* `subset_with_message()` (new) + * subset without side-effects and with messages useful for `add_log_info()` + * documentation added +* `split_dep()` (new, internal) + * split a `sf` `data.frame `either geomatically if a department `sf` object + is given, or through the information given in `commune` if not + * documentation added +* `to_logical()` + * default treatment for easy English and French usage + * documentation updated & tests added + # floodam.data 0.9.40.0 ## Enhancements @@ -103,14 +198,15 @@ ## Enhancements * `adapt_admin_express()` - * replace adapt.admin_express - * use add_log_info for journal from new journal system - * use read_with_scheme to read and format data - * use save_archive to save result - * option 'export' renamed in 'extension' for cohérence with save_archive + * replace `adapt.admin_express` + * use `add_log_info()` for journal from new journal system + * use `read_with_scheme()` to read and format data + * use `save_archive()` to save result + * option 'export' renamed in 'extension' for consistency with + `save_archive()` * documentation updated * `analyse_archive()` - * function within its own file and exporteds + * function within its own file and exported * treat 2 formats ('ign' & 'floodam.data') * addition of warnings * function tested in test_analyse_archive @@ -141,12 +237,12 @@ * utilisation de add_journal_new au lieu de add_journal * `add_journal()` * reprise de add_journal_new - * ancienne add_journal renommée temporairement en add_journal_old le + * ancien add_journal renommée temporairement en add_journal_old le temps de la migration * meilleure présentation des sous-tâches en introduisant le paramètre degree * `write_journal()` (new) - * permet d'écrire un journal produit par add_journal_new soit au format + * permet d'écrire un journal produit par `add_journal_new()` soit au format "csv", soit au format "log" en fonction de l'extension du fichier * si aucun fichier n'est donné, le 
message au format "log" est affiché sur la sortie standard @@ -173,12 +269,12 @@ * pour zip, attention fait appel à une commande 'system' en modifiant le nom du fichier pour faire une commande qui sera détectée par `data.table::fread()`, et présume que le format sera csv et pourra être lu - avec read_csv_with_scheme + avec `read_csv_with_scheme()` * pour zip, modifie temporairemen l'option 'datatable.fread.input.cmd.message' pour éviter le message renvoyé par `data.table::fread()` * `read_csv_with_scheme()` (new) - * correspond à ancienne version de read_with_scheme qui était csv + * correspond à ancienne version de `read_with_scheme()` qui était "csv" spécifique * `read_gpkg_with_scheme()` (new) * gestion de gpkg avec scheme et projection @@ -191,7 +287,7 @@ * Séparation claire de la décompression et de la lecture * Sécurité si plusieurs gpkg * `download_admin_express()` - * transformation de download.admin_express + * transformation de `download.admin_express` * meilleure gestion des options (et match.arg) * `scheme_admin_express_3_1 `(data) * nouveau format à partir des infos IGN (version 3-1) @@ -208,7 +304,7 @@ * documentation * corrections de coquilles (à la volée) pour les datasets * regroupement des documentions pour fonction de la famille - read_with_scheme + `read_with_scheme()` * data.cquest.org * toutes les mentions à l'adresse passent en https au lieu de http * Roxygen diff --git a/R/adapt.geo_sirene.R b/R/adapt.geo_sirene.R deleted file mode 100644 index acd0073b1be295a72ae055b6e410dee97fcab31f..0000000000000000000000000000000000000000 --- a/R/adapt.geo_sirene.R +++ /dev/null @@ -1,324 +0,0 @@ -#' @title Adapt sirene data -#' -#' @description -#' The following treatment are performed: -#' - Only some fileds are selected, field names (colnames) are consistently set. -#' - NA values are consistently set. -#' - Type of fields are consistently set, some checks are performed on transformations. 
-#' - Data set is cut and saved by departments -#' - Only observations with spatial coordinates are kept. -#' - Exposition of observations (belonging or not to eaip) is added. -#' -#' @param origin character, path to the directory where archive are stored -#' @param destination character, path to the directory where results should be saved -#' @param archive character, vector of archive to be adpated -#' @param export character, option for export format of the results -#' @param scheme data.frame, how archive variables should be treated -#' @param na_value data.frame, list of NA values for each variables -#' @param eaip_path character, path where eaip archives should be find -#' @param projection optional numeric, 4 digits projection to use -#' @param order_by optional character, name of the variable to sort the output data. -#' Default sorting is by state and eaip (if added). -#' @param retrieve logical, should the result be returned -#' @param verbose logical, should the function give some sumup informations. -#' -#' @return if retrieve = TRUE, data.frame of adapated data stored in archive. 
-#' -#' @export -#' -#' @encoding UTF-8 -#' @author Victor Champonnois et Frédéric Grelot -#' -#' @examples -#' -#' \dontrun{ -#' origin = "" -#' destination = tempdir() -#' archive = "geo_siret_34.csv.gz" -#' result = adapt.geo_sirene(origin, destination, verbose = TRUE, retrieve = TRUE) -#' } - -adapt.geo_sirene = function(origin, - destination, - archive, - export = "fst", - scheme = floodam.data::scheme_sirene_2019, - na_value = floodam.data::scheme_sirene_na, - eaip_path = NULL, - projection = 4326, - order_by = c("state", "eaip"), - retrieve = FALSE, - verbose = FALSE) { - - ### Recursive call if archive is missing - if (missing(archive)) {archive = list.files(origin, pattern = ".csv.gz$")} - if (length(archive) > 1) - return(lapply(archive, - adapt.geo_sirene, - origin = origin, - destination = destination, - export = export, - scheme = scheme, - na_value = na_value, - eaip_path = eaip_path, - projection = projection, - order_by = order_by, - retrieve = retrieve, - verbose = verbose)) - if (length(archive) == 0) - return(NULL) - - ### Analyse archive - path = file.path(origin, archive) - analyse = regmatches(archive, regexec("(.*?)[.](csv[.]gz|gz)$", archive, perl = TRUE))[[1]] - if (length(analyse) == 0) return(NULL) - name = analyse[2] - extension = analyse[3] - location = utils::tail(strsplit(origin, "/")[[1]], 1) - - ### Start journal information - dir.create(destination, showWarnings = FALSE, recursive = TRUE) # Check if error may be generated. - start_time = Sys.time() - journal = add_journal_old( - "WARNING: This file is automatically produced by floodam.data. Do not edit by hand.", "", - "Contents", "\t1. General log informations", "\t2. Stats on removed data", "\t3. 
Stats on exposition", "", - first = TRUE) - journal = add_journal_old( - sprintf("%s from '%s' adpated by floodam.data (%s)", archive, location, utils::packageVersion("floodam.data")), - journal = journal, verbose = verbose, first = TRUE) - journal = add_journal_old(sprintf("Starting time: %s", Sys.time()), journal = journal, verbose = verbose) - on.exit(journal <- add_journal_old("", - sprintf("Total time elapsed for %s from '%s': %s", name, location, fmt(Sys.time() - start_time)), - journal = journal, verbose = verbose, first = TRUE)) - on.exit(write(journal, file.path(destination, sprintf("%s.%s", name, "log"))), add = TRUE) - - ### Read file and do selection, change colnames, start easy formatting - scheme = scheme[order(scheme[["order"]]), ] - colClasses = ifelse(scheme[["keep"]], "character", "NULL") - colClasses[scheme[["type"]] == "integer" & scheme[["keep"]]] = "integer" - colClasses[scheme[["type"]] == "numeric" & scheme[["keep"]]] = "numeric" - scheme = scheme[scheme[["keep"]], ] - rownames(scheme) = scheme[["name"]] - - size = c(inital = file.info(path)[["size"]]) - result = data.table::fread(path, colClasses = colClasses, na.strings = "", - data.table = FALSE, col.names = scheme[["name"]]) - - ### Mores adaptations - # NA treament - for (i in 1:nrow(na_value)) { - var = na_value[i, "name"] - result[[var]] = treat_na(result[[var]], na_value[i, "na.value"]) - } - - # format factors (after NA treatment) - selection = rownames(scheme)[scheme[["type"]] == "factor"] - result[selection] = lapply(result[selection], as.factor) - - # format Date (after NA treatment) - selection = rownames(scheme)[scheme[["type"]] == "Date"] - result[selection] = lapply(result[selection], as.Date) - for (var in selection) { - if (all(is.na(result[[var]]))) { - journal = add_journal_old(sprintf("Possible format error for '%s'. 
All values are NA.", var), - journal = journal, verbose = verbose) - } - } - - # format logical - result[["employer"]] = to_logical(result[["employer"]], "O", "N") - result[["hq"]] = to_logical(result[["hq"]], "true", "false") - - selection = rownames(scheme)[scheme[["type"]] == "logical"] - for (var in selection) { - if (all(is.na(result[[var]]))) { - journal = add_journal_old(sprintf("Possible format error for '%s'. All values are NA.", var), - journal = journal, verbose = verbose) - } - } - - # Check type - test = sapply(result, class) == scheme[["type"]] - if (all(test)) { - journal = add_journal_old("All variables have the good type.", journal = journal, verbose = verbose) - } else { - journal = add_journal_old( - sprintf("Some variables do not have the good type. Check %s.", - paste("'", names(test)[!test], "'", sep = "", collapse = ", ")), - journal = journal, verbose = verbose) - } - - int_time = Sys.time() - journal = add_journal_old( - sprintf("Time elapsed: %s", fmt(int_time - start_time)), - journal = journal, verbose = verbose) - - ### GIS adaptations by departments - - result = split(result, substring(result[["commune"]], 1, 2)) - - # Remove some departements ? 
- departement_remove = grep("98", names(result)) - if (length(departement_remove) > 0) result = result[-departement_remove] - - stat_remove = list() - stat_eaip = list() - - for (d in names(result)) { - journal = add_journal_old("", - sprintf("Treating department %s in %s from '%s'.", d, name, location), - journal = journal, verbose = verbose, first = TRUE) - - # Treat observations with missing spatial coordinates - result[[d]][["coord"]] = stats::complete.cases(result[[d]][, c("longitude", "latitude")]) - - stat = table(result[[d]][c("state", "coord")]) - colnames(stat) = c("missing", "present") - - journal = add_journal_old( - sprintf("Observations removed due to missing spatial coordinates: %s over %s, %s remaining (%1.1f%%)", - sum(stat[, "missing"]), sum(stat), sum(stat[, "present"]), - sum(stat[, "present"]) / sum(stat) * 100), - journal = journal, verbose = verbose) - stat_remove[[d]] = as.data.frame(stat) - - # Removing observations with missing spatial coordinates - result[[d]] = result[[d]][result[[d]][["coord"]],] - result[[d]][["coord"]] = NULL - - # Transform in sf format - result[[d]] = sf::st_as_sf(result[[d]], coords =c("longitude", "latitude"), crs = projection) - - # Adding information from eaip - if (!is.null(eaip_path)) { - if (is.character(eaip_path)) { - eaip = try( - suppressWarnings(readRDS(file.path(eaip_path, sprintf("eaip_%s.rds", format_scope(d))))), - silent = TRUE - ) - } - if ("try-error" %in% class(eaip)) { - journal = add_journal_old(sprintf("'eaip' analysis not included: '%s' is not a path to an rds archive.", path), - journal = journal, - verbose = verbose) - } else if (!"sf" %in% class(eaip)) { - journal = add_journal_old( - sprintf("'eaip' analysis not included: '%s' is not a sf object.", path), - journal = journal, - verbose = verbose) - } else { - eaip = sf::st_transform(eaip, crs = projection) - result[[d]][["eaip"]] = in_layer(result[[d]], eaip) - - stat = table(unclass(result[[d]])[c("state", "eaip")]) - colnames(stat) = 
c("out", "in") - - journal = add_journal_old( - sprintf("'eaip' analysis included: %s in eaip over %s (%1.1f%%)", - sum(stat[, "in"]), sum(stat), sum(stat[, "in"]) / sum(stat) * 100), - journal = journal, verbose = verbose) - stat_eaip[[d]] = as.data.frame(stat) - } - } - - ### Ordering - selection = order_by[order_by %in% names(result[[d]])] - if (length(selection) == 0) { - journal = add_journal_old( - "Result unsorted. 'order_by' does not contain any admissible name for geo_sirene data.", - journal = journal, verbose = verbose) - } else { - result[[d]] = result[[d]][do.call(order, unclass(result[[d]])[selection]), ] - journal = add_journal_old( - sprintf("Result sorted by %s", paste("'", selection, "'", sep = "", collapse = " + ")), - journal = journal, verbose = verbose) - } - - ### Save result - if ("rds" %in% export) { - if (length(result) == 1) { - path = file.path(destination, sprintf("%s.%s", name, "rds")) - } else { - path = file.path(destination, sprintf("%s-%s.%s", name, d, "rds")) - } - saveRDS(result[[d]], path) - journal = add_journal_old( - sprintf("Result saved as %s. Size: %s (%0.3f of inital)", - path, file.info(path)[["size"]], file.info(path)[["size"]] / size), - journal = journal, verbose = verbose) - } - if ("fst" %in% export) { - sf::st_geometry(result[[d]]) = NULL - if (length(result) == 1) { - path = file.path(destination, sprintf("%s.%s", name, "fst")) - } else { - path = file.path(destination, sprintf("%s-%s.%s", name, d, "fst")) - } - fst::write.fst(result[[d]], path, compress = 100) - journal = add_journal_old( - sprintf("Result saved as %s. Size: %s (%0.3f of inital)", - path, file.info(path)[["size"]], file.info(path)[["size"]] / size), - journal = journal, verbose = verbose) - } - if (all(! 
export %in% c("rds", "fst"))) { - journal = add_journal_old( - sprintf("Result not saved: '%s' not admissible as export", export), - journal = journal, verbose = verbose) - } - - - journal = add_journal_old( - sprintf("Time elapsed: %s", fmt(Sys.time() - int_time)), - journal = journal, verbose = verbose) - int_time = Sys.time() - } - - ### Add useful journal information - journal_end = length(journal) - journal[4] = paste(sep = "", - journal[4], sprintf(": 8-%s", journal_end + 2) - ) - - stat_remove = do.call(rbind, stat_remove) - if (!is.null(stat_remove)) { - stat_remove[["departement"]] = substring(rownames(stat_remove), 1, 2) - stat_remove = stat_remove[c("departement", "state", "coord", "Freq")] - stat_remove[["Freq"]] = as.integer(stat_remove[["Freq"]]) - row.names(stat_remove) = NULL - journal[5] = paste(sep = "", - journal[5], - sprintf(": %s-%s", journal_end + 4, journal_end + 4 + nrow(stat_remove)) - ) - journal_end = journal_end + nrow(stat_remove) - on.exit(suppressWarnings( - write(c("", utils::capture.output(stat_remove)), file.path(destination, sprintf("%s.%s", name, "log")), - append = TRUE)), - add = TRUE) - } else { - journal[5] = paste(sep = "", journal[5], ": none") - } - - stat_eaip = do.call(rbind, stat_eaip) - if (!is.null(stat_eaip)) { - stat_eaip[["departement"]] = substring(rownames(stat_eaip), 1, 2) - stat_eaip = stat_eaip[c("departement", "state", "eaip", "Freq")] - stat_eaip[["Freq"]] = as.integer(stat_eaip[["Freq"]]) - row.names(stat_eaip) = NULL - journal[6] = paste(sep = "", - journal[6], - sprintf(": %s-%s", journal_end + 6, journal_end + 6 + nrow(stat_eaip)) - ) - on.exit(suppressWarnings( - write(c("", utils::capture.output(stat_eaip)), file.path(destination, sprintf("%s.%s", name, "log")), - append = TRUE)), - add = TRUE) - } else { - journal[6] = paste(sep = "", journal[6], ": none") - } - - ### Retrieve result - if (retrieve) { - if (length(result) == 1) return(invisible(result[[1]])) - return(invisible(result)) - } -} diff 
--git a/R/adapt_admin_express.R b/R/adapt_admin_express.R index d41b8afa7a75799cd1e6164d9e8622fa5b774bcb..53e07f86f69332678173a0464e6508be34cb9c59 100644 --- a/R/adapt_admin_express.R +++ b/R/adapt_admin_express.R @@ -3,10 +3,10 @@ #' @description #' `adapt_admin_express()` treats Admin Express archives to be in a standard #' format. It is suited for IGN archives. It used by default -#' \code{scheme_admin_express_3_1} as scheme. For older versions -#' \code{scheme_admin_express_2_0} is also available. +#' `scheme_admin_express_3_1` as scheme. For older versions +#' `scheme_admin_express_2_0` is also available. #' -#' Beware that code{adapt_admin_express} will perform all possibles combinations +#' Beware that `adapt_admin_express()` will perform all possible combinations #' of archives present in origin and layers available in scheme if those #' parameters are not specified. #' @@ -15,7 +15,7 @@ #' #' - depending on how many archives and layers are to be treated, a recursive #' call may be perform to do all desired combinations. -#' - archive is read with [read_with_scheme()], which may imply that it is +#' - archive is read with `read_with_scheme()`, which may imply that it is #' uncompressed, then read (usually as a shp or a gpkg file), reprojected if #' asked, and finally adapted. #' - the output is saved as rds in given destination withe[save_archive()]. The diff --git a/R/adapt_geo_sirene.R b/R/adapt_geo_sirene.R new file mode 100644 index 0000000000000000000000000000000000000000..4cbb8f849d967ae37bdbfa3c27e12940d88d9f01 --- /dev/null +++ b/R/adapt_geo_sirene.R @@ -0,0 +1,318 @@ +#' @title Adapt geolocalized Sirene data +#' +#' @description +#' `adapt_geo_sirene()` adapts geolocalized Sirene database to a common format. +#' +#' @details +#' The following treatments are performed: +#' - Only some fields are selected, field names (colnames) are consistently set. +#' - NA values are consistently set. 
+#' - Type of fields are consistently set, some checks are performed on transformations. +#' - Data set is cut and saved by departments +#' - Only observations with spatial coordinates are kept. +#' - Exposure of observations (belonging or not to eaip) is added. +#' +#' @param origin character, path to the directory where archives are stored. +#' @param destination character, path to the directory where results should be +#' saved. +#' @param archive character, vector of archives to be adapted. +#' @param scheme data.frame, how archive variables should be treated. +#' @param projection numeric, 4 digits projection to use. +#' @param active logical, should only active activities be kept. +#' @param path_eaip character, path where eaip archives should be found. +#' @param path_admin character, path where administrative archives should be +#' found. +#' @param journal logical, should a journal file be saved. +#' @param map logical, should a map be plotted (for check purpose). +#' @param verbose logical, should the function give some summary information. +#' @param retrieve logical, should the result be returned. +#' +#' @return if retrieve = TRUE, data.frame of adapted data stored in archive. 
+#' +#' @export +#' +#' @encoding UTF-8 +#' +#' @examples +#' +#' \dontrun{ +#' dest = tempdir() +#' download_geo_sirene(dest, "data.gouv", scope = "department", version = "34") +#' archive = "geo_siret_34.csv.gz" +#' origin = file.path(dest, "version_2019", "2024-03") +#' dest = file.path(dest, "adapted") +#' result = adapt_geo_sirene(origin, dest, verbose = TRUE, retrieve = TRUE) +#' } + +adapt_geo_sirene = function( + origin, + destination, + archive, + scheme = floodam.data::scheme_sirene_2019, + projection = 4326, + active = TRUE, + path_eaip = NULL, + path_admin = NULL, + journal = TRUE, + map = !is.null(path_eaip), + verbose = TRUE, + retrieve = FALSE +) { + ### Recursive call if archive is missing + if (missing(archive)) {archive = list.files(origin, pattern = ".csv.gz$")} + if (length(archive) > 1) { + return( + lapply( + archive, + adapt_geo_sirene, + origin = origin, + destination = destination, + scheme = scheme, + projection = projection, + active = active, + path_eaip = path_eaip, + path_admin = path_admin, + journal = journal, + map = map, + verbose = verbose, + retrieve = retrieve + ) + ) + } + if (length(archive) == 0) { + return(NULL) + } + + archive = file.path(origin, archive) + message(sprintf("'geo_sirene' adaptation from '%s'...", basename(archive))) + old_s2 = suppressMessages(sf::sf_use_s2(FALSE)) + on.exit(suppressMessages(sf::sf_use_s2(old_s2))) + + # Load admin_dep if found + if (!is.null(path_admin)) { + admin_dep = file_version( + path_admin, + "admin-express-cog_department.*rds" + ) + if (length(admin_dep) == 1) { + admin_dep = readRDS(admin_dep) + } else { + admin_dep = NULL + } + } else { + admin_dep = NULL + } + + # Analyse archive and make elementary tests + info = suppressWarnings(analyse_archive(archive, "geo-sirene")) + if (!info["data"] %in% "geo-siret") { + cat("\t- Not 'geo-siret' data. Nothing is done.") + return(NULL) + } + if (info["version"] != "2019") { + cat( + sprintf( + "\t- version '%s' is not implemented. 
Nothing is done.", + info["version"] + ) + ) + return(NULL) + } + + # Prepare outputs and open journal + geo_siret = format_archive(info, extension = "rds") + dir.create(destination, showWarnings = FALSE, recursive = TRUE) + if (isTRUE(journal)) { + journal = file.path(destination, gsub(".rds", ".log", geo_siret)) + init_log_info( + journal, + main = sprintf("%s in %s", basename(archive), origin), + treatment = "adapt_geo_sirene", + ) + } else { + journal = FALSE + } + + # Reading archive with scheme information + result = add_log_info( + archive, + read_with_scheme, + info = list( + "short" = "read 'geo-siret'", + "long" = "Reading original 'geo-siret' archive and applying scheme" + ), + journal = journal, + verbose = verbose, + verification = expression(info_dim(result)), + scheme = scheme, + projection = projection + ) + + # NA treament → TO BE KEPT ??? if yes need adaptation. + # for (i in 1:nrow(na_value)) { + # var = na_value[i, "name"] + # result[[var]] = treat_na(result[[var]], na_value[i, "na.value"]) + # } + + # Select only active activities if asked. + if (isTRUE(active)) { + result = add_log_info( + result, + fun = subset_with_message, + info = list( + "short" = "select active activities", + "long" = "Selecting active activities" + ), + journal = journal, + verbose = verbose, + verification = expression(info_dim(result)), + rows = result[["state"]] == "A" + ) + } + + # Split result by department. 
+ result = add_log_info( + result, + fun = split_dep, + info = list( + "short" = "split 'geo-siret' by department", + "long" = "Splitting result by department" + ), + journal = journal, + verbose = verbose, + verification = expression(length(result)), + admin_dep = admin_dep + ) + + # Select only scope if any, otherwise select only valid department + if (info["scope"] %in% format_scope(floodam.data::department[["code"]])) { + result = result[info["scope"]] + } else { + selection = names(result) %in% + format_scope(floodam.data::department[["code"]]) + result = result[selection] + } + + # At this stage result must be split by valid department + for (dep in names(result)) { + info["scope"] = dep + geo_siret = format_archive(info, extension = "rds") + + # Adding 'eaip' information + if (!is.null(path_eaip)) { + result[[dep]] = add_log_info( + result[[dep]], + add_eaip, + info = list( + "short" = sprintf("add eaip [%s]", info["scope"]), + "long" = sprintf( + "Addition of 'eaip' information [%s]", + info["scope"] + ) + ), + journal = journal, + verbose = verbose, + verification = expression(sum(result[['eaip']])), + path_eaip = path_eaip, + department = info["scope"] + ) + } + + # Saving data + add_log_info( + result[[dep]], + saveRDS, + info = list( + "short" = sprintf( + "save [%s] to %s", + info["scope"], + file.path(destination, geo_siret) + ), + "long" = sprintf("Saving geo-siret [%s]", info["scope"]) + ), + journal = journal, + verbose = verbose, + verification = expression(object.size(x)), + file = file.path(destination, geo_siret) + ) + + # Plot map + if (map == TRUE) { + map_log = gsub(".rds", ".png", geo_siret) + add_log_info( + result[[dep]], + map_log_geo_siret, + info = list( + "short" = sprintf( + "plot check map [%s] to %s", + info["scope"], + file.path(destination, map_log) + ), + "long" = sprintf( + "Plotting map (check purpose) [%s]", + info["scope"] + ) + ), + journal = journal, + verbose = verbose, + dep = info["scope"], + admin_dep = admin_dep, + 
path_map = file.path(destination, map_log) + ) + } + } + + ### Retrieve result + if (retrieve == TRUE) { + if (length(result) == 1) return(invisible(result[[1]])) + return(invisible(result)) + } +} + +map_log_geo_siret = function(x, path_map, admin_dep = NULL, dep) { + grDevices::png(path_map, width = 1000, height = 1000) + on.exit(grDevices::dev.off()) + + if (is.null(x[["eaip"]])) { + col = "gray" + } else { + col = ifelse(x[["eaip"]], "red", "black") + col[is.na(col)] = "gray" + } + + plot( + x[0], + type = "p", + pch = 15, + cex = .05, + col = col, + border = col, + cex.axis = 2, + axes = TRUE, + xaxt = "n", + yaxt = "n", + reset = FALSE + ) + + if (!is.null(admin_dep)) { + col = ifelse( + admin_dep[["department"]] == format_scope(dep, "insee"), + scales::alpha("yellow", .05), scales::alpha("blue", .05) + ) + plot(admin_dep[0], col = col, border = "black", add = TRUE) + } + graphics::axis(2, at = pretty(sf::st_bbox(x)[c(2, 4)], 5), cex.axis = 2) + graphics::axis(4, at = pretty(sf::st_bbox(x)[c(2, 4)], 5), cex.axis = 2) + graphics::axis(1, at = pretty(sf::st_bbox(x)[c(1, 3)], 5), cex.axis = 2) + graphics::axis(3, at = pretty(sf::st_bbox(x)[c(1, 3)], 5), cex.axis = 2) + graphics::grid() + + graphics::legend( + x = "bottomright", + inset = 1/100, + legend = c("within eaip", "outside eaip"), + fill = c("red", "black"), + cex = 1 + ) + return(invisible(NULL)) +} \ No newline at end of file diff --git a/R/add_journal.R b/R/add_journal.R index 9c08c9606824de7897c28f2e50dc6a4271f36294..8f52149c9096ab9e5a50d92c308ca5c97872242d 100644 --- a/R/add_journal.R +++ b/R/add_journal.R @@ -17,10 +17,10 @@ #' #' If the parameter 'status' is not missing, `add_journal()` try to find #' to which 'task' this is relevant. This can be done by giving explictly the -#' 'id', or by ginving the 'task'. If nothing is given, it is supposed that it +#' 'id', or by giving the 'task'. If nothing is given, it is supposed that it #' corresponds with the the last task with missing 'status'. 
For found 'task', #' it sets 'status' to given 'status', 'end' to current time. 'elapsed' to the -#' formatted difftime between 'end' and 'start' with \code{floodam.data::fmt}. +#' formatted difftime between 'end' and 'start' with `floodam.data::fmt()`. #' #' If verbose == TRUE, a message is sent. If 'task' is given, a new line is #' sent, but not ended by 'EOL'. When 'status' is given, message depend on what @@ -28,7 +28,7 @@ #' current task, it is completed with status and elapsed time. If not, a new #' line is sent with the whole information (start, task, status, elapsed). #' 'degree' is used to have some tabular formatting. The message sent is -#' formatted by \code{floodam.date:::format_journal}. +#' formatted by `floodam.data:::format_journal()`. #' #' @param journal data.frame, journal to be amended. May be missing (see #' details). @@ -46,7 +46,6 @@ #' @export #' #' @encoding UTF-8 -#' @author Frédéric Grelot #' #' @examples #' @@ -192,7 +191,7 @@ format_journal = function(journal) { if (is.na(journal[i, "status"])) { msg = sprintf( "%s: [%s] %s%s...", - journal[i, "start"], + format(journal[i, "start"]), id, tab, journal[i, "task"] @@ -200,7 +199,7 @@ format_journal = function(journal) { } else { msg = sprintf( "%s: [%s] %s%s... %s [%s]", - journal[i, "start"], + format(journal[i, "start"]), id, tab, journal[i, "task"], diff --git a/R/add_log_info.R b/R/add_log_info.R index 16e15af8c384d8b935ad5e10e7d7fb5fc03da0f6..3b5a0f5da6063d5985ae098d99e9df944f369722 100644 --- a/R/add_log_info.R +++ b/R/add_log_info.R @@ -1,18 +1,35 @@ #' @title Apply a step to data analysis #' #' @description -#' This function allows to embed a analysis made by a function, so that log -#' information can be saved in a consistent way, messages can be sent depending -#' on the parameter verbose. -#' +#' `add_log_info()` encapsulates an analysis performed by another function, to +#' feed a consistently saved log. Messages can also be returned directly. 
+#' +#' @details +#' `add_log_info()` is designed to feed a log step by step during a +#' multi-stage analysis: +#' - Information on stage names is entered in the `info` parameter, in both +#' short (for the log) and long (for on-the-fly messages) versions. +#' - The level of analysis is controlled by the `level` parameter, which +#' influences the number of tabs in messages sent or saved. +#' +#' Messages generated include step names and elapsed time for analysis. These +#' messages can be completed in two ways: +#' - An expression can be sent via the `verification` parameter, which must be +#' kept simple and act either on input data (x) or on results (result). +#' - messages returned by the `fun` function are captured and minimally +#' formatted by adding tabs according to the level value. +#' +#' Captured messages are added to the log file, while those generated by +#' `verification` are added to a dedicated csv file. +#' +#' WARNING: because of partial match used in R for arguments, some errors may +#' occur when `add_log_info()` is used without explicitly naming arguments. +#' #' @param x data to be adapted. #' @param fun function, the analysis that may performed on x. #' @param info list, gives the name of the step both in 'short' and 'long' #' terms. -#' @param journal either FALSE, if not must be a character of length one, and -#' will be interpreted at the log file where to save any log information. Its -#' format should be in '*.log' in order to create also a '*-log.csv' log file -#' that will store information on time and any given verification in csv format. +#' @param journal FALSE, or path (character) of the '*.log' file to write to. #' @param verbose logical, should the some messages be sent in real time. #' @param verification expression, a simple treatment that will be added to #' *log.csv, may be missing. 
@@ -25,14 +42,49 @@ #' @export #' #' @encoding UTF-8 -#' @author Frédéric Grelot +#' +#' @examples +#' journal = file.path(tempdir(), "journal.log") +#' info = list("short" = "Mean by Species", "long" = "Mean by Species") +#' result = add_log_info( +#' iris[4], +#' aggregate, +#' info, +#' journal, +#' by = iris["Species"], +#' FUN = mean, +#' verification = expression(ncol(result)) +#' ) +#' aggregate_special = function(x, ...) { +#' selection = sapply(x, is.numeric) | sapply(x, is.logical) +#' if (sum(!selection) != 0) { +#' message( +#' sprintf( +#' "- %s colum(s) are(is) neither numeric nor logical (%s). ", +#' sum(!selection), +#' paste0(names(x)[!selection], "|") +#' ), +#' "They are(It is) dropped." +#' ) +#' } +#' aggregate(x[selection], ...) +#' } +#' result = add_log_info( +#' iris, +#' aggregate_special, +#' info, +#' journal, +#' by = iris["Species"], +#' FUN = mean, +#' verification = expression(ncol(result)) +#' ) add_log_info = function( x, fun, info, journal, - verbose, + verbose = TRUE, verification, level = 1, ... @@ -41,14 +93,16 @@ add_log_info = function( if (is.na(level)) level = 1 level = paste(rep("\t", level), collapse = "") - if (verbose == TRUE) cat(sprintf("%s- %s... ", level, info[["long"]])) + if (isTRUE(verbose)) { + message(sprintf("%s- %s... ", level, info[["long"]]), appendLF = FALSE) + } start = Sys.time() - - result = fun(x, ...) - + msg_add = utils::capture.output(result <- fun(x, ...), type = "message") + msg_add = grep("^- ", msg_add, value = TRUE) + if ( - journal != FALSE && + !isFALSE(journal) && ! 
missing(verification) && methods::is(verification, "expression") ) { @@ -64,9 +118,12 @@ add_log_info = function( duration = sprintf("(%s %s)", round(elapsed, 2), attr(elapsed, "units")) time = round(as.numeric(elapsed, units = "secs"), 3) - if (journal != FALSE) { + if (!isFALSE(journal)) { msg = sprintf("%s- %s: done %s.", level, info[["short"]], duration) write(msg, journal, append = TRUE) + if (length(msg_add) > 0) { + write(paste0(level, msg_add), journal, append = TRUE) + } utils::write.table( data.frame( @@ -84,7 +141,12 @@ add_log_info = function( ) } - if (verbose == TRUE) cat(sprintf("done %s\n", duration)) + if (isTRUE(verbose)) { + message(sprintf("done %s", duration)) + if (length(msg_add) > 0) { + message(paste0("\t", level, msg_add, collapse = "\n")) + } + } return(result) } @@ -126,12 +188,8 @@ init_log_info = function( } info_sf = function() { - info = sf::sf_extSoftVersion() - info = paste( - c(names(info), "s2"), - c(info, sf::sf_use_s2()), - sep = ":", collapse = ", " - ) + info = c(sf::sf_extSoftVersion(), "s2" = sf::sf_use_s2()) + info = paste(names(info), info, sep = ":", collapse = ", ") sprintf("sf (%s): %s", utils::packageVersion("sf"), info) } diff --git a/R/analyse_archive.R b/R/analyse_archive.R index 3505135d5868820d9cf35c50d3cfb03f485f000f..82234dc716c1b278eac1c8f682a6eb2a1615fd75 100644 --- a/R/analyse_archive.R +++ b/R/analyse_archive.R @@ -1,13 +1,20 @@ -#' @title Analyse names of archive to get useful infomations. +#' @title Get useful infomations from archive path #' #' @description -#' The function is set for IGN's names of archive, that should normally be -#' "well" structured with this scheme : -#' data-type_version_precision_format_projection_scope_date.ext. Sometimes a -#' less frequent format is -#' data-name_version_precision_format_projection_scope_proj_date.ext. In the -#' latter case, "proj" is given in another format than "projection", but also in -#' a documented way. 
+#' `analyse_archive()` analyses paths of archives to get useful information. It +#' can deal with IGN's, `floodam.data`'s and geo-sirene names. +#' +#' @details +#' +#' `analyse_archive()` is set for IGN's names of archive, that should normally +#' be "well" structured with this scheme: +#' - data-type_version_precision_format_projection_scope_date.ext. +#' +#' Sometimes a less frequent format is: +#' - data-name_version_precision_format_projection_scope_proj_date.ext. +#' +#' In the latter case, "proj" is given in another format than "projection", but +#' also in a documented way. #' #' From this structure, an analysis is done to retrieve, the type of data, the #' version, the precision (very often it is empty), the format used (something @@ -19,17 +26,17 @@ #' #' This is also done when the format is the less frequent case, because in this #' case, the projection is not given. +#' +#' `analyse_archive()` is also set for `floodam.data` and geo-sirene names. #' #' @param x character vector -#' @param origin character, to choose the format. Only "ign" is available at the -#' moment. -#' +#' @param origin character, to choose the format. See details. +#' #' @return either a vector or a matrix depending on the length of x.
#' #' @export #' #' @encoding UTF-8 -#' @author Frédéric Grelot #' #' @examples #' ign = c( @@ -55,13 +62,29 @@ #' "ADMIN-EXPRESS-COG_3-1__SHP_RGAF09UTM20_GLP_2022-04-15.7z" #' ) #' analyse_archive(floodam, "floodam.data") +#' +#' geo_sirene = c( +#' "2024-01/geo_siret_34.csv.gz", +#' "2024-01/StockEtablissement_utf8_geo.csv.gz", +#' "2024-01/StockEtablissementActif_utf8_geo.csv.gz", +#' "2024-01/StockEtablissementFerme_utf8_geo.csv.gz", +#' "2024-01/geo_sirene.csv.gz", +#' "2024-01/etablissements_actifs.csv.gz", +#' "2024-01/etablissements_fermes.csv.gz", +#' "bad-date/geo_siret_34.csv.gz" +#' ) +#' analyse_archive(geo_sirene, "geo-sirene") -analyse_archive = function(x, origin = c("ign", "floodam.data")) { +analyse_archive = function( + x, + origin = c("ign", "floodam.data", "geo-sirene") +) { origin = match.arg(origin) if (length(x) > 1) return(t(sapply(x, analyse_archive, origin = origin))) date_pattern = "[0-9]{4}-[0-9]{2}-[0-9]{2}" + month_pattern = "^[0-9]{4}-[0-9]{2}$" if (origin == "ign") { scope_admissible = c( @@ -223,6 +246,67 @@ analyse_archive = function(x, origin = c("ign", "floodam.data")) { } } + if (origin == "geo-sirene") { + result = stats::setNames( + rep(NA_character_, 7), + c( + "data", + "precision", + "version", + "scope", + "vintage", + "extension", + "name" + ) + ) + + result["extension"] = basename_ext(x) + result["name"] = basename_core(x) + result["vintage"] = basename(dirname(x)) + if (!grepl(month_pattern, result["vintage"])) result["vintage"] = NA + + if (result["name"] == "geo_sirene") { + result["data"] = "geo-siret" + result["version"] = "2017" + result["scope"] = "france" + } + if (result["name"] == "etablissements_actifs") { + result["data"] = "geo-siret" + result["precision"] = "active" + result["version"] = "2017" + result["scope"] = "france" + } + if (result["name"] == "etablissements_fermes") { + result["data"] = "geo-siret" + result["precision"] = "closed" + result["version"] = "2017" + result["scope"] = "france" + } + if 
(result["name"] == "StockEtablissementActif_utf8_geo") { + result["data"] = "geo-siret" + result["precision"] = "active" + result["version"] = "2019" + result["scope"] = "france" + } + if (result["name"] == "StockEtablissementFerme_utf8_geo") { + result["data"] = "geo-siret" + result["precision"] = "closed" + result["version"] = "2019" + result["scope"] = "france" + } + if (result["name"] == "StockEtablissement_utf8_geo") { + result["data"] = "geo-siret" + result["version"] = "2019" + result["scope"] = "france" + } + if (grepl("geo_siret", result["name"])) { + result["data"] = "geo-siret" + result["version"] = "2019" + temp = unlist(strsplit(result["name"], "_")) + result["scope"] = format_scope(temp[length(temp)]) + } + } + # Replacing empty string by NA result[result == ""] = NA diff --git a/R/apply_scheme.R b/R/apply_scheme.R index 5e224511faf6534e8ad3b5830c99cacf5de30f05..1c73553c1a0f399e5a02bf309e201fc00ed0745a 100644 --- a/R/apply_scheme.R +++ b/R/apply_scheme.R @@ -2,8 +2,8 @@ #' #' @description #' The function applies a scheme to a data.frame or a sf object, which consists -#' in keeping only variable that should be kept, renaming variables, and setting -#' the type of variable. +#' in keeping only variables that should be kept, renaming variables, and +#' setting the type of variables. #' #' @param x data.frame or sf, data to be adapted. #' @param scheme data.frame, scheme to be applied. 
@@ -25,7 +25,6 @@ #' @export #' #' @encoding UTF-8 -#' @author Frédéric Grelot apply_scheme = function(x, scheme, selection = NULL, name_origin = NULL) { if (!is.null(name_origin)) { @@ -62,7 +61,7 @@ apply_scheme = function(x, scheme, selection = NULL, name_origin = NULL) { selection = scheme[["name"]][scheme[["type"]] == "factor"] x[selection] = lapply(x[selection], as.factor) - selection = scheme[["name"]][scheme[["type"]] == "date"] + selection = scheme[["name"]][scheme[["type"]] == "Date"] x[selection] = lapply(x[selection], as.Date) selection = scheme[["name"]][scheme[["type"]] == "numeric"] @@ -71,6 +70,9 @@ apply_scheme = function(x, scheme, selection = NULL, name_origin = NULL) { selection = scheme[["name"]][scheme[["type"]] == "integer"] x[selection] = lapply(x[selection], as.integer) + selection = scheme[["name"]][scheme[["type"]] == "logical"] + x[selection] = lapply(x[selection], to_logical) + if (!is.null(geometry)) { x = sf::st_set_geometry(x, geometry) } diff --git a/R/basename_extended.R b/R/basename_extended.R index 153ca230901cf6e157c39651ebbd2a17779691a3..6020011c3a099c245f79eb799c6235d07a45868f 100644 --- a/R/basename_extended.R +++ b/R/basename_extended.R @@ -3,22 +3,20 @@ #' #' @description #' Utilities for getting name of files and extension in another whay than of -#' [tools::file_ext()] and others corresponding functions. +#' `tools::file_ext()` and other corresponding functions. +#' +#' @details +#' `basename_core()` differs from `tools::file_path_sans_ext()` in this: +#' - `basename()` is first applied +#' - "compounded" extensions are removed. See examples. #' #' @param x character vector, file paths #' #' @return character vector of processed files. #' -#' @details -#' -#' \code{basename_core()} differs from [tools::file_path_sans_ext()] in this: -#' - [basename()] is first applied -#' - "compounded" extensions are removed. See examples.
-#' #' @export #' #' @encoding UTF-8 -#' @author Frédéric Grelot #' #' @examples #' @@ -29,7 +27,6 @@ #' basename_core("toto.7z.001") #' basename_core("path/toto.csv") #' basename_core("path/toto.csv.gz") -#' basename_core = function(x) { x = basename(x) @@ -40,9 +37,8 @@ basename_core = function(x) { #' @rdname basename_extended #' #' @details -#' -#' \code{basename_ext()} differs from [tools::file_path_sans_ext()] in this: -#' - [basename()] is first applied +#' `basename_ext()` differs from `tools::file_path_sans_ext()` in this: +#' - `basename()` is first applied #' - "compounded" extensions are extracted. See examples. #' #' @export diff --git a/R/data.R b/R/data.R index a0d1ba2f8ab9e1f3b7417c99f4d1b324db2229ce..55f9e0ad119ca3c62b2c306f11491b72af7b023f 100644 --- a/R/data.R +++ b/R/data.R @@ -361,15 +361,15 @@ #' @format A data frame with 57 rows and 10 variables: #' \describe{ #' \item{name}{character, name of variables used in floodam.data.} -#' \item{name.origin}{character, name of variables as defined in original +#' \item{name_origin}{character, name of variables as defined in original #' data.} #' \item{order}{integer, order of the variables in original data.} #' \item{keep}{logical, defined if floodam.data will keep this variable.} #' \item{type}{character, type that will be used within floodam.data.} -#' \item{label.french}{character, variables description in French.} +#' \item{label_french}{character, variables description in French.} #' \item{source}{character, source of the variables.} -#' \item{length.source}{integer, length of variables in source.} -#' \item{type.source}{character, type of variables in source.} +#' \item{length_source}{integer, length of variables in source.} +#' \item{type_source}{character, type of variables in source.} #' \item{comment}{character, some comments on choice made.} #' } "scheme_sirene_2019" @@ -382,7 +382,7 @@ #' @format A data frame with 3 rows and 2 variables: #' \describe{ #' \item{name}{character, name of 
variable as used in floodam.data.} -#' \item{na.value}{character, value that is to be considered as na.value.} +#' \item{na_value}{character, value that is to be considered as na.value.} #' } "scheme_sirene_na" diff --git a/R/download.geo_sirene.R b/R/download.geo_sirene.R deleted file mode 100644 index 8d85ab58769f3be58b8183d91ff64a3bbc38cbd8..0000000000000000000000000000000000000000 --- a/R/download.geo_sirene.R +++ /dev/null @@ -1,85 +0,0 @@ -#' Download Géo SIRENE -#' -#' Function used to downlaod Geolocalized SIRENE data-base. -#' -#' @param destination character, the address where dara are stocked. -#' @param origin character, either a keyword or the address from where data are downloaded. Best to keep it -#' at default value. See details. -#' @param name character, vector of acceptable names fo archive to be downloaded. -#' @param extension character, vector of acceptable types of archive to be downloaded. -#' @param version string, version of ADMIN EXPRESS to be downloaded. -#' @param date character, date of the archive to be downloaded. -#' @param month character, vector of month to be downloaded -#' @param version_insee string, INSEE version to be downloaded. Only used with \code{origin = "cquest"}. -#' @param verbose logical, should the function send some messages while running. -#' -#' @return nothing -#' -#' @section Details: -#' -#' If \code{origin == "cquest"}, then all necessary variables are filled with those values: -#' \itemize{ -#' \item \bold{origin} is changed to \url{https://data.cquest.org/geo_sirene}. This url is adapted depending -#' on version_insee. -#' \item \bold{version_insee} If value is "v2019", then the version 2019 is dowloaded from -#' \url{https://data.cquest.org/geo_sirene}. -#' \item \bold{name} If version is not set to null, then default value is changed to "StockEtablissement_utf8", -#' which should be the most complete version at France level. -#' \item \bold{extension} default value is changed to "csv.gz". 
-#' \item \bold{month} is used to set something equivalent to date, but directly in origin. -#' \item \bold{date} is set to null, because conflict may occur with month. -#' } -#' -#' If not, everything shall be filled so that `download_archive()` can make a successful download. -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' origin = "https://data.cquest.org/geo_sirene" -#' destination = tempdir() -#' month = c("2018-11", "2019-05", "2020-03") -#' download.geo_sirene(origin, destination, month = month) -#' unlink(destination) -#' } - -download.geo_sirene = function( - destination, - origin = "cquest", - name = NULL, - extension = NULL, - version = NULL, - date = NULL, - month = NULL, - version_insee = "v2019", - verbose = TRUE) { - if (origin == "cquest") { - origin = "https://data.cquest.org/geo_sirene" - if (version_insee == "v2019") { - origin = file.path(origin, version_insee) - version_insee = "version_2019" - } else { - version_insee = "version_2017" - } - if (is.null(month)) { - x = read_url(origin) - month = max(unlist(regmatches(x, gregexpr("(?<=href=([\\\"']))\\d{4}-\\d{2}(?=\\/\\1)", x, perl = TRUE)))) - } - month_origin = month - if (version_insee == "version_2019") month_origin = gsub("(2018|2019)", "\\1/\\1", month, perl = TRUE) - origin = file.path(origin, month_origin) - destination = file.path(destination, version_insee, month) - if (is.null(extension)) extension = "csv.gz" - if (is.null(version) && is.null(name)) name = "StockEtablissement_utf8" - date = NULL - } - - to_do = data.frame( - origin = origin, - destination = destination, - short = "download.geo_sirene", - stringsAsFactors = FALSE - ) - - download_archive(to_do, extension = extension, name = name, date = date, version = version, verbose = verbose) -} \ No newline at end of file diff --git a/R/download_archive.R b/R/download_archive.R index a65a482567bac3610651370351fe7e473c141db0..5cb6df8f89b824f18f9dc28da5c7a69c1e71d1d2 100644 --- a/R/download_archive.R +++ b/R/download_archive.R 
@@ -1,14 +1,26 @@ -#' Download distant archives +#' @title Download distant archives #' -#' Core function used to downlaod archives +#' @description +#' `download_archive()` is the core function used to download archives from +#' other specific functions. +#' +#' @details +#' `download_archive()` is used by other more specific functions. It takes as +#' input a `to_do` `data.frame` giving the correspondence between origin urls and +#' local destinations. `to_do` is expected to contain these fields: +#' +#' * `origin`: the origin urls where to find archives. +#' * `destination`: the local places where to save archives. +#' * `short`: the name in short of what type of archives is being +#' downloaded, used for log information. If missing, it is set to "unknown". +#' * `local_origin`: the local origin for sites giving relative links, and for +#' which it is different from `origin`. If missing, `origin` is used. #' -#' This function is used by other more specific functions. It takes as input a -#' to_do data.frame giving correspondance between origin urls to local -#' destinations. It also to select some types of archives to be downloaded. On -#' the good construction of the to_do data.frame depends the success of the -#' downloads! +#' For each line of `to_do`, all archives that match the conditions given by other +#' parameters (`extension`, `name`, `date`, `version`) are downloaded. #' -#' @param to_do data.frame. Two columns of character are expected. +#' @param to_do data.frame. Four columns of character are expected, two of +#' which are mandatory. See details. #' @param extension character, vector of acceptable types of archive to be #' downloaded. #' @param name character, vector of acceptable names fo archive to be @@ -17,13 +29,11 @@ #' filter. #' @param version character, something like a version that should be used as a #' filter. -#' @param html character, indicates if the page is "simple" or is coming from -#' "data.gouv".
This has implications on how to look for links. -#' @param verbose logical, should the function send somme messages while +#' @param verbose logical, should the function send some messages while #' running. #' -#' @return nothing -#' +#' @return An invisible `data.frame` of log. +#' #' @export #' #' @examples @@ -32,16 +42,14 @@ #' to_do_cquest = data.frame( #' origin = "https://data.cquest.org/registre_parcellaire_graphique/2018", #' destination = destination, -#' short = "rpg_cquest", -#' stringsAsFactors = FALSE +#' short = "rpg_cquest" #' ) #' download_archive(to_do_cquest, version = "D976", verbose = TRUE) #' #' to_do_ign = data.frame( #' origin = "https://geoservices.ign.fr/rpg", #' destination = destination, -#' short = "rpg_ign", -#' stringsAsFactors = FALSE +#' short = "rpg_ign" #' ) #' download_archive(to_do_ign, version = "D976", date = "2020", verbose = TRUE) #' download_archive(to_do_ign, version = "D976", verbose = TRUE) @@ -56,7 +64,6 @@ download_archive = function( name = NULL, date = NULL, version = NULL, - html = "simple", verbose = FALSE ) { result = list() @@ -79,7 +86,7 @@ download_archive = function( name = name, date = date, version = version, - html = html + local_origin = to_do[i, "local_origin"] ) ), silent = TRUE @@ -157,11 +164,16 @@ download_archive = function( verbose = verbose ) result[[origin]] = journal + short = if (is.null(to_do[i, "short"])) { + "unknown" + } else { + gsub("/", "_", to_do[i, "short"]) + } write_journal( journal, file.path( to_do[i, "destination"], - sprintf("%s.%s", gsub("/", "_", to_do[i, "short"]), "log") + sprintf("%s.%s", short , "log") ) ) } diff --git a/R/download_ban.R b/R/download_ban.R index 4a25a7b128270ebaf4247a042f7bf6a10545d5d9..1c909e1977091b19e074eca5ecfb470312335efe 100644 --- a/R/download_ban.R +++ b/R/download_ban.R @@ -1,89 +1,87 @@ -#' Download BAN +#' @title Download BAN #' -#' Function used to downlaod BAN +#' @description +#' `downlaod_ban()` is used has a wrapper of `download_archive()` to 
download +#' easily BAN database. +#' +#' @details +#' +#' If `repository` is set to "data.gouv", then all necessary variables are +#' filled with those values: +#' +#' * **origin** is set to "https://adresse.data.gouv.fr/data/ban/adresses" +#' * **name** default value is changed to "adresses" +#' * **extension** default value is changed to "csv.gz" +#' * **date** default value is used to choose a given directory. If +#' date is `NULL`, then the most recent version is taken (equivalent to +#' "latest"). +#' * **version** must be something within "ban" or "bal". When "ban" +#' is used the file with one position per address is used. When "bal" is +#' used, the file with several position per address is used. +#' * **department** is used. Should be admissible French +#' departments (a formatting will be attempted with `format_scope()`). +#' If `NULL`, region may be used. If both are `NULL`, all departments from +#' `floodam.data::department`` will be used. +#' * **region** may be used if department is `NULL`. Should be +#' admissible French regions (a formatting will be attempted with +#' `format_scope()`). If region is given and department is `NULL`, all +#' departments from given region will be used. +#' +#' If `repository` is set to "other", everything shall be filled so that +#' `download_archive()` can make a successful download. #' #' @param destination character, the address where data are stocked. -#' @param origin character, either a keyword or the address from where data are downloaded. See details. -#' @param name character, vector of acceptable names for archive to be downloaded. -#' @param extension character, vector of acceptable types of archive to be downloaded. +#' @param repository character, keyword or the repository from where data are +#' downloaded. Best to use given options. See details. +#' @param origin character, url address from where data are downloaded. It is +#' set when repository is something else than "other". 
+#' @param extension character, vector of acceptable types of archive to be +#' downloaded. #' @param date character, date of the archive to be downloaded. #' @param version string, version of BAN to be downloaded. See details. -#' @param department integer, or converted to integer. Departments that should be considered. See details. -#' @param region integer, or converted to integer. Regions that should be considered. See details. +#' @param department integer, or converted to integer. Departments that should +#' be considered. See details. +#' @param region integer, or converted to integer. Regions that should be +#' considered. See details. #' @param verbose logical, should the function send some messages while running. #' -#' @return nothing -#' -#' @section Details: -#' -#' If \code{origin == "data.gouv"}, then all necessary variables are filled with -#' those values: -#' \itemize{ -#' \item \bold{origin} is changed to -#' "https://adresse.data.gouv.fr/data/ban/adresses" -#' \item \bold{name} default value is changed to "adresses" -#' \item \bold{extension} default value is changed to "csv.gz" -#' \item \bold{date} default value is used to choose a given directory. If -#' date is null, then the most recent version is taken (equivalent to -#' "latest"). -#' \item \bold{version} must be something within "ban" or "bal". When "ban" -#' is used the file with one position per address is used. When "bal" is -#' used, the file with several position per address is used. -#' \item \bold{department} is used. Should be admissible French -#' departments (a formatting will be attempted with `format_scope()`). -#' If NULL, region may be used. If both are NULL, all departments from -#' floodam.data::department will be used. -#' \item \bold{region} may be used if department is null. Should be -#' admissible French regions (a formatting will be attempted with -#' `format_scope()`). If region is given and department is NULL, all -#' departments from given region will be used. 
-#' } -#' -#' If not, everything shall be filled so that `download_archive()` can make -#' a successful download. +#' @return An invisible `data.frame` of log. #' #' @export #' #' @examples #' \dontrun{ -#' destination = tempdir() -#' download_ban(destination) -#' download_ban(destination, department = 34) -#' download_ban(destination, department = 34, date = "2022-01-01") -#' download_ban(destination, region = 11, date = "2022-01-01") -#' download_ban(destination, department = 34, date = "2022-09-20", version = "bal") +#' dest = tempdir() +#' download_ban(dest) +#' download_ban(dest, department = 34) +#' download_ban(dest, department = 34, date = "2022-01-01") +#' download_ban(dest, region = 76, date = "2022-01-01") +#' download_ban(dest, department = 976, date = "2022-09-20", version = "bal") #' -#' unlink(destination) +#' unlink(dest) #' } download_ban = function( destination, - origin = "data.gouv", - name = NULL, + repository = c("data.gouv", "other"), + origin = NULL, extension = NULL, date = NULL, - version = NULL, + version = c("ban", "bal"), department = NULL, region = NULL, - verbose = TRUE) { + verbose = TRUE +) { + repository = match.arg(repository) - if (origin == "data.gouv") { - version = match.arg(version, c("ban", "bal")) + if (repository == "data.gouv") { + version = match.arg(version) origin = "https://adresse.data.gouv.fr/data/ban/adresses" name = if (is.null(name)) "adresses" extension = if (is.null(extension)) "csv.gz" date = if (is.null(date)) "latest" else date - date_admissible = as.Date( - names( - get_archive( - origin, - name = "[0-9]{4}-[0-9]{2}-[0-9]{2}", - html = "data.gouv", - extension = NULL - ) - ) - ) + date_admissible = as.Date(get_date_from_html(origin, expected = "day")) date = if (format(date) == "latest") { format(max(date_admissible)) } else { @@ -91,18 +89,19 @@ download_ban = function( format(date_admissible[selection]) } - if ("ban" %in% version) { - origin = file.path(origin, date, "csv") - destination = 
file.path(destination, "ban", date) - } - if ("bal" %in% version) { - origin = file.path(origin, date, "csv-bal") - destination = file.path(destination, "bal", date) - - } + origin = switch( + version, + ban = file.path(origin, date, "csv"), + bal = file.path(origin, date, "csv-bal") + ) + destination = file.path(destination, version, date) if (!is.null(department)) { - version = format_scope(department, type = "insee", scope = "department") + version = format_scope( + department, + type = "insee", + scope = "department" + ) } else { if (!is.null(region)) { region = format_scope(region, type = "insee", scope = "region") @@ -118,6 +117,7 @@ download_ban = function( to_do = data.frame( origin = origin, destination = destination, + local_origin = get_base_url(origin), short = "download_ban", stringsAsFactors = FALSE ) @@ -128,7 +128,6 @@ download_ban = function( name = name, date = NULL, version = version, - html = "data.gouv", verbose = verbose ) } diff --git a/R/download_geo_sirene.R b/R/download_geo_sirene.R new file mode 100644 index 0000000000000000000000000000000000000000..93657e9cf79ebd2a81bea6bd9f1bb152398eb748 --- /dev/null +++ b/R/download_geo_sirene.R @@ -0,0 +1,162 @@ +#' @title Download geo SIRENE +#' +#' @description +#' `downlaod_geo_sirene()` is used has a wrapper of `download_archive()` to +#' download easily geolocalized SIRENE database. +#' +#' @details +#' +#' If `repository` is set to "cquest", then all necessary variables are filled +#' with those values: +#' +#' * **origin** is changed to https://data.cquest.org/geo_sirene. This +#' url is adapted depending on version_insee. Some extra efforts are necessary +#' to deal with version_insee and vintage of data. +#' * **version_insee** can be set either to "2019" (default) or "2017". +#' #' * **month** is used to set something equivalent to date, and to adapt +#' `origin`. 
Some extra effort is necessary depending on years because of the +#' structure of the repository (some months are grouped by year, some not, and +#' this evolves). +#' * **name** default value is changed to "StockEtablissement_utf8" when +#' `version_insee` is "2019", "etablissements_actifs" when `version_insee` is +#' "2017" except for old vintages, for which it is changed to "geo_sirene". +#' * **extension** default value is changed to "csv.gz". +#' +#' If `repository` is set to "data.gouv", then all necessary variables are +#' filled with these values: +#' +#' * **origin** is changed to https://files.data.gouv.fr/geo-sirene. +#' * **version_insee** is set to "2019". +#' * **month** is used to set something equivalent to date, and to adapt +#' `origin`. +#' * **scope** is used to choose among available scopes for download (France, +#' departments or communes). It is used to adapt `origin`. +#' * **name** is set depending on `scope`: "StockEtablissementActif_utf8_geo" +#' for "france", "geo_siret" for "department", `NULL` for "commune". +#' * **extension** is set depending on `scope`: "csv.gz" for "france" and +#' "department", "csv" for "commune". +#' +#' If `repository` is set to "other", everything shall be filled so that +#' `download_archive()` can make a successful download. +#' +#' @param destination character, the address where data are stored. +#' @param repository character, keyword or the repository from where data are +#' downloaded. Best to use given options. See details. +#' @param origin character, url address from where data are downloaded. It is +#' set when repository is something other than "other". +#' @param name character, vector of acceptable names of archives to be +#' downloaded. See details. +#' @param extension character, vector of acceptable types of archive to be +#' downloaded. +#' @param version string, version of geo_sirene to be downloaded. Useful to set +#' departments or communes when `repository` is "data.gouv".
+#' @param month character, vector of months to be downloaded. +#' @param version_insee string, INSEE version to be downloaded. Only used with +#' `repository` = "cquest". See details. +#' @param scope string, scope for download when repository is "data.gouv". +#' See details. +#' @param verbose logical, should the function send some messages while running. +#' +#' @return An invisible `data.frame` of log. +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' dest = tempdir() +#' month = c("2019-01", "2020-01", "2021-01") +#' download_geo_sirene(dest) +#' download_geo_sirene(dest, month = month) +#' download_geo_sirene(dest, version_insee = "2017") +#' download_geo_sirene(dest, version_insee = "2017", month = "2018-01") +#' +#' download_geo_sirene(dest, repository = "data.gouv") +#' month = c("2020-01", "2021-01", "2022-01", "2023-01", "2024-01") +#' log_sirene = download_geo_sirene(dest, "data.gouv", month = month) +#' log_sirene +#' download_geo_sirene(dest, "data.gouv", scope = "department", version = "34") +#' download_geo_sirene(dest, "data.gouv", scope = "commune", version = "34170") +#' unlink(dest) +#' } + +download_geo_sirene = function( + destination, + repository = c("cquest", "data.gouv", "other"), + origin = NULL, + name = NULL, + extension = NULL, + version = NULL, + month = NULL, + version_insee = c("2019", "2017"), + scope = c("france", "department", "commune"), + verbose = TRUE +) { + repository = match.arg(repository) + scope = match.arg(scope) + + if (repository == "cquest") { + version_insee = match.arg(version_insee) + origin = "https://data.cquest.org/geo_sirene" + origin = file.path(origin, sprintf("v%s", version_insee)) + version_insee = sprintf("version_%s", version_insee) + + if (is.null(month)) month = max(get_date_from_html(origin)) + month_origin = month + + # Need to deal with a directory tree organized by year (but not for all) + if (version_insee == "version_2019") { + year = get_date_from_html(origin, expected = "year", last = FALSE) +
pattern = sprintf("(%s)", paste(year, collapse = "|")) + month_origin = gsub(pattern, "\\1/\\1", month, perl = TRUE) + } + + origin = file.path(origin, month_origin) + destination = file.path(destination, version_insee, month) + if (is.null(extension)) extension = "csv.gz" + if (is.null(version) && is.null(name)) { + if (version_insee == "version_2019") { + name = "StockEtablissement_utf8" + } + if (version_insee == "version_2017" && month >= "2019-04") { + name = "etablissements_actifs" + } + if (version_insee == "version_2017" && month <= "2019-03") { + name = "geo_sirene" + } + } + } + + if (repository == "data.gouv") { + version_insee = "version_2019" + origin = "https://files.data.gouv.fr/geo-sirene" + if (is.null(month)) month = max(get_date_from_html(origin)) + origin = file.path(origin, month) + destination = file.path(destination, version_insee, month) + extension = "csv.gz" + name = "StockEtablissementActif_utf8_geo" + if (scope == "department") { + origin = file.path(origin, "dep") + name = "geo_siret" + } + if (scope == "commune") { + origin = file.path(origin, "communes") + name = NULL + extension = "csv" + } + } + + to_do = data.frame( + origin = origin, + destination = destination, + short = "download_geo_sirene", + stringsAsFactors = FALSE + ) + + download_archive( + to_do, + extension = extension, + name = name, + version = version, + verbose = verbose + ) +} \ No newline at end of file diff --git a/R/extract_building.R b/R/extract_building.R index e201b48d3f0bd91d26740349e1c91f6561fbd959..a78fbcbc586b2d62adba76ef499430091f88e8d3 100644 --- a/R/extract_building.R +++ b/R/extract_building.R @@ -1,46 +1,52 @@ #' @title Extract building information from BD TOPO #' #' @description -#' The function `extract_building`` extracts the layer 'batiment' from BD TOPO. -#' First it uncompresses the archive '*.7z', typically comming from the use of -#' download.bd_topo(). 
Then it extracts from 'gpkg file' the 'batiment', -#' 'commune' and 'departement' layers as sf objects, applies scheme to all -#' created sf objects. A selection of 'polygon' is made in 'batiment', to ensure -#' that only those of current 'department' are kept, and the 'commune' to which -#' they belong is added. The projection is set to 'projection'. If asked, the -#' eaip information is added. If asked a map for checking purpose is saved. If -#' asked the resulting sf object is returned (this is not default behaviour). -#' Result is saved according to 'destination' path. +#' `extract_building()` extracts the layer 'batiment' from BD TOPO. +#' +#' @details +#' - First `extract_building()` uncompresses the archive '*.7z', typically +#' comming from the use of `download_bd_topo()`. +#' - Then it extracts from 'gpkg file' the 'batiment', 'commune' and +#' 'departement' layers as sf objects, applies scheme to all +#' created sf objects. +#' - A selection of 'polygon' is made in 'batiment', to ensure that only those +#' of current 'department' are kept, and the 'commune' to which they belong is +#' added. The projection is set to 'projection'. +#' - If asked, the eaip information is added. If asked a map for checking +#' purpose is saved. +#' - If asked the resulting sf object is returned (this is not default +#' behaviour). +#' - Result is saved according to 'destination' path. #' #' @param origin character, path to the directory where archive are stored. -#' @param destination character, path to the directory where results should be saved. +#' @param destination character, path to the directory where results should be +#' saved. #' @param archive character, vector of archive to be adpated. #' @param scheme data.frame, how archive variables should be treated. +#' @param projection numeric, 4 digits projection to use. +#' @param path_eaip character, path where eaip archives should be find. #' @param journal logical, should a journal file be saved. 
#' @param map logical, should a map be plotted (for check purpose). -#' @param path_eaip character, path where eaip archives should be find. -#' @param projection numeric, 4 digits projection to use. -#' @param retrieve logical, should the result be returned. #' @param verbose logical, should the function give some sumup informations. +#' @param retrieve logical, should the result be returned. #' #' @return if retrieve = TRUE, data.frame of adapated data stored in archive. #' #' @export #' #' @encoding UTF-8 -#' @author Frédéric Grelot extract_building = function( origin, destination, archive, scheme = floodam.data::scheme_bd_topo_3, + projection = 4326, + path_eaip = NULL, journal = TRUE, map = FALSE, - path_eaip = NULL, - projection = 4326, - retrieve = FALSE, - verbose = TRUE + verbose = TRUE, + retrieve = FALSE ) { ### Recursive call if archive is missing if (missing(archive)) {archive = list.files(origin, pattern = ".7z$")} diff --git a/R/get_archive.R b/R/get_archive.R index 03021034db94a9346513203ad354afdae9a48d18..2434c8997d06386042a8deec7a54553d29fe7477 100644 --- a/R/get_archive.R +++ b/R/get_archive.R @@ -14,37 +14,40 @@ #' filter. #' @param directory logical, should directories be found instead of archives. #' See details. -#' @param html character, indicates if the page is "simple" or is coming from -#' "data.gouv". This has implications on how to look for links. -#' +#' @param local_origin character, the local url address to add to relative +#' links. When set to `NULL` (default), origin is used. +#' #' @return A character vector of all archives or directory found in origin #' matching with given constraints. #' #' @details +#' First, all links are retrieved with `get_link_from_html()`. +#' +#' If `directory` is TRUE, only links ending with "/" are kept, and this ending +#' is erased. `extension` is not used in this case. #' -#' First, a regex search is made to find in x names enclosed in href="name" or -#' href='name'. 
+#' When `extension` is given, it may contain different possibilities. All are +#' matched at the end of archives' names. #' -#' `extension` may contain different possibilities. It will be matches at -#' the end of archives' names. This may lead to an empty character as result. +#' At this point, a check is made to see if remaining links are relative (not +#' starting with "http" or "https") or absolute. All relative links are completed +#' with `local_origin`. All links are named with `base::basename()`, those names +#' are used for further selections. #' -#' `name` may contain different possibilities. It will be matched at the -#' beginning of archives' names. This may lead to an empty character as result. +#' `name` may contain different possibilities. It is matched at the +#' beginning of archives' names. #' +#' `version` may contain different possibilities. It is matched +#' anywhere in archives' names. This may lead to an empty character as result. +#' #' `date` may contain either "last", and so anything that can be considered #' as a date in archives' names ("\%Y", "\%Y-\%m", "\%Y-\%m-\%d") is matched #' against and the max is taken. If nothing matches, all archives' names are -#' kept. code{date} may also contain anything admissible for code{create_date}. +#' kept. `date` may also contain anything admissible for `create_date()`. #' If so, anything that can be considered as a date in archives' names ("\%Y", #' "\%Y-\%m", "\%Y-\%m-\%d") is matched against, and only archives' names whose -#' date pertain to \code{create_date(date)} are kept, possibly nothing. +#' date pertains to `create_date(date)` are kept, possibly nothing. #' -#' `version` may contain different possibilities. Il will be matched -#' anywhere in archives' names. This may lead an empty character as result. -#' -#' If `directory` is set to TRUE, `extension` is not used. Instead, -#' links finishing by "\" are looked after. 
-#' #' @export #' #' @examples @@ -74,6 +77,14 @@ #' get_archive(origin, "7z", "ADMIN-EXPRESS-COG", date = "last") #' get_archive(origin, "7z", "ADMIN-EXPRESS-COG", version = "FRA", date = "last") #' get_archive(origin, "7z", "ADMIN-EXPRESS", date = 2021:2022) +#' +#' # "BAN" archives in data.gouv.fr +#' origin = "https://adresse.data.gouv.fr/data/ban/adresses" +#' get_archive( +#' origin, +#' name = "[0-9]{4}-[0-9]{2}-[0-9]{2}", +#' extension = NULL, +#' local_origin = get_base_url(origin) #' } get_archive = function( @@ -83,10 +94,8 @@ get_archive = function( date = NULL, version = NULL, directory = FALSE, - html = c("simple", "data.gouv") + local_origin = NULL ) { - html = match.arg(html) - if (length(origin) > 1) { result = lapply( origin, @@ -95,59 +104,31 @@ get_archive = function( name = name, date = date, version = version, - directory = directory + directory = directory, + local_origin = local_origin ) names(result) = NULL result = result[-duplicated(result)] return(unlist(result)) } - x = read_url(origin) - if ("try-error" %in% class(x)) { - return(invisible()) + if (is.null(local_origin)) local_origin = origin + result = get_link_from_html(origin) + + if (directory == TRUE) { + pattern = complete_pattern("/", extension = FALSE, ending = TRUE) + result = grep(pattern, result, value = TRUE) + result = gsub(pattern, "", result) + extension = NULL } - - if (directory == TRUE) {extension = "/"} - if (html == "data.gouv") { - result = unlist(strsplit(x, '"')) - if (!is.null(extension)) { - result = unique( - grep( - complete_pattern( - extension, - extension = !directory, - ending = TRUE - ), - result, - value = TRUE) - ) - } - if (!is.null(name)) { - result = unique( - grep( - complete_pattern(sprintf("%s[_-]*", name), begin = TRUE), - result, - value = TRUE - ) - ) - } - } else { - pattern = "(?<=href=([\"'])).*?(?=\\1)" - result = grep( - complete_pattern(extension, extension = !directory, ending = TRUE), - unlist(regmatches(x, gregexpr(pattern, x, perl = 
TRUE))), - value = TRUE - ) - result = grep( - "^(?![.]{1,2}$)", - gsub("/$", "", result), - perl = TRUE, - value = TRUE - ) + if (!is.null(extension)) { + pattern = complete_pattern(extension, extension = TRUE, ending = TRUE) + result = grep(pattern, result, value = TRUE) + result = grep("^(?![.]{1,2}$)", result, perl = TRUE, value = TRUE) } selection = grepl("^http", result) - result[!selection] = file.path(origin, result[!selection]) + result[!selection] = file.path(local_origin, result[!selection]) result = stats::setNames(result, basename(result)) if (!is.null(name)) { diff --git a/R/get_link_from_html.R b/R/get_link_from_html.R new file mode 100644 index 0000000000000000000000000000000000000000..e2bbd9eda0bfbec9885b31222c7d8168f2a62b09 --- /dev/null +++ b/R/get_link_from_html.R @@ -0,0 +1,116 @@ +#' @title Get links in a html page +#' +#' @description +#' `get_link_from_html()` returns all links present in a html page. +#' +#' `get_date_from_html()` returns links present in a html page that correspond +#' to dates. +#' +#' @param url character, address of the url. +#' @param x character vector, if `url` is missing, gives directly a character +#' vector that could have been the result from `read_url(url)`. +#' @param expected character, what type of dates are expected among "month", +#' "day", "year" or "any". +#' @param last logical, should a "last" date be looked for. +#' +#' @return +#' character vector of links found. NULL if nothing has been found. 
+#' +#' @export +#' @encoding UTF-8 +#' +#' @examples +#' x = c( +#' "<html>", +#' "<head><title>Index of /example/</title></head>", +#' "<body>", +#' "<h1>Index of /example/</h1><hr><pre>", +#' "<a href='../'>../</a>", +#' "<a href='parent/2023-11/archive.csv.gz'>archive.csv.gz</a>", +#' "<a href='2023-12/archive.csv.gz'>archive.csv.gz</a>", +#' "<a href=\"2024-01/\">2024-01/</a> 07-Jan-2024 09:54", +#' "<a href=\"parent/child/2024-02/\">2024-02/</a> 07-Feb-2024 09:54", +#' "<a href=\"2024-03\">2024-03</a> 07-Mar-2024 10:02", +#' "<a href=\"2024-01-01/\">2024-01-01/</a> 02-Jan-2024 09:56", +#' "<a href=\"2024-01-02\">2024-01-02</a> 03-jan-2024 11:02", +#' "<a href=\"2023/\">2023/</a> 01-Feb-2023 15:54", +#' "<a href=\"2024\">2024</a> 01-Feb-2024 15:54", +#' "<a href=\"last/\">last/</a> 07-Mar-2024 10:02", +#' "</pre><hr></body>", +#' "</html>" +#' ) +#' get_link_from_html(x = x) +#' +#' get_date_from_html(x = x) +#' get_date_from_html(x = x, expected = "day") +#' get_date_from_html(x = x, expected = "year") +#' get_date_from_html(x = x, expected = "any") +#' get_date_from_html(x = x[-(8:10)]) +#' get_date_from_html(x = x[-(8:10)], last = TRUE) + +get_link_from_html = function(url = NULL, x = NULL) { + + if (!is.null(url)) x = read_url(url) + if (is.null(x)) return(x) + + # Make sure that x is a vector with only one tag or one value per element + x = paste(x, collapse = "") + pattern = "!-!" 
+ while (length(grep(pattern, x) > 0)) { + pattern = sprintf("%s%s", pattern, pattern) + } + x = gsub(">", sprintf(">%s", pattern), x) + x = gsub("<", sprintf("%s<", pattern), x) + x = gsub(sprintf("%s%s", pattern, pattern), pattern, x) + x = unlist(strsplit(x, pattern)) + + pattern = "(?<=href=([\"'])).*?(?=\\1)" + result = unlist(regmatches(x, gregexpr(pattern, x, perl = TRUE))) + if (identical(result, character())) result = NULL + + return(result) +} + +#' @rdname get_link_from_html +#' @export + +get_date_from_html = function( + url = NULL, + x = NULL, + expected = c("month", "day", "year", "any"), + last = FALSE +) { + expected = match.arg(expected) + + pattern = "/$" + result = gsub(pattern, "", get_link_from_html(url, x)) + if (is.null(result)) return(result) + + result = basename(result) + pattern = switch( + expected, + month = "\\d{4}-\\d{2}", + day = "\\d{4}-\\d{2}-\\d{2}", + year = "\\d{4}", + "\\d{4}-\\d{2}-\\d{2}|\\d{4}-\\d{2}|\\d{4}" + ) + if (isTRUE(last)) { + pattern = sprintf("(%s|last|latest)", pattern) + } + pattern = sprintf("^%s$", pattern) + + result = grep(pattern, result, value = TRUE) + + if (identical(result, "last")) { + msg = c( + "Only `last` has been found. 
", + sprintf( + "Possible mismatch with expected format (%s)", + expected + ) + ) + warning(msg) + } + + return(result) +} \ No newline at end of file diff --git a/R/read_with_scheme.R b/R/read_with_scheme.R index 53d1b396148fd7d0fb1e17f7a4131fd8616f0c2b..71fe7d58f900dab4ef4d6c2c1a7bd3b84e80c10c 100644 --- a/R/read_with_scheme.R +++ b/R/read_with_scheme.R @@ -30,7 +30,6 @@ #' @export #' #' @encoding UTF-8 -#' @author Frédéric Grelot read_with_scheme = function( x, @@ -126,9 +125,10 @@ read_csv_with_scheme = function( treatment = list( "factor" = as.factor, - "date" = as.Date, + "Date" = as.Date, "numeric" = as.numeric, - "integer" = as.integer + "integer" = as.integer, + "logical" = to_logical ) for (type in names(treatment)) { selection = scheme[["name"]][scheme[["type"]] == type] @@ -141,10 +141,20 @@ read_csv_with_scheme = function( colClasses = "character" ) } + message(sprintf("- %s observations loaded.", nrow(result))) if (all(c("lon", "lat") %in% names(result))) { + selection = !is.na(result[["lon"]]) & !is.na(result[["lon"]]) + if (any(!selection)) { + message( + sprintf( + "- %s observations without lon/lat (removed).", + sum(!selection) + ) + ) + } result = sf::st_as_sf( - result, + result[selection, ], coords = c("lon", "lat"), crs = projection ) diff --git a/R/utilities.R b/R/utilities.R index 29ef122c445e9d17d26199919daaff4dcf1cd66f..5c9b289c00fe8bcf70d8e8f87cd6e471a0e2a284 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1,27 +1,4 @@ ## In this file are grouped function that are used but should not be exported -#' -#' @title Transform a character vector in logical one -#' -#' @description -#' Use a modality for TRUE and onther for FALSE -#' -#' @param x character vector -#' @param true character, modality for TRUE -#' @param false character, modality for FALSE -#' -#' @return logical vector -#' -#' @encoding UTF-8 -#' @author Victor Champonnois et Frédéric Grelot -#' -#' @examples -#' x = sample(c("red", "blue"), 10, replace = TRUE) -#' 
floodam.data:::to_logical(x, "red", "blue") -#' floodam.data:::to_logical(x, "red", "green") - -to_logical = function(x, true = TRUE, false = FALSE) { - data.table::fifelse(x == true, TRUE, data.table::fifelse(x == false, FALSE, NA)) -} #' @title Add NA for a given value #' @@ -40,12 +17,14 @@ to_logical = function(x, true = TRUE, false = FALSE) { #' floodam.data:::treat_na(x, "green") treat_na = function(x, value) { - na.value = switch(EXPR = typeof(x), + na_value = switch( + EXPR = typeof(x), "character" = NA_character_, "integer" = NA_integer_, "double" = NA_real_, - NA) - data.table::fifelse(x == value, na.value, x) + NA + ) + data.table::fifelse(x == value, na_value, x) } #' @title Add information to a journal @@ -92,4 +71,166 @@ file_version = function(path, pattern, version = "last") { 1 ) return(result) -} \ No newline at end of file +} + +#' @title Get parent url from an url +#' +#' @description +#' `get_base_url()` gets the base parent from an url. +#' +#' @details +#' `get_base_url()` keeps unchanged links that do not start exactly with +#' "http://" or "https://". +#' +#' @param url character, address of the url. +#' +#' @export +#' +#' @examples +#' +#' get_base_url("http://parent/child/archive.csv.gz") +#' get_base_url("https://parent/child/archive.csv.gz") +#' get_base_url("parent/child/archive.csv.gz") + +get_base_url = function(url) { + pattern = "(https?://[^/]+)/.*" + gsub(pattern, "\\1", url) +} + +#' @title Subset with message +#' +#' @description +#' `subset_with_message()` makes a subset of a data.frame that is suitable for +#' a processing of data. +#' +#' @details +#' `subset_with_message()` sends a message when: +#' - `rows` is not logical because it is an obligation for this function. +#' - `columns` is not a character vector of valid column names, because it is an +#' obligation for this function. +#' - `rows` leads to some observations being removed. +#' +#' @param x data.frame. +#' @param rows logical, observations to be kept. 
+#' @param columns character, variables to be kept. +#' +#' @return `data.frame` of selected observations and variables. +#' +#' @export + +subset_with_message = function(x, rows, columns) { + if (missing(rows)) rows = rep(TRUE, nrow(x)) + if (missing(columns)) columns = colnames(x) + if (!is.logical(rows)) { + message("- rows must be logical. No subset performed on observations.") + rows = rep(TRUE, nrow(x)) + } + if (!is.character(columns) || !all(columns %in% names(x))) { + message( + "- columns must be valid column names. ", + "No subset performed on variables" + ) + columns = names(x) + } + if (sum(rows) < nrow(x)) { + message( + sprintf( + "- %s observations not in selection (removed).", + sum(!rows) + ) + ) + } + return(x[rows, columns, dop = FALSE]) +} + +#' @title Transform a vector into a logical one +#' +#' @description +#' `to_logical()` enhances `as.logical()`, especially when input is a +#' character, by treating some common cases, at least in English and French +#' context. +#' +#' @details +#' When applied to a character vector, `to_logical()` tests if all values are +#' in a set of admissible values for `TRUE` cases, `FALSE` cases, and `NA` +#' cases. If it is the case, it makes all substitutions so that a `logical` +#' vector is returned by `as.logical()`. +#' +#' Some default cases are implemented (all insensitive to case) +#' +#' - English TRUE/FALSE with: +#' - `TRUE` for "T" and "TRUE" +#' - `FALSE` for "F" and "FALSE" +#' - English YES/NO with: +#' - `TRUE` for "Y" and "YES" +#' - `FALSE` for "N" and "NO" +#' - French TRUE/FALSE with: +#' - `TRUE` for "V" and "VRAI" +#' - `FALSE` for "F" and "FAUX" +#' - French YES/NO with: +#' - `TRUE` for "O" and "OUI" +#' - `FALSE` for "N" and "NON" +#' +#' `true` and `false` can be used to handle other cases. To be used they must +#' both be given, if not none is considered. When `true` and `false` are given, +#' default substitutions for French and English cases are not performed. 
+#' +#' @param x vector, values to be transformed. +#' @param true character, optional values for `TRUE` cases. +#' @param false character, optional values for `FALSE` cases. +#' @param as_is logical, should result be sent as character (for recursive +#' call). +#' +#' @return logical +#' +#' @encoding UTF-8 +#' +#' @export +#' +#' @examples +#' +#' # Default behaviour +#' to_logical(c(0:2, NA)) +#' +#' # For English (default, case insensitive) +#' to_logical(c("true", "false", "t", "f", "True", "False", "", "NA", NA)) +#' to_logical(c("y", "n", "Yes", "No")) +#' +#' # For French (default) +#' to_logical(c("n", "o", "Non", "Oui")) +#' to_logical(c("v", "f", "vrai", "faux")) +#' +#' # For other convention (language) +#' x = c("si", "no", NA) +#' to_logical(x) +#' to_logical(x, true = "SI", false = "NO") +#' +#' # Cannot mix language (by default) +#' x = c("NON", "OUI", "YES", "NO") +#' to_logical(x) +#' +#' # Can use parameters in this case +#' to_logical(x, true = c("OUI", "YES"), false = c("NO", "NON")) + +to_logical = function(x, true, false, as_is = FALSE) { + if (is.character(x)) { + x = toupper(x) + na = c("", "NA", NA) + if (!missing(true) & !(missing(false))) { + true = toupper(true) + false = toupper(false) + if (all(x %in% c(true, false, na))) { + x[x %in% true] = TRUE + x[x %in% false] = FALSE + } + if (isTRUE(as_is)) { + return(x) + } + } else { + x = to_logical(x, c("y", "yes"), c("n", "no"), TRUE) + x = to_logical(x, c("o", "oui"), c("n", "non"), TRUE) + x = to_logical(x, c("v", "vrai"), c("f", "faux"), TRUE) + } + } + as.logical(x) +} diff --git a/R/utilities_geomatic.R b/R/utilities_geomatic.R index 1481fbebcb299dc2f5c70e7629b324d70589d966..10cea73615abc7158793eca35d4ddba0a9d623ff 100644 --- a/R/utilities_geomatic.R +++ b/R/utilities_geomatic.R @@ -16,7 +16,6 @@ #' @return sf object with both commune and department information added. 
#' #' @encoding UTF-8 -#' @author Frédéric Grelot add_commune = function(x, commune, department = NULL) { if (!is.null(department)) { @@ -47,11 +46,14 @@ add_eaip = function(x, path_eaip, department) { path_eaip, sprintf("eaip_%s.rds", format_scope(department)) ) + message(sprintf("- %s observations treated.", nrow(x))) if (length(eaip) == 0) { x[["eaip"]] = NA + message("- no eaip layer found. NA added.") } else { eaip = readRDS(eaip) x[["eaip"]] = in_layer(x, eaip) + message(sprintf("- %s observations in eaip.", sum(x[["eaip"]]))) } return(x) } @@ -60,4 +62,68 @@ analyse_projection = function(x, default = 2154) { result = rep(default, length(x)) result[x == "LAMB93"] = 2154 return(result) -} \ No newline at end of file +} + +#' @title Split a `sf` data.frame by `department` +#' +#' @description +#' Split a `sf` data.frame with information `commune` by `department`. +#' `commune` information is mandatory in input. +#' +#' @details +#' The information in `commune` is interpreted to construct a first version of +#' department for each observation. +#' +#' If `admin_dep` is given, a `sf::st_intersects()` is performed to get the +#' department from a geographic perspective. A consistency analysis between both +#' versions is done. In case of inconsistencies, messages are sent. The +#' geographic perspective is kept. +#' +#' @param x sf object. +#' @param admin_dep sf object. +#' +#' @return A list of sf objects. +#' +#' @encoding UTF-8 + +split_dep = function(x, admin_dep = NULL) { + dep_commune = substring(x[["commune"]], 1, 2) + selection = dep_commune %in% "97" + dep_commune[selection] = substring(x[["commune"]][selection], 1, 3) + if (is.null(admin_dep)) { + message( + "- No layer department provided, ", + "split is done on 'commune' field." 
+ ) + dep = dep_commune + } else { + dep = sapply( + sf::st_intersects(x, admin_dep), + function(x) {if (length(x) != 1) NA else x} + ) + dep = suppressMessages( + sf::st_drop_geometry(admin_dep)[dep, "department"] + ) + test = dep == dep_commune + if (any(is.na(test))) { + message( + sprintf( + "- %s observations out of scope (removed).", + sum(is.na(test)) + ) + ) + } + if (any(test %in% FALSE)) { + message( + sprintf( + "- %s observations not in 'commune' department (removed).", + sum(test %in% FALSE) + ) + ) + } + } + x["department"] = dep + result = split(x, dep) + names(result) = format_scope(names(result)) + return(result) +} diff --git a/data-raw/scheme.R b/data-raw/scheme.R index d32dc6f7a8169b44c9e76307ef134bd076ca4ecd..ce5030509462b6b8904597015304bb29af637fb5 100644 --- a/data-raw/scheme.R +++ b/data-raw/scheme.R @@ -248,7 +248,7 @@ scheme_sirene_2019 = data.table::fread( "inst/extdata/scheme_sirene_2019.csv", data.table = FALSE ) -Encoding(scheme_sirene_2019[["label.french"]]) = "UTF-8" +Encoding(scheme_sirene_2019[["label_french"]]) = "UTF-8" Encoding(scheme_sirene_2019[["comment"]]) = "UTF-8" usethis::use_data(scheme_sirene_2019, overwrite = TRUE) diff --git a/data/scheme_admin_express_2_0.rda b/data/scheme_admin_express_2_0.rda index 46637cd80a19137cd64538a13de33112b0a665a6..8c778cebbb5edc68a5c46a16faf37f4a6c9f3dab 100644 Binary files a/data/scheme_admin_express_2_0.rda and b/data/scheme_admin_express_2_0.rda differ diff --git a/data/scheme_admin_express_3_1.rda b/data/scheme_admin_express_3_1.rda index 5b8c336292ca2c75f2a9f5a286753dff3ab0b90d..02a887dfc39de068a4365c06a02bd834e9de055a 100644 Binary files a/data/scheme_admin_express_3_1.rda and b/data/scheme_admin_express_3_1.rda differ diff --git a/data/scheme_bd_topo_3.rda b/data/scheme_bd_topo_3.rda index 885743924346776aa963c2de35f8f44fe84065ef..19d2ed8aa04dce7b3caf5e262b3f43d7142fa2be 100644 Binary files a/data/scheme_bd_topo_3.rda and b/data/scheme_bd_topo_3.rda differ diff --git 
a/data/scheme_gaspar_azi.rda b/data/scheme_gaspar_azi.rda index 2957942f11bb0584311a8961266c0a045b35b89c..110b93ac93e1fbdf32d34a838fa4a048d8e8afda 100644 Binary files a/data/scheme_gaspar_azi.rda and b/data/scheme_gaspar_azi.rda differ diff --git a/data/scheme_gaspar_catnat.rda b/data/scheme_gaspar_catnat.rda index b4c162838a3c79f0e699fe27ce3b6b789f5a7492..1d23cf7a658a632997491e8147fbaddfa95baeb1 100644 Binary files a/data/scheme_gaspar_catnat.rda and b/data/scheme_gaspar_catnat.rda differ diff --git a/data/scheme_gaspar_pprn.rda b/data/scheme_gaspar_pprn.rda index e64d45ed4114968a6364f4e4ad3d9bb23b71b88f..9aae8f718a84a488d7aacd0d494ec4facb3eae09 100644 Binary files a/data/scheme_gaspar_pprn.rda and b/data/scheme_gaspar_pprn.rda differ diff --git a/data/scheme_gaspar_risq.rda b/data/scheme_gaspar_risq.rda index 265e5bb38c2fb7188b14d58d8fbe1aba58c8b5d3..27a0b16b88b32f5e344bb2a26fa3311ba9326f82 100644 Binary files a/data/scheme_gaspar_risq.rda and b/data/scheme_gaspar_risq.rda differ diff --git a/data/scheme_insee_logement_2019.rda b/data/scheme_insee_logement_2019.rda index 6bfb19bc31b301748c64f3e226852bb168bf6583..c1d363a58629c962a6985c175d97ccb550bbc4ff 100644 Binary files a/data/scheme_insee_logement_2019.rda and b/data/scheme_insee_logement_2019.rda differ diff --git a/data/scheme_rpg_1.rda b/data/scheme_rpg_1.rda index 225982e92dd538b35076f324a99d6ad75dafa243..15b48afc865ebd2946df224b227e074a27d24fa1 100644 Binary files a/data/scheme_rpg_1.rda and b/data/scheme_rpg_1.rda differ diff --git a/data/scheme_rpg_2.rda b/data/scheme_rpg_2.rda index 46ed4cebf1a21a25761d56a4211aafd33b65510b..edbc13f1282828ea1439c1f60476f5da15896691 100644 Binary files a/data/scheme_rpg_2.rda and b/data/scheme_rpg_2.rda differ diff --git a/data/scheme_sirene_2019.rda b/data/scheme_sirene_2019.rda index fe6fb54b1e29b4e5f26dffe4c45d5d56b325ed68..af70c3ce386dbbe36399a85be83a28ee05c4090d 100644 Binary files a/data/scheme_sirene_2019.rda and b/data/scheme_sirene_2019.rda differ diff --git 
a/data/scheme_sirene_na.rda b/data/scheme_sirene_na.rda index 1f32dacc83c5c94cadb82fa52aa7e7a2c8616fb9..b6d276daa5152ced87e5883022c9c5d88469fe60 100644 Binary files a/data/scheme_sirene_na.rda and b/data/scheme_sirene_na.rda differ diff --git a/dev/tag-message b/dev/tag-message index 7012fc7c3524308b111df2993a1a10c3ba30e2dd..93cb15eea283b6ca57745957b4267630bf273cc8 100644 --- a/dev/tag-message +++ b/dev/tag-message @@ -1,8 +1,8 @@ -floodam.data Version: 0.9.40.0 +floodam.data Version: 0.9.41.0 0 errors ✔ | 0 warnings ✔ | 0 notes ✔ **Details** see NEWS.md -# git tag -a v0.9.40.0 -F dev/tag-message +# git tag -a v0.9.41.0 -F dev/tag-message # git push --tags diff --git a/inst/extdata/scheme_admin_express_2_0.csv b/inst/extdata/scheme_admin_express_2_0.csv index 22d3c1bed6da58f9d91521b7cf83bd03031583ea..48766ac017a2ccc1cb4f4575689ccde526d5ae3f 100644 --- a/inst/extdata/scheme_admin_express_2_0.csv +++ b/inst/extdata/scheme_admin_express_2_0.csv @@ -1,27 +1,27 @@ -"name","name_origin","layer","order","keep","type","label_fr","source","length_source","type_source","comment" -"id","ID","COMMUNE",1,"FALSE","character","Identifiant de la commune","IGN",24,"character", -"commune_label_fr","NOM_COM","COMMUNE",2,"TRUE","character","Nom de la commune","IGN",50,"character", -"commune_label_fr_cap","NOM_COM_M","COMMUNE",3,"FALSE","character","Nom de la commune en majuscules","IGN",50,"character", -"commune","INSEE_COM","COMMUNE",4,"TRUE","character","Numéro INSEE de la commune","IGN",5,"character", -"status","STATUT","COMMUNE",5,"FALSE","character","Statut administratif","IGN",22,"character", -"district","INSEE_ARR","COMMUNE",6,"FALSE","character","Numéro INSEE de l'arrondissement","IGN",2,"character", -"department","INSEE_DEP","COMMUNE",7,"TRUE","character","Numéro INSEE du département","IGN",3,"character", -"region","INSEE_REG","COMMUNE",8,"FALSE","character","Numéro INSEE de la région","IGN",2,"character", -"epci","CODE_EPCI","COMMUNE",9,"TRUE","character","Code de 
l'EPCI","IGN",21,"character", -"population","POPULATION","COMMUNE",10,"TRUE","integer","Population de la commune","IGN",8,"integer", -"type","TYPE","COMMUNE",11,"FALSE","character","Type de la commune selon le COG","IGN",3,"character", -"id","ID","DEPARTEMENT",1,"FALSE","character","Identifiant du département","IGN",24,"character", -"department_label_fr","NOM_DEP","DEPARTEMENT",2,"TRUE","character","Nom du département","IGN",30,"character", -"department_label_fr_cap","NOM_DEP_M","DEPARTEMENT",3,"FALSE","character","Nom du département en capitales","IGN",30,"character", -"department","INSEE_DEP","DEPARTEMENT",4,"TRUE","character","Numéro INSEE du département","IGN",3,"character", -"region","INSEE_REG","DEPARTEMENT",5,"TRUE","character","Numéro INSEE de la région","IGN",2,"character", -"capital","CHF_DEP","DEPARTEMENT",6,"FALSE","character","Code INSEE de la commune chef-lieu du département","IGN",5,"character","Apparemment non présent dans les versions récentes" -"id","ID","EPCI",1,"FALSE","character","Identifiant de l'EPCI","IGN",24,"character", -"epci","CODE_EPCI","EPCI",2,"TRUE","character","Code de l'EPCI","IGN",9,"character", -"epci_label_fr","NOM_EPCI","EPCI",3,"TRUE","character","Nom de l'EPCI","IGN",230,"character", -"type","TYPE_EPCI","EPCI",4,"TRUE","character","Type de l'EPCI","IGN",9,"character", -"id","ID","REGION",1,"FALSE","character","Identifiant de la région","IGN",24,"character", -"region_label_fr","NOM_REG","REGION",2,"TRUE","character","Nom de la région","IGN",35,"character", -"region_label_fr_cap","NOM_REG_M","REGION",3,"FALSE","character","Nom de la région","IGN",35,"character", -"region","INSEE_REG","REGION",4,"TRUE","character","Numéro INSEE de la région","IGN",2,"character", -"capital","CHF_REG","REGION",5,"FALSE","character","Code INSEE du chef-lieu de la région","IGN",5,"character","Apparemment non présent dans les versions récentes" +name;name_origin;layer;order;keep;type;label_fr;source;length_source;type_source;comment 
+id;ID;COMMUNE;1;FALSE;character;Identifiant de la commune;IGN;24;character; +commune_label_fr;NOM_COM;COMMUNE;2;TRUE;character;Nom de la commune;IGN;50;character; +commune_label_fr_cap;NOM_COM_M;COMMUNE;3;FALSE;character;Nom de la commune en majuscules;IGN;50;character; +commune;INSEE_COM;COMMUNE;4;TRUE;character;Numéro INSEE de la commune;IGN;5;character; +status;STATUT;COMMUNE;5;FALSE;character;Statut administratif;IGN;22;character; +district;INSEE_ARR;COMMUNE;6;FALSE;character;Numéro INSEE de l'arrondissement;IGN;2;character; +department;INSEE_DEP;COMMUNE;7;TRUE;character;Numéro INSEE du département;IGN;3;character; +region;INSEE_REG;COMMUNE;8;FALSE;character;Numéro INSEE de la région;IGN;2;character; +epci;CODE_EPCI;COMMUNE;9;TRUE;character;Code de l'EPCI;IGN;21;character; +population;POPULATION;COMMUNE;10;TRUE;integer;Population de la commune;IGN;8;integer; +type;TYPE;COMMUNE;11;FALSE;character;Type de la commune selon le COG;IGN;3;character; +id;ID;DEPARTEMENT;1;FALSE;character;Identifiant du département;IGN;24;character; +department_label_fr;NOM_DEP;DEPARTEMENT;2;TRUE;character;Nom du département;IGN;30;character; +department_label_fr_cap;NOM_DEP_M;DEPARTEMENT;3;FALSE;character;Nom du département en capitales;IGN;30;character; +department;INSEE_DEP;DEPARTEMENT;4;TRUE;character;Numéro INSEE du département;IGN;3;character; +region;INSEE_REG;DEPARTEMENT;5;TRUE;character;Numéro INSEE de la région;IGN;2;character; +capital;CHF_DEP;DEPARTEMENT;6;FALSE;character;Code INSEE de la commune chef-lieu du département;IGN;5;character;Apparemment non présent dans les versions récentes +id;ID;EPCI;1;FALSE;character;Identifiant de l'EPCI;IGN;24;character; +epci;CODE_EPCI;EPCI;2;TRUE;character;Code de l'EPCI;IGN;9;character; +epci_label_fr;NOM_EPCI;EPCI;3;TRUE;character;Nom de l'EPCI;IGN;230;character; +type;TYPE_EPCI;EPCI;4;TRUE;character;Type de l'EPCI;IGN;9;character; +id;ID;REGION;1;FALSE;character;Identifiant de la région;IGN;24;character; 
+region_label_fr;NOM_REG;REGION;2;TRUE;character;Nom de la région;IGN;35;character; +region_label_fr_cap;NOM_REG_M;REGION;3;FALSE;character;Nom de la région;IGN;35;character; +region;INSEE_REG;REGION;4;TRUE;character;Numéro INSEE de la région;IGN;2;character; +capital;CHF_REG;REGION;5;FALSE;character;Code INSEE du chef-lieu de la région;IGN;5;character;Apparemment non présent dans les versions récentes diff --git a/inst/extdata/scheme_admin_express_3_1.csv b/inst/extdata/scheme_admin_express_3_1.csv index c2068629ceea48207bf409c2e04b88449b083152..b6c225169f76868df0e4204e9f40b917a8d5dca3 100644 --- a/inst/extdata/scheme_admin_express_3_1.csv +++ b/inst/extdata/scheme_admin_express_3_1.csv @@ -1,25 +1,25 @@ -"name","name_origin","layer","order","keep","type","label_fr","source","length_source","type_source","comment" -"id","ID","COMMUNE",1,"FALSE","character","Identifiant de la commune","IGN",24,"character", -"commune_label_fr","NOM","COMMUNE",2,"TRUE","character","Nom de la commune","IGN",50,"character", -"commune_label_fr_cap","NOM_M","COMMUNE",3,"FALSE","character","Nom de la commune en majuscules","IGN",50,"character", -"commune","INSEE_COM","COMMUNE",4,"TRUE","character","Code INSEE de la commune","IGN",5,"character", -"status","STATUT","COMMUNE",5,"FALSE","character","Statut administratif","IGN",22,"character", -"population","POPULATION","COMMUNE",6,"TRUE","integer","Population de la commune","IGN",8,"integer", -"canton","INSEE_CAN","COMMUNE",7,"FALSE","character","Code INSEE du canton","IGN",2,"character", -"district","INSEE_ARR","COMMUNE",8,"FALSE","character","Code INSEE de l'arrondissement","IGN",2,"character", -"department","INSEE_DEP","COMMUNE",9,"TRUE","character","Code INSEE du département","IGN",3,"character", -"region","INSEE_REG","COMMUNE",10,"TRUE","character","Code INSEE de la région","IGN",2,"character", -"epci","SIREN_EPCI","COMMUNE",11,"TRUE","character","Code SIREN des EPCI","IGN",21,"character", 
-"id","ID","DEPARTEMENT",1,"FALSE","character","Identifiant du département","IGN",24,"character", -"department_label_fr_cap","NOM_M","DEPARTEMENT",2,"FALSE","character","Nom du département en majuscules","IGN",30,"character", -"department_label_fr","NOM","DEPARTEMENT",3,"TRUE","character","Nom du département","IGN",30,"character", -"department","INSEE_DEP","DEPARTEMENT",4,"TRUE","character","Code INSEE du département","IGN",3,"character", -"region","INSEE_REG","DEPARTEMENT",5,"TRUE","character","Code INSEE de la région","IGN",2,"character", -"id","ID","EPCI",1,"FALSE","character","Identifiant de l'EPCI","IGN",24,"character", -"epci","CODE_SIREN","EPCI",2,"TRUE","character","Code SIREN de l'entité administrative","IGN",9,"character", -"epci_label_fr","NOM","EPCI",3,"TRUE","character","Nom de l'EPCI","IGN",230,"character", -"type","NATURE","EPCI",4,"TRUE","character","Nature de l'EPCI","IGN",9,"character", -"id","ID","REGION",1,"FALSE","character","Identifiant de la région","IGN",24,"character", -"region_label_fr_cap","NOM_REG_M","REGION",2,"FALSE","character","Nom de la région en majuscules","IGN",35,"character", -"region_label_fr","NOM_REG","REGION",3,"TRUE","character","Nom de la région","IGN",35,"character", -"region","INSEE_REG","REGION",4,"TRUE","character","Code INSEE de la région","IGN",2,"character", +name;name_origin;layer;order;keep;type;label_fr;source;length_source;type_source;comment +id;ID;COMMUNE;1;FALSE;character;Identifiant de la commune;IGN;24;character; +commune_label_fr;NOM;COMMUNE;2;TRUE;character;Nom de la commune;IGN;50;character; +commune_label_fr_cap;NOM_M;COMMUNE;3;FALSE;character;Nom de la commune en majuscules;IGN;50;character; +commune;INSEE_COM;COMMUNE;4;TRUE;character;Code INSEE de la commune;IGN;5;character; +status;STATUT;COMMUNE;5;FALSE;character;Statut administratif;IGN;22;character; +population;POPULATION;COMMUNE;6;TRUE;integer;Population de la commune;IGN;8;integer; +canton;INSEE_CAN;COMMUNE;7;FALSE;character;Code INSEE du 
canton;IGN;2;character; +district;INSEE_ARR;COMMUNE;8;FALSE;character;Code INSEE de l'arrondissement;IGN;2;character; +department;INSEE_DEP;COMMUNE;9;TRUE;character;Code INSEE du département;IGN;3;character; +region;INSEE_REG;COMMUNE;10;TRUE;character;Code INSEE de la région;IGN;2;character; +epci;SIREN_EPCI;COMMUNE;11;TRUE;character;Code SIREN des EPCI;IGN;21;character; +id;ID;DEPARTEMENT;1;FALSE;character;Identifiant du département;IGN;24;character; +department_label_fr_cap;NOM_M;DEPARTEMENT;2;FALSE;character;Nom du département en majuscules;IGN;30;character; +department_label_fr;NOM;DEPARTEMENT;3;TRUE;character;Nom du département;IGN;30;character; +department;INSEE_DEP;DEPARTEMENT;4;TRUE;character;Code INSEE du département;IGN;3;character; +region;INSEE_REG;DEPARTEMENT;5;TRUE;character;Code INSEE de la région;IGN;2;character; +id;ID;EPCI;1;FALSE;character;Identifiant de l'EPCI;IGN;24;character; +epci;CODE_SIREN;EPCI;2;TRUE;character;Code SIREN de l'entité administrative;IGN;9;character; +epci_label_fr;NOM;EPCI;3;TRUE;character;Nom de l'EPCI;IGN;230;character; +type;NATURE;EPCI;4;TRUE;character;Nature de l'EPCI;IGN;9;character; +id;ID;REGION;1;FALSE;character;Identifiant de la région;IGN;24;character; +region_label_fr_cap;NOM_REG_M;REGION;2;FALSE;character;Nom de la région en majuscules;IGN;35;character; +region_label_fr;NOM_REG;REGION;3;TRUE;character;Nom de la région;IGN;35;character; +region;INSEE_REG;REGION;4;TRUE;character;Code INSEE de la région;IGN;2;character; diff --git a/inst/extdata/scheme_ban.csv b/inst/extdata/scheme_ban.csv index 7e0fca86f9959e44e58466960e4fc9d06b1ae7dc..ec0661e88855c9d72e27d4cf0528c7806de44209 100644 --- a/inst/extdata/scheme_ban.csv +++ b/inst/extdata/scheme_ban.csv @@ -1,24 +1,24 @@ -name name_origin order keep type label_fr source length_source type_source comment -id id 1 TRUE character Identifiant data.gouv.fr NA character Clé d’interopérabilité telle que définie dans la spécification du format d'échange BAL 1.3. 
Lorsqu'aucun code FANTOIR n'est connu, un code transitoire composé de 6 caractères alpha-numériques est généré. -id_fantoir id_fantoir 2 FALSE character identifiant FANTOIR data.gouv.fr NA character Identifiant FANTOIR de la voie, le cas échant. L'identifiant est préfixé par la commune de rattachement FANTOIR (commune actuelle ou commune ancienne) -address_number numero 3 TRUE integer Numéro dans la voie data.gouv.fr NA integer Numéro de l’adresse dans la voie -address_rep rep 4 TRUE factor Indice de répétition data.gouv.fr NA character Indice de répétition associé au numéro (par exemple bis, a…) -address_street nom_voie 5 TRUE character Nom de la voie data.gouv.fr NA character Nom de la voie en minuscules accentuées -address_code code_postal 6 TRUE character Code postal data.gouv.fr NA character Code postal du bureau de distribution de la voie -commune code_insee 7 TRUE character Code INSEE data.gouv.fr NA character Code INSEE de la commune actuelle sur la base du Code Officiel géographique en vigueur -commune_name nom_commune 8 FALSE character Nom officiel de la commune data.gouv.fr NA character Nom officiel de la commune actuelle -commune_old code_insee_ancienne_commune 9 FALSE character Code INSEE de l'ancienne commune data.gouv.fr NA character Code INSEE de l'ancienne commune sur laquelle est située l'adresse -commune_old_name nom_ancienne_commune 10 FALSE character Nom de l'ancienne commune data.gouv.fr NA character Nom de l'ancienne commune sur laquelle est située l'adresse -x x 11 FALSE numeric Abscisse en projection légale data.gouv.fr NA numeric Coordonnées cartographique en projection légale -y y 12 FALSE numeric Ordonnée en projection légale data.gouv.fr NA numeric Coordonnées cartographique en projection légale -lon lon 13 TRUE numeric Longitude en WGS-84 data.gouv.fr NA numeric Longitude en WGS-84 -lat lat 14 TRUE numeric Latitude en WGS-84 data.gouv.fr NA numeric Latitude en WGS-84 -position_type type_position 15 TRUE factor Type de position 
data.gouv.fr NA factor Type de position telle que définie dans la spécification du format d'échange BAL 1.3. Peut-être vide si inconnu ou non renseigné. -alias alias 16 FALSE logical Vide data.gouv.fr NA logical Vide -locality_name nom_ld 17 TRUE character Nom du lieu-dit de rattachement data.gouv.fr NA character Nom du lieu-dit de rattachement (ou autre type de toponyme) -commune_name_postal libelle_acheminement 18 FALSE character Nom de la commune d’acheminement data.gouv.fr NA character Nom de la commune d’acheminement -address_street_postal nom_afnor 19 FALSE character Nom postale de la voie data.gouv.fr NA character Nom de la voie normalisé selon la norme postale -position_source source_position 20 TRUE factor Source de la position géographique data.gouv.fr NA factor Source de la position géographique. Valeurs possibles : (commune, cadastre, arcep, laposte, insee, sdis, inconnue) -address_source source_nom_voie 21 FALSE factor Source du nom de la voie data.gouv.fr NA factor Source du nom de la voie. Valeurs possibles : (commune, cadastre, arcep, laposte, insee, sdis, inconnue) -commune_certification certification_commune 22 TRUE logical Certification par la commune data.gouv.fr NA integer Indique si l’adresse a été certifiée par la commune. Valeurs possibles : (1 pour oui, 0 pour non) -registry_parcel cad_parcelles 23 FALSE character Parcelles cadastrales data.gouv.fr NA character Liste les identifiants des parcelles cadastrales auxquelles l’adresse est rattachée, si l'information est connue. Codage de l’identifiant sur 14 caractères. Séparateur |. Donnée en cours de fiabilisation +name;name_origin;order;keep;type;label_fr;source;length_source;type_source;comment +id;id;1;TRUE;character;Identifiant;data.gouv.fr;NA;character;Clé d’interopérabilité telle que définie dans la spécification du format d'échange BAL 1.3. Lorsqu'aucun code FANTOIR n'est connu, un code transitoire composé de 6 caractères alpha-numériques est généré. 
+id_fantoir;id_fantoir;2;FALSE;character;identifiant FANTOIR;data.gouv.fr;NA;character;Identifiant FANTOIR de la voie, le cas échéant. L'identifiant est préfixé par la commune de rattachement FANTOIR (commune actuelle ou commune ancienne) +address_number;numero;3;TRUE;integer;Numéro dans la voie;data.gouv.fr;NA;integer;Numéro de l’adresse dans la voie +address_rep;rep;4;TRUE;factor;Indice de répétition;data.gouv.fr;NA;character;Indice de répétition associé au numéro (par exemple bis, a…) +address_street;nom_voie;5;TRUE;character;Nom de la voie;data.gouv.fr;NA;character;Nom de la voie en minuscules accentuées +address_code;code_postal;6;TRUE;character;Code postal;data.gouv.fr;NA;character;Code postal du bureau de distribution de la voie +commune;code_insee;7;TRUE;character;Code INSEE;data.gouv.fr;NA;character;Code INSEE de la commune actuelle sur la base du Code Officiel géographique en vigueur +commune_name;nom_commune;8;FALSE;character;Nom officiel de la commune;data.gouv.fr;NA;character;Nom officiel de la commune actuelle +commune_old;code_insee_ancienne_commune;9;FALSE;character;Code INSEE de l'ancienne commune;data.gouv.fr;NA;character;Code INSEE de l'ancienne commune sur laquelle est située l'adresse +commune_old_name;nom_ancienne_commune;10;FALSE;character;Nom de l'ancienne commune;data.gouv.fr;NA;character;Nom de l'ancienne commune sur laquelle est située l'adresse +x;x;11;FALSE;numeric;Abscisse en projection légale;data.gouv.fr;NA;numeric;Coordonnées cartographiques en projection légale +y;y;12;FALSE;numeric;Ordonnée en projection légale;data.gouv.fr;NA;numeric;Coordonnées cartographiques en projection légale +lon;lon;13;TRUE;numeric;Longitude en WGS-84;data.gouv.fr;NA;numeric;Longitude en WGS-84 +lat;lat;14;TRUE;numeric;Latitude en WGS-84;data.gouv.fr;NA;numeric;Latitude en WGS-84 +position_type;type_position;15;TRUE;factor;Type de position;data.gouv.fr;NA;factor;Type de position telle que définie dans la spécification du format d'échange BAL 1.3. 
Peut être vide si inconnu ou non renseigné. +alias;alias;16;FALSE;logical;Vide;data.gouv.fr;NA;logical;Vide +locality_name;nom_ld;17;TRUE;character;Nom du lieu-dit de rattachement;data.gouv.fr;NA;character;Nom du lieu-dit de rattachement (ou autre type de toponyme) +commune_name_postal;libelle_acheminement;18;FALSE;character;Nom de la commune d’acheminement;data.gouv.fr;NA;character;Nom de la commune d’acheminement +address_street_postal;nom_afnor;19;FALSE;character;Nom postal de la voie;data.gouv.fr;NA;character;Nom de la voie normalisé selon la norme postale +position_source;source_position;20;TRUE;factor;Source de la position géographique;data.gouv.fr;NA;factor;Source de la position géographique. Valeurs possibles : (commune, cadastre, arcep, laposte, insee, sdis, inconnue) +address_source;source_nom_voie;21;FALSE;factor;Source du nom de la voie;data.gouv.fr;NA;factor;Source du nom de la voie. Valeurs possibles : (commune, cadastre, arcep, laposte, insee, sdis, inconnue) +commune_certification;certification_commune;22;TRUE;logical;Certification par la commune;data.gouv.fr;NA;integer;Indique si l’adresse a été certifiée par la commune. Valeurs possibles : (1 pour oui, 0 pour non) +registry_parcel;cad_parcelles;23;FALSE;character;Parcelles cadastrales;data.gouv.fr;NA;character;Liste les identifiants des parcelles cadastrales auxquelles l’adresse est rattachée, si l'information est connue. Codage de l’identifiant sur 14 caractères. Séparateur |. 
Donnée en cours de fiabilisation diff --git a/inst/extdata/scheme_bd_topo_3.csv b/inst/extdata/scheme_bd_topo_3.csv index 783d27b55ee58f3bba5d0ee4e3c68ab74b97d92f..3b1fd2f5108600e6d421606e7b0bcb4988546e29 100644 --- a/inst/extdata/scheme_bd_topo_3.csv +++ b/inst/extdata/scheme_bd_topo_3.csv @@ -1,60 +1,60 @@ -"name";"name_origin_gpkg";"name_origin_shp";"layer";"order";"keep";"type";"unit";"label_fr";"source";"length_source";"type_source";"comment" -"id";"cleabs";"ID";"batiment";1;"TRUE";"character";"NA";"Identifiant du bâtiment";"IGN";"NA";"character";"Identifiant unique de l'objet" -"nature";"nature";"NATURE";"batiment";2;"TRUE";"factor";"NA";"Nature du bâtiment";"IGN";"NA";"factor";"Attribut permettant de distinguer différents types de bâtiments selon leur architecture" -"destination_principal";"usage_1";"USAGE1";"batiment";3;"TRUE";"factor";"NA";"Usage principal";"IGN";"NA";"factor";"Usage du bâtiment ou d'une partie du bâtiment" -"destination_secondary";"usage_2";"USAGE2";"batiment";4;"TRUE";"factor";"NA";"Usage secondaire";"IGN";"NA";"factor";"Autre usage d'un bâtiment de fonction mixte" -"building_light";"construction_legere";"LEGER";"batiment";5;"TRUE";"logical";"NA";"Construction légère";"IGN";"NA";"logical";"Indique qu'il s'agit d'une structure légère, non attachée au sol par l'intermédiaire de fondations, ou d'un bâtiment ou partie de bâtiment ouvert sur au moins un côté" -"state";"etat_de_l_objet";"ETAT";"batiment";6;"TRUE";"factor";"NA";"État de l'objet bati";"IGN";"NA";"factor";"État ou stade de vie d'un objet qui, pour le thème bâti, peut être en projet, en construction, en service ou en ruine" -"date_creation";"date_creation";"DATE_CREAT";"batiment";7;"TRUE";"Date";"NA";"Date de création";"IGN";"NA";"Date";"Date et heure à laquelle l'objet a été saisi pour la première fois dans la base de données de production de l'IGN" -"date_update";"date_modification";"DATE_MAJ";"batiment";8;"TRUE";"Date";"NA";"Date de modification";"IGN";"NA";"Date";"Date et 
heure à laquelle l'objet a été modifié pour la dernière fois dans la base de données de production" -"date_apparition";"date_d_apparition";"DATE_APP";"batiment";9;"TRUE";"Date";"NA";"Date d'apparition";"IGN";"NA";"Date";"Date de création, de construction ou d'apparition de l'objet, ou date la plus ancienne à laquelle on peut attester de sa présence sur le terrain. La date d'apparition est issue de la date de construction présente dans les fichiers MAJIC (données de la DGFIP)" -"date_confirmation";"date_de_confirmation";"DATE_CONF";"batiment";10;"TRUE";"Date";"NA";"Date de confirmation";"IGN";"NA";"Date";"Date la plus récente à laquelle on peut attester de la présence de l'objet sur le terrain" -"source";"sources";"SOURCE";"batiment";11;"TRUE";"character";"NA";"Sources";"IGN";"NA";"character";"Sources attestant l'existence de l'objet, éventuellement dans le cadre d'un partenariat : organisme, administration, fichier de référence" -"source_id";"identifiants_sources";"ID_SOURCE";"batiment";12;"TRUE";"character";"NA";"Identifiants sources";"IGN";"NA";"character";"Identifiants de l'objet dans les répertoires des organismes consultés pour leur inventaire" -"accuracy_planimetric";"precision_planimetrique";"PREC_PLANI";"batiment";13;"TRUE";"numeric";"m";"Précision planimétrique";"IGN";"NA";"numeric";"Précision altimétrique (en mètres) de la géométrie décrivant l'objet" -"accuracy_altimetric";"precision_altimetrique";"PREC_ALTI";"batiment";14;"TRUE";"numeric";"m";"Précision altimétrique";"IGN";"NA";"numeric";"Précision planimétrique (en mètres) de la géométrie décrivant l'objet" -"dwelling";"nombre_de_logements";"NB_LOGTS";"batiment";15;"TRUE";"integer";"NA";"Nombre de logements";"IGN";"NA";"integer";"Nombre de logement dans le bâtiment" -"level";"nombre_d_etages";"NB_ETAGES";"batiment";16;"TRUE";"integer";"NA";"Nombre d'étages";"IGN";"NA";"integer";"Nombre total d'étages du bâtiment, rez-de-chaussée compris. 
Les sous-sol ne sont pas comptés" -"material_wall";"materiaux_des_murs";"MAT_MURS";"batiment";17;"TRUE";"factor";"NA";"Matériaux des murs";"IGN";"NA";"character";"Code indiquant les matériaux des murs du bâtiment, issu des informations contenues dans les fichiers fonciers" -"material_roof";"materiaux_de_la_toiture";"MAT_TOITS";"batiment";18;"TRUE";"factor";"NA";"Matériaux de la toiture";"IGN";"NA";"character";"Code indiquant les matériaux de la toiture du bâtiment, issu des informations contenues dans les fichiers fonciers" -"height";"hauteur";"HAUTEUR";"batiment";19;"TRUE";"numeric";"m";"Hauteur";"IGN";"NA";"numeric";"Hauteur du bâtiment mesurée entre le sol et le point haut de la gouttière (altitude maximum de la polyligne décrivant le bâtiment), exprimée en mètres." -"z_min_floor";"altitude_minimale_sol";"Z_MIN_SOL";"batiment";20;"TRUE";"numeric";"m";"Altitude minimale sol";"IGN";"NA";"numeric";"Altitude représentative au pied du bâtiment, du côté bas de la pente, exprimée en mètres" -"z_min_roof";"altitude_minimale_toit";"Z_MIN_TOIT";"batiment";21;"TRUE";"numeric";"m";"Altitude minimale toit";"IGN";"NA";"numeric";"Altitude du toit du bâtiment au niveau de l'arête décrivant son contour, exprimée en mètres." -"z_max_roof";"altitude_maximale_toit";"Z_MAX_TOIT";"batiment";22;"TRUE";"numeric";"m";"Altitude maximale toit";"IGN";"NA";"numeric";"Altitude qui correspond à la hauteur maximale du toit, c’est-à -dire au faîte du toit, exprimée en mètres." 
-"z_max_floor";"altitude_maximale_sol";"Z_MAX_SOL";"batiment";23;"TRUE";"numeric";"m";"Altitude maximale sol";"IGN";"NA";"numeric";"Altitude qui correspond à l’altitude maximale située au pied d’un bâtiment, exprimée en mètres" -"origin";"origine_du_batiment";"ORIGIN_BAT";"batiment";24;"TRUE";"factor";"NA";"Origine du bâtiment";"IGN";"NA";"factor";"Précise l'origine de la géométrie du bâtiment" -"majic_quality";"appariement_fichiers_fonciers";"APP_FF";"batiment";25;"TRUE";"character";"NA";"Appariement fichiers fonciers";"IGN";"NA";"character";"Indicateur relatif à la fiabilité de l'appariement avec les fichiers fonciers" -"id";"cleabs";"ID";"commune";1;"TRUE";"character";"NA";;"IGN";"NA";"character";"Identifiant unique de l'objet" -"commune";"code_insee";"INSEE_COM";"commune";2;"TRUE";"factor";"NA";;"IGN";"NA";"character";"Il s’agit du numéro officiel attribué par l’INSEE à la Commune : il est composé de 5 chiffres, les deux premiers correspondant au numéro de Département" -"district";"code_insee_de_l_arrondissement";"INSEE_ARR";"commune";3;"FALSE";"factor";"NA";;"IGN";"NA";"character";"Code INSEE de l'Arrondissement auquel appartient cette Commune" -"territorial_authority";"code_insee_de_la_collectivite_t";"INSEE_COL";"commune";4;"FALSE";"factor";"NA";;"IGN";"NA";"character";"Code INSEE de la Collectivité territoriale de niveau départemental incluant cette Commune" -"department";"code_insee_du_departement";"INSEE_DEP";"commune";5;"TRUE";"factor";"NA";;"IGN";"NA";"character";"Code INSEE du Département" -"region";"code_insee_de_la_region";"INSEE_REG";"commune";6;"TRUE";"factor";"NA";;"IGN";"NA";"character";"Code INSEE de la Région à laquelle appartient la Commune concernée" -"population";"population";"POPULATION";"commune";7;"TRUE";"integer";"NA";;"IGN";"NA";"integer";"Chiffre de population sans double compte" -"area";"surface_en_ha";"SURFACE_HA";"commune";8;"TRUE";"integer";"ha";;"IGN";"NA";"integer";"Superficie cadastrale en hectares de la commune telle que donnée 
par l'INSEE" -"date_creation";"date_creation";"DATE_CREAT";"commune";9;"FALSE";"Date";"NA";;"IGN";"NA";"Date";"Date et heure à laquelle l'objet a été saisi pour la première fois dans la base de données de production de l'IGN" -"date_update";"date_modification";"DATE_MAJ";"commune";10;"FALSE";"Date";"NA";;"IGN";"NA";"Date";"Date et heure à laquelle l'objet a été modifié pour la dernière fois dans la base de données de production" -"date_apparition";"date_d_apparition";"DATE_APP";"commune";11;"TRUE";"Date";"NA";;"IGN";"NA";"Date";"Date de création, de construction ou d'apparition de l'objet, ou date la plus ancienne à laquelle on peut attester de sa présence sur le terrain" -"date_confirmation";"date_de_confirmation";"DATE_CONF";"commune";12;"TRUE";"Date";"NA";;"IGN";"NA";"Date";"Date la plus récente à laquelle on peut attester de la présence de l'objet sur le terrain" -"postal_code";"code_postal";"CODE_POST";"commune";13;"TRUE";"factor";"NA";;"IGN";"NA";"character";"Code postal lié à la Commune" -"name";"nom_officiel";"NOM";"commune";14;"TRUE";"character";"NA";;"IGN";"NA";"character";"Ce champ comporte le toponyme intégral (avec article s’il y a) de la Commune. 
Il est saisi en lettres majuscules et minuscules, toutes deux accentuées" -"district_head";"chef_lieu_d_arrondissement";"CL_ARROND";"commune";15;"TRUE";"logical";"NA";;"IGN";"NA";"logical";"Indique si la commune est chef-lieu d'Arrondissement ou non" -"territorial_authority_head";"chef_lieu_de_collectivite_terr";"CL_COLLTER";"commune";16;"TRUE";"logical";"NA";;"IGN";"NA";"logical";"Indique si la commune est chef-lieu d'une Collectivité territoriale ou non" -"department_head";"chef_lieu_de_departement";"CL_DEPART";"commune";17;"TRUE";"logical";"NA";;"IGN";"NA";"logical";"Indique si la commune est chef-lieu d'un Département ou non" -"region_head";"chef_lieu_de_region";"CL_REGION";"commune";18;"TRUE";"logical";"NA";;"IGN";"NA";"logical";"Indique si la commune est chef-lieu d'une Région ou non" -"state_capital";"capitale_d_etat";"CAPITALE";"commune";19;"FALSE";"logical";"NA";;"IGN";"NA";"logical";"Indique si la commune est la capitale d'Etat ou non" -"date_census";"date_du_recensement";"DATE_RCT";"commune";20;"TRUE";"Date";"NA";;"IGN";"NA";"Date";"Date du recensement correspondant au chiffre de population saisi dans le champ 'Population' (en 2007, ce champ prend la valeur '01/01/1999')" -"institution_census";"organisme_recenseur";"RECENSEUR";"commune";21;"FALSE";"character";"NA";;"IGN";"NA";"character";"Organisme officiel fournissant les données de la population des unités administratives françaises" -"siren_epci";"codes_siren_des_epci";"SIREN_EPCI";"commune";22;"FALSE";"character";"NA";;"IGN";"NA";"character";"code SIREN des EPCI à fiscalité propre auxquels appartient la Commune" -"link_head";"lien_vers_chef_lieu";"ID_CH_LIEU";"commune";23;"FALSE";"character";"NA";;"IGN";"NA";;"Lien vers l’identifiant de la Zone d'habitation du chef-lieu duquel dépend la Commune" -"link_administrative_authority";"liens_vers_autorite_administrat";"ID_AUT_ADM";"commune";24;"FALSE";"character";"NA";;"IGN";"NA";;"Lien vers la clé absolue de l'objet Zone d'activité ou d'intérêt, de 
Nature=""Mairie"", de la Commune concernée, lieu de l'autorité administrative communale" -"siren";"code_siren";"CODE_SIREN";"commune";25;"FALSE";"character";"NA";;"IGN";"NA";"character";"Code SIREN de l'entité administrative" -"id";"cleabs";"ID";"departement";1;"TRUE";"character";"NA";;"IGN";"NA";"character";"Identifiant unique de l'objet" -"department";"code_insee";"INSEE_DEP";"departement";2;"FALSE";"factor";"NA";;"IGN";"NA";"character";"Code INSEE du Département" -"region";"code_insee_de_la_region";"INSEE_REG";"departement";3;"TRUE";"factor";"NA";;"IGN";"NA";"character";"Code INSEE de la Région" -"name";"nom_officiel";"NOM";"departement";4;"TRUE";"character";"NA";;"IGN";"NA";"character";"Nom du Département selon l'INSEE" -"date_creation";"date_creation";"DATE_CREAT";"departement";5;"FALSE";"Date";"NA";;"IGN";"NA";"Date";"Date et heure à laquelle l'objet a été saisi pour la première fois dans la base de données de production de l'IGN" -"date_update";"date_modification";"DATE_MAJ";"departement";6;"FALSE";"Date";"NA";;"IGN";"NA";"Date";"Date et heure à laquelle l'objet a été modifié pour la dernière fois dans la base de données de production" -"date_apparition";"date_d_apparition";"DATE_APP";"departement";7;"TRUE";"Date";"NA";;"IGN";"NA";"Date";"Date de création, de construction ou d'apparition de l'objet, ou date la plus ancienne à laquelle on peut attester de sa présence sur le terrain" -"date_confirmation";"date_de_confirmation";"DATE_CONF";"departement";8;"TRUE";"Date";"NA";;"IGN";"NA";"Date";"Date la plus récente à laquelle on peut attester de la présence de l'objet sur le terrain" -"link_administrative_authority";"liens_vers_autorite_administrat";"ID_AUT_ADM";"departement";9;"FALSE";"character";"NA";;"IGN";"NA";;"Lien vers (clé absolue) de l'objet Zone d'activité ou d'intérêt de Nature='Préfecture' du département concerné" +name;name_origin_gpkg;name_origin_shp;layer;order;keep;type;unit;label_fr;source;length_source;type_source;comment 
+id;cleabs;ID;batiment;1;TRUE;character;NA;Identifiant du bâtiment;IGN;NA;character;Identifiant unique de l'objet +nature;nature;NATURE;batiment;2;TRUE;factor;NA;Nature du bâtiment;IGN;NA;factor;Attribut permettant de distinguer différents types de bâtiments selon leur architecture +destination_principal;usage_1;USAGE1;batiment;3;TRUE;factor;NA;Usage principal;IGN;NA;factor;Usage du bâtiment ou d'une partie du bâtiment +destination_secondary;usage_2;USAGE2;batiment;4;TRUE;factor;NA;Usage secondaire;IGN;NA;factor;Autre usage d'un bâtiment de fonction mixte +building_light;construction_legere;LEGER;batiment;5;TRUE;logical;NA;Construction légère;IGN;NA;logical;Indique qu'il s'agit d'une structure légère, non attachée au sol par l'intermédiaire de fondations, ou d'un bâtiment ou partie de bâtiment ouvert sur au moins un côté +state;etat_de_l_objet;ETAT;batiment;6;TRUE;factor;NA;État de l'objet bati;IGN;NA;factor;État ou stade de vie d'un objet qui, pour le thème bâti, peut être en projet, en construction, en service ou en ruine +date_creation;date_creation;DATE_CREAT;batiment;7;TRUE;Date;NA;Date de création;IGN;NA;Date;Date et heure à laquelle l'objet a été saisi pour la première fois dans la base de données de production de l'IGN +date_update;date_modification;DATE_MAJ;batiment;8;TRUE;Date;NA;Date de modification;IGN;NA;Date;Date et heure à laquelle l'objet a été modifié pour la dernière fois dans la base de données de production +date_apparition;date_d_apparition;DATE_APP;batiment;9;TRUE;Date;NA;Date d'apparition;IGN;NA;Date;Date de création, de construction ou d'apparition de l'objet, ou date la plus ancienne à laquelle on peut attester de sa présence sur le terrain. 
La date d'apparition est issue de la date de construction présente dans les fichiers MAJIC (données de la DGFIP) +date_confirmation;date_de_confirmation;DATE_CONF;batiment;10;TRUE;Date;NA;Date de confirmation;IGN;NA;Date;Date la plus récente à laquelle on peut attester de la présence de l'objet sur le terrain +source;sources;SOURCE;batiment;11;TRUE;character;NA;Sources;IGN;NA;character;Sources attestant l'existence de l'objet, éventuellement dans le cadre d'un partenariat : organisme, administration, fichier de référence +source_id;identifiants_sources;ID_SOURCE;batiment;12;TRUE;character;NA;Identifiants sources;IGN;NA;character;Identifiants de l'objet dans les répertoires des organismes consultés pour leur inventaire +accuracy_planimetric;precision_planimetrique;PREC_PLANI;batiment;13;TRUE;numeric;m;Précision planimétrique;IGN;NA;numeric;Précision planimétrique (en mètres) de la géométrie décrivant l'objet +accuracy_altimetric;precision_altimetrique;PREC_ALTI;batiment;14;TRUE;numeric;m;Précision altimétrique;IGN;NA;numeric;Précision altimétrique (en mètres) de la géométrie décrivant l'objet +dwelling;nombre_de_logements;NB_LOGTS;batiment;15;TRUE;integer;NA;Nombre de logements;IGN;NA;integer;Nombre de logements dans le bâtiment +level;nombre_d_etages;NB_ETAGES;batiment;16;TRUE;integer;NA;Nombre d'étages;IGN;NA;integer;Nombre total d'étages du bâtiment, rez-de-chaussée compris. 
Les sous-sols ne sont pas comptés +material_wall;materiaux_des_murs;MAT_MURS;batiment;17;TRUE;factor;NA;Matériaux des murs;IGN;NA;character;Code indiquant les matériaux des murs du bâtiment, issu des informations contenues dans les fichiers fonciers +material_roof;materiaux_de_la_toiture;MAT_TOITS;batiment;18;TRUE;factor;NA;Matériaux de la toiture;IGN;NA;character;Code indiquant les matériaux de la toiture du bâtiment, issu des informations contenues dans les fichiers fonciers +height;hauteur;HAUTEUR;batiment;19;TRUE;numeric;m;Hauteur;IGN;NA;numeric;Hauteur du bâtiment mesurée entre le sol et le point haut de la gouttière (altitude maximum de la polyligne décrivant le bâtiment), exprimée en mètres. +z_min_floor;altitude_minimale_sol;Z_MIN_SOL;batiment;20;TRUE;numeric;m;Altitude minimale sol;IGN;NA;numeric;Altitude représentative au pied du bâtiment, du côté bas de la pente, exprimée en mètres +z_min_roof;altitude_minimale_toit;Z_MIN_TOIT;batiment;21;TRUE;numeric;m;Altitude minimale toit;IGN;NA;numeric;Altitude du toit du bâtiment au niveau de l'arête décrivant son contour, exprimée en mètres. +z_max_roof;altitude_maximale_toit;Z_MAX_TOIT;batiment;22;TRUE;numeric;m;Altitude maximale toit;IGN;NA;numeric;Altitude qui correspond à la hauteur maximale du toit, c’est-à-dire au faîte du toit, exprimée en mètres. 
+z_max_floor;altitude_maximale_sol;Z_MAX_SOL;batiment;23;TRUE;numeric;m;Altitude maximale sol;IGN;NA;numeric;Altitude qui correspond à l’altitude maximale située au pied d’un bâtiment, exprimée en mètres +origin;origine_du_batiment;ORIGIN_BAT;batiment;24;TRUE;factor;NA;Origine du bâtiment;IGN;NA;factor;Précise l'origine de la géométrie du bâtiment +majic_quality;appariement_fichiers_fonciers;APP_FF;batiment;25;TRUE;character;NA;Appariement fichiers fonciers;IGN;NA;character;Indicateur relatif à la fiabilité de l'appariement avec les fichiers fonciers +id;cleabs;ID;commune;1;TRUE;character;NA;;IGN;NA;character;Identifiant unique de l'objet +commune;code_insee;INSEE_COM;commune;2;TRUE;factor;NA;;IGN;NA;character;Il s’agit du numéro officiel attribué par l’INSEE à la Commune : il est composé de 5 chiffres, les deux premiers correspondant au numéro de Département +district;code_insee_de_l_arrondissement;INSEE_ARR;commune;3;FALSE;factor;NA;;IGN;NA;character;Code INSEE de l'Arrondissement auquel appartient cette Commune +territorial_authority;code_insee_de_la_collectivite_t;INSEE_COL;commune;4;FALSE;factor;NA;;IGN;NA;character;Code INSEE de la Collectivité territoriale de niveau départemental incluant cette Commune +department;code_insee_du_departement;INSEE_DEP;commune;5;TRUE;factor;NA;;IGN;NA;character;Code INSEE du Département +region;code_insee_de_la_region;INSEE_REG;commune;6;TRUE;factor;NA;;IGN;NA;character;Code INSEE de la Région à laquelle appartient la Commune concernée +population;population;POPULATION;commune;7;TRUE;integer;NA;;IGN;NA;integer;Chiffre de population sans double compte +area;surface_en_ha;SURFACE_HA;commune;8;TRUE;integer;ha;;IGN;NA;integer;Superficie cadastrale en hectares de la commune telle que donnée par l'INSEE +date_creation;date_creation;DATE_CREAT;commune;9;FALSE;Date;NA;;IGN;NA;Date;Date et heure à laquelle l'objet a été saisi pour la première fois dans la base de données de production de l'IGN 
+date_update;date_modification;DATE_MAJ;commune;10;FALSE;Date;NA;;IGN;NA;Date;Date et heure à laquelle l'objet a été modifié pour la dernière fois dans la base de données de production +date_apparition;date_d_apparition;DATE_APP;commune;11;TRUE;Date;NA;;IGN;NA;Date;Date de création, de construction ou d'apparition de l'objet, ou date la plus ancienne à laquelle on peut attester de sa présence sur le terrain +date_confirmation;date_de_confirmation;DATE_CONF;commune;12;TRUE;Date;NA;;IGN;NA;Date;Date la plus récente à laquelle on peut attester de la présence de l'objet sur le terrain +postal_code;code_postal;CODE_POST;commune;13;TRUE;factor;NA;;IGN;NA;character;Code postal lié à la Commune +name;nom_officiel;NOM;commune;14;TRUE;character;NA;;IGN;NA;character;Ce champ comporte le toponyme intégral (avec article s’il y a) de la Commune. Il est saisi en lettres majuscules et minuscules, toutes deux accentuées +district_head;chef_lieu_d_arrondissement;CL_ARROND;commune;15;TRUE;logical;NA;;IGN;NA;logical;Indique si la commune est chef-lieu d'Arrondissement ou non +territorial_authority_head;chef_lieu_de_collectivite_terr;CL_COLLTER;commune;16;TRUE;logical;NA;;IGN;NA;logical;Indique si la commune est chef-lieu d'une Collectivité territoriale ou non +department_head;chef_lieu_de_departement;CL_DEPART;commune;17;TRUE;logical;NA;;IGN;NA;logical;Indique si la commune est chef-lieu d'un Département ou non +region_head;chef_lieu_de_region;CL_REGION;commune;18;TRUE;logical;NA;;IGN;NA;logical;Indique si la commune est chef-lieu d'une Région ou non +state_capital;capitale_d_etat;CAPITALE;commune;19;FALSE;logical;NA;;IGN;NA;logical;Indique si la commune est la capitale d'Etat ou non +date_census;date_du_recensement;DATE_RCT;commune;20;TRUE;Date;NA;;IGN;NA;Date;Date du recensement correspondant au chiffre de population saisi dans le champ 'Population' (en 2007, ce champ prend la valeur '01/01/1999') 
+institution_census;organisme_recenseur;RECENSEUR;commune;21;FALSE;character;NA;;IGN;NA;character;Organisme officiel fournissant les données de la population des unités administratives françaises +siren_epci;codes_siren_des_epci;SIREN_EPCI;commune;22;FALSE;character;NA;;IGN;NA;character;code SIREN des EPCI à fiscalité propre auxquels appartient la Commune +link_head;lien_vers_chef_lieu;ID_CH_LIEU;commune;23;FALSE;character;NA;;IGN;NA;character;Lien vers l’identifiant de la Zone d'habitation du chef-lieu duquel dépend la Commune +link_administrative_authority;liens_vers_autorite_administrat;ID_AUT_ADM;commune;24;FALSE;character;NA;;IGN;NA;character;Lien vers la clé absolue de l'objet Zone d'activité ou d'intérêt, de Nature=Mairie, de la Commune concernée, lieu de l'autorité administrative communale +siren;code_siren;CODE_SIREN;commune;25;FALSE;character;NA;;IGN;NA;character;Code SIREN de l'entité administrative +id;cleabs;ID;departement;1;TRUE;character;NA;;IGN;NA;character;Identifiant unique de l'objet +department;code_insee;INSEE_DEP;departement;2;FALSE;factor;NA;;IGN;NA;character;Code INSEE du Département +region;code_insee_de_la_region;INSEE_REG;departement;3;TRUE;factor;NA;;IGN;NA;character;Code INSEE de la Région +name;nom_officiel;NOM;departement;4;TRUE;character;NA;;IGN;NA;character;Nom du Département selon l'INSEE +date_creation;date_creation;DATE_CREAT;departement;5;FALSE;Date;NA;;IGN;NA;Date;Date et heure à laquelle l'objet a été saisi pour la première fois dans la base de données de production de l'IGN +date_update;date_modification;DATE_MAJ;departement;6;FALSE;Date;NA;;IGN;NA;Date;Date et heure à laquelle l'objet a été modifié pour la dernière fois dans la base de données de production +date_apparition;date_d_apparition;DATE_APP;departement;7;TRUE;Date;NA;;IGN;NA;Date;Date de création, de construction ou d'apparition de l'objet, ou date la plus ancienne à laquelle on peut attester de sa présence sur le terrain 
+date_confirmation;date_de_confirmation;DATE_CONF;departement;8;TRUE;Date;NA;;IGN;NA;Date;Date la plus récente à laquelle on peut attester de la présence de l'objet sur le terrain +link_administrative_authority;liens_vers_autorite_administrat;ID_AUT_ADM;departement;9;FALSE;character;NA;;IGN;NA;character;Lien vers (clé absolue) de l'objet Zone d'activité ou d'intérêt de Nature='Préfecture' du département concerné diff --git a/inst/extdata/scheme_gaspar_azi.csv b/inst/extdata/scheme_gaspar_azi.csv index b3be6bb3457f2c5281f9f9d0fb35087b589bb808..702b01ede7df1251edba111faa75f3d562fa55a8 100644 --- a/inst/extdata/scheme_gaspar_azi.csv +++ b/inst/extdata/scheme_gaspar_azi.csv @@ -1,17 +1,17 @@ name;name_origin;order;keep;type;label_fr;source;length_source;type_source;comment -id;cod_nat_azi;1;TRUE;character;ID de l’atlas;georisques;;text; -azi_label_fr;lib_azi;2;TRUE;character;nom de l’atlas;georisques;;text; -catchment_label_fr;lib_bassin_risque;3;TRUE;character;nom du bassin de risque;georisques;;text; -hazard_label_fr;liste_risques;4;TRUE;factor;libellé du risque;georisques;;text; -commune;cod_commune;5;TRUE;factor;code INSEE de la commune;georisques;;text; -commune_label_fr;lib_commune;6;FALSE;factor;nom de la commune concernée;georisques;;text; -date_program_start;dat_program_deb;7;TRUE;date;date de début de programmation;georisques;;text; -date_program_end;dat_program_fin;8;TRUE;date;date de fin de programmation;georisques;;text; -date_study_start;dat_etu_deb;9;TRUE;date;date de début des études;georisques;;text; -date_study_end;dat_etu_fin;10;TRUE;date;date de fin des études;georisques;;text; -date_info_start;dat_info_deb;11;TRUE;date;date de début d’information des communes;georisques;;text; -date_info_end;dat_info_fin;12;TRUE;date;date de fin d’information des communes;georisques;;text; -date_atlas;dat_realisation;13;TRUE;date;date de réalisation de l’atlas;georisques;;text; -date_access;dat_diffusion;14;TRUE;date;date de diffusion de l’atlas;georisques;;text; 
-date_internet;dat_pub_net;15;TRUE;date;date de publication sur Internet;georisques;;text; -date_update;dat_maj;16;TRUE;date;date de mise à jour de la fiche gaspar;georisques;;text; +id;cod_nat_azi;1;TRUE;character;ID de l’atlas;georisques;;character; +azi_label_fr;lib_azi;2;TRUE;character;nom de l’atlas;georisques;;character; +catchment_label_fr;lib_bassin_risque;3;TRUE;character;nom du bassin de risque;georisques;;character; +hazard_label_fr;liste_risques;4;TRUE;factor;libellé du risque;georisques;;character; +commune;cod_commune;5;TRUE;factor;code INSEE de la commune;georisques;;character; +commune_label_fr;lib_commune;6;FALSE;factor;nom de la commune concernée;georisques;;character; +date_program_start;dat_program_deb;7;TRUE;Date;date de début de programmation;georisques;;character; +date_program_end;dat_program_fin;8;TRUE;Date;date de fin de programmation;georisques;;character; +date_study_start;dat_etu_deb;9;TRUE;Date;date de début des études;georisques;;character; +date_study_end;dat_etu_fin;10;TRUE;Date;date de fin des études;georisques;;character; +date_info_start;dat_info_deb;11;TRUE;Date;date de début d’information des communes;georisques;;character; +date_info_end;dat_info_fin;12;TRUE;Date;date de fin d’information des communes;georisques;;character; +date_atlas;dat_realisation;13;TRUE;Date;date de réalisation de l’atlas;georisques;;character; +date_access;dat_diffusion;14;TRUE;Date;date de diffusion de l’atlas;georisques;;character; +date_internet;dat_pub_net;15;TRUE;Date;date de publication sur Internet;georisques;;character; +date_update;dat_maj;16;TRUE;Date;date de mise à jour de la fiche gaspar;georisques;;character; diff --git a/inst/extdata/scheme_gaspar_catnat.csv b/inst/extdata/scheme_gaspar_catnat.csv index a2b6b44245df13f46fcff6aee99692f92da13bc9..ecb61fb41f9bb8d333229b5584510658013b9fb5 100644 --- a/inst/extdata/scheme_gaspar_catnat.csv +++ b/inst/extdata/scheme_gaspar_catnat.csv @@ -1,11 +1,11 @@ 
name;name_origin;order;keep;type;label_fr;source;length_source;type_source;comment -id;cod_nat_catnat;1;TRUE;character;ID de l’arrêté;georisques;;text; -commune;cod_commune;2;TRUE;factor;code INSEE de la commune;georisques;;text; -commune_label_fr;lib_commune;3;FALSE;factor;nom de la commune;georisques;;text; +id;cod_nat_catnat;1;TRUE;character;ID de l’arrêté;georisques;;character; +commune;cod_commune;2;TRUE;factor;code INSEE de la commune;georisques;;character; +commune_label_fr;lib_commune;3;FALSE;factor;nom de la commune;georisques;;character; catnat;num_risque_jo;4;TRUE;factor;code du type d’événement;georisques;;integer; -catnat_label_fr;lib_risque_jo;5;TRUE;factor;intitulé du type d’événement;georisques;;text; -date_start;dat_deb;6;TRUE;date;date de début de l’événement;georisques;;text; -date_end;dat_fin;7;TRUE;date;date de fin de l’événement;georisques;;text; -date_decree;dat_pub_arrete;8;TRUE;date;date de l’arrếté;georisques;;text; -date_jo;dat_pub_jo;9;TRUE;date;date de publication au journal officiel;georisques;;text; -date_update;dat_maj;10;TRUE;date;date de mise à jour de la fiche gaspar;georisques;;text; +catnat_label_fr;lib_risque_jo;5;TRUE;factor;intitulé du type d’événement;georisques;;character; +date_start;dat_deb;6;TRUE;Date;date de début de l’événement;georisques;;character; +date_end;dat_fin;7;TRUE;Date;date de fin de l’événement;georisques;;character; +date_decree;dat_pub_arrete;8;TRUE;Date;date de l’arrêté;georisques;;character; +date_jo;dat_pub_jo;9;TRUE;Date;date de publication au journal officiel;georisques;;character; +date_update;dat_maj;10;TRUE;Date;date de mise à jour de la fiche gaspar;georisques;;character; diff --git a/inst/extdata/scheme_gaspar_dicrim.csv b/inst/extdata/scheme_gaspar_dicrim.csv index c94d71bd1ff05183feab28648a74903a188f75be..5499b17e465c7fcf98ce87751a14d7298fbd7e0d 100644 --- a/inst/extdata/scheme_gaspar_dicrim.csv +++ b/inst/extdata/scheme_gaspar_dicrim.csv @@ -1,4 +1,4 @@ 
-name,name_origin,order,keep,type,label_fr,source,length_source,type_source,comment -commune,cod_commune,1,TRUE,factor,code INSEE de la commune,georisques,,text, -commune_label_fr,lib_commune,2,FALSE,factor,nom de la commune concernée,georisques,,text, -dat_publi_dicrim,dat_publi_dicrim,3,TRUE,date,date de publication du DICRIM,georisques,,text, +name;name_origin;order;keep;type;label_fr;source;length_source;type_source;comment +commune;cod_commune;1;TRUE;factor;code INSEE de la commune;georisques;;character; +commune_label_fr;lib_commune;2;FALSE;factor;nom de la commune concernée;georisques;;character; +dat_publi_dicrim;dat_publi_dicrim;3;TRUE;Date;date de publication du DICRIM;georisques;;character; diff --git a/inst/extdata/scheme_gaspar_pcs.csv b/inst/extdata/scheme_gaspar_pcs.csv index 9445e9fac45a87811d025d4ee72bf36edbf44dae..f48e7cc0604cc04ab928ca9bae26c49948aed616 100644 --- a/inst/extdata/scheme_gaspar_pcs.csv +++ b/inst/extdata/scheme_gaspar_pcs.csv @@ -1,10 +1,10 @@ -name,name_origin,order,keep,type,label_fr,source,length_source,type_source,comment -id,cod_nat_pcs,1,TRUE,character,id du pcs,georisques,,text, -pcs_label_fr,lib_pcs,2,TRUE,character,nom du pcs,georisques,,text, -commune,cod_commune,3,TRUE,factor,code INSEE de la commune,georisques,,text, -commune_label_fr,lib_commune,4,FALSE,factor,nom de la commune concernée,georisques,,text, -catchment_label_fr,lib_bassin_risque,5,TRUE,character,nom du bassin de risque,georisques,,text, -date_etu_start,dat_etu_pcs_deb,6,TRUE,date,date de début de l’etude du pcs,georisques,,text, -date_etu_end,dat_etu_pcs_fin,7,TRUE,date,date de fin de l’etude du pcs,georisques,,text, -dat_notif_fin,dat_notification_pcs_fin,8,TRUE,date,date de notification du pcs,georisques,,text, -date_update,dat_maj,9,TRUE,date,date de mise à jour de la fiche gaspar,georisques,,text, +name;name_origin;order;keep;type;label_fr;source;length_source;type_source;comment +id;cod_nat_pcs;1;TRUE;character;id du pcs;georisques;;character; 
+pcs_label_fr;lib_pcs;2;TRUE;character;nom du pcs;georisques;;character; +commune;cod_commune;3;TRUE;factor;code INSEE de la commune;georisques;;character; +commune_label_fr;lib_commune;4;FALSE;factor;nom de la commune concernée;georisques;;character; +catchment_label_fr;lib_bassin_risque;5;TRUE;character;nom du bassin de risque;georisques;;character; +date_etu_start;dat_etu_pcs_deb;6;TRUE;Date;date de début de l’étude du pcs;georisques;;character; +date_etu_end;dat_etu_pcs_fin;7;TRUE;Date;date de fin de l’étude du pcs;georisques;;character; +dat_notif_fin;dat_notification_pcs_fin;8;TRUE;Date;date de notification du pcs;georisques;;character; +date_update;dat_maj;9;TRUE;Date;date de mise à jour de la fiche gaspar;georisques;;character; diff --git a/inst/extdata/scheme_gaspar_pprn.csv b/inst/extdata/scheme_gaspar_pprn.csv index 5649314a71f10b1ed0691cca7151d9b966e10269..b5451d6a0d40c7c2fc9c038679e376ada54d00b6 100644 --- a/inst/extdata/scheme_gaspar_pprn.csv +++ b/inst/extdata/scheme_gaspar_pprn.csv @@ -1,44 +1,44 @@ name;name_origin;order;keep;type;label_fr;source;length_source;type_source;comment -id;cod_nat_pprn;1;TRUE;character;ID du PPRN;georisques;;text; -pprn_label_fr;lib_pprn;2;TRUE;factor;libellé du PPRN;georisques;;text; -catchment_label_fr;lib_bassin_risques;3;TRUE;factor;libellé du bassin de risque;georisques;;text; +id;cod_nat_pprn;1;TRUE;character;ID du PPRN;georisques;;character; +pprn_label_fr;lib_pprn;2;TRUE;factor;libellé du PPRN;georisques;;character; +catchment_label_fr;lib_bassin_risques;3;TRUE;factor;libellé du bassin de risque;georisques;;character; hazard;num_risque;4;TRUE;factor;code du risque;georisques;;integer; -hazard_label_fr;lib_risque;5;TRUE;factor;libellé du risque;georisques;;text; -pprn_revised;list_codenat_ppr_revise;6;TRUE;list;code du PPRN révisé;georisques;;text; -commune;cod_commune;7;TRUE;factor;code INSEE de la commune;georisques;;text; -commune_label_fr;lib_commune;8;FALSE;factor;nom de la commune;georisques;;text; 
-comment;lib_commentaire;9;TRUE;character;commentaire;georisques;;text; -type_label_fr;cod_ppr;10;TRUE;factor;code du PPRN;georisques;;text; -;dat_program_deb;11;FALSE;date;date de début prévisionnel de l'élaboration;georisques;;text; -;dat_program_fin;12;FALSE;date;date de fin prévisionnelle de l'élaboration;georisques;;text; -;dat_montage_deb;13;FALSE;date;date de début de réflexion de l'élaboration;georisques;;text; -;dat_montage_fin;14;FALSE;date;date de fin de réflexion de l'élaboration;georisques;;text; -date_prescription;dat_prescription;15;TRUE;date;date de l'arrêté préfectoral portant prescription;georisques;;text; -date_anticipated;dat_appli_ant;16;TRUE;date;date de l'arrêté préfectoral portant application anticipée;georisques;;text; -date_deprescription;dat_deprescription;17;TRUE;date;date de l'arrêté préfectoral portant déprescription;georisques;;text; -;dat_etu_hydro_deb;18;FALSE;date;date de début de réalisation des études hydrologiques et hydrauliques;georisques;;text; -;dat_etu_hydro_fin;19;FALSE;date;date de fin de réalisation des études hydrologiques et hydrauliques;georisques;;text; -;dat_etu_alea_deb;20;FALSE;date;date de début de l'élaboration des cartes d’aléas;georisques;;text; -;dat_etu_alea_fin;21;FALSE;date;date de fin de l'élaboration des cartes d’aléas;georisques;;text; -;dat_etu_enjeu_deb;22;FALSE;date;date de début de l'élaboration des cartes d’enjeux;georisques;;text; -;dat_etu_enjeu_fin;23;FALSE;date;date de fin de l'élaboration des cartes d’enjeux;georisques;;text; -;dat_etu_zona_regl_deb;24;FALSE;date;date de début de l'élaboration du zonage réglementaire;georisques;;text; -;dat_etu_zona_regl_fin;25;FALSE;date;date de fin de l'élaboration du zonage réglementaire;georisques;;text; -;dat_etu_rglmt_deb;26;FALSE;date;date de début de la rédaction de la notice de présentation et du règlement;georisques;;text; -;dat_etu_rglmt_fin;27;FALSE;date;date de fin de la rédaction de la notice de présentation et du règlement;georisques;;text; 
-;dat_concertation_deb;28;FALSE;date;date de début de la concertation;georisques;;text; -;dat_concertation_fin;29;FALSE;date;date de fin de la concertation;georisques;;text; -;dat_consultation_deb;30;FALSE;date;date de début de la consultation;georisques;;text; -;dat_consultation_fin;31;FALSE;date;date de fin de la consultation;georisques;;text; -;dat_consult_serv_deb;32;FALSE;date;date de début de la consultation des services;georisques;;text; -;dat_consult_serv_fin;33;FALSE;date;date de fin de la consultation des services;georisques;;text; -date_mise_a_enquete_deb;dat_mise_a_enquete_deb;34;TRUE;date;date d’ouverture de l’enquête publique;georisques;;text; -date_mise_a_enquete_fin;dat_mise_a_enquete_fin;35;TRUE;date;date de fermeture de l’enquête publique;georisques;;text; -date_approval;dat_approbation;36;TRUE;date;date de l'arrêté préfectoral portant approbation;georisques;;text; -date_amendment;dat_modification;37;TRUE;date;date de l'arrêté préfectoral prescrivant une modification;georisques;;text; -date_cancellation;dat_annulation;38;TRUE;date;date de la décision d'annulation du Tribunal Administratif;georisques;;text; -date_plu;dat_annexion_plu;39;TRUE;date;date de l'arrêté municipal annexant le PPR au PLU;georisques;;text; -date_extension;dat_proroga;40;TRUE;date;date de l'arrêté préfectoral prorogeant de 18 mois la date de prescription;georisques;;text; -date_repeal;dat_abrog;41;TRUE;date;date de l'arrêté préfectoral portant abrogation;georisques;;text; -revision;etat_revision;42;TRUE;logical;Indique si le PPR a été révisé ou non;georisques;;text; -date_update;dat_maj;43;TRUE;date;date de mise à jour de la fiche gaspar;georisques;;text; +hazard_label_fr;lib_risque;5;TRUE;factor;libellé du risque;georisques;;character; +pprn_revised;list_codenat_ppr_revise;6;TRUE;list;code du PPRN révisé;georisques;;character; +commune;cod_commune;7;TRUE;factor;code INSEE de la commune;georisques;;character; +commune_label_fr;lib_commune;8;FALSE;factor;nom de la 
commune;georisques;;character; +comment;lib_commentaire;9;TRUE;character;commentaire;georisques;;character; +type_label_fr;cod_ppr;10;TRUE;factor;code du PPRN;georisques;;character; +;dat_program_deb;11;FALSE;Date;date de début prévisionnel de l'élaboration;georisques;;character; +;dat_program_fin;12;FALSE;Date;date de fin prévisionnelle de l'élaboration;georisques;;character; +;dat_montage_deb;13;FALSE;Date;date de début de réflexion de l'élaboration;georisques;;character; +;dat_montage_fin;14;FALSE;Date;date de fin de réflexion de l'élaboration;georisques;;character; +date_prescription;dat_prescription;15;TRUE;Date;date de l'arrêté préfectoral portant prescription;georisques;;character; +date_anticipated;dat_appli_ant;16;TRUE;Date;date de l'arrêté préfectoral portant application anticipée;georisques;;character; +date_deprescription;dat_deprescription;17;TRUE;Date;date de l'arrêté préfectoral portant déprescription;georisques;;character; +;dat_etu_hydro_deb;18;FALSE;Date;date de début de réalisation des études hydrologiques et hydrauliques;georisques;;character; +;dat_etu_hydro_fin;19;FALSE;Date;date de fin de réalisation des études hydrologiques et hydrauliques;georisques;;character; +;dat_etu_alea_deb;20;FALSE;Date;date de début de l'élaboration des cartes d’aléas;georisques;;character; +;dat_etu_alea_fin;21;FALSE;Date;date de fin de l'élaboration des cartes d’aléas;georisques;;character; +;dat_etu_enjeu_deb;22;FALSE;Date;date de début de l'élaboration des cartes d’enjeux;georisques;;character; +;dat_etu_enjeu_fin;23;FALSE;Date;date de fin de l'élaboration des cartes d’enjeux;georisques;;character; +;dat_etu_zona_regl_deb;24;FALSE;Date;date de début de l'élaboration du zonage réglementaire;georisques;;character; +;dat_etu_zona_regl_fin;25;FALSE;Date;date de fin de l'élaboration du zonage réglementaire;georisques;;character; +;dat_etu_rglmt_deb;26;FALSE;Date;date de début de la rédaction de la notice de présentation et du règlement;georisques;;character; 
+;dat_etu_rglmt_fin;27;FALSE;Date;date de fin de la rédaction de la notice de présentation et du règlement;georisques;;character; +;dat_concertation_deb;28;FALSE;Date;date de début de la concertation;georisques;;character; +;dat_concertation_fin;29;FALSE;Date;date de fin de la concertation;georisques;;character; +;dat_consultation_deb;30;FALSE;Date;date de début de la consultation;georisques;;character; +;dat_consultation_fin;31;FALSE;Date;date de fin de la consultation;georisques;;character; +;dat_consult_serv_deb;32;FALSE;Date;date de début de la consultation des services;georisques;;character; +;dat_consult_serv_fin;33;FALSE;Date;date de fin de la consultation des services;georisques;;character; +date_mise_a_enquete_deb;dat_mise_a_enquete_deb;34;TRUE;Date;date d’ouverture de l’enquête publique;georisques;;character; +date_mise_a_enquete_fin;dat_mise_a_enquete_fin;35;TRUE;Date;date de fermeture de l’enquête publique;georisques;;character; +date_approval;dat_approbation;36;TRUE;Date;date de l'arrêté préfectoral portant approbation;georisques;;character; +date_amendment;dat_modification;37;TRUE;Date;date de l'arrêté préfectoral prescrivant une modification;georisques;;character; +date_cancellation;dat_annulation;38;TRUE;Date;date de la décision d'annulation du Tribunal Administratif;georisques;;character; +date_plu;dat_annexion_plu;39;TRUE;Date;date de l'arrêté municipal annexant le PPR au PLU;georisques;;character; +date_extension;dat_proroga;40;TRUE;Date;date de l'arrêté préfectoral prorogeant de 18 mois la date de prescription;georisques;;character; +date_repeal;dat_abrog;41;TRUE;Date;date de l'arrêté préfectoral portant abrogation;georisques;;character; +revision;etat_revision;42;TRUE;logical;Indique si le PPR a été révisé ou non;georisques;;character; +date_update;dat_maj;43;TRUE;Date;date de mise à jour de la fiche gaspar;georisques;;character; diff --git a/inst/extdata/scheme_gaspar_risq.csv b/inst/extdata/scheme_gaspar_risq.csv index 
b32ae506784fe2a94e2246b00f2ce610cf1fcab9..c9026816b3a3af7d94ffc4299721bbb269c9d8d5 100644 --- a/inst/extdata/scheme_gaspar_risq.csv +++ b/inst/extdata/scheme_gaspar_risq.csv @@ -1,4 +1,4 @@ name;name_origin;order;keep;type;label_fr;source;length_source;type_source;comment -commune;cod_commune;2;TRUE;factor;code INSEE de la commune;georisques;;text; -commune_label_fr;lib_commune;3;FALSE;factor;nom de la commune concernée;georisques;;text; -hazard_label_fr;lib_risque_long;4;TRUE;factor;libellé complet du risque;georisques;;text; +commune;cod_commune;2;TRUE;factor;code INSEE de la commune;georisques;;character; +commune_label_fr;lib_commune;3;FALSE;factor;nom de la commune concernée;georisques;;character; +hazard_label_fr;lib_risque_long;4;TRUE;factor;libellé complet du risque;georisques;;character; diff --git a/inst/extdata/scheme_insee_logement_2019.csv b/inst/extdata/scheme_insee_logement_2019.csv index 735023161f362221f3fdcb59cc489d53d307d34c..6307ffb6d3916d217060b2c5505d9a07256323a6 100644 --- a/inst/extdata/scheme_insee_logement_2019.csv +++ b/inst/extdata/scheme_insee_logement_2019.csv @@ -1,8 +1,8 @@ -"name";"name_origin";"order";"keep";"type";"label_fr";"source";"length_source";"type_source";"comment" -"niv_geo";"NIVGEO";1;"FALSE";"character";"niveau administratif";"insee.fr";"NA";"character";"niveau administratif groupant les données." 
-"commune";"CODGEO";2;"TRUE";"character";"Code INSEE";"insee.fr";"NA";"character";"Code INSEE de la commune actuelle sur la base du Code Officiel géographique en vigueur" -"commune_name";"LIBGEO";3;"FALSE";"character";"Nom officiel de la commune";"insee.fr";"NA";"character";"Nom officiel de la commune actuelle" -"usage";"CATL";4;"FALSE";"factor";"Catégorie de logement";"insee.fr";"NA";"integer";"Catégorie de logement : 1) résidences principales; 2)logements occasionnels; 3)résidences secondaires; 4)logements vacants" -"period";"ACHL20";5;"TRUE";"factor";"Epoque d'achèvement de la construction";"insee.fr";"NA";"character";"Epoque d'achèvement de la construction : A11) Avant 1919; A12) De 1919 à 1945; B11) De 1946 à 1970; B12) De 1971 à 1990; C100) De 1991 à 2005; C106P) De 2006 à 2015" -"type";"TYPLR";6;"TRUE";"factor";"Type de logement";"insee.fr";"NA";"integer";"Type de logement : 1) maisons; 2) appartements; 3) autres" -"dwelling";"NB";7;"TRUE";"numeric";"Nombre de logements";"insee.fr";"NA";"character";"Nombre de logements. 
Pour une raison inconnue le nombre contient des chiffres decimaux" +name;name_origin;order;keep;type;label_fr;source;length_source;type_source;comment +niv_geo;NIVGEO;1;FALSE;character;niveau administratif;insee.fr;NA;character; +commune;CODGEO;2;TRUE;character;Code INSEE;insee.fr;NA;character; +commune_name;LIBGEO;3;FALSE;character;Nom officiel de la commune;insee.fr;NA;character; +usage;CATL;4;FALSE;factor;Catégorie de logement;insee.fr;NA;integer;(1 : résidences principales, 2 : logements occasionnels, 3 : résidences secondaires, 4 : logements vacants) +period;ACHL20;5;TRUE;factor;Époque d'achèvement de la construction;insee.fr;NA;character;(A11 : avant 1919, A12 : 1919 à 1945, B11 : 1946 à 1970, B12 : 1971 à 1990, C100 : 1991 à 2005, C106P : 2006 à 2015) +type;TYPLR;6;TRUE;factor;Type de logement;insee.fr;NA;integer;(1 : maisons, 2 : appartements, 3 : autres) +dwelling;NB;7;TRUE;numeric;Nombre de logements;insee.fr;NA;character;Pour une raison non documentée le nombre contient des chiffres décimaux diff --git a/inst/extdata/scheme_rpg_1.csv b/inst/extdata/scheme_rpg_1.csv index b9c4e45268178bff26ed4057f419648f5c7f946a..13af69b767892e93522ab39d40e1f98235aedead 100644 --- a/inst/extdata/scheme_rpg_1.csv +++ b/inst/extdata/scheme_rpg_1.csv @@ -1,10 +1,10 @@ -"name";"name_origin";"layer";"order";"keep";"type";"label_fr";"source";"length_source";"type_source";"comment" -"ilot";"NUM_ILOT";"ILOTS_ANONYMES";1;"FALSE";"character";"ID de l’ilôt";"IGN";;"text"; -"commune";"COMMUNE";"ILOTS_ANONYMES";2;"FALSE";"factor";"Commune de rattachement de la parcelle";"IGN";;"text"; -"juridical";"FORME_JURI";"ILOTS_ANONYMES";3;"FALSE";"factor";"Forme juridique de l’exploitation";"IGN";;"text"; -"surface_declared";"SURF_DECLA";"ILOTS_ANONYMES";4;"FALSE";"numeric";"Surface déclarée";"IGN";;"numeric"; -"department";"DEP_RATTACH";"ILOTS_ANONYMES";5;"FALSE";"factor";"Département de rattachement de la parcelle";"IGN";;"text"; 
-"surface_graph";"SURF_GRAPH";"ILOTS_ANONYMES";6;"FALSE";"numeric";"Surface d’origine de la parcelle graphique";"IGN";;"numeric"; -"surface";"SURF_CULTU";"ILOTS_ANONYMES";7;"TRUE";"numeric";"Surface d’origine de la parcelle graphique";"IGN";;"numeric"; -"group";"CODE_CULTU";"ILOTS_ANONYMES";8;"TRUE";"factor";"Code du groupe de la culture présente";"IGN";;"text"; -"group.label.fr";"NOM_CULTU";"ILOTS_ANONYMES";9;"FALSE";"factor";"Étiquette du groupe de la culture présente";"IGN";;"text"; +name;name_origin;layer;order;keep;type;label_fr;source;length_source;type_source;comment +ilot;NUM_ILOT;ILOTS_ANONYMES;1;FALSE;character;ID de l’ilôt;IGN;;character; +commune;COMMUNE;ILOTS_ANONYMES;2;FALSE;factor;Commune de rattachement de la parcelle;IGN;;character; +juridical;FORME_JURI;ILOTS_ANONYMES;3;FALSE;factor;Forme juridique de l’exploitation;IGN;;character; +surface_declared;SURF_DECLA;ILOTS_ANONYMES;4;FALSE;numeric;Surface déclarée;IGN;;numeric; +department;DEP_RATTACH;ILOTS_ANONYMES;5;FALSE;factor;Département de rattachement de la parcelle;IGN;;character; +surface_graph;SURF_GRAPH;ILOTS_ANONYMES;6;FALSE;numeric;Surface d’origine de la parcelle graphique;IGN;;numeric; +surface;SURF_CULTU;ILOTS_ANONYMES;7;TRUE;numeric;Surface d’origine de la parcelle graphique;IGN;;numeric; +group;CODE_CULTU;ILOTS_ANONYMES;8;TRUE;factor;Code du groupe de la culture présente;IGN;;character; +group.label.fr;NOM_CULTU;ILOTS_ANONYMES;9;FALSE;factor;Étiquette du groupe de la culture présente;IGN;;character; diff --git a/inst/extdata/scheme_rpg_2.csv b/inst/extdata/scheme_rpg_2.csv index 7978e1064e26d3f2c62563ce707fb2c9c6d5f3d8..06dadf95b05174e6ca8858a9d46bc6bf5ef6c883 100644 --- a/inst/extdata/scheme_rpg_2.csv +++ b/inst/extdata/scheme_rpg_2.csv @@ -1,8 +1,8 @@ -"name";"name_origin";"layer";"order";"keep";"type";"label_fr";"source";"length_source";"type_source";"comment" -"id";"ID_PARCEL";"PARCELLES_GRAPHIQUES";1;"TRUE";"character";"ID de la parcelle graphique";"IGN";;"text"; 
-"surface";"SURF_PARC";"PARCELLES_GRAPHIQUES";2;"TRUE";"numeric";"Surface d’origine de la parcelle graphique";"IGN";;"numeric"; -"culture";"CODE_CULTU";"PARCELLES_GRAPHIQUES";3;"TRUE";"factor";"Code de la culture présente";"IGN";;"text"; -"group";"CODE_GROUP";"PARCELLES_GRAPHIQUES";4;"TRUE";"factor";"Code du groupe de la culture présente";"IGN";;"text"; -"catch_1";"CULTURE_D1";"PARCELLES_GRAPHIQUES";5;"TRUE";"factor";"Première culture dérobée";"IGN";;"text"; -"catch_2";"CULTURE_D2";"PARCELLES_GRAPHIQUES";6;"TRUE";"factor";"Seconde culture dérobée";"IGN";;"text"; -"ilot";"NUM_ILOT";"ILOTS_ANONYMES";1;"FALSE";"character";"ID de l’ilôt";"IGN";;"text"; +name;name_origin;layer;order;keep;type;label_fr;source;length_source;type_source;comment +id;ID_PARCEL;PARCELLES_GRAPHIQUES;1;TRUE;character;ID de la parcelle graphique;IGN;;character; +surface;SURF_PARC;PARCELLES_GRAPHIQUES;2;TRUE;numeric;Surface d’origine de la parcelle graphique;IGN;;numeric; +culture;CODE_CULTU;PARCELLES_GRAPHIQUES;3;TRUE;factor;Code de la culture présente;IGN;;character; +group;CODE_GROUP;PARCELLES_GRAPHIQUES;4;TRUE;factor;Code du groupe de la culture présente;IGN;;character; +catch_1;CULTURE_D1;PARCELLES_GRAPHIQUES;5;TRUE;factor;Première culture dérobée;IGN;;character; +catch_2;CULTURE_D2;PARCELLES_GRAPHIQUES;6;TRUE;factor;Seconde culture dérobée;IGN;;character; +ilot;NUM_ILOT;ILOTS_ANONYMES;1;FALSE;character;ID de l’ilôt;IGN;;character; diff --git a/inst/extdata/scheme_sirene_2019.csv b/inst/extdata/scheme_sirene_2019.csv index 23da98db309231f28f30cdf7123872ac5e9b36da..177290fe8ca937358e77173b27f2f3e2bb856db3 100644 --- a/inst/extdata/scheme_sirene_2019.csv +++ b/inst/extdata/scheme_sirene_2019.csv @@ -1,58 +1,58 @@ -name;name.origin;order;keep;type;label.french;source;length.source;type.source;comment -siren;siren;1;FALSE;character;Numéro Siren;INSEE;9;text;Redondant avec siret -nic;nic;2;FALSE;character;Numéro NIC;INSEE;5;text;Redondant avec siret -siret;siret;3;TRUE;character;Numéro 
Siret;INSEE;14;text; -diffusion;statutDiffusionEtablissement;4;FALSE;factor;Statut de diffusion de l’établissement;INSEE;1;code;Diffusion (grand) public ou non -creation_date;dateCreationEtablissement;5;TRUE;Date;Date de création de l’établissement;INSEE;10;date; -staff;trancheEffectifsEtablissement;6;TRUE;factor;Tranche d’effectif salarié de l’établissement;INSEE;2;code; -staff_year;anneeEffectifsEtablissement;7;TRUE;integer;Année de validité de la tranche d’effectif salarié de l’établissement;INSEE;4;date; -ape_rm;activitePrincipaleRegistreMetiersEtablissement;8;FALSE;factor;Activité exercée par l’artisan inscrit au registre des métiers;INSEE;6;code;Redonant avec ape et beaucoup de valeurs manquantes -last_date;dateDernierTraitementEtablissement;9;TRUE;Date;Date du dernier traitement de l’établissement dans le répertoire Sirene;INSEE;10;date; -hq;etablissementSiege;10;TRUE;logical;Qualité de siège ou non de l’établissement;INSEE;5;text; +name;name_origin;order;keep;type;label_french;source;length_source;type_source;comment +siren;siren;1;FALSE;character;Numéro Siren;INSEE;9;character;Redondant avec siret +nic;nic;2;FALSE;character;Numéro NIC;INSEE;5;character;Redondant avec siret +siret;siret;3;TRUE;character;Numéro Siret;INSEE;14;character; +diffusion;statutDiffusionEtablissement;4;FALSE;factor;Statut de diffusion de l’établissement;INSEE;1;character;Diffusion (grand) public ou non +creation_date;dateCreationEtablissement;5;TRUE;Date;Date de création de l’établissement;INSEE;10;character; +staff;trancheEffectifsEtablissement;6;TRUE;factor;Tranche d’effectif salarié de l’établissement;INSEE;2;character; +staff_year;anneeEffectifsEtablissement;7;TRUE;integer;Année de validité de la tranche d’effectif salarié de l’établissement;INSEE;4;character; +ape_rm;activitePrincipaleRegistreMetiersEtablissement;8;FALSE;factor;Activité exercée par l’artisan inscrit au registre des métiers;INSEE;6;character;Redondant avec ape et beaucoup de valeurs manquantes 
+last_date;dateDernierTraitementEtablissement;9;TRUE;Date;Date du dernier traitement de l’établissement dans le répertoire Sirene;INSEE;10;character; +hq;etablissementSiege;10;TRUE;logical;Qualité de siège ou non de l’établissement;INSEE;5;character; period_number;nombrePeriodesEtablissement;11;FALSE;numeric;Nombre de périodes de l’établissement;INSEE;2;numeric; -address;complementAdresseEtablissement;12;FALSE;character;Complément d’adresse;INSEE;38;text;Redondant avec geo_adresse +address;complementAdresseEtablissement;12;FALSE;character;Complément d’adresse;INSEE;38;character;Redondant avec geo_adresse route_number;numeroVoieEtablissement;13;FALSE;numeric;Numéro de voie;INSEE;4;numeric;Redondant avec geo_adresse -repetition;indiceRepetitionEtablissement;14;FALSE;character;Indice de répétition dans la voie;INSEE;1;text;Redondant avec geo_adresse -toute_type;typeVoieEtablissement;15;FALSE;factor;Type de voie;INSEE;4;code;Redondant avec geo_adresse -route_label;libelleVoieEtablissement;16;FALSE;character;Libellé de voie;INSEE;100;text;Redondant avec geo_adresse -postal;codePostalEtablissement;17;FALSE;character;Code postal;INSEE;5;text;Redondant avec geo_adresse -commune_label;libelleCommuneEtablissement;18;FALSE;character;Libellé de la commune;INSEE;100;text;Redondant avec geo_adresse -foreign_commune_label;libelleCommuneEtrangerEtablissement;19;FALSE;character;Libellé de la commune pour un établissement situé à l’étranger;INSEE;100;text;Redondant avec geo_adresse -distribution;distributionSpecialeEtablissement;20;FALSE;character;Distribution spéciale de l’établissement;INSEE;26;text;Redondant avec geo_adresse -commune;codeCommuneEtablissement;21;TRUE;character;Code commune de l’établissement;INSEE;5;code;Code INSEE commune -cedex;codeCedexEtablissement;22;FALSE;character;Code cedex;INSEE;9;text;Redondant avec geo_adresse -cedex_label;libelleCedexEtablissement;23;FALSE;character;Libellé du code cedex;INSEE;100;text;Redondant avec geo_adresse 
-country;codePaysEtrangerEtablissement;24;FALSE;factor;Code pays pour un établissement situé à l’étranger;INSEE;5;code;Redondant avec geo_adresse -country_label;libellePaysEtrangerEtablissement;25;FALSE;character;Libellé du pays pour un établissement situé à l’étranger;INSEE;100;text;Redondant avec geo_adresse -address_2;complementAdresse2Etablissement;26;FALSE;character;Complément d’adresse secondaire;INSEE;38;text;Pour entrée secondaire d’une entreprise +repetition;indiceRepetitionEtablissement;14;FALSE;character;Indice de répétition dans la voie;INSEE;1;character;Redondant avec geo_adresse +toute_type;typeVoieEtablissement;15;FALSE;factor;Type de voie;INSEE;4;character;Redondant avec geo_adresse +route_label;libelleVoieEtablissement;16;FALSE;character;Libellé de voie;INSEE;100;character;Redondant avec geo_adresse +postal;codePostalEtablissement;17;FALSE;character;Code postal;INSEE;5;character;Redondant avec geo_adresse +commune_label;libelleCommuneEtablissement;18;FALSE;character;Libellé de la commune;INSEE;100;character;Redondant avec geo_adresse +foreign_commune_label;libelleCommuneEtrangerEtablissement;19;FALSE;character;Libellé de la commune pour un établissement situé à l’étranger;INSEE;100;character;Redondant avec geo_adresse +distribution;distributionSpecialeEtablissement;20;FALSE;character;Distribution spéciale de l’établissement;INSEE;26;character;Redondant avec geo_adresse +commune;codeCommuneEtablissement;21;TRUE;character;Code commune de l’établissement;INSEE;5;character;Code INSEE commune +cedex;codeCedexEtablissement;22;FALSE;character;Code cedex;INSEE;9;character;Redondant avec geo_adresse +cedex_label;libelleCedexEtablissement;23;FALSE;character;Libellé du code cedex;INSEE;100;character;Redondant avec geo_adresse +country;codePaysEtrangerEtablissement;24;FALSE;factor;Code pays pour un établissement situé à l’étranger;INSEE;5;character;Redondant avec geo_adresse +country_label;libellePaysEtrangerEtablissement;25;FALSE;character;Libellé du pays 
pour un établissement situé à l’étranger;INSEE;100;character;Redondant avec geo_adresse +address_2;complementAdresse2Etablissement;26;FALSE;character;Complément d’adresse secondaire;INSEE;38;character;Pour entrée secondaire d’une entreprise route_2_number;numeroVoie2Etablissement;27;FALSE;numeric;Numéro de la voie de l’adresse secondaire;INSEE;4;numeric;Pour entrée secondaire d’une entreprise -repetition_2;indiceRepetition2Etablissement;28;FALSE;character;Indice de répétition dans la voie pour l’adresse secondaire;INSEE;1;text;Pour entrée secondaire d’une entreprise -route_2_type;typeVoie2Etablissement;29;FALSE;factor;Type de voie de l’adresse secondaire;INSEE;4;code;Pour entrée secondaire d’une entreprise -route_2_label;libelleVoie2Etablissement;30;FALSE;character;Libellé de voie de l’adresse secondaire;INSEE;100;text;Pour entrée secondaire d’une entreprise -postal_2;codePostal2Etablissement;31;FALSE;character;Code postal de l’adresse secondaire;INSEE;5;text;Pour entrée secondaire d’une entreprise -commune_2_label;libelleCommune2Etablissement;32;FALSE;character;Libellé de la commune de l’adresse secondaire;INSEE;100;text;Pour entrée secondaire d’une entreprise -foreign_commune_2_label;libelleCommuneEtranger2Etablissement;33;FALSE;character;Libellé de la commune de l’adresse secondaire pour un établissement situé à l’étranger;INSEE;100;text;Pour entrée secondaire d’une entreprise -distribution_2;distributionSpeciale2Etablissement;34;FALSE;character;Distribution spéciale de l’adresse secondaire de l’établissement;INSEE;26;text;Pour entrée secondaire d’une entreprise -commune_2;codeCommune2Etablissement;35;FALSE;factor;Code commune de l’adresse secondaire;INSEE;5;code;Pour entrée secondaire d’une entreprise -cedex_2;codeCedex2Etablissement;36;FALSE;character;Code cedex de l’adresse secondaire;INSEE;9;text;Pour entrée secondaire d’une entreprise -cedex_2_label;libelleCedex2Etablissement;37;FALSE;character;Libellé du code cedex de l’adresse 
secondaire;INSEE;100;text;Pour entrée secondaire d’une entreprise -country_2;codePaysEtranger2Etablissement;38;FALSE;factor;Code pays de l’adresse secondaire pour un établissement situé à l’étranger;INSEE;5;code;Pour entrée secondaire d’une entreprise -country_2_label;libellePaysEtranger2Etablissement;39;FALSE;character;Libellé du pays de l’adresse secondaire pour un établissement situé à l’étranger;INSEE;100;text;Pour entrée secondaire d’une entreprise -start_date;dateDebut;40;FALSE;Date;Date de début d''une période d'historique d'un établissement;INSEE;10;date; -state;etatAdministratifEtablissement;41;TRUE;factor;État administratif de l’établissement;INSEE;1;code; -sign_1;enseigne1Etablissement;42;FALSE;character;Première ligne d’enseigne de l’établissement;INSEE;50;text; -sign_2;enseigne2Etablissement;43;FALSE;character;Deuxième ligne d’enseigne de l’établissement;INSEE;50;text; -sign_3;enseigne3Etablissement;44;FALSE;character;Troisième ligne d’enseigne de l’établissement;INSEE;50;text; -name_usual;denominationUsuelleEtablissement;45;FALSE;character;Dénomination usuelle de l’établissement;INSEE;100;text;Beaucoup de valeurs manquantes -ape;activitePrincipaleEtablissement;46;TRUE;factor;Activité principale de l''établissement pendant la période;INSEE;6;code; -ape_label;nomenclatureActivitePrincipaleEtablissement;47;FALSE;factor;Nomenclature d’activité de la variable ape;INSEE;8;code;Redondant avec activitePrincipaleEtablissement -employer;caractereEmployeurEtablissement;48;TRUE;logical;Caractère employeur de l’établissement;INSEE;1;code; -longitude;longitude;49;TRUE;numeric;Longitude géographique (en degré);data.cquest.org;;numeric; -latitude;latitude;50;TRUE;numeric;Latitude géographique (en degré);data.cquest.org;;numeric; +repetition_2;indiceRepetition2Etablissement;28;FALSE;character;Indice de répétition dans la voie pour l’adresse secondaire;INSEE;1;character;Pour entrée secondaire d’une entreprise +route_2_type;typeVoie2Etablissement;29;FALSE;factor;Type de 
voie de l’adresse secondaire;INSEE;4;character;Pour entrée secondaire d’une entreprise +route_2_label;libelleVoie2Etablissement;30;FALSE;character;Libellé de voie de l’adresse secondaire;INSEE;100;character;Pour entrée secondaire d’une entreprise +postal_2;codePostal2Etablissement;31;FALSE;character;Code postal de l’adresse secondaire;INSEE;5;character;Pour entrée secondaire d’une entreprise +commune_2_label;libelleCommune2Etablissement;32;FALSE;character;Libellé de la commune de l’adresse secondaire;INSEE;100;character;Pour entrée secondaire d’une entreprise +foreign_commune_2_label;libelleCommuneEtranger2Etablissement;33;FALSE;character;Libellé de la commune de l’adresse secondaire pour un établissement situé à l’étranger;INSEE;100;character;Pour entrée secondaire d’une entreprise +distribution_2;distributionSpeciale2Etablissement;34;FALSE;character;Distribution spéciale de l’adresse secondaire de l’établissement;INSEE;26;character;Pour entrée secondaire d’une entreprise +commune_2;codeCommune2Etablissement;35;FALSE;factor;Code commune de l’adresse secondaire;INSEE;5;character;Pour entrée secondaire d’une entreprise +cedex_2;codeCedex2Etablissement;36;FALSE;character;Code cedex de l’adresse secondaire;INSEE;9;character;Pour entrée secondaire d’une entreprise +cedex_2_label;libelleCedex2Etablissement;37;FALSE;character;Libellé du code cedex de l’adresse secondaire;INSEE;100;character;Pour entrée secondaire d’une entreprise +country_2;codePaysEtranger2Etablissement;38;FALSE;factor;Code pays de l’adresse secondaire pour un établissement situé à l’étranger;INSEE;5;character;Pour entrée secondaire d’une entreprise +country_2_label;libellePaysEtranger2Etablissement;39;FALSE;character;Libellé du pays de l’adresse secondaire pour un établissement situé à l’étranger;INSEE;100;character;Pour entrée secondaire d’une entreprise +start_date;dateDebut;40;FALSE;Date;Date de début d''une période d'historique d'un établissement;INSEE;10;character; 
+state;etatAdministratifEtablissement;41;TRUE;factor;État administratif de l’établissement;INSEE;1;character; +sign_1;enseigne1Etablissement;42;FALSE;character;Première ligne d’enseigne de l’établissement;INSEE;50;character; +sign_2;enseigne2Etablissement;43;FALSE;character;Deuxième ligne d’enseigne de l’établissement;INSEE;50;character; +sign_3;enseigne3Etablissement;44;FALSE;character;Troisième ligne d’enseigne de l’établissement;INSEE;50;character; +name_usual;denominationUsuelleEtablissement;45;FALSE;character;Dénomination usuelle de l’établissement;INSEE;100;character;Beaucoup de valeurs manquantes +ape;activitePrincipaleEtablissement;46;TRUE;factor;Activité principale de l''établissement pendant la période;INSEE;6;character; +ape_label;nomenclatureActivitePrincipaleEtablissement;47;FALSE;factor;Nomenclature d’activité de la variable ape;INSEE;8;character;Redondant avec activitePrincipaleEtablissement +employer;caractereEmployeurEtablissement;48;TRUE;logical;Caractère employeur de l’établissement;INSEE;1;character; +lon;longitude;49;TRUE;numeric;Longitude géographique (en degré);data.cquest.org;;numeric; +lat;latitude;50;TRUE;numeric;Latitude géographique (en degré);data.cquest.org;;numeric; geo_score;geo_score;51;TRUE;numeric;Score de la qualité de la géolocalisation;data.cquest.org;;numeric; -geo_type;geo_type;52;TRUE;factor;Type de géolocalisation;data.cquest.org;;text; -geo_adresse;geo_adresse;53;TRUE;character;Adresse obtenue par la géolocalisation;data.cquest.org;;text; -geo_id;geo_id;54;FALSE;character;geo_id;data.cquest.org;;code; -geo_ligne;geo_ligne;55;FALSE;character;geo_ligne;data.cquest.org;;code; -geo_l4;geo_l4;56;FALSE;character;geo_l4;data.cquest.org;;text; -geo_l5;geo_l5;57;FALSE;character;geo_l5;data.cquest.org;;code; +geo_type;geo_type;52;TRUE;factor;Type de géolocalisation;data.cquest.org;;character; +geo_adresse;geo_adresse;53;TRUE;character;Adresse obtenue par la géolocalisation;data.cquest.org;;character; 
+geo_id;geo_id;54;FALSE;character;geo_id;data.cquest.org;;character; +geo_ligne;geo_ligne;55;FALSE;character;geo_ligne;data.cquest.org;;character; +geo_l4;geo_l4;56;FALSE;character;geo_l4;data.cquest.org;;character; +geo_l5;geo_l5;57;FALSE;character;geo_l5;data.cquest.org;;character; diff --git a/inst/extdata/scheme_sirene_na.csv b/inst/extdata/scheme_sirene_na.csv index c1000df643d7cfc07c1acdad9f767ad05425d180..7d7f55d805d99248a33edd9aba5f90f620ca2032 100644 --- a/inst/extdata/scheme_sirene_na.csv +++ b/inst/extdata/scheme_sirene_na.csv @@ -1,4 +1,4 @@ -name;na.value +name;na_value creation_date;1900-01-01 ape;00.0Z ape;00.00Z diff --git a/script/maintain_geo_sirene.R b/script/maintain_geo_sirene.R index 1ae7a8bc29f3017440411709fa0fde369cd2206f..2d04e664dd0317d3f099814a326793a9cf64fe20 100755 --- a/script/maintain_geo_sirene.R +++ b/script/maintain_geo_sirene.R @@ -5,18 +5,39 @@ arg = commandArgs(trailingOnly = TRUE) ## ## nohup ./script_dl.geo_sirene.R > script_dl.geo_sirene.out & ## -origin = "https://data.cquest.org/geo_sirene/" -destination = "~/floodam-data/sirene/geo_sirene" + if(length(arg) == 0) month = format(Sys.Date(), "%Y-%m") if(length(arg) == 1) month = arg[1] if(length(arg) >= 2) month = floodam.data::create.month(arg[1], arg[2]) -# month = floodam.data::create.month("2018-10", "2020-04") -invisible(floodam.data::download.geo_sirene(origin, destination, month = month)) +destination = "data-floodam/original/geo_sirene" +floodam.data::download_geo_sirene(destination, month = month) + +floodam.data::alert_mattermost( + sprintf( + "Hello ! 
Vintage **%s** of **geo_sirene** has been downloaded.\n", + paste("-", month, collapse = "\n") + ) +) + +origin = file.path(destination, "version_2019", month) +destination = file.path( + "data-floodam/adapted/geo_sirene", + "version_2019", + month +) + +adapt_geo_sirene( + origin, + destination, + archive = "StockEtablissementActif_utf8_geo.csv.gz", + path_eaip = "data-floodam/adapted/eaip", + path_admin = "data-floodam/adapted/admin-express" +) floodam.data::alert_mattermost( sprintf( - "Hello ! For your information, following versions of geo_sirene have been downloaded:\n%s", + "Hello ! Vintage **%s** of **geo_sirene** has been adapted.\n", paste("-", month, collapse = "\n") ) ) \ No newline at end of file diff --git a/tests/testthat/test_analyse_archive.R b/tests/testthat/test_analyse_archive.R index d4b50a7525e19abcd31b9fb1f1ab92c9a8600078..8c56bef0cec6b86a807b4c7761d1b4864751057c 100644 --- a/tests/testthat/test_analyse_archive.R +++ b/tests/testthat/test_analyse_archive.R @@ -8,23 +8,45 @@ test_that("Test analyse_archive", ) expect_identical( - names(analyse), - c( - "data", "version", "precision", "type", "projection", - "scope", "vintage", "year", "extension", "name" - ) + analyse["data"], + setNames("RPG", "data") + ) + expect_identical( + analyse["version"], + setNames("2-0", "version") + ) + expect_identical( + analyse["precision"], + setNames(NA_character_, "precision") + ) + expect_identical( + analyse["type"], + setNames("SHP", "type") + ) + expect_identical( + analyse["projection"], + setNames("LAMB93", "projection") + ) + expect_identical( + analyse["scope"], + setNames("R76", "scope") + ) + expect_identical( + analyse["vintage"], + setNames("2020-01-01", "vintage") + ) + expect_identical( + analyse["year"], + setNames("2020", "year") + ) + expect_identical( + analyse["extension"], + setNames("7z", "extension") + ) + expect_identical( + analyse["name"], + setNames("RPG_2-0__SHP_LAMB93_R76_2020-01-01", "name") ) - - 
expect_identical(analyse[["data"]], "RPG") - expect_identical(analyse[["version"]], "2-0") - expect_identical(analyse[["precision"]], NA_character_) - expect_identical(analyse[["type"]], "SHP") - expect_identical(analyse[["projection"]], "LAMB93") - expect_identical(analyse[["scope"]], "R76") - expect_identical(analyse[["vintage"]], "2020-01-01") - expect_identical(analyse[["year"]], "2020") - expect_identical(analyse[["extension"]], "7z") - expect_identical(analyse[["name"]], "RPG_2-0__SHP_LAMB93_R76_2020-01-01") archive_floodam = "rpg_parcelle_D001_2020-01-01.rds" analyse = expect_warning( @@ -40,19 +62,136 @@ test_that("Test analyse_archive", ) # origin = "floodam.data" - analyse = expect_silent(analyse_archive(archive_floodam, origin = "floodam.data")) + analyse = expect_silent( + analyse_archive(archive_floodam, origin = "floodam.data") + ) expect_identical( - names(analyse), - c("data", "precision", "scope", "vintage", "extension", "name") + analyse["data"], + setNames("rpg", "data") + ) + expect_identical( + analyse["precision"], + setNames("parcelle", "precision") + ) + expect_identical( + analyse["scope"], + setNames("D001", "scope") + ) + expect_identical( + analyse["vintage"], + setNames("2020-01-01", "vintage") + ) + expect_identical( + analyse["extension"], + setNames("rds", "extension") + ) + expect_identical( + analyse["name"], + setNames("rpg_parcelle_D001_2020-01-01", "name") + ) + + # origin = "geo-sirene" + geo_sirene = c( + "2024-01/geo_siret_34.csv.gz", + "2024-01/StockEtablissement_utf8_geo.csv.gz", + "2024-01/StockEtablissementActif_utf8_geo.csv.gz", + "2024-01/StockEtablissementFerme_utf8_geo.csv.gz", + "2024-01/geo_sirene.csv.gz", + "2024-01/etablissements_actifs.csv.gz", + "2024-01/etablissements_fermes.csv.gz", + "bad-date/geo_siret_34.csv.gz" + ) + + analyse = expect_silent( + analyse_archive(geo_sirene, origin = "geo-sirene") + ) + names = c( + "data", "precision", "version", "scope", "vintage", + "extension", "name" ) - 
expect_identical(analyse[["data"]], "rpg") - expect_identical(analyse[["precision"]], "parcelle") - expect_identical(analyse[["scope"]], "D001") - expect_identical(analyse[["vintage"]], "2020-01-01") - expect_identical(analyse[["extension"]], "rds") - expect_identical(analyse[["name"]], "rpg_parcelle_D001_2020-01-01") + expect_identical(colnames(analyse), names) + expect_identical( + analyse_archive(geo_sirene[1], origin = "geo-sirene"), + setNames( + c( + "geo-siret", NA, "2019", "D034", "2024-01", "csv.gz", + "geo_siret_34" + ), + names + ) + ) + expect_identical( + analyse_archive(geo_sirene[2], origin = "geo-sirene"), + setNames( + c( + "geo-siret", NA, "2019", "france", "2024-01", "csv.gz", + "StockEtablissement_utf8_geo" + ), + names + ) + ) + expect_identical( + analyse_archive(geo_sirene[3], origin = "geo-sirene"), + setNames( + c( + "geo-siret", "active", "2019", "france", "2024-01", + "csv.gz", "StockEtablissementActif_utf8_geo" + ), + names + ) + ) + expect_identical( + analyse_archive(geo_sirene[4], origin = "geo-sirene"), + setNames( + c( + "geo-siret", "closed", "2019", "france", "2024-01", + "csv.gz", "StockEtablissementFerme_utf8_geo" + ), + names + ) + ) + expect_identical( + analyse_archive(geo_sirene[5], origin = "geo-sirene"), + setNames( + c( + "geo-siret", NA, "2017", "france", "2024-01", + "csv.gz", "geo_sirene" + ), + names + ) + ) + expect_identical( + analyse_archive(geo_sirene[6], origin = "geo-sirene"), + setNames( + c( + "geo-siret", "active", "2017", "france", "2024-01", + "csv.gz", "etablissements_actifs" + ), + names + ) + ) + expect_identical( + analyse_archive(geo_sirene[7], origin = "geo-sirene"), + setNames( + c( + "geo-siret", "closed", "2017", "france", "2024-01", + "csv.gz", "etablissements_fermes" + ), + names + ) + ) + expect_identical( + analyse_archive(geo_sirene[8], origin = "geo-sirene"), + setNames( + c( + "geo-siret", NA, "2019", "D034", NA, "csv.gz", + "geo_siret_34" + ), + names + ) + ) } ) diff --git 
a/tests/testthat/test_get_base_url.R b/tests/testthat/test_get_base_url.R new file mode 100644 index 0000000000000000000000000000000000000000..f47b2e1aa1dc264189f16b039a7e01bec9f9bb84 --- /dev/null +++ b/tests/testthat/test_get_base_url.R @@ -0,0 +1,18 @@ +test_that("Test get_base_url", + { + expect_identical( + get_base_url("http://parent/child/archive.csv.gz"), + "http://parent" + ) + + expect_identical( + get_base_url("https://parent/child/archive.csv.gz"), + "https://parent" + ) + + expect_identical( + get_base_url("parent/child/archive.csv.gz"), + "parent/child/archive.csv.gz" + ) + } +) \ No newline at end of file diff --git a/tests/testthat/test_get_date_from_html.R b/tests/testthat/test_get_date_from_html.R new file mode 100644 index 0000000000000000000000000000000000000000..13f6f2893900ba5f130c810fd03093f67b9552de --- /dev/null +++ b/tests/testthat/test_get_date_from_html.R @@ -0,0 +1,38 @@ +test_that("Test get_date_from_html", + { + x = c( + "<html>", + "<head><title>Index of /example/</title></head>", + "<body>", + "<h1>Index of /example/</h1><hr><pre>", + "<a href='../'>../</a>", + "<a href='parent/2023-11/archive.csv.gz'>archive.csv.gz</a>", + "<a href='2023-12/archive.csv.gz'>archive.csv.gz</a>", + "<a href=\"2024-01/\">2024-01/</a> 07-Jan-2024 09:54", + "<a href=\"parent/child/2024-02/\">2024-02/</a> 07-Feb-2024 09:54", + "<a href=\"2024-03\">2024-03</a> 07-Mar-2024 10:02", + "<a href=\"2024-01-01/\">2024-01-01/</a> 02-Jan-2024 09:56", + "<a href=\"2024-01-02\">2024-01-02</a> 03-jan-2024 11:02", + "<a href=\"2023/\">2023/</a> 01-Feb-2023 15:54", + "<a href=\"2024\">2024</a> 01-Feb-2024 15:54", + "<a href=\"last/\">last/</a> 07-Mar-2024 10:02", + "</pre><hr></body>", + "</html>" + ) + + month = c("2024-01", "2024-02", "2024-03") + last = c(month, "last") + day = c("2024-01-01", "2024-01-02") + year = c("2023", "2024") + combine = c(month, day, year) + + expect_identical(get_date_from_html(x = x), month) + expect_identical(get_date_from_html(x = x, 
expected = "month"), month) + expect_identical(get_date_from_html(x = x, expected = "day"), day) + expect_identical(get_date_from_html(x = x, expected = "any"), combine) + expect_identical(get_date_from_html(x = x, last = TRUE), last) + expect_identical(get_date_from_html(x = x[-(8:10)]), character()) + last = expect_warning(get_date_from_html(x = x[-(8:10)], last = TRUE)) + expect_identical(last, "last") + } +) \ No newline at end of file diff --git a/tests/testthat/test_get_link_from_html.R b/tests/testthat/test_get_link_from_html.R new file mode 100644 index 0000000000000000000000000000000000000000..0b277ef0537185bccfd1ebbe6d6103ec35dc5b3f --- /dev/null +++ b/tests/testthat/test_get_link_from_html.R @@ -0,0 +1,31 @@ +test_that("Test get_date_from_html", + { + x = c( + "<html>", + "<head><title>Index of /example/</title></head>", + "<body>", + "<h1>Index of /example/</h1><hr><pre>", + "<a href='../'>../</a>", + "<a href='parent/child/archive.csv.gz'>archive.csv.gz</a>", + "<a href=\"2024-01/\">2024-01/</a> 07-Jan-2024 00:12 -", + "<a href=\"2024-02/\">2024-02/</a> 07-Feb-2024 09:54 -", + "<a href=\"2024-03\">2024-03</a> 07-Mar-2024 10:02 -", + "<a href=\"2024-01-01/\">2024-01-01/</a> 02-Jan-2024 09:56 -", + "<a href=\"2024-01-02\">2024-01-02</a> 03-jan-2024 11:02 -", + "<a href=\"2023/\">2023/</a> 01-Feb-2023 15:54 -", + "<a href=\"2024\">2024</a> 01-Feb-2024 15:54 -", + "<a href=\"last/\">last/</a> 07-Mar-2024 10:02 -", + "</pre><hr></body>", + "</html>" + ) + + result = c( + "../", "parent/child/archive.csv.gz", "2024-01/", "2024-02/", + "2024-03", "2024-01-01/", "2024-01-02", "2023/", "2024", "last/") + + expect_identical(get_link_from_html(x = x), result) + + x = paste(x, collapse = "") + expect_identical(get_link_from_html(x = x), result) + } +) \ No newline at end of file diff --git a/tests/testthat/test_to_logical.R b/tests/testthat/test_to_logical.R new file mode 100644 index 
0000000000000000000000000000000000000000..b9cae6d92a6edf352364633d48d5c6a2fc0065ba --- /dev/null +++ b/tests/testthat/test_to_logical.R @@ -0,0 +1,42 @@ +test_that("Test to_logical", + { + x = c(0:2, NA) + expect_identical(to_logical(x), as.logical(x)) + + x = c( + "true", "false", "T", "F", "True", "False", "TRUE", "FALSE", + "", "NA", NA + ) + expect_identical(to_logical(x), as.logical(x)) + + x = c("t", "f") + expect_identical(to_logical(x), as.logical(toupper(x))) + + # English yes/no + x = c("y", "Yes", "n", "No") + response = c(TRUE, TRUE, FALSE, FALSE) + expect_identical(to_logical(x), response) + expect_identical(to_logical(tolower(x)), response) + expect_identical(to_logical(toupper(x)), response) + + # French yes/no + x = c("o", "oui", "n", "non") + expect_identical(to_logical(x), response) + + # French true/false + x = c("v", "Vrai", "f", "Faux") + expect_identical(to_logical(x), response) + + # Mix do not work or explicit true and false + x = c("oui", "vrai") + expect_identical(to_logical(x), c(NA, NA)) + expect_identical(to_logical(x, true = x), c(NA, NA)) + expect_identical(to_logical(x, true = x, false = ""), c(TRUE, TRUE)) + + # For other convention (language) + x = c("s", "si", "n", "no") + true = c("S", "SI") + false = c("N", "NO") + expect_identical(to_logical(x, true, false), response) + } +)