# onrn.R
# Libraries
# library(geau)
# library(sf)
# Updating data from remote 
## Local
# Run date: it names the archive folder created below.
today <- Sys.Date()
# One archive folder per run date, created together with any missing parent
# folder; nothing is reported when the folder already exists.
archive_dir <- file.path("data-common/data/ONRN/archive", format(today))
dir.create(archive_dir, recursive = TRUE, showWarnings = FALSE)
## Remote
# Base URL the archives are downloaded from.
remote_dir <- "https://files.georisques.fr/onrn"
# Archive names relative to remote_dir, without the ".zip" extension.
# "sinistralite/ONRN_nbReco_Inondation" used to be listed three times, which
# only made the same file be downloaded and unzipped three times; it is now
# listed once.
archive <- c(
    "ONRN_Population_EAIP_CE",
    "ONRN_Population_EAIP_SM",
    "ONRN_Emprise_totale_bat_EAIP_CE",
    "ONRN_Emprise_totale_bat_EAIP_SM",
    "ONRN_Emprise_habitations_sans_etage_EAIP_CE",
    "ONRN_Emprise_habitations_sans_etage_EAIP_SM",
    "ONRN_Entreprises_EAIP",
    "sinistralite/ONRN_nbReco_Inondation",
    "sinistralite/ONRN_CoutMoyen_Inondation",
    "sinistralite/ONRN_CoutCommune_Inondation",
    "sinistralite/ONRN_Frequence_Inondation",
    "sinistralite/ONRN_SsurP_Inondation",
    "sinistralite/ONRN_CoutParHabitant_Inondation",
    "ONRN_Avancement_PPRNI",
    "ONRN_Anciennete_PPRNI"
)
## Download
# Download every archive into archive_dir. The remote sub-folder (e.g.
# "sinistralite/") is not reproduced locally: only the file name is kept.
# unique() avoids fetching the same archive twice.
zip_name <- unique(sprintf("%s.zip", archive))
download_status <- mapply(
    utils::download.file,
    url = file.path(remote_dir, zip_name),
    destfile = file.path(archive_dir, basename(zip_name)),
    method = "wget"
)
# download.file() returns 0 on success and a non-zero code otherwise; name
# the archives that failed so that a later import error can be traced back.
failed <- zip_name[download_status != 0]
if (length(failed) > 0) {
    warning(
        "Download failed for: ", paste(failed, collapse = ", "),
        call. = FALSE
    )
}
## Unzip
# Extract every downloaded archive into the shared "raw" folder. The local
# zip names carry no remote sub-folder, hence basename(); unique() avoids
# extracting the same archive twice.
zip_path <- unique(
    file.path(archive_dir, basename(sprintf("%s.zip", archive)))
)
# invisible(): the list of extracted paths is not needed afterwards.
invisible(lapply(
    zip_path,
    utils::unzip,
    exdir = file.path(archive_dir, "raw")
))
## Convert to UTF-8
# Folder holding the extracted files.
onrn_raw <- file.path(archive_dir, "raw")
# Re-encode every csv file from ISO-8859-1 to UTF-8, in place, through a
# temporary file. The pattern is anchored so that only names ending in
# ".csv" are selected.
for (csv_file in dir(onrn_raw, pattern = "\\.csv$", full.names = TRUE)) {
    converted <- file.path(onrn_raw, "temp.csv")
    # shQuote() protects paths containing spaces or shell metacharacters.
    status <- system(sprintf(
        "iconv -f ISO-8859-1 -t UTF-8 %s -o %s",
        shQuote(csv_file),
        shQuote(converted)
    ))
    if (status == 0) {
        file.rename(converted, csv_file)
    } else {
        # Keep the original file rather than replacing it with a partial
        # conversion.
        unlink(converted)
        warning("iconv failed for: ", csv_file, call. = FALSE)
    }
}
## Remove pdf
# Delete the pdf files found in the raw folder. The pattern is anchored so
# that only names ending in ".pdf" are removed (an unescaped "." matches any
# character, anywhere in the name).
unlink(dir(onrn_raw, pattern = "\\.pdf$", full.names = TRUE))
# Treatment
## Selection
# Commune identifiers to keep; every table below is subset to these rows.
selection <- geau::so_ii_scope

## Helpers

# Import columns 1 and 3 of the raw file matching `pattern`, as text, and
# name them "commune" and `variable`.
read_onrn_class <- function(pattern, variable, raw_dir = onrn_raw) {
    raw <- rio::import(
        dir(raw_dir, pattern = pattern, full.names = TRUE),
        setclass = "data.frame",
        col_types = "text"
    )[c(1, 3)]
    names(raw) <- c("commune", variable)
    raw
}

# Warn when non-missing labels in `values` have no entry in `known`; such
# rows end up as NA after the merge with the conversion table.
check_labels <- function(values, known, variable) {
    unknown <- setdiff(unique(values[!is.na(values)]), known)
    if (length(unknown) > 0) {
        warning(
            sprintf(
                "%s: label(s) missing from the conversion table: %s",
                variable,
                paste(unknown, collapse = "; ")
            ),
            call. = FALSE
        )
    }
    invisible(length(unknown) == 0)
}

# Index `temp` by commune and keep the rows of `communes`, in that order.
# Communes absent from `temp` give all-NA rows.
keep_communes <- function(temp, communes) {
    rownames(temp) <- temp[["commune"]]
    temp[communes, ]
}

# Replace the class label stored in `temp[[variable]]` by a number: the
# middle of the class bounds read from `conversion` (columns
# "<variable>_min" and "<variable>_max"), or 1.5 times the lower bound for
# the open-ended class, recognised by its upper bound `open_upper`.
# Returns the columns commune, <variable>_min, <variable>_max and <variable>
# for the rows of `communes`.
class_to_number <- function(temp, conversion, variable, communes,
                            open_upper = Inf) {
    check_labels(temp[[variable]], conversion[[variable]], variable)
    # [-1] drops the label column, which merge() puts first.
    temp <- merge(temp, conversion, by = variable, all.x = TRUE)[-1]
    lower <- temp[[paste0(variable, "_min")]]
    upper <- temp[[paste0(variable, "_max")]]
    temp[[variable]] <- (lower + upper) / 2
    # which() ignores the NA bounds of unmatched labels.
    open <- which(upper == open_upper)
    temp[[variable]][open] <- 1.5 * lower[open]
    keep_communes(temp, communes)
}

## Number of Cat-Nat recognitions: "ONRN_nbRecos_Inon"
temp <- read_onrn_class("ONRN_nbRecos_Inon", "n_catnat")
# The column was imported as text and stays text: 0 is stored as "0".
temp[["n_catnat"]][temp[["n_catnat"]] %in% "Pas de reconnaissance"] <- 0
result <- keep_communes(temp, selection)

### Claim frequency: "ONRN_FreqMoyenne_Inon"
conversion <- data.frame(
    freq_sin = c(
        "Pas de sinistre ou de risque répertoriés à CCR",
        "Entre 0 et 1 ‰",
        "Entre 1 et 2 ‰",
        "Entre 2 et 5 ‰",
        "Entre 5 et 10 ‰",
        "Plus de 10 ‰"
    ),
    freq_sin_min = c(0, 0, 1, 2, 5, 10) / 1000,
    freq_sin_max = c(0, 1, 2, 5, 10, 1000) / 1000
)
# The open-ended class is the one whose upper bound is 1 (1000 per mille).
temp <- class_to_number(
    read_onrn_class("ONRN_FreqMoyenne_Inon", "freq_sin"),
    conversion, "freq_sin", selection,
    open_upper = 1
)
result <- merge(result, temp, by = "commune", all.x = TRUE)

### Cost of floods: "ONRN_CoutCum_Inon"
conversion <- data.frame(
    cost = c(
        "Pas de sinistre répertorié à CCR",
        "Entre 0 k€ et 100 k€",
        "Entre 100 k€ et 500 k€",
        "Entre 500 k€ et 2 M€",
        "Entre 2 M€ et 5 M€",
        "Entre 5 M€ et 10 M€",
        "Entre 10 M€ et 50 M€",
        "Entre 50 M€ et 100 M€",
        "Supérieur à 100 M€"
    ),
    cost_min = c(0, 0, 1e5, 5e5, 2e6, 5e6, 10e6, 50e6, 100e6),
    cost_max = c(0, 1e5, 5e5, 2e6, 5e6, 10e6, 50e6, 100e6, Inf)
)
temp <- class_to_number(
    read_onrn_class("ONRN_CoutCum_Inon", "cost"),
    conversion, "cost", selection
)
result <- merge(result, temp, by = "commune", all.x = TRUE)

### Mean cost of floods: "ONRN_CtMoyen_Inon"
conversion <- data.frame(
    cost_mean = c(
        "Pas de sinistre répertorié à CCR",
        "Entre 0 et 2,5 k€",
        "Entre 2,5 et 5 k€",
        "Entre 5 et 10 k€",
        "Entre 10 et 20k€",
        "Plus de 20 k€"
    ),
    cost_mean_min = c(0, 0, 2.5e3, 5e3, 10e3, 20e3),
    cost_mean_max = c(0, 2.5e3, 5e3, 10e3, 20e3, Inf)
)
temp <- class_to_number(
    read_onrn_class("ONRN_CtMoyen_Inon", "cost_mean"),
    conversion, "cost_mean", selection
)
result <- merge(result, temp, by = "commune", all.x = TRUE)

### Cost of floods per inhabitant: "ONRN_CoutInon_parHabitant"
conversion <- data.frame(
    cost_hab = c(
        "Pas de sinistre répertorié à CCR",
        "Moins de 100 €/habitant",
        "Entre 100 € et 500€/habitant",
        "Entre 500 € et 1 k€/habitant",
        "Entre 1 k€ € et 10 k€/habitant",
        "Supérieur à 10 k€/habitant",
        NA
    ),
    cost_hab_min = c(0, 0, 100, 500, 1000, 10000, NA),
    cost_hab_max = c(0, 100, 500, 1000, 10000, Inf, NA)
)
temp <- class_to_number(
    read_onrn_class("ONRN_CoutInon_parHabitant", "cost_hab"),
    conversion, "cost_hab", selection
)
result <- merge(result, temp, by = "commune", all.x = TRUE)

### Claims over premiums for floods: "ONRN_SsurP_Inon"
conversion <- data.frame(
    ratio = c(
        "Pas de sinistre ou de prime répertoriés à CCR",
        "Entre 0 et 10 %",
        "Entre 10 et 50 %",
        "Entre 50 et 100%",
        "Entre 100 et 200 %",
        "Plus de 200%"
    ),
    ratio_min = c(0, 0, 0.1, 0.5, 1, 2),
    ratio_max = c(0, 0.1, 0.5, 1, 2, Inf)
)
temp <- class_to_number(
    read_onrn_class("ONRN_SsurP_Inon", "ratio"),
    conversion, "ratio", selection
)
result <- merge(result, temp, by = "commune", all.x = TRUE)

### Approved PPRI: "PPRi_anciennete_avancement"
temp <- rio::import(
    dir(onrn_raw, pattern = "PPRi_anciennete_avancement", full.names = TRUE),
    setclass = "data.frame"
)[c(1, 8, 7, 2, 9)]
names(temp) <- c(
    "commune", "ppri_year", "ppri_state", "ppri_state_sub", "ppri_age_ori"
)
conversion <- data.frame(
    ppri_age_ori = c(
        "Approuvé depuis moins de 5 ans",
        "Approuvé entre 5 et 10 ans",
        "Approuvé entre 10 et 20 ans",
        "Approuvé depuis plus de 20 ans",
        "Prescrit depuis moins de 4 ans",
        "Prescrit depuis plus de 4 ans"
    ),
    ppri_state_age = c(
        "approuve", "approuve", "approuve", "approuve", "prescrit", "prescrit"
    ),
    ppri_age_min = c(0, 5, 10, 20, 0, 4),
    ppri_age_max = c(5, 10, 20, Inf, 4, Inf)
)
# The labels to check are those of the merge key "ppri_age_ori" (the check
# used to look at a "state" column that does not exist, so it always passed).
check_labels(
    temp[["ppri_age_ori"]], conversion[["ppri_age_ori"]], "ppri_age_ori"
)
# [-1] drops the label column, which merge() puts first.
temp <- merge(temp, conversion, by = "ppri_age_ori", all.x = TRUE)[-1]
temp <- keep_communes(temp, selection)
# Communes absent from the PPRI table came back as all-NA rows: restore
# their identifier so that they still match in the merge below.
temp[["commune"]] <- selection
result <- merge(result, temp, by = "commune", all.x = TRUE)

### Claims - premiums balance: estimation
# result = read.csv2(geau::current_version("data-common/so-ii/onrn"))

#### Some data to compute premium per inhabitant
pop_france <- 66992159 # INSEE (2018)
premium_france <- 1670000000 # (CCR2019a for 2018)
premium_hab <- premium_france / pop_france

#### Need to compute cumulative population
# For each year of the period, use the available population column whose
# year is the closest; the same column may therefore be summed several times.
period <- seq(1995, 2018)
available <- as.integer(dimnames(geau::so_ii_population)[[2]])
nearest <- vapply(
    period,
    function(year) which.min(abs(available - year)),
    integer(1)
)
population_years <- as.character(available[nearest])
pop_commune <- rowSums(geau::so_ii_population[, population_years])

# NOTE(review): result is ordered by merge() (sorted on "commune") while
# pop_commune follows the row order of geau::so_ii_population; confirm that
# both orders match.
result[["balance"]] <- (1 - result[["ratio"]]) * pop_commune * premium_hab

# Final column order of the exported table.
result <- result[c(
    "commune",
    "n_catnat",
    "freq_sin",
    "cost",
    "cost_mean",
    "cost_hab",
    "ratio",
    "balance",
    "ppri_year",
    "ppri_state",
    "ppri_state_sub",
    "ppri_state_age",
    "ppri_age_min",
    "ppri_age_max",
    "freq_sin_min",
    "freq_sin_max",
    "cost_min",
    "cost_max",
    "cost_mean_min",
    "cost_mean_max",
    "cost_hab_min",
    "cost_hab_max",
    "ratio_min",
    "ratio_max"
)]

write.csv2(
    result,
    sprintf("data-common/so-ii/onrn/onrn-%s.csv", today),
    row.names = FALSE
)
# write.csv2(result, geau::current_version("data-common/so-ii/onrn"), row.names = FALSE)

# The extracted files are no longer needed once the table is written.
unlink(onrn_raw, recursive = TRUE, force = TRUE)