script onrn

- récupération des données sur le site ONRN
- traitement des données en lien avec la sinistralité
- ajout de la variable "balance"
- sauvegarde dans data-common/so-ii/onrn

Refs #7

script onrn
- récupération des données sur le site ONRN
- traitement des données en lien avec la sinistralité
- ajout de la variable "balance"
- sauvegarde dans data-common/so-ii/onrn

Refs #7
1c7e2910 · Grelot Frederic · 7f5bbc15 · 1c7e2910
Commit 1c7e2910 authored 3 years ago by Grelot Frederic
Hide whitespace changes
Inline Side-by-side

Showing

with 280 additions and 0 deletions
+280 -0
--- a/script/onrn.R
+++ b/script/onrn.R
+# Libraries
+
+# library(geau)
+# library(sf)
+
+# Updating data from remote 
+
+## Local
+
+# Each run is archived in its own directory, named after the run date.
+today <- Sys.Date()
+archive_dir <- file.path("data-common", "data", "ONRN", "archive", format(today))
+dir.create(path = archive_dir, recursive = TRUE, showWarnings = FALSE)
+
+## Remote
+
+# Base URL of the ONRN files and names (without ".zip") of the archives to
+# fetch. Entries prefixed with "sinistralite/" live in that remote
+# sub-directory. Each archive is listed once: a repeated entry is downloaded
+# and extracted again for no benefit.
+remote_dir = "https://files.georisques.fr/onrn"
+archive = c(
+    "ONRN_Population_EAIP_CE",
+    "ONRN_Population_EAIP_SM",
+    "ONRN_Emprise_totale_bat_EAIP_CE",
+    "ONRN_Emprise_totale_bat_EAIP_SM",
+    "ONRN_Emprise_habitations_sans_etage_EAIP_CE",
+    "ONRN_Emprise_habitations_sans_etage_EAIP_SM",
+    "ONRN_Entreprises_EAIP",
+    "sinistralite/ONRN_nbReco_Inondation",
+    "sinistralite/ONRN_CoutMoyen_Inondation",
+    "sinistralite/ONRN_CoutCommune_Inondation",
+    "sinistralite/ONRN_Frequence_Inondation",
+    "sinistralite/ONRN_SsurP_Inondation",
+    "sinistralite/ONRN_CoutParHabitant_Inondation",
+    "ONRN_Avancement_PPRNI",
+    "ONRN_Anciennete_PPRNI"
+)
+
+## Download
+
+# Remote archives under "sinistralite/" are stored flat in the local archive
+# directory, so that prefix is dropped from the local file name.
+zip_name <- sprintf("%s.zip", archive)
+zip_local <- file.path(archive_dir, gsub("sinistralite/", "", zip_name))
+
+mapply(
+    utils::download.file,
+    url = file.path(remote_dir, zip_name),
+    destfile = zip_local,
+    method = "wget"
+)
+
+## Unzip
+
+# Every archive is extracted into the same "raw" sub-directory.
+mapply(
+    utils::unzip,
+    zipfile = zip_local,
+    exdir = file.path(archive_dir, "raw")
+)
+
+## Convert to UTF-8
+
+# Directory holding the extracted files; reused by the treatment sections.
+onrn_raw = file.path(archive_dir, "raw")
+# `pattern` is a regular expression: the dot is escaped and the extension
+# anchored so that only files ending in ".csv" are converted.
+for (f in dir(onrn_raw, pattern = "\\.csv$")) {
+    source_csv = file.path(onrn_raw, f)
+    # Convert into a scratch file first, then replace the original.
+    converted_csv = tempfile(tmpdir = onrn_raw, fileext = ".tmp")
+    # Paths are shell-quoted so that spaces or special characters in file
+    # names cannot break (or alter) the command.
+    status = system(sprintf(
+        "iconv -f ISO-8859-1 -t UTF-8 %s -o %s",
+        shQuote(source_csv), shQuote(converted_csv)
+    ))
+    # Never overwrite the original with a failed or partial conversion.
+    if (status != 0 || !file.rename(converted_csv, source_csv)) {
+        unlink(converted_csv)
+        stop("UTF-8 conversion failed for ", source_csv)
+    }
+}
+
+## Remove pdf
+
+unlink(dir(onrn_raw, pattern = "\\.pdf$", full.names = TRUE))
+
+# Treatment
+
+## Selection
+
+# Communes to keep; used below to index rows by name.
+selection = geau::so_ii_scope
+
+## Number of Cat-Nat recognitions "ONRN_nbRecos_Inon"
+
+pattern = "ONRN_nbRecos_Inon"
+variable = "n_catnat"
+# Columns 1 and 3 of the file are kept as commune and value, read as text.
+temp = rio::import(
+    dir(onrn_raw, pattern = pattern, full.names = TRUE),
+    setclass = "data.frame", col_types = "text")[c(1, 3)]
+names(temp) = c("commune", variable)
+# Communes without any recognition carry a text label instead of a count.
+temp[[variable]][temp[[variable]] %in% "Pas de reconnaissance"] = 0
+# Store the counts as numbers rather than text. type.convert() leaves the
+# column as character if any value does not parse, so nothing is lost.
+temp[[variable]] = utils::type.convert(temp[[variable]], as.is = TRUE)
+
+rownames(temp) = temp[["commune"]]
+temp = temp[selection, ]
+# Selected communes absent from the file yield all-NA rows; restore their
+# code so that the merges on "commune" below still have a usable key.
+temp[["commune"]] = selection
+
+result = temp
+
+### Claim frequency: "ONRN_FreqMoyenne_Inon"
+
+pattern = "ONRN_FreqMoyenne_Inon"
+variable = "freq_sin"
+temp = rio::import(
+    dir(onrn_raw, pattern = pattern, full.names = TRUE),
+    setclass = "data.frame", col_types = "text")[c(1, 3)]
+names(temp) = c("commune", variable)
+
+# Lookup table: class label -> lower / upper bound of the class.
+conversion = data.frame(
+    freq_sin = c(
+        "Pas de sinistre ou de risque répertoriés à CCR",
+        "Entre 0 et 1 ‰",
+        "Entre 1 et 2 ‰",
+        "Entre 2 et 5 ‰",
+        "Entre 5 et 10 ‰",
+        "Plus de 10 ‰"
+    ),
+    freq_sin_min = c(0, 0, 1, 2, 5, 10)/1000,
+    freq_sin_max = c(0, 1, 2, 5, 10, 1000)/1000)
+# Labels missing from the lookup table would silently become NA in the merge.
+unmatched = setdiff(temp[[variable]], conversion[[variable]])
+if (length(unmatched) > 0) {
+    warning("Unknown label(s) for freq_sin: ", paste(unmatched, collapse = "; "))
+}
+# The merge key (the label) comes first in the result and is dropped.
+temp = merge(temp, conversion, all.x = TRUE)[-1]
+# Class midpoint as point estimate.
+temp[[variable]] = (temp[["freq_sin_min"]] + temp[["freq_sin_max"]]) / 2
+# Open-ended top class (upper bound set to 1): 1.5 times the lower bound.
+# which() drops NA, so unmatched rows cannot break the assignment.
+open_ended = which(temp[["freq_sin_max"]] == 1)
+temp[[variable]][open_ended] = 1.5 * temp[["freq_sin_min"]][open_ended]
+
+rownames(temp) = temp[["commune"]]
+temp = temp[selection, ]
+# Keep the commune code for selected communes absent from the file.
+temp[["commune"]] = selection
+
+result = merge(result, temp, by = "commune", all.x = TRUE)
+
+### Cost of floods: "ONRN_CoutCum_Inon"
+
+pattern = "ONRN_CoutCum_Inon"
+variable = "cost"
+temp = rio::import(
+    dir(onrn_raw, pattern = pattern, full.names = TRUE),
+    setclass = "data.frame", col_types = "text")[c(1, 3)]
+names(temp) = c("commune", variable)
+
+# Lookup table: class label -> lower / upper bound of the class.
+conversion = data.frame(
+    cost = c(
+        "Pas de sinistre répertorié à CCR",
+        "Entre 0 k€ et 100 k€",
+        "Entre 100 k€ et 500 k€",
+        "Entre 500 k€ et 2 M€",
+        "Entre 2 M€ et 5 M€",
+        "Entre 5 M€ et 10 M€",
+        "Entre 10 M€ et 50 M€",
+        "Entre 50 M€ et 100 M€",
+        "Supérieur à 100 M€"
+    ),
+    cost_min = c(0, 0, 1e5, 5e5, 2e6, 5e6, 10e6, 50e6, 100e6),
+    cost_max = c(0, 1e5, 5e5, 2e6, 5e6, 10e6, 50e6, 100e6, +Inf)
+)
+# Labels missing from the lookup table would silently become NA in the merge.
+unmatched = setdiff(temp[[variable]], conversion[[variable]])
+if (length(unmatched) > 0) {
+    warning("Unknown label(s) for cost: ", paste(unmatched, collapse = "; "))
+}
+# The merge key (the label) comes first in the result and is dropped.
+temp = merge(temp, conversion, all.x = TRUE)[-1]
+# Class midpoint as point estimate.
+temp[[variable]] = (temp[["cost_min"]] + temp[["cost_max"]]) / 2
+# Open-ended top class (infinite upper bound): 1.5 times the lower bound.
+open_ended = is.infinite(temp[["cost_max"]])
+temp[[variable]][open_ended] = 1.5 * temp[["cost_min"]][open_ended]
+
+rownames(temp) = temp[["commune"]]
+temp = temp[selection, ]
+# Keep the commune code for selected communes absent from the file.
+temp[["commune"]] = selection
+
+result = merge(result, temp, by = "commune", all.x = TRUE)
+
+### Mean cost of floods: "ONRN_CtMoyen_Inon"
+
+pattern = "ONRN_CtMoyen_Inon"
+variable = "cost_mean"
+temp = rio::import(
+    dir(onrn_raw, pattern = pattern, full.names = TRUE),
+    setclass = "data.frame", col_types = "text")[c(1, 3)]
+names(temp) = c("commune", variable)
+
+# Lookup table: class label -> lower / upper bound of the class.
+conversion = data.frame(
+    cost_mean = c(
+        "Pas de sinistre répertorié à CCR",
+        "Entre 0 et 2,5 k€",
+        "Entre 2,5 et 5 k€",
+        "Entre 5 et 10 k€",
+        "Entre 10 et 20k€",
+        "Plus de 20 k€"
+    ),
+    cost_mean_min = c(0, 0, 2.5e3, 5e3, 10e3, 20e3),
+    cost_mean_max = c(0, 2.5e3, 5e3, 10e3, 20e3, +Inf)
+)
+# Labels missing from the lookup table would silently become NA in the merge.
+unmatched = setdiff(temp[[variable]], conversion[[variable]])
+if (length(unmatched) > 0) {
+    warning("Unknown label(s) for cost_mean: ", paste(unmatched, collapse = "; "))
+}
+# The merge key (the label) comes first in the result and is dropped.
+temp = merge(temp, conversion, all.x = TRUE)[-1]
+# Class midpoint as point estimate.
+temp[[variable]] = (temp[["cost_mean_min"]] + temp[["cost_mean_max"]]) / 2
+# Open-ended top class (infinite upper bound): 1.5 times the lower bound.
+open_ended = is.infinite(temp[["cost_mean_max"]])
+temp[[variable]][open_ended] = 1.5 * temp[["cost_mean_min"]][open_ended]
+
+rownames(temp) = temp[["commune"]]
+temp = temp[selection, ]
+# Keep the commune code for selected communes absent from the file.
+temp[["commune"]] = selection
+
+result = merge(result, temp, by = "commune", all.x = TRUE)
+
+### Flood cost per inhabitant: "ONRN_CoutInon_parHabitant"
+
+pattern = "ONRN_CoutInon_parHabitant"
+variable = "cost_hab"
+temp = rio::import(
+    dir(onrn_raw, pattern = pattern, full.names = TRUE),
+    setclass = "data.frame", col_types = "text")[c(1, 3)]
+names(temp) = c("commune", variable)
+
+# Lookup table: class label -> lower / upper bound of the class.
+# The NA row makes a missing label an accepted value mapped to NA bounds.
+conversion = data.frame(
+    cost_hab = c(
+        "Pas de sinistre répertorié à CCR",
+        "Moins de 100 €/habitant",
+        "Entre 100 € et 500€/habitant",
+        "Entre 500 € et 1 k€/habitant",
+        "Entre 1 k€ € et 10 k€/habitant",
+        "Supérieur à 10 k€/habitant",
+        NA
+    ),
+    cost_hab_min = c(0, 0, 100, 500, 1000, 10000, NA),
+    cost_hab_max = c(0, 100, 500, 1000, 10000, +Inf, NA)
+)
+# Labels missing from the lookup table would silently become NA in the merge.
+unmatched = setdiff(temp[[variable]], conversion[[variable]])
+if (length(unmatched) > 0) {
+    warning("Unknown label(s) for cost_hab: ", paste(unmatched, collapse = "; "))
+}
+# The merge key (the label) comes first in the result and is dropped.
+temp = merge(temp, conversion, all.x = TRUE)[-1]
+# Class midpoint as point estimate.
+temp[[variable]] = (temp[["cost_hab_min"]] + temp[["cost_hab_max"]]) / 2
+# Open-ended top class (infinite upper bound): 1.5 times the lower bound.
+open_ended = is.infinite(temp[["cost_hab_max"]])
+temp[[variable]][open_ended] = 1.5 * temp[["cost_hab_min"]][open_ended]
+
+rownames(temp) = temp[["commune"]]
+temp = temp[selection, ]
+# Keep the commune code for selected communes absent from the file.
+temp[["commune"]] = selection
+
+result = merge(result, temp, by = "commune", all.x = TRUE)
+
+### Claims-to-premium ratio for floods: "ONRN_SsurP_Inon"
+
+pattern = "ONRN_SsurP_Inon"
+variable = "ratio"
+temp = rio::import(
+    dir(onrn_raw, pattern = pattern, full.names = TRUE),
+    setclass = "data.frame", col_types = "text")[c(1, 3)]
+names(temp) = c("commune", variable)
+
+# Lookup table: class label -> lower / upper bound of the class, as fractions.
+conversion = data.frame(
+    ratio = c(
+        "Pas de sinistre ou de prime répertoriés à CCR",
+        "Entre 0 et 10 %",
+        "Entre 10 et 50 %",
+        "Entre 50 et 100%",
+        "Entre 100 et 200 %",
+        "Plus de 200%"),
+    ratio_min = c(0, 0, 0.1, 0.5, 1, 2),
+    ratio_max = c(0, 0.1, 0.5, 1, 2, +Inf))
+# Labels missing from the lookup table would silently become NA in the merge.
+unmatched = setdiff(temp[[variable]], conversion[[variable]])
+if (length(unmatched) > 0) {
+    warning("Unknown label(s) for ratio: ", paste(unmatched, collapse = "; "))
+}
+# The merge key (the label) comes first in the result and is dropped.
+temp = merge(temp, conversion, all.x = TRUE)[-1]
+# Class midpoint as point estimate.
+temp[[variable]] = (temp[["ratio_min"]] + temp[["ratio_max"]]) / 2
+# Open-ended top class (infinite upper bound): 1.5 times the lower bound.
+open_ended = is.infinite(temp[["ratio_max"]])
+temp[[variable]][open_ended] = 1.5 * temp[["ratio_min"]][open_ended]
+
+rownames(temp) = temp[["commune"]]
+temp = temp[selection, ]
+# Keep the commune code for selected communes absent from the file.
+temp[["commune"]] = selection
+
+result = merge(result, temp, by = "commune", all.x = TRUE)
+
+### Approved PPRI: "PPRi_anciennete_avancement"
+
+pattern = "PPRi_anciennete_avancement"
+variable = c("ppri_year", "ppri_state", "ppri_state_sub", "ppri_age_ori")
+# Columns 1, 8, 7, 2 and 9 of the file are kept, in that order.
+temp = rio::import(
+    dir(onrn_raw, pattern = pattern, full.names = TRUE),
+    setclass = "data.frame")[c(1, 8, 7, 2, 9)]
+names(temp) = c("commune", variable)
+
+# Column shared by `temp` and `conversion`, hence the merge key. The label
+# check below must look at this column: checking a column that exists in
+# neither table compares NULL with NULL and always passes.
+variable = "ppri_age_ori"
+# Lookup table: age label -> state and lower / upper bound of the age class.
+conversion = data.frame(
+    ppri_age_ori = c(
+        "Approuvé depuis moins de 5 ans",
+        "Approuvé entre 5 et 10 ans",
+        "Approuvé entre 10 et 20 ans",
+        "Approuvé depuis plus de 20 ans",
+        "Prescrit depuis moins de 4 ans",
+        "Prescrit depuis plus de 4 ans"
+    ),
+    ppri_state_age = c("approuve", "approuve", "approuve", "approuve", "prescrit", "prescrit"),
+    ppri_age_min = c(0,  5, 10,   20, 0,   4),
+    ppri_age_max = c(5, 10, 20, +Inf, 4, +Inf)
+)
+# NA is tolerated here: presumably a commune without PPRI -- TODO confirm.
+unmatched = setdiff(temp[[variable]], c(conversion[[variable]], NA))
+if (length(unmatched) > 0) {
+    warning("Unknown label(s) for ppri_age_ori: ", paste(unmatched, collapse = "; "))
+}
+# The merge key (the label) comes first in the result and is dropped.
+temp = merge(temp, conversion, all.x = TRUE)[-1]
+
+rownames(temp) = temp[["commune"]]
+temp = temp[selection, ]
+# Keep the commune code for selected communes absent from the file.
+temp[["commune"]] = selection
+
+result = merge(result, temp, by = "commune", all.x = TRUE)
+
+### Claims vs premiums balance: estimate
+
+# balance = (1 - ratio) * population * premium_hab
+population = 66992159 # INSEE (2018)
+premium = 1670000000 # (CCR2019a for 2018)
+premium_hab = premium / population
+
+# merge() sorts its result on the key, so the rows of `result` are ordered by
+# commune and not necessarily like the population table. Look the population
+# up by commune code when the table's row names allow it; otherwise fall back
+# to positional matching.
+# NOTE(review): assumes row names of geau::so_ii_population, when they match,
+# are commune codes -- confirm.
+so_ii_pop = geau::so_ii_population
+communes = as.character(result[["commune"]])
+if (!is.null(rownames(so_ii_pop)) && all(communes %in% rownames(so_ii_pop))) {
+    pop_2018 = so_ii_pop[communes, "2018"]
+} else {
+    pop_2018 = so_ii_pop[ , "2018"]
+}
+result[["balance"]] = (1 - result[["ratio"]]) * pop_2018 * premium_hab
+
+# write.csv2() does not create missing directories.
+dir.create("data-common/so-ii/onrn", showWarnings = FALSE, recursive = TRUE)
+write.csv2(result, sprintf("data-common/so-ii/onrn/onrn-%s.csv", today), row.names = FALSE)
+
+# The extracted files are no longer needed once the result is saved.
+unlink(onrn_raw, recursive = TRUE, force = TRUE)
+
# NOTE(review): `metropole` is not defined anywhere in the visible script and
# this line has no diff "+" marker -- presumably a stray line; confirm where
# `metropole` comes from before running.
metropole@data$loss.absolute = metropole@data$POPULATION * (metropole@data$loss.ratio - 1) / metropole@data$area