# Libraries

# library(geau)
# library(sf)

# Updating data from remote 

## Local

# Dated local folder that receives this run's copy of the ONRN archives.
today = Sys.Date()
archive_dir = file.path("data-common/data/ONRN/archive", as.character(today))
dir.create(path = archive_dir, recursive = TRUE, showWarnings = FALSE)

## Remote

# Base URL of the ONRN file server and the archives (without the ".zip"
# extension) to fetch from it. Entries prefixed with "sinistralite/" live in
# that sub-folder on the server.
remote_dir = "https://files.georisques.fr/onrn"
archive = c(
    "ONRN_Population_EAIP_CE",
    "ONRN_Population_EAIP_SM",
    "ONRN_Emprise_totale_bat_EAIP_CE",
    "ONRN_Emprise_totale_bat_EAIP_SM",
    "ONRN_Emprise_habitations_sans_etage_EAIP_CE",
    "ONRN_Emprise_habitations_sans_etage_EAIP_SM",
    "ONRN_Entreprises_EAIP",
    # Each archive is listed once: repeated entries only re-download and
    # re-extract the same file.
    "sinistralite/ONRN_nbReco_Inondation",
    "sinistralite/ONRN_CoutMoyen_Inondation",
    "sinistralite/ONRN_CoutCommune_Inondation",
    "sinistralite/ONRN_Frequence_Inondation",
    "sinistralite/ONRN_SsurP_Inondation",
    "sinistralite/ONRN_CoutParHabitant_Inondation",
    "ONRN_Avancement_PPRNI",
    "ONRN_Anciennete_PPRNI"
)

## Download

# Fetch every archive into archive_dir. The "sinistralite/" prefix only exists
# on the remote side, so it is stripped from the local file name.
zip_url = file.path(remote_dir, sprintf("%s.zip", archive))
zip_file = file.path(archive_dir, gsub("sinistralite/", "", sprintf("%s.zip", archive)))
download_status = mapply(
    utils::download.file,
    url = zip_url,
    destfile = zip_file,
    method = "wget"
)

# download.file() returns 0 on success and a non-zero code otherwise. Stop
# here rather than let a missing archive surface later as an obscure import
# error.
failed = zip_url[download_status != 0]
if (length(failed) > 0) {
    stop("ONRN download failed for: ", paste(failed, collapse = ", "), call. = FALSE)
}

## Unzip

# Extract each downloaded archive into the shared "raw" folder. Local zip
# names are the remote names without the "sinistralite/" prefix.
zip_paths = file.path(
    archive_dir,
    sprintf("%s.zip", gsub("sinistralite/", "", archive))
)
mapply(
    utils::unzip,
    zipfile = zip_paths,
    MoreArgs = list(exdir = file.path(archive_dir, "raw"))
)

## Convert to UTF-8

# Re-encode every extracted csv from ISO-8859-1 to UTF-8, in place.
onrn_raw = file.path(archive_dir, "raw")
# The pattern is a regular expression: escape the dot and anchor the extension
# so only files actually ending in ".csv" are converted.
for (f in dir(onrn_raw, pattern = "\\.csv$", full.names = TRUE)) {
    converted = tempfile(pattern = "iconv-", tmpdir = onrn_raw, fileext = ".tmp")
    # Paths are shell-quoted so spaces or special characters cannot break the
    # command line.
    status = system(sprintf(
        "iconv -f ISO-8859-1 -t UTF-8 %s -o %s", shQuote(f), shQuote(converted)
    ))
    # Only replace the original once iconv has succeeded; otherwise a failed
    # conversion would overwrite the source file with a partial result.
    if (status != 0 || !file.rename(converted, f)) {
        unlink(converted)
        stop("UTF-8 conversion failed for ", f, call. = FALSE)
    }
}

## Remove pdf

# The pattern is a regular expression: escape the dot and anchor the extension
# so that only files ending in ".pdf" are deleted.
unlink(dir(onrn_raw, pattern = "\\.pdf$", full.names = TRUE))

# Treatment

## Selection

# Communes to keep in the final table.
selection = geau::so_ii_scope

## Number of Cat-Nat recognitions: "ONRN_nbRecos_Inon"

pattern = "ONRN_nbRecos_Inon"
variable = "n_catnat"
# Keep the 1st column (used as commune key) and the 3rd column (the value).
temp = rio::import(
    dir(onrn_raw, pattern = pattern, full.names = TRUE),
    setclass = "data.frame",
    col_types = "text"
)[c(1, 3)]
names(temp) = c("commune", variable)
# Communes without recognition carry a text label instead of a count.
# NOTE(review): the column is imported as text and is not converted to a
# number here -- confirm that downstream code expects this.
temp[[variable]][temp[[variable]] %in% "Pas de reconnaissance"] = 0

# Reorder on the study scope. A commune missing from the file yields an all-NA
# row, so the key is restored from the scope (as the PPRI section does) to
# keep later merges on "commune" aligned.
rownames(temp) = temp[["commune"]]
temp = temp[selection, ]
temp[["commune"]] = selection

result = temp

### Claim frequency: "ONRN_FreqMoyenne_Inon"

pattern = "ONRN_FreqMoyenne_Inon"
variable = "freq_sin"
# Keep the 1st column (used as commune key) and the 3rd column (class label).
temp = rio::import(
    dir(onrn_raw, pattern = pattern, full.names = TRUE),
    setclass = "data.frame",
    col_types = "text"
)[c(1, 3)]
names(temp) = c("commune", variable)

# Class label -> numeric bounds (per-mille values divided by 1000). The top
# class has no real upper bound; 1000/1000 = 1 is used as its marker.
conversion = data.frame(
    freq_sin = c("Pas de sinistre ou de risque répertoriés à CCR", "Entre 0 et 1 ‰", "Entre 1 et 2 ‰", "Entre 2 et 5 ‰", "Entre 5 et 10 ‰", "Plus de 10 ‰"),
    freq_sin_min = c(0, 0, 1, 2, 5, 10)/1000,
    freq_sin_max = c(0, 1, 2, 5, 10, 1000)/1000)

# Labels absent from the table get NA bounds in the merge: report them.
unknown = setdiff(unique(temp[[variable]]), conversion[[variable]])
if (length(unknown) > 0) {
    warning(
        "Unmapped '", variable, "' labels (set to NA): ",
        paste(unknown, collapse = "; "), call. = FALSE
    )
}

# merge() puts the key first; dropping it leaves commune + the two bounds.
temp = merge(temp, conversion, by = variable, all.x = TRUE)[-1]
# Class midpoint, except for the open-ended top class which is valued at
# 1.5 x its lower bound. which() drops NA so unmapped rows cannot break the
# subscripted assignment.
temp[[variable]] = (temp[["freq_sin_min"]] + temp[["freq_sin_max"]]) / 2
open_class = which(temp[["freq_sin_max"]] == 1)
temp[[variable]][open_class] = 1.5 * temp[["freq_sin_min"]][open_class]

# Reorder on the study scope. A commune missing from the file yields an all-NA
# row, so the key is restored from the scope to keep the merge below aligned.
rownames(temp) = temp[["commune"]]
temp = temp[selection, ]
temp[["commune"]] = selection

result = merge(result, temp, by = "commune", all.x = TRUE)

### Cumulative flood cost: "ONRN_CoutCum_Inon"

pattern = "ONRN_CoutCum_Inon"
variable = "cost"
# Keep the 1st column (used as commune key) and the 3rd column (class label).
temp = rio::import(
    dir(onrn_raw, pattern = pattern, full.names = TRUE),
    setclass = "data.frame",
    col_types = "text"
)[c(1, 3)]
names(temp) = c("commune", variable)

# Class label -> numeric bounds; the top class is open-ended (+Inf).
conversion = data.frame(
    cost = c(
        "Pas de sinistre répertorié à CCR",
        "Entre 0 k€ et 100 k€",
        "Entre 100 k€ et 500 k€",
        "Entre 500 k€ et 2 M€",
        "Entre 2 M€ et 5 M€",
        "Entre 5 M€ et 10 M€",
        "Entre 10 M€ et 50 M€",
        "Entre 50 M€ et 100 M€",
        "Supérieur à 100 M€"
    ),
    cost_min = c(0, 0, 1e5, 5e5, 2e6, 5e6, 10e6, 50e6, 100e6),
    cost_max = c(0, 1e5, 5e5, 2e6, 5e6, 10e6, 50e6, 100e6, +Inf)
)

# Labels absent from the table get NA bounds in the merge: report them instead
# of discarding the result of the check.
unknown = setdiff(unique(temp[[variable]]), conversion[[variable]])
if (length(unknown) > 0) {
    warning(
        "Unmapped '", variable, "' labels (set to NA): ",
        paste(unknown, collapse = "; "), call. = FALSE
    )
}

# merge() puts the key first; dropping it leaves commune + the two bounds.
temp = merge(temp, conversion, by = variable, all.x = TRUE)[-1]
# Class midpoint, except for the open-ended top class (infinite midpoint)
# which is valued at 1.5 x its lower bound.
temp[[variable]] = (temp[["cost_min"]] + temp[["cost_max"]]) / 2
open_class = is.infinite(temp[[variable]])
temp[[variable]][open_class] = 1.5 * temp[["cost_min"]][open_class]

# Reorder on the study scope. A commune missing from the file yields an all-NA
# row, so the key is restored from the scope to keep the merge below aligned.
rownames(temp) = temp[["commune"]]
temp = temp[selection, ]
temp[["commune"]] = selection

result = merge(result, temp, by = "commune", all.x = TRUE)

### Mean flood cost: "ONRN_CtMoyen_Inon"

pattern = "ONRN_CtMoyen_Inon"
variable = "cost_mean"
# Keep the 1st column (used as commune key) and the 3rd column (class label).
temp = rio::import(
    dir(onrn_raw, pattern = pattern, full.names = TRUE),
    setclass = "data.frame",
    col_types = "text"
)[c(1, 3)]
names(temp) = c("commune", variable)

# Class label -> numeric bounds; the top class is open-ended (+Inf).
conversion = data.frame(
    cost_mean = c(
        "Pas de sinistre répertorié à CCR",
        "Entre 0 et 2,5 k€",
        "Entre 2,5 et 5 k€",
        "Entre 5 et 10 k€",
        "Entre 10 et 20k€",
        "Plus de 20 k€"
    ),
    cost_mean_min = c(0, 0, 2.5e3, 5e3, 10e3, 20e3),
    cost_mean_max = c(0, 2.5e3, 5e3, 10e3, 20e3, +Inf)
)

# Labels absent from the table get NA bounds in the merge: report them instead
# of discarding the result of the check.
unknown = setdiff(unique(temp[[variable]]), conversion[[variable]])
if (length(unknown) > 0) {
    warning(
        "Unmapped '", variable, "' labels (set to NA): ",
        paste(unknown, collapse = "; "), call. = FALSE
    )
}

# merge() puts the key first; dropping it leaves commune + the two bounds.
temp = merge(temp, conversion, by = variable, all.x = TRUE)[-1]
# Class midpoint, except for the open-ended top class (infinite midpoint)
# which is valued at 1.5 x its lower bound.
temp[[variable]] = (temp[["cost_mean_min"]] + temp[["cost_mean_max"]]) / 2
open_class = is.infinite(temp[[variable]])
temp[[variable]][open_class] = 1.5 * temp[["cost_mean_min"]][open_class]

# Reorder on the study scope. A commune missing from the file yields an all-NA
# row, so the key is restored from the scope to keep the merge below aligned.
rownames(temp) = temp[["commune"]]
temp = temp[selection, ]
temp[["commune"]] = selection

result = merge(result, temp, by = "commune", all.x = TRUE)

### Flood cost per inhabitant: "ONRN_CoutInon_parHabitant"

pattern = "ONRN_CoutInon_parHabitant"
variable = "cost_hab"
# Keep the 1st column (used as commune key) and the 3rd column (class label).
temp = rio::import(
    dir(onrn_raw, pattern = pattern, full.names = TRUE),
    setclass = "data.frame",
    col_types = "text"
)[c(1, 3)]
names(temp) = c("commune", variable)

# Class label -> numeric bounds; the top class is open-ended (+Inf). The NA
# row makes a missing label an accepted value with NA bounds.
conversion = data.frame(
    cost_hab = c(
        "Pas de sinistre répertorié à CCR",
        "Moins de 100 €/habitant",
        "Entre 100 € et 500€/habitant",
        "Entre 500 € et 1 k€/habitant",
        "Entre 1 k€ € et 10 k€/habitant",
        "Supérieur à 10 k€/habitant",
        NA
    ),
    cost_hab_min = c(0, 0, 100, 500, 1000, 10000, NA),
    cost_hab_max = c(0, 100, 500, 1000, 10000, +Inf, NA)
)

# Labels absent from the table get NA bounds in the merge: report them instead
# of discarding the result of the check.
unknown = setdiff(unique(temp[[variable]]), conversion[[variable]])
if (length(unknown) > 0) {
    warning(
        "Unmapped '", variable, "' labels (set to NA): ",
        paste(unknown, collapse = "; "), call. = FALSE
    )
}

# merge() puts the key first; dropping it leaves commune + the two bounds.
temp = merge(temp, conversion, by = variable, all.x = TRUE)[-1]
# Class midpoint, except for the open-ended top class (infinite midpoint)
# which is valued at 1.5 x its lower bound.
temp[[variable]] = (temp[["cost_hab_min"]] + temp[["cost_hab_max"]]) / 2
open_class = is.infinite(temp[[variable]])
temp[[variable]][open_class] = 1.5 * temp[["cost_hab_min"]][open_class]

# Reorder on the study scope. A commune missing from the file yields an all-NA
# row, so the key is restored from the scope to keep the merge below aligned.
rownames(temp) = temp[["commune"]]
temp = temp[selection, ]
temp[["commune"]] = selection

result = merge(result, temp, by = "commune", all.x = TRUE)

### Flood claims-to-premium ratio: "ONRN_SsurP_Inon"

pattern = "ONRN_SsurP_Inon"
variable = "ratio"
# Keep the 1st column (used as commune key) and the 3rd column (class label).
temp = rio::import(
    dir(onrn_raw, pattern = pattern, full.names = TRUE),
    setclass = "data.frame",
    col_types = "text"
)[c(1, 3)]
names(temp) = c("commune", variable)

# Class label -> numeric bounds (percentages as fractions); the top class is
# open-ended (+Inf).
conversion = data.frame(
    ratio = c(
        "Pas de sinistre ou de prime répertoriés à CCR",
        "Entre 0 et 10 %",
        "Entre 10 et 50 %",
        "Entre 50 et 100%",
        "Entre 100 et 200 %",
        "Plus de 200%"),
    ratio_min = c(0, 0, 0.1, 0.5, 1, 2),
    ratio_max = c(0, 0.1, 0.5, 1, 2, +Inf))

# Labels absent from the table get NA bounds in the merge: report them instead
# of discarding the result of the check.
unknown = setdiff(unique(temp[[variable]]), conversion[[variable]])
if (length(unknown) > 0) {
    warning(
        "Unmapped '", variable, "' labels (set to NA): ",
        paste(unknown, collapse = "; "), call. = FALSE
    )
}

# merge() puts the key first; dropping it leaves commune + the two bounds.
temp = merge(temp, conversion, by = variable, all.x = TRUE)[-1]
# Class midpoint, except for the open-ended top class (infinite midpoint)
# which is valued at 1.5 x its lower bound.
temp[[variable]] = (temp[["ratio_min"]] + temp[["ratio_max"]]) / 2
open_class = is.infinite(temp[[variable]])
temp[[variable]][open_class] = 1.5 * temp[["ratio_min"]][open_class]

# Reorder on the study scope. A commune missing from the file yields an all-NA
# row, so the key is restored from the scope to keep the merge below aligned.
rownames(temp) = temp[["commune"]]
temp = temp[selection, ]
temp[["commune"]] = selection

result = merge(result, temp, by = "commune", all.x = TRUE)

### Approved PPRI: "PPRi_anciennete_avancement"

pattern = "PPRi_anciennete_avancement"
columns = c("ppri_year", "ppri_state", "ppri_state_sub", "ppri_age_ori")
# Columns 1, 8, 7, 2 and 9 of the file are kept, in that order, and renamed.
temp = rio::import(
    dir(onrn_raw, pattern = pattern, full.names = TRUE),
    setclass = "data.frame"
)[c(1, 8, 7, 2, 9)]
names(temp) = c("commune", columns)

# The age label is the merge key, so the label check must run on that column.
# The previous key name ("state") matched no column on either side, which made
# the check always succeed.
variable = "ppri_age_ori"
# Age label -> state and age bounds in years; "more than" classes are
# open-ended (+Inf).
conversion = data.frame(
    ppri_age_ori = c(
        "Approuvé depuis moins de 5 ans",
        "Approuvé entre 5 et 10 ans",
        "Approuvé entre 10 et 20 ans",
        "Approuvé depuis plus de 20 ans",
        "Prescrit depuis moins de 4 ans",
        "Prescrit depuis plus de 4 ans"
    ),
    ppri_state_age = c("approuve", "approuve", "approuve", "approuve", "prescrit", "prescrit"),
    ppri_age_min = c(0,  5, 10,   20, 0,   4),
    ppri_age_max = c(5, 10, 20, +Inf, 4, +Inf)
)

# Labels absent from the table get NA state/bounds in the merge: report them.
unknown = setdiff(unique(temp[[variable]]), conversion[[variable]])
if (length(unknown) > 0) {
    warning(
        "Unmapped '", variable, "' labels (set to NA): ",
        paste(unknown, collapse = "; "), call. = FALSE
    )
}

# merge() puts the key first; dropping it removes the original age label.
temp = merge(temp, conversion, by = variable, all.x = TRUE)[-1]

# Reorder on the study scope. A commune missing from the file yields an all-NA
# row, so the key is restored from the scope to keep the merge below aligned.
rownames(temp) = temp[["commune"]]
temp = temp[selection, ]
temp[["commune"]] = selection

result = merge(result, temp, by = "commune", all.x = TRUE)

### Bilan Sinistre - Prime : estimation

# result = read.csv2(geau::current_version("data-common/so-ii/onrn"))

#### Some data to compute the premium per inhabitant
pop_france = 66992159 # INSEE (2018)
premium_france = 1670000000 # (CCR2019a for 2018)
premium_hab = premium_france / pop_france

#### Need to compute cumulative population
# For each year of the period, use the closest year available in
# so_ii_population (a year can therefore be counted several times), then sum
# over the period for each commune.
period = seq(1995, 2018)
available = as.integer(dimnames(geau::so_ii_population)[[2]])
closest = vapply(period, function(x) which.min(abs(available - x)), integer(1))
# Kept in its own variable so the commune scope held in `selection` is not
# overwritten by year labels.
years = as.character(available[closest])
pop_commune = rowSums(geau::so_ii_population[, years])

# `result` has been re-sorted on "commune" by the successive merge() calls, so
# match the population to it by commune code rather than by position.
# NOTE(review): assumes the row names of so_ii_population are commune codes --
# confirm. When they do not cover every commune of `result`, the previous
# positional behaviour is kept.
if (!is.null(names(pop_commune)) && all(result[["commune"]] %in% names(pop_commune))) {
    pop_commune = pop_commune[as.character(result[["commune"]])]
}

# Estimated premiums collected over the period minus estimated claims, using
# the claims-to-premium ratio.
result[["balance"]] = (1 - result[["ratio"]]) * pop_commune * premium_hab

# Final column layout: key and point estimates first, then the PPRI fields,
# then the class bounds behind each estimate.
column_order = c(
    "commune",
    "n_catnat", "freq_sin", "cost", "cost_mean", "cost_hab", "ratio", "balance",
    "ppri_year", "ppri_state", "ppri_state_sub",
    "ppri_state_age", "ppri_age_min", "ppri_age_max",
    "freq_sin_min", "freq_sin_max",
    "cost_min", "cost_max",
    "cost_mean_min", "cost_mean_max",
    "cost_hab_min", "cost_hab_max",
    "ratio_min", "ratio_max"
)
result = result[column_order]

# Save the table under a file name stamped with today's date.
output_file = file.path("data-common/so-ii/onrn", sprintf("onrn-%s.csv", today))
write.csv2(x = result, file = output_file, row.names = FALSE)
# write.csv2(result, geau::current_version("data-common/so-ii/onrn"), row.names = FALSE)

# Delete the extracted working files.
unlink(onrn_raw, recursive = TRUE, force = TRUE)