diff --git a/experiment/eurocode_data/main_eurocode_drawing.py b/experiment/eurocode_data/main_eurocode_drawing.py index 8321738bf7beb042b0da166d3c5508ee81704b73..564c01ec25e4032075b8e4f5c41f475413dc548d 100644 --- a/experiment/eurocode_data/main_eurocode_drawing.py +++ b/experiment/eurocode_data/main_eurocode_drawing.py @@ -62,16 +62,17 @@ def main_drawing(): (NonStationaryLocationAndScaleTemporalModel, 2017), ][1:] altitudes = EUROCODE_ALTITUDES[:] - uncertainty_methods = [ConfidenceIntervalMethodFromExtremes.my_bayes, ConfidenceIntervalMethodFromExtremes.ci_bayes] + uncertainty_methods = [ConfidenceIntervalMethodFromExtremes.my_bayes, + ConfidenceIntervalMethodFromExtremes.ci_normal] show = True if fast_plot: show = True - model_class_and_last_year = model_class_and_last_year[:1] - altitudes = altitudes[2:4] + model_class_and_last_year = model_class_and_last_year[:2] + altitudes = altitudes[2:] # altitudes = altitudes[:] - massif_names = massif_names[:2] - uncertainty_methods = uncertainty_methods[:1] + massif_names = massif_names[:3] + uncertainty_methods = uncertainty_methods[:2] model_name_to_massif_name_to_ordered_return_level = {} for model_class, last_year_for_the_data in model_class_and_last_year: diff --git a/extreme_fit/distribution/gev/ci_fevd_fixed.R b/extreme_fit/distribution/gev/ci_fevd_fixed.R new file mode 100644 index 0000000000000000000000000000000000000000..c84e977daef565a9f74b34b98006299f81f2be3f --- /dev/null +++ b/extreme_fit/distribution/gev/ci_fevd_fixed.R @@ -0,0 +1,405 @@ +# Title : TODO +# Objective : TODO +# Created by: erwan +# Created on: 31/10/2019 + +ci.fevd.mle_fixed <- function (x, alpha = 0.05, type = c("return.level", "parameter"), + return.period = 100, which.par = 1, R = 502, method = c("normal", + "boot", "proflik"), xrange = NULL, nint = 20, verbose = FALSE, + tscale = FALSE, return.samples = FALSE, ...) +{ + if (missing(method)) + miss.meth <- TRUE + else miss.meth <- FALSE + method <- tolower(method) + method <- match.arg(method) + type <- tolower(type) + type <- match.arg(type) + theta.hat <- x$results$par + theta.names <- names(theta.hat) + np <- length(theta.hat) + if (type == "parameter" && missing(which.par)) + which.par <- 1:np + if (any(theta.names == "log.scale")) { + id <- theta.names == "log.scale" + theta.hat[id] <- exp(theta.hat[id]) + theta.names[id] <- "scale" + names(theta.hat) <- theta.names + } + const <- is.fixedfevd(x) + if (type == "return.level") + par.name <- paste(return.period, "-", x$period.basis, + " return level", sep = "") + else if (type == "parameter") + par.name <- theta.names[which.par] + if (type == "return.level" && !const) { + return(ci.rl.ns.fevd.mle(x = x, alpha = alpha, return.period = return.period, + method = method, verbose = verbose, return.samples = return.samples, + ...)) + } + if (type == "parameter") + p <- theta.hat[which.par] + else { + if (is.element(x$type, c("PP", "GP", "Beta", "Pareto", + "Exponential"))) + lam <- mean(c(datagrabber(x)[, 1]) > x$threshold) + else lam <- 1 + if (is.element(x$type, c("PP", "GEV", "Gumbel", "Weibull", + "Frechet"))) + loc <- theta.hat["location"] + else loc <- 0 + scale <- theta.hat["scale"] + if (!is.element(x$type, c("Gumbel", "Exponential"))) + shape <- theta.hat["shape"] + else shape <- 0 + if (x$type == "PP") + mod <- "GEV" + else mod <- x$type + p <- rlevd(period = return.period, loc = loc, scale = scale, + shape = shape, threshold = x$threshold, type = mod, + npy = x$npy, rate = lam) + } + if (verbose) { + cat("\n", "Preparing to calculate ", (1 - alpha) * 100, + "% CI for ", ifelse(type == "return.level", paste(return.period, + "-", x$period.basis, " return level", sep = ""), + paste(par.name, " parameter", sep = "")), "\n") + cat("\n", "Model is ", ifelse(const, " fixed", " non-stationary."), + "\n") + if (method == "normal") + cat("\n", "Using Normal Approximation Method.\n") + else if (method == "boot") + cat("\n", "Using Bootstrap Method.\n") + else if (method == "proflik") + cat("\n", "Using Profile Likelihood Method.\n") + } + if (method == "normal") { + method.name <- "Normal Approx." + z.alpha <- qnorm(alpha/2, lower.tail = FALSE) + cov.theta <- parcov.fevd(x) + if (is.null(cov.theta)) + stop("ci: Sorry, unable to calculate the parameter covariance matrix. Maybe try a different method.") + var.theta <- diag(cov.theta) + if (any(var.theta < 0)) + stop("ci: negative Std. Err. estimates obtained. Not trusting any of them.") + if (type == "parameter") { + se.theta <- sqrt(var.theta) + if (tscale) { + if (!const && !is.element("scale", theta.names) && + !is.element("shape", theta.names) && !all(x$threshold == + x$threshold[1])) { + stop("ci: invalid argument configurations.") + } + if (!is.element(x$type, c("GP", "Beta", "Pareto"))) + stop("ci: invalid argument configurations.") + theta.hat["scale"] <- theta.hat["scale"] - theta.hat["shape"] * + x$threshold + theta.names[theta.names == "scale"] <- "tscale" + if (!any(theta.names[which.par] == "tscale")) + stop("ci: invalid argument configurations.") + names(theta.hat) <- theta.names + p <- theta.hat[which.par] + d <- rbind(1, -x$threshold) + names(se.theta) <- theta.names + se.theta["tscale"] <- sqrt(t(d) %*% cov.theta %*% + d) + } + else se.theta <- sqrt(var.theta)[which.par] + se.theta <- se.theta[which.par] + par.name <- theta.names[which.par] + } + else if (type == "return.level") { + grads <- rlgrad.fevd(x, period = return.period) + grads <- t(grads) + if (is.element(x$type, c("GP", "Beta", "Pareto", + "Exponential"))) { + if (x$type == "Exponential") + cov.theta <- diag(c(lam * (1 - lam)/x$n, var.theta)) + else cov.theta <- rbind(c(lam * (1 - lam)/x$n, + 0, 0), cbind(0, cov.theta)) + } + else lam <- 1 + var.theta <- t(grads) %*% cov.theta %*% grads + } + else stop("ci: invalid type argument. Must be return.level or parameter.") + if (length(p) > 1) { + if (type == "return.level") + se.theta <- sqrt(diag(var.theta)) + out <- cbind(p - z.alpha * se.theta, p, p + z.alpha * + se.theta) + rownames(out) <- par.name + conf.level <- paste(round((1 - alpha) * 100, digits = 2), + "%", sep = "") + colnames(out) <- c(paste(conf.level, " lower CI", + sep = ""), "Estimate", paste(conf.level, " upper CI", + sep = "")) + attr(out, "data.name") <- x$call + attr(out, "method") <- method.name + attr(out, "conf.level") <- (1 - alpha) * 100 + class(out) <- "ci" + return(out) + } + else out <- c(p - z.alpha * sqrt(var.theta[which.par]), + p, p + z.alpha * sqrt(var.theta[which.par])) + } + else if (method == "boot") { + method.name <- "Parametric Bootstrap" + if (verbose) + cat("\n", "Simulating data from fitted model. Size = ", + R, "\n") + if (const) { + if (is.null(x$blocks)) { + Z <- rextRemes(x, n = R * x$n) + Z <- matrix(Z, x$n, R) + } + else { + Z <- rextRemes(x, n = round(R * x$npy * x$blocks$nBlocks)) + Z <- matrix(Z, round(x$npy * x$blocks$nBlocks), + R) + } + } + else Z <- rextRemes(x, n = R) + if (verbose) + cat("\n", "Simulated data found.\n") + y <- datagrabber(x, response = FALSE) + if (is.element(x$type, c("PP", "GP", "Exponential", "Beta", + "Pareto"))) { + x2 <- datagrabber(x, cov.data = FALSE) + eid <- x2 > x$threshold + Z2 <- matrix(x$threshold, x$n, R) + Z2[eid, ] <- Z[eid, ] + Z <- Z2 + lam <- mean(eid) + } + else { + eid <- !logical(x$n) + lam <- 1 + } + ipar <- list() + if (any(is.element(c("location", "mu0"), theta.names))) { + if (is.element("location", theta.names)) + ipar$location <- theta.hat["location"] + else { + id <- substring(theta.names, 1, 2) == "mu" + ipar$location <- theta.hat[id] + } + } + if (is.element("scale", theta.names)) + ipar$scale <- theta.hat["scale"] + else { + if (!x$par.models$log.scale) + id <- substring(theta.names, 1, 3) == "sig" + else id <- substring(theta.names, 1, 3) == "phi" + ipar$scale <- theta.hat[id] + } + if (!is.element(x$type, c("Gumbel", "Exponential"))) { + if (is.element("shape", theta.names)) + ipar$shape <- theta.hat["shape"] + else { + id <- substring(theta.names, 1, 2) == "xi" + ipar$shape <- theta.hat[id] + } + } + bfun <- function(z, x, y, p, ipar, eid, rate) { + pm <- x$par.models + if (is.null(y)) + fit <- fevd(x = z, threshold = x$threshold, location.fun = pm$location, + scale.fun = pm$scale, shape.fun = pm$shape, + use.phi = pm$log.scale, type = x$type, method = x$method, + initial = ipar, span = x$span, time.units = x$time.units, + period.basis = x$period.basis, optim.args = x$optim.args, + priorFun = x$priorFun, priorParams = x$priorParams, + verbose = FALSE) + else fit <- fevd(x = z, data = y, threshold = x$threshold, + location.fun = pm$location, scale.fun = pm$scale, + shape.fun = pm$shape, use.phi = pm$log.scale, + type = x$type, method = x$method, initial = ipar, + span = x$span, time.units = x$time.units, period.basis = x$period.basis, + optim.args = x$optim.args, priorFun = x$priorFun, + priorParams = x$priorParams, verbose = FALSE) + fit$cov.data <- y + res <- distill(fit, cov = FALSE) + return(res) + } + if (verbose) + cat("\n", "Fitting model to simulated data sets (this may take a while!).") + if (type == "parameter") { + sam <- apply(Z, 2, bfun, x = x, y = y, ipar = ipar) + if (tscale) { + if (!const && !is.element("scale", theta.names) && + !is.element("shape", theta.names)) + stop("ci: invalid argument configurations.") + if (!is.element(x$type, c("GP", "Beta", "Pareto"))) + stop("ci: invalid argument configurations.") + sam["scale", ] <- sam["scale", ] - sam["shape", + ] * x$threshold + theta.hat["scale"] <- theta.hat["scale"] - theta.hat["shape"] * + x$threshold + theta.names[theta.names == "scale"] <- "tscale" + rownames(sam) <- theta.names + names(theta.hat) <- theta.names + } + sam <- sam[which.par, ] + if (return.samples) + return(t(sam)) + } + else if (type == "return.level") { + pars <- apply(Z, 2, bfun, x = x, y = y, ipar = ipar)[1:np, + ] + th.est <- numeric(3) + if (is.element(x$type, c("PP", "GEV", "Gumbel", "Weibull", + "Frechet"))) { + loc <- pars["location", ] + th.est[1] <- theta.hat["location"] + } + else loc <- rep(0, R) + scale <- pars["scale", ] + th.est[2] <- theta.hat["scale"] + if (!is.element(x$type, c("Gumbel", "Exponential"))) { + shape <- pars["shape", ] + th.est[3] <- theta.hat["shape"] + } + else { + shape <- rep(1e-10, R) + th.est[3] <- 1e-08 + } + if (return.samples) + out <- t(pars) + th <- rbind(loc, scale, shape) + rlfun <- function(theta, p, u, type, npy, rate) rlevd(period = p, + loc = theta[1], scale = theta[2], shape = theta[3], + threshold = u, type = type, npy = npy, rate = rate) + if (x$type == "PP") + mod <- "GEV" + else mod <- x$type + sam <- apply(th, 2, rlfun, p = return.period, u = x$threshold, + type = mod, npy = x$npy, rate = lam) + if (is.matrix(sam)) + rownames(sam) <- paste(rownames(sam), "-", x$period.basis, + sep = "") + else sammy.name <- paste(return.period, "-", x$period.basis, + sep = "") + if (return.samples) { + if (is.matrix(sam)) + out <- cbind(pars, t(sam)) + else { + onames <- colnames(out) + out <- cbind(out, sam) + colnames(out) <- c(onames, sammy.name) + } + return(out) + } + theta.hat <- rlevd(period = return.period, loc = th.est[1], + scale = th.est[2], shape = th.est[3], threshold = x$threshold, + type = x$type, npy = x$npy, rate = lam) + } + else stop("ci: invalid type argument. Must be return.level or parameter.") + if (is.matrix(sam)) { + out <- apply(sam, 1, quantile, probs = c(alpha/2, + 1 - alpha/2)) + out.names <- rownames(out) + out <- rbind(out[1, ], theta.hat, out[2, ]) + rownames(out) <- c(out.names[1], "Estimate", out.names[2]) + colnames(out) <- rownames(sam) + out <- t(out) + attr(out, "data.name") <- x$call + attr(out, "method") <- method.name + attr(out, "conf.level") <- (1 - alpha) * 100 + attr(out, "R") <- R + class(out) <- "ci" + return(out) + } + else { + out <- quantile(sam, probs = c(alpha/2, 1 - alpha/2)) + out <- c(out[1], mean(sam), out[2]) + attr(out, "R") <- R + } + if (verbose) + cat("\n", "Finished fitting model to simulated data.\n") + } + else if (method == "proflik") { + if (x$type == "PP" && !is.null(x$blocks)) + stop("ci: cannot do profile likelihood with blocks.") + if (tscale) + stop("ci: invalid argument configurations.") + if (type == "parameter" && length(which.par) > 1) + stop("ci: can only do one parameter at a time with profile likelihood method.") + else if (type == "return.level" && length(return.period) > + 1) + stop("ci: can only do one return level at a time with profile likelihood method.") + method.name <- "Profile Likelihood" + if (verbose) { + if (x$type != "PP") + cat("\n", "Calculating profile likelihood. This may take a few moments.\n") + else cat("\n", "Calculating profile likelihood. This may take several moments.\n") + } + if (is.null(xrange)) { + hold2 <- c(ci(x, alpha = alpha, method = "normal", + type = type, return.period = return.period, which.par = which.par))[c(1, + 3)] + if (!any(is.na(hold2))) + xrange <- range(c(hold2, log2(hold2), 4 * hold2, + hold2 - 4 * hold2, hold2 + 4 * hold2), finite = TRUE) + else if (!is.na(hold2[2])) + xrange <- range(c(p - 2 * abs(log2(abs(p))), + hold2[2], 4 * hold2[2], -4 * hold2[2], log2(p)), + finite = TRUE) + else if (!is.na(hold2[1])) + xrange <- range(c(p - 2 * abs(log2(abs(p))), + hold2[1], 4 * hold2[1], -4 * hold2[1], log2(p)), + finite = TRUE) + else if (all(is.na(hold2))) + xrange <- c(p - 2 * abs(log2(abs(p))), p + 2 * + abs(log2(abs(p)))) + if (verbose) + cat("\n", "Using a range of ", xrange[1], " to ", + xrange[2], "\n") + } + if (is.null(x$blocks)) { + if (!is.null(xrange)) + hold <- profliker(x, type = type, xrange = xrange, + return.period = return.period, which.par = which.par, + nint = nint, plot = verbose, ...) + else hold <- profliker(x, type = type, return.period = return.period, + which.par = which.par, nint = nint, plot = verbose, + ...) + } + else stop("Sorry: profile likelihood with blocks is not supported.") + ma <- -x$results$value + crit <- ma - 0.5 * qchisq(1 - alpha, 1) + if (verbose) { + cat("\n", "Profile likelihood has been calculated. Now, trying to find where it crosses the critical value = ", + crit, "\n") + abline(h = crit, col = "blue") + } + crit2 <- ma - 0.5 * qchisq((1 - alpha) + abs(log2(1 - + alpha))/2, 1) + print(hold) + print(crit2) + id <- hold > crit2 + print(id) + z <- seq(xrange[1], xrange[2], length = length(hold)) + z <- z[id] + parlik <- hold[id] + print("here") + print(z) + print(parlik) + # smth <- spline(z, parlik, n = 200) + # ind <- smth$y > crit + # out <- range(smth$x[ind]) + # if (verbose) + # abline(v = out, lty = 2, col = "darkblue", lwd = 2) + # out <- c(out[1], p, out[2]) + } + # else stop("ci: invalid method argument.") + # conf.level <- paste(round((1 - alpha) * 100, digits = 2), + # "%", sep = "") + # names(out) <- c(paste(conf.level, " lower CI", sep = ""), + # par.name, paste(conf.level, " upper CI", sep = "")) + # attr(out, "data.name") <- x$call + # attr(out, "method") <- method.name + # attr(out, "conf.level") <- (1 - alpha) * 100 + # class(out) <- "ci" + # return(out) +} + diff --git a/extreme_fit/distribution/gev/main_fevd_mle.R b/extreme_fit/distribution/gev/main_fevd_mle.R index 969019099cf4108f240ae242521ee530c69c395c..f50daac0f92bcb092f873ed0fe8a3f1614f6e9d9 100644 --- a/extreme_fit/distribution/gev/main_fevd_mle.R +++ b/extreme_fit/distribution/gev/main_fevd_mle.R @@ -7,18 +7,19 @@ library(data.table) library(stats4) library(SpatialExtremes) source('fevd_fixed.R') +source('ci_fevd_fixed.R') # Sample from a GEV set.seed(42) N <- 50 -loc = 0; scale = 1; shape <- 1 +loc = 0; scale = 1; shape <- 0.1 x_gev <- rgev(N, loc = loc, scale = scale, shape = shape) coord <- matrix(ncol=1, nrow = N) coord[,1]=seq(0,N-1,1) colnames(coord) = c("T") coord = data.frame(coord, stringsAsFactors = TRUE) res = fevd_fixed(x_gev, data=coord, method='MLE', verbose=TRUE, use.phi=FALSE) -# res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='MLE', verbose=TRUE, use.phi=FALSE, time.units = "years", units = "years") -# print(res) +# res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='MLE', verbose=FALSE, use.phi=FALSE) +print(res) # Some display for the results # m = res$results @@ -31,10 +32,10 @@ print(res$results$par) # Confidence interval staionary -method = "normal" -res_ci = ci(res, alpha = 0.05, type = c("return.level", "parameter"), - return.period = 50, method = method, xrange = NULL, nint = 20, verbose = FALSE, - tscale = FALSE) +method = "proflik" +res_ci = ci.fevd.mle(res, alpha = 0.05, type = c("return.level"), + return.period = 50, method = method, xrange = c(-200,200), nint = 10, R=502, verbose = TRUE, + tscale = FALSE, return.samples = FALSE) print(res_ci) # Bug to solve for the non stationary - the returned parameter do not match with the return level @@ -45,12 +46,12 @@ print(res_ci) # print(r) # param = findpars(res, qcov = v) # print(param) -# res_ci = ci(res, alpha = 0.05, type = c("return.level", "parameter"), -# return.period = 50, method = "normal", xrange = NULL, nint = 20, verbose = FALSE, +# res_ci = ci(res, alpha = 0.05, type = c("return.level"), +# return.period = 50, method = "boot", xrange = NULL, nint = 20, verbose = FALSE, # tscale = FALSE, return.samples = FALSE, qcov=v) # print(res_ci) -# -# + + diff --git a/test/test_extreme_fit/test_model/test_confidence_interval.py b/test/test_extreme_fit/test_model/test_confidence_interval.py index 55cc629096850f742868355a5e22b44cad11d81f..9b6a5b9d7e9734caa262a07438fee6f69c3c890d 100644 --- a/test/test_extreme_fit/test_model/test_confidence_interval.py +++ b/test/test_extreme_fit/test_model/test_confidence_interval.py @@ -81,6 +81,25 @@ class TestConfidenceInterval(unittest.TestCase): NonStationaryLocationAndScaleTemporalModel: (-15.17041284612494, 43.69511224410276, 102.56063733433047), } + def test_ci_boot(self): + self.fit_method = TemporalMarginFitMethod.extremes_fevd_mle + self.ci_method = ConfidenceIntervalMethodFromExtremes.ci_boot + self.model_class_to_triplet= { + # I think the boostrapping works only in the stationary context + # In the arg of the function for the non stationary return level there is only the method "normal" available + StationaryTemporalModel: (10.260501562662334, 39.91206869180525, 120.3789497755127), + } + + # Proflik seems to crash with the error + # def test_ci_proflik(self): + # self.fit_method = TemporalMarginFitMethod.extremes_fevd_mle + # self.ci_method = ConfidenceIntervalMethodFromExtremes.ci_proflik + # self.model_class_to_triplet= { + # # I think the profil likelihood works only in the stationary context + # # In the arg of the function for the non stationary return level there is only the method "normal" available + # StationaryTemporalModel: (-4.703945484843988, 30.482318639674023, 65.66858276419204), + # } + def tearDown(self) -> None: for model_class, expected_triplet in self.model_class_to_triplet.items(): eurocode_ci = self.compute_eurocode_ci(model_class) @@ -113,6 +132,14 @@ class TestConfidenceIntervalModifiedCoordinates(TestConfidenceInterval): self.model_class_to_triplet = {} self.assertTrue(True) + def test_ci_boot(self): + self.model_class_to_triplet = {} + self.assertTrue(True) + + # def test_ci_proflik(self): + # self.model_class_to_triplet = {} + # self.assertTrue(True) + if __name__ == '__main__': unittest.main()