From 155d8b89f35802226462e34a83c3b599c4354671 Mon Sep 17 00:00:00 2001 From: Le Roux Erwan <erwan.le-roux@irstea.fr> Date: Tue, 17 Dec 2019 17:34:16 +0100 Subject: [PATCH] [Confidence Interval] fix r.ci.fevd.mle_fixed for the Gumbel case --- extreme_fit/distribution/gev/ci_fevd_fixed.R | 296 +++++++++++++++++-- extreme_fit/distribution/gev/main_fevd_mle.R | 35 ++- 2 files changed, 287 insertions(+), 44 deletions(-) diff --git a/extreme_fit/distribution/gev/ci_fevd_fixed.R b/extreme_fit/distribution/gev/ci_fevd_fixed.R index c84e977d..3b5a7ba4 100644 --- a/extreme_fit/distribution/gev/ci_fevd_fixed.R +++ b/extreme_fit/distribution/gev/ci_fevd_fixed.R @@ -1,8 +1,6 @@ -# Title : TODO -# Objective : TODO -# Created by: erwan -# Created on: 31/10/2019 +# todo: report bug on exTremes, fix is around line 510 where I set theta[4] = 0 (and it was Nan before) +# it was important to do that to make the code work in the Gumbel case ci.fevd.mle_fixed <- function (x, alpha = 0.05, type = c("return.level", "parameter"), return.period = 100, which.par = 1, R = 502, method = c("normal", "boot", "proflik"), xrange = NULL, nint = 20, verbose = FALSE, @@ -33,7 +31,7 @@ ci.fevd.mle_fixed <- function (x, alpha = 0.05, type = c("return.level", "parame else if (type == "parameter") par.name <- theta.names[which.par] if (type == "return.level" && !const) { - return(ci.rl.ns.fevd.mle(x = x, alpha = alpha, return.period = return.period, + return(ci.rl.ns.fevd.mle_fixed(x = x, alpha = alpha, return.period = return.period, method = method, verbose = verbose, return.samples = return.samples, ...)) } @@ -260,8 +258,8 @@ ci.fevd.mle_fixed <- function (x, alpha = 0.05, type = c("return.level", "parame th.est[3] <- theta.hat["shape"] } else { - shape <- rep(1e-10, R) - th.est[3] <- 1e-08 + shape <- rep(0, R) + th.est[3] <- 0 } if (return.samples) out <- t(pars) @@ -374,32 +372,272 @@ ci.fevd.mle_fixed <- function (x, alpha = 0.05, type = c("return.level", "parame } crit2 <- ma - 0.5 * qchisq((1 - alpha) + abs(log2(1 - alpha))/2, 1) - print(hold) - print(crit2) id <- hold > crit2 - print(id) z <- seq(xrange[1], xrange[2], length = length(hold)) z <- z[id] parlik <- hold[id] - print("here") - print(z) - print(parlik) - # smth <- spline(z, parlik, n = 200) - # ind <- smth$y > crit - # out <- range(smth$x[ind]) - # if (verbose) - # abline(v = out, lty = 2, col = "darkblue", lwd = 2) - # out <- c(out[1], p, out[2]) + smth <- spline(z, parlik, n = 200) + ind <- smth$y > crit + out <- range(smth$x[ind]) + if (verbose) + abline(v = out, lty = 2, col = "darkblue", lwd = 2) + out <- c(out[1], p, out[2]) } - # else stop("ci: invalid method argument.") - # conf.level <- paste(round((1 - alpha) * 100, digits = 2), - # "%", sep = "") - # names(out) <- c(paste(conf.level, " lower CI", sep = ""), - # par.name, paste(conf.level, " upper CI", sep = "")) - # attr(out, "data.name") <- x$call - # attr(out, "method") <- method.name - # attr(out, "conf.level") <- (1 - alpha) * 100 - # class(out) <- "ci" - # return(out) + else stop("ci: invalid method argument.") + conf.level <- paste(round((1 - alpha) * 100, digits = 2), + "%", sep = "") + names(out) <- c(paste(conf.level, " lower CI", sep = ""), + par.name, paste(conf.level, " upper CI", sep = "")) + attr(out, "data.name") <- x$call + attr(out, "method") <- method.name + attr(out, "conf.level") <- (1 - alpha) * 100 + class(out) <- "ci" + return(out) +# }} } + +ci.rl.ns.fevd.mle_fixed <- function (x, alpha = 0.05, return.period = 100, method = c("normal"), + verbose = FALSE, qcov = NULL, qcov.base = NULL, ...) +{ + method <- tolower(method) + method <- match.arg(method) + par.name <- paste(return.period, "-", x$period.basis, " return level", + sep = "") + if (verbose) { + cat("\n", "Preparing to calculate ", (1 - alpha) * 100, + "% CI for ", paste(return.period, "-", x$period.basis, + " return level", sep = ""), "\n") + cat("\n", "Model is non-stationary.\n") + cat("\n", "Using Normal Approximation Method.\n") + } + if (method == "normal") + method.name <- "Normal Approx." + if (method == "normal") { + res <- return.level.ns.fevd.mle_fixed(x = x, return.period = return.period, + ..., do.ci = FALSE, verbose = verbose, qcov = qcov, + qcov.base = qcov.base) + z.alpha <- qnorm(alpha/2, lower.tail = FALSE) + cov.theta <- parcov.fevd(x) + if (is.null(cov.theta)) + stop("ci: Sorry, unable to calculate the parameter covariance matrix. Maybe try a different method.") + var.theta <- diag(cov.theta) + if (any(var.theta < 0)) + stop("ci: negative Std. Err. estimates obtained. Not trusting any of them.") + grads <- t(rlgrad.fevd(x, period = return.period, qcov = qcov, + qcov.base = qcov.base)) + se.theta <- sqrt(diag(t(grads) %*% cov.theta %*% grads)) + out <- cbind(c(res) - z.alpha * se.theta, c(res), c(res) + + z.alpha * se.theta, se.theta) + if (length(return.period) > 1) + rownames(out) <- par.name + else rownames(out) <- NULL + conf.level <- paste(round((1 - alpha) * 100, digits = 2), + "%", sep = "") + colnames(out) <- c(paste(conf.level, " lower CI", sep = ""), + "Estimate", paste(conf.level, " upper CI", sep = ""), + "Standard Error") + } + else if (method == "boot") { + stop("ci.rl.ns.fevd.mle: Sorry, this functionality has not yet been added.") + } + attr(out, "data.name") <- x$call + attr(out, "method") <- method.name + attr(out, "conf.level") <- (1 - alpha) * 100 + class(out) <- "ci" + return(out) +} + +return.level.ns.fevd.mle_fixed <- function (x, return.period = c(2, 20, 100), ..., alpha = 0.05, + method = c("normal"), do.ci = FALSE, verbose = FALSE, qcov = NULL, + qcov.base = NULL) +{ + if (missing(return.period)) + return.period <- 100 + if (do.ci && method != "normal") + stop("return.level.ns.fevd.mle: only normal approximation CI calculations currently available for nonstationary return levels.") + model <- x$type + if (!(tolower(model) %in% c("pp", "gev", "weibull", "frechet", + "gumbel"))) + stop("return.level.ns.fevd.mle: not implemented for GP, beta, pareto, exponential models.") + if (do.ci && length(return.period) > 1 && nrow(qcov) > 1) + stop("return.level.ns.fevd.mle:: Cannot calculate confidence intervals for multiple return periods and multiple covariate values simultaneously.") + if (!do.ci) { + if (model == "PP") { + mod2 <- "GEV" + } + else mod2 <- model + p <- x$results$par + pnames <- names(p) + if (is.fixedfevd(x)) + stop("return.level.ns.fevd.mle: this function is for nonstationary models.") + if (is.null(qcov)) + stop("return.level.ns.fevd.mle: qcov required for this function.") + if (!is.matrix(qcov)) + qcov <- matrix(qcov, nrow = 1) + if (!is.qcov(qcov)) + qcov <- make.qcov(x = x, vals = qcov, nr = nrow(qcov)) + if (!is.null(qcov.base)) { + if (!is.matrix(qcov.base)) + qcov.base <- matrix(qcov.base, nrow = 1) + if (nrow(qcov) != nrow(qcov.base) || ncol(qcov) != + ncol(qcov.base)) + stop("return.level.ns.fevd.mle: qcov and qcov.base must have the same number of covariates and values.") + if (!is.qcov(qcov.base)) + qcov.base <- make.qcov(x = x, vals = qcov.base, + nr = nrow(qcov.base)) + } + loc.id <- 1:x$results$num.pars$location + sc.id <- (1 + x$results$num.pars$location):(x$results$num.pars$location + + x$results$num.pars$scale) + sh.id <- (1 + x$results$num.pars$location + x$results$num.pars$scale):(x$results$num.pars$location + + x$results$num.pars$scale + x$results$num.pars$shape) + loc <- qcov[, loc.id, drop = FALSE] %*% p[loc.id] + sc <- qcov[, sc.id, drop = FALSE] %*% p[sc.id] + sh <- qcov[, sh.id, drop = FALSE] %*% p[sh.id] + if (!is.null(qcov.base)) { + loc.base <- qcov.base[, loc.id, drop = FALSE] %*% + p[loc.id] + sc.base <- qcov.base[, sc.id, drop = FALSE] %*% p[sc.id] + sh.base <- qcov.base[, sh.id, drop = FALSE] %*% p[sh.id] + } + if (x$par.models$log.scale) { + sc <- exp(sc) + if (!is.null(qcov.base)) + sc.base <- exp(sc.base) + } + theta <- cbind(qcov[, "threshold"], loc, sc, sh) + rlfun2 <- function(th, pd, type, npy, rate) rlevd_fixed(pd, + loc = th[2], scale = th[3], shape = th[4], threshold = th[1], + type = type, npy = npy) + theta[4] = 0 + res <- apply(theta, 1, rlfun2, pd = return.period, type = mod2, + npy = x$npy) + if (!is.null(qcov.base)) { + theta.base <- cbind(qcov.base[, "threshold"], loc.base, + sc.base, sh.base) + res <- res - apply(theta.base, 1, rlfun2, pd = return.period, + type = mod2, npy = x$npy) + } + res <- t(matrix(res, nrow = length(return.period))) + colnames(res) <- paste(return.period, "-", x$period.basis, + " level", sep = "") + attr(res, "return.period") <- return.period + attr(res, "data.name") <- x$data.name + attr(res, "fit.call") <- x$call + attr(res, "call") <- match.call() + attr(res, "fit.type") <- x$type + attr(res, "data.assumption") <- "non-stationary" + attr(res, "period") <- x$period.basis + attr(res, "units") <- x$units + attr(res, "qcov") <- deparse(substitute(qcov)) + attr(res, "class") <- "return.level" + if (!is.null(qcov.base)) { + attr(res, "qcov.base") <- deparse(substitute(qcov.base)) + attr(res, "class") <- "return.level.diff" + } + return(res) + } + else { + out <- ci.rl.ns.fevd.mle(x = x, alpha = alpha, return.period = return.period, + qcov = qcov, qcov.base = qcov.base, ...) + return(out) + } +} + +rlevd_fixed <- function (period, loc = 0, scale = 1, shape = 0, threshold = 0, + type = c("GEV", "GP", "PP", "Gumbel", "Frechet", "Weibull", + "Exponential", "Beta", "Pareto"), npy = 365.25, rate = 0.01) +{ + if (any(period <= 1)) + stop("rlevd: invalid period argument. Must be greater than 1.") + type <- match.arg(type) + type <- tolower(type) + if (missing(loc)) + loc <- 0 + else if (is.null(loc)) + loc <- 0 + if (is.element(type, c("gumbel", "weibull", "frechet"))) { + if (type == "gumbel" && shape != 0) { + warning("rlevd: shape is not zero, but type is Gumbel. Re-setting shape parameter to zero.") + shape <- 0 + type <- "gev" + } + else if (type == "gumbel") + type <- "gev" + else if (type == "frechet" && shape <= 0) { + if (shape == 0) { + warning("rlevd: shape is zero, but type is Frechet! Re-setting type to Gumbel.") + shape <- 0 + } + else { + warning("rlevd: type is Frechet, but shape < 0. Negating shape to force it to be Frechet.") + shape <- -shape + } + type <- "gev" + } + else if (type == "frechet") + type <- "gev" + else if (type == "weibull" && shape >= 0) { + if (shape == 0) { + warning("rlevd: shape is zero, but type is Weibull! Re-setting type to Gumbel.") + shape <- 0 + } + else { + warning("rlevd: type is Weibull, but shape > 0. Negating shape to force it to be Weibull.") + shape <- -shape + } + type <- "gev" + } + else if (type == "weibull") + type <- "gev" + } + if (is.element(type, c("beta", "pareto", "exponential"))) { + if (type == "exponential" && shape != 0) { + warning("rlevd: shape is not zero, but type is Exponential. Re-setting shape parameter to zero.") + shape <- 0 + type <- "gp" + } + else if (type == "exponential") + type <- "gp" + else if (type == "beta" && shape >= 0) { + if (shape == 0) { + warning("rlevd: shape is zero, but type is Beta! Re-setting type to Exponential.") + shape <- 0 + } + else { + warning("rlevd: type is Beta, but shape > 0. Negating shape to force it to be Beta.") + shape <- -shape + } + type <- "gp" + } + else if (type == "beta") + type <- "gp" + else if (type == "pareto" && shape <= 0) { + if (shape == 0) { + warning("rlevd: shape is zero, but type is Pareto! Re-setting type to Exponential.") + shape <- 0 + } + else { + warning("rlevd: type is Pareto, but shape < 0. Negating shape to force it to be Pareto.") + shape <- -shape + } + type <- "gp" + } + else if (type == "pareto") + type <- "gp" + } + if (is.element(type, c("gev", "pp"))) { + p <- 1 - 1/period + res <- qevd(p = p, loc = loc, scale = scale, shape = shape, + type = "GEV") + } + else if (type == "gp") { + m <- period * npy * rate + if (shape == 0) + res <- threshold + scale * log(m) + else res <- threshold + (scale/shape) * (m^shape - 1) + } + names(res) <- as.character(period) + return(res) +} diff --git a/extreme_fit/distribution/gev/main_fevd_mle.R b/extreme_fit/distribution/gev/main_fevd_mle.R index f50daac0..cf67516c 100644 --- a/extreme_fit/distribution/gev/main_fevd_mle.R +++ b/extreme_fit/distribution/gev/main_fevd_mle.R @@ -17,8 +17,8 @@ coord <- matrix(ncol=1, nrow = N) coord[,1]=seq(0,N-1,1) colnames(coord) = c("T") coord = data.frame(coord, stringsAsFactors = TRUE) -res = fevd_fixed(x_gev, data=coord, method='MLE', verbose=TRUE, use.phi=FALSE) -# res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='MLE', verbose=FALSE, use.phi=FALSE) +# res = fevd_fixed(x_gev, data=coord, method='MLE', verbose=TRUE, use.phi=FALSE) +res = fevd_fixed(x_gev, data=coord, location.fun= ~T, method='MLE', type="Gumbel", verbose=FALSE, use.phi=FALSE) print(res) # Some display for the results @@ -32,24 +32,29 @@ print(res$results$par) # Confidence interval staionary -method = "proflik" -res_ci = ci.fevd.mle(res, alpha = 0.05, type = c("return.level"), - return.period = 50, method = method, xrange = c(-200,200), nint = 10, R=502, verbose = TRUE, - tscale = FALSE, return.samples = FALSE) -print(res_ci) +# method = "proflik" +# res_ci = ci.fevd.mle(res, alpha = 0.05, type = c("return.level"), +# return.period = 50, method = method, xrange = c(-200,200), nint = 10, R=502, verbose = TRUE, +# tscale = FALSE, return.samples = FALSE) +# print(res_ci) # Bug to solve for the non stationary - the returned parameter do not match with the return level -# ci.fevd.mle() -# Confidence interval non staionary -# v = make.qcov(res, vals = list(mu1 = c(0.0))) -# r = return.level(res, return.period = 50, qcov = v) +v = make.qcov(res, vals = list(mu1 = c(0.0))) +print(res$results$num.pars$location) +print(res$results$num.pars$scale) +print(res$results$num.pars$shape) +res$results$num.pars$shape = 0 +print(res$results$num.pars$shape) + +# r = return.level(res, return.period = 50, qcov = v, type='Gumbel') # print(r) # param = findpars(res, qcov = v) # print(param) -# res_ci = ci(res, alpha = 0.05, type = c("return.level"), -# return.period = 50, method = "boot", xrange = NULL, nint = 20, verbose = FALSE, -# tscale = FALSE, return.samples = FALSE, qcov=v) -# print(res_ci) +# print(res$results$num.pars$shape) +res_ci = ci.fevd.mle_fixed(res, alpha = 0.05, type = c("return.level"), + return.period = 50, method = "normal", xrange = NULL, nint = 20, verbose = FALSE, + tscale = FALSE, return.samples = FALSE, qcov=v) +print(res_ci) -- GitLab