Skip to content

Commit

Permalink
Parameter stability plot bug fixes (issue #18)
Browse files Browse the repository at this point in the history
  • Loading branch information
lbelzile committed Mar 5, 2024
1 parent 0d2f65e commit 53e47d2
Show file tree
Hide file tree
Showing 16 changed files with 369 additions and 204 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: mev
Type: Package
Title: Modelling of Extreme Values
Version: 1.16.0002
Version: 1.16.0003
Authors@R: c(person(given="Leo", family="Belzile", role = c("aut", "cre"), email = "belzilel@gmail.com", comment = c(ORCID = "0000-0002-9135-014X")), person(given="Jennifer L.", family="Wadsworth", role=c("aut")), person(given="Paul J.", family="Northrop", role=c("aut")), person(given="Scott D.", family="Grimshaw", role=c("aut")), person(given="Jin", family="Zhang", role=c("ctb")), person(given="Michael A.", family="Stephens", role=c("ctb")), person(given="Art B.", family="Owen", role=c("ctb")), person(given="Raphael", family="Huser", role=c("aut")))
Description: Various tools for the analysis of univariate, multivariate and functional extremes. Exact simulation from max-stable processes [Dombry, Engelke and Oesting (2016) <doi:10.1093/biomet/asw008>, R-Pareto processes for various parametric models, including Brown-Resnick (Wadsworth and Tawn, 2014, <doi:10.1093/biomet/ast042>) and Extremal Student (Thibaud and Opitz, 2015, <doi:10.1093/biomet/asv045>). Threshold selection methods, including Wadsworth (2016) <doi:10.1080/00401706.2014.998345>, and Northrop and Coleman (2014) <doi:10.1007/s10687-014-0183-z>. Multivariate extreme diagnostics. Estimation and likelihoods for univariate extremes, e.g., Coles (2001) <doi:10.1007/978-1-4471-3675-0>.
License: GPL-3
Expand Down Expand Up @@ -30,7 +30,7 @@ Suggests:
TruncatedNormal (>= 1.1)
LinkingTo: Rcpp, RcppArmadillo
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
VignetteBuilder: knitr
Encoding: UTF-8
ByteCompile: true
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,23 @@ S3method(plot,mev_gpd)
S3method(plot,mev_gpdbayes)
S3method(plot,mev_pp)
S3method(plot,mev_rlarg)
S3method(plot,mev_taildep)
S3method(plot,mev_thdiag_infomat)
S3method(plot,mev_thdiag_northropcoleman)
S3method(plot,mev_thdiag_varty)
S3method(plot,mev_thdiag_wadsworth)
S3method(plot,mev_tstab.gpd)
S3method(print,eprof)
S3method(print,mev_egp)
S3method(print,mev_gev)
S3method(print,mev_gpd)
S3method(print,mev_gpdbayes)
S3method(print,mev_pp)
S3method(print,mev_thdiag_infomat)
S3method(print,mev_thdiag_northropcoleman)
S3method(print,mev_thdiag_varty)
S3method(print,mev_thdiag_wadsworth)
S3method(summary,eprof)
S3method(vcov,mev_egp)
S3method(vcov,mev_gev)
S3method(vcov,mev_gpd)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
* `gp.tstab` optimization bounds for profile confidence interval fixed.
* `rparp` failed when a single draw was accepted (due to matrix being cast to vectors)
* `fit.extgp` now handles model 0 (generalized Pareto), fixing #17
* `gpd.ll` and `gpd.nll` correctly return a non-NA value when xi=-1 (#18, reported by Paddy O'Toole)
* `tstab.gpd` now correctly works when input leads to boundary values, and profile likelihood intervals are now correctly truncated in the range of admissible values
* `fit.gpd` now relies on `gpd.ll` to return the negative log likelihood value `nllh`
* Threshold stability plots now compute limits of plot safely, handling missing values in parameter estimates. Intervals are truncated to admissible values

# mev 1.15 (Release date 2023-04-23)

Expand Down
2 changes: 1 addition & 1 deletion R/NCdiag.R
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ plot.mev_thdiag_northropcoleman <- function(x, size = 0.05, ...){
return(invisible(NULL))
}


#' @export
print.mev_thdiag_northropcoleman <-
function(x, digits = max(3, getOption("digits") - 3), level = 0.05, ...) {
cat("Threshold selection method: Northrop and Coleman penultimate model.\n")
Expand Down
2 changes: 1 addition & 1 deletion R/Wdiag.R
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ plot.mev_thdiag_wadsworth <-
}



#' @export
print.mev_thdiag_wadsworth <-
function(x, digits = max(3, getOption("digits") - 3), ...) {
cat("Threshold selection method: Wadsworth's white noise test\n based on sequential Poisson process superposition")
Expand Down
20 changes: 6 additions & 14 deletions R/gp.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@
}
z$conv <- temp$convergence
z$counts <- temp$counts
z$nllh <- temp$value
z$vals <- cbind(sc, xi, u)
z$rate <- length(xdatu)/n
if (calc.se) {
Expand Down Expand Up @@ -264,7 +263,6 @@
xi <- shlink(shmat %*% (x$par[seq(npsc + 1, length.out = npsh)]))
z$conv <- x$convergence
z$counts <- x$counts
z$nllh <- x$value
z$vals <- cbind(sc, xi, u)
if (z$trans) {
z$data <- -log(as.vector((1 + (xi * (xdatu - u))/sc)^(-1/xi)))
Expand Down Expand Up @@ -331,7 +329,6 @@
xi.hat <- mean(log(1 + res$estimate * ex.data))
sigma.hat <- xi.hat/phi.hat
z.fit$mle <- c(sigma.hat, xi.hat)
z.fit$nllh <- res$minimum
z.fit$gradient <- res$gradient
z.fit$code <- res$code
z.fit$counts["function"] <- res$iterations
Expand Down Expand Up @@ -810,6 +807,9 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
xi.tol = 1e-04
xdat <- na.omit(xdat)
xdatu <- xdat[xdat > threshold] - threshold
if(length(xdatu) < 3){
stop("Too few observations to fit a generalized Pareto model.")
}
# Optimization of model, depending on routine
method <- match.arg(method)
if(!is.null(fpar)){
Expand Down Expand Up @@ -862,11 +862,9 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
temp$mle <- c(temp$a, -temp$k)
}
temp$mle <- temp$par
temp$nllh <- -temp$value
nll_limxi <- -length(mdat) * log(max(mdat))
if(temp$value < nll_limxi){
temp$mle <- c(max(mdat), -1+1e-7)
temp$nllh <- -nll_limxi
temp$mle <- c(max(mdat), -1)
}
temp$conv <- temp$convergence
} else if (method == "fpar") {
Expand Down Expand Up @@ -913,10 +911,6 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
}
temp$mle <- temp$par
temp$param <- c(temp$par, fixed_param)[wfo]
temp$nllh <- temp$value
if(isTRUE(all.equal(temp$mle[2], -1, check.attributes = FALSE))){
temp$nllh <- -length(xdatu)*log(max(xdatu))
}
temp$conv <- temp$convergence
# Initialize standard errors and covariance matrix
temp$vcov <- matrix(NA)
Expand All @@ -936,7 +930,7 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
vcov = temp$vcov,
threshold = threshold,
method = method,
nllh = temp$nllh,
nllh = -gpd.ll(temp$mle, dat = xdatu),
nat = sum(xdat > threshold),
pat = length(xdatu)/length(xdat),
convergence = ifelse(temp$conv == 0, "successful", temp$conv),
Expand Down Expand Up @@ -1016,13 +1010,11 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
temp$mle <- c(pjn$a, -pjn$k) # mle for (sigma, xi)
sc <- rep(temp$mle[1], length(xdatu))
xi <- temp$mle[2]
temp$nllh <- sum(log(sc)) + sum(log(1 + xi * xdatu/sc) * (1/xi + 1))
temp$conv <- pjn$conv
}
if(temp$mle[2] < -1){
#Transform the solution (unbounded) to boundary - with maximum observation for scale and -1 for shape.
temp$mle <- c(max(xdatu) + 1e-10, -1)
tmp$nllh <- -gpd.ll(par = temp$mle, dat = xdatu)
}

# Collecting observations from temp and formatting the output
Expand Down Expand Up @@ -1059,7 +1051,7 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
vcov = invobsinfomat,
threshold = threshold,
method = method,
nllh = temp$nllh,
nllh = -gpd.ll(par = temp$mle, dat = xdatu),
nat = sum(xdat > threshold),
pat = length(xdatu)/length(xdat),
convergence = ifelse(temp$conv == 0, "successful", temp$conv),
Expand Down
1 change: 1 addition & 0 deletions R/multivar.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ angextrapo <- function(dat, qu = 0.95, w = seq(0.05, 0.95, length = 20)) {
#' }
#' @export
lambdadep <- function(dat, qu = 0.95, method = c("hill", "mle", "bayes"), plot = TRUE) {
method <- match.arg(method)
## Hill estimator for fixed kth order statistic
hill_thresh <- function(dat, qu = 0.95, thresh = quantile(dat, qu)) {
dat <- as.numeric(dat)
Expand Down
22 changes: 15 additions & 7 deletions R/penultimate.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ fit.egp <- function(xdat,
warning("Length of threshold vector greater than one. Selecting first component.")
thresh <- thresh[1]
}
if(sum(xdat > thresh[1]) < 4L){
stop("Not enough observations to fit an extended generalized Pareto model.")
}
# If no initial values are provided, fit a GP distribution to obtain them
changinit <- missing(init)
if(!changinit){
Expand All @@ -235,7 +238,9 @@ fit.egp <- function(xdat,
}
}
if (changinit) {
init <- c(kappa = 1.01, suppressWarnings(fit.gpd(xdat, threshold = thresh[1], show = FALSE)$est))
init <- c(kappa = 1.01,
suppressWarnings(fit.gpd(xdat, threshold = thresh[1],
show = FALSE)$est))
# Change starting values for boundary cases, otherwise the optimization stalls
if(init[3] < -0.99){
init[3] <- -0.97
Expand Down Expand Up @@ -381,28 +386,31 @@ tstab.egp <- function(xdat,
} else if (length(thresh) <= 1) {
stop("Invalid argument\"thresh\" provided;\n please use a vector of threshold candidates of length at least 2")
}
pe <- se <- matrix(0, ncol = 4, nrow = length(thresh))
pe <- se <- matrix(NA, ncol = 4, nrow = length(thresh))
conv <- rep(0, length(thresh))
fit <- suppressWarnings(fit.egp(xdat = xdat, thresh = thresh[1], model = model))
fit <- try(suppressWarnings(fit.egp(xdat = xdat, thresh = thresh[1], model = model)))
if(!inherits(fit, "try-error")){
pe[1, -4] <- fit$param
colnames(pe) <- colnames(se) <- c(names(fit$param), "modif scale")
se[1, -4] <- fit$std.err
conv[1] <- ifelse(is.character(fit$convergence), 0, fit$convergence)
se[1, 4] <- sqrt(cbind(1, -thresh[1]) %*% solve(fit$hessian[-1, -1]) %*% rbind(1, -thresh[1]))[1]
}
for (i in 2:length(thresh)) {
fit <- suppressWarnings(fit.egp(xdat = xdat, thresh = thresh[i], model = model, init = pe[i - 1, -4]))
fit <- try(suppressWarnings(fit.egp(xdat = xdat, thresh = thresh[i], model = model, init = pe[i - 1, -4])))
if(!inherits(fit, "try-error")){
pe[i, -4] <- fit$param
se[i, -4] <- fit$std.err
conv[i] <- ifelse(is.character(fit$convergence), 0, fit$convergence)
# Standard error for the modified scale via the delta-method
se[i, 4] <- sqrt(cbind(1, -thresh[i]) %*% solve(fit$hessian[-1, -1]) %*% rbind(1, -thresh[i]))[1]
}
# Modify point estimates for the scale
pe[, 4] <- pe[, 2] - pe[, 3] * thresh

}
}
# Graphics
plots <- plots[is.finite(plots)]
if (length(plots) > 0) {
if (length(plots) > 0 & !isTRUE(all(is.na(pe)))) {
plots <- sort(unique(plots))
if (!isTRUE(all(plots %in% 1:3))) {
stop("Invalid plot selection. Must be a vector of integers containing indices 1, 2 or 3.")
Expand Down
17 changes: 16 additions & 1 deletion R/profile.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,12 @@
# return(S1)
# }

#' @export
print.eprof <- function(x, ...){
confint.eprof(x, print = TRUE)
}

#' @export
summary.eprof <- function(x, ...){
confint.eprof(x, print = TRUE)
}
Expand Down Expand Up @@ -2239,13 +2241,20 @@ gpd.pll <-
}
# Shape parameter
} else if (param == "shape") {
maxll <- gpd.ll(mle, dat = dat)
maxll <- try(gpd.ll(mle, dat = dat))
if(inherits(maxll, "try-error")){
stop(paste("Could not find maximum likelihood estimation for the sample of size", length(dat)))
}
if(isTRUE(mle['shape'] > -0.5)){
std.error <-
sqrt(solve(gpd.infomat(
par = mle,
dat = dat,
method = "exp"
))[2, 2])
} else{
std.error <- NA
}
constr.mle.shape <- function(xit) {
as.vector(suppressWarnings(
optim(
Expand All @@ -2262,12 +2271,15 @@ gpd.pll <-
}
# Missing psi vector
if (missing(psi) || any(is.null(psi)) || any(is.na(psi))) {
if(isTRUE(is.finite(std.error))){
psirangelow <-
seq(ifelse(mle[2] < 0, -7, -5), -1.5, length = 10) * std.error + mle[2]
psirangelow <- psirangelow[psirangelow > -1]
if(length(psirangelow) > 0L){
lowvals <- sapply(psirangelow, function(par) {
gpd.ll(c(constr.mle.shape(par), par), dat = dat)
}) - maxll
}
psirangehigh <-
seq(1.5, 10, length = 10) * std.error + mle[2]
highvals <- sapply(psirangehigh, function(par) {
Expand All @@ -2283,6 +2295,9 @@ gpd.pll <-
ifelse(is.na(hi), lm(psirangehigh ~ highvals)$coef[2] * -4 + mle[2], hi)
psi <- seq(lo, hi, length = 55)
psi <- psi[psi > -1]
} else{
psi <- seq(-1, 0, length.out = 21)
}
}

pars <- cbind(sapply(psi, constr.mle.shape), psi)
Expand Down
1 change: 1 addition & 0 deletions R/taildep.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ taildep <- function (data,
invisible(out)
}

#' @export
plot.mev_taildep <- function(x, ...){
if(length(x$u) < 3){
return(invisible(NULL))
Expand Down
Loading

0 comments on commit 53e47d2

Please sign in to comment.