diff --git a/DESCRIPTION b/DESCRIPTION index 5fdb524d..d4a128e4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,7 @@ Imports: checkmate, matrixStats (>= 0.52), parallel, - posterior (>= 1.5.0), + posterior (>= 1.6.1), stats Suggests: bayesplot (>= 1.7.0), diff --git a/R/psis.R b/R/psis.R index 1b321d70..4d3b631e 100644 --- a/R/psis.R +++ b/R/psis.R @@ -206,62 +206,33 @@ do_psis_i <- function(log_ratios_i, tail_len_i, ...) { S <- length(log_ratios_i) # shift log ratios for safer exponentation lw_i <- log_ratios_i - max(log_ratios_i) - khat <- Inf - if (enough_tail_samples(tail_len_i)) { - ord <- sort.int(lw_i, index.return = TRUE) - tail_ids <- seq(S - tail_len_i + 1, S) - lw_tail <- ord$x[tail_ids] - if (abs(max(lw_tail) - min(lw_tail)) < .Machine$double.eps/100) { - warning( - "Can't fit generalized Pareto distribution ", - "because all tail values are the same.", - call. = FALSE - ) - } else { - cutoff <- ord$x[min(tail_ids) - 1] # largest value smaller than tail values - smoothed <- psis_smooth_tail(lw_tail, cutoff) - khat <- smoothed$k - lw_i[ord$ix[tail_ids]] <- smoothed$tail - } + if (length(unique(tail(log_ratios_i, -tail_len_i))) == 1) { + warning( + "Can't fit generalized Pareto distribution ", + "because all tail values are the same.", + call. = FALSE + ) } + smoothed <- posterior::pareto_smooth_tail( + x = lw_i, + ndraws_tail = tail_len_i, + tail = "right", + are_log_weights = TRUE + ) + + lw_i <- smoothed$x + khat <- smoothed$k + # truncate at max of raw wts (i.e., 0 since max has been subtracted) lw_i[lw_i > 0] <- 0 # shift log weights back so that the smallest log weights remain unchanged lw_i <- lw_i + max(log_ratios_i) - list(log_weights = lw_i, pareto_k = khat) -} - -#' PSIS tail smoothing for a single vector -#' -#' @noRd -#' @param x Vector of tail elements already sorted in ascending order. -#' @return A named list containing: -#' * `tail`: vector same size as `x` containing the logs of the -#' order statistics of the generalized pareto distribution. -#' * `k`: scalar shape parameter estimate. -#' -psis_smooth_tail <- function(x, cutoff) { - len <- length(x) - exp_cutoff <- exp(cutoff) - - # save time not sorting since x already sorted - fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE) - k <- fit$k - sigma <- fit$sigma - if (is.finite(k)) { - p <- (seq_len(len) - 0.5) / len - qq <- qgpd(p, k, sigma) + exp_cutoff - tail <- log(qq) - } else { - tail <- x - } - list(tail = tail, k = k) + list(log_weights = lw_i, pareto_k = if (is.na(khat)) Inf else khat) } - #' Calculate tail lengths to use for fitting the GPD #' #' The number of weights (i.e., tail length) used to fit the generalized Pareto diff --git a/tests/testthat/test_psis.R b/tests/testthat/test_psis.R index ef4b14cd..cc6d11cc 100644 --- a/tests/testthat/test_psis.R +++ b/tests/testthat/test_psis.R @@ -150,14 +150,3 @@ test_that("do_psis_i throws warning if all tail values the same", { val <- expect_warning(do_psis_i(xx, tail_len_i = 6), "all tail values are the same") expect_equal(val$pareto_k, Inf) }) - -test_that("psis_smooth_tail returns original tail values if k is infinite", { - # skip on M1 Mac until we figure out why this test fails only on M1 Mac - skip_if(Sys.info()[["sysname"]] == "Darwin" && R.version$arch == "aarch64") - - xx <- c(1,2,3,4,4,4,4,4,4,4,4) - val <- suppressWarnings(psis_smooth_tail(xx, 3)) - expect_equal(val$tail, xx) - expect_equal(val$k, Inf) -}) -