Parameter stability plot bug fixes (issue #18)

lbelzile · Mar 5, 2024 · 53e47d2 · 53e47d2
1 parent 0d2f65e
commit 53e47d2
Show file tree

Hide file tree

Showing 16 changed files with 369 additions and 204 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mev
 Type: Package
 Title: Modelling of Extreme Values
-Version: 1.16.0002
+Version: 1.16.0003
 Authors@R: c(person(given="Leo", family="Belzile", role = c("aut", "cre"), email = "belzilel@gmail.com", comment = c(ORCID = "0000-0002-9135-014X")), person(given="Jennifer L.", family="Wadsworth", role=c("aut")), person(given="Paul J.", family="Northrop", role=c("aut")), person(given="Scott D.", family="Grimshaw", role=c("aut")), person(given="Jin", family="Zhang", role=c("ctb")), person(given="Michael A.", family="Stephens", role=c("ctb")), person(given="Art B.", family="Owen", role=c("ctb")), person(given="Raphael", family="Huser", role=c("aut")))
 Description: Various tools for the analysis of univariate, multivariate and functional extremes. Exact simulation from max-stable processes [Dombry, Engelke and Oesting (2016) <doi:10.1093/biomet/asw008>, R-Pareto processes for various parametric models, including Brown-Resnick (Wadsworth and Tawn, 2014, <doi:10.1093/biomet/ast042>) and Extremal Student (Thibaud and Opitz, 2015, <doi:10.1093/biomet/asv045>). Threshold selection methods, including Wadsworth (2016) <doi:10.1080/00401706.2014.998345>, and Northrop and Coleman (2014) <doi:10.1007/s10687-014-0183-z>. Multivariate extreme diagnostics. Estimation and likelihoods for univariate extremes, e.g., Coles (2001) <doi:10.1007/978-1-4471-3675-0>.
 License: GPL-3
@@ -30,7 +30,7 @@ Suggests:
     TruncatedNormal (>= 1.1)
 LinkingTo: Rcpp, RcppArmadillo
 LazyData: true
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 VignetteBuilder: knitr
 Encoding: UTF-8
 ByteCompile: true
diff --git a/NAMESPACE b/NAMESPACE
@@ -27,17 +27,23 @@ S3method(plot,mev_gpd)
 S3method(plot,mev_gpdbayes)
 S3method(plot,mev_pp)
 S3method(plot,mev_rlarg)
+S3method(plot,mev_taildep)
 S3method(plot,mev_thdiag_infomat)
 S3method(plot,mev_thdiag_northropcoleman)
 S3method(plot,mev_thdiag_varty)
 S3method(plot,mev_thdiag_wadsworth)
 S3method(plot,mev_tstab.gpd)
+S3method(print,eprof)
 S3method(print,mev_egp)
 S3method(print,mev_gev)
 S3method(print,mev_gpd)
 S3method(print,mev_gpdbayes)
 S3method(print,mev_pp)
 S3method(print,mev_thdiag_infomat)
+S3method(print,mev_thdiag_northropcoleman)
+S3method(print,mev_thdiag_varty)
+S3method(print,mev_thdiag_wadsworth)
+S3method(summary,eprof)
 S3method(vcov,mev_egp)
 S3method(vcov,mev_gev)
 S3method(vcov,mev_gpd)

diff --git a/NEWS.md b/NEWS.md
@@ -10,6 +10,10 @@
 * `gp.tstab` optimization bounds for profile confidence interval fixed.
 * `rparp` failed when a single draw was accepted (due to matrix being cast to vectors)
 * `fit.extgp` now handles model 0 (generalized Pareto), fixing #17
+* `gpd.ll` and `gpd.nll` correctly return a non-NA value when xi=-1 (#18, reported by Paddy O'Toole)
+* `tstab.gpd` now correctly works when input leads to boundary values, and profile likelihood intervals are now correctly truncated in the range of admissible values
+* `fit.gpd` now relies on `gpd.ll` to return the negative log likelihood value `nllh`
+* Threshold stability plots now compute limits of plot safely, handling missing values in parameter estimates. Intervals are truncated to admissible values
 
 # mev 1.15  (Release date 2023-04-23)
 

diff --git a/R/NCdiag.R b/R/NCdiag.R
@@ -285,7 +285,7 @@ plot.mev_thdiag_northropcoleman <- function(x, size = 0.05, ...){
   return(invisible(NULL))
 }
 
-
+#' @export
 print.mev_thdiag_northropcoleman <-
   function(x, digits = max(3, getOption("digits") - 3), level = 0.05, ...) {
     cat("Threshold selection method: Northrop and Coleman penultimate model.\n")

diff --git a/R/Wdiag.R b/R/Wdiag.R
@@ -929,7 +929,7 @@ plot.mev_thdiag_wadsworth <-
   }
 
 
-
+#' @export
 print.mev_thdiag_wadsworth <-
   function(x, digits = max(3, getOption("digits") - 3), ...) {
     cat("Threshold selection method: Wadsworth's white noise test\n based on sequential Poisson process superposition")

diff --git a/R/gp.R b/R/gp.R
@@ -99,7 +99,6 @@
   }
   z$conv <- temp$convergence
   z$counts <- temp$counts
-  z$nllh <- temp$value
   z$vals <- cbind(sc, xi, u)
   z$rate <- length(xdatu)/n
   if (calc.se) {
@@ -264,7 +263,6 @@
   xi <- shlink(shmat %*% (x$par[seq(npsc + 1, length.out = npsh)]))
   z$conv <- x$convergence
   z$counts <- x$counts
-  z$nllh <- x$value
   z$vals <- cbind(sc, xi, u)
   if (z$trans) {
     z$data <- -log(as.vector((1 + (xi * (xdatu - u))/sc)^(-1/xi)))
@@ -331,7 +329,6 @@
   xi.hat <- mean(log(1 + res$estimate * ex.data))
   sigma.hat <- xi.hat/phi.hat
   z.fit$mle <- c(sigma.hat, xi.hat)
-  z.fit$nllh <- res$minimum
   z.fit$gradient <- res$gradient
   z.fit$code <- res$code
   z.fit$counts["function"] <- res$iterations
@@ -810,6 +807,9 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
   xi.tol = 1e-04
   xdat <- na.omit(xdat)
   xdatu <- xdat[xdat > threshold] - threshold
+  if(length(xdatu) < 3){
+    stop("Too few observations to fit a generalized Pareto model.")
+  }
   # Optimization of model, depending on routine
   method <- match.arg(method)
   if(!is.null(fpar)){
@@ -862,11 +862,9 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
       temp$mle <- c(temp$a, -temp$k)
     }
     temp$mle <- temp$par
-    temp$nllh <- -temp$value
     nll_limxi <- -length(mdat) * log(max(mdat))
     if(temp$value <  nll_limxi){
-      temp$mle <- c(max(mdat), -1+1e-7)
-      temp$nllh <- -nll_limxi
+      temp$mle <- c(max(mdat), -1)
     }
     temp$conv <- temp$convergence
   } else if (method == "fpar") {
@@ -913,10 +911,6 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
   }
     temp$mle <- temp$par
     temp$param <- c(temp$par, fixed_param)[wfo]
-    temp$nllh <- temp$value
-    if(isTRUE(all.equal(temp$mle[2], -1, check.attributes = FALSE))){
-      temp$nllh <- -length(xdatu)*log(max(xdatu))
-    }
     temp$conv <- temp$convergence
     # Initialize standard errors and covariance matrix
     temp$vcov <- matrix(NA)
@@ -936,7 +930,7 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
                              vcov = temp$vcov,
                              threshold = threshold,
                              method = method,
-                             nllh = temp$nllh,
+                             nllh = -gpd.ll(temp$mle, dat = xdatu),
                              nat = sum(xdat > threshold),
                              pat = length(xdatu)/length(xdat),
                              convergence = ifelse(temp$conv == 0, "successful", temp$conv),
@@ -1016,13 +1010,11 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
     temp$mle <- c(pjn$a, -pjn$k)  # mle for (sigma, xi)
     sc <- rep(temp$mle[1], length(xdatu))
     xi <- temp$mle[2]
-    temp$nllh <- sum(log(sc)) + sum(log(1 + xi * xdatu/sc) * (1/xi + 1))
     temp$conv <- pjn$conv
   }
   if(temp$mle[2] < -1){
     #Transform the solution (unbounded) to boundary - with maximum observation for scale and -1 for shape.
     temp$mle <- c(max(xdatu) + 1e-10, -1)
-    tmp$nllh <- -gpd.ll(par = temp$mle, dat = xdatu)
   }
 
   # Collecting observations from temp and formatting the output
@@ -1059,7 +1051,7 @@ gp.fit <- function(xdat, threshold, method = c("Grimshaw", "auglag", "nlm", "opt
                            vcov = invobsinfomat,
                            threshold = threshold,
                            method = method,
-                           nllh = temp$nllh,
+                           nllh = -gpd.ll(par = temp$mle, dat = xdatu),
                            nat = sum(xdat > threshold),
                            pat = length(xdatu)/length(xdat),
                            convergence = ifelse(temp$conv == 0, "successful", temp$conv),

diff --git a/R/multivar.R b/R/multivar.R
@@ -136,6 +136,7 @@ angextrapo <- function(dat, qu = 0.95, w = seq(0.05, 0.95, length = 20)) {
 #' }
 #' @export
 lambdadep <- function(dat, qu = 0.95, method = c("hill", "mle", "bayes"), plot = TRUE) {
+  method <- match.arg(method)
     ## Hill estimator for fixed kth order statistic
     hill_thresh <- function(dat, qu = 0.95, thresh = quantile(dat, qu)) {
         dat <- as.numeric(dat)

diff --git a/R/penultimate.R b/R/penultimate.R
@@ -227,6 +227,9 @@ fit.egp <- function(xdat,
         warning("Length of threshold vector greater than one. Selecting first component.")
       thresh <- thresh[1]
     }
+  if(sum(xdat > thresh[1]) < 4L){
+    stop("Not enough observations to fit an extended generalized Pareto model.")
+  }
     # If no initial values are provided, fit a GP distribution to obtain them
     changinit <- missing(init)
     if(!changinit){
@@ -235,7 +238,9 @@ fit.egp <- function(xdat,
      }
     }
     if (changinit) {
-        init <- c(kappa = 1.01, suppressWarnings(fit.gpd(xdat, threshold = thresh[1], show = FALSE)$est))
+        init <- c(kappa = 1.01,
+                  suppressWarnings(fit.gpd(xdat, threshold = thresh[1],
+                                           show = FALSE)$est))
         # Change starting values for boundary cases, otherwise the optimization stalls
         if(init[3] < -0.99){
           init[3] <- -0.97
@@ -381,28 +386,31 @@ tstab.egp <- function(xdat,
     } else if (length(thresh) <= 1) {
         stop("Invalid argument\"thresh\" provided;\n please use a vector of threshold candidates of length at least 2")
     }
-    pe <- se <- matrix(0, ncol = 4, nrow = length(thresh))
+    pe <- se <- matrix(NA, ncol = 4, nrow = length(thresh))
     conv <- rep(0, length(thresh))
-    fit <- suppressWarnings(fit.egp(xdat = xdat, thresh = thresh[1], model = model))
+    fit <- try(suppressWarnings(fit.egp(xdat = xdat, thresh = thresh[1], model = model)))
+    if(!inherits(fit, "try-error")){
     pe[1, -4] <- fit$param
     colnames(pe) <- colnames(se) <- c(names(fit$param), "modif scale")
     se[1, -4] <- fit$std.err
     conv[1] <- ifelse(is.character(fit$convergence), 0, fit$convergence)
     se[1, 4] <- sqrt(cbind(1, -thresh[1]) %*% solve(fit$hessian[-1, -1]) %*% rbind(1, -thresh[1]))[1]
+    }
     for (i in 2:length(thresh)) {
-        fit <- suppressWarnings(fit.egp(xdat = xdat, thresh = thresh[i], model = model, init = pe[i - 1, -4]))
+        fit <- try(suppressWarnings(fit.egp(xdat = xdat, thresh = thresh[i], model = model, init = pe[i - 1, -4])))
+        if(!inherits(fit, "try-error")){
         pe[i, -4] <- fit$param
         se[i, -4] <- fit$std.err
         conv[i] <- ifelse(is.character(fit$convergence), 0, fit$convergence)
         # Standard error for the modified scale via the delta-method
         se[i, 4] <- sqrt(cbind(1, -thresh[i]) %*% solve(fit$hessian[-1, -1]) %*% rbind(1, -thresh[i]))[1]
-    }
     # Modify point estimates for the scale
     pe[, 4] <- pe[, 2] - pe[, 3] * thresh
-
+        }
+    }
     # Graphics
     plots <- plots[is.finite(plots)]
-    if (length(plots) > 0) {
+    if (length(plots) > 0 & !isTRUE(all(is.na(pe)))) {
         plots <- sort(unique(plots))
         if (!isTRUE(all(plots %in% 1:3))) {
             stop("Invalid plot selection. Must be a vector of integers containing indices 1, 2 or 3.")

diff --git a/R/profile.R b/R/profile.R
@@ -62,10 +62,12 @@
 #     return(S1)
 # }
 
+#' @export
 print.eprof <- function(x, ...){
   confint.eprof(x, print = TRUE)
 }
 
+#' @export
 summary.eprof <- function(x, ...){
   confint.eprof(x, print = TRUE)
 }
@@ -2239,13 +2241,20 @@ gpd.pll <-
       }
       # Shape parameter
     } else if (param == "shape") {
-      maxll <- gpd.ll(mle, dat = dat)
+      maxll <- try(gpd.ll(mle, dat = dat))
+      if(inherits(maxll, "try-error")){
+        stop(paste("Could not find maximum likelihood estimation for the sample of size", length(dat)))
+      }
+      if(isTRUE(mle['shape'] > -0.5)){
       std.error <-
         sqrt(solve(gpd.infomat(
           par = mle,
           dat = dat,
           method = "exp"
         ))[2, 2])
+      } else{
+       std.error <- NA
+      }
       constr.mle.shape <- function(xit) {
         as.vector(suppressWarnings(
           optim(
@@ -2262,12 +2271,15 @@ gpd.pll <-
       }
       # Missing psi vector
       if (missing(psi) || any(is.null(psi)) || any(is.na(psi))) {
+        if(isTRUE(is.finite(std.error))){
         psirangelow <-
           seq(ifelse(mle[2] < 0, -7, -5), -1.5, length = 10) * std.error +  mle[2]
         psirangelow <- psirangelow[psirangelow > -1]
+        if(length(psirangelow) > 0L){
         lowvals <- sapply(psirangelow, function(par) {
           gpd.ll(c(constr.mle.shape(par), par), dat = dat)
         }) - maxll
+        }
         psirangehigh <-
           seq(1.5, 10, length = 10) * std.error + mle[2]
         highvals <- sapply(psirangehigh, function(par) {
@@ -2283,6 +2295,9 @@ gpd.pll <-
           ifelse(is.na(hi), lm(psirangehigh ~ highvals)$coef[2] * -4 + mle[2], hi)
         psi <- seq(lo, hi, length = 55)
         psi <- psi[psi > -1]
+        } else{
+          psi <- seq(-1, 0, length.out = 21)
+        }
       }
 
       pars <- cbind(sapply(psi, constr.mle.shape), psi)

diff --git a/R/taildep.R b/R/taildep.R
@@ -252,6 +252,7 @@ taildep <- function (data,
     invisible(out)
 }
 
+#' @export
 plot.mev_taildep <- function(x, ...){
   if(length(x$u) < 3){
     return(invisible(NULL))