diff --git a/NEWS.md b/NEWS.md index 5bb7512d6..2f2459c22 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,12 @@ # sdmTMB (development version) +* Fix bug in `sanity()` where gradient checks were missing `abs()` such that + large negative gradients weren't getting caught. #324 + +* Return `offset` vector in fitted object as an element. Ensure any extra time + rows of data in the `data` element of the fitted object do not include the + extra time slices. + * Add experimental residuals option "mle-mvn" where a single approximate posterior sample of the random effects is drawn and these are combined with the MLE fixed effects to produce residuals. This may become the diff --git a/R/fit.R b/R/fit.R index dc47fb93f..fb5f9b1a5 100644 --- a/R/fit.R +++ b/R/fit.R @@ -784,6 +784,8 @@ sdmTMB <- function( spde$loc_xy <- as.matrix(data[,spde$xy_cols,drop=FALSE]) spde$A_st <- fmesher::fm_basis(spde$mesh, loc = spde$loc_xy) spde$sdm_spatial_id <- seq(1, nrow(data)) # FIXME? + } else { + data[["__fake_data__"]] <- FALSE } check_irregalar_time(data, time, spatiotemporal, time_varying) @@ -1373,8 +1375,13 @@ sdmTMB <- function( prof <- c("b_j") if (delta) prof <- c(prof, "b_j2") - out_structure <- structure(list( - data = data, + fd <- data[['__fake_data__']] + tmp <- data[!fd,,drop=FALSE] + tmp[['__fake_data__']] <- tmp[['__weight_sdmTMB__']] <- + tmp[['__sdmTMB_offset__']] <- tmp[['__dcens_upr__']] <- NULL + out_structure <- structure(list( + data = tmp, + offset = offset[!fd], spde = spde, formula = original_formula, split_formula = split_formula, @@ -1385,7 +1392,7 @@ sdmTMB <- function( time = time, family = family, smoothers = sm, - response = y_i, + response = y_i[!fd,,drop=FALSE], tmb_data = tmb_data, tmb_params = tmb_params, tmb_map = tmb_map, @@ -1401,6 +1408,7 @@ sdmTMB <- function( contrasts = lapply(X_ij, attr, which = "contrasts"), terms = lapply(mf, attr, which = "terms"), extra_time = extra_time, + fitted_time = sort(unique(data[[time]])), xlevels = lapply(seq_along(mf), function(i) stats::.getXlevels(mt[[i]], mf[[i]])), call = match.call(expand.dots = TRUE), version = utils::packageVersion("sdmTMB")), diff --git a/R/index.R b/R/index.R index d04e68c06..8197f77e7 100644 --- a/R/index.R +++ b/R/index.R @@ -129,7 +129,7 @@ get_generic <- function(obj, value_name, bias_correct = FALSE, level = 0.95, # FIXME parallel setup here? if (!"fake_nd" %in% names(obj)) { # old sdmTMB versions... predicted_time <- sort(unique(obj$data[[obj$fit_obj$time]])) - fitted_time <- sort(unique(obj$fit_obj$data[[obj$fit_obj$time]])) + fitted_time <- get_fitted_time(obj$fit_obj) if (!all(fitted_time %in% predicted_time)) { cli_abort(paste0("Some of the fitted time elements were not predicted ", "on with `predict.sdmTMB()`. Either supply all time elements to ", @@ -236,7 +236,8 @@ get_generic <- function(obj, value_name, bias_correct = FALSE, level = 0.95, } d$lwr <- as.numeric(trans(d$trans_est + stats::qnorm((1-level)/2) * d$se)) d$upr <- as.numeric(trans(d$trans_est + stats::qnorm(1-(1-level)/2) * d$se)) - d[[time_name]] <- sort(unique(obj$fit_obj$data[[time_name]])) + + d[[time_name]] <- get_fitted_time(obj$fit_obj) # d$max_gradient <- max(conv$final_grads) # d$bad_eig <- conv$bad_eig diff --git a/R/predict.R b/R/predict.R index 6d37daed7..6e503fece 100644 --- a/R/predict.R +++ b/R/predict.R @@ -361,8 +361,8 @@ predict.sdmTMB <- function(object, newdata = NULL, } check_time_class(object, newdata) - original_time <- as.numeric(sort(unique(object$data[[object$time]]))) - new_data_time <- as.numeric(sort(unique(newdata[[object$time]]))) + original_time <- as.integer(get_fitted_time(object)) + new_data_time <- as.integer(sort(unique(newdata[[object$time]]))) if (!all(new_data_time %in% original_time)) cli_abort(c("Some new time elements were found in `newdata`. ", diff --git a/R/print.R b/R/print.R index c70104d6c..e9a15d89a 100644 --- a/R/print.R +++ b/R/print.R @@ -167,7 +167,7 @@ print_time_varying <- function(x, m = 1) { tv_names <- colnames(model.matrix(x$time_varying, x$data)) mm_tv <- cbind(round(as.numeric(b_rw_t_est), 2L), round(as.numeric(b_rw_t_se), 2L)) colnames(mm_tv) <- c("coef.est", "coef.se") - time_slices <- sort(unique(x$data[[x$time]])) + time_slices <- get_fitted_time(x) row.names(mm_tv) <- paste(rep(tv_names, each = length(time_slices)), time_slices, sep = "-") } else { mm_tv <- NULL @@ -277,7 +277,7 @@ print_other_parameters <- function(x, m = 1L) { phi <- get_term_text("phi", "Dispersion parameter") tweedie_p <- get_term_text("tweedie_p", "Tweedie p") gengamma_par <- if ('gengamma' %in% family(x)[[m]]) { - get_term_text("gengamma_Q", "Generalized gamma lambda") + get_term_text("gengamma_Q", "Generalized gamma lambda") } else "" sigma_O <- get_term_text("sigma_O", "Spatial SD") xtra <- if (x$spatiotemporal[m] == "ar1") "marginal " else "" diff --git a/R/residuals.R b/R/residuals.R index a0a80cb88..c33539e25 100644 --- a/R/residuals.R +++ b/R/residuals.R @@ -306,11 +306,12 @@ residuals.sdmTMB <- function(object, res_func <- qres_func } + if (!"offset" %in% names(object)) cli_abort("This model appears to have been fit with an older sdmTMB.") if (type %in% c("mle-laplace", "response", "pearson")) { - mu <- linkinv(predict(object, newdata = object$data, offset = object$tmb_data$offset_i)[[est_column]]) # not newdata = NULL + mu <- linkinv(predict(object, newdata = object$data, offset = object$offset)[[est_column]]) # not newdata = NULL # } } else if (type == "mvn-laplace") { - mu <- linkinv(predict(object, nsim = 1L, model = model, offset = object$tmb_data$offset_i)[, 1L, drop = TRUE]) + mu <- linkinv(predict(object, nsim = 1L, model = model, offset = object$offset)[, 1L, drop = TRUE]) } else if (type == "mle-mcmc") { if (is.null(mcmc_samples)) { msg <- c("As of sdmTMB 0.3.0, `mcmc_samples` must be supplied to use `type = 'mle-mcmc'`.", @@ -337,7 +338,7 @@ residuals.sdmTMB <- function(object, mcmc_samples = matrix(p, ncol = 1L), model = model[[1L]], nsim = 1L, - offset = object$tmb_data$offset_i + offset = object$offset ) mu <- linkinv(pred[, 1L, drop = TRUE]) } else { @@ -385,7 +386,7 @@ residuals.sdmTMB <- function(object, cli_inform(paste0("These are residuals for delta model component ", model, ". Use the `model` argument to select the other component.")) } - r + as.vector(r) } # from: diff --git a/R/utils.R b/R/utils.R index 9a20a2699..f7deb8d1f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -232,7 +232,9 @@ expand_time <- function(df, time_slices, time_column, weights, offset, upr) { df[["__weight_sdmTMB__"]] <- if (!is.null(weights)) weights else 1 df[["__sdmTMB_offset__"]] <- if (!is.null(offset)) offset else 0 df[["__dcens_upr__"]] <- if (!is.null(upr)) upr else NA_real_ + df[["__fake_data__"]] <- FALSE fake_df <- df[1L, , drop = FALSE] + fake_df[["__fake_data__"]] <- TRUE fake_df[["__weight_sdmTMB__"]] <- 0 # IMPORTANT: this turns off these data in the likelihood missing_years <- time_slices[!time_slices %in% df[[time_column]]] fake_df <- do.call("rbind", replicate(length(missing_years), fake_df, simplify = FALSE)) @@ -583,3 +585,9 @@ set_delta_model <- function(x, model = c(NA, 1, 2)) { attr(x, "delta_model_predict") <- model[[1]] x } + +get_fitted_time <- function(x) { + if (!"fitted_time" %in% names(x)) + cli_abort("Missing 'fitted_time' element in fitted object. Please refit the model with a current version of sdmTMB.") + x$fitted_time +} diff --git a/tests/testthat/test-5-residuals.R b/tests/testthat/test-5-residuals.R index 68ac82b6a..8d0a8424c 100644 --- a/tests/testthat/test-5-residuals.R +++ b/tests/testthat/test-5-residuals.R @@ -352,5 +352,43 @@ test_that("predict_mle_mcmc() works with extra_time #297", { ) set.seed(123) samps <- sdmTMBextra::predict_mle_mcmc(fit, mcmc_iter = 11, mcmc_warmup = 10) - expect_true(nrow(samps) == 972L) + expect_true(nrow(samps) == 969L) +}) + +test_that("all residuals work with extra time and offsets #326", { + skip_on_cran() + skip_on_ci() + skip_if_not_installed("sdmTMBextra") + mesh <- make_mesh(pcod, c("X", "Y"), cutoff = 30) + pcod$os <- rep(log(0.01), nrow(pcod)) # offset + m <- sdmTMB( + data = pcod, + formula = density ~ 1, + mesh = mesh, + offset = pcod$os, + family = tweedie(link = "log"), + spatial = "on", + time = "year", + extra_time = c(2006, 2008, 2010, 2012, 2014, 2016), + spatiotemporal = "off" + ) + set.seed(1) + r <- residuals(m, type = "mle-laplace") + expect_true(is.vector(r)) + expect_equal(round(r, 3)[1:10], c(1.182, 0.671, -1.095, 0.511, -0.874, -0.521, 0.018, -1.19, 0.069, 0.973)) + set.seed(1) + expect_equal(length(r), 2143L) + r <- residuals(m, type = "mle-mvn") + expect_equal(round(r, 3)[1:10], c(1.24, 0.746, -1.054, 0.506, 0, -0.453, -1.196, -1.449, -0.585, 0.976)) + expect_equal(length(r), 2143L) + r <- residuals(m, type = "mvn-laplace") + expect_equal(length(r), 2143L) + r <- residuals(m, type = "response") + expect_equal(length(r), 2143L) + set.seed(1) + p1 <- sdmTMBextra::predict_mle_mcmc(m, + mcmc_iter = 11, mcmc_warmup = 10 + ) + r <- residuals(m, type = "mle-mcmc", mcmc_samples = p1) + expect_equal(length(r), 2143L) })