Skip to content

Commit

Permalink
Overhaul how extra-time data is stored #326
Browse files Browse the repository at this point in the history
  • Loading branch information
seananderson committed Mar 21, 2024
1 parent c01b13a commit c439c29
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 14 deletions.
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# sdmTMB (development version)

* Fix bug in `sanity()` where gradient checks were missing `abs()` such that
large negative gradients weren't getting caught. #324

* Return `offset` vector in fitted object as an element. Ensure any extra time
rows of data in the `data` element of the fitted object do not include the
extra time slices.

* Add experimental residuals option "mle-mvn" where a single approximate
posterior sample of the random effects is drawn and these are combined
with the MLE fixed effects to produce residuals. This may become the
Expand Down
14 changes: 11 additions & 3 deletions R/fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,8 @@ sdmTMB <- function(
spde$loc_xy <- as.matrix(data[,spde$xy_cols,drop=FALSE])
spde$A_st <- fmesher::fm_basis(spde$mesh, loc = spde$loc_xy)
spde$sdm_spatial_id <- seq(1, nrow(data)) # FIXME?
} else {
data[["__fake_data__"]] <- FALSE
}
check_irregalar_time(data, time, spatiotemporal, time_varying)

Expand Down Expand Up @@ -1373,8 +1375,13 @@ sdmTMB <- function(
prof <- c("b_j")
if (delta) prof <- c(prof, "b_j2")

out_structure <- structure(list(
data = data,
fd <- data[['__fake_data__']]
tmp <- data[!fd,,drop=FALSE]
tmp[['__fake_data__']] <- tmp[['__weight_sdmTMB__']] <-
tmp[['__sdmTMB_offset__']] <- tmp[['__dcens_upr__']] <- NULL
out_structure <- structure(list(
data = tmp,
offset = offset[!fd],
spde = spde,
formula = original_formula,
split_formula = split_formula,
Expand All @@ -1385,7 +1392,7 @@ sdmTMB <- function(
time = time,
family = family,
smoothers = sm,
response = y_i,
response = y_i[!fd,,drop=FALSE],
tmb_data = tmb_data,
tmb_params = tmb_params,
tmb_map = tmb_map,
Expand All @@ -1401,6 +1408,7 @@ sdmTMB <- function(
contrasts = lapply(X_ij, attr, which = "contrasts"),
terms = lapply(mf, attr, which = "terms"),
extra_time = extra_time,
fitted_time = sort(unique(data[[time]])),
xlevels = lapply(seq_along(mf), function(i) stats::.getXlevels(mt[[i]], mf[[i]])),
call = match.call(expand.dots = TRUE),
version = utils::packageVersion("sdmTMB")),
Expand Down
5 changes: 3 additions & 2 deletions R/index.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ get_generic <- function(obj, value_name, bias_correct = FALSE, level = 0.95,
# FIXME parallel setup here?
if (!"fake_nd" %in% names(obj)) { # old sdmTMB versions...
predicted_time <- sort(unique(obj$data[[obj$fit_obj$time]]))
fitted_time <- sort(unique(obj$fit_obj$data[[obj$fit_obj$time]]))
fitted_time <- get_fitted_time(obj$fit_obj)
if (!all(fitted_time %in% predicted_time)) {
cli_abort(paste0("Some of the fitted time elements were not predicted ",
"on with `predict.sdmTMB()`. Either supply all time elements to ",
Expand Down Expand Up @@ -236,7 +236,8 @@ get_generic <- function(obj, value_name, bias_correct = FALSE, level = 0.95,
}
d$lwr <- as.numeric(trans(d$trans_est + stats::qnorm((1-level)/2) * d$se))
d$upr <- as.numeric(trans(d$trans_est + stats::qnorm(1-(1-level)/2) * d$se))
d[[time_name]] <- sort(unique(obj$fit_obj$data[[time_name]]))

d[[time_name]] <- get_fitted_time(obj$fit_obj)
# d$max_gradient <- max(conv$final_grads)
# d$bad_eig <- conv$bad_eig

Expand Down
4 changes: 2 additions & 2 deletions R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,8 @@ predict.sdmTMB <- function(object, newdata = NULL,
}

check_time_class(object, newdata)
original_time <- as.numeric(sort(unique(object$data[[object$time]])))
new_data_time <- as.numeric(sort(unique(newdata[[object$time]])))
original_time <- as.integer(get_fitted_time(object))
new_data_time <- as.integer(sort(unique(newdata[[object$time]])))

if (!all(new_data_time %in% original_time))
cli_abort(c("Some new time elements were found in `newdata`. ",
Expand Down
4 changes: 2 additions & 2 deletions R/print.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ print_time_varying <- function(x, m = 1) {
tv_names <- colnames(model.matrix(x$time_varying, x$data))
mm_tv <- cbind(round(as.numeric(b_rw_t_est), 2L), round(as.numeric(b_rw_t_se), 2L))
colnames(mm_tv) <- c("coef.est", "coef.se")
time_slices <- sort(unique(x$data[[x$time]]))
time_slices <- get_fitted_time(x)
row.names(mm_tv) <- paste(rep(tv_names, each = length(time_slices)), time_slices, sep = "-")
} else {
mm_tv <- NULL
Expand Down Expand Up @@ -277,7 +277,7 @@ print_other_parameters <- function(x, m = 1L) {
phi <- get_term_text("phi", "Dispersion parameter")
tweedie_p <- get_term_text("tweedie_p", "Tweedie p")
gengamma_par <- if ('gengamma' %in% family(x)[[m]]) {
get_term_text("gengamma_Q", "Generalized gamma lambda")
get_term_text("gengamma_Q", "Generalized gamma lambda")
} else ""
sigma_O <- get_term_text("sigma_O", "Spatial SD")
xtra <- if (x$spatiotemporal[m] == "ar1") "marginal " else ""
Expand Down
9 changes: 5 additions & 4 deletions R/residuals.R
Original file line number Diff line number Diff line change
Expand Up @@ -306,11 +306,12 @@ residuals.sdmTMB <- function(object,
res_func <- qres_func
}

if (!"offset" %in% names(object)) cli_abort("This model appears to have been fit with an older sdmTMB.")
if (type %in% c("mle-laplace", "response", "pearson")) {
mu <- linkinv(predict(object, newdata = object$data, offset = object$tmb_data$offset_i)[[est_column]]) # not newdata = NULL
mu <- linkinv(predict(object, newdata = object$data, offset = object$offset)[[est_column]]) # not newdata = NULL
# }
} else if (type == "mvn-laplace") {
mu <- linkinv(predict(object, nsim = 1L, model = model, offset = object$tmb_data$offset_i)[, 1L, drop = TRUE])
mu <- linkinv(predict(object, nsim = 1L, model = model, offset = object$offset)[, 1L, drop = TRUE])
} else if (type == "mle-mcmc") {
if (is.null(mcmc_samples)) {
msg <- c("As of sdmTMB 0.3.0, `mcmc_samples` must be supplied to use `type = 'mle-mcmc'`.",
Expand All @@ -337,7 +338,7 @@ residuals.sdmTMB <- function(object,
mcmc_samples = matrix(p, ncol = 1L),
model = model[[1L]],
nsim = 1L,
offset = object$tmb_data$offset_i
offset = object$offset
)
mu <- linkinv(pred[, 1L, drop = TRUE])
} else {
Expand Down Expand Up @@ -385,7 +386,7 @@ residuals.sdmTMB <- function(object,
cli_inform(paste0("These are residuals for delta model component ", model,
". Use the `model` argument to select the other component."))
}
r
as.vector(r)
}

# from:
Expand Down
8 changes: 8 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,9 @@ expand_time <- function(df, time_slices, time_column, weights, offset, upr) {
df[["__weight_sdmTMB__"]] <- if (!is.null(weights)) weights else 1
df[["__sdmTMB_offset__"]] <- if (!is.null(offset)) offset else 0
df[["__dcens_upr__"]] <- if (!is.null(upr)) upr else NA_real_
df[["__fake_data__"]] <- FALSE
fake_df <- df[1L, , drop = FALSE]
fake_df[["__fake_data__"]] <- TRUE
fake_df[["__weight_sdmTMB__"]] <- 0 # IMPORTANT: this turns off these data in the likelihood
missing_years <- time_slices[!time_slices %in% df[[time_column]]]
fake_df <- do.call("rbind", replicate(length(missing_years), fake_df, simplify = FALSE))
Expand Down Expand Up @@ -583,3 +585,9 @@ set_delta_model <- function(x, model = c(NA, 1, 2)) {
attr(x, "delta_model_predict") <- model[[1]]
x
}

get_fitted_time <- function(x) {
if (!"fitted_time" %in% names(x))
cli_abort("Missing 'fitted_time' element in fitted object. Please refit the model with a current version of sdmTMB.")
x$fitted_time
}
40 changes: 39 additions & 1 deletion tests/testthat/test-5-residuals.R
Original file line number Diff line number Diff line change
Expand Up @@ -352,5 +352,43 @@ test_that("predict_mle_mcmc() works with extra_time #297", {
)
set.seed(123)
samps <- sdmTMBextra::predict_mle_mcmc(fit, mcmc_iter = 11, mcmc_warmup = 10)
expect_true(nrow(samps) == 972L)
expect_true(nrow(samps) == 969L)
})

test_that("all residuals work with extra time and offsets #326", {
skip_on_cran()
skip_on_ci()
skip_if_not_installed("sdmTMBextra")
mesh <- make_mesh(pcod, c("X", "Y"), cutoff = 30)
pcod$os <- rep(log(0.01), nrow(pcod)) # offset
m <- sdmTMB(
data = pcod,
formula = density ~ 1,
mesh = mesh,
offset = pcod$os,
family = tweedie(link = "log"),
spatial = "on",
time = "year",
extra_time = c(2006, 2008, 2010, 2012, 2014, 2016),
spatiotemporal = "off"
)
set.seed(1)
r <- residuals(m, type = "mle-laplace")
expect_true(is.vector(r))
expect_equal(round(r, 3)[1:10], c(1.182, 0.671, -1.095, 0.511, -0.874, -0.521, 0.018, -1.19, 0.069, 0.973))
set.seed(1)
expect_equal(length(r), 2143L)
r <- residuals(m, type = "mle-mvn")
expect_equal(round(r, 3)[1:10], c(1.24, 0.746, -1.054, 0.506, 0, -0.453, -1.196, -1.449, -0.585, 0.976))
expect_equal(length(r), 2143L)
r <- residuals(m, type = "mvn-laplace")
expect_equal(length(r), 2143L)
r <- residuals(m, type = "response")
expect_equal(length(r), 2143L)
set.seed(1)
p1 <- sdmTMBextra::predict_mle_mcmc(m,
mcmc_iter = 11, mcmc_warmup = 10
)
r <- residuals(m, type = "mle-mcmc", mcmc_samples = p1)
expect_equal(length(r), 2143L)
})

0 comments on commit c439c29

Please sign in to comment.