From 7b8569c06e7d0039dff4cac3ea3b569e1b10cebe Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 27 Sep 2024 11:23:59 -0700 Subject: [PATCH] Add `newdata` arg to simulate.sdmTMB() --- DESCRIPTION | 2 +- NEWS.md | 7 ++++ R/tmb-sim.R | 34 +++++++++++++++++-- man/simulate.sdmTMB.Rd | 8 +++++ tests/testthat/test-6-tmb-simulation.R | 45 ++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a5a41a129..d3877b2a3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: sdmTMB Title: Spatial and Spatiotemporal SPDE-Based GLMMs with 'TMB' -Version: 0.6.0.9011 +Version: 0.6.0.9012 Authors@R: c( person(c("Sean", "C."), "Anderson", , "sean@seananderson.ca", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index a1a24b087..adf28bfd3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,12 @@ # sdmTMB (development version) +* Add `newdata` argument to `simulate.sdmTMB()`. This enables simulating on + a new data frame similar to how one would predict on new data. + +* Add `mle_mvn_samples` argument to `simulate.sdmTMB()`. Defaults to "single". + If "multiple", then a sample from the random effects is taken for each + simulation iteration. + * Add `project()` experimental function. * Add print method for `sdmTMB_cv()` output. #319 diff --git a/R/tmb-sim.R b/R/tmb-sim.R index d5a29c1b0..8859c6029 100644 --- a/R/tmb-sim.R +++ b/R/tmb-sim.R @@ -332,8 +332,12 @@ sdmTMB_simulate <- function(formula, #' effects (this only simulates observation error). `~0` or `NA` to simulate #' new random affects (smoothers, which internally are random effects, will #' not be simulated as new). +#' @param mle_mvn_samples Applies if `type = "mle-mvn"`. If `"single"`, take +#' a single MVN draw from the random effects. If `"multiple"`, take an MVN +#' draw from the random effects for each of the `nsim`. #' @param model If a delta/hurdle model, which model to simulate from? #' `NA` = combined, `1` = first model, `2` = second mdoel. +#' @param newdata Optional new data frame from which to simulate. #' @param mcmc_samples An optional matrix of MCMC samples. See `extract_mcmc()` #' in the \href{https://github.com/pbs-assess/sdmTMBextra}{sdmTMBextra} #' package. @@ -381,10 +385,16 @@ sdmTMB_simulate <- function(formula, simulate.sdmTMB <- function(object, nsim = 1L, seed = sample.int(1e6, 1L), type = c("mle-eb", "mle-mvn"), model = c(NA, 1, 2), - re_form = NULL, mcmc_samples = NULL, silent = FALSE, ...) { + newdata = NULL, + re_form = NULL, + mle_mvn_samples = c("single", "multiple"), + mcmc_samples = NULL, + silent = FALSE, + ...) { set.seed(seed) type <- tolower(type) type <- match.arg(type) + mle_mvn_samples <- match.arg(mle_mvn_samples) assert_that(as.integer(model[[1]]) %in% c(NA_integer_, 1L, 2L)) # need to re-attach environment if in fresh session @@ -403,6 +413,16 @@ simulate.sdmTMB <- function(object, nsim = 1L, seed = sample.int(1e6, 1L), stopifnot(length(object$tmb_data$sim_re) == 6L) # in case this gets changed tmb_dat$sim_re <- c(rep(1L, 5L), 0L) # last is smoothers; don't simulate them } + + if (!is.null(newdata)) { + # generate prediction TMB data list + p <- predict(object, newdata = newdata, return_tmb_data = TRUE, ...) + # move data elements over + p <- move_proj_to_tmbdat(p, object, newdata) + p$sim_re <- tmb_dat$sim_re + tmb_dat <- p + } + newobj <- TMB::MakeADFun( data = tmb_dat, map = object$tmb_map, random = object$tmb_random, parameters = object$tmb_obj$env$parList(), DLL = "sdmTMB" @@ -411,9 +431,17 @@ simulate.sdmTMB <- function(object, nsim = 1L, seed = sample.int(1e6, 1L), # params MLE/MVN stuff if (is.null(mcmc_samples)) { if (type == "mle-mvn") { - new_par <- .one_sample_posterior(object) + if (mle_mvn_samples == "single") { + new_par <- .one_sample_posterior(object) + new_par <- replicate(nsim, new_par) + } else { + new_par <- lapply(seq_len(nsim), \(i) .one_sample_posterior(object)) + new_par <- do.call(cbind, new_par) + } } else if (type == "mle-eb") { new_par <- object$tmb_obj$env$last.par.best + new_par <- lapply(seq_len(nsim), \(i) new_par) + new_par <- do.call(cbind, new_par) } else { cli_abort("`type` type not defined") } @@ -432,7 +460,7 @@ simulate.sdmTMB <- function(object, nsim = 1L, seed = sample.int(1e6, 1L), } else { for (i in seq_len(nsim)) { if (!silent) cli::cli_progress_update() - ret[[i]] <- newobj$simulate(par = new_par, complete = FALSE)$y_i + ret[[i]] <- newobj$simulate(par = new_par[, i, drop = TRUE], complete = FALSE)$y_i } } if (!silent) cli::cli_progress_done() diff --git a/man/simulate.sdmTMB.Rd b/man/simulate.sdmTMB.Rd index 2d8b25caf..18bf7b99c 100644 --- a/man/simulate.sdmTMB.Rd +++ b/man/simulate.sdmTMB.Rd @@ -10,7 +10,9 @@ seed = sample.int(1e+06, 1L), type = c("mle-eb", "mle-mvn"), model = c(NA, 1, 2), + newdata = NULL, re_form = NULL, + mle_mvn_samples = c("single", "multiple"), mcmc_samples = NULL, silent = FALSE, ... @@ -33,11 +35,17 @@ used for goodness of fit testing (e.g., with the DHARMa package).} \item{model}{If a delta/hurdle model, which model to simulate from? \code{NA} = combined, \code{1} = first model, \code{2} = second mdoel.} +\item{newdata}{Optional new data frame from which to simulate.} + \item{re_form}{\code{NULL} to specify a simulation conditional on fitted random effects (this only simulates observation error). \code{~0} or \code{NA} to simulate new random affects (smoothers, which internally are random effects, will not be simulated as new).} +\item{mle_mvn_samples}{Applies if \code{type = "mle-mvn"}. If \code{"single"}, take +a single MVN draw from the random effects. If \code{"multiple"}, take an MVN +draw from the random effects for each of the \code{nsim}.} + \item{mcmc_samples}{An optional matrix of MCMC samples. See \code{extract_mcmc()} in the \href{https://github.com/pbs-assess/sdmTMBextra}{sdmTMBextra} package.} diff --git a/tests/testthat/test-6-tmb-simulation.R b/tests/testthat/test-6-tmb-simulation.R index 2ce854af2..c7c383919 100644 --- a/tests/testthat/test-6-tmb-simulation.R +++ b/tests/testthat/test-6-tmb-simulation.R @@ -180,6 +180,51 @@ test_that("simulate() behaves OK with or without random effects across types", { expect_length(s, 969) }) +test_that("simulate() method works with newdata", { + skip_on_cran() + fit <- sdmTMB( + present ~ 1, + time = "year", + data = pcod_2011, spatial = "on", + spatiotemporal = "iid", + family = binomial(), + mesh = pcod_mesh_2011 + ) + s <- simulate(fit) + expect_true(nrow(s) == nrow(pcod_2011)) + g <- replicate_df(qcs_grid, "year", unique(pcod_2011$year)) + s <- simulate(fit, newdata = g) + expect_true(nrow(s) == nrow(g)) + s <- simulate(fit, newdata = subset(g, year == 2011)) + nrow(s) + expect_true(nrow(s) == nrow(subset(g, year == 2011))) + + gg <- subset(g, year == 2011) + set.seed(1) + s <- simulate(fit, newdata = gg, nsim = 400L) + a <- apply(s, 1, mean) + p <- predict(fit, newdata = gg) + plot(a, plogis(p$est)) + expect_gt(cor(a, plogis(p$est)), 0.98) + + set.seed(1) + s1 <- simulate(fit, type = "mle-mvn", mle_mvn_samples = "single", nsim = 100) + set.seed(1) + s2 <- simulate(fit, type = "mle-mvn", mle_mvn_samples = "multiple", nsim = 100) + set.seed(1) + s3 <- simulate(fit, type = "mle-eb", nsim = 100) + + expect_false(identical(s1, s2)) + expect_false(identical(s1, s3)) + + sd1 <- apply(s1, 1, sd) + sd2 <- apply(s2, 1, sd) + sd3 <- apply(s3, 1, sd) + + expect_lt(mean(sd1), mean(sd2)) +}) + + # test_that("TMB Delta simulation works", { # skip_on_cran() # skip_if_not_installed("INLA")