Skip to content

Commit

Permalink
vectorise predict function
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholasjclark committed Aug 28, 2023
1 parent ed14f1f commit cc29f1d
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 162 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -162,5 +162,4 @@ importFrom(stats,update.formula)
importFrom(stats,var)
importFrom(utils,getFromNamespace)
importFrom(utils,head)
importFrom(utils,lsf.str)
importFrom(utils,tail)
4 changes: 0 additions & 4 deletions R/marginaleffects.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#' (only applicable if a `trend_formula` was specified in the model)
#' @param process_error `logical`. If `TRUE`, uncertainty in the latent
#' process (or trend) model is incorporated in predictions
#' @param n_cores `Integer` specifying number of cores to use for
#' generating predictions
#' @name mvgam_marginaleffects
#' @author Nicholas J Clark
NULL
Expand Down Expand Up @@ -77,13 +75,11 @@ get_vcov.mvgam <- function(model,
get_predict.mvgam <- function(model, newdata,
type = 'response',
process_error = FALSE,
n_cores = 1,
...) {
preds <- predict(object = model,
newdata = newdata,
type = type,
process_error = process_error,
n_cores = n_cores,
...)
out <- data.frame(
rowid = seq_len(NCOL(preds)),
Expand Down
209 changes: 74 additions & 135 deletions R/predict.mvgam.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
#'Predict from the GAM component of an mvgam model
#'@importFrom parallel clusterExport stopCluster setDefaultCluster
#'@importFrom utils lsf.str
#'@importFrom stats predict
#'@param object \code{list} object returned from \code{mvgam}
#'@param newdata Optional \code{dataframe} or \code{list} of test data containing the
#'variables included in the linear predictor of \code{formula}. If not supplied,
#'predictions are generated for the original observations used for the model fit.
#'@param data_test Deprecated. Still works in place of \code{newdata} but users are recommended to use
#'\code{newdata} instead for more seamless integration into `R` workflows
#'@param n_cores \code{integer} specifying number of cores for generating predictions in parallel
#'@param type When this has the value \code{link} (default) the linear predictor is calculated on the link scale.
#'If \code{expected} is used, predictions reflect the expectation of the response (the mean)
#'but ignore uncertainty in the observation process. When \code{response} is used,
Expand Down Expand Up @@ -37,8 +34,7 @@
predict.mvgam = function(object, newdata,
data_test,
type = 'link',
process_error = TRUE,
n_cores = 1, ...){
process_error = TRUE, ...){

# Argument checks
if(!missing("data_test")){
Expand All @@ -56,7 +52,6 @@ predict.mvgam = function(object, newdata,
levels = levels(object$obs_data$series))
}

validate_pos_integer(n_cores)
type <- match.arg(arg = type, choices = c("link",
"expected",
"response"))
Expand All @@ -81,11 +76,10 @@ predict.mvgam = function(object, newdata,
if(object$trend_model %in% c('VAR1')){
family_pars <- list(sigma_obs =
mcmc_chains(object$model_output, 'Sigma')[ ,
seq(1, NCOL(object$ytimes)^2,
by = NCOL(object$ytimes)+1)])
seq(1, NCOL(object$ytimes)^2,
by = NCOL(object$ytimes)+1)])
}


# Indicators of which trend to use for each observation
if(inherits(newdata, 'list')){
data.frame(series = newdata$series) %>%
Expand All @@ -99,47 +93,33 @@ predict.mvgam = function(object, newdata,
dplyr::left_join(object$trend_map,
by = 'series') -> newdata_trend
}

trend_ind <- as.numeric(newdata_trend$trend)

# Beta coefficients for GAM process model component
betas <- mcmc_chains(object$model_output, 'b_trend')

# Loop across all posterior samples and calculate
# trend predictions
cl <- parallel::makePSOCKcluster(n_cores)
setDefaultCluster(cl)
clusterExport(NULL, c('betas',
'family_pars',
'Xp',
'trend_ind'),
envir = environment())
parallel::clusterExport(cl = cl,
unclass(lsf.str(envir = asNamespace("mvgam"),
all = T)),
envir = as.environment(asNamespace("mvgam"))
)

pbapply::pboptions(type = "none")
trend_predictions <- do.call(rbind, pbapply::pblapply(seq_len(dim(betas)[1]),
function(x){

# Trend-specific SD parameters
par_extracts <- lapply(seq_along(family_pars), function(j){
if(is.matrix(family_pars[[j]])){
family_pars[[j]][x, trend_ind]
} else {
family_pars[[j]][x]
}
})
names(par_extracts) <- names(family_pars)
mvgam_predict(family = 'gaussian',
Xp = Xp,
type = 'response',
betas = betas[x,],
family_pars = par_extracts)
}, cl = cl))
stopCluster(cl)
# Family parameters spread into a vector
family_extracts <- lapply(seq_along(family_pars), function(j){
if(is.matrix(family_pars[[j]])){
as.vector(family_pars[[j]][, trend_ind])
} else {
family_pars[[j]]
}
})
names(family_extracts) <- names(family_pars)

# Pre-multiply the linear predictors
all_linpreds <- as.matrix(as.vector(t(apply(as.matrix(betas), 1,
function(row) Xp %*% row +
attr(Xp, 'model.offset')))))
attr(all_linpreds, 'model.offset') <- 0

# Trend stationary predictions
trend_predictions <- mvgam_predict(family = 'gaussian',
Xp = all_linpreds,
type = 'response',
betas = 1,
family_pars = family_extracts)

} else if(object$trend_model != 'None' & process_error){
# If no linear predictor for the trends but a dynamic trend model was used,
Expand Down Expand Up @@ -204,47 +184,34 @@ predict.mvgam = function(object, newdata,
Xp <- matrix(1, ncol = 1, nrow = NROW(newdata))
attr(Xp, 'model.offset') <- 0

# Loop across all posterior samples and calculate
# trend predictions
cl <- parallel::makePSOCKcluster(n_cores)
setDefaultCluster(cl)
clusterExport(NULL, c('betas',
'family_pars',
'Xp',
'trend_ind'),
envir = environment())
parallel::clusterExport(cl = cl,
unclass(lsf.str(envir = asNamespace("mvgam"),
all = T)),
envir = as.environment(asNamespace("mvgam"))
)

pbapply::pboptions(type = "none")
trend_predictions <- do.call(rbind,
pbapply::pblapply(seq_len(dim(betas)[1]),
function(x){

# Trend-specific SD parameters
par_extracts <- lapply(seq_along(family_pars), function(j){
if(is.matrix(family_pars[[j]])){
family_pars[[j]][x, trend_ind]
} else {
family_pars[[j]][x]
}
})
names(par_extracts) <- names(family_pars)
mvgam_predict(family = 'gaussian',
Xp = Xp,
type = 'response',
betas = betas[x,],
family_pars = par_extracts)
}, cl = cl))
stopCluster(cl)
# Family parameters spread into a vector
family_extracts <- lapply(seq_along(family_pars), function(j){
if(is.matrix(family_pars[[j]])){
as.vector(family_pars[[j]][, trend_ind])
} else {
family_pars[[j]]
}
})
names(family_extracts) <- names(family_pars)

# Pre-multiply the linear predictors
all_linpreds <- as.matrix(as.vector(t(apply(as.matrix(betas), 1,
function(row) Xp %*% row +
attr(Xp, 'model.offset')))))
attr(all_linpreds, 'model.offset') <- 0

# Trend stationary predictions
trend_predictions <- mvgam_predict(family = 'gaussian',
Xp = all_linpreds,
type = 'response',
betas = 1,
family_pars = family_extracts)

}
} else {
# If no trend_model was used, or if process_error == FALSE,
# ignore uncertainty in any latent trend component
trend_predictions <- NULL
trend_predictions <- 0
}

#### Once trend predictions are made, calculate observation predictions ####
Expand All @@ -264,61 +231,33 @@ predict.mvgam = function(object, newdata,
# Determine which series each observation belongs to
series_ind <- as.numeric(newdata$series)

# Loop across all posterior samples and calculate observation model
# predictions
cl <- parallel::makePSOCKcluster(n_cores)
setDefaultCluster(cl)

trend_model <- object$trend_model

clusterExport(NULL, c('betas',
'family_pars',
'trend_model',
'family',
'trend_predictions',
'type',
'Xp',
'series_ind'),
envir = environment())
parallel::clusterExport(cl = cl,
unclass(lsf.str(envir = asNamespace("mvgam"),
all = T)),
envir = as.environment(asNamespace("mvgam"))
)
pbapply::pboptions(type = "none")
predictions <- do.call(rbind, pbapply::pblapply(seq_len(dim(betas)[1]), function(x){

# Family-specific parameters
par_extracts <- lapply(seq_along(family_pars), function(j){
if(is.matrix(family_pars[[j]])){
family_pars[[j]][x, series_ind]
} else {
family_pars[[j]][x]
}
})
names(par_extracts) <- names(family_pars)

# Set up Xp matrix to include the latent process predictions
# if necessary
if(trend_model != 'None' & !is.null(trend_predictions[x,])){
Xpmat <- cbind(as.matrix(Xp),
trend_predictions[x,])
attr(Xpmat, 'model.offset') <- attr(Xp, 'model.offset')
betas_pred <- c(betas[x,], 1)
# Family parameters spread into a vector
family_extracts <- lapply(seq_along(family_pars), function(j){
if(is.matrix(family_pars[[j]])){
as.vector(family_pars[[j]][, series_ind])
} else {
Xpmat <- Xp
betas_pred <- betas[x, ]
family_pars[[j]]
}

# Calculate predictions
as.vector(mvgam_predict(family = family,
Xp = Xpmat,
type = type,
betas = betas_pred,
family_pars = par_extracts))
}, cl = cl))
stopCluster(cl)

})
names(family_extracts) <- names(family_pars)

# Pre-multiply the linear predictors, including any offset and trend
# predictions if applicable
all_linpreds <- as.matrix(as.vector(t(apply(as.matrix(betas), 1,
function(row) Xp %*% row +
attr(Xp, 'model.offset')))) +
trend_predictions)
attr(all_linpreds, 'model.offset') <- 0

# Calculate vectorized predictions
predictions_vec <- mvgam_predict(family = family,
Xp = all_linpreds,
type = type,
betas = 1,
family_pars = family_extracts)

# Convert back to matrix
predictions <- matrix(predictions_vec, nrow = NROW(betas))
return(predictions)
}

12 changes: 1 addition & 11 deletions man/mvgam_marginaleffects.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 1 addition & 11 deletions man/predict.mvgam.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit cc29f1d

Please sign in to comment.