Skip to content

Commit

Permalink
better smooth p-values; improved update function
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholasjclark committed Sep 22, 2023
1 parent e6748e4 commit f115d4e
Show file tree
Hide file tree
Showing 22 changed files with 761 additions and 405 deletions.
5 changes: 5 additions & 0 deletions .Rprofile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
dllunload <- function(){
dyn.unload(
system.file("libs", "x64", "mvgam.dll", package = "mvgam")
)
}
60 changes: 43 additions & 17 deletions R/add_stan_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ add_stan_data = function(jags_file, stan_file,
n_sp_data <- NULL
}

# Occasionally there are smooths with no zero vector
# (i.e. for bs = 'fs', they are often just normal(0, lambda))
if(is.null(jags_data$zero)){
zero_data <- NULL
}

# latent variable lines
if(use_lv){
lv_data <- paste0('int<lower=0> n_lv; // number of dynamic factors\n')
Expand Down Expand Up @@ -256,7 +262,18 @@ add_stan_data = function(jags_file, stan_file,
jags_smooth_text <- gsub('##', '//', jags_smooth_text)
jags_smooth_text <- gsub('dexp', 'exponential', jags_smooth_text)

any_ks <- any(grep('K.* <- ', jags_smooth_text))
smooth_labs <- do.call(rbind, lapply(seq_along(ss_gam$smooth), function(x){
data.frame(label = ss_gam$smooth[[x]]$label,
term = paste(ss_gam$smooth[[x]]$term, collapse = ','),
class = class(ss_gam$smooth[[x]])[1])
}))

if(length(ss_gam$sp) > 0 & !all(smooth_labs$class == 'random.effect')){
any_ks <- TRUE
} else {
any_ks <- FALSE
}
# any_ks <- any(grep('K.* <- ', jags_smooth_text))
any_timevarying <- any(grep('// prior for s(time):', jags_smooth_text, fixed = TRUE))
if(any_ks ||
any_timevarying){
Expand Down Expand Up @@ -290,8 +307,8 @@ add_stan_data = function(jags_file, stan_file,
fixed = TRUE)]
stan_file <- stan_file[-grep('[n_sp] lambda', stan_file,
fixed = TRUE)]
stan_file <- stan_file[-grep('vector[num_basis] zero; //', stan_file,
fixed = TRUE)]
# stan_file <- stan_file[-grep('vector[num_basis] zero; //', stan_file,
# fixed = TRUE)]
stan_file <- stan_file[-grep('int<lower=0> n_sp; //', stan_file,
fixed = TRUE)]
}
Expand Down Expand Up @@ -412,22 +429,31 @@ add_stan_data = function(jags_file, stan_file,
if(any(grep('// parametric effect', stan_file))){

# Get indices of parametric effects
min_paras <- as.numeric(sub('.*(?=.$)', '',
sub("\\:.*", "",
stan_file[grep('// parametric effect', stan_file) + 1]),
perl=T))
max_paras <- as.numeric(substr(sub(".*\\:", "",
stan_file[grep('// parametric effect', stan_file) + 1]),
1, 1))
para_indices <- seq(min_paras, max_paras)
smooth_labs <- do.call(rbind, lapply(seq_along(ss_gam$smooth), function(x){
data.frame(label = ss_gam$smooth[[x]]$label,
term = paste(ss_gam$smooth[[x]]$term, collapse = ','),
class = class(ss_gam$smooth[[x]])[1])
}))
lpmat <- predict(ss_gam, type = 'lpmatrix', exclude = smooth_labs$label)
para_indices <- which(apply(lpmat, 2, function(x) !all(x == 0)) == TRUE)

# min_paras <- as.numeric(sub('.*(?=.$)', '',
# sub("\\:.*", "",
# stan_file[grep('// parametric effect', stan_file) + 1]),
# perl=T))
# max_paras <- as.numeric(substr(sub(".*\\:", "",
# stan_file[grep('// parametric effect', stan_file) + 1]),
# 1, 1))
# para_indices <- seq(min_paras, max_paras)

# Get names of parametric terms
int_included <- attr(ss_gam$pterms, 'intercept') == 1L
other_pterms <- attr(ss_gam$pterms, 'term.labels')
all_paras <- other_pterms
if(int_included){
all_paras <- c('(Intercept)', all_paras)
}
# int_included <- attr(ss_gam$pterms, 'intercept') == 1L
# other_pterms <- attr(ss_gam$pterms, 'term.labels')
# all_paras <- other_pterms
# if(int_included){
# all_paras <- c('(Intercept)', all_paras)
# }
all_paras <- names(para_indices)

# Create prior lines for parametric terms
para_lines <- vector()
Expand Down
29 changes: 3 additions & 26 deletions R/families.R
Original file line number Diff line number Diff line change
Expand Up @@ -930,30 +930,8 @@ get_mvgam_resids = function(object, n_cores = 1){
# Family-specific parameters
family_pars <- extract_family_pars(object = object)

# Calculate DS residual distributions in parallel
cl <- parallel::makePSOCKcluster(n_cores)
setDefaultCluster(cl)
clusterExport(NULL, c('sample_seq',
'draw_seq',
'n_series',
'obs_series',
'series_levels',
'family',
'family_pars',
'preds',
'obs_series',
'obs_data',
'fit_engine'),
envir = environment())
clusterEvalQ(cl, library(dplyr))
clusterExport(cl = cl,
unclass(lsf.str(envir = asNamespace("mvgam"),
all = T)),
envir = as.environment(asNamespace("mvgam"))
)

pbapply::pboptions(type = "none")
series_resids <- pbapply::pblapply(seq_len(n_series), function(series){
# Calculate DS residual distributions in sequence (parallel is no faster)
series_resids <- lapply(seq_len(n_series), function(series){
if(class(obs_data)[1] == 'list'){
n_obs <- data.frame(series = obs_series) %>%
dplyr::filter(series == !!(series_levels[series])) %>%
Expand Down Expand Up @@ -1076,8 +1054,7 @@ get_mvgam_resids = function(object, n_cores = 1){

resids

}, cl = cl)
stopCluster(cl)
})
names(series_resids) <- series_levels
return(series_resids)
}
2 changes: 1 addition & 1 deletion R/formula.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
formula.mvgam = function(x, trend_effects = FALSE, ...){
# Check trend_effects
if(trend_effects){
if(is.null(formula$trend_call)){
if(is.null(x$trend_call)){
stop('no trend_formula exists so there is no trend-level model.frame')
}
}
Expand Down
2 changes: 1 addition & 1 deletion R/get_monitor_pars.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ get_monitor_pars = function(family, smooths_included = TRUE,
"student", "Gamma"))

if(smooths_included){
param <- c('rho', 'b', 'ypred', 'mus', 'lp__')
param <- c('rho', 'b', 'ypred', 'mus', 'lp__', 'lambda')
} else {
param <- c('b', 'ypred', 'mus', 'lp__')
}
Expand Down
15 changes: 14 additions & 1 deletion R/get_mvgam_priors.R
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,21 @@ get_mvgam_priors = function(formula,

# Parametric effect priors
if(use_stan){
smooth_labs <- do.call(rbind, lapply(seq_along(ss_gam$smooth), function(x){
data.frame(label = ss_gam$smooth[[x]]$label,
term = paste(ss_gam$smooth[[x]]$term, collapse = ','),
class = class(ss_gam$smooth[[x]])[1])
}))
lpmat <- suppressWarnings(predict(ss_gam, type = 'lpmatrix',
exclude = smooth_labs$label))
para_indices <- which(apply(lpmat, 2, function(x) !all(x == 0)) == TRUE)

int_included <- attr(ss_gam$pterms, 'intercept') == 1L
other_pterms <- attr(ss_gam$pterms, 'term.labels')
if(int_included){
other_pterms <- names(para_indices)[-1]
} else {
other_pterms <- names(para_indices)
}
all_paras <- other_pterms

para_priors <- c()
Expand Down
104 changes: 104 additions & 0 deletions R/lfo_cv.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,110 @@ plot.mvgam_lfo = function(x, ...){
layout(1)
}

#' Function to generate informative priors based on the posterior
#' of a previously fitted model (EXPERIMENTAL!!)
#' @noRd
summarize_posterior = function(object){

# Extract the trend model
trend_model <- object$trend_model
if(trend_model == 'VAR1'){
trend_model <- 'VAR1cor'
}

# Get params that can be modified for this model
if(is.null(object$trend_call)){
update_priors <- get_mvgam_priors(formula = formula(object),
family = object$family,
data = object$obs_data,
trend_model = trend_model)
} else {
update_priors <- get_mvgam_priors(formula = formula(object),
family = object$family,
trend_formula = as.formula(object$trend_call),
data = object$obs_data,
trend_model = trend_model)
}

pars_keep <- c('smooth parameter|process error|fixed effect|Intercept|pop mean|pop sd|AR1|AR2|AR3|amplitude|length scale')
update_priors <- update_priors[grepl(pars_keep, update_priors$param_info), ]

# Get all possible parameters to summarize
pars_to_prior <- vector()
for(i in 1:NROW(update_priors)){
pars_to_prior[i] <- trimws(strsplit(update_priors$prior[i], "[~]")[[1]][1])
}

# Summarize parameter posteriors as Normal distributions
pars_posterior <- list()
for(i in seq_along(pars_to_prior)){
if(pars_to_prior[i] == '(Intercept)'){
post_samps <- mcmc_chains(object$model_output, 'b[1]',
ISB = FALSE)
} else {

suppressWarnings(post_samps <- try(mcmc_chains(object$model_output,
pars_to_prior[i]),
silent = TRUE))

if(inherits(post_samps, 'try-error')){
suppressWarnings(post_samps <- try(as.matrix(mod,
variable = pars_to_prior[i],
regex = TRUE),
silent = TRUE))
}

if(inherits(post_samps, 'try-error')) next
}

new_lowers <- round(apply(post_samps, 2, min), 3)
new_uppers <- round(apply(post_samps, 2, max), 3)
means <- round(apply(post_samps, 2, mean), 3)
sds <- round(apply(post_samps, 2, sd), 3)
priors <- paste0('normal(', means, ', ', sds, ')')
parametric <- grepl('Intercept|fixed effect',
update_priors$param_info[i])
parnames <- dimnames(post_samps)[[2]]

priorstring <- list()
for(j in 1:NCOL(post_samps)){
priorstring[[j]] <- prior_string(priors[j],
class = parnames[j],
resp = parametric,
ub = new_uppers[j],
lb = new_lowers[j],
group = pars_to_prior[i])
}

pars_posterior[[i]] <- do.call(rbind, priorstring)
}
pars_posterior <- do.call(rbind, pars_posterior)
pars_posterior <- pars_posterior[(pars_posterior$ub == 0 &
pars_posterior$lb == 0)
== FALSE, ]
pars_posterior <- pars_posterior[(pars_posterior$ub == 1 &
pars_posterior$lb == 1)
== FALSE, ]
pars_posterior$param_name <- NA
pars_posterior$parametric <- pars_posterior$resp
pars_posterior$resp <- NULL
attr(pars_posterior, 'posterior_to_prior') <- TRUE

return(pars_posterior)
}

#' Function for a leave-future-out update that uses informative priors
#' @noRd
lfo_update = function(object, ...){
# Get informative priors
priors <- summarize_posterior(object)

# Run the update using the informative priors
update.mvgam(object,
priors = priors,
...)
}

#' Function to generate training and testing splits
#' @noRd
cv_split = function(data, last_train, fc_horizon = 1){
Expand Down
3 changes: 3 additions & 0 deletions R/mvgam-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@
#' \item `test_data` If test data were supplied (as argument `newdata` in the original model), it
#' will be returned. Othwerise `NULL`
#' \item `fit_engine` `Character` describing the fit engine, either as `stan` or `jags`
#' \item `backend` `Character` describing the backend used for modelling, either as `rstan`, `cmdstanr` or `rjags`
#' \item `algorithm` `Character` describing the algorithm used for finding the posterior,
#' either as `sampling`, `meanfield` or `fullrank`
#' \item `max_treedepth` If the model was fitted using `Stan`, the value supplied for the maximum
#' treedepth tuning parameter is returned (see \code{\link[rstan]{stan}} for details).
#' Otherwise `NULL`
Expand Down
Loading

0 comments on commit f115d4e

Please sign in to comment.