diff --git a/DESCRIPTION b/DESCRIPTION index 92b90466..e0cea7c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,6 +30,7 @@ Imports: rstantools (>= 2.1.1), bayesplot (>= 1.5.0), ggplot2 (>= 2.0.0), + extraDistr, matrixStats, parallel, pbapply, diff --git a/NAMESPACE b/NAMESPACE index 72f812f7..21e7cf1c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -56,6 +56,7 @@ S3method(update,mvgam) S3method(variables,mvgam) export("%>%") export(AR) +export(PW) export(RW) export(VAR) export(add_tweedie_lines) @@ -111,6 +112,7 @@ importFrom(brms,pstudent_t) importFrom(brms,qstudent_t) importFrom(brms,rstudent_t) importFrom(brms,student) +importFrom(extraDistr,rlaplace) importFrom(ggplot2,scale_colour_discrete) importFrom(ggplot2,scale_fill_discrete) importFrom(ggplot2,theme_classic) diff --git a/R/RW.R b/R/RW.R index ced6725e..752231c3 100644 --- a/R/RW.R +++ b/R/RW.R @@ -5,13 +5,11 @@ #' they exist purely to help set up a model with particular autoregressive #' trend models. #' @param ma \code{Logical} Include moving average terms of order \code{1}? -#' Default is \code{FALSE}. Note, this option is only currently operational -#' for fitting VARMA models. Support for other models (AR and RW) is upcoming. +#' Default is \code{FALSE}. #' @param cor \code{Logical} Include correlated process errors as part of a #' multivariate normal process model? If \code{TRUE} and if \code{n_series > 1} #' in the supplied data, a fully structured covariance matrix will be estimated -#' for the process errors. Default is \code{FALSE}. Note, this option is only currently operational -#' for fitting VAR / VARMA models. Support for other models (AR and RW) is upcoming. +#' for the process errors. Default is \code{FALSE}. #' @param p A non-negative integer specifying the autoregressive (AR) order. #' Default is \code{1}. Cannot currently be larger than \code{3} #' @return An object of class \code{mvgam_trend}, which contains a list of @@ -19,14 +17,6 @@ #' @rdname RW #' @export RW = function(ma = FALSE, cor = FALSE){ - # if(ma){ - # stop('Moving average terms not yet supported for RW models', - # call. = FALSE) - # } - # if(cor){ - # stop('Correlated errors not yet supported for RW models', - # call. = FALSE) - # } out <- structure(list(trend_model = 'RW', ma = ma, cor = cor, @@ -37,14 +27,6 @@ RW = function(ma = FALSE, cor = FALSE){ #' @rdname RW #' @export AR = function(p = 1, ma = FALSE, cor = FALSE){ - # if(ma){ - # stop('Moving average terms not yet supported for AR models', - # call. = FALSE) - # } - # if(cor){ - # stop('Correlated errors not yet supported for AR models', - # call. = FALSE) - # } validate_pos_integer(p) if(p > 3){ stop("Argument 'p' must be <= 3", diff --git a/R/families.R b/R/families.R index 7569e0f3..0a0b9a96 100644 --- a/R/families.R +++ b/R/families.R @@ -437,13 +437,14 @@ family_inits = function(family, trend_model, # as there is a risk the user will place bounds on priors that conflict # with the inits. Just let Stan choose reasonable and diffuse inits, # this is better anyway for sampling - inits <- function() { - if(model_data$num_basis == 1){ - list(b_raw = array(runif(model_data$num_basis, -2, 2))) - } else { - list(b_raw = runif(model_data$num_basis, -2, 2)) - } + inits <- function() { + if(model_data$num_basis == 1){ + list(b_raw = array(runif(model_data$num_basis, -2, 2))) + } else { + list(b_raw = runif(model_data$num_basis, -2, 2)) } + } + return(inits) } diff --git a/R/forecast.mvgam.R b/R/forecast.mvgam.R index d73ef59b..c9da361c 100644 --- a/R/forecast.mvgam.R +++ b/R/forecast.mvgam.R @@ -733,7 +733,21 @@ forecast_draws = function(object, general_trend_pars <- extract_general_trend_pars(trend_pars = trend_pars, samp_index = samp_index) - if(use_lv || trend_model == 'VAR1'){ + if(use_lv || trend_model %in% c('VAR1', 'PWlinear', 'PWlogistic')){ + if(trend_model == 'PWlogistic'){ + if(!(exists('cap', where = data_test))) { + stop('Capacities must also be supplied in "newdata" for logistic growth predictions', + call. = FALSE) + } + family <- eval(parse(text = family)) + cap <- data.frame(series = data_test$series, + time = data_test$time, + cap = suppressWarnings(linkfun(data_test$cap, + link = family$link))) + } else { + cap <- NULL + } + # Propagate all trends / lvs forward jointly using sampled trend parameters trends <- forecast_trend(trend_model = trend_model, use_lv = use_lv, @@ -741,7 +755,8 @@ forecast_draws = function(object, h = fc_horizon, betas_trend = betas_trend, Xp_trend = Xp_trend, - time = sort(unique(data_test$time))) + time = unique(data_test$time - min(object$obs_data$time) + 1), + cap = cap) } # Loop across series and produce the next trend estimate @@ -753,11 +768,11 @@ forecast_draws = function(object, trend_pars = trend_pars, use_lv = use_lv) - if(use_lv || trend_model == 'VAR1'){ + if(use_lv || trend_model %in% c('VAR1', 'PWlinear', 'PWlogistic')){ if(use_lv){ # Multiply lv states with loadings to generate the series' forecast trend state out <- as.numeric(trends %*% trend_extracts$lv_coefs) - } else if(trend_model == 'VAR1'){ + } else if(trend_model %in% c('VAR1', 'PWlinear', 'PWlogistic')){ out <- trends[,series] } diff --git a/R/get_mvgam_priors.R b/R/get_mvgam_priors.R index 56f447af..3937bf6b 100644 --- a/R/get_mvgam_priors.R +++ b/R/get_mvgam_priors.R @@ -201,22 +201,18 @@ get_mvgam_priors = function(formula, } # Check for missing rhs in formula + # If there are no terms in the observation formula (i.e. y ~ -1), + # we will use an intercept-only observation formula and fix + # the intercept coefficient at zero drop_obs_intercept <- FALSE if(length(attr(terms(formula), 'term.labels')) == 0 & !attr(terms(formula), 'intercept') == 1){ - if(!missing(trend_formula)){ - # If there are no terms in the observation formula (i.e. y ~ -1), - # but a trend_formula is supplied, we will use an intercept-only - # observation formula and fix the intercept coefficient at zero - formula_envir <- attr(formula, '.Environment') - formula <- formula(paste(rlang::f_lhs(formula), '~ 1')) - attr(formula, '.Environment') <- formula_envir - drop_obs_intercept <- TRUE - } else { - stop('argument "formula" contains no terms', - call. = FALSE) - } + formula_envir <- attr(formula, '.Environment') + formula <- formula(paste(rlang::f_lhs(formula), '~ 1')) + attr(formula, '.Environment') <- formula_envir + drop_obs_intercept <- TRUE } + if(is.null(orig_formula)){ orig_formula <- formula } @@ -245,6 +241,11 @@ get_mvgam_priors = function(formula, add_cor <- FALSE } + if(use_lv & trend_model %in% c('PWlinear', 'PWlogistic')){ + stop('Cannot estimate piecewise trends using dynamic factors', + call. = FALSE) + } + if(use_lv & (add_ma | add_cor) & missing(trend_formula)){ stop('Cannot estimate moving averages or correlated errors for dynamic factors', call. = FALSE) @@ -260,6 +261,18 @@ get_mvgam_priors = function(formula, call. = FALSE) } + # JAGS cannot support latent GP, VAR or piecewise trends + if(!use_stan & trend_model %in% c('GP', 'VAR1', 'PWlinear', 'PWlogistic')){ + stop('Gaussian Process, VAR and piecewise trends not supported for JAGS', + call. = FALSE) + } + + # Stan cannot support Tweedie + if(use_stan & family_char == 'tweedie'){ + stop('Tweedie family not supported for stan', + call. = FALSE) + } + # Check trend formula if(!missing(trend_formula)){ if(missing(trend_map)){ @@ -267,9 +280,9 @@ get_mvgam_priors = function(formula, trend = 1:length(unique(data_train$series))) } - if(!trend_model %in% c('RW', 'AR1', 'AR2', + if(!trend_model %in% c('RW', 'AR1', 'AR2', 'AR3', 'VAR1', 'VAR1cor', 'VARMA1,1cor')){ - stop('only RW, AR1, AR2 and VAR trends currently supported for trend predictor models', + stop('only RW, AR1, AR2, AR3 and VAR trends currently supported for trend predictor models', call. = FALSE) } } @@ -414,11 +427,10 @@ get_mvgam_priors = function(formula, } else { - # JAGS cannot support latent GP or VAR trends - if(!use_stan & trend_model %in%c ('GP', 'VAR1', - 'VAR1cor', 'VARMA1,1cor')){ - warning('gaussian process and VAR trends not supported for JAGS; reverting to Stan') - use_stan <- TRUE + # JAGS cannot support latent GP, VAR or piecewise trends + if(!use_stan & trend_model %in% c('GP', 'VAR1', 'PWlinear', 'PWlogistic')){ + stop('Gaussian Process, VAR and piecewise trends not supported for JAGS', + call. = FALSE) } if(use_stan & family_char == 'tweedie'){ @@ -718,6 +730,31 @@ get_mvgam_priors = function(formula, trend_df <- NULL } + if(trend_model %in% c('PWlinear', 'PWlogistic')){ + # Need to fix this as a next priority + trend_df <- NULL + # trend_df <- data.frame(param_name = c('vector[n_series] k_trend;', + # 'vector[n_series] m_trend;'), + # param_length = length(unique(data_train$series)), + # param_info = c('base trend growth rates', + # 'trend offset parameters'), + # prior = c('k_trend ~ std_normal();', + # 'm_trend ~ student_t(3, 0, 2.5);'), + # example_change = c(paste0( + # 'k ~ normal(', + # round(runif(min = -1, max = 1, n = 1), 2), + # ', ', + # round(runif(min = 0.1, max = 1, n = 1), 2), + # ');'), + # paste0( + # 'm ~ normal(', + # round(runif(min = -1, max = 1, n = 1), 2), + # ', ', + # round(runif(min = 0.1, max = 1, n = 1), 2), + # ');'))) + # + + } if(trend_model == 'GP'){ if(use_lv){ trend_df <- data.frame(param_name = c('vector[n_lv] rho_gp;'), @@ -1247,6 +1284,17 @@ make_default_int = function(response, family){ return(out) } +#' @noRd +linkfun = function (x, link) { + switch(link, identity = x, log = log(x), logm1 = logm1(x), + log1p = log1p(x), inverse = 1/x, sqrt = sqrt(x), `1/mu^2` = 1/x^2, + tan_half = tan(x/2), logit = plogis(x), probit = qnorm(x), + cauchit = qcauchy(x), probit_approx = qnorm(x), + squareplus = (x^2 - 1)/x, + stop("Link '", link, "' is not supported.", + call. = FALSE)) +} + #' @noRd update_default_scales = function(response, family, @@ -1255,16 +1303,6 @@ update_default_scales = function(response, location = 0, scale = 2.5){ - linkfun = function (x, link) { - switch(link, identity = x, log = log(x), logm1 = logm1(x), - log1p = log1p(x), inverse = 1/x, sqrt = sqrt(x), `1/mu^2` = 1/x^2, - tan_half = tan(x/2), logit = plogis(x), probit = qnorm(x), - cauchit = qcauchy(x), probit_approx = qnorm(x), - squareplus = (x^2 - 1)/x, - stop("Link '", link, "' is not supported.", - call. = FALSE)) - } - if(all(is.na(response))){ out <- paste0("student_t(", paste0(as.character(c(df, '0', '3')), diff --git a/R/index-mvgam.R b/R/index-mvgam.R index 0b2c77df..8e5e34c0 100644 --- a/R/index-mvgam.R +++ b/R/index-mvgam.R @@ -137,7 +137,8 @@ variables.mvgam = function(x, ...){ 'rho_gp', 'ar1', 'ar2', 'ar3', 'A', - 'Sigma', 'error', 'theta'), collapse = '|'), + 'Sigma', 'error', 'theta', + 'k_trend', 'delta', 'm_trend'), collapse = '|'), parnames) & !grepl('sigma_obs', parnames, fixed = TRUE) & !grepl('sigma_raw', parnames, fixed = TRUE))){ @@ -145,7 +146,8 @@ variables.mvgam = function(x, ...){ 'rho_gp', 'ar1', 'ar2', 'ar3', 'A', - 'Sigma', 'error', 'theta'), collapse = '|'), + 'Sigma', 'error', 'theta', + 'k_trend', 'delta', 'm_trend'), collapse = '|'), parnames) & !grepl('sigma_obs', parnames, fixed = TRUE) & !grepl('sigma_raw', parnames, fixed = TRUE) diff --git a/R/mvgam.R b/R/mvgam.R index 3e26af04..745e28df 100644 --- a/R/mvgam.R +++ b/R/mvgam.R @@ -557,22 +557,18 @@ mvgam = function(formula, } # Check for missing rhs in formula + # If there are no terms in the observation formula (i.e. y ~ -1), + # we will use an intercept-only observation formula and fix + # the intercept coefficient at zero drop_obs_intercept <- FALSE if(length(attr(terms(formula), 'term.labels')) == 0 & !attr(terms(formula), 'intercept') == 1){ - if(!missing(trend_formula)){ - # If there are no terms in the observation formula (i.e. y ~ -1), - # but a trend_formula is supplied, we will use an intercept-only - # observation formula and fix the intercept coefficient at zero - formula_envir <- attr(formula, '.Environment') - formula <- formula(paste(rlang::f_lhs(formula), '~ 1')) - attr(formula, '.Environment') <- formula_envir - drop_obs_intercept <- TRUE - } else { - stop('argument "formula" contains no terms', - call. = FALSE) - } + formula_envir <- attr(formula, '.Environment') + formula <- formula(paste(rlang::f_lhs(formula), '~ 1')) + attr(formula, '.Environment') <- formula_envir + drop_obs_intercept <- TRUE } + if(is.null(orig_formula)){ orig_formula <- formula } @@ -641,17 +637,33 @@ mvgam = function(formula, add_ma <- ma_cor_adds$add_ma; add_cor <- ma_cor_adds$add_cor if(length(unique(data_train$series)) == 1 & add_cor){ - warning('Correlated process errors not possible with only 1 series') + warning('Correlated process errors not possible with only 1 series', + call. = FALSE) add_cor <- FALSE } + if(trend_model %in% c('PWlinear', 'PWlogistic')){ + if(attr(terms(formula), 'intercept') == 1 & !drop_obs_intercept){ + warning(paste0('It is difficult / impossible to estimate intercepts\n', + 'and piecewise trend offset parameters. You may want to\n', + 'consider dropping the intercept from the formula'), + call. = FALSE) + } + } + + if(use_lv & trend_model %in% c('PWlinear', 'PWlogistic')){ + stop('Cannot estimate piecewise trends using dynamic factors', + call. = FALSE) + } + if(use_lv & (add_ma | add_cor) & missing(trend_formula)){ stop('Cannot estimate moving averages or correlated errors for dynamic factors', call. = FALSE) } if(drift && use_lv){ - warning('Cannot identify drift terms in latent factor models; setting "drift = FALSE"') + warning('Cannot identify drift terms in latent factor models; setting "drift = FALSE"', + call. = FALSE) drift <- FALSE } @@ -660,9 +672,9 @@ mvgam = function(formula, call. = FALSE) } - # JAGS cannot support latent GP or VAR trends - if(!use_stan & trend_model %in% c('GP', 'VAR1')){ - stop('Gaussian Process and VAR trends not supported for JAGS', + # JAGS cannot support latent GP, VAR or piecewise trends + if(!use_stan & trend_model %in% c('GP', 'VAR1', 'PWlinear', 'PWlogistic')){ + stop('Gaussian Process, VAR and piecewise trends not supported for JAGS', call. = FALSE) } @@ -682,8 +694,8 @@ mvgam = function(formula, trend = 1:length(unique(data_train$series))) } - if(!trend_model %in% c('RW', 'AR1', 'AR2', 'VAR1')){ - stop('only RW, AR1, AR2 and VAR trends currently supported for trend predictor models', + if(!trend_model %in% c('RW', 'AR1', 'AR2', 'AR3', 'VAR1')){ + stop('only RW, AR1, AR2, AR3 and VAR trends currently supported for trend predictor models', call. = FALSE) } } @@ -742,7 +754,12 @@ mvgam = function(formula, family = family, use_lv = use_lv, n_lv = n_lv, - trend_model = trend_model, + trend_model = + if(trend_model %in% c('PWlinear', 'PWlogistic')){ + 'RW' + } else { + trend_model + }, trend_map = trend_map, drift = drift) @@ -975,7 +992,9 @@ mvgam = function(formula, # Modify lines needed for the specified trend model model_file <- add_trend_lines(model_file, stan = FALSE, use_lv = use_lv, - trend_model = if(trend_model %in% c('RW', 'VAR1')){'RW'} else {trend_model}, + trend_model = if(trend_model %in% + c('RW', 'VAR1', + 'PWlinear', 'PWlogistic')){'RW'} else {trend_model}, drift = drift) # Use informative priors based on the fitted mgcv model to speed convergence @@ -1353,7 +1372,9 @@ mvgam = function(formula, # Add necessary trend structure base_stan_model <- add_trend_lines(model_file = base_stan_model, stan = TRUE, - trend_model = if(trend_model %in% c('RW', 'VAR1')){'RW'} else {trend_model}, + trend_model = if(trend_model %in% c('RW', 'VAR1', + 'PWlinear', + 'PWlogistic')){'RW'} else {trend_model}, use_lv = use_lv, drift = drift) @@ -1499,13 +1520,24 @@ mvgam = function(formula, # Drop observation intercept if specified if(drop_obs_intercept){ - vectorised$model_file[grep('// observation model basis coefficients', - vectorised$model_file, - fixed = TRUE) + 1] <- - paste0(vectorised$model_file[grep('// observation model basis coefficients', - vectorised$model_file, - fixed = TRUE) + 1], - '\n', '// (Intercept) fixed at zero\n', "b[1] = 0;") + if(any(grepl('// observation model basis coefficients', + vectorised$model_file, + fixed = TRUE))){ + vectorised$model_file[grep('// observation model basis coefficients', + vectorised$model_file, + fixed = TRUE) + 1] <- + paste0(vectorised$model_file[grep('// observation model basis coefficients', + vectorised$model_file, + fixed = TRUE) + 1], + '\n', '// (Intercept) fixed at zero\n', "b[1] = 0;") + } else { + vectorised$model_file[grep('b[1:num_basis] = b_raw[1:num_basis]', + vectorised$model_file, + fixed = TRUE)] <- + paste0('b[1:num_basis] = b_raw[1:num_basis];\n', + '// (Intercept) fixed at zero\n', "b[1] = 0;") + } + vectorised$model_file <- readLines(textConnection(vectorised$model_file), n = -1) } @@ -1536,6 +1568,21 @@ mvgam = function(formula, param <- c(param, alpha_params, rho_params) } + # Update for any piecewise trends + if(trend_model %in% c('PWlinear', 'PWlogistic')){ + pw_additions <- add_piecewise(vectorised$model_file, + vectorised$model_data, + data_train, + data_test, + orig_trend_model, + family) + vectorised$model_file <- pw_additions$model_file + vectorised$model_data <- pw_additions$model_data + orig_trend_model$changepoints <- pw_additions$model_data$t_change + orig_trend_model$change_freq <- pw_additions$model_data$change_freq + orig_trend_model$cap <- pw_additions$model_data$cap + } + # Add in any user-specified priors if(!missing(priors)){ vectorised$model_file <- update_priors(vectorised$model_file, priors, @@ -1544,7 +1591,8 @@ mvgam = function(formula, priors <- NULL } - # Add any correlated error or moving average processes + # Add any correlated error or moving average processes; this comes after + # priors as currently there is no option to change priors on these parameters if(add_ma | add_cor){ vectorised$model_file <- add_MaCor(vectorised$model_file, add_ma = add_ma, diff --git a/R/piecewise_trends.R b/R/piecewise_trends.R new file mode 100644 index 00000000..84f162b3 --- /dev/null +++ b/R/piecewise_trends.R @@ -0,0 +1,367 @@ +#' Specify piecewise linear or logistic trends +#' +#' Set up piecewise linear or logistic trend models +#' in \code{mvgam}. These functions do not evaluate their arguments – +#' they exist purely to help set up a model with particular piecewise +#' trend models. +#' @param n_changepoints A non-negative integer specifying the number of potential +#' changepoints. Potential changepoints are selected uniformly from the +#' first `changepoint_range` proportion of timepoints in \code{data}. Default is `10` +#' @param changepoint_range Proportion of history in \code{data} in which trend changepoints +#' will be estimated. Defaults to 0.8 for the first 80%. +#' @param changepoint_scale Parameter modulating the flexibility of the +#' automatic changepoint selection by altering the scale parameter of a Laplace distribution. +#' The resulting prior will be `double_exponential(0, changepoint_scale)`. +#' Large values will allow many changepoints and a more flexible trend, while +#' small values will allow few changepoints. Default is `0.05`. +#' @param growth Character string specifying either 'linear' or 'logistic' growth of +#' the trend. If 'logistic', a variable labelled 'cap' MUST be in \code{data} to specify the +#' maximum saturation point for the trend (see examples in \code{\link{mvgam}} for details). +#' Default is 'linear'. +#' @return An object of class \code{mvgam_trend}, which contains a list of +#' arguments to be interpreted by the parsing functions in \code{mvgam} +#' @rdname piecewise_trends +#' @export +PW = function(n_changepoints = 10, + changepoint_range = 0.8, + changepoint_scale = 0.05, + growth = 'linear'){ + + growth <- match.arg(growth, choices = c('linear', 'logistic')) + validate_proportional(changepoint_range) + validate_pos_integer(n_changepoints) + validate_pos_real(changepoint_scale) + + trend_model <- 'PWlinear' + if(growth == 'logistic'){ + trend_model = 'PWlogistic' + } + out <- structure(list(trend_model = trend_model, + n_changepoints = n_changepoints, + changepoint_range = changepoint_range, + changepoint_scale = changepoint_scale, + ma = FALSE, + cor = FALSE, + label = match.call()), + class = 'mvgam_trend') +} + +#' Updates for adding piecewise trends +#' @noRd +add_piecewise = function(model_file, + model_data, + data_train, + data_test = NULL, + orig_trend_model, + family){ + + trend_model <- orig_trend_model$trend_model + n_changepoints <- orig_trend_model$n_changepoints + changepoint_range <- orig_trend_model$changepoint_range + changepoint_scale <- orig_trend_model$changepoint_scale + + if(trend_model == 'PWlogistic'){ + if(!(exists('cap', where = data_train))) { + stop('Capacities must be supplied as a variable named "cap" for logistic growth', + call. = FALSE) + + } + # Matrix of capacities per series (these must operate on the link scale) + all_caps <- data.frame(series = as.numeric(data_train$series), + time = data_train$time, + cap = suppressWarnings(linkfun(data_train$cap, + link = family$link))) %>% + dplyr::arrange(time, series) + + if(!is.null(data_test)){ + if(!(exists('cap', where = data_test))) { + stop('Capacities must also be supplied in "newdata" for logistic growth predictions', + call. = FALSE) + } + + all_caps <- rbind(all_caps, + data.frame(series = as.numeric(data_test$series), + time = data_test$time, + cap = suppressWarnings(linkfun(data_test$cap, + link = family$link)))) %>% + dplyr::arrange(time, series) + } + + cap <- matrix(NA, nrow = length(unique(all_caps$time)), + ncol = length(unique(all_caps$series))) + for(i in 1:length(unique(all_caps$series))){ + cap[,i] <- all_caps$cap[which(all_caps$series == i)] + } + } else { + cap <- NULL + } + + #### Distribute possible changepoints #### + scaled_time <- unique(data_train$time - min(data_train$time) + 1) + max_time <- max(scaled_time) + hist_size <- floor(max_time * changepoint_range) + t_change <- unique(round(seq.int(1, hist_size, + length.out = (n_changepoints + 1))[-1])) + n_changepoints <- length(t_change) + change_freq <- n_changepoints / hist_size + + if(!is.null(data_test)){ + # Get forecast horizon changepoints + # This can go in with the data if newdata is supplied; else it needs + # to be used when extrapolating the trend forward + n_new_changes <- stats::rpois(1, (change_freq * + (max(data_test$time) - + min(data_test$time)))) + + # Spread the forecast changepoints evenly across the forecast + # horizon + scaled_test_time <- unique(data_test$time - min(data_train$time) + 1) + t_change_new <- unique(floor(seq.int(min(scaled_test_time), + max(scaled_test_time), + length.out = n_changes))) + t_change <- c(t_change, t_change_new) + n_changepoints <- n_changepoints + n_newchanges + scaled_time <- c(scaled_time, scaled_test_time) + } + + # Add changepoint info to the data + model_data$n_changepoints <- n_changepoints + model_data$change_freq <- change_freq + model_data$t_change <- t_change + model_data$time <- scaled_time + model_data$changepoint_scale <- changepoint_scale + model_data$cap <- cap + + #### Update the model file appropriately #### + # Add the piecewise functions + if(any(grepl('functions {', model_file, fixed = TRUE))){ + model_file[grep('functions {', model_file, fixed = TRUE)] <- + paste0('functions {\n', + 'matrix get_changepoint_matrix(vector t, vector t_change, int T, int S) {\n', + '/* Function to sort changepoints */\n', + '/* credit goes to the Prophet development team at Meta (https://github.com/facebook/prophet/tree/main)*/\n', + 'matrix[T, S] A;\n', + 'row_vector[S] a_row;\n', + 'int cp_idx;\n', + 'A = rep_matrix(0, T, S);\n', + 'a_row = rep_row_vector(0, S);\n', + 'cp_idx = 1;\n', + 'for (i in 1:T) {\n', + 'while ((cp_idx <= S) && (t[i] >= t_change[cp_idx])) {\n', + 'a_row[cp_idx] = 1;\n', + 'cp_idx = cp_idx + 1;\n', + '}\n', + 'A[i] = a_row;\n', + '}\n', + 'return A;\n', + '}\n', + + '// logistic trend functions\n', + 'vector logistic_gamma(real k, real m, vector delta, vector t_change, int S) {\n', + '/* Function to compute a logistic trend with changepoints */\n', + '/* credit goes to the Prophet development team at Meta (https://github.com/facebook/prophet/tree/main)*/\n', + 'vector[S] gamma; // adjusted offsets, for piecewise continuity\n', + 'vector[S + 1] k_s; // actual rate in each segment\n', + 'real m_pr;\n', + 'k_s = append_row(k, k + cumulative_sum(delta));\n', + 'm_pr = m; // The offset in the previous segment\n', + 'for (i in 1:S) {\n', + 'gamma[i] = (t_change[i] - m_pr) * (1 - k_s[i] / k_s[i + 1]);\n', + 'm_pr = m_pr + gamma[i]; // update for the next segment\n', + '}\n', + 'return gamma;\n', + '}\n', + + 'vector logistic_trend(\n', + '/* Function to adjust a logistic trend using a carrying capacity */\n', + '/* credit goes to the Prophet development team at Meta (https://github.com/facebook/prophet/tree/main)*/\n', + 'real k,\n', + 'real m,\n', + 'vector delta,\n', + 'vector t,\n', + 'vector cap,\n', + 'matrix A,\n', + 'vector t_change,\n', + 'int S\n', + ') {\n', + 'vector[S] gamma;\n', + 'gamma = logistic_gamma(k, m, delta, t_change, S);\n', + 'return cap .* inv_logit((k + A * delta) .* (t - (m + A * gamma)));\n', + '}\n', + + '// linear trend function\n', + '/* Function to compute a linear trend with changepoints */\n', + '/* credit goes to the Prophet development team at Meta (https://github.com/facebook/prophet/tree/main)*/\n', + 'vector linear_trend(\n', + 'real k,\n', + 'real m,\n', + 'vector delta,\n', + 'vector t,\n', + 'matrix A,\n', + 'vector t_change\n', + ') {\n', + 'return (k + A * delta) .* t + (m + A * (-t_change .* delta));\n', + '}\n') + + } else { + model_file[grep('Stan model code', model_file)] <- + paste0('// Stan model code generated by package mvgam\n', + 'functions {\n', + 'matrix get_changepoint_matrix(vector t, vector t_change, int T, int S) {\n', + '/* Function to sort changepoints */\n', + '/* credit goes to the Prophet development team at Meta (https://github.com/facebook/prophet/tree/main)*/\n', + 'matrix[T, S] A;\n', + 'row_vector[S] a_row;\n', + 'int cp_idx;\n', + 'A = rep_matrix(0, T, S);\n', + 'a_row = rep_row_vector(0, S);\n', + 'cp_idx = 1;\n', + 'for (i in 1:T) {\n', + 'while ((cp_idx <= S) && (t[i] >= t_change[cp_idx])) {\n', + 'a_row[cp_idx] = 1;\n', + 'cp_idx = cp_idx + 1;\n', + '}\n', + 'A[i] = a_row;\n', + '}\n', + 'return A;\n', + '}\n', + + '// logistic trend functions\n', + 'vector logistic_gamma(real k, real m, vector delta, vector t_change, int S) {\n', + '/* Function to compute a logistic trend with changepoints */\n', + '/* credit goes to the Prophet development team at Meta (https://github.com/facebook/prophet/tree/main)*/\n', + 'vector[S] gamma; // adjusted offsets, for piecewise continuity\n', + 'vector[S + 1] k_s; // actual rate in each segment\n', + 'real m_pr;\n', + 'k_s = append_row(k, k + cumulative_sum(delta));\n', + 'm_pr = m; // The offset in the previous segment\n', + 'for (i in 1:S) {\n', + 'gamma[i] = (t_change[i] - m_pr) * (1 - k_s[i] / k_s[i + 1]);\n', + 'm_pr = m_pr + gamma[i]; // update for the next segment\n', + '}\n', + 'return gamma;\n', + '}\n', + + 'vector logistic_trend(\n', + '/* Function to adjust a logistic trend using a carrying capacity */\n', + '/* credit goes to the Prophet development team at Meta (https://github.com/facebook/prophet/tree/main)*/\n', + 'real k,\n', + 'real m,\n', + 'vector delta,\n', + 'vector t,\n', + 'vector cap,\n', + 'matrix A,\n', + 'vector t_change,\n', + 'int S\n', + ') {\n', + 'vector[S] gamma;\n', + 'gamma = logistic_gamma(k, m, delta, t_change, S);\n', + 'return cap .* inv_logit((k + A * delta) .* (t - (m + A * gamma)));\n', + '}\n', + + '// linear trend function\n', + '/* Function to compute a linear trend with changepoints */\n', + '/* credit goes to the Prophet development team at Meta (https://github.com/facebook/prophet/tree/main)*/\n', + 'vector linear_trend(\n', + 'real k,\n', + 'real m,\n', + 'vector delta,\n', + 'vector t,\n', + 'matrix A,\n', + 'vector t_change\n', + ') {\n', + 'return (k + A * delta) .* t + (m + A * (-t_change .* delta));\n', + '}\n}\n') + } + + # Update the data block + model_file[grep('int num_basis;', model_file, fixed = TRUE)] <- + paste0("int num_basis; // total number of basis coefficients\n", + "vector[n] time; // index of time for changepoint model\n", + "int n_changepoints; // number of potential trend changepoints\n", + "vector[n_changepoints] t_change; // times of potential changepoints\n", + if(trend_model == 'PWlogistic'){ + "matrix[n, n_series] cap; // carrying capacities for logistic trends\n" + } else { + NULL + }, + 'real changepoint_scale; // scale of changepoint shock prior\n') + model_file <- readLines(textConnection(model_file), n = -1) + + # Update the transformed data block + if(any(grepl('transformed data {', model_file, fixed = TRUE))){ + model_file[grep('transformed data {', model_file, fixed = TRUE)] <- + paste0('transformed data {\n', + '// sorted changepoint matrix\n', + 'matrix[n, n_changepoints] A = get_changepoint_matrix(time, t_change, n, n_changepoints);\n') + } else { + model_file[grep('parameters {', model_file, fixed = TRUE)[1]] <- + paste0('transformed data {\n', + '// sorted changepoint matrix\n', + 'matrix[n, n_changepoints] A = get_changepoint_matrix(time, t_change, n, n_changepoints);\n', + '}\nparameters {') + } + model_file <- readLines(textConnection(model_file), n = -1) + + # Update the parameters block + model_file <- model_file[-c(grep('// latent trend variance parameters', + model_file, fixed = TRUE):(grep('// latent trend variance parameters', + model_file, fixed = TRUE) + 1))] + model_file <- model_file[-c(grep('// latent trends', + model_file, fixed = TRUE):(grep('// latent trends', + model_file, fixed = TRUE) + 1))] + model_file[grep("vector[num_basis] b_raw;", model_file, fixed = TRUE)] <- + paste0("vector[num_basis] b_raw;\n", + "// base trend growth rates\n", + "vector[n_series] k_trend;\n\n", + "// trend offset parameters\n", + "vector[n_series] m_trend;\n\n", + "// trend rate adjustments per series\n", + "matrix[n_changepoints, n_series] delta;\n") + model_file <- readLines(textConnection(model_file), n = -1) + + # Update the transformed parameters block + model_file[grep("transformed parameters {", model_file, fixed = TRUE)] <- + paste0("transformed parameters {\n", + "// latent trends\n", + "matrix[n, n_series] trend;\n") + + max_rawline <- max(grep('= b_raw', model_file)) + model_file[max_rawline] <- paste0( + model_file[max_rawline], + '\n\n', + '// trend estimates\n', + 'for (s in 1 : n_series) {\n', + if(trend_model == 'PWlogistic'){ + 'trend[1 : n, s] = logistic_trend(k_trend[s], m_trend[s], to_vector(delta[,s]), time, to_vector(cap[,s]), A, t_change, n_changepoints);\n' + } else { + 'trend[1 : n, s] = linear_trend(k_trend[s], m_trend[s], to_vector(delta[,s]), time, A, t_change);\n' + }, + + '}\n') + model_file <- readLines(textConnection(model_file), n = -1) + + # Update the model block + model_file <- model_file[-c(grep('// priors for latent trend variance parameters', + model_file, fixed = TRUE):(grep('// priors for latent trend variance parameters', + model_file, fixed = TRUE) + 1))] + rw_start <- grep("trend[1, 1:n_series] ~ normal(0, sigma);", model_file, + fixed = TRUE) + rw_lines <- (rw_start - 1):(rw_start + 3) + model_file <- model_file[-rw_lines] + model_file[grep("// likelihood functions", model_file, fixed = TRUE) - 1] <- + paste0('// trend parameter priors\n', + 'm_trend ~ student_t(3, 0, 2.5);\n', + 'k_trend ~ std_normal();\n', + 'to_vector(delta) ~ double_exponential(0, changepoint_scale);\n', + model_file[grep("// likelihood functions", model_file, fixed = TRUE) - 1]) + + # Update the generated quantities block + model_file <- model_file[-grep("vector[n_series] tau;", model_file, fixed = TRUE)] + tau_start <- grep("tau[s] = pow(sigma[s], -2.0);", model_file, fixed = TRUE) - 1 + model_file <- model_file[-c(tau_start:(tau_start + 2))] + + #### Return #### + return(list(model_file = model_file, + model_data = model_data)) +} diff --git a/R/stan_utils.R b/R/stan_utils.R index 8a3fd6d0..a005ff87 100644 --- a/R/stan_utils.R +++ b/R/stan_utils.R @@ -1141,6 +1141,11 @@ vectorise_stan_lik = function(model_file, model_data, family = 'poisson', VAR1 <- FALSE } + # Similar hack for adding piecewise trends + if(trend_model %in% c('PWlinear', 'PWlogistic')){ + trend_model <- 'RW' + } + #### Family specifications #### if(threads > 1){ if(family == 'gaussian'){ @@ -3099,7 +3104,7 @@ add_trend_predictors = function(trend_formula, model_file, fixed = TRUE)] <- paste0("for(j in 1:n_lv){\n", "LV[1, j] ~ normal(drift[j] + [ytimes_trend[1, j]], sigma[j]);\n", - "LV[2, j] ~ normal(drift[j] + trend_mus[ytimes_trend[1, j]] + ar1[j] * (LV[1, j] - trend_mus[ytimes_trend[1, j]]), sigma[j]);\n", + "LV[2, j] ~ normal(drift[j] + trend_mus[ytimes_trend[2, j]] + ar1[j] * (LV[1, j] - trend_mus[ytimes_trend[1, j]]), sigma[j]);\n", "for(i in 3:n){\n", "LV[i, j] ~ normal(drift[j] + trend_mus[ytimes_trend[i, j]] + ar1[j] * (LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]]) + ar2[j] * (LV[i - 2, j] - trend_mus[ytimes_trend[i - 2, j]]), sigma[j]);\n", "}\n}") @@ -3110,7 +3115,7 @@ add_trend_predictors = function(trend_formula, model_file, fixed = TRUE)] <- paste0("for(j in 1:n_lv){\n", "LV[1, j] ~ normal(trend_mus[ytimes_trend[1, j]], sigma[j]);\n", - "LV[2, j] ~ normal(trend_mus[ytimes_trend[1, j]] + ar1[j] * (LV[1, j] - trend_mus[ytimes_trend[1, j]]), sigma[j]);\n", + "LV[2, j] ~ normal(trend_mus[ytimes_trend[2, j]] + ar1[j] * (LV[1, j] - trend_mus[ytimes_trend[1, j]]), sigma[j]);\n", "for(i in 3:n){\n", "LV[i, j] ~ normal(trend_mus[ytimes_trend[i, j]] + ar1[j] * (LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]]) + ar2[j] * (LV[i - 2, j] - trend_mus[ytimes_trend[i - 2, j]]), sigma[j]);\n", "}\n}") @@ -3121,6 +3126,49 @@ add_trend_predictors = function(trend_formula, model_file <- readLines(textConnection(model_file), n = -1) } + if(trend_model == 'AR3'){ + model_file[grep('// latent factor AR1 terms', model_file, fixed = TRUE)] <- + '// latent state AR1 terms' + model_file[grep('// latent factor AR2 terms', model_file, fixed = TRUE)] <- + '// latent state AR2 terms' + model_file[grep('// latent factor AR3 terms', model_file, fixed = TRUE)] <- + '// latent state AR3 terms' + model_file <- model_file[-c(grep("for(j in 1:n_lv){", model_file, fixed = TRUE): + (grep("for(j in 1:n_lv){", model_file, fixed = TRUE) + 2))] + + if(drift){ + model_file[grep("LV[1, 1:n_lv] ~ normal(0, sigma);", + model_file, fixed = TRUE)] <- + paste0("for(j in 1:n_lv){\n", + "LV[1, j] ~ normal(drift[j] + [ytimes_trend[1, j]], sigma[j]);\n", + "LV[2, j] ~ normal(drift[j] + trend_mus[ytimes_trend[2, j]] + ar1[j] * (LV[1, j] - trend_mus[ytimes_trend[1, j]]), sigma[j]);\n", + "LV[3, j] ~ normal(drift[j] + trend_mus[ytimes_trend[3, j]] + ar1[j] * (LV[2, j] - trend_mus[ytimes_trend[2, j]]) + ar2[j] * (LV[1, j] - trend_mus[ytimes_trend[1, j]]), sigma[j]);\n", + "for(i in 4:n){\n", + "LV[i, j] ~ normal(drift[j] + trend_mus[ytimes_trend[i, j]] + ar1[j] * (LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]]) + ar2[j] * (LV[i - 2, j] - trend_mus[ytimes_trend[i - 2, j]]) + ar3[j] * (LV[i - 3, j] - trend_mus[ytimes_trend[i - 3, j]]), sigma[j]);\n", + "}\n}") + model_file <- model_file[-grep("LV[2, 1:n_lv] ~ normal(drift + LV[1, 1:n_lv] * ar1, 0.1);", + model_file, fixed = TRUE)] + model_file <- model_file[-grep('LV_raw[3, 1:n_lv] ~ normal(drift + LV_raw[2, 1:n_lv] * ar1 + LV_raw[1, 1:n_lv] * ar2, 0.1);', + model_file, fixed = TRUE)] + } else { + model_file[grep("LV[1, 1:n_lv] ~ normal(0, sigma);", + model_file, fixed = TRUE)] <- + paste0("for(j in 1:n_lv){\n", + "LV[1, j] ~ normal(trend_mus[ytimes_trend[1, j]], sigma[j]);\n", + "LV[2, j] ~ normal(trend_mus[ytimes_trend[2, j]] + ar1[j] * (LV[1, j] - trend_mus[ytimes_trend[1, j]]), sigma[j]);\n", + "LV[3, j] ~ normal(trend_mus[ytimes_trend[3, j]] + ar1[j] * (LV[2, j] - trend_mus[ytimes_trend[2, j]]) + ar2[j] * (LV[1, j] - trend_mus[ytimes_trend[1, j]]), sigma[j]);\n", + "for(i in 4:n){\n", + "LV[i, j] ~ normal(trend_mus[ytimes_trend[i, j]] + ar1[j] * (LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]]) + ar2[j] * (LV[i - 2, j] - trend_mus[ytimes_trend[i - 2, j]]) + ar3[j] * (LV[i - 3, j] - trend_mus[ytimes_trend[i - 3, j]]), sigma[j]);\n", + "}\n}") + model_file <- model_file[-grep("LV[2, 1:n_lv] ~ normal(LV[1, 1:n_lv] * ar1, 0.1);", + model_file, fixed = TRUE)] + model_file <- model_file[-grep('LV_raw[3, 1:n_lv] ~ normal(LV_raw[2, 1:n_lv] * ar1 + LV_raw[1, 1:n_lv] * ar2, 0.1);', + model_file, fixed = TRUE)] + } + + model_file <- readLines(textConnection(model_file), n = -1) + } + if(trend_model == 'VAR1'){ model_file <- gsub('trend means', 'latent state means', diff --git a/R/trends.R b/R/trends.R index 9fac6de6..63cfee29 100644 --- a/R/trends.R +++ b/R/trends.R @@ -91,6 +91,8 @@ trend_model_choices = function(){ 'VARMA', 'VARMAcor', 'VARMA1,1cor', + 'PWlinear', + 'PWlogistic', 'None') } @@ -142,6 +144,68 @@ ma_cor_additions = function(trend_model){ add_cor = add_cor)) } +#' Evaluate the piecewise linear function +#' This code is borrowed from the {prophet} R package +#' All credit goes directly to the prophet development team +#' https://github.com/facebook/prophet/blob/main/R/R/prophet.R +#' @param t Vector of times on which the function is evaluated. +#' @param deltas Vector of rate changes at each changepoint. +#' @param k Float initial rate. +#' @param m Float initial offset. +#' @param changepoint_ts Vector of changepoint times. +#' +#' @return Vector y(t). +#' +#' @noRd +piecewise_linear <- function(t, deltas, k, m = 0, changepoint_ts) { + # Intercept changes + gammas <- -changepoint_ts * deltas + # Get cumulative slope and intercept at each t + k_t <- rep(k, length(t)) + m_t <- rep(m, length(t)) + for (s in 1:length(changepoint_ts)) { + indx <- t >= changepoint_ts[s] + k_t[indx] <- k_t[indx] + deltas[s] + m_t[indx] <- m_t[indx] + gammas[s] + } + y <- k_t * t + m_t + return(y) +} + +#' Evaluate the piecewise logistic function. +#' This code is borrowed from the {prophet} R package +#' All credit goes directly to the prophet development team +#' https://github.com/facebook/prophet/blob/main/R/R/prophet.R +#' @param t Vector of times on which the function is evaluated. +#' @param cap Vector of capacities at each t. +#' @param deltas Vector of rate changes at each changepoint. +#' @param k Float initial rate. +#' @param m Float initial offset. +#' @param changepoint_ts Vector of changepoint times. +#' +#' @return Vector y(t). +#' +#' @noRd +piecewise_logistic <- function(t, cap, deltas, k, m, changepoint_ts) { + # Compute offset changes + k.cum <- c(k, cumsum(deltas) + k) + gammas <- rep(0, length(changepoint_ts)) + for (i in 1:length(changepoint_ts)) { + gammas[i] <- ((changepoint_ts[i] - m - sum(gammas)) + * (1 - k.cum[i] / k.cum[i + 1])) + } + # Get cumulative rate and offset at each t + k_t <- rep(k, length(t)) + m_t <- rep(m, length(t)) + for (s in 1:length(changepoint_ts)) { + indx <- t >= changepoint_ts[s] + k_t[indx] <- k_t[indx] + deltas[s] + m_t[indx] <- m_t[indx] + gammas[s] + } + y <- cap / (1 + exp(-k_t * (t - m_t))) + return(y) +} + #' Squared exponential GP simulation function #' @param last_trends Vector of trend estimates leading up to the current timepoint #' @param h \code{integer} specifying the forecast horizon @@ -577,6 +641,10 @@ trend_par_names = function(trend_model, 'sigma', 'theta', 'error') } + if(trend_model %in% c('PWlinear', 'PWlogistic')){ + param <- c('trend', 'delta', 'k_trend', 'm_trend') + } + } if(trend_model != 'None'){ @@ -628,6 +696,28 @@ extract_trend_pars = function(object, keep_all_estimates = TRUE, out <- list() } + # delta params for piecewise trends + if(attr(object$model_data, 'trend_model') %in% c('PWlinear', 'PWlogistic')){ + out$delta <- lapply(seq_along(levels(object$obs_data$series)), function(series){ + if(object$fit_engine == 'stan'){ + delta_estimates <- mvgam:::mcmc_chains(object$model_output, 'delta')[,seq(series, + dim(mvgam:::mcmc_chains(object$model_output, + 'delta'))[2], + by = NCOL(object$ytimes))] + } else { + delta_estimates <- mcmc_chains(object$model_output, 'delta')[,starts[series]:ends[series]] + } + }) + if(attr(object$model_data, 'trend_model') == 'PWlogistic'){ + out$cap <- lapply(seq_along(levels(object$obs_data$series)), function(series){ + t(replicate(NROW(out$delta[[1]]), object$trend_model$cap[,series])) + }) + } + out$changepoints <- t(replicate(NROW(out$delta[[1]]), object$trend_model$changepoints)) + out$change_freq <- replicate(NROW(out$delta[[1]]), object$trend_model$change_freq) + out$change_scale <- replicate(NROW(out$delta[[1]]), object$trend_model$changepoint_scale) + } + # Latent trend loadings for dynamic factor models if(object$use_lv){ if(attr(object$model_data, 'trend_model') %in% c('RW', 'AR1', 'AR2', 'AR3')){ @@ -843,10 +933,11 @@ extract_general_trend_pars = function(samp_index, trend_pars){ general_trend_pars <- lapply(seq_along(trend_pars), function(x){ if(names(trend_pars)[x] %in% c('last_lvs', 'lv_coefs', 'last_trends', - 'A', 'Sigma', 'theta', 'b_gp', 'error')){ + 'A', 'Sigma', 'theta', 'b_gp', 'error', + 'delta', 'cap')){ if(names(trend_pars)[x] %in% c('last_lvs', 'lv_coefs', 'last_trends', - 'b_gp')){ + 'b_gp', 'delta', 'cap')){ out <- unname(lapply(trend_pars[[x]], `[`, samp_index, )) } @@ -922,10 +1013,11 @@ extract_series_trend_pars = function(series, samp_index, trend_pars, } #' Wrapper function to forecast trends +#' @importFrom extraDistr rlaplace #' @noRd forecast_trend = function(trend_model, use_lv, trend_pars, Xp_trend = NULL, betas_trend = NULL, - h = 1, time = NULL){ + h = 1, time = NULL, cap = NULL){ # Propagate dynamic factors forward if(use_lv){ @@ -1288,6 +1380,80 @@ forecast_trend = function(trend_model, use_lv, trend_pars, # h = h) } + if(trend_model == 'PWlinear'){ + trend_fc <- do.call(cbind, lapply(seq_along(trend_pars$delta), function(x){ + + # Sample forecast horizon changepoints + n_changes <- stats::rpois(1, (trend_pars$change_freq * + (max(time) - + min(time)))) + + # Sample deltas + deltas_new <- extraDistr::rlaplace(n_changes, + mu = 0, + sigma = trend_pars$change_scale + 1e-8) + + # Spread changepoints evenly across the forecast horizon + t_change_new <- sample(min(time):max(time), n_changes) + + # Combine with changepoints from the history + deltas <- c(trend_pars$delta[[x]], deltas_new) + changepoint_ts <- c(trend_pars$changepoints, t_change_new) + + # Generate a trend draw + draw <- piecewise_linear(t = 1:max(time), + deltas = deltas, + k = trend_pars$k_trend[x], + m = trend_pars$m_trend[x], + changepoint_ts = changepoint_ts) + + # Keep only the forecast horizon estimates + tail(draw, max(time) - min(time)) + })) + } + + if(trend_model == 'PWlogistic'){ + trend_fc <- do.call(cbind, lapply(seq_along(trend_pars$delta), function(x){ + + # Sample forecast horizon changepoints + n_changes <- stats::rpois(1, (trend_pars$change_freq * + (max(time) - min(time)))) + + # Sample deltas + deltas_new <- extraDistr::rlaplace(n_changes, + mu = 0, + sigma = trend_pars$change_scale + 1e-8) + + # Spread changepoints evenly across the forecast horizon + t_change_new <- sample(min(time):max(time), n_changes) + + # Combine with changepoints from the history + deltas <- c(trend_pars$delta[[x]], deltas_new) + changepoint_ts <- c(trend_pars$changepoints, t_change_new) + + # Get historical capacities + oldcaps <- trend_pars$cap[[x]] + + # And forecast capacities + newcaps = cap %>% + dplyr::filter(series == levels(cap$series)[x]) %>% + dplyr::arrange(time) %>% + dplyr::pull(cap) + caps <- c(oldcaps, newcaps) + + # Generate a trend draw + draw <- piecewise_logistic(t = 1:max(time), + cap = caps, + deltas = deltas, + k = trend_pars$k_trend[x], + m = trend_pars$m_trend[x], + changepoint_ts = changepoint_ts) + + # Keep only the forecast horizon estimates + tail(draw, max(time) - min(time)) + })) + } + } return(trend_fc) } diff --git a/R/validations.R b/R/validations.R index 5bfe54ae..6ae16b67 100644 --- a/R/validations.R +++ b/R/validations.R @@ -248,6 +248,20 @@ validate_pos_integer = function(x){ } } } +#'@noRd +validate_pos_real = function(x){ + s <- substitute(x) + x <- base::suppressWarnings(as.numeric(x)) + if (length(x) != 1L || anyNA(x)) { + stop("Argument '", s, "' must be a single numeric value", + call. = FALSE) + } + + if(sign(x) != 1){ + stop("Argument '", s, "' must be a positive real value", + call. = FALSE) + } +} #'@noRd validate_trendmap = function(trend_map, diff --git a/man/RW.Rd b/man/RW.Rd index fb1c3094..87cab0cc 100644 --- a/man/RW.Rd +++ b/man/RW.Rd @@ -14,14 +14,12 @@ VAR(ma = FALSE, cor = FALSE) } \arguments{ \item{ma}{\code{Logical} Include moving average terms of order \code{1}? -Default is \code{FALSE}. Note, this option is only currently operational -for fitting VARMA models. Support for other models (AR and RW) is upcoming.} +Default is \code{FALSE}.} \item{cor}{\code{Logical} Include correlated process errors as part of a multivariate normal process model? If \code{TRUE} and if \code{n_series > 1} in the supplied data, a fully structured covariance matrix will be estimated -for the process errors. Default is \code{FALSE}. Note, this option is only currently operational -for fitting VAR / VARMA models. Support for other models (AR and RW) is upcoming.} +for the process errors. Default is \code{FALSE}.} \item{p}{A non-negative integer specifying the autoregressive (AR) order. Default is \code{1}. Cannot currently be larger than \code{3}} diff --git a/man/piecewise_trends.Rd b/man/piecewise_trends.Rd new file mode 100644 index 00000000..6482644c --- /dev/null +++ b/man/piecewise_trends.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/piecewise_trends.R +\name{PW} +\alias{PW} +\title{Specify piecewise linear or logistic trends} +\usage{ +PW( + n_changepoints = 10, + changepoint_range = 0.8, + changepoint_scale = 0.05, + growth = "linear" +) +} +\arguments{ +\item{n_changepoints}{A non-negative integer specifying the number of potential +changepoints. Potential changepoints are selected uniformly from the +first \code{changepoint_range} proportion of timepoints in \code{data}. Default is \code{10}} + +\item{changepoint_range}{Proportion of history in \code{data} in which trend changepoints +will be estimated. Defaults to 0.8 for the first 80\%.} + +\item{changepoint_scale}{Parameter modulating the flexibility of the +automatic changepoint selection by altering the scale parameter of a Laplace distribution. +The resulting prior will be \code{double_exponential(0, changepoint_scale)}. +Large values will allow many changepoints and a more flexible trend, while +small values will allow few changepoints. Default is \code{0.05}.} + +\item{growth}{Character string specifying either 'linear' or 'logistic' growth of +the trend. If 'logistic', a variable labelled 'cap' MUST be in \code{data} to specify the +maximum saturation point for the trend (see examples in \code{\link{mvgam}} for details). +Default is 'linear'.} +} +\value{ +An object of class \code{mvgam_trend}, which contains a list of +arguments to be interpreted by the parsing functions in \code{mvgam} +} +\description{ +Set up piecewise linear or logistic trend models +in \code{mvgam}. These functions do not evaluate their arguments – +they exist purely to help set up a model with particular piecewise +trend models. +} diff --git a/src/RcppExports.o b/src/RcppExports.o index 849c29a4..3738bd53 100644 Binary files a/src/RcppExports.o and b/src/RcppExports.o differ diff --git a/src/mvgam.dll b/src/mvgam.dll index 7a305a9b..ee1d68fd 100644 Binary files a/src/mvgam.dll and b/src/mvgam.dll differ diff --git a/src/trend_funs.o b/src/trend_funs.o index faf098a5..371ea49f 100644 Binary files a/src/trend_funs.o and b/src/trend_funs.o differ diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf index 03e4428d..8e286ed8 100644 Binary files a/tests/testthat/Rplots.pdf and b/tests/testthat/Rplots.pdf differ diff --git a/tests/testthat/test-mvgam.R b/tests/testthat/test-mvgam.R index aff31255..d3b9db9a 100644 --- a/tests/testthat/test-mvgam.R +++ b/tests/testthat/test-mvgam.R @@ -91,13 +91,13 @@ test_that("median coefs should be stored in the mgcv object", { coef(gaus_ar1)[,2])) }) -test_that("empty obs formula should error if no trend_formula", { - expect_error(mvgam(y ~ -1, +test_that("empty obs formula is allowed, even if no trend_formula", { + mod <- mvgam(y ~ -1, trend_model = 'AR1', data = gaus_data$data_train, family = gaussian(), - run_model = FALSE), - 'argument "formula" contains no terms') + run_model = FALSE) + expect_true(inherits(mod, 'mvgam_prefit')) }) test_that("empty obs formula allowed if trend_formula supplied", {