From f7a26e6feb726498d7c7c413c7615cb892708712 Mon Sep 17 00:00:00 2001 From: Adrian Lison Date: Fri, 4 Oct 2024 13:07:32 +0200 Subject: [PATCH 1/6] Implement change point model for Rt variability --- R/model_infections.R | 327 ++++++++++++++++++++++++++--------- inst/stan/EpiSewer_main.stan | 70 ++++++-- 2 files changed, 296 insertions(+), 101 deletions(-) diff --git a/R/model_infections.R b/R/model_infections.R index b509b56..8cac585 100644 --- a/R/model_infections.R +++ b/R/model_infections.R @@ -89,7 +89,7 @@ generation_dist_assume <- return(modeldata) } -#' Estimate Rt via exponential smoothing +#'Estimate Rt via exponential smoothing #' #'@description This option estimates the effective reproduction number over time #' using exponential smoothing. It implements Holt's linear trend method with @@ -105,6 +105,18 @@ generation_dist_assume <- #'@param sd_prior_mu Prior (mean) on the standard deviation of the innovations. #'@param sd_prior_sigma Prior (standard deviation) on the standard deviation of #' the innovations. +#'@param sd_changepoint_dist The variability of Rt can change over time, e.g. +#' during the height of an epidemic wave, countermeasures may lead to much +#' faster changes in Rt than observable at other times. This potential +#' variability is accounted for using change points placed at regular +#' intervals. The standard deviation of the state space model innovations then +#' evolves linearly between the change points. The default change point +#' distance is 28 days. If set to zero, no change points are modeled. +#'@param sd_changepoint_sd This parameter controls the variability of the change +#' points. When change points are modeled, EpiSewer will estimate a baseline +#' standard deviation (see `sd_prior_mu` and `sd_prior_sigma`), and model +#' change point values as independently distributed with mean equal to this +#' baseline and standard deviation `sd_changepoint_sd`. #'@param smooth_prior_mu Prior (mean) on the smoothing parameter. Must be #' between 0 and 1. #'@param smooth_prior_sigma Prior (standard deviation) on the smoothing @@ -182,6 +194,8 @@ R_estimate_ets <- function( trend_prior_sigma = 0.1, sd_prior_mu = 0, sd_prior_sigma = 0.1, + sd_changepoint_dist = 28, + sd_changepoint_sd = 0.025, link = "inv_softplus", R_max = 6, smooth_prior_mu = 0.5, @@ -210,14 +224,39 @@ R_estimate_ets <- function( "R_sd", "truncated normal", mu = sd_prior_mu, sigma = sd_prior_sigma ) + modeldata$R_vari_sd <- sd_changepoint_sd modeldata$.init$R_level_start <- modeldata$R_level_start_prior$R_level_start_prior[1] modeldata$.init$R_trend_start <- 1e-4 - modeldata$.init$R_sd <- max(modeldata$R_sd_prior$R_sd_prior[1], 0.1) - modeldata$.init$R_noise <- tbe( - rep(0, modeldata$.metainfo$length_R - 1), - ".metainfo$length_R" + + modeldata <- tbc( + "R_ets_noise", + { + modeldata$.init$R_noise <- rep(0, modeldata$.metainfo$length_R - 1) + modeldata <- add_R_variability( + length_R = modeldata$.metainfo$length_R, + length_seeding = modeldata$.metainfo$length_seeding, + length_partial = modeldata$.metainfo$partial_window, + h = modeldata$.metainfo$forecast_horizon, + changepoint_dist = sd_changepoint_dist, + modeldata = modeldata + ) + modeldata$.init$R_vari_baseline <- max( + modeldata$R_sd_prior$R_sd_prior[1], 0.01 + ) + modeldata$.init$R_vari_changepoints <- rep( + max(modeldata$R_sd_prior$R_sd_prior[1], 0.01), + modeldata$R_vari_n_basis + ) + }, + required = c( + ".metainfo$length_R", + ".metainfo$length_seeding", + ".metainfo$partial_window", + ".metainfo$forecast_horizon" + ), + modeldata = modeldata ) check_beta_alternative(smooth_prior_mu, smooth_prior_sigma) @@ -250,8 +289,10 @@ R_estimate_ets <- function( modeldata$ets_noncentered <- noncentered modeldata <- add_dummy_data(modeldata, c( - "bs_n_basis", "bs_dists", "bs_n_w", "bs_w", "bs_v", "bs_u", - "bs_coeff_ar_start_prior", "bs_coeff_ar_sd_prior" + "bs_n_basis", "bs_n_w", "bs_w", "bs_v", "bs_u", + "bs_coeff_ar_start_prior", "bs_coeff_ar_sd_prior", + "R_vari_sel_ncol", "R_vari_sel_n_w", "R_vari_sel_w", "R_vari_sel_v", + "R_vari_sel_u" )) modeldata <- add_dummy_inits(modeldata, c( @@ -276,6 +317,13 @@ R_estimate_ets <- function( #' @param sd_prior_mu Prior (mean) on the standard deviation of the random walk. #' @param sd_prior_sigma Prior (standard deviation) on the standard deviation of #' the random walk. +#' @param sd_changepoint_dist The variability of Rt can change over time, e.g. +#' during the height of an epidemic wave, countermeasures may lead to much +#' faster changes in Rt than observable at other times. This potential +#' variability is accounted for using change points placed at regular +#' intervals. The standard deviation of the random walk then evolves linearly +#' between the change points. The default change point distance is 28 days. If +#' set to zero, no change points are modeled. #' @param differenced If `FALSE` (default), the random walk is applied to the #' absolute Rt time series. If `TRUE`, it is instead applied to the #' differenced time series, i.e. now the trend is modeled as a random walk. @@ -286,10 +334,10 @@ R_estimate_ets <- function( #' #' @details The smoothness of Rt estimates is influenced by the prior on the #' standard deviation of the random walk. It also influences the uncertainty -#' of Rt estimates towards the present / date of estimation, when limited -#' data signal is available. The prior on the intercept of the random walk -#' should reflect your expectation of Rt at the beginning of the time series. -#' If estimating from the start of an epidemic, you might want to use a prior +#' of Rt estimates towards the present / date of estimation, when limited data +#' signal is available. The prior on the intercept of the random walk should +#' reflect your expectation of Rt at the beginning of the time series. If +#' estimating from the start of an epidemic, you might want to use a prior #' with mean > 1 for the intercept. #' #' @details The priors of this component have the following functional form: @@ -305,6 +353,8 @@ R_estimate_rw <- function( intercept_prior_sigma = 0.8, sd_prior_mu = 0, sd_prior_sigma = 0.1, + sd_changepoint_dist = 28, + sd_changepoint_sd = 0.025, link = "inv_softplus", R_max = 6, differenced = FALSE, @@ -315,6 +365,7 @@ R_estimate_rw <- function( level_prior_sigma = intercept_prior_sigma, sd_prior_mu = sd_prior_mu, sd_prior_sigma = sd_prior_sigma, + sd_changepoint_dist = sd_changepoint_dist, link = link, R_max = R_max, smooth_prior_mu = 1, @@ -335,83 +386,99 @@ R_estimate_rw <- function( return(modeldata) } -#' Estimate Rt via smoothing splines +#'Estimate Rt via smoothing splines #' -#' @description This option estimates the effective reproduction number using -#' penalized B-splines. +#'@description This option estimates the effective reproduction number using +#' penalized B-splines. #' -#' @param knot_distance Distance between spline breakpoints (knots) in days -#' (default is 7, i.e. one knot each week). Placing knots further apart -#' increases the smoothness of Rt estimates and can speed up model fitting. -#' The Rt time series remains surprisingly flexible even at larger knot -#' distances, but placing knots too far apart can lead to inaccurate -#' estimates. -#' @param spline_degree Degree of the spline polynomials (default is 3 for cubic -#' splines). -#' @param coef_intercept_prior_mu Prior (mean) on the intercept of the random -#' walk over spline coefficients. -#' @param coef_intercept_prior_sigma Prior (standard deviation) on the intercept -#' of the random walk over spline coefficients. -#' @param coef_sd_prior_mu Prior (mean) on the daily standard deviation of the -#' random walk over spline coefficients. -#' @param coef_sd_prior_sigma Prior (standard deviation) on the daily standard -#' deviation of the random walk over spline coefficients. -#' @param link Link function. Currently supported are `inv_softplus` (default) -#' and `scaled_logit`. Both of these links are configured to behave -#' approximately like the identity function around R=1, but become -#' increasingly non-linear below (and in the case of `scaled_logit` also -#' above) R=1. -#' @param R_max If `link=scaled_logit` is used, a maximum reproduction number -#' must be assumed. This should be higher than any realistic R value for the -#' modeled pathogen. Default is 6. +#'@param knot_distance Distance between spline breakpoints (knots) in days +#' (default is 7, i.e. one knot each week). Placing knots further apart +#' increases the smoothness of Rt estimates and can speed up model fitting. The +#' Rt time series remains surprisingly flexible even at larger knot distances, +#' but placing knots too far apart can lead to inaccurate estimates. +#'@param spline_degree Degree of the spline polynomials (default is 3 for cubic +#' splines). +#'@param coef_intercept_prior_mu Prior (mean) on the intercept of the random +#' walk over spline coefficients. +#'@param coef_intercept_prior_sigma Prior (standard deviation) on the intercept +#' of the random walk over spline coefficients. +#'@param coef_sd_prior_mu Prior (mean) on the daily standard deviation of the +#' random walk over spline coefficients (see details). +#'@param coef_sd_prior_sigma Prior (standard deviation) on the daily standard +#' deviation of the random walk over spline coefficients. +#'@param coef_sd_changepoint_dist The variability of Rt can change over time, +#' e.g. during the height of an epidemic wave, countermeasures may lead to much +#' faster changes in Rt than observable at other times. This potential +#' variability is accounted for using change points placed at regular +#' intervals. The standard deviation of the random walk over spline +#' coefficients then evolves linearly between the change points. The default +#' change point distance is 28 days. If set to zero, no change points are +#' modeled. +#'@param coef_sd_changepoint_sd This parameter controls the variability of the +#' change points. When change points are modeled, EpiSewer will estimate a +#' baseline standard deviation (see `coef_sd_prior_mu` and +#' `coef_sd_prior_sigma`), and model change point values as independently +#' distributed with mean equal to this baseline and standard deviation +#' `coef_sd_changepoint_sd`. +#'@param link Link function. Currently supported are `inv_softplus` (default) +#' and `scaled_logit`. Both of these links are configured to behave +#' approximately like the identity function around R=1, but become increasingly +#' non-linear below (and in the case of `scaled_logit` also above) R=1. +#'@param R_max If `link=scaled_logit` is used, a maximum reproduction number +#' must be assumed. This should be higher than any realistic R value for the +#' modeled pathogen. Default is 6. #' -#' @details `EpiSewer` uses a random walk on the B-spline coefficients for -#' regularization. This allows to use small knot distances without obtaining -#' extremely wiggly Rt estimates. +#'@details `EpiSewer` uses a random walk on the B-spline coefficients for +#' regularization. This allows to use small knot distances without obtaining +#' extremely wiggly Rt estimates. #' - The prior on the random walk intercept should reflect -#' your expectation of Rt at the beginning of the time series. If estimating -#' from the start of an epidemic, you might want to use a prior with mean -#' larger than 1 for the intercept. +#' your expectation of Rt at the beginning of the time series. If estimating +#' from the start of an epidemic, you might want to use a prior with mean +#' larger than 1 for the intercept. #' - The prior on the daily standard deviation should be interpreted in terms -#' of daily additive changes (this is at least true around Rt=1, and becomes -#' less true as Rt approaches 0 or its upper bound as defined by the `link` -#' function). For example, a standard deviation of 0.5 roughly allows the -#' spline coefficients to change by ±1 (using the 2 sigma rule) each day. The -#' daily standard deviation is multiplied by `sqrt(knot_distance)` to get the -#' actual standard deviation of the coefficients. This way, the prior is -#' independent of the chosen `knot_distance`. For example, if `knot_distance` -#' is 7 days, and a daily standard deviation of 0.5 is used, the coefficients -#' of two adjacent splines can differ by up to ±2.6. Note however that this -#' difference does not directly translate into a change of Rt by ±2.6, as Rt -#' is always the weighted sum of several basis functions at any given point. -#' It may therefore change much more gradually, depending on the distances -#' between knots. +#' of daily additive changes (this is at least true around Rt=1, and becomes +#' less true as Rt approaches 0 or its upper bound as defined by the `link` +#' function). For example, a prior with mean=0 and sd=0.2 allows a daily +#' standard deviation between 0 and 0.4. A daily standard deviation of 0.4 in +#' turn roughly allows the spline coefficients to change by ±0.8 (using the 2 +#' sigma rule) each day. The daily standard deviation is summed up over the +#' days between two knots to get the actual standard deviation of the +#' coefficients. This way, the prior is independent of the chosen +#' `knot_distance`. For example, if `knot_distance` is 7 days, and a constant +#' daily standard deviation of 0.4 is estimated, the coefficients of two +#' adjacent splines can differ by up to `0.8*sqrt(knot_distance)`, i.e. ±2.1. +#' Note however that this difference does not directly translate into a change +#' of Rt by ±2.1, as Rt is always the weighted sum of several basis functions +#' at any given point. It may therefore change much more gradually, depending +#' on the distances between knots. #' -#' @details The smoothness of the Rt estimates is influenced both by the knot -#' distance and by the daily standard deviation of the random walk on -#' coefficients. The latter also influences the uncertainty of Rt estimates -#' towards the present / date of estimation, when limited data signal is -#' available. Absent sufficient data signal, Rt estimates will tend to stay at -#' the current level (which corresponds to assuming unchanged transmission -#' dynamics). +#'@details The smoothness of the Rt estimates is influenced both by the knot +#' distance and by the daily standard deviation of the random walk on +#' coefficients. The latter also influences the uncertainty of Rt estimates +#' towards the present / date of estimation, when limited data signal is +#' available. Absent sufficient data signal, Rt estimates will tend to stay at +#' the current level (which corresponds to assuming unchanged transmission +#' dynamics). #' -#' @details The priors of this component have the following functional form: +#'@details The priors of this component have the following functional form: #' - intercept of the random walk over spline coefficients: -#' `Normal` +#' `Normal` #' - standard deviation of the random walk over spline coefficients: -#' `Truncated normal` +#' `Truncated normal` #' -#' @inheritParams template_model_helpers -#' @inherit modeldata_init return -#' @export -#' @family {Rt models} +#'@inheritParams template_model_helpers +#'@inherit modeldata_init return +#'@export +#'@family {Rt models} R_estimate_splines <- function( knot_distance = 7, spline_degree = 3, coef_intercept_prior_mu = 1, coef_intercept_prior_sigma = 0.8, coef_sd_prior_mu = 0, - coef_sd_prior_sigma = 0.25, + coef_sd_prior_sigma = 0.2, + coef_sd_changepoint_dist = 28, + coef_sd_changepoint_sd = 0.05, link = "inv_softplus", R_max = 6, modeldata = modeldata_init()) { @@ -441,18 +508,6 @@ R_estimate_splines <- function( modeldata$.metainfo$R_knots <- knots modeldata$.metainfo$B <- B modeldata$bs_n_basis <- ncol(B) - modeldata$bs_dists <- c( - rep( # left boundary basis functions - knots$interior[1]-knots$boundary[1], spline_degree - 1 - ), - diff( # interior basis functions - knots$interior - ), - rep( # right boundary basis function - knots$boundary[2]-knots$interior[length(knots$interior)], 1 - ) - ) - B_sparse <- suppressMessages(rstan::extract_sparse_parts(B)) modeldata$bs_n_w <- length(B_sparse$w) modeldata$bs_w <- B_sparse$w @@ -460,11 +515,46 @@ R_estimate_splines <- function( modeldata$bs_u <- B_sparse$u modeldata$.init$bs_coeff_ar_start <- 0 - modeldata$.init$bs_coeff_ar_sd <- 0.1 modeldata$.init$bs_coeff_noise_raw <- rep(0, modeldata$bs_n_basis - 1) + + modeldata <- add_R_variability( + length_R = modeldata$.metainfo$length_R, + length_seeding = modeldata$.metainfo$length_seeding, + length_partial = modeldata$.metainfo$partial_window, + h = modeldata$.metainfo$forecast_horizon, + changepoint_dist = coef_sd_changepoint_dist, + modeldata = modeldata + ) + modeldata$.init$R_vari_baseline <- 0.01 + modeldata$.init$R_vari_changepoints <- rep(0.01, modeldata$R_vari_n_basis) + + # build sparse matrix that represents which days need to be summed up + # to compute the variance between two knots + all_positions <- c( + rev(knots$interior[1] - seq( + 0, + by = (knots$interior[1] - knots$boundary[1]), + length.out = spline_degree + )), knots$interior[-1], knots$boundary[2]) + R_vari_selection <- t(mapply( + get_selection_vector, + from = all_positions[-length(all_positions)] + 1, + to = all_positions[-1], + n = nrow(B) + )) + modeldata$R_vari_sel_ncol <- ncol(R_vari_selection) + R_vari_selection_sparse <- suppressMessages( + rstan::extract_sparse_parts(R_vari_selection) + ) + modeldata$R_vari_sel_n_w <- length(R_vari_selection_sparse$w) + modeldata$R_vari_sel_w <- R_vari_selection_sparse$w + modeldata$R_vari_sel_v <- R_vari_selection_sparse$v + modeldata$R_vari_sel_u <- R_vari_selection_sparse$u + }, required = c( ".metainfo$length_R", + ".metainfo$length_seeding", ".metainfo$partial_window", ".metainfo$forecast_horizon" ), @@ -481,6 +571,7 @@ R_estimate_splines <- function( mu = coef_sd_prior_mu, sigma = coef_sd_prior_sigma ) + modeldata$R_vari_sd <- coef_sd_changepoint_sd modeldata <- add_link_function(link, R_max, modeldata) @@ -493,7 +584,7 @@ R_estimate_splines <- function( )) modeldata <- add_dummy_inits(modeldata, c( - "R_level_start", "R_trend_start", "R_sd", "R_noise", + "R_level_start", "R_trend_start", "R_noise", "ets_alpha", "ets_beta", "ets_phi" )) @@ -693,6 +784,58 @@ R_estimate_approx <- function( return(modeldata) } + +#' Add change point model for the variability of Rt +#' +#' @param length_R Length of the modeled Rt time series +#' @param changepoint_dist How many days should the change points be apart? If +#' zero, no change points are modeled (fixed R variability). +#' @inheritParams template_model_helpers +#' +#' @details The change points are placed going backwards from the end of the +#' time series, i.e. on the last day, then `changepoint_dist` days before +#' that, and so on. The distance between the first and second change point at +#' the start of the time series (partly falls into the seeding phase) varies +#' between half and double the `changepoint_dist`. +#' +#' @inherit modeldata_init return +#' @keywords internal +add_R_variability <- function(length_R, h, length_seeding, length_partial, + changepoint_dist, modeldata) { + if (changepoint_dist == 0) { + # no changepoints + B <- matrix(rep(1,length_R+h), ncol = 1) + } else if (length_R - length_partial - changepoint_dist <= + length_seeding + changepoint_dist/2) { + B <- matrix(rep(1,length_R+h), ncol = 1) + } else { + # we here suppress extrapolation warnings if h > changepoint_dist, as we fix + # spline bases in the next step + B <- suppressWarnings(splines::bs( + 1:(length_R+h), + knots = rev(seq( + length_R - length_partial - changepoint_dist, + length_seeding + changepoint_dist/2, + by = -changepoint_dist) + ), + degree = 1, + intercept = TRUE, + Boundary.knots = c(length_seeding, length_R - length_partial), + )) + B <- matrix(pmax(0, pmin(B, 1)), ncol = ncol(B)) # fix extrapolated bases + if (all(B[,ncol(B)]==0)) { + B <- B[,-ncol(B)] + } + } + modeldata$R_vari_n_basis <- ncol(B) + B_sparse <- suppressMessages(rstan::extract_sparse_parts(B)) + modeldata$R_vari_n_w <- length(B_sparse$w) + modeldata$R_vari_w <- B_sparse$w + modeldata$R_vari_v <- B_sparse$v + modeldata$R_vari_u <- B_sparse$u + return(modeldata) +} + #' Estimate constant seeding infections #' #' @description This option estimates a constant number of infections at the @@ -1085,3 +1228,15 @@ add_link_function <- function(link, R_max, modeldata) { } return(modeldata) } + +get_selection_vector <- function(from, to, n) { + selection <- rep(0, n) + selection[max(1,min(n,from)):max(1,min(n,to))] <- 1 + if (from < 1) { + selection[1] <- selection[1] + (min(1, to)-from) + } + if (to > n) { + selection[n] <- selection[n] + (to-max(n, from)) + } + return(selection) +} diff --git a/inst/stan/EpiSewer_main.stan b/inst/stan/EpiSewer_main.stan index a16744d..60c9833 100644 --- a/inst/stan/EpiSewer_main.stan +++ b/inst/stan/EpiSewer_main.stan @@ -97,12 +97,10 @@ data { // Reproduction number ---- int R_model; // 0 for ets, 1 for spline smoothing - // Hyperpriors for exponential smoothing + // Exponential smoothing priors / configuration array[R_model == 0 ? 2 : 0] real R_level_start_prior; array[R_model == 0 ? 2 : 0] real R_trend_start_prior; array[R_model == 0 ? 2 : 0] real R_sd_prior; - - // Exponential smoothing priors / configuration array[R_model == 0 ? 1 : 0] int ets_diff; // order of differencing array[R_model == 0 ? 1 : 0] int ets_noncentered; // use non-centered parameterization? array[R_model == 0 ? 2 : 0] real ets_alpha_prior; @@ -112,14 +110,28 @@ data { // Basis spline (bs) configuration for spline smoothing of R // Sparse bs matrix: columns = bases (bs_n_basis), rows = time points (L+S+T-G) array[R_model == 1 ? 1 : 0] int bs_n_basis; // number of B-splines - vector[R_model == 1 ? bs_n_basis[1] - 1 : 0] bs_dists; // distances between knots array[R_model == 1 ? 1 : 0] int bs_n_w; // number of nonzero entries in bs matrix vector[R_model == 1 ? bs_n_w[1] : 0] bs_w; // nonzero entries in bs matrix array[R_model == 1 ? bs_n_w[1] : 0] int bs_v; // column indices of bs_w - array[R_model == 1 ? L + S + D + T - G + 1 + h : 0] int bs_u; // row starting indices for bs_w plus padding + array[R_model == 1 ? (L + S + D + T - G + 1 + h) : 0] int bs_u; // row starting indices for bs_w plus padding array[R_model == 1 ? 2 : 0] real bs_coeff_ar_start_prior; // start hyperprior for random walk on log bs coeffs array[R_model == 1 ? 2 : 0] real bs_coeff_ar_sd_prior; // sd hyperprior for random walk on log bs coeffs + // Change point model for R variability + real R_vari_sd; // standard deviation of changepoint values around baseline + int R_vari_n_basis; // number of B-splines (degree 1) for change points + int R_vari_n_w; // number of nonzero entries in R_vari matrix + vector[R_vari_n_w] R_vari_w; // nonzero entries in R_vari matrix + array[R_vari_n_w] int R_vari_v; // column indices of R_vari_w + array[L + S + D + T - G + 1 + h] int R_vari_u; // row starting indices for R_vari_w plus padding + + // Selection matrix for R variability (needed for spline smoothing) + array[R_model == 1 ? 1 : 0] int R_vari_sel_ncol; // number of columns + array[R_model == 1 ? 1 : 0] int R_vari_sel_n_w; // number of nonzero entries in R_vari_sel matrix + vector[R_model == 1 ? R_vari_sel_n_w[1] : 0] R_vari_sel_w; // nonzero entries in R_vari_sel matrix + array[R_model == 1 ? R_vari_sel_n_w[1]: 0] int R_vari_sel_v; // column indices of R_vari_sel_w + array[R_model == 1 ? bs_n_basis[1] : 0] int R_vari_sel_u; // row starting indices for R_vari_sel_w plus padding + // Link function and corresponding hyperparameters // first element: 0 = inv_softplus, 1 = scaled_logit // other elements: hyperparameters for the respective link function @@ -213,17 +225,19 @@ parameters { // Exponential smoothing (ets) time series prior for Rt array[R_model == 0 ? 1 : 0] real R_level_start; // starting value of the level array[R_model == 0 ? 1 : 0] real R_trend_start; // starting value of the trend - array[R_model == 0 ? 1 : 0] real R_sd; // standard deviation of additive errors - vector[R_model == 0 ? L + S + D + T - G - 1 : 0] R_noise; // additive errors + vector[R_model == 0 ? L + S + D + T - G - 1 : 0] R_noise; // additive errors + real R_vari_baseline; // baseline R variability (mean of changepoints) array[R_model == 0 && ets_alpha_prior[2] > 0 ? 1 : 0] real ets_alpha; // smoothing parameter for the level array[R_model == 0 && ets_beta_prior[2] > 0 ? 1 : 0] real ets_beta; // smoothing parameter for the trend array[R_model == 0 && ets_phi_prior[2] > 0 ? 1 : 0] real ets_phi; // dampening parameter of the trend // Basis spline (bs) time series prior for Rt array[R_model == 1 ? 1 : 0] real bs_coeff_ar_start; // intercept for random walk on log bs coeffs - array[R_model == 1 ? 1 : 0] real bs_coeff_ar_sd; // sd for random walk on log bs coeffs vector[R_model == 1 ? (bs_n_basis[1] - 1) : 0] bs_coeff_noise_raw; // additive errors (non-centered) + // Change point model for Rt + vector[R_vari_n_basis] R_vari_changepoints; // R variability at changepoints + // seeding real iota_log_seed_intercept; array[seeding_model == 1 ? 1 : 0] real iota_log_seed_sd; @@ -253,6 +267,8 @@ parameters { transformed parameters { vector[L + S + D + T - G] R; // effective reproduction number vector[R_model == 1 ? h : 0] R_forecast_spline; // spline-based forecast of R + vector[R_model == 0 ? L + S + D + T - G - 1 : 0] R_sd; // standard deviation of additive errors in R ets model + vector[R_model == 1 ? L + S + D + T - G + h : 0] bs_coeff_ar_sd; // sd for random walk on log bs coeffs vector[L + S + D + T] iota; // expected number of infections vector[load_vari ? S + D + T : 0] zeta_log; // realized shedding load vector[S + D + T] lambda; // expected number of shedding onsets @@ -264,18 +280,31 @@ transformed parameters { array[LOD_model > 0 ? 1 : 0] vector[n_measured] LOD_hurdle_scale; if (R_model == 0) { + R_sd = csr_matrix_times_vector( + L + S + D + T - G + h, R_vari_n_basis, R_vari_w, + R_vari_v, R_vari_u, R_vari_changepoints + )[2:(L + S + D + T - G)]; // Innovations state space process implementing exponential smoothing R = apply_link(holt_damped_process( [R_level_start[1], R_trend_start[1]]', param_or_fixed(ets_alpha, ets_alpha_prior), param_or_fixed(ets_beta, ets_beta_prior), param_or_fixed(ets_phi, ets_phi_prior), - R_noise, + R_noise .* (ets_noncentered[1] ? R_sd : rep_vector(1, L + S + D + T - G - 1)), ets_diff[1] ), R_link); } else if (R_model == 1) { + bs_coeff_ar_sd = csr_matrix_times_vector( + L + S + D + T - G + h, R_vari_n_basis, R_vari_w, + R_vari_v, R_vari_u, R_vari_changepoints + ); + vector[bs_n_basis[1] - 1] bs_coeff_noise = bs_coeff_noise_raw .* + sqrt(csr_matrix_times_vector( + bs_n_basis[1] - 1, R_vari_sel_ncol[1], R_vari_sel_w, + R_vari_sel_v, R_vari_sel_u, + bs_coeff_ar_sd^2 // we add together variances --> square sd + )); // Basis spline smoothing - vector[bs_n_basis[1] - 1] bs_coeff_noise = bs_coeff_noise_raw .* (bs_coeff_ar_sd[1] * sqrt(bs_dists)); // additive errors vector[bs_n_basis[1]] bs_coeff = random_walk([bs_coeff_ar_start[1]]', bs_coeff_noise, 0); // Basis spline coefficients vector[L + S + D + T - G + h] R_all = apply_link(csr_matrix_times_vector( L + S + D + T - G + h, bs_n_basis[1], bs_w, bs_v, bs_u, bs_coeff @@ -387,12 +416,18 @@ model { ); R_level_start ~ normal(R_level_start_prior[1], R_level_start_prior[2]); // starting prior for level R_trend_start ~ normal(R_trend_start_prior[1], R_trend_start_prior[2]); // starting prior for trend - R_sd ~ normal(R_sd_prior[1], R_sd_prior[2]) T[0, ]; // truncated normal - R_noise ~ normal(0, R_sd[1]); // Gaussian noise + R_vari_baseline ~ normal(R_sd_prior[1], R_sd_prior[2]) T[0, ]; // truncated normal, baseline + R_vari_changepoints ~ normal(R_vari_baseline, R_vari_sd) T[0, ]; // truncated normal, change points are independent + if (ets_noncentered[1]) { + R_noise ~ std_normal(); // Gaussian noise + } else { + R_noise ~ normal(0, R_sd); // Gaussian noise + } } else if (R_model == 1) { // R spline smoothing bs_coeff_ar_start ~ normal(bs_coeff_ar_start_prior[1], bs_coeff_ar_start_prior[2]); // starting prior - bs_coeff_ar_sd ~ normal(bs_coeff_ar_sd_prior[1], bs_coeff_ar_sd_prior[2]) T[0, ]; // truncated normal + R_vari_baseline ~ normal(bs_coeff_ar_sd_prior[1], bs_coeff_ar_sd_prior[2]) T[0, ]; // truncated normal, baseline + R_vari_changepoints ~ normal(R_vari_baseline, R_vari_sd) T[0, ]; // truncated normal, change points are independent bs_coeff_noise_raw ~ std_normal(); // Gaussian noise } @@ -559,16 +594,21 @@ generated quantities { // Forecasting of R if (R_model == 0) { // Innovations state space process implementing exponential smoothing + vector[h] R_sd_forecast = csr_matrix_times_vector( + L + S + D + T - G + h, R_vari_n_basis, R_vari_w, + R_vari_v, R_vari_u, R_vari_changepoints + )[((L + S + D + T - G) + 1):((L + S + D + T - G) + h)]; R_forecast = apply_link(holt_damped_process( [R_level_start[1], R_trend_start[1]]', param_or_fixed(ets_alpha, ets_alpha_prior), param_or_fixed(ets_beta, ets_beta_prior), param_or_fixed(ets_phi, ets_phi_prior), - append_row(R_noise, to_vector(normal_rng(rep_vector(0, h), R_sd[1]))), + append_row( + R_noise .* (ets_noncentered[1] ? R_sd : rep_vector(1, L + S + D + T - G - 1)), + to_vector(normal_rng(0, R_sd_forecast))), ets_diff[1] ), R_link)[((L + S + D + T - G) + 1):((L + S + D + T - G) + h)]; } else if (R_model == 1) { - // Current solution for smoothing splines is to use a simple random walk for forecasting R_forecast = R_forecast_spline; } From bdebe340e1628dba5d1cc713d2999ac265a5f6f6 Mon Sep 17 00:00:00 2001 From: Adrian Lison Date: Fri, 4 Oct 2024 13:07:54 +0200 Subject: [PATCH 2/6] Update doc --- man/R_estimate_ets.Rd | 21 ++++++++++++-- man/R_estimate_rw.Rd | 29 +++++++++++++++----- man/R_estimate_splines.Rd | 58 +++++++++++++++++++++++++-------------- man/add_R_variability.Rd | 46 +++++++++++++++++++++++++++++++ man/model_infections.Rd | 4 +-- man/model_sampling.Rd | 2 +- 6 files changed, 127 insertions(+), 33 deletions(-) create mode 100644 man/add_R_variability.Rd diff --git a/man/R_estimate_ets.Rd b/man/R_estimate_ets.Rd index 9c230c2..95f5925 100644 --- a/man/R_estimate_ets.Rd +++ b/man/R_estimate_ets.Rd @@ -11,6 +11,8 @@ R_estimate_ets( trend_prior_sigma = 0.1, sd_prior_mu = 0, sd_prior_sigma = 0.1, + sd_changepoint_dist = 28, + sd_changepoint_sd = 0.025, link = "inv_softplus", R_max = 6, smooth_prior_mu = 0.5, @@ -40,11 +42,24 @@ Rt.} \item{sd_prior_sigma}{Prior (standard deviation) on the standard deviation of the innovations.} +\item{sd_changepoint_dist}{The variability of Rt can change over time, e.g. +during the height of an epidemic wave, countermeasures may lead to much +faster changes in Rt than observable at other times. This potential +variability is accounted for using change points placed at regular +intervals. The standard deviation of the state space model innovations then +evolves linearly between the change points. The default change point +distance is 28 days. If set to zero, no change points are modeled.} + +\item{sd_changepoint_sd}{This parameter controls the variability of the change +points. When change points are modeled, EpiSewer will estimate a baseline +standard deviation (see \code{sd_prior_mu} and \code{sd_prior_sigma}), and model +change point values as independently distributed with mean equal to this +baseline and standard deviation \code{sd_changepoint_sd}.} + \item{link}{Link function. Currently supported are \code{inv_softplus} (default) and \code{scaled_logit}. Both of these links are configured to behave -approximately like the identity function around R=1, but become -increasingly non-linear below (and in the case of \code{scaled_logit} also -above) R=1.} +approximately like the identity function around R=1, but become increasingly +non-linear below (and in the case of \code{scaled_logit} also above) R=1.} \item{R_max}{If \code{link=scaled_logit} is used, a maximum reproduction number must be assumed. This should be higher than any realistic R value for the diff --git a/man/R_estimate_rw.Rd b/man/R_estimate_rw.Rd index eca09cd..7405921 100644 --- a/man/R_estimate_rw.Rd +++ b/man/R_estimate_rw.Rd @@ -9,6 +9,8 @@ R_estimate_rw( intercept_prior_sigma = 0.8, sd_prior_mu = 0, sd_prior_sigma = 0.1, + sd_changepoint_dist = 28, + sd_changepoint_sd = 0.025, link = "inv_softplus", R_max = 6, differenced = FALSE, @@ -27,11 +29,24 @@ the random walk.} \item{sd_prior_sigma}{Prior (standard deviation) on the standard deviation of the random walk.} +\item{sd_changepoint_dist}{The variability of Rt can change over time, e.g. +during the height of an epidemic wave, countermeasures may lead to much +faster changes in Rt than observable at other times. This potential +variability is accounted for using change points placed at regular +intervals. The standard deviation of the random walk then evolves linearly +between the change points. The default change point distance is 28 days. If +set to zero, no change points are modeled.} + +\item{sd_changepoint_sd}{This parameter controls the variability of the change +points. When change points are modeled, EpiSewer will estimate a baseline +standard deviation (see \code{sd_prior_mu} and \code{sd_prior_sigma}), and model +change point values as independently distributed with mean equal to this +baseline and standard deviation \code{sd_changepoint_sd}.} + \item{link}{Link function. Currently supported are \code{inv_softplus} (default) and \code{scaled_logit}. Both of these links are configured to behave -approximately like the identity function around R=1, but become -increasingly non-linear below (and in the case of \code{scaled_logit} also -above) R=1.} +approximately like the identity function around R=1, but become increasingly +non-linear below (and in the case of \code{scaled_logit} also above) R=1.} \item{R_max}{If \code{link=scaled_logit} is used, a maximum reproduction number must be assumed. This should be higher than any realistic R value for the @@ -66,10 +81,10 @@ time using a random walk. \details{ The smoothness of Rt estimates is influenced by the prior on the standard deviation of the random walk. It also influences the uncertainty -of Rt estimates towards the present / date of estimation, when limited -data signal is available. The prior on the intercept of the random walk -should reflect your expectation of Rt at the beginning of the time series. -If estimating from the start of an epidemic, you might want to use a prior +of Rt estimates towards the present / date of estimation, when limited data +signal is available. The prior on the intercept of the random walk should +reflect your expectation of Rt at the beginning of the time series. If +estimating from the start of an epidemic, you might want to use a prior with mean > 1 for the intercept. The priors of this component have the following functional form: diff --git a/man/R_estimate_splines.Rd b/man/R_estimate_splines.Rd index f215c39..77e35cd 100644 --- a/man/R_estimate_splines.Rd +++ b/man/R_estimate_splines.Rd @@ -10,7 +10,9 @@ R_estimate_splines( coef_intercept_prior_mu = 1, coef_intercept_prior_sigma = 0.8, coef_sd_prior_mu = 0, - coef_sd_prior_sigma = 0.25, + coef_sd_prior_sigma = 0.2, + coef_sd_changepoint_dist = 28, + coef_sd_changepoint_sd = 0.05, link = "inv_softplus", R_max = 6, modeldata = modeldata_init() @@ -19,10 +21,9 @@ R_estimate_splines( \arguments{ \item{knot_distance}{Distance between spline breakpoints (knots) in days (default is 7, i.e. one knot each week). Placing knots further apart -increases the smoothness of Rt estimates and can speed up model fitting. -The Rt time series remains surprisingly flexible even at larger knot -distances, but placing knots too far apart can lead to inaccurate -estimates.} +increases the smoothness of Rt estimates and can speed up model fitting. The +Rt time series remains surprisingly flexible even at larger knot distances, +but placing knots too far apart can lead to inaccurate estimates.} \item{spline_degree}{Degree of the spline polynomials (default is 3 for cubic splines).} @@ -34,16 +35,31 @@ walk over spline coefficients.} of the random walk over spline coefficients.} \item{coef_sd_prior_mu}{Prior (mean) on the daily standard deviation of the -random walk over spline coefficients.} +random walk over spline coefficients (see details).} \item{coef_sd_prior_sigma}{Prior (standard deviation) on the daily standard deviation of the random walk over spline coefficients.} +\item{coef_sd_changepoint_dist}{The variability of Rt can change over time, +e.g. during the height of an epidemic wave, countermeasures may lead to much +faster changes in Rt than observable at other times. This potential +variability is accounted for using change points placed at regular +intervals. The standard deviation of the random walk over spline +coefficients then evolves linearly between the change points. The default +change point distance is 28 days. If set to zero, no change points are +modeled.} + +\item{coef_sd_changepoint_sd}{This parameter controls the variability of the +change points. When change points are modeled, EpiSewer will estimate a +baseline standard deviation (see \code{coef_sd_prior_mu} and +\code{coef_sd_prior_sigma}), and model change point values as independently +distributed with mean equal to this baseline and standard deviation +\code{coef_sd_changepoint_sd}.} + \item{link}{Link function. Currently supported are \code{inv_softplus} (default) and \code{scaled_logit}. Both of these links are configured to behave -approximately like the identity function around R=1, but become -increasingly non-linear below (and in the case of \code{scaled_logit} also -above) R=1.} +approximately like the identity function around R=1, but become increasingly +non-linear below (and in the case of \code{scaled_logit} also above) R=1.} \item{R_max}{If \code{link=scaled_logit} is used, a maximum reproduction number must be assumed. This should be higher than any realistic R value for the @@ -79,17 +95,19 @@ larger than 1 for the intercept. \item The prior on the daily standard deviation should be interpreted in terms of daily additive changes (this is at least true around Rt=1, and becomes less true as Rt approaches 0 or its upper bound as defined by the \code{link} -function). For example, a standard deviation of 0.5 roughly allows the -spline coefficients to change by ±1 (using the 2 sigma rule) each day. The -daily standard deviation is multiplied by \code{sqrt(knot_distance)} to get the -actual standard deviation of the coefficients. This way, the prior is -independent of the chosen \code{knot_distance}. For example, if \code{knot_distance} -is 7 days, and a daily standard deviation of 0.5 is used, the coefficients -of two adjacent splines can differ by up to ±2.6. Note however that this -difference does not directly translate into a change of Rt by ±2.6, as Rt -is always the weighted sum of several basis functions at any given point. -It may therefore change much more gradually, depending on the distances -between knots. +function). For example, a prior with mean=0 and sd=0.2 allows a daily +standard deviation between 0 and 0.4. A daily standard deviation of 0.4 in +turn roughly allows the spline coefficients to change by ±0.8 (using the 2 +sigma rule) each day. The daily standard deviation is summed up over the +days between two knots to get the actual standard deviation of the +coefficients. This way, the prior is independent of the chosen +\code{knot_distance}. For example, if \code{knot_distance} is 7 days, and a constant +daily standard deviation of 0.4 is estimated, the coefficients of two +adjacent splines can differ by up to \code{0.8*sqrt(knot_distance)}, i.e. ±2.1. +Note however that this difference does not directly translate into a change +of Rt by ±2.1, as Rt is always the weighted sum of several basis functions +at any given point. It may therefore change much more gradually, depending +on the distances between knots. } The smoothness of the Rt estimates is influenced both by the knot diff --git a/man/add_R_variability.Rd b/man/add_R_variability.Rd new file mode 100644 index 0000000..1b99054 --- /dev/null +++ b/man/add_R_variability.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model_infections.R +\name{add_R_variability} +\alias{add_R_variability} +\title{Add change point model for the variability of Rt} +\usage{ +add_R_variability( + length_R, + h, + length_seeding, + length_partial, + changepoint_dist, + modeldata +) +} +\arguments{ +\item{length_R}{Length of the modeled Rt time series} + +\item{changepoint_dist}{How many days should the change points be apart? If +zero, no change points are modeled (fixed R variability).} + +\item{modeldata}{A \code{modeldata} object to which the above model specifications +should be added. Default is an empty model given by \code{\link[=modeldata_init]{modeldata_init()}}. Can +also be an already partly specified model returned by other \code{EpiSewer} +modeling functions.} +} +\value{ +A \code{modeldata} object containing data and specifications of the model +to be fitted. Can be passed on to other \code{EpiSewer} modeling functions to +add further data and model specifications. + +The \code{modeldata} object also includes information about parameter +initialization (\code{init}), meta data (\code{.metainfo}), and checks to be +performed before model fitting (\code{.checks}). +} +\description{ +Add change point model for the variability of Rt +} +\details{ +The change points are placed going backwards from the end of the +time series, i.e. on the last day, then \code{changepoint_dist} days before +that, and so on. The distance between the first and second change point at +the start of the time series (partly falls into the seeding phase) varies +between half and double the \code{changepoint_dist}. +} +\keyword{internal} diff --git a/man/model_infections.Rd b/man/model_infections.Rd index d85f0b6..79fe7b8 100644 --- a/man/model_infections.Rd +++ b/man/model_infections.Rd @@ -24,10 +24,10 @@ of interest estimated by \code{EpiSewer}. \code{R} is smoothed using a time seri smoothing prior. Currently supported are: random walk (rw), exponential smoothing (ets), and smoothing splines. Modeling options: \itemize{ -\item \code{\link[=R_estimate_approx]{R_estimate_approx()}} -\item \code{\link[=R_estimate_rw]{R_estimate_rw()}} \item \code{\link[=R_estimate_splines]{R_estimate_splines()}} \item \code{\link[=R_estimate_ets]{R_estimate_ets()}} +\item \code{\link[=R_estimate_approx]{R_estimate_approx()}} +\item \code{\link[=R_estimate_rw]{R_estimate_rw()}} }} \item{seeding}{Seeding of initial infections. The renewal model used by diff --git a/man/model_sampling.Rd b/man/model_sampling.Rd index 06d4e12..e27cbe4 100644 --- a/man/model_sampling.Rd +++ b/man/model_sampling.Rd @@ -15,8 +15,8 @@ such effects using covariates that describe differences between the samples. Modeling options: \itemize{ \item \code{\link[=sample_effects_none]{sample_effects_none()}} -\item \code{\link[=sample_effects_estimate_matrix]{sample_effects_estimate_matrix()}} \item \code{\link[=sample_effects_estimate_weekday]{sample_effects_estimate_weekday()}} +\item \code{\link[=sample_effects_estimate_matrix]{sample_effects_estimate_matrix()}} }} } \value{ From ee3c1a0e6870471e04e517041dfd9c219012df10 Mon Sep 17 00:00:00 2001 From: Adrian Lison Date: Sat, 5 Oct 2024 12:33:16 +0200 Subject: [PATCH 3/6] Implement mean reversion changepoint model With the new implementation, the standard deviation reverts to the baseline / mean towards the present (from total_delay_dist to present) and towards the start of the time series (length of the seeding period) --- R/model_infections.R | 33 +++++++++++++++++++-------------- inst/stan/EpiSewer_main.stan | 17 +++++++++++++---- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/R/model_infections.R b/R/model_infections.R index 8cac585..3428ebb 100644 --- a/R/model_infections.R +++ b/R/model_infections.R @@ -243,12 +243,13 @@ R_estimate_ets <- function( modeldata = modeldata ) modeldata$.init$R_vari_baseline <- max( - modeldata$R_sd_prior$R_sd_prior[1], 0.01 + modeldata$R_sd_prior$R_sd_prior[1], 1e-2 ) - modeldata$.init$R_vari_changepoints <- rep( - max(modeldata$R_sd_prior$R_sd_prior[1], 0.01), - modeldata$R_vari_n_basis - ) + if (modeldata$R_vari_n_basis > 2) { + modeldata$.init$R_vari_changepoints <- rep(1e-2, modeldata$R_vari_n_basis - 2) + } else { + modeldata$.init$R_vari_changepoints <- rep(1e-2, modeldata$R_vari_n_basis) + } }, required = c( ".metainfo$length_R", @@ -525,8 +526,12 @@ R_estimate_splines <- function( changepoint_dist = coef_sd_changepoint_dist, modeldata = modeldata ) - modeldata$.init$R_vari_baseline <- 0.01 - modeldata$.init$R_vari_changepoints <- rep(0.01, modeldata$R_vari_n_basis) + modeldata$.init$R_vari_baseline <- 1e-2 + if (modeldata$R_vari_n_basis > 2) { + modeldata$.init$R_vari_changepoints <- rep(1e-2, modeldata$R_vari_n_basis - 2) + } else { + modeldata$.init$R_vari_changepoints <- rep(1e-2, modeldata$R_vari_n_basis) + } # build sparse matrix that represents which days need to be summed up # to compute the variance between two knots @@ -805,22 +810,22 @@ add_R_variability <- function(length_R, h, length_seeding, length_partial, if (changepoint_dist == 0) { # no changepoints B <- matrix(rep(1,length_R+h), ncol = 1) - } else if (length_R - length_partial - changepoint_dist <= - length_seeding + changepoint_dist/2) { + } else if (length_R - length_partial - changepoint_dist <= length_seeding) { B <- matrix(rep(1,length_R+h), ncol = 1) } else { # we here suppress extrapolation warnings if h > changepoint_dist, as we fix # spline bases in the next step B <- suppressWarnings(splines::bs( 1:(length_R+h), - knots = rev(seq( - length_R - length_partial - changepoint_dist, + knots = rev(c(seq( + length_R - length_partial, length_seeding + changepoint_dist/2, - by = -changepoint_dist) - ), + by = -changepoint_dist), + length_seeding + )), degree = 1, intercept = TRUE, - Boundary.knots = c(length_seeding, length_R - length_partial), + Boundary.knots = c(1, length_R), )) B <- matrix(pmax(0, pmin(B, 1)), ncol = ncol(B)) # fix extrapolated bases if (all(B[,ncol(B)]==0)) { diff --git a/inst/stan/EpiSewer_main.stan b/inst/stan/EpiSewer_main.stan index 60c9833..26108d8 100644 --- a/inst/stan/EpiSewer_main.stan +++ b/inst/stan/EpiSewer_main.stan @@ -236,7 +236,7 @@ parameters { vector[R_model == 1 ? (bs_n_basis[1] - 1) : 0] bs_coeff_noise_raw; // additive errors (non-centered) // Change point model for Rt - vector[R_vari_n_basis] R_vari_changepoints; // R variability at changepoints + vector[R_vari_n_basis > 2 ? R_vari_n_basis - 2 : R_vari_n_basis] R_vari_changepoints; // R variability at changepoints // seeding real iota_log_seed_intercept; @@ -282,7 +282,10 @@ transformed parameters { if (R_model == 0) { R_sd = csr_matrix_times_vector( L + S + D + T - G + h, R_vari_n_basis, R_vari_w, - R_vari_v, R_vari_u, R_vari_changepoints + R_vari_v, R_vari_u, + R_vari_n_basis > 2 ? append_row3( + [R_vari_baseline]', R_vari_changepoints, [R_vari_baseline]' + ) : R_vari_changepoints )[2:(L + S + D + T - G)]; // Innovations state space process implementing exponential smoothing R = apply_link(holt_damped_process( @@ -296,7 +299,10 @@ transformed parameters { } else if (R_model == 1) { bs_coeff_ar_sd = csr_matrix_times_vector( L + S + D + T - G + h, R_vari_n_basis, R_vari_w, - R_vari_v, R_vari_u, R_vari_changepoints + R_vari_v, R_vari_u, + R_vari_n_basis > 2 ? append_row3( + [R_vari_baseline]', R_vari_changepoints, [R_vari_baseline]' + ) : R_vari_changepoints ); vector[bs_n_basis[1] - 1] bs_coeff_noise = bs_coeff_noise_raw .* sqrt(csr_matrix_times_vector( @@ -596,7 +602,10 @@ generated quantities { // Innovations state space process implementing exponential smoothing vector[h] R_sd_forecast = csr_matrix_times_vector( L + S + D + T - G + h, R_vari_n_basis, R_vari_w, - R_vari_v, R_vari_u, R_vari_changepoints + R_vari_v, R_vari_u, + R_vari_n_basis > 2 ? append_row3( + [R_vari_baseline]', R_vari_changepoints, [R_vari_baseline]' + ) : R_vari_changepoints )[((L + S + D + T - G) + 1):((L + S + D + T - G) + h)]; R_forecast = apply_link(holt_damped_process( [R_level_start[1], R_trend_start[1]]', From f45664656a07d9cd95eb14ab163499472c2a9e3a Mon Sep 17 00:00:00 2001 From: Adrian Lison Date: Sat, 5 Oct 2024 12:33:24 +0200 Subject: [PATCH 4/6] Fix p_zero=1 problems in generated quantitites --- inst/stan/EpiSewer_approx.stan | 7 ++++- inst/stan/EpiSewer_main.stan | 7 ++++- inst/stan/functions/helper_functions.stan | 31 ++++++++++++++++++++++- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/inst/stan/EpiSewer_approx.stan b/inst/stan/EpiSewer_approx.stan index eb55524..f7034f7 100644 --- a/inst/stan/EpiSewer_approx.stan +++ b/inst/stan/EpiSewer_approx.stan @@ -685,6 +685,11 @@ generated quantities { cv_type == 1 ? nu_upsilon_a : 0, // nu_pre (pre-PCR CV) cv_type == 1 ? cv_pre_type[1] : 0 // Type of pre-PCR CV )); + p_zero_all = trim_or_reject_ub( + p_zero_all, + 1-1e-5, // trim to almost 1 + 1.01 // throw error when significantly above 1 + ); above_LOD = to_vector(bernoulli_rng(1-p_zero_all)); } else { p_zero_all = rep_vector(0, T+h); @@ -714,7 +719,7 @@ generated quantities { vector[T+h] cv_conditional_all = sqrt(cv_all^2 .* (1-p_zero_all) - p_zero_all); // correct potentially slightly negative approximations - cv_conditional_all = trim_or_reject( + cv_conditional_all = trim_or_reject_lb( cv_conditional_all, 1e-5, // trim to almost zero -0.01 // throw error when significantly below zero diff --git a/inst/stan/EpiSewer_main.stan b/inst/stan/EpiSewer_main.stan index 26108d8..a90df59 100644 --- a/inst/stan/EpiSewer_main.stan +++ b/inst/stan/EpiSewer_main.stan @@ -723,6 +723,11 @@ generated quantities { cv_type == 1 ? nu_upsilon_a : 0, // nu_pre (pre-PCR CV) cv_type == 1 ? cv_pre_type[1] : 0 // Type of pre-PCR CV )); + p_zero_all = trim_or_reject_ub( + p_zero_all, + 1-1e-5, // trim to almost 1 + 1.01 // throw error when significantly above 1 + ); above_LOD = to_vector(bernoulli_rng(1-p_zero_all)); } else { p_zero_all = rep_vector(0, T+h); @@ -752,7 +757,7 @@ generated quantities { vector[T+h] cv_conditional_all = sqrt(cv_all^2 .* (1-p_zero_all) - p_zero_all); // correct potentially slightly negative approximations - cv_conditional_all = trim_or_reject( + cv_conditional_all = trim_or_reject_lb( cv_conditional_all, 1e-5, // trim to almost zero -0.01 // throw error when significantly below zero diff --git a/inst/stan/functions/helper_functions.stan b/inst/stan/functions/helper_functions.stan index 2c6848a..8c4d436 100644 --- a/inst/stan/functions/helper_functions.stan +++ b/inst/stan/functions/helper_functions.stan @@ -194,7 +194,7 @@ Helper functions for primitive operations return positions[1:nan_count]; } - vector trim_or_reject(vector x, real lb_trim, real lb_reject) { + vector trim_or_reject_lb(vector x, real lb_trim, real lb_reject) { int n = num_elements(x); array[n] int rej_positions; int rej_count = 0; @@ -215,3 +215,32 @@ Helper functions for primitive operations return(fmax(x, rep_vector(lb_trim, n))); } } + + vector trim_or_reject_ub(vector x, real ub_trim, real ub_reject) { + int n = num_elements(x); + array[n] int rej_positions; + int rej_count = 0; + for (i in 1:n) { + if (x[i] > ub_reject) { + rej_count += 1; + rej_positions[rej_count] = i; + } + } + if (rej_count > 0) { + reject( + "The following vector elements were above ", + "the upper bound for rejection (", ub_reject, "). ", + "Indices: ", rej_positions[1:rej_count], " | ", + "Values: ", x[rej_positions[1:rej_count]] + ); + } else { + return(fmin(x, rep_vector(ub_trim, n))); + } + } + + /* + append_row extended to three elements + */ + vector append_row3(vector x, vector y, vector z) { + return append_row(append_row(x, y), z); + } From a1f0fbbec9310a654bcf083331bd05755a51a493 Mon Sep 17 00:00:00 2001 From: Adrian Lison Date: Mon, 7 Oct 2024 17:46:39 +0200 Subject: [PATCH 5/6] Choose long sd changepoint dist by default and update doc --- R/model_infections.R | 37 +++++++++++++++++++++++++++---------- man/R_estimate_ets.Rd | 11 ++++++++--- man/R_estimate_rw.Rd | 18 ++++++++++-------- man/R_estimate_splines.Rd | 11 +++++++---- 4 files changed, 52 insertions(+), 25 deletions(-) diff --git a/R/model_infections.R b/R/model_infections.R index 3428ebb..3e92cf1 100644 --- a/R/model_infections.R +++ b/R/model_infections.R @@ -104,14 +104,19 @@ generation_dist_assume <- #' Rt. #'@param sd_prior_mu Prior (mean) on the standard deviation of the innovations. #'@param sd_prior_sigma Prior (standard deviation) on the standard deviation of -#' the innovations. +#' the innovations. Please note that for consistency the overall prior on the +#' standard deviation of innovations will have a standard deviation of +#' `sd_prior_sigma + sd_changepoint_sd` even if no changepoints are modeled +#' (see below). #'@param sd_changepoint_dist The variability of Rt can change over time, e.g. #' during the height of an epidemic wave, countermeasures may lead to much #' faster changes in Rt than observable at other times. This potential #' variability is accounted for using change points placed at regular #' intervals. The standard deviation of the state space model innovations then #' evolves linearly between the change points. The default change point -#' distance is 28 days. If set to zero, no change points are modeled. +#' distance is 26 weeks (182 days). Short changepoint distances (e.g. 4 weeks +#' or less) must be chosen with care, as they can make the Rt time series too +#' flexible. If set to zero, no change points are modeled. #'@param sd_changepoint_sd This parameter controls the variability of the change #' points. When change points are modeled, EpiSewer will estimate a baseline #' standard deviation (see `sd_prior_mu` and `sd_prior_sigma`), and model @@ -194,7 +199,7 @@ R_estimate_ets <- function( trend_prior_sigma = 0.1, sd_prior_mu = 0, sd_prior_sigma = 0.1, - sd_changepoint_dist = 28, + sd_changepoint_dist = 7*26, sd_changepoint_sd = 0.025, link = "inv_softplus", R_max = 6, @@ -323,8 +328,15 @@ R_estimate_ets <- function( #' faster changes in Rt than observable at other times. This potential #' variability is accounted for using change points placed at regular #' intervals. The standard deviation of the random walk then evolves linearly -#' between the change points. The default change point distance is 28 days. If -#' set to zero, no change points are modeled. +#' between the change points. The default change point distance is 26 weeks +#' (182 days). Short changepoint distances (e.g. 4 weeks or less) must be +#' chosen with care, as they can make the Rt time series too flexible. If set +#' to zero, no change points are modeled. +#' @param sd_changepoint_sd This parameter controls the variability of the +#' change points. When change points are modeled, EpiSewer will estimate a +#' baseline standard deviation (see `sd_prior_mu` and `sd_prior_sigma`), and +#' model change point values as independently distributed with mean equal to +#' this baseline and standard deviation `sd_changepoint_sd`. #' @param differenced If `FALSE` (default), the random walk is applied to the #' absolute Rt time series. If `TRUE`, it is instead applied to the #' differenced time series, i.e. now the trend is modeled as a random walk. @@ -354,7 +366,7 @@ R_estimate_rw <- function( intercept_prior_sigma = 0.8, sd_prior_mu = 0, sd_prior_sigma = 0.1, - sd_changepoint_dist = 28, + sd_changepoint_dist = 7*26, sd_changepoint_sd = 0.025, link = "inv_softplus", R_max = 6, @@ -367,6 +379,7 @@ R_estimate_rw <- function( sd_prior_mu = sd_prior_mu, sd_prior_sigma = sd_prior_sigma, sd_changepoint_dist = sd_changepoint_dist, + sd_changepoint_sd = sd_changepoint_sd, link = link, R_max = R_max, smooth_prior_mu = 1, @@ -406,15 +419,19 @@ R_estimate_rw <- function( #'@param coef_sd_prior_mu Prior (mean) on the daily standard deviation of the #' random walk over spline coefficients (see details). #'@param coef_sd_prior_sigma Prior (standard deviation) on the daily standard -#' deviation of the random walk over spline coefficients. +#' deviation of the random walk over spline coefficients. Please note that for +#' consistency the overall prior on the daily standard deviation of the random +#' walk will have a standard deviation of `coef_sd_prior_sigma + +#' coef_sd_changepoint_sd` even if no changepoints are modeled (see below). #'@param coef_sd_changepoint_dist The variability of Rt can change over time, #' e.g. during the height of an epidemic wave, countermeasures may lead to much #' faster changes in Rt than observable at other times. This potential #' variability is accounted for using change points placed at regular #' intervals. The standard deviation of the random walk over spline #' coefficients then evolves linearly between the change points. The default -#' change point distance is 28 days. If set to zero, no change points are -#' modeled. +#' change point distance is 26 weeks (182 days). Short changepoint distances +#' (e.g. 4 weeks or less) must be chosen with care, as they can make the Rt +#' time series too flexible. If set to zero, no change points are modeled. #'@param coef_sd_changepoint_sd This parameter controls the variability of the #' change points. When change points are modeled, EpiSewer will estimate a #' baseline standard deviation (see `coef_sd_prior_mu` and @@ -478,7 +495,7 @@ R_estimate_splines <- function( coef_intercept_prior_sigma = 0.8, coef_sd_prior_mu = 0, coef_sd_prior_sigma = 0.2, - coef_sd_changepoint_dist = 28, + coef_sd_changepoint_dist = 7*26, coef_sd_changepoint_sd = 0.05, link = "inv_softplus", R_max = 6, diff --git a/man/R_estimate_ets.Rd b/man/R_estimate_ets.Rd index 95f5925..ec3aa50 100644 --- a/man/R_estimate_ets.Rd +++ b/man/R_estimate_ets.Rd @@ -11,7 +11,7 @@ R_estimate_ets( trend_prior_sigma = 0.1, sd_prior_mu = 0, sd_prior_sigma = 0.1, - sd_changepoint_dist = 28, + sd_changepoint_dist = 7 * 26, sd_changepoint_sd = 0.025, link = "inv_softplus", R_max = 6, @@ -40,7 +40,10 @@ Rt.} \item{sd_prior_mu}{Prior (mean) on the standard deviation of the innovations.} \item{sd_prior_sigma}{Prior (standard deviation) on the standard deviation of -the innovations.} +the innovations. Please note that for consistency the overall prior on the +standard deviation of innovations will have a standard deviation of +\code{sd_prior_sigma + sd_changepoint_sd} even if no changepoints are modeled +(see below).} \item{sd_changepoint_dist}{The variability of Rt can change over time, e.g. during the height of an epidemic wave, countermeasures may lead to much @@ -48,7 +51,9 @@ faster changes in Rt than observable at other times. This potential variability is accounted for using change points placed at regular intervals. The standard deviation of the state space model innovations then evolves linearly between the change points. The default change point -distance is 28 days. If set to zero, no change points are modeled.} +distance is 26 weeks (182 days). Short changepoint distances (e.g. 4 weeks +or less) must be chosen with care, as they can make the Rt time series too +flexible. If set to zero, no change points are modeled.} \item{sd_changepoint_sd}{This parameter controls the variability of the change points. When change points are modeled, EpiSewer will estimate a baseline diff --git a/man/R_estimate_rw.Rd b/man/R_estimate_rw.Rd index 7405921..57c7c2b 100644 --- a/man/R_estimate_rw.Rd +++ b/man/R_estimate_rw.Rd @@ -9,7 +9,7 @@ R_estimate_rw( intercept_prior_sigma = 0.8, sd_prior_mu = 0, sd_prior_sigma = 0.1, - sd_changepoint_dist = 28, + sd_changepoint_dist = 7 * 26, sd_changepoint_sd = 0.025, link = "inv_softplus", R_max = 6, @@ -34,14 +34,16 @@ during the height of an epidemic wave, countermeasures may lead to much faster changes in Rt than observable at other times. This potential variability is accounted for using change points placed at regular intervals. The standard deviation of the random walk then evolves linearly -between the change points. The default change point distance is 28 days. If -set to zero, no change points are modeled.} +between the change points. The default change point distance is 26 weeks +(182 days). Short changepoint distances (e.g. 4 weeks or less) must be +chosen with care, as they can make the Rt time series too flexible. If set +to zero, no change points are modeled.} -\item{sd_changepoint_sd}{This parameter controls the variability of the change -points. When change points are modeled, EpiSewer will estimate a baseline -standard deviation (see \code{sd_prior_mu} and \code{sd_prior_sigma}), and model -change point values as independently distributed with mean equal to this -baseline and standard deviation \code{sd_changepoint_sd}.} +\item{sd_changepoint_sd}{This parameter controls the variability of the +change points. When change points are modeled, EpiSewer will estimate a +baseline standard deviation (see \code{sd_prior_mu} and \code{sd_prior_sigma}), and +model change point values as independently distributed with mean equal to +this baseline and standard deviation \code{sd_changepoint_sd}.} \item{link}{Link function. Currently supported are \code{inv_softplus} (default) and \code{scaled_logit}. Both of these links are configured to behave diff --git a/man/R_estimate_splines.Rd b/man/R_estimate_splines.Rd index 77e35cd..7c99a46 100644 --- a/man/R_estimate_splines.Rd +++ b/man/R_estimate_splines.Rd @@ -11,7 +11,7 @@ R_estimate_splines( coef_intercept_prior_sigma = 0.8, coef_sd_prior_mu = 0, coef_sd_prior_sigma = 0.2, - coef_sd_changepoint_dist = 28, + coef_sd_changepoint_dist = 7 * 26, coef_sd_changepoint_sd = 0.05, link = "inv_softplus", R_max = 6, @@ -38,7 +38,9 @@ of the random walk over spline coefficients.} random walk over spline coefficients (see details).} \item{coef_sd_prior_sigma}{Prior (standard deviation) on the daily standard -deviation of the random walk over spline coefficients.} +deviation of the random walk over spline coefficients. Please note that for +consistency the overall prior on the daily standard deviation of the random +walk will have a standard deviation of \code{coef_sd_prior_sigma + coef_sd_changepoint_sd} even if no changepoints are modeled (see below).} \item{coef_sd_changepoint_dist}{The variability of Rt can change over time, e.g. during the height of an epidemic wave, countermeasures may lead to much @@ -46,8 +48,9 @@ faster changes in Rt than observable at other times. This potential variability is accounted for using change points placed at regular intervals. The standard deviation of the random walk over spline coefficients then evolves linearly between the change points. The default -change point distance is 28 days. If set to zero, no change points are -modeled.} +change point distance is 26 weeks (182 days). Short changepoint distances +(e.g. 4 weeks or less) must be chosen with care, as they can make the Rt +time series too flexible. If set to zero, no change points are modeled.} \item{coef_sd_changepoint_sd}{This parameter controls the variability of the change points. When change points are modeled, EpiSewer will estimate a From 38b91b18d553888a1b27ab40dfe8d9b0d1608c03 Mon Sep 17 00:00:00 2001 From: Adrian Lison Date: Sat, 5 Oct 2024 10:57:12 +0200 Subject: [PATCH 6/6] Make run and test_run() more stable --- R/EpiSewer.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/EpiSewer.R b/R/EpiSewer.R index 43ee5c9..a767926 100644 --- a/R/EpiSewer.R +++ b/R/EpiSewer.R @@ -217,7 +217,7 @@ run.EpiSewerJob <- function(job) { ndraws = job$results_opts$samples_ndraws )) if (job$results_opts$fitted) { - fit_res$draws() + try(fit_res$draws(), silent = TRUE) try(fit_res$sampler_diagnostics(), silent = TRUE) try(fit_res$init(), silent = TRUE) try(fit_res$profiles(), silent = TRUE) @@ -250,6 +250,7 @@ test_run.EpiSewerJob <- function(job) { job$fit_opts$sampler$iter_sampling <- 5 job$fit_opts$sampler$show_messages <- FALSE job$fit_opts$sampler$show_exceptions <- FALSE + job$fit_opts$sampler$output_dir <- withr::local_tempdir() job$results_opts$samples_ndraws <- 2 return(run(job)) }