diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 18d8839..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitignore b/.gitignore index 96c4f2c..7e99abe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,7 @@ # vim swp files *.swp +.Rproj.user +.Rhistory +.RData +.Ruserdata +.DS_Store diff --git a/DESCRIPTION b/DESCRIPTION index e15bc0d..76bc717 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: EpiEstim -Version: 1.0-0 +Version: 2.0-0 Date: 2012/01/17 Title: EpiEstim: a package to estimate time varying reproduction numbers from epidemic curves @@ -28,7 +28,8 @@ Remotes: nickreich/coarseDataTools@hackout3 Suggests: testthat, - compare + compare, + utils License: GPL (>=2) LazyLoad: yes Packaged: 2012-05-30 10:35:03 UTC; acori diff --git a/NAMESPACE b/NAMESPACE index beca5b0..73e47df 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,12 +2,16 @@ export(DiscrSI) export(EstimateR) +export(OverallInfectivity) export(WT) export(check_cdt_samples_convergence) export(coarse2estim) -export(init_MCMC_params) +export(discr_si) +export(estimate_r) +export(init_mcmc_params) export(overall_infectivity) export(plots) +export(wallinga_teunis) import(grid) import(gridExtra) import(reshape2) diff --git a/R/WT.R b/R/WT.R deleted file mode 100644 index 4792d5b..0000000 --- a/R/WT.R +++ /dev/null @@ -1,291 +0,0 @@ -######################################################################################################################### -# WT function to estimate Rc the case reproduction number # -######################################################################################################################### - -#' Estimation of the case reproduction number using the Wallinga and Teunis method -#' -#' \code{WT} estimates the case reproduction number of an epidemic, given the incidence time series and the serial interval distribution. -#' -#' @param I One of the following -#' \itemize{ -#' \item{Vector (or a dataframe with a column named 'I') of non-negative integers containing an incidence time series. -#' If the dataframe contains a column \code{I$dates}, this is used for plotting. -#' \code{I$dates} must contains only dates in a row.} -#' \item{An object of class \code{\link[incidence]{incidence}}} -#' } -#' @param t_start Vector of positive integers giving the starting times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}. t_start[1] should be strictly after the first day with non null incidence. -#' @param t_end Vector of positive integers giving the ending times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}. -#' @param method One of "non_parametric_si" or "parametric_si" (see details). -#' @param mean_si For method "parametric_si" ; positive real giving the mean serial interval. -#' @param std_si For method "parametric_si" ; non negative real giving the stadard deviation of the serial interval. -#' @param si_distr For method "non_parametric_si" ; vector of probabilities giving the discrete distribution of the serial interval, starting with \code{si_distr[1]} (probability that the serial interval is zero), which should be zero. -#' @param nSim A positive integer giving the number of simulated epidemic trees used for computation of the confidence intervals of the case reproduction number (see details). -#' @param plot Logical. If \code{TRUE} (default is \code{FALSE}), output is plotted (see value). -#' @param legend A boolean (TRUE by default) governing the presence / absence of legends on the plots -#' This specifies the position of the legend in the plot. Alternatively, \code{locator(1)} can be used ; the user will then need to click where the legend needs to be written. -#' @return { -#' a list with components: -#' \itemize{ -#' \item{R}{: a dataframe containing: -#' the times of start and end of each time window considered ; -#' the estimated mean, std, and 0.025 and 0.975 quantiles of the reproduction number for each time window.} -#' \item{method}{: the method used to estimate R, one of "non_parametric_si", "parametric_si", "uncertain_si", "si_from_data" or "si_from_sample"} -#' \item{si_distr}{: a vector containing the discrete serial interval distribution used for estimation} -#' \item{SI.Moments}{: a vector containing the mean and std of the discrete serial interval distribution(s) used for estimation} -#' \item{I}{: the time series of total incidence} -#' \item{I_local}{: the time series of incidence of local cases (so that \code{I_local + I_imported = I})} -#' \item{I_imported}{: the time series of incidence of imported cases (so that \code{I_local + I_imported = I})} -#' \item{dates}{: a vector of dates corresponding to the incidence time series} -#' } -#' } -#' @details{ -#' Estimates of the case reproduction number for an epidemic over predefined time windows can be obtained, -#' for a given discrete distribution of the serial interval, as proposed by Wallinga and Teunis (AJE, 2004). -#' Confidence intervals are obtained by simulating a number (nSim) of possible transmission trees. -#' -#' The methods vary in the way the serial interval distribution is specified. -#' -#' ----------------------- \code{method "non_parametric_si"} ----------------------- -#' -#' The discrete distribution of the serial interval is directly specified in the argument \code{si_distr}. -#' -#' If \code{plot} is \code{TRUE}, 3 plots are produced. -#' The first one shows the epidemic curve. -#' The second one shows the posterior mean and 95\% credible interval of the reproduction number. The estimate for a time window is plotted at the end of the time window. -#' The third plot shows the discrete distribution of the serial interval. -#' -#' ----------------------- \code{method "parametric_si"} ----------------------- -#' -#' The mean and standard deviation of the continuous distribution of the serial interval are given in the arguments \code{mean_si} and \code{std_si}. -#' The discrete distribution of the serial interval is derived automatically using \code{\link{DiscrSI}}. -#' -#' If \code{plot} is \code{TRUE}, 3 plots are produced, which are identical to the ones for \code{method "non_parametric_si"} . -#' } -#' @seealso \code{\link{DiscrSI}}, \code{\link{EstimateR}} -#' @author Anne Cori \email{a.cori@imperial.ac.uk} -#' @references { -#' Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). -#' Wallinga, J. and P. Teunis. Different epidemic curves for severe acute respiratory syndrome reveal similar impacts of control measures (AJE 2004). -#' } -#' @export -#' @import reshape2 grid gridExtra -#' @importFrom ggplot2 last_plot ggplot aes geom_step ggtitle geom_ribbon geom_line xlab ylab xlim geom_hline ylim geom_histogram -#' @importFrom plotly layout mutate arrange rename summarise filter ggplotly -#' @examples -#' ## load data on pandemic flu in a school in 2009 -#' data("Flu2009") -#' -#' ## estimate the case reproduction number (method "non_parametric_si") -#' WT(Flu2009$incidence, t_start=2:26, t_end=8:32, method="non_parametric_si", -#' si_distr=Flu2009$si_distr, plot=TRUE, nSim=100) -#' # the second plot produced shows, at each each day, -#' # the estimate of the cqse reproduction number over the 7-day window finishing on that day. -#' -#' ## estimate the case reproduction number (method "parametric_si") -#' WT(Flu2009$incidence, t_start=2:26, t_end=8:32, method="parametric_si", -#' mean_si=2.6, std_si=1.5, plot=TRUE, nSim=100) -#' # the second plot produced shows, at each each day, -#' # the estimate of the case reproduction number over the 7-day window finishing on that day. -WT <- function(I, t_start, t_end, - method=c("non_parametric_si","parametric_si"), - mean_si=NULL, std_si=NULL, si_distr=NULL, nSim=10, - plot=FALSE, legend=FALSE) -{ - - ### Functions ### - - ######################################################### - # Draws a possile transmission tree # - ######################################################### - - DrawOneSetOfAncestries <- function() - { - res <- vector() - for(t in t_start[1]:t_end[length(t_end)]) - { - if(length(which(Onset==t))>0) - { - if(length(possibleAncesTime[[t]])>0) - { - prob <- si_distr[t-possibleAncesTime[[t]]+1]*I[possibleAncesTime[[t]]] - if(any(prob>0)) - { - res[which(Onset==t)] <- possibleAncesTime[[t]][which(rmultinom(length(which(Onset==t)),size=1,prob=prob)==TRUE,arr.ind=TRUE)[,1]] - }else - { - res[which(Onset==t)] <- NA - } - }else - res[which(Onset==t)] <- NA - } - } - return(res) - } - - ### Error messages ### - - method <- match.arg(method) - - I <- process_I(I) - if(!is.null(I$dates)) - { - dates <- check_dates(I) - I <- process_I_vector(rowSums(I[,c("local","imported")])) - T<-length(I) - }else - { - I <- process_I_vector(rowSums(I[,c("local","imported")])) - T<-length(I) - dates <- 1:T - } - - - ### Adjusting t_start and t_end so that at least an incident case has been observed before t_start[1] ### - - i <- 1 - while(sum(I[1:(t_start[i]-1)])==0) - { - i <- i+1 - } - temp <- which(t_start0)) - { - t_start <- t_start[-temp] - t_end <- t_end[-temp] - } - - check_times(t_start, t_end, T) - nb_time_periods <- length(t_start) - - if(method=="non_parametric_si") - { - check_si_distr(si_distr) - si_distr <- c(si_distr,0) - } - - if(method=="parametric_si") - { - if(is.null(mean_si)) - { - stop("method non_parametric_si requires to specify the mean_si argument.") - } - if(is.null(std_si)) - { - stop("method non_parametric_si requires to specify the std_si argument.") - } - if(mean_si<1) - { - stop("method parametric_si requires a value >1 for mean_si.") - } - if(std_si<0) - { - stop("method parametric_si requires a >0 value for std_si.") - } - } - - if(plot!=TRUE && plot!=FALSE) - { - stop("plot must be TRUE or FALSE.") - } - - if(!is.numeric(nSim)) - { - stop("nSim must be a positive integer.") - } - if(nSim<=0) - { - stop("nSim must be a positive integer.") - } - - ### What does each method do ### - - if(method=="non_parametric_si") - { - parametric_si<-"N" - } - if(method=="parametric_si") - { - parametric_si<-"Y" - } - - if(parametric_si=="Y") - { - si_distr <- sapply(1:T, function(t) DiscrSI(t-1,mean_si,std_si)) - } - if(length(si_distr)0) - { - t_start <- t_start[-TimePeriodsWithNoincidence] - t_end <- t_end[-TimePeriodsWithNoincidence] - nb_time_periods <- length(t_start) - } - - Onset <- vector() - for (t in 1:T) { - Onset <- c(Onset, rep(t, I[t])) - } - NbCases <- length(Onset) - - delay <- outer (1:T, 1:T, "-") - SIdelay <- apply(delay, 2, function(x) si_distr[pmin(pmax(x + 1, 1), length(si_distr))]) - SumOnColSIDelay_tmp <- sapply(1:nrow(SIdelay), function(i) sum(SIdelay [i,]* I, na.rm=TRUE) ) - SumOnColSIDelay <- vector() - for (t in 1:T) { - SumOnColSIDelay <- c(SumOnColSIDelay, rep(SumOnColSIDelay_tmp[t], I[t])) - } - MatSumOnColSIDelay <- matrix(rep(SumOnColSIDelay_tmp, T), nrow = T, ncol = T) - p <- SIdelay/(MatSumOnColSIDelay) - p[which(is.na(p))] <- 0 - p[which(is.infinite(p))] <- 0 - MeanRperIndexCaseDate <- sapply(1:ncol(p), function(j) sum(p[,j]*I, na.rm=TRUE)) - MeanRperDate.WT <- sapply(1:nb_time_periods, function(i) mean(rep(MeanRperIndexCaseDate[which((1:T >= t_start[i]) * (1:T <= t_end[i]) == 1)], I[which((1:T >= t_start[i]) * (1:T <= t_end[i]) == 1)]) ) ) - - possibleAncesTime <- sapply(1:T,function(t) (t-(which(si_distr!=0))+1)[which(t-(which(si_distr!=0))+1>0)]) - ancestriesTime <- t(sapply(1:nSim , function(i) DrawOneSetOfAncestries())) - - Rsim <- sapply(1:nb_time_periods,function(i) rowSums((ancestriesTime[,]>=t_start[i]) * (ancestriesTime[,]<=t_end[i]),na.rm=TRUE)/sum(I[t_start[i]:t_end[i]])) - - R025.WT <- apply(Rsim, 2, quantile,0.025,na.rm=TRUE) - R025.WT <- R025.WT[which(!is.na(R025.WT))] - R975.WT <- apply(Rsim, 2, quantile,0.975,na.rm=TRUE) - R975.WT <- R975.WT[which(!is.na(R975.WT))] - std.WT <- apply(Rsim, 2, sd,na.rm=TRUE) - std.WT <- std.WT[which(!is.na(std.WT))] - - results<-list() - - results$R<-as.data.frame(cbind(t_start,t_end,MeanRperDate.WT,std.WT,R025.WT,R975.WT)) - names(results$R)<-c("t_start","t_end","Mean(R)","Std(R)","Quantile.0.025(R)","Quantile.0.975(R)") - - results$method <- method - results$si_distr <- si_distr - - results$SI.Moments<-as.data.frame(cbind(final_mean_si,Finalstd_si)) - names(results$SI.Moments)<-c("Mean","Std") - - if(!is.null(dates)) - results$dates <- dates - results$I <- I - results$I_local <- I - results$I_local[1] <- 0 - results$I_imported <- c(I[1], rep(0, length(I)-1)) - - if(plot) - { - plots(results, what="all", legend = legend) - } - - return(results) - -} diff --git a/R/check_cdt_samples_covergence.R b/R/check_cdt_samples_convergence.R similarity index 51% rename from R/check_cdt_samples_covergence.R rename to R/check_cdt_samples_convergence.R index 5dd68ac..419826f 100644 --- a/R/check_cdt_samples_covergence.R +++ b/R/check_cdt_samples_convergence.R @@ -2,29 +2,48 @@ # check_cdt_samples_convergence runs a Gelman Rubin test to check convergence of the MCMC chain in coarseDataTools # ####################################################################################################################### -#' check_cdt_samples_convergence TITLE +#' Checking convergence of an MCMC chain by using the Gelman-Rubin algorithm #' -#' \code{check_cdt_samples_convergence} DESCRIPTION TO COME. +#' \code{check_cdt_samples_convergence} Checking convergence of an MCMC chain by using the Gelman-Rubin algorithm #' -#' @param cdtsamples XXXXXXX. +#' @param cdt_samples the \code{@sample} slot of a \code{cd.fit.mcmc} S4 object (see package \code{coarseDataTools}) #' @return TRUE if the Gelman Rubin test for convergence was successful, FALSE otherwise #' @details{ -#' XXXXXXX. +#' This function splits an MCMC chain in two halfs and uses the Gelman-Rubin algorithm to assess convergence of the chain by comparing its two halves. #' } -#' @seealso XXXXXXX. -#' @author XXXXXXX. -#' @references XXXXXXX. +#' @seealso \code{\link{estimate_r}} +#' @author Anne Cori #' @importFrom coda gelman.diag #' @export #' @examples -#' ## XXXXXXX. -check_cdt_samples_convergence <- function(cdtsamples) +#' \dontrun{ +#' ## Note the following examples use an MCMC routine +#' ## to estimate the serial interval distribution from data, +#' ## so they may take a few minutes to run +#' +#' ## load data on rotavirus +#' data("MockRotavirus") +#' +#' ## estimate the serial interval from data +#' SI_fit <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, +#' dist="G", +#' init_pars=init_mcmc_params(MockRotavirus$si_data, "G"), +#' burnin = 1000, +#' n.samples = 5000) +#' +#' ## use check_cdt_samples_convergence to check convergence +#' converg_diag <- check_cdt_samples_convergence(SI_fit@samples) +#' converg_diag +#' +#' } +#' +check_cdt_samples_convergence <- function(cdt_samples) { ############################################################################################################################# # checking convergence of the MCMC by using the Gelman-Rubin algorithm between the first and second half of the MCMC sample - spl1 <- cdtsamples[1:floor(nrow(cdtsamples)/2),] - spl2 <- cdtsamples[(ceiling(nrow(cdtsamples)/2)+1):nrow(cdtsamples),] + spl1 <- cdt_samples[1:floor(nrow(cdt_samples)/2),] + spl2 <- cdt_samples[(ceiling(nrow(cdt_samples)/2)+1):nrow(cdt_samples),] GRD <- gelman.diag(as.mcmc.list(list(as.mcmc(spl1), as.mcmc(spl2)))) # Is any of the potential scale reduction factors >1.1 (looking at the upper CI)? # If so this would suggest that the MCMC has not converged well. @@ -35,7 +54,7 @@ check_cdt_samples_convergence <- function(cdtsamples) > par(mfrow=c(2,1)) > plot(res$SI.Moments[,'Mean'], type='l', xlab='Iterations', ylab='Mean SI') > plot(res$SI.Moments[,'Std'], type='l', xlab='Iterations', ylab='Std SI'), - where res is the output of EstimateR + where res is the output of estimate_r and decide whether to rerun for longer.") return(FALSE) }else diff --git a/R/coarse2estim.R b/R/coarse2estim.R index f5f3139..c682634 100644 --- a/R/coarse2estim.R +++ b/R/coarse2estim.R @@ -1,10 +1,10 @@ ####################################################################################################################### -# coarse2estim integates CoarseDataTools with EpiEstim using the amended version of EstimateR called EstimateR_func # +# coarse2estim integates CoarseDataTools with EpiEstim using the amended version of EstimateR called estimate_r # ####################################################################################################################### #' Link coarseDataTools and EpiEstim #' -#' \code{coarse2estim} Transforms outputs of \code{coarseDataTools::dic.fit.mcmc} to right format for input into \code{EstimateR} +#' \code{coarse2estim} Transforms outputs of \code{coarseDataTools::dic.fit.mcmc} to right format for input into \code{estimate_r} #' #' @param x An object generated by function \code{coarseDataTools::dic.fit.mcmc}, containing posterior estimates of the serial interval distribution. #' @param dist The parametric distribution used when estimating the serial interval. @@ -16,7 +16,7 @@ #' \item{si_sample: a matrix where each column gives one distribution of the serial interval to be explored, obtained from x by thinning the MCMC chain.} #' \item{si_parametric_distr: the parametric distribution used when estimating the serial interval stored in x. } #' } -#' @seealso \code{\link{EstimateR}} +#' @seealso \code{\link{estimate_r}} #' @author The Hackout3 Parameter Estimation team. #' @export #' @examples @@ -31,22 +31,24 @@ #' ## estimate the serial interval from data #' SI.fit <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, #' dist="G", -#' init.pars=init_MCMC_params(MockRotavirus$si_data, "G") +#' init.pars=init_mcmc_params(MockRotavirus$si_data, "G"), #' burnin = 1000, #' n.samples = 5000) #' -#' ## use coarse2estim to turn this in the right format for EstimateR +#' ## use coarse2estim to turn this in the right format for estimate_r #' si_sample <- coarse2estim(SI.fit, thin=10)$si_sample #' -#' ## use EstimateR to estimate the reproduction number based on these estimates of the serial interval -#' R_si_from_sample <- EstimateR(MockRotavirus$incidence, -#' t_start=2:47, t_end=8:53, -#' method="si_from_sample", si_sample=si_sample, +#' ## use estimate_r to estimate the reproduction number +#' ## based on these estimates of the serial interval +#' R_si_from_sample <- estimate_r(MockRotavirus$incidence, +#' method="si_from_sample", +#' si_sample=si_sample, +#' config = list(t_start=2:47, t_end=8:53, #' n2 = 50, -#' plot=TRUE, leg.pos=xy.coords(1,3)) +#' plot=TRUE)) #' } #' -coarse2estim <- function(x=NULL, dist=x@dist, samples=x@samples, thin=10){ +coarse2estim <- function(x = NULL, dist = x@dist, samples = x@samples, thin = 10){ if(is.null(x)) # then check that dist and samples are what we expect { diff --git a/R/compatibility.R b/R/compatibility.R new file mode 100644 index 0000000..ffddbf5 --- /dev/null +++ b/R/compatibility.R @@ -0,0 +1,151 @@ +#' Function to ensure compatibility with EpiEstim versions <2.0 +#' +#' Please only use for compatibility; +#' Prefer the new estimate_r function instead +#' +#' @param I see \code{incid} in \code{estimate_r} +#' @param T.Start see \code{config$t_start} in \code{estimate_r} +#' @param T.End see \code{config$t_end} in \code{estimate_r} +#' @param method see method in \code{estimate_r} (but EstimateR uses CamelCase where estimate_r uses snake_case for the method names) +#' @param n1 see \code{n1} in \code{estimate_r} +#' @param n2 see \code{n2} in \code{estimate_r} +#' @param Mean.SI see \code{config$mean_si} in \code{estimate_r} +#' @param Std.SI see \code{config$std_si} in \code{estimate_r} +#' @param Std.Mean.SI see \code{config$std_mean_si} in \code{estimate_r} +#' @param Min.Mean.SI see \code{config$min_mean_si} in \code{estimate_r} +#' @param Max.Mean.SI see \code{config$max_mean_si} in \code{estimate_r} +#' @param Std.Std.SI see \code{config$std_std_si} in \code{estimate_r} +#' @param Min.Std.SI see \code{config$min_std_si} in \code{estimate_r} +#' @param Max.Std.SI see \code{config$max_std_si} in \code{estimate_r} +#' @param SI.Distr see \code{config$si_distr} in \code{estimate_r} +#' @param Mean.Prior see \code{config$mean_prior} in \code{estimate_r} +#' @param Std.Prior see \code{config$std_prior} in \code{estimate_r} +#' @param CV.Posterior see \code{config$cv_posterior} in \code{estimate_r} +#' @param plot see \code{config$plot} in \code{estimate_r} +#' @param leg.pos Not used anymore, only there for compatibility +#' +#' @export +#' +EstimateR <- function(I, T.Start, T.End, + method = c("NonParametricSI", "ParametricSI", "UncertainSI"), + n1 = NULL, n2 = NULL, Mean.SI = NULL, Std.SI = NULL, + Std.Mean.SI = NULL, Min.Mean.SI = NULL, Max.Mean.SI = NULL, + Std.Std.SI = NULL, Min.Std.SI = NULL, Max.Std.SI = NULL, + SI.Distr = NULL, + Mean.Prior = 5, Std.Prior = 5, CV.Posterior = 0.3, + plot = FALSE, leg.pos="topright") { + .Deprecated("estimate_r") + + method_tr <- c("NonParametricSI" = "non_parametric_si", + "ParametricSI" = "parametric_si", + "UncertainSI" = "uncertain_si") + method <- method_tr[[method]] + + config <- list( + t_start = T.Start, + t_end = T.End, + n1 = n1, + n2 = n2, + mean_si = Mean.SI, + std_si = Std.SI, + std_mean_si = Std.Mean.SI, + min_mean_si = Min.Mean.SI, + max_mean_si = Max.Mean.SI, + std_std_si = Std.Std.SI, + min_std_si = Min.Std.SI, + max_std_si = Max.Std.SI, + si_distr = SI.Distr, + mean_prior = Mean.Prior, + std_prior = Std.Prior, ### TO DO: change to sd_prior + cv_posterior = CV.Posterior, + plot = plot, ### TO DO: plot outside the function (always!) + legend = FALSE + ) + + estimate_r(incid = I, method = method, + config = config) +} + + + + +#' Function to ensure compatibility with EpiEstim versions <2.0 +#' +#' Please only use for compatibility; +#' Prefer the new discr_si function instead +#' +#' @param k see \code{k} in \code{discr_si} +#' @param mu see \code{mu} in \code{discr_si} +#' @param sigma see \code{sigma} in \code{discr_si} +#' +#' @export +#' +DiscrSI <- function(k,mu,sigma) +{ + .Deprecated("discr_si") + discr_si(k,mu,sigma) +} + + + +#' Function to ensure compatibility with EpiEstim versions <2.0 +#' +#' Please only use for compatibility; +#' Prefer the new overall_infectivity function instead +#' +#' @param I see \code{incid} in \code{overall_infectivity} +#' @param SI.Distr see \code{si_distr} in \code{overall_infectivity} + +#' @export +OverallInfectivity <- function(I, SI.Distr) +{ + .Deprecated("overall_infectivity") + overall_infectivity(incid = I, si_distr = SI.Distr) +} + + + + +#' Function to ensure compatibility with EpiEstim versions <2.0 +#' +#' Please only use for compatibility; +#' Prefer the new wallinga_teunis function instead +#' +#' @param I see \code{incid} in \code{wallinga_teunis} +#' @param T.Start see \code{config$t_start} in \code{wallinga_teunis} +#' @param T.End see \code{config$t_end} in \code{wallinga_teunis} +#' @param method see method in \code{wallinga_teunis} (but WT uses CamelCase where wallinga_teunis uses snake_case for the method names) +#' @param Mean.SI see \code{config$mean_si} in \code{wallinga_teunis} +#' @param Std.SI see \code{config$std_si} in \code{wallinga_teunis} +#' @param SI.Distr see \code{config$si_distr} in \code{wallinga_teunis} +#' @param nSim see \code{config$n_sim} in \code{wallinga_teunis} +#' @param plot see \code{config$plot} in \code{wallinga_teunis} +#' @param leg.pos Not used anymore, only there for compatibility +#' +#' @export +WT <- function(I, T.Start, T.End, + method=c("NonParametricSI", "ParametricSI"), + Mean.SI=NULL, Std.SI=NULL, + SI.Distr=NULL, nSim=10, + plot=FALSE, leg.pos="topright") +{ + .Deprecated("wallinga_teunis") + method_tr <- c("NonParametricSI" = "non_parametric_si", + "ParametricSI" = "parametric_si") + method <- method_tr[[method]] + + config = list(t_start = T.Start, + t_end = T.End, + mean_si = Mean.SI, + std_si = Std.SI, + si_distr = SI.Distr, + n_sim = nSim, + plot = plot) + + wallinga_teunis(incid = I, method = method, config = config) +} + + + + + diff --git a/R/data.R b/R/data.R index 366b44e..7d376a8 100644 --- a/R/data.R +++ b/R/data.R @@ -1,3 +1,49 @@ +#' Data on the 2009 H1N1 influenza pandemic in a school in New York city +#' +#' This data set gives: +#' 1/ the daily incidence of self-reported and laboratory-confirmed cases of influenza +#' amongst children in a school in New York city during the 2009 H1N1 influenza pandemic (see source and references), +#' 2/ interval-censored serial interval data from the 2009 outbreak of H1N1 influenza +#' in a New York city school (see references). +#' +#' @name flu_2009_NYC_school +#' @docType data +#' @format A list of two elements: +#' \describe{ +#' \item{incidence}{a dataframe with 14 lines containing dates in first column, and daily incidence in second column ,} +#' \item{si_data}{a dataframe containing data on the generation time for 16 pairs of infector/infected individuals +#' (see references and see argument \code{si_data} of function \code{estimate_r} for details on columns) } +#' } +#' @source Lessler J. et al. (2009) Outbreak of 2009 pandemic influenza A (H1N1) at a New York City school. New Eng J Med 361: 2628-2636. +#' @references +#' { +#' Lessler J. et al. (2009) Outbreak of 2009 pandemic influenza A (H1N1) at a New York City school. New Eng J Med 361: 2628-2636. } +#' @examples +#' \dontrun{ +#' ## Note the following examples use an MCMC routine +#' ## to estimate the serial interval distribution from data, +#' ## so they may take a few minutes to run +#' +#' ## load data on pandemic flu in a New York school in 2009 +#' data("flu_2009_NYC_school") +#' +#' ## estimate the reproduction number (method "si_from_data") +#' estimate_r(flu_2009_NYC_school$incidence, method="si_from_data", +#' si_data = flu_2009_NYC_school$si_data, +#' config=list(t_start=2:8, t_end=8:14, +#' si_parametric_distr = "G", +#' mcmc_control = list(burnin = 1000, +#' thin=10, seed = 1), +#' n1 = 1000, n2 = 50, +#' plot=TRUE) +#' ) +#' # the second plot produced shows, at each each day, +#' # the estimate of the reproduction number +#' # over the 7-day window finishing on that day. +#' } +NULL + +################################################################################################## #' Data on the 1918 H1N1 influenza pandemic in Baltimore. #' #' This data set gives: @@ -31,7 +77,8 @@ #' data("Flu1918") #' #' ## estimate the reproduction number (method "non_parametric_si") -#' EstimateR(Flu1918$incidence, method="non_parametric_si", +#' estimate_r(Flu1918$incidence, +#' method="non_parametric_si", #' config=list(t_start=2:86, t_end=8:92, #' si_distr=Flu1918$si_distr, #' plot=TRUE)) @@ -49,15 +96,24 @@ NULL #' (ARI, defined as at least two symptoms among fever, cough, sore throat, and runny nose) #' amongst children in a school in Pennsylvania during the 2009 H1N1 influenza pandemic (see source and references), #' 2/ the discrete daily distribution of the serial interval for influenza, assuming a shifted Gamma distribution with mean 2.6 days, standard deviation 1.5 days and shift 1 day (see references). +#' 3/ interval-censored serial interval data from the 2009 outbreak of H1N1 influenza +#' in San Antonio, Texas, USA (see references). #' #' @name Flu2009 #' @docType data -#' @format A list of two elements: +#' @format A list of three elements: #' \describe{ -#' \item{incidence}{a dataframe with 32 lines containing dates in first column, and daily incidence in second column,} -#' \item{si_distr}{a vector containing a set of 12 probabilities.} +#' \item{incidence}{a dataframe with 32 lines containing dates in first column, and daily incidence in second column (Cauchemez et al., 2011),} +#' \item{si_distr}{a vector containing a set of 12 probabilities (Ferguson et al, 2005),} +#' \item{si_data}{a dataframe with 16 lines giving serial interval patient data collected in a household study in San Antonio, +#' Texas throughout the 2009 H1N1 outbreak (Morgan et al., 2010). } +#' } +#' @source +#' { +#' Cauchemez S. et al. (2011) Role of social networks in shaping disease transmission during a community outbreak of 2009 H1N1 pandemic influenza. Proc Natl Acad Sci U S A 108(7), 2825-2830. +#' +#' Morgan O.W. et al. (2010) Household transmission of pandemic (H1N1) 2009, San Antonio, Texas, USA, April-May 2009. Emerg Infect Dis 16: 631-637. #' } -#' @source Cauchemez S. et al. (2011) Role of social networks in shaping disease transmission during a community outbreak of 2009 H1N1 pandemic influenza. Proc Natl Acad Sci U S A 108(7), 2825-2830. #' @references #' { #' Cauchemez S. et al. (2011) Role of social networks in shaping disease transmission during a community outbreak of 2009 H1N1 pandemic influenza. Proc Natl Acad Sci U S A 108(7), 2825-2830. @@ -69,7 +125,7 @@ NULL #' data("Flu2009") #' #' ## estimate the reproduction number (method "non_parametric_si") -#' EstimateR(Flu2009$incidence, method="non_parametric_si", +#' estimate_r(Flu2009$incidence, method="non_parametric_si", #' config=list(t_start=2:26, t_end=8:32, #' si_distr=Flu2009$si_distr, #' plot=TRUE) @@ -77,6 +133,28 @@ NULL #' # the second plot produced shows, at each each day, #' # the estimate of the reproduction number #' # over the 7-day window finishing on that day. +#' +#' \dontrun{ +#' ## Note the following examples use an MCMC routine +#' ## to estimate the serial interval distribution from data, +#' ## so they may take a few minutes to run +#' +#' ## estimate the reproduction number (method "si_from_data") +#' estimate_r(Flu2009$incidence, method="si_from_data", +#' si_data = Flu2009$si_data, +#' config=list(t_start=2:26, t_end=8:32, +#' mcmc_control = list(burnin = 1000, +#' thin=10, seed = 1), +#' n1 = 1000, n2 = 50, +#' si_parametric_distr = "G", +#' plot=TRUE) +#' ) +#' # the second plot produced shows, at each each day, +#' # the estimate of the reproduction number +#' # over the 7-day window finishing on that day. +#' } +#' +#' NULL ################################################################################################## @@ -101,7 +179,7 @@ NULL #' data("Measles1861") #' #' ## estimate the reproduction number (method "non_parametric_si") -#' EstimateR(Measles1861$incidence, method="non_parametric_si", +#' estimate_r(Measles1861$incidence, method="non_parametric_si", #' config=list(t_start=17:42, t_end=23:48, #' si_distr=Measles1861$si_distr, #' plot=TRUE) @@ -138,7 +216,7 @@ NULL #' data("SARS2003") #' #' ## estimate the reproduction number (method "non_parametric_si") -#' EstimateR(SARS2003$incidence, method="non_parametric_si", +#' estimate_r(SARS2003$incidence, method="non_parametric_si", #' config=list(t_start=14:101, t_end=20:107, #' si_distr=SARS2003$si_distr, #' plot=TRUE) @@ -177,7 +255,7 @@ NULL #' data("Smallpox1972") #' #' ## estimate the reproduction number (method "non_parametric_si") -#' EstimateR(Smallpox1972$incidence, method="non_parametric_si", +#' estimate_r(Smallpox1972$incidence, method="non_parametric_si", #' config=list(t_start=27:51, t_end=33:57, #' si_distr=Smallpox1972$si_distr, #' plot=TRUE) @@ -217,7 +295,7 @@ NULL #' data("MockRotavirus") #' #' ## estimate the reproduction number (method "si_from_data") -#' EstimateR(MockRotavirus$incidence, +#' estimate_r(MockRotavirus$incidence, #' method="si_from_data", #' si_data=MockRotavirus$si_data, #' config=list( diff --git a/R/DiscrSI.R b/R/discr_si.R similarity index 58% rename from R/DiscrSI.R rename to R/discr_si.R index 16e2fde..92be7e5 100644 --- a/R/DiscrSI.R +++ b/R/discr_si.R @@ -5,9 +5,9 @@ #' Discretized Generation Time Distribution Assuming A Shifted Gamma Distribution #' -#' \code{DiscrSI} computes the discrete distribution of the serial interval, assuming that the serial interval is shifted Gamma distributed, with shift 1. +#' \code{discr_si} computes the discrete distribution of the serial interval, assuming that the serial interval is shifted Gamma distributed, with shift 1. #' -#' @param k Positive integer for which the discrete distribution is desired. +#' @param k Positive integer, or vector of positive ingerers for which the discrete distribution is desired. #' @param mu A positive real giving the mean of the Gamma distribution. #' @param sigma A non-negative real giving the standard deviation of the Gamma distribution. #' @return Gives the discrete probability \eqn{w_k} that the serial interval is equal to \eqn{k}. @@ -19,36 +19,36 @@ #' #' where \eqn{F_{\{\mu,\sigma\}}} is the cumulative density function of a Gamma distribution with mean \eqn{\mu} and standard deviation \eqn{\sigma}. #' } -#' @seealso \code{\link{overall_infectivity}}, \code{\link{EstimateR}} +#' @seealso \code{\link{overall_infectivity}}, \code{\link{estimate_r}} #' @author Anne Cori \email{a.cori@imperial.ac.uk} #' @references Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). # #' @import stats #' @export #' @examples #' ## Computing the discrete serial interval of influenza -#' MeanFluSI <- 2.6 -#' SdFluSI <- 1.5 -#' DicreteSIDistr <- vector() -#' for(i in 0:20) -#' { -#' DicreteSIDistr[i+1] <- DiscrSI(i, MeanFluSI, SdFluSI) -#' } -#' plot(0:20, DicreteSIDistr, type="h", lwd=10, lend=1, xlab="time (days)", ylab="frequency") -#' title(main="Discrete distribution of the serial interval of influenza") -DiscrSI<-function(k,mu,sigma) +#' mean_flu_si <- 2.6 +#' sd_flu_si <- 1.5 +#' dicrete_si_distr <- discr_si(0:20, mean_flu_si, sd_flu_si) +#' plot(0:20, dicrete_si_distr, type = "h", +#' lwd = 10, lend = 1, xlab = "time (days)", ylab = "frequency") +#' title(main = "Discrete distribution of the serial interval of influenza") +discr_si <- function(k, mu, sigma) ### TO DO: make sure this also works if k is a vector { - if(sigma<0) - { + if(sigma < 0) stop("sigma must be >=0.") - } - a=((mu-1)/sigma)^2 - b=sigma^2/(mu-1) - CDFGamma<-function(k,a,b) - { - return(pgamma(k,shape=a,scale=b)) - } - res<-k*CDFGamma(k,a,b)+(k-2)*CDFGamma(k-2,a,b)-2*(k-1)*CDFGamma(k-1,a,b) - res<-res+a*b*(2*CDFGamma(k-1,a+1,b)-CDFGamma(k-2,a+1,b)-CDFGamma(k,a+1,b)) - res<-max(0,res) + if(mu <= 1) + stop("mu must be >1") + if(any(k < 0)) + stop("all values in k must be >=0.") + + a <- ((mu-1)/sigma)^2 + b <- sigma^2/(mu-1) + + cdf_gamma <- function(k, a, b) pgamma(k, shape = a, scale = b) + + res <- k*cdf_gamma(k,a,b)+(k-2)*cdf_gamma(k-2,a,b)-2*(k-1)*cdf_gamma(k-1,a,b) + res <- res+a*b*(2*cdf_gamma(k-1,a+1,b)-cdf_gamma(k-2,a+1,b)-cdf_gamma(k,a+1,b)) + res <- sapply(res, function(e) max(0,e)) + return(res) } diff --git a/R/EstimationR.R b/R/estimate_r.R similarity index 72% rename from R/EstimationR.R rename to R/estimate_r.R index 7a9282f..32e7c75 100755 --- a/R/EstimationR.R +++ b/R/estimate_r.R @@ -1,19 +1,19 @@ ######################################################################################################################### -# EstimateR is a wraper which replace the old EstimateR with EstimateR_func and accepts an object of class "cd.fit.mcmc"# +# estimate_r replaces the old EstimateR function # ######################################################################################################################### #' Estimated Instantaneous Reproduction Number #' -#' \code{EstimateR} estimates the reproduction number of an epidemic, given the incidence time series and the serial interval distribution. +#' \code{estimate_r} estimates the reproduction number of an epidemic, given the incidence time series and the serial interval distribution. #' -#' @param I One of the following +#' @param incid One of the following #' \itemize{ #' \item{A vector (or a dataframe with a single column) of non-negative integers containing the incidence time series} -#' \item{A dataframe of non-negative integers with either i) \code{I$I} containing the total incidence, or ii) two columns, -#' so that \code{I$local} contains the incidence of cases due to local transmission and -#' \code{I$imported} contains the incidence of imported cases (with \code{I$local + I$imported} -#' the total incidence). If the dataframe contains a column \code{I$dates}, this is used for plotting. -#' \code{I$dates} must contains only dates in a row.} +#' \item{A dataframe of non-negative integers with either i) \code{incid$I} containing the total incidence, or ii) two columns, +#' so that \code{incid$local} contains the incidence of cases due to local transmission and +#' \code{incid$imported} contains the incidence of imported cases (with \code{incid$local + incid$imported} +#' the total incidence). If the dataframe contains a column \code{incid$dates}, this is used for plotting. +#' \code{incid$dates} must contains only dates in a row.} #' \item{An object of class \code{\link[incidence]{incidence}}} #' } #' Note that the cases from the first time step are always all assumed to be imported cases. @@ -39,7 +39,7 @@ #' Should be one of "G" (Gamma), "W" (Weibull), "L" (Lognormal), "off1G" (Gamma shifted by 1), "off1W" (Weibull shifted by 1), or "off1L" (Lognormal shifted by 1).} #' \item{mcmc_control}{For method "si_from_data" ; a list containing the following (see details): #' \describe{ -#' \item{init.pars}{vector of size 2 corresponding to the initial values of parameters to use for the SI distribution. This is the shape and scale for all but the lognormal distribution, for which it is the meanlog and sdlog. If not specified these are chosen automatically using function \code{\link{init_MCMC_params}}.} +#' \item{init_pars}{vector of size 2 corresponding to the initial values of parameters to use for the SI distribution. This is the shape and scale for all but the lognormal distribution, for which it is the meanlog and sdlog. If not specified these are chosen automatically using function \code{\link{init_mcmc_params}}.} #' \item{burnin}{a positive integer giving the burnin used in the MCMC when estimating the serial interval distribution.} #' \item{thin}{a positive integer corresponding to thinning parameter; the MCMC will be run for \code{burnin+n1*thin iterations}; 1 in \code{thin} iterations will be recorded, after the burnin phase, so the posterior sample size is n1.} #' \item{seed}{An integer used as the seed for the random number generator at the start of the MCMC estimation; useful to get reproducible results.} @@ -83,7 +83,7 @@ #' \item{In method "parametric_si" the user specifies the mean and sd of the serial interval} #' \item{In method "uncertain_si" the mean and sd of the serial interval are each drawn from truncated normal distributions, with parameters specified by the user} #' \item{In method "si_from_data", the serial interval distribution is directly estimated, using MCMC, from interval censored exposure data, with data provided by the user together with a choice of parametric distribution for the serial interval} -#' \item{In method "si_from_sample", the user directly provides the sample of serial interval distribution to use for estimation of R. This can be a useful alternative to the previous method, where the MCMC estimation of the serial interval distribution could be run once, and the same estimated SI distribution then used in EstimateR in different contexts, e.g. with different time windows, hence avoiding to rerun the MCMC everytime EstimateR is called.} +#' \item{In method "si_from_sample", the user directly provides the sample of serial interval distribution to use for estimation of R. This can be a useful alternative to the previous method, where the MCMC estimation of the serial interval distribution could be run once, and the same estimated SI distribution then used in estimate_r in different contexts, e.g. with different time windows, hence avoiding to rerun the MCMC everytime estimate_r is called.} #' } #' #' If \code{plot} is \code{TRUE}, 3 plots are produced. @@ -98,7 +98,7 @@ #' ----------------------- \code{method "parametric_si"} ----------------------- #' #' The mean and standard deviation of the continuous distribution of the serial interval are given in the arguments \code{mean_si} and \code{std_si}. -#' The discrete distribution of the serial interval is derived automatically using \code{\link{DiscrSI}}. +#' The discrete distribution of the serial interval is derived automatically using \code{\link{discr_si}}. #' #' ----------------------- \code{method "uncertain_si"} ----------------------- #' @@ -126,7 +126,7 @@ #' \item{ER: the upper bound of the symptom onset date of the infector (given as an integer). Should be such that ER>=EL} #' \item{SL: the lower bound of the symptom onset date of the infected indivdiual (given as an integer)} #' \item{SR: the upper bound of the symptom onset date of the infected indivdiual (given as an integer). Should be such that SR>=SL} -#' \item{type (optional): can have entries 0, 1, or 2, corresponding to doubly interval-censored, single interval-censored or exact observations, respsectively, see Reich et al. Statist. Med. 2009. If not specified, this will be automatically computed from the dates} +#' \item{type (optional): can have entries 0, 1, or 2, corresponding to doubly interval-censored, single interval-censored or exact observations, respectively, see Reich et al. Statist. Med. 2009. If not specified, this will be automatically computed from the dates} #' } #' Assuming a given parametric distribution for the serial interval distribution (specified in si_parametric_distr), #' the posterior distribution of the serial interval is estimated directly fom these data using MCMC methods implemented in the package \code{coarsedatatools}. @@ -147,7 +147,7 @@ #' Unlike methods "uncertain_si" and "si_from_data", the user directly provides (in argument \code{si_sample}) a sample of serial interval distribution to be explored. #' #' } -#' @seealso \code{\link{DiscrSI}} +#' @seealso \code{\link{discr_si}} #' @author Anne Cori \email{a.cori@imperial.ac.uk} #' @references { #' Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). @@ -162,7 +162,7 @@ #' data("Flu2009") #' #' ## estimate the reproduction number (method "non_parametric_si") -#' EstimateR(Flu2009$incidence, method="non_parametric_si", +#' estimate_r(Flu2009$incidence, method="non_parametric_si", #' config=list(t_start=2:26, t_end=8:32, #' si_distr=Flu2009$si_distr, plot=TRUE)) #' # the second plot produced shows, at each each day, @@ -176,21 +176,21 @@ #' location <- sample(c("local","imported"), length(data), replace=TRUE) #' location[1] <- "imported" # forcing the first case to be imported #' # get incidence per group (location) -#' I <- incidence(data, groups = location) +#' incid <- incidence(data, groups = location) #' # Estimate R with assumptions on serial interval -#' EstimateR(I, method = "parametric_si", +#' estimate_r(incid, method = "parametric_si", #' config=list(t_start = 2:21, t_end = 8:27, #' mean_si = 2.6, std_si = 1.5, plot = TRUE)) #' #' ## estimate the reproduction number (method "parametric_si") -#' EstimateR(Flu2009$incidence, method="parametric_si", +#' estimate_r(Flu2009$incidence, method="parametric_si", #' config=list(t_start=2:26, t_end=8:32, #' mean_si=2.6, std_si=1.5, plot=TRUE)) #' # the second plot produced shows, at each each day, #' # the estimate of the reproduction number over the 7-day window finishing on that day. #' #' ## estimate the reproduction number (method "uncertain_si") -#' EstimateR(Flu2009$incidence, method="uncertain_si", +#' estimate_r(Flu2009$incidence, method="uncertain_si", #' config=list(t_start=2:26, t_end=8:32, #' mean_si=2.6, std_mean_si=1, min_mean_si=1, max_mean_si=4.2, #' std_si=1.5, std_std_si=0.5, min_std_si=0.5, max_std_si=2.5, @@ -209,7 +209,7 @@ #' ## estimate the reproduction number (method "si_from_data") #' MCMC_seed <- 1 #' overall_seed <- 2 -#' R_si_from_data <- EstimateR(MockRotavirus$incidence, +#' R_si_from_data <- estimate_r(MockRotavirus$incidence, #' method="si_from_data", #' si_data=MockRotavirus$si_data, #' config=list(t_start=2:47, t_end=8:53, @@ -220,7 +220,7 @@ #' seed = overall_seed, #' plot=TRUE)) #' ## compare with version with no uncertainty -#' R_Parametric <- EstimateR(MockRotavirus$incidence, +#' R_Parametric <- estimate_r(MockRotavirus$incidence, #' method="parametric_si", #' config=list(t_start=2:47, t_end=8:53, #' mean_si = mean(R_si_from_data$SI.Moments$Mean), @@ -237,18 +237,18 @@ #' MCMC_seed <- 1 #' overall_seed <- 2 #' SI.fit <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, -#' dist="G", -#' init.pars=init_MCMC_params(MockRotavirus$si_data, "G"), +#' dist = "G", +#' init.pars = init_mcmc_params(MockRotavirus$si_data, "G"), #' burnin = 1000, #' n.samples = 5000, #' seed = MCMC_seed) #' si_sample <- coarse2estim(SI.fit, thin=10)$si_sample -#' R_si_from_sample <- EstimateR(MockRotavirus$incidence, -#' method="si_from_sample", si_sample=si_sample, -#' config=list(t_start=2:47, t_end=8:53, +#' R_si_from_sample <- estimate_r(MockRotavirus$incidence, +#' method = "si_from_sample", si_sample = si_sample, +#' config = list(t_start = 2:47, t_end = 8:53, #' n2 = 50, #' seed = overall_seed, -#' plot=TRUE)) +#' plot = TRUE)) #' #' # check that R_si_from_sample is the same as R_si_from_data #' # since they were generated using the same MCMC algorithm to generate the SI sample @@ -256,110 +256,64 @@ #' all(R_si_from_sample$R$`Mean(R)` == R_si_from_data$R$`Mean(R)`) #' } #' -EstimateR <- function(I, - method = c("non_parametric_si", "parametric_si", - "uncertain_si", "si_from_data", - "si_from_sample"), - si_data = NULL, - si_sample = NULL, - config) { - - method <- match.arg(method) - - if (!("mean_prior" %in% names(config))) { - config$mean_prior = 5 - } - - if (!("std_prior" %in% names(config))) { - config$std_prior = 5 - } - - if (!("cv_posterior" %in% names(config))) { - config$cv_posterior = 0.3 - } - - if (!("plot" %in% names(config))) { - config$plot = FALSE - } +estimate_r <- function(incid, + method = c("non_parametric_si", "parametric_si", + "uncertain_si", "si_from_data", + "si_from_sample"), + si_data = NULL, + si_sample = NULL, + config) { - if (!("legend" %in% names(config))) { - config$legend = FALSE - } + method <- match.arg(method) + config <- process_config(config) + check_config(config, method) - if (!("mcmc_control" %in% names(config))) { - config$mcmc_control = list(init.pars = NULL, burnin = 3000, thin=10, seed = as.integer(Sys.time())) - } - if (method=="si_from_data") { # Warning if the expected set of parameters is not adequate si_data <- process_si_data(si_data) - - config$si_parametric_distr <- match.arg(config$si_parametric_distr, - c("G", "W", "L", "off1G", "off1W", "off1L")) - if (is.null(config$n1)) { - stop("method si_from_data requires to specify the config$n1 argument.") - } - if (is.null(config$n2)) { - stop("method si_from_data requires to specify the config$n2 argument.") - } - if (config$n2 <= 0 || config$n2%%1 != 0) { - stop("method si_from_data requires a >0 integer value for config$n2.") - } - if (config$n1 <= 0 || config$n1%%1 != 0) { - stop("method si_from_data requires a >0 integer value for config$n1.") - } - if(is.null(config$mcmc_control$init.pars)) { - config$mcmc_control$init.pars <- - init_MCMC_params(si_data, config$si_parametric_distr) - } - if((config$si_parametric_distr=="off1G" | - config$si_parametric_distr=="off1W" | - config$si_parametric_distr=="off1L") & - any(si_data$SR-si_data$EL<=1)) - { - stop(paste("You cannot fit a distribution with offset 1 to this SI", - "dataset, because for some data points the maximum serial", - "interval is <=1.\nChoose a different distribution")) - } - + config <- process_config_si_from_data(config, si_data) + + # estimate serial interval from serial interval data first if(!is.null(config$mcmc_control$seed)) { cdt <- dic.fit.mcmc(dat = si_data, dist=config$si_parametric_distr, burnin = config$mcmc_control$burnin, n.samples = config$n1*config$mcmc_control$thin, - init.pars=config$mcmc_control$init.pars, + init.pars = config$mcmc_control$init_pars, seed = config$mcmc_control$seed) }else{ cdt <- dic.fit.mcmc(dat = si_data, dist=config$si_parametric_distr, burnin = config$mcmc_control$burnin, n.samples = config$n1*config$mcmc_control$thin, - init.pars=config$mcmc_control$init.pars) + init.pars = config$mcmc_control$init_pars) } - + # check convergence of the MCMC and print warning if not converged MCMC_conv <- check_cdt_samples_convergence(cdt@samples) - + # thin the chain, and turn the two parameters of the SI distribution into a whole discrete distribution c2e <- coarse2estim(cdt, thin=config$mcmc_control$thin) - + cat(paste( - "\n\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@", - "\nEstimating the reproduction number for these serial interval", - "estimates...\n", - "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + "\n\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@", + "\nEstimating the reproduction number for these serial interval", + "estimates...\n", + "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" )) - + + # then estimate R for these serial intervals + if(!is.null(config$seed)) { set.seed(config$seed) } - - out <- EstimateR_func(I=I, - method = "si_from_data", - si_sample = c2e$si_sample, - config=config - ) + + out <- estimate_r_func(incid = incid, + method = "si_from_data", + si_sample = c2e$si_sample, + config = config + ) out[["MCMC_converged"]] <- MCMC_conv } else { @@ -368,15 +322,15 @@ EstimateR <- function(I, set.seed(config$seed) } - out <- EstimateR_func(I=I, method = method, si_sample=si_sample, - config = config - ) + out <- estimate_r_func(incid = incid, method = method, si_sample=si_sample, + config = config + ) } return(out) } ######################################################### -# EstimateR_func: Doing the heavy work in EstimateR # +# estimate_r_func: Doing the heavy work in estimate_r # ######################################################### #' @import reshape2 grid gridExtra @@ -385,19 +339,19 @@ EstimateR <- function(I, #' @importFrom stats median pgamma plnorm pweibull qgamma qlnorm quantile qweibull rgamma rmultinom rnorm sd #' @importFrom graphics plot #' @importFrom incidence as.incidence -EstimateR_func <- function (I, - si_sample, - method = c("non_parametric_si", "parametric_si", +estimate_r_func <- function (incid, + si_sample, + method = c("non_parametric_si", "parametric_si", "uncertain_si", "si_from_data", "si_from_sample"), - config) { + config) { ######################################################### # Calculates the cumulative incidence over time steps # ######################################################### - calc_incidence_per_time_step <- function(I, t_start, t_end) { + calc_incidence_per_time_step <- function(incid, t_start, t_end) { nb_time_periods <- length(t_start) - incidence_per_time_step <- sapply(1:nb_time_periods, function(i) sum(I[t_start[i]:t_end[i],c("local","imported")])) + incidence_per_time_step <- sapply(1:nb_time_periods, function(i) sum(incid[t_start[i]:t_end[i], c("local", "imported")])) return(incidence_per_time_step) } @@ -406,23 +360,23 @@ EstimateR_func <- function (I, # distribution from the discrete SI distribution # ######################################################### - PosteriorFromSIDistr <- function(I, si_distr, a_prior, b_prior, - t_start, t_end) { + posterior_from_si_distr <- function(incid, si_distr, a_prior, b_prior, + t_start, t_end) { nb_time_periods <- length(t_start) - lambda <- overall_infectivity(I, si_distr) + lambda <- overall_infectivity(incid, si_distr) final_mean_si <- sum(si_distr * (0:(length(si_distr) - - 1))) + 1))) a_posterior <- vector() b_posterior <- vector() a_posterior <- sapply(1:(nb_time_periods), function(t) if (t_end[t] > - final_mean_si) { - a_prior + sum(I[t_start[t]:t_end[t],"local"]) # only counting local cases on the "numerator" + final_mean_si) { + a_prior + sum(incid[t_start[t]:t_end[t], "local"]) # only counting local cases on the "numerator" } else { NA }) b_posterior <- sapply(1:(nb_time_periods), function(t) if (t_end[t] > - final_mean_si) { + final_mean_si) { 1/(1/b_prior + sum(lambda[t_start[t]:t_end[t]])) } else { @@ -436,27 +390,28 @@ EstimateR_func <- function (I, # given mean SI and std SI # ######################################################### - sample_from_posterior <- function(sample_size, I, mean_si, std_si, si_distr=NULL, - a_prior, b_prior, t_start, t_end) { + sample_from_posterior <- function(sample_size, incid, mean_si, std_si, si_distr=NULL, + a_prior, b_prior, t_start, t_end) { + nb_time_periods <- length(t_start) if(is.null(si_distr)) - si_distr <- sapply(1:T, function(t) DiscrSI(t - 1, mean_si, std_si)) + si_distr <- discr_si(0:(T-1), mean_si, std_si) final_mean_si <- sum(si_distr * (0:(length(si_distr) - - 1))) - lambda <- overall_infectivity(I, si_distr) + 1))) + lambda <- overall_infectivity(incid, si_distr) a_posterior <- vector() b_posterior <- vector() a_posterior <- sapply(1:(nb_time_periods), function(t) if (t_end[t] > - final_mean_si) { - a_prior + sum(I[t_start[t]:t_end[t],"local"]) # only counting local cases on the "numerator" + final_mean_si) { + a_prior + sum(incid[t_start[t]:t_end[t], "local"]) # only counting local cases on the "numerator" } else { NA }) b_posterior <- sapply(1:(nb_time_periods), function(t) if (t_end[t] > - final_mean_si) { + final_mean_si) { 1/(1/b_prior + sum(lambda[t_start[t]:t_end[t]], na.rm = TRUE)) } else { @@ -469,118 +424,23 @@ EstimateR_func <- function (I, else { rep(NA, sample_size) }) + if (sample_size == 1L) { + sample_r_posterior <- matrix(sample_r_posterior, nrow = 1) + } return(list(sample_r_posterior, si_distr)) } + method <- match.arg(method) - I <- process_I(I) - T<-nrow(I) + incid <- process_I(incid) + T<-nrow(incid) - if (config$mean_prior <= 0) { - stop("config$mean_prior must be >0.") - } - if (config$std_prior <= 0) { - stop("config$std_prior must be >0.") - } a_prior <- (config$mean_prior/config$std_prior)^2 b_prior <- config$std_prior^2/config$mean_prior check_times(config$t_start, config$t_end, T) nb_time_periods <- length(config$t_start) - if (method == "non_parametric_si") { - check_si_distr(config$si_distr) - } - if (method == "parametric_si") { - if (is.null(config$mean_si)) { - stop("method parametric_si requires to specify the config$mean_si argument.") - } - if (is.null(config$std_si)) { - stop("method parametric_si requires to specify the config$std_si argument.") - } - if (config$mean_si <= 1) { - stop("method parametric_si requires a value >1 for config$mean_si.") - } - if (config$std_si <= 0) { - stop("method parametric_si requires a >0 value for config$std_si.") - } - } - if (method == "uncertain_si") { - if (is.null(config$mean_si)) { - stop("method uncertain_si requires to specify the config$mean_si argument.") - } - if (is.null(config$std_si)) { - stop("method uncertain_si requires to specify the config$std_si argument.") - } - if (is.null(config$n1)) { - stop("method uncertain_si requires to specify the config$n1 argument.") - } - if (is.null(config$n2)) { - stop("method uncertain_si requires to specify the config$n2 argument.") - } - if (is.null(config$std_mean_si)) { - stop("method uncertain_si requires to specify the config$std_mean_si argument.") - } - if (is.null(config$min_mean_si)) { - stop("method uncertain_si requires to specify the config$min_mean_si argument.") - } - if (is.null(config$max_mean_si)) { - stop("method uncertain_si requires to specify the config$max_mean_si argument.") - } - if (is.null(config$std_std_si)) { - stop("method uncertain_si requires to specify the config$std_std_si argument.") - } - if (is.null(config$min_std_si)) { - stop("method uncertain_si requires to specify the config$min_std_si argument.") - } - if (is.null(config$max_std_si)) { - stop("method uncertain_si requires to specify the config$max_std_si argument.") - } - if (config$mean_si <= 0) { - stop("method uncertain_si requires a >0 value for config$mean_si.") - } - if (config$std_si <= 0) { - stop("method uncertain_si requires a >0 value for config$std_si.") - } - if (config$n2 <= 0 || config$n2%%1 != 0) { - stop("method uncertain_si requires a >0 integer value for config$n2.") - } - if (config$n1 <= 0 || config$n1%%1 != 0) { - stop("method uncertain_si requires a >0 integer value for config$n1.") - } - if (config$std_mean_si <= 0) { - stop("method uncertain_si requires a >0 value for config$std_mean_si.") - } - if (config$min_mean_si < 1) { - stop("method uncertain_si requires a value >=1 for config$min_mean_si.") - } - if (config$max_mean_si < config$mean_si) { - stop("method uncertain_si requires that config$max_mean_si >= config$mean_si.") - } - if (config$mean_si < config$min_mean_si) { - stop("method uncertain_si requires that config$mean_si >= config$min_mean_si.") - } - if (signif(config$max_mean_si - config$mean_si, 3) != signif(config$mean_si - - config$min_mean_si, 3)) { - warning("The distribution you chose for the mean SI is not centered around the mean.") - } - if (config$std_std_si <= 0) { - stop("method uncertain_si requires a >0 value for config$std_std_si.") - } - if (config$min_std_si <= 0) { - stop("method uncertain_si requires a >0 value for config$min_std_si.") - } - if (config$max_std_si < config$std_si) { - stop("method uncertain_si requires that config$max_std_si >= config$std_si.") - } - if (config$std_si < config$min_std_si) { - stop("method uncertain_si requires that config$std_si >= config$min_std_si.") - } - if (signif(config$max_std_si - config$std_si, 3) != signif(config$std_si - - config$min_std_si, 3)) { - warning("The distribution you chose for the std of the SI is not centered around the mean.") - } - } if(method == "si_from_sample") { if (is.null(config$n2)) { @@ -588,18 +448,14 @@ EstimateR_func <- function (I, } si_sample <- process_si_sample(si_sample) } - if (config$cv_posterior < 0) { - stop("config$cv_posterior must be >0.") - } + min_nb_cases_per_time_period <- ceiling(1/config$cv_posterior^2 - a_prior) - incidence_per_time_step <- calc_incidence_per_time_step(I, config$t_start, + incidence_per_time_step <- calc_incidence_per_time_step(incid, config$t_start, config$t_end) if (incidence_per_time_step[1] < min_nb_cases_per_time_period) { warning("You're estimating R too early in the epidemic to get the desired posterior CV.") } - if (config$plot != TRUE && config$plot != FALSE) { - stop("config$plot must be TRUE or FALSE.") - } + if (method == "non_parametric_si") { si_uncertainty <- "N" parametric_si <- "N" @@ -627,20 +483,20 @@ EstimateR_func <- function (I, sd = config$std_mean_si) } while (std_si_sample[k] < config$min_std_si || std_si_sample[k] > - config$max_std_si || std_si_sample[k] > mean_si_sample[k]) { + config$max_std_si){ std_si_sample[k] <- rnorm(1, mean = config$std_si, sd = config$std_std_si) } } temp <- lapply(1:config$n1, function(k) sample_from_posterior(config$n2, - I, mean_si_sample[k], std_si_sample[k], si_distr=NULL, a_prior, - b_prior, config$t_start, config$t_end)) + incid, mean_si_sample[k], std_si_sample[k], si_distr=NULL, a_prior, + b_prior, config$t_start, config$t_end)) config$si_distr <- cbind(t(sapply(1:config$n1, function(k) (temp[[k]])[[2]])), - rep(0, config$n1)) + rep(0, config$n1)) r_sample <- matrix(NA, config$n2 * config$n1, nb_time_periods) for (k in 1:config$n1) { r_sample[((k - 1) * config$n2 + 1):(k * config$n2), which(config$t_end > - mean_si_sample[k])] <- (temp[[k]])[[1]][, which(config$t_end > - mean_si_sample[k])] + mean_si_sample[k])] <- (temp[[k]])[[1]][, which(config$t_end > + mean_si_sample[k])] } mean_posterior <- apply(r_sample, 2, mean, na.rm = TRUE) std_posterior <- apply(r_sample, 2, sd, na.rm = TRUE) @@ -667,15 +523,15 @@ EstimateR_func <- function (I, std_si_sample[k] <- sqrt(sum(si_sample[,k]*((1:dim(si_sample)[1]-1) - mean_si_sample[k])^2)) } temp <- lapply(1:config$n1, function(k) sample_from_posterior(config$n2, - I, mean_si=NULL, std_si=NULL, si_sample[,k], a_prior, - b_prior, config$t_start, config$t_end)) + incid, mean_si=NULL, std_si=NULL, si_sample[,k], a_prior, + b_prior, config$t_start, config$t_end)) config$si_distr <- cbind(t(sapply(1:config$n1, function(k) (temp[[k]])[[2]])), - rep(0, config$n1)) + rep(0, config$n1)) r_sample <- matrix(NA, config$n2 * config$n1, nb_time_periods) for (k in 1:config$n1) { r_sample[((k - 1) * config$n2 + 1):(k * config$n2), which(config$t_end > - mean_si_sample[k])] <- (temp[[k]])[[1]][, which(config$t_end > - mean_si_sample[k])] + mean_si_sample[k])] <- (temp[[k]])[[1]][, which(config$t_end > + mean_si_sample[k])] } mean_posterior <- apply(r_sample, 2, mean, na.rm = TRUE) std_posterior <- apply(r_sample, 2, sd, na.rm = TRUE) @@ -696,18 +552,17 @@ EstimateR_func <- function (I, }else{ # CertainSI if (parametric_si == "Y") { - config$si_distr <- sapply(1:T, function(t) DiscrSI(t - 1, - config$mean_si, config$std_si)) + config$si_distr <- discr_si(0:(T-1), config$mean_si, config$std_si) } if (length(config$si_distr) < T + 1) { config$si_distr[(length(config$si_distr) + 1):(T + 1)] <- 0 } final_mean_si <- sum(config$si_distr * (0:(length(config$si_distr) - - 1))) + 1))) Finalstd_si <- sqrt(sum(config$si_distr * (0:(length(config$si_distr) - - 1))^2) - final_mean_si^2) - post <- PosteriorFromSIDistr(I, config$si_distr, a_prior, b_prior, - config$t_start, config$t_end) + 1))^2) - final_mean_si^2) + post <- posterior_from_si_distr(incid, config$si_distr, a_prior, b_prior, + config$t_start, config$t_end) a_posterior <- unlist(post[[1]]) b_posterior <- unlist(post[[2]]) mean_posterior <- a_posterior * b_posterior @@ -727,11 +582,12 @@ EstimateR_func <- function (I, quantile_0.975_posterior <- qgamma(0.975, shape = a_posterior, scale = b_posterior, lower.tail = TRUE, log.p = FALSE) } - results <- list() - results$R <- as.data.frame(cbind(config$t_start, config$t_end, mean_posterior, + + results <- list(R = as.data.frame(cbind(config$t_start, config$t_end, mean_posterior, std_posterior, quantile_0.025_posterior, quantile_0.05_posterior, quantile_0.25_posterior, median_posterior, quantile_0.25_posterior, - quantile_0.25_posterior, quantile_0.975_posterior)) + quantile_0.25_posterior, quantile_0.975_posterior)) ) + names(results$R) <- c("t_start", "t_end", "Mean(R)", "Std(R)", "Quantile.0.025(R)", "Quantile.0.05(R)", "Quantile.0.25(R)", "Median(R)", "Quantile.0.75(R)", "Quantile.0.95(R)", @@ -754,19 +610,19 @@ EstimateR_func <- function (I, names(results$SI.Moments) <- c("Mean", "Std") - if(!is.null(I$dates)) + if(!is.null(incid$dates)) { - results$dates <- check_dates(I) + results$dates <- check_dates(incid) }else { results$dates <- 1:T } - results$I <- rowSums(I[,c("local","imported")]) - results$I_local <- I$local - results$I_imported <- I$imported + results$I <- rowSums(incid[,c("local", "imported")]) + results$I_local <- incid$local + results$I_imported <- incid$imported if (config$plot) { - if(sum(I$imported[-1])>0) # more than the first cases are imported + if(sum(incid$imported[-1])>0) # more than the first cases are imported { add_imported_cases <- TRUE }else { diff --git a/R/init_MCMC_params.R b/R/init_MCMC_params.R deleted file mode 100644 index 3bb7112..0000000 --- a/R/init_MCMC_params.R +++ /dev/null @@ -1,87 +0,0 @@ -####################################################################################################################### -# init_MCMC_params finds clever starting points for the MCMC to be used to estimate the serial interval, when using option si_from_data in EstimateR # -####################################################################################################################### - -#' init_MCMC_params TITLE -#' -#' \code{init_MCMC_params} DESCRIPTION TO COME. -#' -#' @param si_data XXXXXXX. -#' @param dist XXXXXXX. -#' @return XXXXXXX. -#' @details{ -#' XXXXXXX. -#' } -#' @seealso XXXXXXX. -#' @author XXXXXXX. -#' @references XXXXXXX. -#' @importFrom fitdistrplus fitdist -#' @export -#' @examples -#' ## XXXXXXX. -init_MCMC_params <- function(si_data, dist = c("G", "W", "L", "off1G", "off1W", "off1L")) -{ - dist <- match.arg(dist) - naive_SI_obs <- (si_data$SR + si_data$SL ) / 2 - (si_data$ER + si_data$EL ) / 2 - mu <- mean(naive_SI_obs) - sigma <- sd(naive_SI_obs) - if (dist == "G"){ - shape <- (mu/sigma)^2 - scale <- sigma^2/mu - # check this is what we want - # tmp <- rgamma(10000, shape=shape, scale = scale) - # mean(tmp) - # sd(tmp) - param <- c(shape, scale) - } else if (dist == "W"){ - fit.w <- fitdist(naive_SI_obs+0.1, "weibull") # using +0.1 to avoid issues with zero - shape <- fit.w$estimate["shape"] - scale <- fit.w$estimate["scale"] - # check this is what we want - # tmp <- rweibull(10000, shape=shape, scale = scale) - # mean(tmp) - # sd(tmp) - param <- c(shape, scale) - } else if (dist == "L"){ - sdlog <- sqrt(log(sigma^2/(mu^2)+1)) - meanlog <- log(mu) - sdlog^2/2 - # check this is what we want - # tmp <- rlnorm(10000, meanlog=meanlog, sdlog = sdlog) - # mean(tmp) - # sd(tmp) - param <- c(meanlog, sdlog) - } else if (dist == "off1G"){ - shape <- ((mu-1)/sigma)^2 - if(shape<=0) shape <- 0.001 # this is to avoid issues when the mean SI is <1 - scale <- sigma^2/(mu-1) - # check this is what we want - # tmp <- 1+rgamma(10000, shape=shape, scale = scale) - # mean(tmp) - # sd(tmp) - param <- c(shape, scale) - } else if (dist == "off1W"){ - fit.w <- fitdist(naive_SI_obs-1+0.1, "weibull") # using +0.1 to avoid issues with zero - shape <- fit.w$estimate["shape"] - scale <- fit.w$estimate["scale"] - # check this is what we want - # tmp <- 1+rweibull(10000, shape=shape, scale = scale) - # mean(tmp) - # sd(tmp) - param <- c(shape, scale) - } else if (dist == "off1L"){ - sdlog <- sqrt(log(sigma^2/((mu-1)^2)+1)) - meanlog <- log(mu-1) - sdlog^2/2 - # check this is what we want - # tmp <- 1+rlnorm(10000, meanlog=meanlog, sdlog = sdlog) - # mean(tmp) - # sd(tmp) - param <- c(meanlog, sdlog) - } else { - stop(sprintf("Distribtion (%s) not supported",dist)) - } - if(any(is.na(param))) - { - stop("NA result. Check that si_data is in the right format. ") - } - return(param) -} \ No newline at end of file diff --git a/R/init_mcmc_params.R b/R/init_mcmc_params.R new file mode 100644 index 0000000..3eeadef --- /dev/null +++ b/R/init_mcmc_params.R @@ -0,0 +1,129 @@ +####################################################################################################################### +# init_mcmc_params finds clever starting points for the MCMC to be used to estimate the serial interval, when using option si_from_data in estimate_r # +####################################################################################################################### + +#' init_mcmc_params Finds clever starting points for the MCMC to be used to estimate the serial interval, +#' e.g. when using option \code{si_from_data} in \code{estimate_r} +#' +#' \code{init_mcmc_params} Finds values of the serial interval distribution parameters, used to initalise the MCMC estimation of the serial interval distribution. +#' Initial values are computed based on the observed mean and standard deviation of the sample from which the parameters are to be estiamted. +#' +#' @param si_data the data on dates of symptoms of pairs of infector/infected individuals to be used to estimate the serial interval distribution. +#' This should be a dataframe with 5 columns: +#' \itemize{ +#' \item{EL: the lower bound of the symptom onset date of the infector (given as an integer)} +#' \item{ER: the upper bound of the symptom onset date of the infector (given as an integer). Should be such that ER>=EL} +#' \item{SL: the lower bound of the symptom onset date of the infected indivdiual (given as an integer)} +#' \item{SR: the upper bound of the symptom onset date of the infected indivdiual (given as an integer). Should be such that SR>=SL} +#' \item{type (optional): can have entries 0, 1, or 2, corresponding to doubly interval-censored, single interval-censored or exact observations, +#' respectively, see Reich et al. Statist. Med. 2009. If not specified, this will be automatically computed from the dates} +#' } +#' @param dist the parametric distribution to use for the serial interval. +#' Should be one of "G" (Gamma), "W" (Weibull), "L" (Lognormal), "off1G" (Gamma shifted by 1), "off1W" (Weibull shifted by 1), or "off1L" (Lognormal shifted by 1). +#' @return A vector containing the initial values for the two parameters of the distribution of the serial interval. +#' These are the shape and scale for all but the lognormal distribution, for which it is the meanlog and sdlog. +#' @seealso \code{\link{estimate_r}} +#' @author Anne Cori +#' @importFrom fitdistrplus fitdist +#' @export +#' @examples +#' \dontrun{ +#' ## Note the following examples use an MCMC routine +#' ## to estimate the serial interval distribution from data, +#' ## so they may take a few minutes to run +#' +#' ## load data on rotavirus +#' data("MockRotavirus") +#' +#' ## get clever initial values for shape and scale of a Gamma distribution +#' ## fitted to the the data MockRotavirus$si_data +#' clever_init_param <- init_mcmc_params(MockRotavirus$si_data, "G") +#' +#' ## estimate the serial interval from data using a clever starting point for the MCMC chain +#' SI_fit_clever <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, +#' dist="G", +#' init.pars=clever_init_param, +#' burnin = 1000, +#' n.samples = 5000) +#' +#' ## estimate the serial interval from data using a random starting point for the MCMC chain +#' SI_fit_naive <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, +#' dist="G", +#' burnin = 1000, +#' n.samples = 5000) +#' +#' +#' ## use check_cdt_samples_convergence to check convergence in both situations +#' converg_diag_clever <- check_cdt_samples_convergence(SI_fit_clever@samples) +#' converg_diag_naive <- check_cdt_samples_convergence(SI_fit_naive@samples) +#' converg_diag_clever +#' converg_diag_naive +#' +#' } +#' +init_mcmc_params <- function(si_data, dist = c("G", "W", "L", "off1G", "off1W", "off1L")) +{ + dist <- match.arg(dist) + naive_SI_obs <- (si_data$SR + si_data$SL ) / 2 - (si_data$ER + si_data$EL ) / 2 + mu <- mean(naive_SI_obs) + sigma <- sd(naive_SI_obs) + if (dist == "G"){ + shape <- (mu/sigma)^2 + scale <- sigma^2/mu + # check this is what we want + # tmp <- rgamma(10000, shape=shape, scale = scale) + # mean(tmp) + # sd(tmp) + param <- c(shape, scale) + } else if (dist == "W"){ + fit.w <- fitdist(naive_SI_obs+0.1, "weibull") # using +0.1 to avoid issues with zero + shape <- fit.w$estimate["shape"] + scale <- fit.w$estimate["scale"] + # check this is what we want + # tmp <- rweibull(10000, shape=shape, scale = scale) + # mean(tmp) + # sd(tmp) + param <- c(shape, scale) + } else if (dist == "L"){ + sdlog <- sqrt(log(sigma^2/(mu^2)+1)) + meanlog <- log(mu) - sdlog^2/2 + # check this is what we want + # tmp <- rlnorm(10000, meanlog=meanlog, sdlog = sdlog) + # mean(tmp) + # sd(tmp) + param <- c(meanlog, sdlog) + } else if (dist == "off1G"){ + shape <- ((mu-1)/sigma)^2 + if(shape<=0) shape <- 0.001 # this is to avoid issues when the mean SI is <1 + scale <- sigma^2/(mu-1) + # check this is what we want + # tmp <- 1+rgamma(10000, shape=shape, scale = scale) + # mean(tmp) + # sd(tmp) + param <- c(shape, scale) + } else if (dist == "off1W"){ + fit.w <- fitdist(naive_SI_obs-1+0.1, "weibull") # using +0.1 to avoid issues with zero + shape <- fit.w$estimate["shape"] + scale <- fit.w$estimate["scale"] + # check this is what we want + # tmp <- 1+rweibull(10000, shape=shape, scale = scale) + # mean(tmp) + # sd(tmp) + param <- c(shape, scale) + } else if (dist == "off1L"){ + sdlog <- sqrt(log(sigma^2/((mu-1)^2)+1)) + meanlog <- log(mu-1) - sdlog^2/2 + # check this is what we want + # tmp <- 1+rlnorm(10000, meanlog=meanlog, sdlog = sdlog) + # mean(tmp) + # sd(tmp) + param <- c(meanlog, sdlog) + } else { + stop(sprintf("Distribtion (%s) not supported",dist)) + } + if(any(is.na(param))) + { + stop("NA result. Check that si_data is in the right format. ") + } + return(param) +} \ No newline at end of file diff --git a/R/overall_infectivity.R b/R/overall_infectivity.R index 567eceb..ba05875 100644 --- a/R/overall_infectivity.R +++ b/R/overall_infectivity.R @@ -7,24 +7,24 @@ #' #' \code{overall_infectivity} computes the overall infectivity due to previously infected individuals. #' -#' @param I One of the following +#' @param incid One of the following #' \itemize{ #' \item{A vector (or a dataframe with a single column) of non-negative integers containing an incidence time series} -#' \item{A dataframe of non-negative integers with two columns, so that \code{I$local} contains the incidence of cases due to local transmission and \code{I$imported} contains the incidence of imported cases (with \code{I$local + I$imported} the total incidence).} +#' \item{A dataframe of non-negative integers with two columns, so that \code{incid$local} contains the incidence of cases due to local transmission and \code{incid$imported} contains the incidence of imported cases (with \code{incid$local + incid$imported} the total incidence).} #' } #' Note that the cases from the first time step are always all assumed to be imported cases. #' @param si_distr Vector of probabilities giving the discrete distribution of the serial interval. #' @return A vector which contains the overall infectivity \eqn{\lambda_t} at each time step #' @details{ #' The overall infectivity \eqn{\lambda_t} at time step \eqn{t} is equal to the sum of the previously infected individuals -#' (given by the incidence vector \eqn{I}, with \code{I=I$local + I$imported} if \eqn{I} is a matrix), +#' (given by the incidence vector \eqn{I}, with \code{I = incid$local + incid$imported} if \eqn{I} is a matrix), #' weigthed by their infectivity at time \eqn{t} (given by the discrete serial interval distribution \eqn{w_k}). #' In mathematical terms: #' \cr #' \eqn{\lambda_t = \sum_{k=1}^{t-1}I_{t-k}w_k} #' \cr #' } -#' @seealso \code{\link{DiscrSI}}, \code{\link{EstimateR}} +#' @seealso \code{\link{discr_si}}, \code{\link{estimate_r}} #' @author Anne Cori \email{a.cori@@imperial.ac.uk} #' @references Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). #' @export @@ -35,20 +35,18 @@ #' ## compute overall infectivity #' lambda <- overall_infectivity(Flu2009$incidence, Flu2009$si_distr) #' par(mfrow=c(2,1)) -#' plot(Flu2009$incidence, type="s", xlab="time (days)", ylab="incidence") -#' title(main="Epidemic curve") -#' plot(lambda, type="s", xlab="time (days)", ylab="Infectivity") -#' title(main="Overall infectivity") -overall_infectivity <-function (I,si_distr) +#' plot(Flu2009$incidence, type = "s", xlab = "time (days)", ylab = "incidence") +#' title(main = "Epidemic curve") +#' plot(lambda, type = "s", xlab = "time (days)", ylab = "Infectivity") +#' title(main = "Overall infectivity") +overall_infectivity <-function (incid, si_distr) { - I <- process_I(I) - T<-nrow(I) + incid <- process_I(incid) + T <- nrow(incid) check_si_distr(si_distr, "warning") lambda <- vector() - lambda[1]<-NA + lambda[1] <- NA for (t in 2:T) - { - lambda[t] <- sum(si_distr[1:t]*rowSums(I[t:1, c("local","imported")]),na.rm=TRUE) - } + lambda[t] <- sum(si_distr[1:t]*rowSums(incid[t:1, c("local", "imported")]), na.rm=TRUE) return(lambda) } diff --git a/R/plots.R b/R/plots.R index 13c72db..b86e675 100644 --- a/R/plots.R +++ b/R/plots.R @@ -1,13 +1,18 @@ #' Plotting the outputs of functions estimating the reproduction number from incidence time series and assumptions regarding the serial interval distribution #' -#' \code{plots} allows plotting the outputs of functions \code{\link{EstimateR}} and \code{\link{WT}} +#' \code{plots} allows plotting the outputs of functions \code{\link{estimate_r}} and \code{\link{wallinga_teunis}} #' -#' @param x The output of function \code{\link{EstimateR}} or function \code{\link{WT}}, or a list of such outputs. If a list, and \code{what='R'} or \code{what='all'}, all estimates of R are plotted on a single graph. -#' @param what A string specifying what to plot, namely the incidence time series (\code{what='I'}), the estimated reproduction number (\code{what='R'}), the serial interval distribution (\code{what='SI'}, or all three (\code{what='all'})). +#' @param x The output of function \code{\link{estimate_r}} or function \code{\link{wallinga_teunis}}, or a list of such outputs. +#' If a list, and \code{what='R'} or \code{what='all'}, all estimates of R are plotted on a single graph. +#' @param what A string specifying what to plot, namely +#' the incidence time series (\code{what='incid'}), +#' the estimated reproduction number (\code{what='R'}), +#' the serial interval distribution (\code{what='SI'}, +#' or all three (\code{what='all'})). #' @param add_imported_cases A boolean to specify whether, on the incidence time series plot, to add the incidence of imported cases. -#' @param options_I For what = "I" or "all". A list of graphical options: +#' @param options_I For what = "incid" or "all". A list of graphical options: #' \describe{ -#' \item{col}{A colour or vector of colours used for plotting I. By default uses the default R colours.} +#' \item{col}{A colour or vector of colours used for plotting incid. By default uses the default R colours.} #' \item{transp}{A numeric value between 0 and 1 used to monitor transparency of the bars plotted. Defaults to 0.7.} #' \item{xlim}{A parameter similar to that in \code{par}, to monitor the limits of the horizontal axis} #' \item{ylim}{A parameter similar to that in \code{par}, to monitor the limits of the vertical axis} @@ -28,9 +33,9 @@ #' \item{ylim}{A parameter similar to that in \code{par}, to monitor the limits of the vertical axis} #' } #' @param legend A boolean (TRUE by default) governing the presence / absence of legends on the plots -#' @return a plot (if \code{what = "I"}, \code{"R"}, or \code{"SI"}) or a \code{\link{grob}} object (if \code{what = "all"}). +#' @return a plot (if \code{what = "incid"}, \code{"R"}, or \code{"SI"}) or a \code{\link{grob}} object (if \code{what = "all"}). # #' @details -#' @seealso \code{\link{EstimateR}} and \code{\link{WT}} +#' @seealso \code{\link{estimate_r}} and \code{\link{wallinga_teunis}} #' @author Rolina van Gaalen \email{rolina.van.gaalen@rivm.nl} and Anne Cori \email{a.cori@imperial.ac.uk} # #' @references #' @importFrom ggplot2 aes aes_string theme @@ -42,7 +47,7 @@ #' data("Flu2009") #' #' ## estimate the instantaneous reproduction number (method "non_parametric_si") -#' R_i <- EstimateR(Flu2009$incidence, method="non_parametric_si", +#' R_i <- estimate_r(Flu2009$incidence, method="non_parametric_si", #' config=list(t_start=2:26, t_end=8:32, #' si_distr=Flu2009$si_distr, plot=FALSE) #' ) @@ -51,14 +56,15 @@ #' plots(R_i, legend = FALSE) #' #' ## estimate the instantaneous reproduction number (method "non_parametric_si") -#' R_c <- WT(Flu2009$incidence, t_start=2:26, t_end=8:32, method="non_parametric_si", -#' si_distr=Flu2009$si_distr, plot=FALSE) +#' R_c <- wallinga_teunis(Flu2009$incidence, method="non_parametric_si", +#' config = list(t_start=2:26, t_end=8:32, +#' si_distr=Flu2009$si_distr, plot=FALSE)) #' #' ## produce plot of the incidence #' ## (with, on top of total incidence, the incidence of imported cases), #' ## estimated instantaneous and case reproduction numbers #' ## and serial interval distribution used -#' p_I <- plots(R_i, "I", add_imported_cases=TRUE) # plots the incidence +#' p_I <- plots(R_i, "incid", add_imported_cases=TRUE) # plots the incidence #' p_SI <- plots(R_i, "SI") # plots the serial interval distribution #' p_Ri <- plots(R_i, "R", #' options_R = list(ylim=c(0,4))) @@ -73,7 +79,7 @@ #' @importFrom plotly layout mutate arrange rename summarise filter ggplotly #' @importFrom graphics plot #' @importFrom incidence as.incidence -plots <- function(x = NULL, what=c("all", "I", "R", "SI"), add_imported_cases=FALSE, +plots <- function(x = NULL, what=c("all", "incid", "R", "SI"), add_imported_cases=FALSE, options_I = list(col = palette(), transp = 0.7, xlim = NULL, ylim=NULL), options_R = list(col = palette(), transp = 0.2, xlim = NULL, ylim=NULL), options_SI = list(prob_min = 0.001, col = "black", transp = 0.25, xlim = NULL, ylim=NULL), @@ -122,8 +128,8 @@ plots <- function(x = NULL, what=c("all", "I", "R", "SI"), add_imported_cases=FA quantile_0.975_posterior <- x$R[, "Quantile.0.975(R)"] method <- x$method si_distr <- x$si_distr - I <- data.frame(local=x$I_local, imported=x$I_imported) - T<-nrow(I) + incid <- data.frame(local=x$I_local, imported=x$I_imported) + T<-nrow(incid) if(!is.null(x$dates)) { dates <- x$dates @@ -157,14 +163,14 @@ plots <- function(x = NULL, what=c("all", "I", "R", "SI"), add_imported_cases=FA std_si.sample <- x$SI.Moments["Std"] } what <- match.arg(what) - if (what == "I" | what =="all") { + if (what == "incid" | what =="all") { if(add_imported_cases) { - p1 <- plot(as.incidence(I, dates = x$dates), ylab="Incidence", xlab = "Time", color = options_I$col, alpha = options_I$transp) + + p1 <- plot(as.incidence(incid, dates = x$dates), ylab="Incidence", xlab = "Time", color = options_I$col, alpha = options_I$transp) + ggtitle("Epidemic curve") }else { - p1 <- plot(as.incidence(rowSums(I), dates = x$dates), ylab="Incidence", xlab = "Time", color = options_I$col, alpha = options_I$transp) + + p1 <- plot(as.incidence(rowSums(incid), dates = x$dates), ylab="Incidence", xlab = "Time", color = options_I$col, alpha = options_I$transp) + ggtitle("Epidemic curve") } @@ -390,7 +396,7 @@ plots <- function(x = NULL, what=c("all", "I", "R", "SI"), add_imported_cases=FA } } - if(what == "I") + if(what == "incid") { if(!legend) p1 <- p1 + theme(legend.position="none") return(p1) @@ -414,7 +420,7 @@ plots <- function(x = NULL, what=c("all", "I", "R", "SI"), add_imported_cases=FA p3 <- p3 + theme(legend.position="none") } - out <- list(I=p1, R=p2, SI=p3) + out <- list(incid=p1, R=p2, SI=p3) out.grid <- arrangeGrob(grobs=out, nrow = 3, ncol=1) grid.arrange(out.grid, newpage = FALSE) return(out.grid) diff --git a/R/utilities.R b/R/utilities.R index 0f2eff3..1287ffd 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -59,22 +59,22 @@ process_si_data <- function(si_data) } -process_I <- function(I) +process_I <- function(incid) { - if(class(I)=="incidence") + if(class(incid)=="incidence") { - I_inc <- I - I <- as.data.frame(I_inc) - I$I <- rowSums(I_inc$counts) + I_inc <- incid + incid <- as.data.frame(I_inc) + incid$I <- rowSums(I_inc$counts) } vector_I <- FALSE single_col_df_I <- FALSE - if(is.vector(I)) + if(is.vector(incid)) { vector_I <- TRUE - }else if(is.data.frame(I)) + }else if(is.data.frame(incid)) { - if(ncol(I)==1) + if(ncol(incid)==1) { single_col_df_I <- TRUE } @@ -83,95 +83,95 @@ process_I <- function(I) { if(single_col_df_I) { - I_tmp <- as.vector(I[,1]) + I_tmp <- as.vector(incid[,1]) }else { - I_tmp <- I + I_tmp <- incid } - I <- data.frame(local=I_tmp, imported=rep(0, length(I_tmp))) - I_init <- sum(I[1,]) - I[1,] <- c(0, I_init) + incid <- data.frame(local=I_tmp, imported=rep(0, length(I_tmp))) + I_init <- sum(incid[1,]) + incid[1,] <- c(0, I_init) }else { - if(!is.data.frame(I) | (!("I" %in% names(I)) & !all(c("local","imported") %in% names(I)) ) ) + if(!is.data.frame(incid) | (!("I" %in% names(incid)) & !all(c("local","imported") %in% names(incid)) ) ) { - stop("I must be a vector or a dataframe with either i) a column called 'I', or ii) 2 columns called 'local' and 'imported'.") + stop("incid must be a vector or a dataframe with either i) a column called 'I', or ii) 2 columns called 'local' and 'imported'.") } - if(("I" %in% names(I)) & !all(c("local","imported") %in% names(I))) + if(("I" %in% names(incid)) & !all(c("local","imported") %in% names(incid))) { - I$local <- I$I - I$local[1] <- 0 - I$imported <- c(I$I[1], rep(0, nrow(I)-1)) + incid$local <- incid$I + incid$local[1] <- 0 + incid$imported <- c(incid$I[1], rep(0, nrow(incid)-1)) } - if(I$local[1]>0) + if(incid$local[1]>0) { - warning("I$local[1] is >0 but must be 0, as all cases on the first time step are assumed imported. This is corrected automatically by cases being transferred to I$imported.") - I_init <- sum(I[1,c('local','imported')]) - I[1,c('local','imported')] <- c(0, I_init) + warning("incid$local[1] is >0 but must be 0, as all cases on the first time step are assumed imported. This is corrected automatically by cases being transferred to incid$imported.") + I_init <- sum(incid[1,c('local','imported')]) + incid[1,c('local','imported')] <- c(0, I_init) } } - I[which(is.na(I))] <- 0 - date_col <- names(I)=='dates' + incid[which(is.na(incid))] <- 0 + date_col <- names(incid)=='dates' if(any(date_col)) { - if(any(I[,!date_col]<0)) + if(any(incid[,!date_col]<0)) { - stop("I must contain only non negative integer values.") + stop("incid must contain only non negative integer values.") } }else { - if(any(I<0)) + if(any(incid<0)) { - stop("I must contain only non negative integer values.") + stop("incid must contain only non negative integer values.") } } - return(I) + return(incid) } -process_I_vector <- function(I) +process_I_vector <- function(incid) { - if(class(I)=="incidence") + if(class(incid)=="incidence") { - I <- rowSums(I$counts) + incid <- rowSums(incid$counts) } - if(!is.vector(I)) + if(!is.vector(incid)) { - if(is.data.frame(I)) + if(is.data.frame(incid)) { - if(ncol(I)==1) + if(ncol(incid)==1) { - I <- as.vector(I[,1]) - }else if('I' %in% names(I)) + incid <- as.vector(incid[,1]) + }else if('I' %in% names(incid)) { - I <- as.vector(I$I) - }else if(!all(c('local', 'imported') %in% names(I))) + incid <- as.vector(incid$I) + }else if(!all(c('local', 'imported') %in% names(incid))) { - stop("I must be a vector or a dataframe with at least a column named 'I' or two columns named 'local' and 'imported'.") + stop("incid must be a vector or a dataframe with at least a column named 'I' or two columns named 'local' and 'imported'.") } }else { - stop("I must be a vector or a dataframe with at least a column named 'I' or two columns named 'local' and 'imported'.") + stop("incid must be a vector or a dataframe with at least a column named 'I' or two columns named 'local' and 'imported'.") } } - I[which(is.na(I))] <- 0 - date_col <- names(I)=='dates' + incid[which(is.na(incid))] <- 0 + date_col <- names(incid)=='dates' if(any(date_col)) { - if(any(I[,!date_col]<0)) + if(any(incid[,!date_col]<0)) { - stop("I must contain only non negative integer values.") + stop("incid must contain only non negative integer values.") } }else { - if(any(I<0)) + if(any(incid<0)) { - stop("I must contain only non negative integer values.") + stop("incid must contain only non negative integer values.") } } - return(I) + return(incid) } process_si_sample <- function(si_sample) @@ -210,18 +210,18 @@ check_times <- function(t_start, t_end, T) # this only produces warnings and err stop("t_start[i] must be <= t_end[i] for all i.") } if (any(t_start < 2 | t_start > T | t_start%%1 != 0 )) { - stop("t_start must be a vector of integers between 2 and the number of timesteps in I.") + stop("t_start must be a vector of integers between 2 and the number of timesteps in incid.") } if (any(t_end < 2 | t_end > T | t_end%%1 != 0)) { - stop("t_end must be a vector of integers between 2 and the number of timesteps in I.") + stop("t_end must be a vector of integers between 2 and the number of timesteps in incid.") } } -check_si_distr <- function(si_distr, sumToOne = c("error", "warning")) # this only produces warnings and errors, does not return anything +check_si_distr <- function(si_distr, sumToOne = c("error", "warning"), method = "non_parametric_si") # this only produces warnings and errors, does not return anything { sumToOne <- match.arg(sumToOne) if (is.null(si_distr)) { - stop("si_distr argument missing.") + stop(paste0("si_distr argument is missing but is required for method ", method, ".")) } if (!is.vector(si_distr)) { stop("si_distr must be a vector.") @@ -244,20 +244,192 @@ check_si_distr <- function(si_distr, sumToOne = c("error", "warning")) # this on } } -check_dates <- function(I) +check_dates <- function(incid) { - dates <- I$dates + dates <- incid$dates if(class(dates) != "Date" & class(dates) != "numeric") { - stop("I$dates must be an object of class date or numeric.") + stop("incid$dates must be an object of class date or numeric.") }else { if(unique(diff(dates)) != 1) { - stop("I$dates must contain dates which are all in a row.") + stop("incid$dates must contain dates which are all in a row.") }else { return(dates) } } -} \ No newline at end of file +} + +process_config <- function(config) +{ + if (!("mean_prior" %in% names(config))) { + config$mean_prior = 5 + } + + if (!("std_prior" %in% names(config))) { + config$std_prior = 5 + } + + if (config$mean_prior <= 0) { + stop("config$mean_prior must be >0.") + } + if (config$std_prior <= 0) { + stop("config$std_prior must be >0.") + } + + if (!("cv_posterior" %in% names(config))) { + config$cv_posterior = 0.3 + } + + if (!("plot" %in% names(config))) { + config$plot = FALSE + } + + if (!("legend" %in% names(config))) { + config$legend = FALSE + } + + if (!("mcmc_control" %in% names(config))) { + config$mcmc_control = list(init_pars = NULL, burnin = 3000, thin=10, seed = as.integer(Sys.time())) + } + + return(config) +} + +process_config_si_from_data <- function(config, si_data) +{ + config$si_parametric_distr <- match.arg(config$si_parametric_distr, + c("G", "W", "L", "off1G", "off1W", "off1L")) + if (is.null(config$n1)) { + stop("method si_from_data requires to specify the config$n1 argument.") + } + if (is.null(config$n2)) { + stop("method si_from_data requires to specify the config$n2 argument.") + } + if (config$n2 <= 0 || config$n2%%1 != 0) { + stop("method si_from_data requires a >0 integer value for config$n2.") + } + if (config$n1 <= 0 || config$n1%%1 != 0) { + stop("method si_from_data requires a >0 integer value for config$n1.") + } + if(is.null(config$mcmc_control$init_pars)) { + config$mcmc_control$init_pars <- + init_mcmc_params(si_data, config$si_parametric_distr) + } + if((config$si_parametric_distr=="off1G" | + config$si_parametric_distr=="off1W" | + config$si_parametric_distr=="off1L") & + any(si_data$SR-si_data$EL<=1)) + { + stop(paste("You cannot fit a distribution with offset 1 to this SI", + "dataset, because for some data points the maximum serial", + "interval is <=1.\nChoose a different distribution")) + } + return(config) +} + +check_config <- function(config, method) +{ + if (method == "non_parametric_si") { + check_si_distr(config$si_distr, method = method) + } + if (method == "parametric_si") { + if (is.null(config$mean_si)) { + stop("method parametric_si requires to specify the config$mean_si argument.") + } + if (is.null(config$std_si)) { + stop("method parametric_si requires to specify the config$std_si argument.") + } + if (config$mean_si <= 1) { + stop("method parametric_si requires a value >1 for config$mean_si.") + } + if (config$std_si <= 0) { + stop("method parametric_si requires a >0 value for config$std_si.") + } + } + if (method == "uncertain_si") { + if (is.null(config$mean_si)) { + stop("method uncertain_si requires to specify the config$mean_si argument.") + } + if (is.null(config$std_si)) { + stop("method uncertain_si requires to specify the config$std_si argument.") + } + if (is.null(config$n1)) { + stop("method uncertain_si requires to specify the config$n1 argument.") + } + if (is.null(config$n2)) { + stop("method uncertain_si requires to specify the config$n2 argument.") + } + if (is.null(config$std_mean_si)) { + stop("method uncertain_si requires to specify the config$std_mean_si argument.") + } + if (is.null(config$min_mean_si)) { + stop("method uncertain_si requires to specify the config$min_mean_si argument.") + } + if (is.null(config$max_mean_si)) { + stop("method uncertain_si requires to specify the config$max_mean_si argument.") + } + if (is.null(config$std_std_si)) { + stop("method uncertain_si requires to specify the config$std_std_si argument.") + } + if (is.null(config$min_std_si)) { + stop("method uncertain_si requires to specify the config$min_std_si argument.") + } + if (is.null(config$max_std_si)) { + stop("method uncertain_si requires to specify the config$max_std_si argument.") + } + if (config$mean_si <= 0) { + stop("method uncertain_si requires a >0 value for config$mean_si.") + } + if (config$std_si <= 0) { + stop("method uncertain_si requires a >0 value for config$std_si.") + } + if (config$n2 <= 0 || config$n2%%1 != 0) { + stop("method uncertain_si requires a >0 integer value for config$n2.") + } + if (config$n1 <= 0 || config$n1%%1 != 0) { + stop("method uncertain_si requires a >0 integer value for config$n1.") + } + if (config$std_mean_si <= 0) { + stop("method uncertain_si requires a >0 value for config$std_mean_si.") + } + if (config$min_mean_si < 1) { + stop("method uncertain_si requires a value >=1 for config$min_mean_si.") + } + if (config$max_mean_si < config$mean_si) { + stop("method uncertain_si requires that config$max_mean_si >= config$mean_si.") + } + if (config$mean_si < config$min_mean_si) { + stop("method uncertain_si requires that config$mean_si >= config$min_mean_si.") + } + if (signif(config$max_mean_si - config$mean_si, 3) != signif(config$mean_si - + config$min_mean_si, 3)) { + warning("The distribution you chose for the mean SI is not centered around the mean.") + } + if (config$std_std_si <= 0) { + stop("method uncertain_si requires a >0 value for config$std_std_si.") + } + if (config$min_std_si <= 0) { + stop("method uncertain_si requires a >0 value for config$min_std_si.") + } + if (config$max_std_si < config$std_si) { + stop("method uncertain_si requires that config$max_std_si >= config$std_si.") + } + if (config$std_si < config$min_std_si) { + stop("method uncertain_si requires that config$std_si >= config$min_std_si.") + } + if (signif(config$max_std_si - config$std_si, 3) != signif(config$std_si - + config$min_std_si, 3)) { + warning("The distribution you chose for the std of the SI is not centered around the mean.") + } + } + if (config$cv_posterior < 0) { + stop("config$cv_posterior must be >0.") + } + if (config$plot != TRUE && config$plot != FALSE) { + stop("config$plot must be TRUE or FALSE.") + } +} + diff --git a/R/wallinga_teunis.R b/R/wallinga_teunis.R new file mode 100644 index 0000000..5460d3b --- /dev/null +++ b/R/wallinga_teunis.R @@ -0,0 +1,310 @@ +######################################################################################################################### +# wallinga_teunis function to estimate Rc the case reproduction number # +######################################################################################################################### + +#' Estimation of the case reproduction number using the Wallinga and Teunis method +#' +#' \code{wallinga_teunis} estimates the case reproduction number of an epidemic, given the incidence time series and the serial interval distribution. +#' +#' @param incid One of the following +#' \itemize{ +#' \item{Vector (or a dataframe with a column named 'incid') of non-negative integers containing an incidence time series. +#' If the dataframe contains a column \code{incid$dates}, this is used for plotting. +#' \code{incid$dates} must contains only dates in a row.} +#' \item{An object of class \code{\link[incidence]{incidence}}} +#' } +#' @param method the method used to estimate R, one of "non_parametric_si", "parametric_si", "uncertain_si", "si_from_data" or "si_from_sample" +#' @param config a list with the following elements: +#' \itemize{ +#' \item{t_start: Vector of positive integers giving the starting times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}. t_start[1] should be strictly after the first day with non null incidence.} +#' \item{t_end: Vector of positive integers giving the ending times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}.} +#' \item{method: One of "non_parametric_si" or "parametric_si" (see details).} +#' \item{mean_si: For method "parametric_si" ; positive real giving the mean serial interval.} +#' \item{std_si: For method "parametric_si" ; non negative real giving the stadard deviation of the serial interval.} +#' \item{si_distr: For method "non_parametric_si" ; vector of probabilities giving the discrete distribution of the serial interval, starting with \code{si_distr[1]} (probability that the serial interval is zero), which should be zero.} +#' \item{n_sim: A positive integer giving the number of simulated epidemic trees used for computation of the confidence intervals of the case reproduction number (see details).} +#' \item{plot: Logical. If \code{TRUE} (default is \code{FALSE}), output is plotted (see value).} +#' \item{legend: A boolean (TRUE by default) governing the presence / absence of legends on the plots +#' This specifies the position of the legend in the plot. Alternatively, \code{locator(1)} can be used ; the user will then need to click where the legend needs to be written.} +#' } +#' @return { +#' a list with components: +#' \itemize{ +#' \item{R}{: a dataframe containing: +#' the times of start and end of each time window considered ; +#' the estimated mean, std, and 0.025 and 0.975 quantiles of the reproduction number for each time window.} +#' \item{si_distr}{: a vector containing the discrete serial interval distribution used for estimation} +#' \item{SI.Moments}{: a vector containing the mean and std of the discrete serial interval distribution(s) used for estimation} +#' \item{I}{: the time series of total incidence} +#' \item{I_local}{: the time series of incidence of local cases (so that \code{I_local + I_imported = I})} +#' \item{I_imported}{: the time series of incidence of imported cases (so that \code{I_local + I_imported = I})} +#' \item{dates}{: a vector of dates corresponding to the incidence time series} +#' } +#' } +#' @details{ +#' Estimates of the case reproduction number for an epidemic over predefined time windows can be obtained, +#' for a given discrete distribution of the serial interval, as proposed by Wallinga and Teunis (AJE, 2004). +#' Confidence intervals are obtained by simulating a number (config$n_sim) of possible transmission trees. +#' +#' The methods vary in the way the serial interval distribution is specified. +#' +#' ----------------------- \code{method "non_parametric_si"} ----------------------- +#' +#' The discrete distribution of the serial interval is directly specified in the argument \code{config$si_distr}. +#' +#' If \code{config$plot} is \code{TRUE}, 3 plots are produced. +#' The first one shows the epidemic curve. +#' The second one shows the posterior mean and 95\% credible interval of the reproduction number. The estimate for a time window is plotted at the end of the time window. +#' The third plot shows the discrete distribution of the serial interval. +#' +#' ----------------------- \code{method "parametric_si"} ----------------------- +#' +#' The mean and standard deviation of the continuous distribution of the serial interval are given in the arguments \code{config$mean_si} and \code{config$std_si}. +#' The discrete distribution of the serial interval is derived automatically using \code{\link{discr_si}}. +#' +#' If \code{config$plot} is \code{TRUE}, 3 plots are produced, which are identical to the ones for \code{method "non_parametric_si"} . +#' } +#' @seealso \code{\link{discr_si}}, \code{\link{estimate_r}} +#' @author Anne Cori \email{a.cori@imperial.ac.uk} +#' @references { +#' Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). +#' Wallinga, J. and P. Teunis. Different epidemic curves for severe acute respiratory syndrome reveal similar impacts of control measures (AJE 2004). +#' } +#' @export +#' @import reshape2 grid gridExtra +#' @importFrom ggplot2 last_plot ggplot aes geom_step ggtitle geom_ribbon geom_line xlab ylab xlim geom_hline ylim geom_histogram +#' @importFrom plotly layout mutate arrange rename summarise filter ggplotly +#' @examples +#' ## load data on pandemic flu in a school in 2009 +#' data("Flu2009") +#' +#' ## estimate the case reproduction number (method "non_parametric_si") +#' wallinga_teunis(Flu2009$incidence, +#' method="non_parametric_si", +#' config = list(t_start=2:26, t_end=8:32, +#' si_distr = Flu2009$si_distr, +#' n_sim=100, +#' plot=TRUE)) +#' # the second plot produced shows, at each each day, +#' # the estimate of the case reproduction number over the 7-day window finishing on that day. +#' +#' ## estimate the case reproduction number (method "parametric_si") +#' wallinga_teunis(Flu2009$incidence, method="parametric_si", +#' config = list(t_start=2:26, t_end=8:32, +#' mean_si=2.6, std_si=1.5, +#' n_sim=100, +#' plot=TRUE)) +#' # the second plot produced shows, at each each day, +#' # the estimate of the case reproduction number over the 7-day window finishing on that day. +wallinga_teunis <- function(incid, + method = c("non_parametric_si","parametric_si"), + config) +{ + + ### Functions ### + + ######################################################### + # Draws a possile transmission tree # + ######################################################### + + draw_one_set_of_ancestries <- function() + { + res <- vector() + for(t in config$t_start[1]:config$t_end[length(config$t_end)]) + { + if(length(which(Onset==t))>0) + { + if(length(possible_ances_time[[t]])>0) + { + prob <- config$si_distr[t-possible_ances_time[[t]]+1]*incid[possible_ances_time[[t]]] + if(any(prob>0)) + { + res[which(Onset==t)] <- possible_ances_time[[t]][which(rmultinom(length(which(Onset==t)),size=1,prob=prob)==TRUE,arr.ind=TRUE)[,1]] + }else + { + res[which(Onset==t)] <- NA + } + }else + res[which(Onset==t)] <- NA + } + } + return(res) + } + + ### Error messages ### + + method <- match.arg(method) + + incid <- process_I(incid) + if(!is.null(incid$dates)) + { + dates <- check_dates(incid) + incid <- process_I_vector(rowSums(incid[,c("local","imported")])) + T<-length(incid) + }else + { + incid <- process_I_vector(rowSums(incid[,c("local","imported")])) + T<-length(incid) + dates <- 1:T + } + + + ### Adjusting t_start and t_end so that at least an incident case has been observed before t_start[1] ### + + i <- 1 + while(sum(incid[1:(config$t_start[i]-1)])==0) + { + i <- i+1 + } + temp <- which(config$t_start0)) + { + config$t_start <- config$t_start[-temp] + config$t_end <- config$t_end[-temp] + } + + check_times(config$t_start, config$t_end, T) + nb_time_periods <- length(config$t_start) + + if(is.null(config$n_sim)) + { + config$n_sim <- 10 + warning("setting config$n_sim to 10 as config$n_sim was not specified. ") + } + + if(method=="non_parametric_si") + { + check_si_distr(config$si_distr) + config$si_distr <- c(config$si_distr,0) + } + + if(method=="parametric_si") + { + if(is.null(config$mean_si)) + { + stop("method non_parametric_si requires to specify the config$mean_si argument.") + } + if(is.null(config$std_si)) + { + stop("method non_parametric_si requires to specify the config$std_si argument.") + } + if(config$mean_si<1) + { + stop("method parametric_si requires a value >1 for config$mean_si.") + } + if(config$std_si<0) + { + stop("method parametric_si requires a >0 value for config$std_si.") + } + } + + if(config$plot!=TRUE && config$plot!=FALSE) + { + stop("config$plot must be TRUE or FALSE.") + } + + if(!is.numeric(config$n_sim)) + { + stop("config$n_sim must be a positive integer.") + } + if(config$n_sim<=0) + { + stop("config$n_sim must be a positive integer.") + } + + ### What does each method do ### + + if(method=="non_parametric_si") + { + parametric_si<-"N" + } + if(method=="parametric_si") + { + parametric_si<-"Y" + } + + if(parametric_si=="Y") + { + config$si_distr <- discr_si(0:(T-1),config$mean_si,config$std_si) + } + if(length(config$si_distr)0) + { + config$t_start <- config$t_start[-time_periods_with_no_incidence] + config$t_end <- config$t_end[-time_periods_with_no_incidence] + nb_time_periods <- length(config$t_start) + } + + Onset <- vector() + for (t in 1:T) { + Onset <- c(Onset, rep(t, incid[t])) + } + NbCases <- length(Onset) + + delay <- outer (1:T, 1:T, "-") + si_delay <- apply(delay, 2, function(x) config$si_distr[pmin(pmax(x + 1, 1), length(config$si_distr))]) + sum_on_col_si_delay_tmp <- sapply(1:nrow(si_delay), function(i) sum(si_delay [i,]* incid, na.rm=TRUE) ) + sum_on_col_si_delay <- vector() + for (t in 1:T) { + sum_on_col_si_delay <- c(sum_on_col_si_delay, rep(sum_on_col_si_delay_tmp[t], incid[t])) + } + mat_sum_on_col_si_delay <- matrix(rep(sum_on_col_si_delay_tmp, T), nrow = T, ncol = T) + p <- si_delay/(mat_sum_on_col_si_delay) + p[which(is.na(p))] <- 0 + p[which(is.infinite(p))] <- 0 + mean_r_per_index_case_date <- sapply(1:ncol(p), function(j) sum(p[,j]*incid, na.rm=TRUE)) + mean_r_per_date_wt <- sapply(1:nb_time_periods, function(i) mean(rep(mean_r_per_index_case_date[which((1:T >= config$t_start[i]) * (1:T <= config$t_end[i]) == 1)], incid[which((1:T >= config$t_start[i]) * (1:T <= config$t_end[i]) == 1)]) ) ) + + possible_ances_time <- sapply(1:T,function(t) (t-(which(config$si_distr!=0))+1)[which(t-(which(config$si_distr!=0))+1>0)]) + ancestries_time <- t(sapply(1:config$n_sim , function(i) draw_one_set_of_ancestries())) + + r_sim <- sapply(1:nb_time_periods,function(i) rowSums((ancestries_time[,]>=config$t_start[i]) * (ancestries_time[,]<=config$t_end[i]),na.rm=TRUE)/sum(incid[config$t_start[i]:config$t_end[i]])) + + r025_wt <- apply(r_sim, 2, quantile,0.025,na.rm=TRUE) + r025_wt <- r025_wt[which(!is.na(r025_wt))] + r975_wt <- apply(r_sim, 2, quantile,0.975,na.rm=TRUE) + r975_wt <- r975_wt[which(!is.na(r975_wt))] + std_wt <- apply(r_sim, 2, sd,na.rm=TRUE) + std_wt <- std_wt[which(!is.na(std_wt))] + + results <- list(R = as.data.frame(cbind(config$t_start,config$t_end,mean_r_per_date_wt,std_wt,r025_wt,r975_wt)) ) + + names(results$R) <- c("t_start","t_end","Mean(R)","Std(R)","Quantile.0.025(R)","Quantile.0.975(R)") + + results$method <- method + results$si_distr <- config$si_distr + + results$SI.Moments<-as.data.frame(cbind(final_mean_si,final_std_si)) + names(results$SI.Moments)<-c("Mean","Std") + + if(!is.null(dates)) + results$dates <- dates + results$I <- incid + results$I_local <- incid + results$I_local[1] <- 0 + results$I_imported <- c(incid[1], rep(0, length(incid)-1)) + + if(!is.null(config$plot)) + { + if(config$plot) + { + if(is.null(config$legend)) + config$legend <- FALSE + plots(results, what="all", legend = config$legend) + } + } + + return(results) + +} diff --git a/data/Flu2009.rda b/data/Flu2009.rda index 91e70f8..8173328 100644 Binary files a/data/Flu2009.rda and b/data/Flu2009.rda differ diff --git a/data/flu_2009_NYC_school.rda b/data/flu_2009_NYC_school.rda new file mode 100644 index 0000000..44fec0b Binary files /dev/null and b/data/flu_2009_NYC_school.rda differ diff --git a/man/DiscrSI.Rd b/man/DiscrSI.Rd index fb8df7e..0876243 100755 --- a/man/DiscrSI.Rd +++ b/man/DiscrSI.Rd @@ -1,52 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DiscrSI.R +% Please edit documentation in R/compatibility.R \name{DiscrSI} \alias{DiscrSI} -\title{Discretized Generation Time Distribution Assuming A Shifted Gamma Distribution} +\title{Function to ensure compatibility with EpiEstim versions <2.0} \usage{ DiscrSI(k, mu, sigma) } \arguments{ -\item{k}{Positive integer for which the discrete distribution is desired.} +\item{k}{see \code{k} in \code{discr_si}} -\item{mu}{A positive real giving the mean of the Gamma distribution.} +\item{mu}{see \code{mu} in \code{discr_si}} -\item{sigma}{A non-negative real giving the standard deviation of the Gamma distribution.} -} -\value{ -Gives the discrete probability \eqn{w_k} that the serial interval is equal to \eqn{k}. +\item{sigma}{see \code{sigma} in \code{discr_si}} } \description{ -\code{DiscrSI} computes the discrete distribution of the serial interval, assuming that the serial interval is shifted Gamma distributed, with shift 1. -} -\details{ -{ -Assuming that the serial interval is shifted Gamma distributed with mean \eqn{\mu}, standard deviation \eqn{\sigma} and shift \eqn{1}, -the discrete probability \eqn{w_k} that the serial interval is equal to \eqn{k} is: - -\deqn{w_k = kF_{\{\mu-1,\sigma\}}(k)+(k-2)F_{\{\mu-1,\sigma\}}(k-2)-2(k-1)F_{\{\mu-1,\sigma\}}(k-1)\\+(\mu-1)(2F_{\{\mu-1+\frac{\sigma^2}{\mu-1},\sigma\sqrt{1+\frac{\sigma^2}{\mu-1}}\}}(k-1)-F_{\{\mu-1+\frac{\sigma^2}{\mu-1},\sigma\sqrt{1+\frac{\sigma^2}{\mu-1}}\}}(k-2)-F_{\{\mu-1+\frac{\sigma^2}{\mu-1},\sigma\sqrt{1+\frac{\sigma^2}{\mu-1}}\}}(k))} - -where \eqn{F_{\{\mu,\sigma\}}} is the cumulative density function of a Gamma distribution with mean \eqn{\mu} and standard deviation \eqn{\sigma}. -} -} -\examples{ -## Computing the discrete serial interval of influenza -MeanFluSI <- 2.6 -SdFluSI <- 1.5 -DicreteSIDistr <- vector() -for(i in 0:20) -{ -DicreteSIDistr[i+1] <- DiscrSI(i, MeanFluSI, SdFluSI) -} -plot(0:20, DicreteSIDistr, type="h", lwd=10, lend=1, xlab="time (days)", ylab="frequency") -title(main="Discrete distribution of the serial interval of influenza") -} -\references{ -Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). -} -\seealso{ -\code{\link{overall_infectivity}}, \code{\link{EstimateR}} -} -\author{ -Anne Cori \email{a.cori@imperial.ac.uk} +Please only use for compatibility; +Prefer the new discr_si function instead } diff --git a/man/EstimateR.Rd b/man/EstimateR.Rd index c31f7d1..553f177 100755 --- a/man/EstimateR.Rd +++ b/man/EstimateR.Rd @@ -1,277 +1,58 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/EstimationR.R +% Please edit documentation in R/compatibility.R \name{EstimateR} \alias{EstimateR} -\title{Estimated Instantaneous Reproduction Number} +\title{Function to ensure compatibility with EpiEstim versions <2.0} \usage{ -EstimateR(I, method = c("non_parametric_si", "parametric_si", "uncertain_si", - "si_from_data", "si_from_sample"), si_data = NULL, si_sample = NULL, - config) +EstimateR(I, T.Start, T.End, method = c("NonParametricSI", "ParametricSI", + "UncertainSI"), n1 = NULL, n2 = NULL, Mean.SI = NULL, Std.SI = NULL, + Std.Mean.SI = NULL, Min.Mean.SI = NULL, Max.Mean.SI = NULL, + Std.Std.SI = NULL, Min.Std.SI = NULL, Max.Std.SI = NULL, + SI.Distr = NULL, Mean.Prior = 5, Std.Prior = 5, CV.Posterior = 0.3, + plot = FALSE, leg.pos = "topright") } \arguments{ -\item{I}{One of the following -\itemize{ -\item{A vector (or a dataframe with a single column) of non-negative integers containing the incidence time series} -\item{A dataframe of non-negative integers with either i) \code{I$I} containing the total incidence, or ii) two columns, -so that \code{I$local} contains the incidence of cases due to local transmission and -\code{I$imported} contains the incidence of imported cases (with \code{I$local + I$imported} -the total incidence). If the dataframe contains a column \code{I$dates}, this is used for plotting. -\code{I$dates} must contains only dates in a row.} -\item{An object of class \code{\link[incidence]{incidence}}} -} -Note that the cases from the first time step are always all assumed to be imported cases.} +\item{I}{see \code{incid} in \code{estimate_r}} -\item{method}{One of "non_parametric_si", "parametric_si", "uncertain_si", "si_from_data" or "si_from_sample" (see details).} +\item{T.Start}{see \code{config$t_start} in \code{estimate_r}} -\item{si_data}{For method "si_from_data" ; the data on dates of symptoms of pairs of infector/infected individuals to be used to estimate the serial interval distribution (see details).} +\item{T.End}{see \code{config$t_end} in \code{estimate_r}} -\item{si_sample}{For method "si_from_sample" ; a matrix where each column gives one distribution of the serial interval to be explored (see details).} +\item{method}{see method in \code{estimate_r} (but EstimateR uses CamelCase where estimate_r uses snake_case for the method names)} -\item{config}{A list containing the following: -\describe{ -\item{t_start}{Vector of positive integers giving the starting times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}. t_start[1] should be strictly after the first day with non null incidence.} -\item{t_end}{Vector of positive integers giving the ending times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}.} -\item{n1}{For method "uncertain_si" and "si_from_data"; positive integer giving the size of the sample of SI distributions to be drawn (see details).} -\item{n2}{For methods "uncertain_si", "si_from_data" and "si_from_sample"; positive integer giving the size of the sample drawn from the posterior distribution of R for each serial interval distribution considered (see details).} -\item{mean_si}{For method "parametric_si" and "uncertain_si" ; positive real giving the mean serial interval (method "parametric_si") or the average mean serial interval (method "uncertain_si", see details).} -\item{std_si}{For method "parametric_si" and "uncertain_si" ; non negative real giving the stadard deviation of the serial interval (method "parametric_si") or the average standard deviation of the serial interval (method "uncertain_si", see details).} -\item{std_mean_si}{For method "uncertain_si" ; standard deviation of the distribution from which mean serial intervals are drawn (see details).} -\item{min_mean_si}{For method "uncertain_si" ; lower bound of the distribution from which mean serial intervals are drawn (see details).} -\item{max_mean_si}{For method "uncertain_si" ; upper bound of the distribution from which mean serial intervals are drawn (see details).} -\item{std_std_si}{For method "uncertain_si" ; standard deviation of the distribution from which standard deviations of the serial interval are drawn (see details).} -\item{min_std_si}{For method "uncertain_si" ; lower bound of the distribution from which standard deviations of the serial interval are drawn (see details).} -\item{max_std_si}{For method "uncertain_si" ; upper bound of the distribution from which standard deviations of the serial interval are drawn (see details).} -\item{si_distr}{For method "non_parametric_si" ; vector of probabilities giving the discrete distribution of the serial interval, starting with \code{si_distr[1]} (probability that the serial interval is zero), which should be zero.} -\item{si_parametric_distr}{For method "si_from_data" ; the parametric distribution to use when estimating the serial interval from data on dates of symptoms of pairs of infector/infected individuals (see details). -Should be one of "G" (Gamma), "W" (Weibull), "L" (Lognormal), "off1G" (Gamma shifted by 1), "off1W" (Weibull shifted by 1), or "off1L" (Lognormal shifted by 1).} -\item{mcmc_control}{For method "si_from_data" ; a list containing the following (see details): -\describe{ - \item{init.pars}{vector of size 2 corresponding to the initial values of parameters to use for the SI distribution. This is the shape and scale for all but the lognormal distribution, for which it is the meanlog and sdlog. If not specified these are chosen automatically using function \code{\link{init_MCMC_params}}.} - \item{burnin}{a positive integer giving the burnin used in the MCMC when estimating the serial interval distribution.} - \item{thin}{a positive integer corresponding to thinning parameter; the MCMC will be run for \code{burnin+n1*thin iterations}; 1 in \code{thin} iterations will be recorded, after the burnin phase, so the posterior sample size is n1.} - \item{seed}{An integer used as the seed for the random number generator at the start of the MCMC estimation; useful to get reproducible results.} -}} -\item{seed}{An optional integer used as the seed for the random number generator at the start of the function (then potentially reset within the MCMC for method \code{si_from_data}); useful to get reproducible results.} -\item{mean_prior}{A positive number giving the mean of the common prior distribution for all reproduction numbers (see details).} -\item{std_prior}{A positive number giving the standard deviation of the common prior distribution for all reproduction numbers (see details).} -\item{cv_posterior}{A positive number giving the aimed posterior coefficient of variation (see details).} -\item{plot}{Logical. If \code{TRUE} (default is \code{FALSE}), output is plotted (see value).} -\item{legend}{A boolean (TRUE by default) governing the presence / absence of legends on the plots} -}} -} -\value{ -{ -a list with components: -\itemize{ -\item{R}{: a dataframe containing: -the times of start and end of each time window considered ; -the posterior mean, std, and 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975 quantiles of the reproduction number for each time window.} - \item{method}{: the method used to estimate R, one of "non_parametric_si", "parametric_si", "uncertain_si", "si_from_data" or "si_from_sample"} - \item{si_distr}{: a vector or dataframe (depending on the method) containing the discrete serial interval distribution(s) used for estimation} - \item{SI.Moments}{: a vector or dataframe (depending on the method) containing the mean and std of the discrete serial interval distribution(s) used for estimation} - \item{I}{: the time series of total incidence} - \item{I_local}{: the time series of incidence of local cases (so that \code{I_local + I_imported = I})} - \item{I_imported}{: the time series of incidence of imported cases (so that \code{I_local + I_imported = I})} - \item{dates}{: a vector of dates corresponding to the incidence time series} - \item{MCMC_converged}{ (only for method \code{si_from_data}): a boolean showing whether the Gelman-Rubin MCMC convergence diagnostic was successful (\code{TRUE}) or not (\code{FALSE})} -} -} -} -\description{ -\code{EstimateR} estimates the reproduction number of an epidemic, given the incidence time series and the serial interval distribution. -} -\details{ -{ -Analytical estimates of the reproduction number for an epidemic over predefined time windows can be obtained within a Bayesian framework, -for a given discrete distribution of the serial interval (see references). - -The more incident cases are observed over a time window, the smallest the posterior coefficient of variation (CV, ratio of standard deviation over mean) of the reproduction number. -An aimed CV can be specified in the argument \code{cv_posterior} (default is \code{0.3}), and a warning will be produced if the incidence within one of the time windows considered is too low to get this CV. - -The methods vary in the way the serial interval distribution is specified. - -In short there are five methods to specify the serial interval distribution (see below for more detail on each method). -In the first two methods, a unique serial interval distribution is considered, whereas in the last three, a range of serial interval distributions are integrated over: -\itemize{ -\item{In method "non_parametric_si" the user specifies the discrete distribution of the serial interval} -\item{In method "parametric_si" the user specifies the mean and sd of the serial interval} -\item{In method "uncertain_si" the mean and sd of the serial interval are each drawn from truncated normal distributions, with parameters specified by the user} -\item{In method "si_from_data", the serial interval distribution is directly estimated, using MCMC, from interval censored exposure data, with data provided by the user together with a choice of parametric distribution for the serial interval} -\item{In method "si_from_sample", the user directly provides the sample of serial interval distribution to use for estimation of R. This can be a useful alternative to the previous method, where the MCMC estimation of the serial interval distribution could be run once, and the same estimated SI distribution then used in EstimateR in different contexts, e.g. with different time windows, hence avoiding to rerun the MCMC everytime EstimateR is called.} -} - -If \code{plot} is \code{TRUE}, 3 plots are produced. -The first one shows the epidemic curve. -The second one shows the posterior mean and 95\% credible interval of the reproduction number. The estimate for a time window is plotted at the end of the time window. -The third plot shows the discrete distribution(s) of the serial interval. +\item{n1}{see \code{n1} in \code{estimate_r}} ------------------------ \code{method "non_parametric_si"} ----------------------- - -The discrete distribution of the serial interval is directly specified in the argument \code{si_distr}. +\item{n2}{see \code{n2} in \code{estimate_r}} ------------------------ \code{method "parametric_si"} ----------------------- - -The mean and standard deviation of the continuous distribution of the serial interval are given in the arguments \code{mean_si} and \code{std_si}. -The discrete distribution of the serial interval is derived automatically using \code{\link{DiscrSI}}. +\item{Mean.SI}{see \code{config$mean_si} in \code{estimate_r}} ------------------------ \code{method "uncertain_si"} ----------------------- - -\code{Method "uncertain_si"} allows accounting for uncertainty on the serial interval distribution as described in Cori et al. AJE 2013. -We allow the mean \eqn{\mu} and standard deviation \eqn{\sigma} of the serial interval to vary according to truncated normal distributions. -We sample \code{n1} pairs of mean and standard deviations, \eqn{(\mu^{(1)},\sigma^{(1)}),...,(\mu^{(n_2)},\sigma^{(n_2)})}, by first sampling the mean \eqn{\mu^{(k)}} -from its truncated normal distribution (with mean \code{mean_si}, standard deviation \code{std_mean_si}, minimum \code{min_mean_si} and maximum \code{max_mean_si}), -and then sampling the standard deviation \eqn{\sigma^{(k)}} from its truncated normal distribution -(with mean \code{std_si}, standard deviation \code{std_std_si}, minimum \code{min_std_si} and maximum \code{max_std_si}), but imposing that \eqn{\sigma^{(k)}<\mu^{(k)}}. -This constraint ensures that the Gamma probability density function of the serial interval is null at \eqn{t=0}. -Warnings are produced when the truncated normal distributions are not symmetric around the mean. -For each pair \eqn{(\mu^{(k)},\sigma^{(k)})}, we then draw a sample of size \code{n2} in the posterior distribution of the reproduction number over each time window, conditionnally on this serial interval distribution. -After pooling, a sample of size \eqn{\code{n1}\times\code{n2}} of the joint posterior distribution of the reproduction number over each time window is obtained. -The posterior mean, standard deviation, and 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975 quantiles of the reproduction number for each time window are obtained from this sample. +\item{Std.SI}{see \code{config$std_si} in \code{estimate_r}} ------------------------ \code{method "si_from_data"} ----------------------- - -\code{Method "si_from_data"} allows accounting for uncertainty on the serial interval distribution. -Unlike method "uncertain_si", where we arbitrarily vary the mean and std of the SI in truncated normal distributions, -here, the scope of serial interval distributions considered is directly informed by data -on the (potentially censored) dates of symptoms of pairs of infector/infected individuals. -This data, specified in argument \code{si_data}, should be a dataframe with 5 columns: -\itemize{ -\item{EL: the lower bound of the symptom onset date of the infector (given as an integer)} -\item{ER: the upper bound of the symptom onset date of the infector (given as an integer). Should be such that ER>=EL} -\item{SL: the lower bound of the symptom onset date of the infected indivdiual (given as an integer)} -\item{SR: the upper bound of the symptom onset date of the infected indivdiual (given as an integer). Should be such that SR>=SL} -\item{type (optional): can have entries 0, 1, or 2, corresponding to doubly interval-censored, single interval-censored or exact observations, respsectively, see Reich et al. Statist. Med. 2009. If not specified, this will be automatically computed from the dates} -} -Assuming a given parametric distribution for the serial interval distribution (specified in si_parametric_distr), -the posterior distribution of the serial interval is estimated directly fom these data using MCMC methods implemented in the package \code{coarsedatatools}. -The argument \code{mcmc_control} is a list of characteristics which control the MCMC. -The MCMC is run for a total number of iterations of \code{mcmc_control$burnin + n1*mcmc_control$thin}; -but the output is only recorded after the burnin, and only 1 in every \code{mcmc_control$thin} iterations, -so that the posterior sample size is \code{n1}. -For each element in the posterior sample of serial interval distribution, -we then draw a sample of size \code{n2} in the posterior distribution of the reproduction number over each time window, -conditionnally on this serial interval distribution. -After pooling, a sample of size \eqn{\code{n1}\times\code{n2}} of the joint posterior distribution of -the reproduction number over each time window is obtained. -The posterior mean, standard deviation, and 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975 quantiles of the reproduction number for each time window are obtained from this sample. - ------------------------ \code{method "si_from_sample"} ----------------------- - -\code{Method "si_from_sample"} also allows accounting for uncertainty on the serial interval distribution. -Unlike methods "uncertain_si" and "si_from_data", the user directly provides (in argument \code{si_sample}) a sample of serial interval distribution to be explored. +\item{Std.Mean.SI}{see \code{config$std_mean_si} in \code{estimate_r}} -} -} -\examples{ -## load data on pandemic flu in a school in 2009 -data("Flu2009") +\item{Min.Mean.SI}{see \code{config$min_mean_si} in \code{estimate_r}} -## estimate the reproduction number (method "non_parametric_si") -EstimateR(Flu2009$incidence, method="non_parametric_si", - config=list(t_start=2:26, t_end=8:32, - si_distr=Flu2009$si_distr, plot=TRUE)) -# the second plot produced shows, at each each day, -# the estimate of the reproduction number over the 7-day window finishing on that day. +\item{Max.Mean.SI}{see \code{config$max_mean_si} in \code{estimate_r}} -## example with an incidence object +\item{Std.Std.SI}{see \code{config$std_std_si} in \code{estimate_r}} -# create fake data -library(incidence) -data <- c(0,1,1,2,1,3,4,5,5,5,5,4,4,26,6,7,9) -location <- sample(c("local","imported"), length(data), replace=TRUE) -location[1] <- "imported" # forcing the first case to be imported -# get incidence per group (location) -I <- incidence(data, groups = location) -# Estimate R with assumptions on serial interval -EstimateR(I, method = "parametric_si", - config=list(t_start = 2:21, t_end = 8:27, - mean_si = 2.6, std_si = 1.5, plot = TRUE)) +\item{Min.Std.SI}{see \code{config$min_std_si} in \code{estimate_r}} -## estimate the reproduction number (method "parametric_si") -EstimateR(Flu2009$incidence, method="parametric_si", - config=list(t_start=2:26, t_end=8:32, - mean_si=2.6, std_si=1.5, plot=TRUE)) -# the second plot produced shows, at each each day, -# the estimate of the reproduction number over the 7-day window finishing on that day. +\item{Max.Std.SI}{see \code{config$max_std_si} in \code{estimate_r}} -## estimate the reproduction number (method "uncertain_si") -EstimateR(Flu2009$incidence, method="uncertain_si", - config=list(t_start=2:26, t_end=8:32, - mean_si=2.6, std_mean_si=1, min_mean_si=1, max_mean_si=4.2, - std_si=1.5, std_std_si=0.5, min_std_si=0.5, max_std_si=2.5, - n1=100, n2=100, plot=TRUE)) -# the bottom left plot produced shows, at each each day, -# the estimate of the reproduction number over the 7-day window finishing on that day. +\item{SI.Distr}{see \code{config$si_distr} in \code{estimate_r}} -\dontrun{ -## Note the following examples use an MCMC routine -## to estimate the serial interval distribution from data, -## so they may take a few minutes to run +\item{Mean.Prior}{see \code{config$mean_prior} in \code{estimate_r}} -## load data on rotavirus -data("MockRotavirus") +\item{Std.Prior}{see \code{config$std_prior} in \code{estimate_r}} -## estimate the reproduction number (method "si_from_data") -MCMC_seed <- 1 -overall_seed <- 2 -R_si_from_data <- EstimateR(MockRotavirus$incidence, - method="si_from_data", - si_data=MockRotavirus$si_data, - config=list(t_start=2:47, t_end=8:53, - si_parametric_distr = "G", - mcmc_control = list(burnin = 1000, - thin=10, seed = MCMC_seed), - n1 = 500, n2 = 50, - seed = overall_seed, - plot=TRUE)) -## compare with version with no uncertainty -R_Parametric <- EstimateR(MockRotavirus$incidence, - method="parametric_si", - config=list(t_start=2:47, t_end=8:53, - mean_si = mean(R_si_from_data$SI.Moments$Mean), - std_si = mean(R_si_from_data$SI.Moments$Std), - plot=TRUE)) -## generate plots -p_uncertainty <- plots(R_si_from_data, "R", options_R=list(ylim=c(0, 1.5))) -p_no_uncertainty <- plots(R_Parametric, "R", options_R=list(ylim=c(0, 1.5))) -gridExtra::grid.arrange(p_uncertainty, p_no_uncertainty,ncol=2) -# the left hand side graph is with uncertainty in the SI distribution, the right hand side without. -# The credible intervals are wider when accounting for uncertainty in the SI distribution. +\item{CV.Posterior}{see \code{config$cv_posterior} in \code{estimate_r}} -#' ## estimate the reproduction number (method "si_from_sample") -MCMC_seed <- 1 -overall_seed <- 2 -SI.fit <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, - dist="G", - init.pars=init_MCMC_params(MockRotavirus$si_data, "G"), - burnin = 1000, - n.samples = 5000, - seed = MCMC_seed) -si_sample <- coarse2estim(SI.fit, thin=10)$si_sample -R_si_from_sample <- EstimateR(MockRotavirus$incidence, - method="si_from_sample", si_sample=si_sample, - config=list(t_start=2:47, t_end=8:53, - n2 = 50, - seed = overall_seed, - plot=TRUE)) +\item{plot}{see \code{config$plot} in \code{estimate_r}} -# check that R_si_from_sample is the same as R_si_from_data -# since they were generated using the same MCMC algorithm to generate the SI sample -# (either internally to EpiEstim or externally) -all(R_si_from_sample$R$`Mean(R)` == R_si_from_data$R$`Mean(R)`) +\item{leg.pos}{Not used anymore, only there for compatibility} } - -} -\references{ -{ -Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). -Wallinga, J. and P. Teunis. Different epidemic curves for severe acute respiratory syndrome reveal similar impacts of control measures (AJE 2004). -Reich, N.G. et al. Estimating incubation period distributions with coarse data (Statis. Med. 2009) -} -} -\seealso{ -\code{\link{DiscrSI}} -} -\author{ -Anne Cori \email{a.cori@imperial.ac.uk} +\description{ +Please only use for compatibility; +Prefer the new estimate_r function instead } diff --git a/man/Flu1918.Rd b/man/Flu1918.Rd index b934fb6..850fca8 100644 --- a/man/Flu1918.Rd +++ b/man/Flu1918.Rd @@ -22,7 +22,8 @@ This data set gives: data("Flu1918") ## estimate the reproduction number (method "non_parametric_si") -EstimateR(Flu1918$incidence, method="non_parametric_si", +estimate_r(Flu1918$incidence, + method="non_parametric_si", config=list(t_start=2:86, t_end=8:92, si_distr=Flu1918$si_distr, plot=TRUE)) diff --git a/man/Flu2009.Rd b/man/Flu2009.Rd index 5caaa89..94b43f2 100644 --- a/man/Flu2009.Rd +++ b/man/Flu2009.Rd @@ -4,13 +4,19 @@ \name{Flu2009} \alias{Flu2009} \title{Data on the 2009 H1N1 influenza pandemic in a school in Pennsylvania.} -\format{A list of two elements: +\format{A list of three elements: \describe{ - \item{incidence}{a dataframe with 32 lines containing dates in first column, and daily incidence in second column,} - \item{si_distr}{a vector containing a set of 12 probabilities.} + \item{incidence}{a dataframe with 32 lines containing dates in first column, and daily incidence in second column (Cauchemez et al., 2011),} + \item{si_distr}{a vector containing a set of 12 probabilities (Ferguson et al, 2005),} + \item{si_data}{a dataframe with 16 lines giving serial interval patient data collected in a household study in San Antonio, + Texas throughout the 2009 H1N1 outbreak (Morgan et al., 2010). } }} \source{ +{ Cauchemez S. et al. (2011) Role of social networks in shaping disease transmission during a community outbreak of 2009 H1N1 pandemic influenza. Proc Natl Acad Sci U S A 108(7), 2825-2830. + +Morgan O.W. et al. (2010) Household transmission of pandemic (H1N1) 2009, San Antonio, Texas, USA, April-May 2009. Emerg Infect Dis 16: 631-637. +} } \description{ This data set gives: @@ -18,13 +24,15 @@ This data set gives: (ARI, defined as at least two symptoms among fever, cough, sore throat, and runny nose) amongst children in a school in Pennsylvania during the 2009 H1N1 influenza pandemic (see source and references), 2/ the discrete daily distribution of the serial interval for influenza, assuming a shifted Gamma distribution with mean 2.6 days, standard deviation 1.5 days and shift 1 day (see references). +3/ interval-censored serial interval data from the 2009 outbreak of H1N1 influenza +in San Antonio, Texas, USA (see references). } \examples{ ## load data on pandemic flu in a school in 2009 data("Flu2009") ## estimate the reproduction number (method "non_parametric_si") -EstimateR(Flu2009$incidence, method="non_parametric_si", +estimate_r(Flu2009$incidence, method="non_parametric_si", config=list(t_start=2:26, t_end=8:32, si_distr=Flu2009$si_distr, plot=TRUE) @@ -32,6 +40,28 @@ EstimateR(Flu2009$incidence, method="non_parametric_si", # the second plot produced shows, at each each day, # the estimate of the reproduction number # over the 7-day window finishing on that day. + +\dontrun{ +## Note the following examples use an MCMC routine +## to estimate the serial interval distribution from data, +## so they may take a few minutes to run + +## estimate the reproduction number (method "si_from_data") +estimate_r(Flu2009$incidence, method="si_from_data", + si_data = Flu2009$si_data, + config=list(t_start=2:26, t_end=8:32, + mcmc_control = list(burnin = 1000, + thin=10, seed = 1), + n1 = 1000, n2 = 50, + si_parametric_distr = "G", + plot=TRUE) + ) +# the second plot produced shows, at each each day, +# the estimate of the reproduction number +# over the 7-day window finishing on that day. +} + + } \references{ { diff --git a/man/Measles1861.Rd b/man/Measles1861.Rd index 31fa75a..4292556 100644 --- a/man/Measles1861.Rd +++ b/man/Measles1861.Rd @@ -22,7 +22,7 @@ This data set gives: data("Measles1861") ## estimate the reproduction number (method "non_parametric_si") -EstimateR(Measles1861$incidence, method="non_parametric_si", +estimate_r(Measles1861$incidence, method="non_parametric_si", config=list(t_start=17:42, t_end=23:48, si_distr=Measles1861$si_distr, plot=TRUE) diff --git a/man/MockRotavirus.Rd b/man/MockRotavirus.Rd index 4cab6b4..f874ae1 100644 --- a/man/MockRotavirus.Rd +++ b/man/MockRotavirus.Rd @@ -27,7 +27,7 @@ This data set gives: data("MockRotavirus") ## estimate the reproduction number (method "si_from_data") -EstimateR(MockRotavirus$incidence, +estimate_r(MockRotavirus$incidence, method="si_from_data", si_data=MockRotavirus$si_data, config=list( diff --git a/man/OverallInfectivity.Rd b/man/OverallInfectivity.Rd new file mode 100644 index 0000000..09a67b6 --- /dev/null +++ b/man/OverallInfectivity.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compatibility.R +\name{OverallInfectivity} +\alias{OverallInfectivity} +\title{Function to ensure compatibility with EpiEstim versions <2.0} +\usage{ +OverallInfectivity(I, SI.Distr) +} +\arguments{ +\item{I}{see \code{incid} in \code{overall_infectivity}} + +\item{SI.Distr}{see \code{si_distr} in \code{overall_infectivity}} +} +\description{ +Please only use for compatibility; +Prefer the new overall_infectivity function instead +} diff --git a/man/SARS2003.Rd b/man/SARS2003.Rd index 90e0da7..6d66d35 100644 --- a/man/SARS2003.Rd +++ b/man/SARS2003.Rd @@ -22,7 +22,7 @@ This data set gives: data("SARS2003") ## estimate the reproduction number (method "non_parametric_si") -EstimateR(SARS2003$incidence, method="non_parametric_si", +estimate_r(SARS2003$incidence, method="non_parametric_si", config=list(t_start=14:101, t_end=20:107, si_distr=SARS2003$si_distr, plot=TRUE) diff --git a/man/Smallpox1972.Rd b/man/Smallpox1972.Rd index 70c5ea1..a54afd2 100644 --- a/man/Smallpox1972.Rd +++ b/man/Smallpox1972.Rd @@ -22,7 +22,7 @@ This data set gives: data("Smallpox1972") ## estimate the reproduction number (method "non_parametric_si") -EstimateR(Smallpox1972$incidence, method="non_parametric_si", +estimate_r(Smallpox1972$incidence, method="non_parametric_si", config=list(t_start=27:51, t_end=33:57, si_distr=Smallpox1972$si_distr, plot=TRUE) diff --git a/man/WT.Rd b/man/WT.Rd old mode 100755 new mode 100644 index aa0fe15..6c207d3 --- a/man/WT.Rd +++ b/man/WT.Rd @@ -1,111 +1,35 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/WT.R +% Please edit documentation in R/compatibility.R \name{WT} \alias{WT} -\title{Estimation of the case reproduction number using the Wallinga and Teunis method} +\title{Function to ensure compatibility with EpiEstim versions <2.0} \usage{ -WT(I, t_start, t_end, method = c("non_parametric_si", "parametric_si"), - mean_si = NULL, std_si = NULL, si_distr = NULL, nSim = 10, - plot = FALSE, legend = FALSE) +WT(I, T.Start, T.End, method = c("NonParametricSI", "ParametricSI"), + Mean.SI = NULL, Std.SI = NULL, SI.Distr = NULL, nSim = 10, + plot = FALSE, leg.pos = "topright") } \arguments{ -\item{I}{One of the following -\itemize{ -\item{Vector (or a dataframe with a column named 'I') of non-negative integers containing an incidence time series. - If the dataframe contains a column \code{I$dates}, this is used for plotting. - \code{I$dates} must contains only dates in a row.} -\item{An object of class \code{\link[incidence]{incidence}}} - }} +\item{I}{see \code{incid} in \code{wallinga_teunis}} -\item{t_start}{Vector of positive integers giving the starting times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}. t_start[1] should be strictly after the first day with non null incidence.} +\item{T.Start}{see \code{config$t_start} in \code{wallinga_teunis}} -\item{t_end}{Vector of positive integers giving the ending times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}.} +\item{T.End}{see \code{config$t_end} in \code{wallinga_teunis}} -\item{method}{One of "non_parametric_si" or "parametric_si" (see details).} +\item{method}{see method in \code{wallinga_teunis} (but WT uses CamelCase where wallinga_teunis uses snake_case for the method names)} -\item{mean_si}{For method "parametric_si" ; positive real giving the mean serial interval.} +\item{Mean.SI}{see \code{config$mean_si} in \code{wallinga_teunis}} -\item{std_si}{For method "parametric_si" ; non negative real giving the stadard deviation of the serial interval.} +\item{Std.SI}{see \code{config$std_si} in \code{wallinga_teunis}} -\item{si_distr}{For method "non_parametric_si" ; vector of probabilities giving the discrete distribution of the serial interval, starting with \code{si_distr[1]} (probability that the serial interval is zero), which should be zero.} +\item{SI.Distr}{see \code{config$si_distr} in \code{wallinga_teunis}} -\item{nSim}{A positive integer giving the number of simulated epidemic trees used for computation of the confidence intervals of the case reproduction number (see details).} +\item{nSim}{see \code{config$n_sim} in \code{wallinga_teunis}} -\item{plot}{Logical. If \code{TRUE} (default is \code{FALSE}), output is plotted (see value).} +\item{plot}{see \code{config$plot} in \code{wallinga_teunis}} -\item{legend}{A boolean (TRUE by default) governing the presence / absence of legends on the plots -This specifies the position of the legend in the plot. Alternatively, \code{locator(1)} can be used ; the user will then need to click where the legend needs to be written.} -} -\value{ -{ - a list with components: - \itemize{ - \item{R}{: a dataframe containing: - the times of start and end of each time window considered ; - the estimated mean, std, and 0.025 and 0.975 quantiles of the reproduction number for each time window.} - \item{method}{: the method used to estimate R, one of "non_parametric_si", "parametric_si", "uncertain_si", "si_from_data" or "si_from_sample"} - \item{si_distr}{: a vector containing the discrete serial interval distribution used for estimation} - \item{SI.Moments}{: a vector containing the mean and std of the discrete serial interval distribution(s) used for estimation} - \item{I}{: the time series of total incidence} - \item{I_local}{: the time series of incidence of local cases (so that \code{I_local + I_imported = I})} - \item{I_imported}{: the time series of incidence of imported cases (so that \code{I_local + I_imported = I})} - \item{dates}{: a vector of dates corresponding to the incidence time series} - } - } +\item{leg.pos}{Not used anymore, only there for compatibility} } \description{ -\code{WT} estimates the case reproduction number of an epidemic, given the incidence time series and the serial interval distribution. -} -\details{ -{ -Estimates of the case reproduction number for an epidemic over predefined time windows can be obtained, -for a given discrete distribution of the serial interval, as proposed by Wallinga and Teunis (AJE, 2004). -Confidence intervals are obtained by simulating a number (nSim) of possible transmission trees. - -The methods vary in the way the serial interval distribution is specified. - ------------------------ \code{method "non_parametric_si"} ----------------------- - -The discrete distribution of the serial interval is directly specified in the argument \code{si_distr}. - -If \code{plot} is \code{TRUE}, 3 plots are produced. -The first one shows the epidemic curve. -The second one shows the posterior mean and 95\% credible interval of the reproduction number. The estimate for a time window is plotted at the end of the time window. -The third plot shows the discrete distribution of the serial interval. - ------------------------ \code{method "parametric_si"} ----------------------- - -The mean and standard deviation of the continuous distribution of the serial interval are given in the arguments \code{mean_si} and \code{std_si}. -The discrete distribution of the serial interval is derived automatically using \code{\link{DiscrSI}}. - -If \code{plot} is \code{TRUE}, 3 plots are produced, which are identical to the ones for \code{method "non_parametric_si"} . -} -} -\examples{ -## load data on pandemic flu in a school in 2009 -data("Flu2009") - -## estimate the case reproduction number (method "non_parametric_si") -WT(Flu2009$incidence, t_start=2:26, t_end=8:32, method="non_parametric_si", - si_distr=Flu2009$si_distr, plot=TRUE, nSim=100) -# the second plot produced shows, at each each day, -# the estimate of the cqse reproduction number over the 7-day window finishing on that day. - -## estimate the case reproduction number (method "parametric_si") -WT(Flu2009$incidence, t_start=2:26, t_end=8:32, method="parametric_si", - mean_si=2.6, std_si=1.5, plot=TRUE, nSim=100) -# the second plot produced shows, at each each day, -# the estimate of the case reproduction number over the 7-day window finishing on that day. -} -\references{ -{ -Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). -Wallinga, J. and P. Teunis. Different epidemic curves for severe acute respiratory syndrome reveal similar impacts of control measures (AJE 2004). -} -} -\seealso{ -\code{\link{DiscrSI}}, \code{\link{EstimateR}} -} -\author{ -Anne Cori \email{a.cori@imperial.ac.uk} +Please only use for compatibility; +Prefer the new wallinga_teunis function instead } diff --git a/man/check_cdt_samples_convergence.Rd b/man/check_cdt_samples_convergence.Rd index a82f066..ad42104 100644 --- a/man/check_cdt_samples_convergence.Rd +++ b/man/check_cdt_samples_convergence.Rd @@ -1,34 +1,51 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/check_cdt_samples_covergence.R +% Please edit documentation in R/check_cdt_samples_convergence.R \name{check_cdt_samples_convergence} \alias{check_cdt_samples_convergence} -\title{check_cdt_samples_convergence TITLE} +\title{Checking convergence of an MCMC chain by using the Gelman-Rubin algorithm} \usage{ -check_cdt_samples_convergence(cdtsamples) +check_cdt_samples_convergence(cdt_samples) } \arguments{ -\item{cdtsamples}{XXXXXXX.} +\item{cdt_samples}{the \code{@sample} slot of a \code{cd.fit.mcmc} S4 object (see package \code{coarseDataTools})} } \value{ TRUE if the Gelman Rubin test for convergence was successful, FALSE otherwise } \description{ -\code{check_cdt_samples_convergence} DESCRIPTION TO COME. +\code{check_cdt_samples_convergence} Checking convergence of an MCMC chain by using the Gelman-Rubin algorithm } \details{ { -XXXXXXX. +This function splits an MCMC chain in two halfs and uses the Gelman-Rubin algorithm to assess convergence of the chain by comparing its two halves. } } \examples{ -## XXXXXXX. -} -\references{ -XXXXXXX. +\dontrun{ +## Note the following examples use an MCMC routine +## to estimate the serial interval distribution from data, +## so they may take a few minutes to run + +## load data on rotavirus +data("MockRotavirus") + +## estimate the serial interval from data +SI_fit <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, + dist="G", + init_pars=init_mcmc_params(MockRotavirus$si_data, "G"), + burnin = 1000, + n.samples = 5000) + +## use check_cdt_samples_convergence to check convergence +converg_diag <- check_cdt_samples_convergence(SI_fit@samples) +converg_diag + +} + } \seealso{ -XXXXXXX. +\code{\link{estimate_r}} } \author{ -XXXXXXX. +Anne Cori } diff --git a/man/coarse2estim.Rd b/man/coarse2estim.Rd index dc7e898..f42fbd5 100755 --- a/man/coarse2estim.Rd +++ b/man/coarse2estim.Rd @@ -24,7 +24,7 @@ A list with two elements: } } \description{ -\code{coarse2estim} Transforms outputs of \code{coarseDataTools::dic.fit.mcmc} to right format for input into \code{EstimateR} +\code{coarse2estim} Transforms outputs of \code{coarseDataTools::dic.fit.mcmc} to right format for input into \code{estimate_r} } \examples{ \dontrun{ @@ -38,24 +38,26 @@ data("MockRotavirus") ## estimate the serial interval from data SI.fit <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, dist="G", - init.pars=init_MCMC_params(MockRotavirus$si_data, "G") + init.pars=init_mcmc_params(MockRotavirus$si_data, "G"), burnin = 1000, n.samples = 5000) -## use coarse2estim to turn this in the right format for EstimateR +## use coarse2estim to turn this in the right format for estimate_r si_sample <- coarse2estim(SI.fit, thin=10)$si_sample -## use EstimateR to estimate the reproduction number based on these estimates of the serial interval -R_si_from_sample <- EstimateR(MockRotavirus$incidence, - t_start=2:47, t_end=8:53, - method="si_from_sample", si_sample=si_sample, +## use estimate_r to estimate the reproduction number +## based on these estimates of the serial interval +R_si_from_sample <- estimate_r(MockRotavirus$incidence, + method="si_from_sample", + si_sample=si_sample, + config = list(t_start=2:47, t_end=8:53, n2 = 50, - plot=TRUE, leg.pos=xy.coords(1,3)) + plot=TRUE)) } } \seealso{ -\code{\link{EstimateR}} +\code{\link{estimate_r}} } \author{ The Hackout3 Parameter Estimation team. diff --git a/man/discr_si.Rd b/man/discr_si.Rd new file mode 100644 index 0000000..e479b08 --- /dev/null +++ b/man/discr_si.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discr_si.R +\name{discr_si} +\alias{discr_si} +\title{Discretized Generation Time Distribution Assuming A Shifted Gamma Distribution} +\usage{ +discr_si(k, mu, sigma) +} +\arguments{ +\item{k}{Positive integer, or vector of positive ingerers for which the discrete distribution is desired.} + +\item{mu}{A positive real giving the mean of the Gamma distribution.} + +\item{sigma}{A non-negative real giving the standard deviation of the Gamma distribution.} +} +\value{ +Gives the discrete probability \eqn{w_k} that the serial interval is equal to \eqn{k}. +} +\description{ +\code{discr_si} computes the discrete distribution of the serial interval, assuming that the serial interval is shifted Gamma distributed, with shift 1. +} +\details{ +{ +Assuming that the serial interval is shifted Gamma distributed with mean \eqn{\mu}, standard deviation \eqn{\sigma} and shift \eqn{1}, +the discrete probability \eqn{w_k} that the serial interval is equal to \eqn{k} is: + +\deqn{w_k = kF_{\{\mu-1,\sigma\}}(k)+(k-2)F_{\{\mu-1,\sigma\}}(k-2)-2(k-1)F_{\{\mu-1,\sigma\}}(k-1)\\+(\mu-1)(2F_{\{\mu-1+\frac{\sigma^2}{\mu-1},\sigma\sqrt{1+\frac{\sigma^2}{\mu-1}}\}}(k-1)-F_{\{\mu-1+\frac{\sigma^2}{\mu-1},\sigma\sqrt{1+\frac{\sigma^2}{\mu-1}}\}}(k-2)-F_{\{\mu-1+\frac{\sigma^2}{\mu-1},\sigma\sqrt{1+\frac{\sigma^2}{\mu-1}}\}}(k))} + +where \eqn{F_{\{\mu,\sigma\}}} is the cumulative density function of a Gamma distribution with mean \eqn{\mu} and standard deviation \eqn{\sigma}. +} +} +\examples{ +## Computing the discrete serial interval of influenza +mean_flu_si <- 2.6 +sd_flu_si <- 1.5 +dicrete_si_distr <- discr_si(0:20, mean_flu_si, sd_flu_si) +plot(0:20, dicrete_si_distr, type = "h", + lwd = 10, lend = 1, xlab = "time (days)", ylab = "frequency") +title(main = "Discrete distribution of the serial interval of influenza") +} +\references{ +Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). +} +\seealso{ +\code{\link{overall_infectivity}}, \code{\link{estimate_r}} +} +\author{ +Anne Cori \email{a.cori@imperial.ac.uk} +} diff --git a/man/estimate_r.Rd b/man/estimate_r.Rd new file mode 100644 index 0000000..c0385cc --- /dev/null +++ b/man/estimate_r.Rd @@ -0,0 +1,277 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimate_r.R +\name{estimate_r} +\alias{estimate_r} +\title{Estimated Instantaneous Reproduction Number} +\usage{ +estimate_r(incid, method = c("non_parametric_si", "parametric_si", + "uncertain_si", "si_from_data", "si_from_sample"), si_data = NULL, + si_sample = NULL, config) +} +\arguments{ +\item{incid}{One of the following +\itemize{ +\item{A vector (or a dataframe with a single column) of non-negative integers containing the incidence time series} +\item{A dataframe of non-negative integers with either i) \code{incid$I} containing the total incidence, or ii) two columns, +so that \code{incid$local} contains the incidence of cases due to local transmission and +\code{incid$imported} contains the incidence of imported cases (with \code{incid$local + incid$imported} +the total incidence). If the dataframe contains a column \code{incid$dates}, this is used for plotting. +\code{incid$dates} must contains only dates in a row.} +\item{An object of class \code{\link[incidence]{incidence}}} +} +Note that the cases from the first time step are always all assumed to be imported cases.} + +\item{method}{One of "non_parametric_si", "parametric_si", "uncertain_si", "si_from_data" or "si_from_sample" (see details).} + +\item{si_data}{For method "si_from_data" ; the data on dates of symptoms of pairs of infector/infected individuals to be used to estimate the serial interval distribution (see details).} + +\item{si_sample}{For method "si_from_sample" ; a matrix where each column gives one distribution of the serial interval to be explored (see details).} + +\item{config}{A list containing the following: +\describe{ +\item{t_start}{Vector of positive integers giving the starting times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}. t_start[1] should be strictly after the first day with non null incidence.} +\item{t_end}{Vector of positive integers giving the ending times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}.} +\item{n1}{For method "uncertain_si" and "si_from_data"; positive integer giving the size of the sample of SI distributions to be drawn (see details).} +\item{n2}{For methods "uncertain_si", "si_from_data" and "si_from_sample"; positive integer giving the size of the sample drawn from the posterior distribution of R for each serial interval distribution considered (see details).} +\item{mean_si}{For method "parametric_si" and "uncertain_si" ; positive real giving the mean serial interval (method "parametric_si") or the average mean serial interval (method "uncertain_si", see details).} +\item{std_si}{For method "parametric_si" and "uncertain_si" ; non negative real giving the stadard deviation of the serial interval (method "parametric_si") or the average standard deviation of the serial interval (method "uncertain_si", see details).} +\item{std_mean_si}{For method "uncertain_si" ; standard deviation of the distribution from which mean serial intervals are drawn (see details).} +\item{min_mean_si}{For method "uncertain_si" ; lower bound of the distribution from which mean serial intervals are drawn (see details).} +\item{max_mean_si}{For method "uncertain_si" ; upper bound of the distribution from which mean serial intervals are drawn (see details).} +\item{std_std_si}{For method "uncertain_si" ; standard deviation of the distribution from which standard deviations of the serial interval are drawn (see details).} +\item{min_std_si}{For method "uncertain_si" ; lower bound of the distribution from which standard deviations of the serial interval are drawn (see details).} +\item{max_std_si}{For method "uncertain_si" ; upper bound of the distribution from which standard deviations of the serial interval are drawn (see details).} +\item{si_distr}{For method "non_parametric_si" ; vector of probabilities giving the discrete distribution of the serial interval, starting with \code{si_distr[1]} (probability that the serial interval is zero), which should be zero.} +\item{si_parametric_distr}{For method "si_from_data" ; the parametric distribution to use when estimating the serial interval from data on dates of symptoms of pairs of infector/infected individuals (see details). +Should be one of "G" (Gamma), "W" (Weibull), "L" (Lognormal), "off1G" (Gamma shifted by 1), "off1W" (Weibull shifted by 1), or "off1L" (Lognormal shifted by 1).} +\item{mcmc_control}{For method "si_from_data" ; a list containing the following (see details): +\describe{ + \item{init_pars}{vector of size 2 corresponding to the initial values of parameters to use for the SI distribution. This is the shape and scale for all but the lognormal distribution, for which it is the meanlog and sdlog. If not specified these are chosen automatically using function \code{\link{init_mcmc_params}}.} + \item{burnin}{a positive integer giving the burnin used in the MCMC when estimating the serial interval distribution.} + \item{thin}{a positive integer corresponding to thinning parameter; the MCMC will be run for \code{burnin+n1*thin iterations}; 1 in \code{thin} iterations will be recorded, after the burnin phase, so the posterior sample size is n1.} + \item{seed}{An integer used as the seed for the random number generator at the start of the MCMC estimation; useful to get reproducible results.} +}} +\item{seed}{An optional integer used as the seed for the random number generator at the start of the function (then potentially reset within the MCMC for method \code{si_from_data}); useful to get reproducible results.} +\item{mean_prior}{A positive number giving the mean of the common prior distribution for all reproduction numbers (see details).} +\item{std_prior}{A positive number giving the standard deviation of the common prior distribution for all reproduction numbers (see details).} +\item{cv_posterior}{A positive number giving the aimed posterior coefficient of variation (see details).} +\item{plot}{Logical. If \code{TRUE} (default is \code{FALSE}), output is plotted (see value).} +\item{legend}{A boolean (TRUE by default) governing the presence / absence of legends on the plots} +}} +} +\value{ +{ +a list with components: +\itemize{ +\item{R}{: a dataframe containing: +the times of start and end of each time window considered ; +the posterior mean, std, and 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975 quantiles of the reproduction number for each time window.} + \item{method}{: the method used to estimate R, one of "non_parametric_si", "parametric_si", "uncertain_si", "si_from_data" or "si_from_sample"} + \item{si_distr}{: a vector or dataframe (depending on the method) containing the discrete serial interval distribution(s) used for estimation} + \item{SI.Moments}{: a vector or dataframe (depending on the method) containing the mean and std of the discrete serial interval distribution(s) used for estimation} + \item{I}{: the time series of total incidence} + \item{I_local}{: the time series of incidence of local cases (so that \code{I_local + I_imported = I})} + \item{I_imported}{: the time series of incidence of imported cases (so that \code{I_local + I_imported = I})} + \item{dates}{: a vector of dates corresponding to the incidence time series} + \item{MCMC_converged}{ (only for method \code{si_from_data}): a boolean showing whether the Gelman-Rubin MCMC convergence diagnostic was successful (\code{TRUE}) or not (\code{FALSE})} +} +} +} +\description{ +\code{estimate_r} estimates the reproduction number of an epidemic, given the incidence time series and the serial interval distribution. +} +\details{ +{ +Analytical estimates of the reproduction number for an epidemic over predefined time windows can be obtained within a Bayesian framework, +for a given discrete distribution of the serial interval (see references). + +The more incident cases are observed over a time window, the smallest the posterior coefficient of variation (CV, ratio of standard deviation over mean) of the reproduction number. +An aimed CV can be specified in the argument \code{cv_posterior} (default is \code{0.3}), and a warning will be produced if the incidence within one of the time windows considered is too low to get this CV. + +The methods vary in the way the serial interval distribution is specified. + +In short there are five methods to specify the serial interval distribution (see below for more detail on each method). +In the first two methods, a unique serial interval distribution is considered, whereas in the last three, a range of serial interval distributions are integrated over: +\itemize{ +\item{In method "non_parametric_si" the user specifies the discrete distribution of the serial interval} +\item{In method "parametric_si" the user specifies the mean and sd of the serial interval} +\item{In method "uncertain_si" the mean and sd of the serial interval are each drawn from truncated normal distributions, with parameters specified by the user} +\item{In method "si_from_data", the serial interval distribution is directly estimated, using MCMC, from interval censored exposure data, with data provided by the user together with a choice of parametric distribution for the serial interval} +\item{In method "si_from_sample", the user directly provides the sample of serial interval distribution to use for estimation of R. This can be a useful alternative to the previous method, where the MCMC estimation of the serial interval distribution could be run once, and the same estimated SI distribution then used in estimate_r in different contexts, e.g. with different time windows, hence avoiding to rerun the MCMC everytime estimate_r is called.} +} + +If \code{plot} is \code{TRUE}, 3 plots are produced. +The first one shows the epidemic curve. +The second one shows the posterior mean and 95\% credible interval of the reproduction number. The estimate for a time window is plotted at the end of the time window. +The third plot shows the discrete distribution(s) of the serial interval. + +----------------------- \code{method "non_parametric_si"} ----------------------- + +The discrete distribution of the serial interval is directly specified in the argument \code{si_distr}. + +----------------------- \code{method "parametric_si"} ----------------------- + +The mean and standard deviation of the continuous distribution of the serial interval are given in the arguments \code{mean_si} and \code{std_si}. +The discrete distribution of the serial interval is derived automatically using \code{\link{discr_si}}. + +----------------------- \code{method "uncertain_si"} ----------------------- + +\code{Method "uncertain_si"} allows accounting for uncertainty on the serial interval distribution as described in Cori et al. AJE 2013. +We allow the mean \eqn{\mu} and standard deviation \eqn{\sigma} of the serial interval to vary according to truncated normal distributions. +We sample \code{n1} pairs of mean and standard deviations, \eqn{(\mu^{(1)},\sigma^{(1)}),...,(\mu^{(n_2)},\sigma^{(n_2)})}, by first sampling the mean \eqn{\mu^{(k)}} +from its truncated normal distribution (with mean \code{mean_si}, standard deviation \code{std_mean_si}, minimum \code{min_mean_si} and maximum \code{max_mean_si}), +and then sampling the standard deviation \eqn{\sigma^{(k)}} from its truncated normal distribution +(with mean \code{std_si}, standard deviation \code{std_std_si}, minimum \code{min_std_si} and maximum \code{max_std_si}), but imposing that \eqn{\sigma^{(k)}<\mu^{(k)}}. +This constraint ensures that the Gamma probability density function of the serial interval is null at \eqn{t=0}. +Warnings are produced when the truncated normal distributions are not symmetric around the mean. +For each pair \eqn{(\mu^{(k)},\sigma^{(k)})}, we then draw a sample of size \code{n2} in the posterior distribution of the reproduction number over each time window, conditionnally on this serial interval distribution. +After pooling, a sample of size \eqn{\code{n1}\times\code{n2}} of the joint posterior distribution of the reproduction number over each time window is obtained. +The posterior mean, standard deviation, and 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975 quantiles of the reproduction number for each time window are obtained from this sample. + +----------------------- \code{method "si_from_data"} ----------------------- + +\code{Method "si_from_data"} allows accounting for uncertainty on the serial interval distribution. +Unlike method "uncertain_si", where we arbitrarily vary the mean and std of the SI in truncated normal distributions, +here, the scope of serial interval distributions considered is directly informed by data +on the (potentially censored) dates of symptoms of pairs of infector/infected individuals. +This data, specified in argument \code{si_data}, should be a dataframe with 5 columns: +\itemize{ +\item{EL: the lower bound of the symptom onset date of the infector (given as an integer)} +\item{ER: the upper bound of the symptom onset date of the infector (given as an integer). Should be such that ER>=EL} +\item{SL: the lower bound of the symptom onset date of the infected indivdiual (given as an integer)} +\item{SR: the upper bound of the symptom onset date of the infected indivdiual (given as an integer). Should be such that SR>=SL} +\item{type (optional): can have entries 0, 1, or 2, corresponding to doubly interval-censored, single interval-censored or exact observations, respectively, see Reich et al. Statist. Med. 2009. If not specified, this will be automatically computed from the dates} +} +Assuming a given parametric distribution for the serial interval distribution (specified in si_parametric_distr), +the posterior distribution of the serial interval is estimated directly fom these data using MCMC methods implemented in the package \code{coarsedatatools}. +The argument \code{mcmc_control} is a list of characteristics which control the MCMC. +The MCMC is run for a total number of iterations of \code{mcmc_control$burnin + n1*mcmc_control$thin}; +but the output is only recorded after the burnin, and only 1 in every \code{mcmc_control$thin} iterations, +so that the posterior sample size is \code{n1}. +For each element in the posterior sample of serial interval distribution, +we then draw a sample of size \code{n2} in the posterior distribution of the reproduction number over each time window, +conditionnally on this serial interval distribution. +After pooling, a sample of size \eqn{\code{n1}\times\code{n2}} of the joint posterior distribution of +the reproduction number over each time window is obtained. +The posterior mean, standard deviation, and 0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975 quantiles of the reproduction number for each time window are obtained from this sample. + +----------------------- \code{method "si_from_sample"} ----------------------- + +\code{Method "si_from_sample"} also allows accounting for uncertainty on the serial interval distribution. +Unlike methods "uncertain_si" and "si_from_data", the user directly provides (in argument \code{si_sample}) a sample of serial interval distribution to be explored. + +} +} +\examples{ +## load data on pandemic flu in a school in 2009 +data("Flu2009") + +## estimate the reproduction number (method "non_parametric_si") +estimate_r(Flu2009$incidence, method="non_parametric_si", + config=list(t_start=2:26, t_end=8:32, + si_distr=Flu2009$si_distr, plot=TRUE)) +# the second plot produced shows, at each each day, +# the estimate of the reproduction number over the 7-day window finishing on that day. + +## example with an incidence object + +# create fake data +library(incidence) +data <- c(0,1,1,2,1,3,4,5,5,5,5,4,4,26,6,7,9) +location <- sample(c("local","imported"), length(data), replace=TRUE) +location[1] <- "imported" # forcing the first case to be imported +# get incidence per group (location) +incid <- incidence(data, groups = location) +# Estimate R with assumptions on serial interval +estimate_r(incid, method = "parametric_si", + config=list(t_start = 2:21, t_end = 8:27, + mean_si = 2.6, std_si = 1.5, plot = TRUE)) + +## estimate the reproduction number (method "parametric_si") +estimate_r(Flu2009$incidence, method="parametric_si", + config=list(t_start=2:26, t_end=8:32, + mean_si=2.6, std_si=1.5, plot=TRUE)) +# the second plot produced shows, at each each day, +# the estimate of the reproduction number over the 7-day window finishing on that day. + +## estimate the reproduction number (method "uncertain_si") +estimate_r(Flu2009$incidence, method="uncertain_si", + config=list(t_start=2:26, t_end=8:32, + mean_si=2.6, std_mean_si=1, min_mean_si=1, max_mean_si=4.2, + std_si=1.5, std_std_si=0.5, min_std_si=0.5, max_std_si=2.5, + n1=100, n2=100, plot=TRUE)) +# the bottom left plot produced shows, at each each day, +# the estimate of the reproduction number over the 7-day window finishing on that day. + +\dontrun{ +## Note the following examples use an MCMC routine +## to estimate the serial interval distribution from data, +## so they may take a few minutes to run + +## load data on rotavirus +data("MockRotavirus") + +## estimate the reproduction number (method "si_from_data") +MCMC_seed <- 1 +overall_seed <- 2 +R_si_from_data <- estimate_r(MockRotavirus$incidence, + method="si_from_data", + si_data=MockRotavirus$si_data, + config=list(t_start=2:47, t_end=8:53, + si_parametric_distr = "G", + mcmc_control = list(burnin = 1000, + thin=10, seed = MCMC_seed), + n1 = 500, n2 = 50, + seed = overall_seed, + plot=TRUE)) +## compare with version with no uncertainty +R_Parametric <- estimate_r(MockRotavirus$incidence, + method="parametric_si", + config=list(t_start=2:47, t_end=8:53, + mean_si = mean(R_si_from_data$SI.Moments$Mean), + std_si = mean(R_si_from_data$SI.Moments$Std), + plot=TRUE)) +## generate plots +p_uncertainty <- plots(R_si_from_data, "R", options_R=list(ylim=c(0, 1.5))) +p_no_uncertainty <- plots(R_Parametric, "R", options_R=list(ylim=c(0, 1.5))) +gridExtra::grid.arrange(p_uncertainty, p_no_uncertainty,ncol=2) +# the left hand side graph is with uncertainty in the SI distribution, the right hand side without. +# The credible intervals are wider when accounting for uncertainty in the SI distribution. + +#' ## estimate the reproduction number (method "si_from_sample") +MCMC_seed <- 1 +overall_seed <- 2 +SI.fit <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, + dist = "G", + init.pars = init_mcmc_params(MockRotavirus$si_data, "G"), + burnin = 1000, + n.samples = 5000, + seed = MCMC_seed) +si_sample <- coarse2estim(SI.fit, thin=10)$si_sample +R_si_from_sample <- estimate_r(MockRotavirus$incidence, + method = "si_from_sample", si_sample = si_sample, + config = list(t_start = 2:47, t_end = 8:53, + n2 = 50, + seed = overall_seed, + plot = TRUE)) + +# check that R_si_from_sample is the same as R_si_from_data +# since they were generated using the same MCMC algorithm to generate the SI sample +# (either internally to EpiEstim or externally) +all(R_si_from_sample$R$`Mean(R)` == R_si_from_data$R$`Mean(R)`) +} + +} +\references{ +{ +Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). +Wallinga, J. and P. Teunis. Different epidemic curves for severe acute respiratory syndrome reveal similar impacts of control measures (AJE 2004). +Reich, N.G. et al. Estimating incubation period distributions with coarse data (Statis. Med. 2009) +} +} +\seealso{ +\code{\link{discr_si}} +} +\author{ +Anne Cori \email{a.cori@imperial.ac.uk} +} diff --git a/man/flu_2009_NYC_school.Rd b/man/flu_2009_NYC_school.Rd new file mode 100644 index 0000000..6a67a8a --- /dev/null +++ b/man/flu_2009_NYC_school.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{flu_2009_NYC_school} +\alias{flu_2009_NYC_school} +\title{Data on the 2009 H1N1 influenza pandemic in a school in New York city} +\format{A list of two elements: +\describe{ + \item{incidence}{a dataframe with 14 lines containing dates in first column, and daily incidence in second column ,} + \item{si_data}{a dataframe containing data on the generation time for 16 pairs of infector/infected individuals + (see references and see argument \code{si_data} of function \code{estimate_r} for details on columns) } +}} +\source{ +Lessler J. et al. (2009) Outbreak of 2009 pandemic influenza A (H1N1) at a New York City school. New Eng J Med 361: 2628-2636. +} +\description{ +This data set gives: +1/ the daily incidence of self-reported and laboratory-confirmed cases of influenza +amongst children in a school in New York city during the 2009 H1N1 influenza pandemic (see source and references), +2/ interval-censored serial interval data from the 2009 outbreak of H1N1 influenza +in a New York city school (see references). +} +\examples{ +\dontrun{ +## Note the following examples use an MCMC routine +## to estimate the serial interval distribution from data, +## so they may take a few minutes to run + +## load data on pandemic flu in a New York school in 2009 +data("flu_2009_NYC_school") + +## estimate the reproduction number (method "si_from_data") +estimate_r(flu_2009_NYC_school$incidence, method="si_from_data", + si_data = flu_2009_NYC_school$si_data, + config=list(t_start=2:8, t_end=8:14, + si_parametric_distr = "G", + mcmc_control = list(burnin = 1000, + thin=10, seed = 1), + n1 = 1000, n2 = 50, + plot=TRUE) + ) +# the second plot produced shows, at each each day, +# the estimate of the reproduction number +# over the 7-day window finishing on that day. +} +} +\references{ +{ +Lessler J. et al. (2009) Outbreak of 2009 pandemic influenza A (H1N1) at a New York City school. New Eng J Med 361: 2628-2636. } +} diff --git a/man/init_MCMC_params.Rd b/man/init_MCMC_params.Rd index be5f454..9ddb8c1 100644 --- a/man/init_MCMC_params.Rd +++ b/man/init_MCMC_params.Rd @@ -1,36 +1,74 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/init_MCMC_params.R -\name{init_MCMC_params} -\alias{init_MCMC_params} -\title{init_MCMC_params TITLE} +% Please edit documentation in R/init_mcmc_params.R +\name{init_mcmc_params} +\alias{init_mcmc_params} +\title{init_mcmc_params Finds clever starting points for the MCMC to be used to estimate the serial interval, +e.g. when using option \code{si_from_data} in \code{estimate_r}} \usage{ -init_MCMC_params(si_data, dist = c("G", "W", "L", "off1G", "off1W", "off1L")) +init_mcmc_params(si_data, dist = c("G", "W", "L", "off1G", "off1W", "off1L")) } \arguments{ -\item{si_data}{XXXXXXX.} +\item{si_data}{the data on dates of symptoms of pairs of infector/infected individuals to be used to estimate the serial interval distribution. +This should be a dataframe with 5 columns: +\itemize{ +\item{EL: the lower bound of the symptom onset date of the infector (given as an integer)} +\item{ER: the upper bound of the symptom onset date of the infector (given as an integer). Should be such that ER>=EL} +\item{SL: the lower bound of the symptom onset date of the infected indivdiual (given as an integer)} +\item{SR: the upper bound of the symptom onset date of the infected indivdiual (given as an integer). Should be such that SR>=SL} +\item{type (optional): can have entries 0, 1, or 2, corresponding to doubly interval-censored, single interval-censored or exact observations, +respectively, see Reich et al. Statist. Med. 2009. If not specified, this will be automatically computed from the dates} +}} -\item{dist}{XXXXXXX.} +\item{dist}{the parametric distribution to use for the serial interval. +Should be one of "G" (Gamma), "W" (Weibull), "L" (Lognormal), "off1G" (Gamma shifted by 1), "off1W" (Weibull shifted by 1), or "off1L" (Lognormal shifted by 1).} } \value{ -XXXXXXX. +A vector containing the initial values for the two parameters of the distribution of the serial interval. +These are the shape and scale for all but the lognormal distribution, for which it is the meanlog and sdlog. } \description{ -\code{init_MCMC_params} DESCRIPTION TO COME. -} -\details{ -{ -XXXXXXX. -} +\code{init_mcmc_params} Finds values of the serial interval distribution parameters, used to initalise the MCMC estimation of the serial interval distribution. +Initial values are computed based on the observed mean and standard deviation of the sample from which the parameters are to be estiamted. } \examples{ -## XXXXXXX. +\dontrun{ +## Note the following examples use an MCMC routine +## to estimate the serial interval distribution from data, +## so they may take a few minutes to run + +## load data on rotavirus +data("MockRotavirus") + +## get clever initial values for shape and scale of a Gamma distribution +## fitted to the the data MockRotavirus$si_data +clever_init_param <- init_mcmc_params(MockRotavirus$si_data, "G") + +## estimate the serial interval from data using a clever starting point for the MCMC chain +SI_fit_clever <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, + dist="G", + init.pars=clever_init_param, + burnin = 1000, + n.samples = 5000) + +## estimate the serial interval from data using a random starting point for the MCMC chain +SI_fit_naive <- coarseDataTools::dic.fit.mcmc(dat = MockRotavirus$si_data, + dist="G", + burnin = 1000, + n.samples = 5000) + + +## use check_cdt_samples_convergence to check convergence in both situations +converg_diag_clever <- check_cdt_samples_convergence(SI_fit_clever@samples) +converg_diag_naive <- check_cdt_samples_convergence(SI_fit_naive@samples) +converg_diag_clever +converg_diag_naive + } -\references{ -XXXXXXX. + } \seealso{ -XXXXXXX. +\code{\link{estimate_r}} } \author{ -XXXXXXX. +Anne Cori } diff --git a/man/overall_infectivity.Rd b/man/overall_infectivity.Rd index 8839616..7dba179 100644 --- a/man/overall_infectivity.Rd +++ b/man/overall_infectivity.Rd @@ -4,13 +4,13 @@ \alias{overall_infectivity} \title{Overall Infectivity Due To Previously Infected Individuals} \usage{ -overall_infectivity(I, si_distr) +overall_infectivity(incid, si_distr) } \arguments{ -\item{I}{One of the following +\item{incid}{One of the following \itemize{ \item{A vector (or a dataframe with a single column) of non-negative integers containing an incidence time series} -\item{A dataframe of non-negative integers with two columns, so that \code{I$local} contains the incidence of cases due to local transmission and \code{I$imported} contains the incidence of imported cases (with \code{I$local + I$imported} the total incidence).} +\item{A dataframe of non-negative integers with two columns, so that \code{incid$local} contains the incidence of cases due to local transmission and \code{incid$imported} contains the incidence of imported cases (with \code{incid$local + incid$imported} the total incidence).} } Note that the cases from the first time step are always all assumed to be imported cases.} @@ -25,7 +25,7 @@ A vector which contains the overall infectivity \eqn{\lambda_t} at each time ste \details{ { The overall infectivity \eqn{\lambda_t} at time step \eqn{t} is equal to the sum of the previously infected individuals -(given by the incidence vector \eqn{I}, with \code{I=I$local + I$imported} if \eqn{I} is a matrix), +(given by the incidence vector \eqn{I}, with \code{I = incid$local + incid$imported} if \eqn{I} is a matrix), weigthed by their infectivity at time \eqn{t} (given by the discrete serial interval distribution \eqn{w_k}). In mathematical terms: \cr @@ -40,16 +40,16 @@ data("Flu2009") ## compute overall infectivity lambda <- overall_infectivity(Flu2009$incidence, Flu2009$si_distr) par(mfrow=c(2,1)) -plot(Flu2009$incidence, type="s", xlab="time (days)", ylab="incidence") -title(main="Epidemic curve") -plot(lambda, type="s", xlab="time (days)", ylab="Infectivity") -title(main="Overall infectivity") +plot(Flu2009$incidence, type = "s", xlab = "time (days)", ylab = "incidence") +title(main = "Epidemic curve") +plot(lambda, type = "s", xlab = "time (days)", ylab = "Infectivity") +title(main = "Overall infectivity") } \references{ Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). } \seealso{ -\code{\link{DiscrSI}}, \code{\link{EstimateR}} +\code{\link{discr_si}}, \code{\link{estimate_r}} } \author{ Anne Cori \email{a.cori@imperial.ac.uk} diff --git a/man/plots.Rd b/man/plots.Rd index 1eb211b..6aa6fb1 100644 --- a/man/plots.Rd +++ b/man/plots.Rd @@ -4,22 +4,27 @@ \alias{plots} \title{Plotting the outputs of functions estimating the reproduction number from incidence time series and assumptions regarding the serial interval distribution} \usage{ -plots(x = NULL, what = c("all", "I", "R", "SI"), +plots(x = NULL, what = c("all", "incid", "R", "SI"), add_imported_cases = FALSE, options_I = list(col = palette(), transp = 0.7, xlim = NULL, ylim = NULL), options_R = list(col = palette(), transp = 0.2, xlim = NULL, ylim = NULL), options_SI = list(prob_min = 0.001, col = "black", transp = 0.25, xlim = NULL, ylim = NULL), legend = TRUE) } \arguments{ -\item{x}{The output of function \code{\link{EstimateR}} or function \code{\link{WT}}, or a list of such outputs. If a list, and \code{what='R'} or \code{what='all'}, all estimates of R are plotted on a single graph.} +\item{x}{The output of function \code{\link{estimate_r}} or function \code{\link{wallinga_teunis}}, or a list of such outputs. +If a list, and \code{what='R'} or \code{what='all'}, all estimates of R are plotted on a single graph.} -\item{what}{A string specifying what to plot, namely the incidence time series (\code{what='I'}), the estimated reproduction number (\code{what='R'}), the serial interval distribution (\code{what='SI'}, or all three (\code{what='all'})).} +\item{what}{A string specifying what to plot, namely +the incidence time series (\code{what='incid'}), +the estimated reproduction number (\code{what='R'}), +the serial interval distribution (\code{what='SI'}, +or all three (\code{what='all'})).} \item{add_imported_cases}{A boolean to specify whether, on the incidence time series plot, to add the incidence of imported cases.} -\item{options_I}{For what = "I" or "all". A list of graphical options: +\item{options_I}{For what = "incid" or "all". A list of graphical options: \describe{ -\item{col}{A colour or vector of colours used for plotting I. By default uses the default R colours.} +\item{col}{A colour or vector of colours used for plotting incid. By default uses the default R colours.} \item{transp}{A numeric value between 0 and 1 used to monitor transparency of the bars plotted. Defaults to 0.7.} \item{xlim}{A parameter similar to that in \code{par}, to monitor the limits of the horizontal axis} \item{ylim}{A parameter similar to that in \code{par}, to monitor the limits of the vertical axis} @@ -45,17 +50,17 @@ plots(x = NULL, what = c("all", "I", "R", "SI"), \item{legend}{A boolean (TRUE by default) governing the presence / absence of legends on the plots} } \value{ -a plot (if \code{what = "I"}, \code{"R"}, or \code{"SI"}) or a \code{\link{grob}} object (if \code{what = "all"}). +a plot (if \code{what = "incid"}, \code{"R"}, or \code{"SI"}) or a \code{\link{grob}} object (if \code{what = "all"}). } \description{ -\code{plots} allows plotting the outputs of functions \code{\link{EstimateR}} and \code{\link{WT}} +\code{plots} allows plotting the outputs of functions \code{\link{estimate_r}} and \code{\link{wallinga_teunis}} } \examples{ ## load data on pandemic flu in a school in 2009 data("Flu2009") ## estimate the instantaneous reproduction number (method "non_parametric_si") -R_i <- EstimateR(Flu2009$incidence, method="non_parametric_si", +R_i <- estimate_r(Flu2009$incidence, method="non_parametric_si", config=list(t_start=2:26, t_end=8:32, si_distr=Flu2009$si_distr, plot=FALSE) ) @@ -64,14 +69,15 @@ R_i <- EstimateR(Flu2009$incidence, method="non_parametric_si", plots(R_i, legend = FALSE) ## estimate the instantaneous reproduction number (method "non_parametric_si") -R_c <- WT(Flu2009$incidence, t_start=2:26, t_end=8:32, method="non_parametric_si", - si_distr=Flu2009$si_distr, plot=FALSE) +R_c <- wallinga_teunis(Flu2009$incidence, method="non_parametric_si", + config = list(t_start=2:26, t_end=8:32, + si_distr=Flu2009$si_distr, plot=FALSE)) ## produce plot of the incidence ## (with, on top of total incidence, the incidence of imported cases), ## estimated instantaneous and case reproduction numbers ## and serial interval distribution used -p_I <- plots(R_i, "I", add_imported_cases=TRUE) # plots the incidence +p_I <- plots(R_i, "incid", add_imported_cases=TRUE) # plots the incidence p_SI <- plots(R_i, "SI") # plots the serial interval distribution p_Ri <- plots(R_i, "R", options_R = list(ylim=c(0,4))) @@ -83,7 +89,7 @@ gridExtra::grid.arrange(p_I,p_SI,p_Ri,p_Rc,ncol=2) } \seealso{ -\code{\link{EstimateR}} and \code{\link{WT}} +\code{\link{estimate_r}} and \code{\link{wallinga_teunis}} } \author{ Rolina van Gaalen \email{rolina.van.gaalen@rivm.nl} and Anne Cori \email{a.cori@imperial.ac.uk} diff --git a/man/wallinga_teunis.Rd b/man/wallinga_teunis.Rd new file mode 100644 index 0000000..7313bad --- /dev/null +++ b/man/wallinga_teunis.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wallinga_teunis.R +\name{wallinga_teunis} +\alias{wallinga_teunis} +\title{Estimation of the case reproduction number using the Wallinga and Teunis method} +\usage{ +wallinga_teunis(incid, method = c("non_parametric_si", "parametric_si"), + config) +} +\arguments{ +\item{incid}{One of the following +\itemize{ +\item{Vector (or a dataframe with a column named 'incid') of non-negative integers containing an incidence time series. + If the dataframe contains a column \code{incid$dates}, this is used for plotting. + \code{incid$dates} must contains only dates in a row.} +\item{An object of class \code{\link[incidence]{incidence}}} + }} + +\item{method}{the method used to estimate R, one of "non_parametric_si", "parametric_si", "uncertain_si", "si_from_data" or "si_from_sample"} + +\item{config}{a list with the following elements: +\itemize{ +\item{t_start: Vector of positive integers giving the starting times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}. t_start[1] should be strictly after the first day with non null incidence.} +\item{t_end: Vector of positive integers giving the ending times of each window over which the reproduction number will be estimated. These must be in ascending order, and so that for all \code{i}, \code{t_start[i]<=t_end[i]}.} +\item{method: One of "non_parametric_si" or "parametric_si" (see details).} +\item{mean_si: For method "parametric_si" ; positive real giving the mean serial interval.} +\item{std_si: For method "parametric_si" ; non negative real giving the stadard deviation of the serial interval.} +\item{si_distr: For method "non_parametric_si" ; vector of probabilities giving the discrete distribution of the serial interval, starting with \code{si_distr[1]} (probability that the serial interval is zero), which should be zero.} +\item{n_sim: A positive integer giving the number of simulated epidemic trees used for computation of the confidence intervals of the case reproduction number (see details).} +\item{plot: Logical. If \code{TRUE} (default is \code{FALSE}), output is plotted (see value).} +\item{legend: A boolean (TRUE by default) governing the presence / absence of legends on the plots +This specifies the position of the legend in the plot. Alternatively, \code{locator(1)} can be used ; the user will then need to click where the legend needs to be written.} +}} +} +\value{ +{ + a list with components: + \itemize{ + \item{R}{: a dataframe containing: + the times of start and end of each time window considered ; + the estimated mean, std, and 0.025 and 0.975 quantiles of the reproduction number for each time window.} + \item{si_distr}{: a vector containing the discrete serial interval distribution used for estimation} + \item{SI.Moments}{: a vector containing the mean and std of the discrete serial interval distribution(s) used for estimation} + \item{I}{: the time series of total incidence} + \item{I_local}{: the time series of incidence of local cases (so that \code{I_local + I_imported = I})} + \item{I_imported}{: the time series of incidence of imported cases (so that \code{I_local + I_imported = I})} + \item{dates}{: a vector of dates corresponding to the incidence time series} + } + } +} +\description{ +\code{wallinga_teunis} estimates the case reproduction number of an epidemic, given the incidence time series and the serial interval distribution. +} +\details{ +{ +Estimates of the case reproduction number for an epidemic over predefined time windows can be obtained, +for a given discrete distribution of the serial interval, as proposed by Wallinga and Teunis (AJE, 2004). +Confidence intervals are obtained by simulating a number (config$n_sim) of possible transmission trees. + +The methods vary in the way the serial interval distribution is specified. + +----------------------- \code{method "non_parametric_si"} ----------------------- + +The discrete distribution of the serial interval is directly specified in the argument \code{config$si_distr}. + +If \code{config$plot} is \code{TRUE}, 3 plots are produced. +The first one shows the epidemic curve. +The second one shows the posterior mean and 95\% credible interval of the reproduction number. The estimate for a time window is plotted at the end of the time window. +The third plot shows the discrete distribution of the serial interval. + +----------------------- \code{method "parametric_si"} ----------------------- + +The mean and standard deviation of the continuous distribution of the serial interval are given in the arguments \code{config$mean_si} and \code{config$std_si}. +The discrete distribution of the serial interval is derived automatically using \code{\link{discr_si}}. + +If \code{config$plot} is \code{TRUE}, 3 plots are produced, which are identical to the ones for \code{method "non_parametric_si"} . +} +} +\examples{ +## load data on pandemic flu in a school in 2009 +data("Flu2009") + +## estimate the case reproduction number (method "non_parametric_si") +wallinga_teunis(Flu2009$incidence, + method="non_parametric_si", + config = list(t_start=2:26, t_end=8:32, + si_distr = Flu2009$si_distr, + n_sim=100, + plot=TRUE)) +# the second plot produced shows, at each each day, +# the estimate of the case reproduction number over the 7-day window finishing on that day. + +## estimate the case reproduction number (method "parametric_si") +wallinga_teunis(Flu2009$incidence, method="parametric_si", + config = list(t_start=2:26, t_end=8:32, + mean_si=2.6, std_si=1.5, + n_sim=100, + plot=TRUE)) +# the second plot produced shows, at each each day, +# the estimate of the case reproduction number over the 7-day window finishing on that day. +} +\references{ +{ +Cori, A. et al. A new framework and software to estimate time-varying reproduction numbers during epidemics (AJE 2013). +Wallinga, J. and P. Teunis. Different epidemic curves for severe acute respiratory syndrome reveal similar impacts of control measures (AJE 2004). +} +} +\seealso{ +\code{\link{discr_si}}, \code{\link{estimate_r}} +} +\author{ +Anne Cori \email{a.cori@imperial.ac.uk} +} diff --git a/tests/expected_output/Example4.RData b/tests/expected_output/Example4.RData index f6172f1..731bd8b 100644 Binary files a/tests/expected_output/Example4.RData and b/tests/expected_output/Example4.RData differ diff --git a/tests/testthat/tests.R b/tests/testthat/tests.R index b453078..5e71c51 100644 --- a/tests/testthat/tests.R +++ b/tests/testthat/tests.R @@ -41,7 +41,7 @@ test_that("Can import data from EpiEstim", { set.seed(1) test_that("Example 1 matches saved output", { - out <- EstimateR(Flu2009$incidence, method="non_parametric_si", + out <- estimate_r(Flu2009$incidence, method="non_parametric_si", config=list(t_start=2:26, t_end=8:32, si_distr=Flu2009$si_distr, plot=FALSE, seed=1)) compare_output(out, "Example1") }) @@ -51,22 +51,22 @@ test_that("Example 2 matches saved output", { location <- sample(c("local","imported"), length(data), replace=TRUE) location[1] <- "imported" # forcing the first case to be imported # get incidence per group (location) - I <- incidence(data, groups = location) - out <- EstimateR(I, method = "parametric_si", + incid <- incidence(data, groups = location) + out <- estimate_r(incid, method = "parametric_si", config=list(t_start = 2:21, t_end = 8:27, mean_si = 2.6, std_si = 1.5, plot = FALSE, seed=1)) compare_output(out, "Example2") }) test_that("Example 3 matches saved output", { ## estimate the reproduction number (method "parametric_si") - out <- EstimateR(Flu2009$incidence, method="parametric_si", + out <- estimate_r(Flu2009$incidence, method="parametric_si", config=list(t_start=2:26, t_end=8:32, mean_si=2.6, std_si=1.5, plot=FALSE, seed=1)) compare_output(out, "Example3") }) test_that("Example 4 matches saved output", { ## estimate the reproduction number (method "uncertain_si") - out <- EstimateR(Flu2009$incidence, method="uncertain_si", + out <- estimate_r(Flu2009$incidence, method="uncertain_si", config=list(t_start=2:26, t_end=8:32, mean_si=2.6, std_mean_si=1, min_mean_si=1, max_mean_si=4.2, std_si=1.5, std_std_si=0.5, min_std_si=0.5, max_std_si=2.5,