Skip to content

Commit

Permalink
Measurement bias seems to be working now
Browse files Browse the repository at this point in the history
  • Loading branch information
jameshay218 committed May 13, 2024
1 parent c1c09ba commit 92547e7
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 32 deletions.
6 changes: 3 additions & 3 deletions R/analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ generate_quantiles <- function(x, sig_f = 3, qs = c(0.025, 0.5, 0.975), as_text
#' @param par_tab the table controlling the parameters in the MCMC chain
#' @param nsamp number of samples to take from posterior
#' @param add_residuals if true, returns an extra output summarising residuals between the model prediction and data
#' @param measurement_indices_by_time default NULL, optional vector giving the index of `measurement_bias` that each strain uses the measurement shift from from. eg. if there's 6 circulation years and 3 strain clusters
#' @param measurement_bias default NULL, optional data frame giving the index of `rho` that each biomarker_id and biomarker_group which uses the measurement shift from from. eg. if there's 6 circulation years and 3 strain clusters
#' @param for_res_plot TRUE/FALSE value. If using the output of this for plotting of residuals, returns the actual data points rather than summary statistics
#' @param expand_antibody_data TRUE/FALSE value. If TRUE, solves antibody level predictions for the entire study period (i.e., between the range of antibody_data$sample_time). If left FALSE, then only solves for the infections times at which a antibody level against the circulating biomarker_id was measured in antibody_data.
#' @param expand_to_all_times TRUE/FALSE value. If TRUE, solves antibody level predictions for all possible infection times (i.e., for the range in possible_exposure_times). If left FALSE, then only solves for the infections times at which a antibody level against the circulating biomarker_id was measured in antibody_data.
Expand All @@ -321,7 +321,7 @@ get_antibody_level_predictions <- function(chain, infection_histories, antibody_
individuals, antigenic_map=NULL,
possible_exposure_times=NULL, par_tab,
nsamp = 1000, add_residuals = FALSE,
measurement_indices_by_time = NULL,
measurement_bias = NULL,
for_res_plot = FALSE, expand_antibody_data = FALSE,
expand_to_all_times=FALSE,
antibody_level_before_infection=FALSE, for_regression=FALSE,
Expand Down Expand Up @@ -406,7 +406,7 @@ get_antibody_level_predictions <- function(chain, infection_histories, antibody_
}
model_func <- create_posterior_func(par_tab, antibody_data1, antigenic_map, possible_exposure_times,
prior_version=2,
measurement_indices_by_time = measurement_indices_by_time, function_type = 4,
measurement_bias = measurement_bias, function_type = 4,
antibody_level_before_infection=antibody_level_before_infection,
data_type=data_type,start_level=start_level,
demographics=demographics
Expand Down
11 changes: 5 additions & 6 deletions R/mcmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @param posterior_func Pointer to posterior function used to calculate a likelihood. This will probably be \code{\link{create_posterior_func}}
#' @param prior_func User function of prior for model parameters. Should take parameter values only
#' @param prior_version which infection history assumption prior_version to use? See \code{\link{describe_priors}} for options. Can be 1, 2, 3 or 4
#' @param measurement_indices optional NULL. For measurement bias function. Vector of indices of length equal to number of circulation times. For each year, gives the index of parameters named "rho" that correspond to each time period
#' @param measurement_bias optional NULL. For measurement bias function. Vector of indices of length equal to number of circulation times. For each year, gives the index of parameters named "rho" that correspond to each time period
#' @param measurement_random_effects optional FALSE. Boolean indicating if measurement bias is a random effects term. If TRUE adds a component to the posterior calculation that calculates the probability of a set of measurement shifts "rho", given a mean and standard deviation
#' @param proposal_ratios optional NULL. Can set the relative sampling weights of the infection state times. Should be an integer vector of length matching nrow(antigenic_map). Otherwise, leave as NULL for uniform sampling.
#' @param random_start_parameters if FALSE, uses whatever parameter values were passed in `par_tab` as the starting positions for the MCMC chain
Expand Down Expand Up @@ -69,7 +69,7 @@ serosolver <- function(par_tab,
posterior_func = create_posterior_func,
prior_func = NULL,
prior_version = 2,
measurement_indices = NULL,
measurement_bias = NULL,
measurement_random_effects = FALSE,
proposal_ratios = NULL,
temp = 1,
Expand Down Expand Up @@ -264,7 +264,7 @@ serosolver <- function(par_tab,
prior_version=prior_version,
solve_likelihood,
age_mask,
measurement_indices_by_time = measurement_indices,
measurement_bias = measurement_bias,
n_alive = n_alive,
function_type = 1,
data_type=data_type,
Expand All @@ -289,7 +289,7 @@ serosolver <- function(par_tab,
prior_version=prior_version,
solve_likelihood,
age_mask,
measurement_indices_by_time = measurement_indices,
measurement_bias = measurement_bias,
n_alive = n_alive,
function_type = 2,
data_type=data_type,
Expand Down Expand Up @@ -352,7 +352,7 @@ serosolver <- function(par_tab,
possible_exposure_times=possible_exposure_times,
antigenic_map=antigenic_map,
start_levels=start_level,
measurement_indices=measurement_indices,
measurement_bias=measurement_bias,
data_type=data_type)

save(serosolver_settings,file=paste0(filename,"_serosolver_settings.RData"))
Expand Down Expand Up @@ -984,7 +984,6 @@ serosolver <- function(par_tab,
}
saved_wd <- normalizePath(saved_wd)
all_diagnostics <- suppressMessages(plot_mcmc_diagnostics(saved_wd, par_tab, adaptive_iterations))

par_est_table <- all_diagnostics$theta_estimates
diagnostic_warnings <- list()
if("Rhat upper CI" %in% colnames(par_est_table) && any(par_est_table[par_est_table$names != "total_infections","Rhat upper CI"] > 1.1)){
Expand Down
14 changes: 7 additions & 7 deletions R/plot_antibody_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ plot_model_fits <- function(chain, infection_histories,
possible_exposure_times=NULL,
nsamp = 1000,
known_infection_history=NULL,
measurement_indices_by_time = NULL,
measurement_bias = NULL,
p_ncol=max(1,floor(length(individuals)/2)),
data_type=1,
expand_to_all_times=FALSE,
Expand All @@ -151,7 +151,7 @@ plot_model_fits <- function(chain, infection_histories,
message("Using provided serosolver settings list")
if(is.null(antigenic_map)) antigenic_map <- settings$antigenic_map
if(is.null(possible_exposure_times)) possible_exposure_times <- settings$possible_exposure_times
if(is.null(measurement_indices_by_time)) measurement_indices_by_time <- settings$measurement_indices_by_time
if(is.null(measurement_bias)) measurement_bias <- settings$measurement_bias
if(is.null(antibody_data)) antibody_data <- settings$antibody_data
if(is.null(demographics)) demographics <- settings$demographics
if(is.null(par_tab)) par_tab <- settings$par_tab
Expand Down Expand Up @@ -180,7 +180,7 @@ plot_model_fits <- function(chain, infection_histories,
individuals,
antigenic_map, possible_exposure_times,
par_tab, nsamp, FALSE,
measurement_indices_by_time,
measurement_bias,
expand_antibody_data=TRUE,
expand_to_all_times=expand_to_all_times,
data_type=data_type,
Expand Down Expand Up @@ -397,15 +397,15 @@ plot_antibody_predictions <- function(chain, infection_histories,
antigenic_map=NULL,
possible_exposure_times=NULL,
nsamp = 1000,
measurement_indices_by_time = NULL,
measurement_bias = NULL,
data_type=1,
start_level="none",
settings=NULL){
## If the list of serosolver settings was included, use these rather than passing each one by one
if(!is.null(settings)){
message("Using provided serosolver settings list")
if(is.null(antigenic_map)) antigenic_map <- settings$antigenic_map
if(is.null(measurement_indices_by_time)) measurement_indices_by_time <- settings$measurement_indices_by_time
if(is.null(measurement_bias)) measurement_bias <- settings$measurement_bias
if(is.null(antibody_data)) antibody_data <- settings$antibody_data
if(is.null(demographics)) demographics <- settings$demographics
if(is.null(par_tab)) par_tab <- settings$par_tab
Expand All @@ -429,7 +429,7 @@ plot_antibody_predictions <- function(chain, infection_histories,
chain, infection_histories, antibody_data, individuals=unique(antibody_data$individual),
antigenic_map, possible_exposure_times,
par_tab, nsamp, FALSE,
measurement_indices_by_time,
measurement_bias,
expand_antibody_data=FALSE,
expand_to_all_times=FALSE,
data_type=data_type,
Expand Down Expand Up @@ -495,7 +495,7 @@ plot_antibody_predictions <- function(chain, infection_histories,

facet_wrap(~name) +
theme_minimal() +
theme(legend.position="none") +
theme(legend.position="bottom") +
xlab("Antibody level") +
ylab("Count") +
ggtitle("Predicted observations vs. measurements from random posterior draws")
Expand Down
7 changes: 6 additions & 1 deletion R/plots_other.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,12 @@ plot_posteriors_theta <- function(chain,par_tab){
p_trace_rho <- p_density_rho <- NULL
if("rho" %in% par_tab$names& "rho" %in% colnames(chain)){
rho_indices <- 1:nrow(par_tab_tmp[par_tab_tmp$names == "rho",])
rho_indices <- c("samp_no","chain_no",c("rho",paste0("rho.",rho_indices[1:(length(rho_indices)-1)])))
if(length(rho_indices) > 1){
additional <- paste0("rho.",rho_indices[1:(length(rho_indices)-1)])
} else {
additional <- NULL
}
rho_indices <- c("samp_no","chain_no",c("rho",additional))
p_trace_rho <- chain[,rho_indices] %>%
pivot_longer(-c(samp_no, chain_no)) %>%
ggplot() +
Expand Down
29 changes: 14 additions & 15 deletions R/posteriors.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' @param prior_version which infection history assumption version to use? See \code{\link{describe_priors}} for options. Can be 1, 2, 3 or 4
#' @param solve_likelihood usually set to TRUE. If FALSE, does not solve the likelihood and instead just samples/solves based on the model prior
#' @param age_mask see \code{\link{create_age_mask}} - a vector with one entry for each individual specifying the first epoch of circulation in which an individual could have been exposed
#' @param measurement_indices_by_time if not NULL, then use these indices to specify which measurement bias parameter index corresponds to which time
#' @param measurement_bias if not NULL, then use these indices to specify which measurement bias parameter index corresponds to which time
#' @param n_alive if not NULL, uses this as the number alive in a given year rather than calculating from the ages. This is needed if the number of alive individuals is known, but individual birth dates are not
#' @param function_type integer specifying which version of this function to use. Specify 1 to give a posterior solving function; 2 to give the gibbs sampler for infection history proposals; otherwise just solves the titre model and returns predicted titres. NOTE that this is not the same as the attack rate prior argument, \code{version}!
#' @param titre_before_infection TRUE/FALSE value. If TRUE, solves titre predictions, but gives the predicted titre at a given time point BEFORE any infection during that time occurs.
Expand Down Expand Up @@ -46,7 +46,7 @@ create_posterior_func <- function(par_tab,
prior_version = 1,
solve_likelihood = TRUE,
age_mask = NULL,
measurement_indices_by_time = NULL,
measurement_bias = NULL,
n_alive = NULL,
function_type = 1,
antibody_level_before_infection=FALSE,
Expand All @@ -72,9 +72,9 @@ create_posterior_func <- function(par_tab,
if(verbose) message(cat("Note: no biomarker_group detected in par_tab. Assuming all biomarker_group as 1.\n"))
par_tab$biomarker_group <- 1
}
if (!is.null(measurement_indices_by_time) & !("biomarker_group" %in% colnames(measurement_indices_by_time))) {
if(verbose) message(cat("Note: no biomarker_group detected in measurement_indices_by_time. Assuming all biomarker_group as 1.\n"))
measurement_indices_by_time$biomarker_group <- 1
if (!is.null(measurement_bias) & !("biomarker_group" %in% colnames(measurement_bias))) {
if(verbose) message(cat("Note: no biomarker_group detected in measurement_bias Assuming all biomarker_group as 1.\n"))
measurement_bias$biomarker_group <- 1
}

if(verbose){
Expand Down Expand Up @@ -309,17 +309,17 @@ create_posterior_func <- function(par_tab,
n_pars <- length(theta_indices_unique)

## Sort out any assumptions for measurement bias
use_measurement_bias <- (length(measurement_indices_par_tab) > 0) & !is.null(measurement_indices_by_time)
use_measurement_bias <- (length(measurement_indices_par_tab) > 0) & !is.null(measurement_bias)

antibody_level_shifts <- c(0)
expected_indices <- NULL
measurement_bias <- NULL
measurement_bias_indices <- NULL
additional_arguments <- NULL

repeat_data_exist <- nrow(antibody_data_repeats) > 0
if (use_measurement_bias) {
if(verbose) message(cat("Using measurement bias\n"))
expected_indices <- antibody_data_unique %>% left_join(measurement_indices_by_time,by = c("biomarker_id", "biomarker_group")) %>% pull(rho_index)
expected_indices <- antibody_data_unique %>% left_join(measurement_bias,by = c("biomarker_id", "biomarker_group")) %>% pull(rho_index)
} else {
expected_indices <- c(-1)
}
Expand Down Expand Up @@ -347,7 +347,6 @@ create_posterior_func <- function(par_tab,
if(data_type[biomarker_group] == 1){
likelihood_func_use[[biomarker_group]] <- likelihood_func_fast
if(verbose) message(cat("Setting to discretized, bounded observations\n"))

} else if(data_type[biomarker_group] == 2){
if(verbose) message(cat("Setting to continuous, bounded observations\n"))
likelihood_func_use[[biomarker_group]] <- likelihood_func_fast_continuous
Expand Down Expand Up @@ -412,8 +411,8 @@ create_posterior_func <- function(par_tab,
)

if (use_measurement_bias) {
measurement_bias <- pars[measurement_indices_par_tab]
antibody_level_shifts <- measurement_bias[expected_indices]
measurement_bias_indices <- pars[measurement_indices_par_tab]
antibody_level_shifts <- measurement_bias_indices[expected_indices]
y_new <- y_new + antibody_level_shifts
}
## Calculate likelihood for unique antibody_levels and repeat data
Expand Down Expand Up @@ -481,8 +480,8 @@ create_posterior_func <- function(par_tab,


if (use_measurement_bias) {
measurement_bias <- pars[measurement_indices_par_tab]
antibody_level_shifts <- measurement_bias[expected_indices]
measurement_bias_indices <- pars[measurement_indices_par_tab]
antibody_level_shifts <- measurement_bias_indices[expected_indices]
}
antigenic_map_long <- array(dim=c(length(possible_biomarker_ids)^2,n_biomarker_groups,n_demographic_groups))
antigenic_map_short <- array(dim=c(length(possible_biomarker_ids)^2,n_biomarker_groups,n_demographic_groups))
Expand Down Expand Up @@ -617,8 +616,8 @@ create_posterior_func <- function(par_tab,
antibody_level_before_infection
)
if (use_measurement_bias) {
measurement_bias <- pars[measurement_indices_par_tab]
antibody_level_shifts <- measurement_bias[expected_indices]
measurement_bias_indices <- pars[measurement_indices_par_tab]
antibody_level_shifts <- measurement_bias_indices[expected_indices]
y_new <- y_new + antibody_level_shifts
}
y_new[overall_indices]
Expand Down

0 comments on commit 92547e7

Please sign in to comment.