Measurement bias seems to be working now

seroanalytics · May 13, 2024 · 92547e7 · 92547e7
1 parent c1c09ba
commit 92547e7
Show file tree

Hide file tree

Showing 5 changed files with 35 additions and 32 deletions.
diff --git a/R/analysis.R b/R/analysis.R
@@ -295,7 +295,7 @@ generate_quantiles <- function(x, sig_f = 3, qs = c(0.025, 0.5, 0.975), as_text
 #' @param par_tab the table controlling the parameters in the MCMC chain
 #' @param nsamp number of samples to take from posterior
 #' @param add_residuals if true, returns an extra output summarising residuals between the model prediction and data
-#' @param measurement_indices_by_time default NULL, optional vector giving the index of `measurement_bias` that each strain uses the measurement shift from from. eg. if there's 6 circulation years and 3 strain clusters
+#' @param measurement_bias default NULL, optional data frame giving the index of `rho` that each biomarker_id and biomarker_group which uses the measurement shift from from. eg. if there's 6 circulation years and 3 strain clusters
 #' @param for_res_plot TRUE/FALSE value. If using the output of this for plotting of residuals, returns the actual data points rather than summary statistics
 #' @param expand_antibody_data TRUE/FALSE value. If TRUE, solves antibody level predictions for the entire study period (i.e., between the range of antibody_data$sample_time). If left FALSE, then only solves for the infections times at which a antibody level against the circulating biomarker_id was measured in antibody_data.
 #' @param expand_to_all_times TRUE/FALSE value. If TRUE, solves antibody level predictions for all possible infection times (i.e., for the range in possible_exposure_times). If left FALSE, then only solves for the infections times at which a antibody level against the circulating biomarker_id was measured in antibody_data.
@@ -321,7 +321,7 @@ get_antibody_level_predictions <- function(chain, infection_histories, antibody_
                                            individuals, antigenic_map=NULL,
                                            possible_exposure_times=NULL, par_tab,
                                            nsamp = 1000, add_residuals = FALSE,
-                                           measurement_indices_by_time = NULL,
+                                           measurement_bias = NULL,
                                            for_res_plot = FALSE, expand_antibody_data = FALSE,
                                            expand_to_all_times=FALSE,
                                            antibody_level_before_infection=FALSE, for_regression=FALSE,
@@ -406,7 +406,7 @@ get_antibody_level_predictions <- function(chain, infection_histories, antibody_
   }
   model_func <- create_posterior_func(par_tab, antibody_data1, antigenic_map, possible_exposure_times,
                                      prior_version=2,
-                                      measurement_indices_by_time = measurement_indices_by_time, function_type = 4,
+                                     measurement_bias = measurement_bias, function_type = 4,
                                       antibody_level_before_infection=antibody_level_before_infection,
                                       data_type=data_type,start_level=start_level,
                                      demographics=demographics

diff --git a/R/mcmc.R b/R/mcmc.R
@@ -13,7 +13,7 @@
 #' @param posterior_func Pointer to posterior function used to calculate a likelihood. This will probably be \code{\link{create_posterior_func}}
 #' @param prior_func User function of prior for model parameters. Should take parameter values only
 #' @param prior_version which infection history assumption prior_version to use? See \code{\link{describe_priors}} for options. Can be 1, 2, 3 or 4
-#' @param measurement_indices optional NULL. For measurement bias function. Vector of indices of length equal to number of circulation times. For each year, gives the index of parameters named "rho" that correspond to each time period
+#' @param measurement_bias optional NULL. For measurement bias function. Vector of indices of length equal to number of circulation times. For each year, gives the index of parameters named "rho" that correspond to each time period
 #' @param measurement_random_effects optional FALSE. Boolean indicating if measurement bias is a random effects term. If TRUE adds a component to the posterior calculation that calculates the probability of a set of measurement shifts "rho", given a mean and standard deviation
 #' @param proposal_ratios optional NULL. Can set the relative sampling weights of the infection state times. Should be an integer vector of length matching nrow(antigenic_map). Otherwise, leave as NULL for uniform sampling.
 #' @param random_start_parameters if FALSE, uses whatever parameter values were passed in `par_tab` as the starting positions for the MCMC chain
@@ -69,7 +69,7 @@ serosolver <- function(par_tab,
                      posterior_func = create_posterior_func,
                      prior_func = NULL,
                      prior_version = 2,
-                     measurement_indices = NULL,
+                     measurement_bias = NULL,
                      measurement_random_effects = FALSE,
                      proposal_ratios = NULL,
                      temp = 1,
@@ -264,7 +264,7 @@ serosolver <- function(par_tab,
                                            prior_version=prior_version,
                                            solve_likelihood,
                                            age_mask,
-                                           measurement_indices_by_time = measurement_indices,
+                   measurement_bias = measurement_bias,
                                            n_alive = n_alive,
                                            function_type = 1,
                                            data_type=data_type,
@@ -289,7 +289,7 @@ serosolver <- function(par_tab,
                                              prior_version=prior_version,
                                              solve_likelihood,
                                              age_mask,
-                                             measurement_indices_by_time = measurement_indices,
+                     measurement_bias = measurement_bias,
                                              n_alive = n_alive,
                                              function_type = 2,
                                              data_type=data_type,
@@ -352,7 +352,7 @@ serosolver <- function(par_tab,
                               possible_exposure_times=possible_exposure_times,
                               antigenic_map=antigenic_map,
                               start_levels=start_level,
-                              measurement_indices=measurement_indices,
+                              measurement_bias=measurement_bias,
                               data_type=data_type)
 
   save(serosolver_settings,file=paste0(filename,"_serosolver_settings.RData"))
@@ -984,7 +984,6 @@ serosolver <- function(par_tab,
   }
   saved_wd <- normalizePath(saved_wd)
   all_diagnostics <- suppressMessages(plot_mcmc_diagnostics(saved_wd, par_tab, adaptive_iterations))
-
   par_est_table <- all_diagnostics$theta_estimates
   diagnostic_warnings <- list()
   if("Rhat upper CI" %in% colnames(par_est_table) && any(par_est_table[par_est_table$names != "total_infections","Rhat upper CI"] > 1.1)){

diff --git a/R/plot_antibody_model.R b/R/plot_antibody_model.R
@@ -136,7 +136,7 @@ plot_model_fits <- function(chain, infection_histories,
                             possible_exposure_times=NULL,
                             nsamp = 1000,
                             known_infection_history=NULL,
-                            measurement_indices_by_time = NULL,
+                            measurement_bias = NULL,
                             p_ncol=max(1,floor(length(individuals)/2)),
                             data_type=1,
                             expand_to_all_times=FALSE,
@@ -151,7 +151,7 @@ plot_model_fits <- function(chain, infection_histories,
     message("Using provided serosolver settings list")
     if(is.null(antigenic_map)) antigenic_map <- settings$antigenic_map
     if(is.null(possible_exposure_times)) possible_exposure_times <- settings$possible_exposure_times
-    if(is.null(measurement_indices_by_time)) measurement_indices_by_time <- settings$measurement_indices_by_time
+    if(is.null(measurement_bias)) measurement_bias <- settings$measurement_bias
     if(is.null(antibody_data)) antibody_data <- settings$antibody_data
     if(is.null(demographics)) demographics <- settings$demographics
     if(is.null(par_tab)) par_tab <- settings$par_tab
@@ -180,7 +180,7 @@ plot_model_fits <- function(chain, infection_histories,
     individuals,
     antigenic_map, possible_exposure_times, 
     par_tab, nsamp, FALSE, 
-    measurement_indices_by_time,
+    measurement_bias,
     expand_antibody_data=TRUE,
     expand_to_all_times=expand_to_all_times,
     data_type=data_type,
@@ -397,15 +397,15 @@ plot_antibody_predictions <- function(chain, infection_histories,
                                       antigenic_map=NULL, 
                                       possible_exposure_times=NULL,
                                       nsamp = 1000,
-                                      measurement_indices_by_time = NULL,
+                                      measurement_bias = NULL,
                                       data_type=1,
                                       start_level="none",
                                       settings=NULL){
   ## If the list of serosolver settings was included, use these rather than passing each one by one
   if(!is.null(settings)){
     message("Using provided serosolver settings list")
     if(is.null(antigenic_map)) antigenic_map <- settings$antigenic_map
-    if(is.null(measurement_indices_by_time)) measurement_indices_by_time <- settings$measurement_indices_by_time
+    if(is.null(measurement_bias)) measurement_bias <- settings$measurement_bias
     if(is.null(antibody_data)) antibody_data <- settings$antibody_data
     if(is.null(demographics)) demographics <- settings$demographics
     if(is.null(par_tab)) par_tab <- settings$par_tab
@@ -429,7 +429,7 @@ plot_antibody_predictions <- function(chain, infection_histories,
     chain, infection_histories, antibody_data, individuals=unique(antibody_data$individual),
     antigenic_map, possible_exposure_times, 
     par_tab, nsamp, FALSE, 
-    measurement_indices_by_time,
+    measurement_bias,
     expand_antibody_data=FALSE,
     expand_to_all_times=FALSE,
     data_type=data_type,
@@ -495,7 +495,7 @@ plot_antibody_predictions <- function(chain, infection_histories,
 
     facet_wrap(~name) +
     theme_minimal() +
-    theme(legend.position="none") +
+    theme(legend.position="bottom") +
     xlab("Antibody level") +
     ylab("Count") +
     ggtitle("Predicted observations vs. measurements from random posterior draws")

diff --git a/R/plots_other.R b/R/plots_other.R
@@ -185,7 +185,12 @@ plot_posteriors_theta <- function(chain,par_tab){
   p_trace_rho <- p_density_rho <- NULL
   if("rho" %in% par_tab$names& "rho" %in% colnames(chain)){
     rho_indices <- 1:nrow(par_tab_tmp[par_tab_tmp$names == "rho",])
-    rho_indices <- c("samp_no","chain_no",c("rho",paste0("rho.",rho_indices[1:(length(rho_indices)-1)])))
+    if(length(rho_indices) > 1){
+      additional <- paste0("rho.",rho_indices[1:(length(rho_indices)-1)])
+    } else {
+      additional <- NULL
+    }
+    rho_indices <- c("samp_no","chain_no",c("rho",additional))
     p_trace_rho <- chain[,rho_indices] %>%
       pivot_longer(-c(samp_no, chain_no)) %>%
       ggplot() + 

diff --git a/R/posteriors.R b/R/posteriors.R
@@ -10,7 +10,7 @@
 #' @param prior_version which infection history assumption version to use? See \code{\link{describe_priors}} for options. Can be 1, 2, 3 or 4
 #' @param solve_likelihood usually set to TRUE. If FALSE, does not solve the likelihood and instead just samples/solves based on the model prior
 #' @param age_mask see \code{\link{create_age_mask}} - a vector with one entry for each individual specifying the first epoch of circulation in which an individual could have been exposed
-#' @param measurement_indices_by_time if not NULL, then use these indices to specify which measurement bias parameter index corresponds to which time
+#' @param measurement_bias if not NULL, then use these indices to specify which measurement bias parameter index corresponds to which time
 #' @param n_alive if not NULL, uses this as the number alive in a given year rather than calculating from the ages. This is needed if the number of alive individuals is known, but individual birth dates are not
 #' @param function_type integer specifying which version of this function to use. Specify 1 to give a posterior solving function; 2 to give the gibbs sampler for infection history proposals; otherwise just solves the titre model and returns predicted titres. NOTE that this is not the same as the attack rate prior argument, \code{version}!
 #' @param titre_before_infection TRUE/FALSE value. If TRUE, solves titre predictions, but gives the predicted titre at a given time point BEFORE any infection during that time occurs.
@@ -46,7 +46,7 @@ create_posterior_func <- function(par_tab,
                                   prior_version = 1,
                                   solve_likelihood = TRUE,
                                   age_mask = NULL,
-                                  measurement_indices_by_time = NULL,
+                                  measurement_bias = NULL,
                                   n_alive = NULL,
                                   function_type = 1,
                                   antibody_level_before_infection=FALSE,
@@ -72,9 +72,9 @@ create_posterior_func <- function(par_tab,
       if(verbose) message(cat("Note: no biomarker_group detected in par_tab. Assuming all biomarker_group as 1.\n"))
         par_tab$biomarker_group <- 1
     }
-    if (!is.null(measurement_indices_by_time) & !("biomarker_group" %in% colnames(measurement_indices_by_time))) {
-      if(verbose) message(cat("Note: no biomarker_group detected in measurement_indices_by_time. Assuming all biomarker_group as 1.\n"))
-      measurement_indices_by_time$biomarker_group <- 1
+    if (!is.null(measurement_bias) & !("biomarker_group" %in% colnames(measurement_bias))) {
+      if(verbose) message(cat("Note: no biomarker_group detected in measurement_bias Assuming all biomarker_group as 1.\n"))
+      measurement_bias$biomarker_group <- 1
     }
 
     if(verbose){
@@ -309,17 +309,17 @@ create_posterior_func <- function(par_tab,
     n_pars <- length(theta_indices_unique)
 
     ## Sort out any assumptions for measurement bias
-    use_measurement_bias <- (length(measurement_indices_par_tab) > 0) & !is.null(measurement_indices_by_time)
+    use_measurement_bias <- (length(measurement_indices_par_tab) > 0) & !is.null(measurement_bias)
 
     antibody_level_shifts <- c(0)
     expected_indices <- NULL
-    measurement_bias <- NULL
+    measurement_bias_indices <- NULL
     additional_arguments <- NULL
 
     repeat_data_exist <- nrow(antibody_data_repeats) > 0
     if (use_measurement_bias) {
         if(verbose) message(cat("Using measurement bias\n"))
-        expected_indices <- antibody_data_unique %>% left_join(measurement_indices_by_time,by = c("biomarker_id", "biomarker_group")) %>% pull(rho_index)
+        expected_indices <- antibody_data_unique %>% left_join(measurement_bias,by = c("biomarker_id", "biomarker_group")) %>% pull(rho_index)
     } else {
         expected_indices <- c(-1)
     }
@@ -347,7 +347,6 @@ create_posterior_func <- function(par_tab,
         if(data_type[biomarker_group] == 1){
           likelihood_func_use[[biomarker_group]] <- likelihood_func_fast
           if(verbose) message(cat("Setting to discretized, bounded observations\n"))
-
         } else if(data_type[biomarker_group] == 2){
           if(verbose) message(cat("Setting to continuous, bounded observations\n"))
           likelihood_func_use[[biomarker_group]] <- likelihood_func_fast_continuous
@@ -412,8 +411,8 @@ create_posterior_func <- function(par_tab,
             )
 
             if (use_measurement_bias) {
-              measurement_bias <- pars[measurement_indices_par_tab]
-              antibody_level_shifts <- measurement_bias[expected_indices]
+              measurement_bias_indices <- pars[measurement_indices_par_tab]
+              antibody_level_shifts <- measurement_bias_indices[expected_indices]
               y_new <- y_new + antibody_level_shifts
             }
                 ## Calculate likelihood for unique antibody_levels and repeat data
@@ -481,8 +480,8 @@ create_posterior_func <- function(par_tab,
 
 
             if (use_measurement_bias) {
-                measurement_bias <- pars[measurement_indices_par_tab]
-                antibody_level_shifts <- measurement_bias[expected_indices]
+              measurement_bias_indices <- pars[measurement_indices_par_tab]
+                antibody_level_shifts <- measurement_bias_indices[expected_indices]
             }
           antigenic_map_long <- array(dim=c(length(possible_biomarker_ids)^2,n_biomarker_groups,n_demographic_groups))
           antigenic_map_short <- array(dim=c(length(possible_biomarker_ids)^2,n_biomarker_groups,n_demographic_groups))
@@ -617,8 +616,8 @@ create_posterior_func <- function(par_tab,
                 antibody_level_before_infection
             )
             if (use_measurement_bias) {
-                measurement_bias <- pars[measurement_indices_par_tab]
-                antibody_level_shifts <- measurement_bias[expected_indices]
+              measurement_bias_indices <- pars[measurement_indices_par_tab]
+                antibody_level_shifts <- measurement_bias_indices[expected_indices]
                 y_new <- y_new + antibody_level_shifts
             }
             y_new[overall_indices]