Skip to content

Commit

Permalink
Added new observation model for continuous, bounded data, with a fals…
Browse files Browse the repository at this point in the history
…e positive rate and much less variance around true negative levels.
  • Loading branch information
jameshay218 committed Dec 10, 2024
1 parent b2977dc commit f4d0199
Show file tree
Hide file tree
Showing 8 changed files with 222 additions and 2 deletions.
7 changes: 7 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ add_measurement_shifts <- function(predicted_antibody_levels, to_add, start_inde
invisible(.Call('_serosolver_add_measurement_shifts', PACKAGE = 'serosolver', predicted_antibody_levels, to_add, start_index_in_data, end_index_in_data))
}

#' Fast observation error function continuous with false positives
NULL

#' Marginal prior probability (p(Z)) of a particular infection history matrix single prior
#' Prior is independent contribution from each year
#' @param infection_history IntegerMatrix, the infection history matrix
Expand Down Expand Up @@ -192,6 +195,10 @@ likelihood_func_fast_continuous <- function(theta, obs, predicted_antibody_level
.Call('_serosolver_likelihood_func_fast_continuous', PACKAGE = 'serosolver', theta, obs, predicted_antibody_levels)
}

likelihood_func_fast_continuous_fp <- function(theta, obs, predicted_antibody_levels) {
.Call('_serosolver_likelihood_func_fast_continuous_fp', PACKAGE = 'serosolver', theta, obs, predicted_antibody_levels)
}

#' Infection history proposal function
#'
#' Proposes a new matrix of infection histories using a beta binomial proposal distribution. This particular implementation allows for n_infs epoch times to be changed with each function call. Furthermore, the size of the swap step is specified for each individual by proposal_inf_hist_distances.
Expand Down
3 changes: 3 additions & 0 deletions R/posteriors.R
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,9 @@ create_posterior_func <- function(par_tab,
} else if(data_type[biomarker_group] == 2){
if(verbose) message(cat("Setting to continuous, bounded observations\n"))
likelihood_func_use[[biomarker_group]] <- likelihood_func_fast_continuous
} else if(data_type[biomarker_group] == 3){
if(verbose) message(cat("Setting to continuous, bounded observations with false positives\n"))
likelihood_func_use[[biomarker_group]] <- likelihood_func_fast_continuous_fp
} else {
if(verbose) message(cat("Assuming discretized, bounded observations\n"))
likelihood_func_use[[biomarker_group]] <- likelihood_func_fast
Expand Down
26 changes: 25 additions & 1 deletion R/simulate_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ simulate_data <- function(par_tab,
#' Adds truncated noise to antibody data
#' @param y the titre
#' @param theta a vector with max_measurement and error parameters
#' @param data_type integer, currently accepting 1 or 2. Set to 1 for discretized, bounded data, or 2 for continuous, bounded data. Note that with 2, min_measurement must be set.
#' @param data_type integer, currently accepting 1 or 2. Set to 1 for discretized, bounded data, or 2 for continuous, bounded data. 3 is for continuous data assuming that true negatives follow a different distribution -- the vast majority return the min_measurement, but with a rate fp_rate, a random draw from a uniform distribution within the limits of detection is generated.
#' @return a noisy titre
#' @export
#' @examples
Expand All @@ -246,6 +246,30 @@ add_noise <- function(y, theta, measurement_bias = NULL, indices = NULL,data_typ
## If outside of bounds, truncate
noise_y[noise_y < theta["min_measurement"]] <- theta["min_measurement"]
noise_y[noise_y > theta["max_measurement"]] <- theta["max_measurement"]

} else if(data_type == 3){
## If true 0, then exponentially distributed
negative_predictions <- which(y == 0)
positive_predictions <- which(y > 0)
noise_y <- y

if (!is.null(measurement_bias)) {
noise_y[negative_predictions] <- theta["min_measurement"] + rbernoulli(length(noise_y[negative_predictions]),theta["fp_rate"])*runif(length(noise_y[negative_predictions]), 0,theta["max_measurement"]-theta["min_measurement"]) + measurement_bias[indices[negative_predictions]]
noise_y[positive_predictions] <- rnorm(length(y[positive_predictions]),
mean = y[positive_predictions] + measurement_bias[indices[positive_predictions]],
sd = theta["obs_sd"])
} else {
noise_y[negative_predictions] <- theta["min_measurement"] + rbernoulli(length(noise_y[negative_predictions]),theta["fp_rate"])*runif(length(noise_y[negative_predictions]), 0,theta["max_measurement"]-theta["min_measurement"])

noise_y[positive_predictions] <- rnorm(length(y[positive_predictions]),
mean = y[positive_predictions],
sd = theta["obs_sd"])
}

## If outside of bounds, truncate
noise_y[noise_y < theta["min_measurement"]] <- theta["min_measurement"]
noise_y[noise_y > theta["max_measurement"]] <- theta["max_measurement"]

} else {
## Draw from normal
if (!is.null(measurement_bias)) {
Expand Down
2 changes: 1 addition & 1 deletion man/add_noise.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,18 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// likelihood_func_fast_continuous_fp
NumericVector likelihood_func_fast_continuous_fp(const NumericVector& theta, const NumericVector& obs, const NumericVector& predicted_antibody_levels);
RcppExport SEXP _serosolver_likelihood_func_fast_continuous_fp(SEXP thetaSEXP, SEXP obsSEXP, SEXP predicted_antibody_levelsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::traits::input_parameter< const NumericVector& >::type theta(thetaSEXP);
Rcpp::traits::input_parameter< const NumericVector& >::type obs(obsSEXP);
Rcpp::traits::input_parameter< const NumericVector& >::type predicted_antibody_levels(predicted_antibody_levelsSEXP);
rcpp_result_gen = Rcpp::wrap(likelihood_func_fast_continuous_fp(theta, obs, predicted_antibody_levels));
return rcpp_result_gen;
END_RCPP
}
// inf_hist_prop_prior_v3
arma::mat inf_hist_prop_prior_v3(arma::mat infection_history_mat, const IntegerVector& sampled_indivs, const IntegerVector& age_mask, const IntegerVector& sample_mask, const IntegerVector& proposal_inf_hist_distances, const IntegerVector& n_infs, double shape1, double shape2, const NumericVector& rand_ns, const double& proposal_inf_hist_indiv_swap_ratio);
RcppExport SEXP _serosolver_inf_hist_prop_prior_v3(SEXP infection_history_matSEXP, SEXP sampled_indivsSEXP, SEXP age_maskSEXP, SEXP sample_maskSEXP, SEXP proposal_inf_hist_distancesSEXP, SEXP n_infsSEXP, SEXP shape1SEXP, SEXP shape2SEXP, SEXP rand_nsSEXP, SEXP proposal_inf_hist_indiv_swap_ratioSEXP) {
Expand Down Expand Up @@ -375,6 +387,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_serosolver_inf_mat_prior_total_group_cpp", (DL_FUNC) &_serosolver_inf_mat_prior_total_group_cpp, 4},
{"_serosolver_likelihood_func_fast", (DL_FUNC) &_serosolver_likelihood_func_fast, 3},
{"_serosolver_likelihood_func_fast_continuous", (DL_FUNC) &_serosolver_likelihood_func_fast_continuous, 3},
{"_serosolver_likelihood_func_fast_continuous_fp", (DL_FUNC) &_serosolver_likelihood_func_fast_continuous_fp, 3},
{"_serosolver_inf_hist_prop_prior_v3", (DL_FUNC) &_serosolver_inf_hist_prop_prior_v3, 10},
{"_serosolver_inf_hist_prop_prior_v2_and_v4", (DL_FUNC) &_serosolver_inf_hist_prop_prior_v2_and_v4, 60},
{NULL, NULL, 0}
Expand Down
121 changes: 121 additions & 0 deletions src/likelihood_funcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,57 @@ NumericVector likelihood_func_fast_continuous(const NumericVector &theta, const
return(ret);
}


//' Fast observation error function continuous with false positives
//' Calculate the probability of a set of observed antibody levels given a corresponding set of predicted antibody levels assuming continuous, bounded observations.
//' For true negatives (i.e., model predicts no infections), then the majority of the PDF is at min_measurement.
//' There is a probability, fp_rate, of observing a value within the detectable range.
//' @name Fast observation error function continuous
//' @param theta NumericVector, a named parameter vector giving the normal distribution standard deviation and the max observable antibody level.
//' Also a parameter fp_rate, giving the probability of a (uniformly distributed) false positive given true negative.
//' @param obs NumericVector, the vector of observed log antibody levels
//' @param predicted_antibody_levels NumericVector, the vector of predicted log antibody levels
//' @param a vector of same length as the input data giving the probability of observing each observation given the predictions
//' @return a likelihood for each observed antibody level
//' @export
//' @family likelihood_functions
// [[Rcpp::export(rng = false)]]
NumericVector likelihood_func_fast_continuous_fp(const NumericVector &theta, const NumericVector &obs, const NumericVector &predicted_antibody_levels){
int total_measurements = predicted_antibody_levels.size();
NumericVector ret(total_measurements);

const double sd = theta["obs_sd"];
const double den = sd*M_SQRT2;
const double den2 = log(sd*2.50662827463);
const double max_measurement = theta["max_measurement"];
const double min_measurement = theta["min_measurement"];
const double log_const = log(0.5);
const double fp_rate = theta["fp_rate"];
const double true_neg_prob = log(1.0 - fp_rate);
const double false_pos_prob = log(fp_rate);
const double unif_pdf = log(1.0/(max_measurement-min_measurement));
for(int i = 0; i < total_measurements; ++i){
// Most antibody levels are between min_measurement and max_measurement, this is the difference in normal cdfs
if(predicted_antibody_levels[i] > min_measurement){
if(obs[i] < max_measurement && obs[i] > min_measurement){
ret[i] = -0.5*(pow((obs[i]-predicted_antibody_levels[i])/sd, 2)) - den2;
// For antibody levels above the maximum,
} else if(obs[i] >= max_measurement) {
ret[i] = log_const + log(erfc((max_measurement - predicted_antibody_levels[i])/den));
} else {
ret[i] = log_const + log(1.0 + erf((min_measurement - predicted_antibody_levels[i])/den));
}
} else {
if(obs[i] <= min_measurement){
ret[i] = true_neg_prob;
} else {
ret[i] = false_pos_prob + unif_pdf;
}
}
}
return(ret);
}

// Likelihood calculation for infection history proposal
// Not really to be used elsewhere other than in \code{\link{inf_hist_prop_prior_v2_and_v4}}, as requires correct indexing for the predicted antibody levels vector. Also, be very careful, as predicted_antibody_levels is set to 0 at the end!
void proposal_likelihood_func(double &new_prob,
Expand Down Expand Up @@ -299,3 +350,73 @@ void proposal_likelihood_func_continuous(double &new_prob,
predicted_antibody_levels[x] = 0;
}
}


// Likelihood calculation for infection history proposal for continuous, bounded data with false positives
// Not really to be used elsewhere other than in \code{\link{inf_hist_prop_prior_v2_and_v4}}, as requires correct indexing for the predicted antibody levels vector. Also, be very careful, as predicted_antibody_levels is set to 0 at the end!
void proposal_likelihood_func_continuous_fp(double &new_prob,
NumericVector &predicted_antibody_levels,
const int &indiv,
const NumericVector &data,
const NumericVector &repeat_data,
const IntegerVector &repeat_indices,
const IntegerVector &cum_nrows_per_individual_in_data,
const IntegerVector &cum_nrows_per_individual_in_repeat_data,
const double &log_const,
const double &sd,
const double &den,
const double &den2,
const double &log_fp_rate,
const double &log_fp_rate_compliment,
const double &log_unif_pdf,
const double &max_measurement,
const double &min_measurement,
const bool &repeat_data_exist,
const double &obs_weight = 1.0){
for(int x = cum_nrows_per_individual_in_data[indiv]; x < cum_nrows_per_individual_in_data[indiv+1]; ++x){
if(predicted_antibody_levels[x] > min_measurement){
if(data[x] < max_measurement && data[x] > min_measurement){
new_prob += -0.5*(pow((data[x]-predicted_antibody_levels[x])/sd, 2)) - den2;
} else if(data[x] >= max_measurement) {
new_prob += log_const + log(erfc((max_measurement - predicted_antibody_levels[x])/den));
} else {
new_prob += log_const + log(1.0 + erf((min_measurement - predicted_antibody_levels[x])/den));
}
} else {
if(data[x] <= min_measurement){
new_prob += log_fp_rate_compliment;
} else {
new_prob += log_fp_rate + log_unif_pdf;
}
}
}

// =====================
// Do something for repeat data here
if(repeat_data_exist){
for(int x = cum_nrows_per_individual_in_repeat_data[indiv]; x < cum_nrows_per_individual_in_repeat_data[indiv+1]; ++x){
if(predicted_antibody_levels[repeat_indices[x]] > min_measurement){
if(repeat_data[x] < max_measurement && repeat_data[x] > min_measurement){
new_prob += -0.5*(pow((repeat_data[x]-predicted_antibody_levels[repeat_indices[x]])/sd, 2)) - den2;
} else if(repeat_data[x] >= max_measurement) {
new_prob += log_const + log(erfc((max_measurement - predicted_antibody_levels[repeat_indices[x]])/den));
} else {
new_prob += log_const + log(1.0 + erf((min_measurement - predicted_antibody_levels[repeat_indices[x]])/den));
}
} else {
if(repeat_data[x] <= min_measurement){
new_prob += log_fp_rate_compliment;
} else {
new_prob += log_fp_rate + log_unif_pdf;
}
}
}
}
// Re-weight likelihood
new_prob = new_prob*obs_weight;

// Need to erase the predicted antibody level data...
for(int x = cum_nrows_per_individual_in_data[indiv]; x < cum_nrows_per_individual_in_data[indiv+1]; ++x){
predicted_antibody_levels[x] = 0;
}
}
24 changes: 24 additions & 0 deletions src/likelihood_funcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,27 @@ void proposal_likelihood_func_continuous(double &new_prob,
const bool &repeat_data_exist,
const double &obs_weight);
#endif

#ifndef PROPOSAL_LIKELIHOOD_FUNC_CONTINUOUS_FP_H
#define PROPOSAL_LIKELIHOOD_FUNC_CONTINUOUS_FP_H
void proposal_likelihood_func_continuous_fp(double &new_prob,
NumericVector &predicted_antibody_levels,
const int &indiv,
const NumericVector &data,
const NumericVector &repeat_data,
const IntegerVector &repeat_indices,
const IntegerVector &cum_nrows_per_individual_in_data,
const IntegerVector &cum_nrows_per_individual_in_repeat_data,
const double &log_const,
const double &sd,
const double &den,
const double &den2,
const double &log_fp_rate,
const double &log_fp_rate_compliment,
const double &log_unif_pdf,
const double &max_measurement,
const double &min_measurement,
const bool &repeat_data_exist,
const double &obs_weight);
#endif

28 changes: 28 additions & 0 deletions src/proposal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,11 @@ List inf_hist_prop_prior_v2_and_v4(
const double log_const = log(0.5);
NumericMatrix den2s(n_groups,n_types);

NumericMatrix true_neg_probs(n_groups,n_types);
NumericMatrix false_pos_probs(n_groups,n_types);
NumericMatrix unif_pdfs(n_groups,n_types);


// Base model parameters
NumericMatrix boost_long_parameters(n_groups,n_types);
NumericMatrix boost_short_parameters(n_groups,n_types);
Expand All @@ -366,6 +371,7 @@ List inf_hist_prop_prior_v2_and_v4(
int wane_long_index = unique_theta_indices("wane_long");
int antigenic_seniority_index = unique_theta_indices("antigenic_seniority");
int error_index = unique_theta_indices("obs_sd");
int fp_rate_index = unique_theta_indices("fp_rate");

int min_index = unique_theta_indices("min_measurement");
int max_index = unique_theta_indices("max_measurement");
Expand All @@ -390,6 +396,9 @@ List inf_hist_prop_prior_v2_and_v4(

min_measurements(g,x) = theta(g,min_index + x*n_theta);
max_measurements(g,x) = theta(g,max_index + x*n_theta);
true_neg_probs(g,x) = log(1.0 - theta(g,fp_rate_index + x*n_theta));
false_pos_probs(g,x) = log(theta(g,fp_rate_index + x*n_theta));
unif_pdfs(g,x) = log(1.0/(max_measurements(g,x)-min_measurements(g,x)));

boost_long_parameters(g,x) = theta(g,boost_long_index + x*n_theta);
boost_short_parameters(g,x) = theta(g,boost_short_index + x*n_theta);
Expand Down Expand Up @@ -784,6 +793,25 @@ List inf_hist_prop_prior_v2_and_v4(
repeat_data_exist,
obs_weight);

} else if(data_type==3){
proposal_likelihood_func_continuous_fp(new_prob, predicted_antibody_levels,
index,
antibody_data,
antibody_data_repeats,
repeat_indices,
cum_nrows_per_individual_in_data,
cum_nrows_per_individual_in_repeat_data,
log_const,
sds(group,biomarker_group),
dens(group,biomarker_group),
den2s(group,biomarker_group),
false_pos_probs(group,biomarker_group),
true_neg_probs(group,biomarker_group),
unif_pdfs(group,biomarker_group),
max_measurements(group,biomarker_group),
min_measurements(group,biomarker_group),
repeat_data_exist,
obs_weight);
} else {
// Data_type 1 is discretized, bounded data
proposal_likelihood_func(new_prob, predicted_antibody_levels,
Expand Down

0 comments on commit f4d0199

Please sign in to comment.