diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 0c0de53..9e6815a 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -81,4 +81,4 @@ jobs: name: ${{ runner.os }}-r${{ matrix.config.r }}-results path: check steps: - - uses: actions/checkout@main + - uses: actions/checkout@main \ No newline at end of file diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index e33c1e2..7ea54c1 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -34,6 +34,13 @@ jobs: with: extra-packages: any::pkgdown, local::. needs: website + + - name: Install Bioconductor core + run: | + if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") + BiocManager::install(version = "3.21") # or the latest compatible version + BiocManager::install(c("BiocGenerics", "S4Vectors", "IRanges", "SummarizedExperiment", "SingleCellExperiment", "TreeSummarizedExperiment", "phyloseq"), ask = FALSE) + shell: Rscript {0} - name: Build site run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) diff --git a/DESCRIPTION b/DESCRIPTION index 0101016..dd2bb8f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,8 +22,8 @@ Suggests: testthat (>= 3.0.0), numDeriv, phyloseq, - TreeSummarizedExperiment, SummarizedExperiment, + TreeSummarizedExperiment, SingleCellExperiment, knitr, dplyr, diff --git a/NAMESPACE b/NAMESPACE index 2f9554f..1591080 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ export(make_reference_constraints) export(pseudohuber_median) export(psuedohuber_median) export(score_test) +export(test_linear_combo) import(MASS) import(Matrix) importFrom(magrittr,"%>%") diff --git a/R/emuFit.R b/R/emuFit.R index 8798d86..fa9f7fb 100644 --- a/R/emuFit.R +++ b/R/emuFit.R @@ -688,7 +688,14 @@ emuFit <- function(Y, "z_hat" = z_hat, "I" = I, "Dy" = Dy, - "cluster" = cluster) + "cluster" = cluster, + "X" = X, + "constraint_fn" = constraint_fn, + "constraint_grad_fn" = constraint_grad_fn) + + if (is.null(Y_augmented)) { + results$Y <- Y + } if (refit & penalize) { results$estimation_converged <- converged_estimates diff --git a/R/micro_wald.R b/R/micro_wald.R index 40e59fa..9ce6ccc 100644 --- a/R/micro_wald.R +++ b/R/micro_wald.R @@ -106,7 +106,7 @@ micro_wald <- function(Y, } H <- matrix(0,nrow = p, ncol = J - 1 ) - H[null_k,] <- constraint_grad_fn[[k]](B[null_k,])[-j_ref] + H[null_k,] <- constraint_grad_fn[[null_k]](B[null_k,])[-j_ref] if(null_j != j_ref){ diff --git a/R/test_linear_combo.R b/R/test_linear_combo.R new file mode 100644 index 0000000..7de921e --- /dev/null +++ b/R/test_linear_combo.R @@ -0,0 +1,177 @@ +#' Test a linear combination +#' +#' Test a hypothesis about a linear combination of parameters. Currently this is +#' only implemented for robust Wald tests. +#' +#' @param fitted_model An \code{emuFit} fitted model. +#' @param linear_combo A vector of length \code{p = ncol(X)} to determine the linear +#' combination of parameters to be tested. +#' @param hypothesis The null hypothesis to compare the linear combination of +#' coefficients against. The default value is \code{0}. +#' @param j A vector of \code{j} values giving the indices of taxa to test. The default +#' behavior is to include all taxa. +#' +#' @return A data frame giving confidence intervals using robust standard errors, test +#' statistics for the robust Wald test, and p-values, for the linear combination of +#' parameters that is being tested, for each taxon specified in \code{j}. +#' +#' @examples +#' data(wirbel_sample_small) +#' data(wirbel_otu_small) +#' emuRes <- emuFit(formula = ~ Group + Country, data = wirbel_sample_small, Y = wirbel_otu_small, +#' test_kj = data.frame(k = 2, j = 1), tolerance = 0.01, +#' constraint_tol = 0.01, B_null_tol = 0.01) +#' # here we set large tolerances for the example to run quickly, +#' # but we recommend smaller tolerances in practice +#' # test whether CountryFRA - constraint_FRA = CountryUSA - constraint_USA for taxon 1 +#' linear_combo_res <- test_linear_combo(fitted_model = emuRes, +#' linear_combo = c(0, 0, 1, -1), +#' j = 1) +#' +#' @export +#' +test_linear_combo <- function(fitted_model, + linear_combo, + hypothesis = 0, + j = NULL) { + + # get useful arguments + X <- fitted_model$X + if (is.null(fitted_model$Y_augmented)) { + Y <- fitted_model$Y + } else { + Y <- fitted_model$Y_augmented + } + B <- fitted_model$B + constraint_fn <- fitted_model$constraint_fn + constraint_grad_fn <- fitted_model$constraint_grad_fn + cluster <- fitted_model$cluster + call_args <- as.list(fitted_model$call)[-1] + if ("verbose" %in% names(call_args)) { + verbose <- call_args$verbose %in% c(TRUE, "development") + } else { + verbose <- FALSE + } + if ("alpha" %in% names(call_args)) { + alpha <- call_args$alpha + } else { + alpha <- 0.05 + } + nominal_coverage <- 1 - alpha + + # set j if needed + n <- nrow(X) + p <- ncol(X) + J <- ncol(B) + if (is.null(j)) { + j <- 1:J + } + + # calculate other needed quantities + j_ref <- get_j_ref(Y) + X_cup <- X_cup_from_X(X, J) + coefs <- data.frame(j = j, linear_combo_estimate = NA, ci_lower = NA, ci_upper = NA, + test_stat = NA, wald_pval = NA) + + # copy wald testing from micro_wald, with updates for linear combo + + # impose convenience constraint and update z + for (k in 1:p) { + B[k, ] <- B[k, ] - B[k, j_ref] + } + z <- update_z(Y, X, B) + # long form B + B_cup <- B_cup_from_B(B) + + # drop columns corresp. to j_ref from X_cup + to_erase <- (j_ref - 1) * p + 1:p + scores <- vector(n, mode = "list") + + if(verbose){ + message("Computing 'meat' matrix.") + } + + log_means <- X%*%B + matrix(z,ncol = 1)%*%matrix(1,nrow = 1, ncol = J) + + #compute residuals + Y_diff <- Y - exp(log_means) + + #compute score from residuals + scores <- lapply(1:n, + function(i){ + Y_diff[i,,drop = FALSE]%*% + X_cup[(i - 1)*J + 1:J,]}) + + if(!is.null(cluster)){ + scores <- lapply(unique(cluster), + function(i) + Reduce("+",scores[cluster == i])) + } + + score_mat <- do.call(rbind,scores) + score_mat <- methods::as(score_mat,"sparseMatrix") + + if(verbose){ + message("Computing information matrix.") + } + + # get information matrix + if (is.null(fitted_model$I)) { + I <- f_info(Y,B_cup = B_cup_from_B(B), B, X, X_cup, compute_together = FALSE) + } else { + I <- fitted_model$I + } + + if(verbose){ + message("Inverting information to obtain 'bread' matrix.") + } + + #get a matrix whose crossproduct with itself is the sandwich covariance for B + half_rob_cov <- Matrix::t(Matrix::solve(I[-to_erase,-to_erase], Matrix::t(score_mat)[-to_erase,], + method = "cholmod_solve")) + + #return to original parametrization + for(k in 1:p){ + B[k,] <- B[k,] - constraint_fn[[k]](B[k,]) + } + + if(verbose){ + message("Performing Wald tests and constructing Wald CIs.") + } + + #testing / confidence interval construction + for(s in 1:nrow(coefs)){ + + null_j <- coefs$j[s] + + if(verbose){ + message("Performing test ", s, " of ", nrow(coefs),": column ", null_j, " of B.") + } + + H <- matrix(0,nrow = p, ncol = J - 1 ) + + for (k in 1:p) { + if (!(linear_combo[k] == 0)) { + H[k,] <- constraint_grad_fn[[k]](B[k, ])[-j_ref] + if(null_j != j_ref){ + null_j_index <- ifelse(null_j < j_ref, null_j, null_j - 1) + H[k, null_j_index] <- H[k, null_j_index] - linear_combo[k] + } + } + } + + H_cup <- B_cup_from_B(H) + + var_kj <- sum(as.numeric(as.matrix(half_rob_cov%*%H_cup)^2)) + + h <- as.vector(B[, null_j] %*% linear_combo - hypothesis) + coefs$linear_combo_estimate[s] <- h + ci <- h + c(-1,1)*qnorm(1 - (1- nominal_coverage)/2)*sqrt(var_kj) + coefs[s, c("ci_lower", "ci_upper")] <- ci + test_stat <- h/sqrt(var_kj) + pval <- pchisq(test_stat^2, 1, lower.tail = FALSE) + coefs[s, c("test_stat", "wald_pval")] <- c(test_stat, pval) + } + + return(coefs) +} \ No newline at end of file diff --git a/man/test_linear_combo.Rd b/man/test_linear_combo.Rd new file mode 100644 index 0000000..bd4e259 --- /dev/null +++ b/man/test_linear_combo.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/test_linear_combo.R +\name{test_linear_combo} +\alias{test_linear_combo} +\title{Test a linear combination} +\usage{ +test_linear_combo(fitted_model, linear_combo, hypothesis = 0, j = NULL) +} +\arguments{ +\item{fitted_model}{An \code{emuFit} fitted model.} + +\item{linear_combo}{A vector of length \code{p = ncol(X)} to determine the linear +combination of parameters to be tested.} + +\item{hypothesis}{The null hypothesis to compare the linear combination of +coefficients against. The default value is \code{0}.} + +\item{j}{A vector of \code{j} values giving the indices of taxa to test. The default +behavior is to include all taxa.} +} +\value{ +A data frame giving confidence intervals using robust standard errors, test +statistics for the robust Wald test, and p-values, for the linear combination of +parameters that is being tested, for each taxon specified in \code{j}. +} +\description{ +Test a hypothesis about a linear combination of parameters. Currently this is +only implemented for robust Wald tests. +} +\examples{ +data(wirbel_sample_small) +data(wirbel_otu_small) +emuRes <- emuFit(formula = ~ Group + Country, data = wirbel_sample_small, Y = wirbel_otu_small, + test_kj = data.frame(k = 2, j = 1), tolerance = 0.01, + constraint_tol = 0.01, B_null_tol = 0.01) + # here we set large tolerances for the example to run quickly, + # but we recommend smaller tolerances in practice +# test whether CountryFRA - constraint_FRA = CountryUSA - constraint_USA for taxon 1 +linear_combo_res <- test_linear_combo(fitted_model = emuRes, + linear_combo = c(0, 0, 1, -1), + j = 1) + +} diff --git a/tests/testthat/test-test_linear_combo.R b/tests/testthat/test-test_linear_combo.R new file mode 100644 index 0000000..552dd50 --- /dev/null +++ b/tests/testthat/test-test_linear_combo.R @@ -0,0 +1,125 @@ +set.seed(11) +J <- 7 +n <- 12 +X <- cbind(1,rnorm(n), rnorm(n)) +b0 <- rnorm(J) +b1 <- seq(1,5,length.out = J) - + mean(seq(1,5,length.out = J)) +b2 <- seq(1,7,length.out = J) - + mean(seq(1,7,length.out = J)) +b <- rbind(b0, b1, b2) +Y <- radEmu:::simulate_data(n = n, + J = J, + X = X, + B = b, + distn = "ZINB", + zinb_size = 2, + zinb_zero_prop = 0.2, + mean_z = 5) + +rownames(X) <- paste0("Sample_",1:12) +rownames(Y) <- paste0("Sample_",1:12) +covariates <- data.frame(cov1 = X[, 2], cov2 = X[, 3]) +emuRes <- emuFit(X = X, Y = Y, run_score_tests = FALSE, tolerance = 0.01, return_wald_p = TRUE) + +test_that("test-linear-combo gives same result as Wald test when linear combo is c(0, 1)", { + lin_combo_test <- test_linear_combo(fitted_model = emuRes, linear_combo = c(0, 1, 0), j = 1) + expect_true(all.equal(unname(emuRes$coef[1, c("estimate", "lower", "upper", "wald_p")]), + unname(lin_combo_test[c("linear_combo_estimate", "ci_lower", + "ci_upper", "wald_pval")]))) +}) + +test_that("test-linear-combo can run a linear combo with multiple values", { + lin_combo_test1 <- test_linear_combo(fitted_model = emuRes, linear_combo = c(0, 1, -1), j = 1) + lin_combo_test4 <- test_linear_combo(fitted_model = emuRes, linear_combo = c(0, 1, -1), j = 4) + lin_combo_test14 <- test_linear_combo(fitted_model = emuRes, linear_combo = c(0, 1, -1), j = c(1, 4)) + expect_true(lin_combo_test1$wald_pval < lin_combo_test4$wald_pval) + expect_equal(lin_combo_test1$linear_combo_estimate, emuRes$B[2, 1] - emuRes$B[3, 1]) + all.equal(lin_combo_test14[1, ], lin_combo_test1) +}) + +# EDIT THIS ONE!! + +# test reparameterization - scale +test_that("test-linear-combo with reparameterization", { + X1 <- X + X1[, 2] <- 100*X[, 2] + emuRes1 <- emuFit(X = X1, Y = Y, run_score_tests = FALSE, tolerance = 0.01, return_wald_p = TRUE) + lin_combo_test <- test_linear_combo(fitted_model = emuRes, linear_combo = c(0, 1/100, 0)) +}) + +test_that("linear combo test controls t1e for Poisson data, n = 100", { + + skip(message = "Skipping test t1e of Wald linear combos because this takes a long time") + + nsim <- 100 + set.seed(11) + J <- 11 + n <- 100 + X <- cbind(1,rnorm(n), rnorm(n)) + b0 <- rnorm(J) + b1 <- seq(1,5,length.out = J) - + mean(seq(1,5,length.out = J)) + b2 <- seq(1,7,length.out = J) - + mean(seq(1,7,length.out = J)) + b2[4] <- -.8 + b2[8] <- .8 + b <- rbind(b0, b1, b2) + + res <- data.frame(sim = 1:nsim, j1 = NA, j4 = NA, j5 = NA, j6 = NA, j8 = NA) + for (sim in 1:nsim) { + print(sim) + Y <- radEmu:::simulate_data(n = n, J = J, + X = X, + B = b, + distn = "Poisson", + zinb_size = 2, + zinb_zero_prop = 0.2, + mean_z = 5) + emuRes <- emuFit(X = X, Y = Y, run_score_tests = FALSE, tolerance = 0.001, return_wald_p = TRUE, + match_row_names = FALSE) + lin_combo_test <- test_linear_combo(fitted_model = emuRes, linear_combo = c(0, 1, -1), j = c(1, 4, 5, 6, 8)) + res[sim, c(2:6)] <- lin_combo_test$wald_pval + } + + expect_false(mean(res$j1 <= 0.05) <= 0.05) # b1 = -2, b2 = -3 + expect_true(mean(res$j4 <= 0.05) <= 0.05) # b1 = b2 = -0.8 + expect_false(mean(res$j5 <= 0.05) <= 0.05) # b1 = -0.4, b2 = -0.6 + expect_true(mean(res$j6 <= 0.05) <= 0.05) # b1 = b2 = 0 + expect_true(mean(res$j8 <= 0.05) <= 0.05) # b1 = b2 = 0.8 + +}) + +# EDIT THIS ONE!! +# test reparameterization - location +set.seed(11) +J <- 7 +n <- 120 +cov <- rep(c("A", "B", "C"), each = 40) +b0 <- rnorm(J) +b0 <- b0 - radEmu:::pseudohuber_median(b0, 0.1) +b1 <- rnorm(J) +b1 <- b1 - radEmu:::pseudohuber_median(b1, 0.1) +b2 <- rnorm(J) +b2 <- b2 - radEmu:::pseudohuber_median(b2, 0.1) +b <- rbind(b0, b1, b2) +X1 <- model.matrix(~ cov, data = data.frame(cov)) +X2 <- model.matrix(~ cov, data = data.frame(cov)) +Y <- radEmu:::simulate_data(n = n, + J = J, + X = X1, + B = b, + distn = "ZINB", + zinb_size = 2, + zinb_zero_prop = 0.2, + mean_z = 5) +emuRes1 <- emuFit(X = X1, Y = Y, run_score_tests = FALSE, tolerance = 0.01, return_wald_p = TRUE, + match_row_names = FALSE) +emuRes2 <- emuFit(X = X2, Y = Y, run_score_tests = FALSE, tolerance = 0.01, return_wald_p = TRUE, + match_row_names = FALSE) + + +test_that("test-linear-combo with reparameterization", { + lin_combo_test <- test_linear_combo(fitted_model = emuRes1, linear_combo = c(1, 1, 0)) +}) + diff --git a/vignettes/intro_radEmu_with_tse.Rmd b/vignettes/intro_radEmu_with_tse.Rmd index 91ab17a..0186548 100644 --- a/vignettes/intro_radEmu_with_tse.Rmd +++ b/vignettes/intro_radEmu_with_tse.Rmd @@ -63,8 +63,8 @@ print(paste0("TreeSummarizedExperiment is installed: ", tse)) Now that we have loaded the `TreeSummarizedExperiment` package, we will create our `TreeSummarizedExperiment` data object. ```{r, message = FALSE, eval = tse} -library(TreeSummarizedExperiment) library(SummarizedExperiment) +library(TreeSummarizedExperiment) library(SingleCellExperiment) setClassUnion("ExpData", c("matrix", "SummarizedExperiment")) ```