statdivlab · svteichman · Jun 3, 2025 · Jun 4, 2025 · Jun 6, 2025 · Jun 16, 2025
diff --git a/R/X_cup_from_X.R b/R/X_cup_from_X.R
@@ -20,3 +20,28 @@ X_cup_from_X <- function(X,J){
 
   return(X_cup)
 }
+
+X_cup_from_X_fast <- function(X, J) {
+  n <- nrow(X)
+  p <- ncol(X)
+
+  # Total number of rows in final matrix
+  total_rows <- n * J
+
+  # Construct i index (row indices)
+  i_coords <- rep(1:total_rows, each = p)
+
+  # Construct j index (column indices)
+  j_block <- rep(0:(J - 1), each = p) * p
+  j_coords <- rep(j_block, times = n) + rep(1:p, times = total_rows)
+
+  # Construct values
+  X_rep <- X[rep(1:n, each = J), ]
+  x_vals <- as.vector(t(X_rep))  # row-wise unrolling
+
+  X_cup <- Matrix::sparseMatrix(i = i_coords,
+                                j = j_coords,
+                                x = x_vals)
+
+  return(X_cup)
+}
diff --git a/R/emuFit.R b/R/emuFit.R
@@ -63,6 +63,10 @@
 #' @param tolerance tolerance for stopping criterion in full model fitting; once
 #' no element of B is updated by more than this value in a single step, we exit
 #' optimization. Defaults to 1e-3.
+#' @param max_abs_B maximum allowed value for elements of B (in absolute value) in full model fitting. In
+#' most cases this is not needed as Firth penalty will prevent infinite estimates
+#' under separation. However, such a threshold may be helpful in very poorly conditioned problems (e.g., with many
+#' nearly collinear regressors). Default is 250.
 #' @param rho_init numeric: value at which to initiate rho parameter in augmented Lagrangian
 #' algorithm. Default is 1.
 #' @param tau numeric: value to scale rho by in each iteration of augmented Lagrangian
@@ -104,6 +108,8 @@
 #' If a value between 0 and 1, all zero-comparison p-values below the value will be set to NA. 
 #' Default is \code{0.01}. 
 #' @param unobserved_taxon_error logical: should an error be thrown if Y includes taxa that have 0 counts for all samples? Default is TRUE.
+#' @param est_par logical: when possible should computation be parallelized for estimation under the alternative. This is suggested for large 
+#' \code{n}, especially if when running with \code{verbose = "development"}, the augmentation steps appear to be taking a long time.
 #' 
 #' @return A list containing elements 'coef', 'B', 'penalized', 'Y_augmented',
 #' 'z_hat', 'I', 'Dy', and 'score_test_hyperparams' if score tests are run.  
@@ -179,6 +185,7 @@ emuFit <- function(Y,
                    constraint_param = 0.1,
                    verbose = FALSE,
                    tolerance = 1e-4,
+                   max_abs_B = 250,
                    B_null_tol = 1e-3,
                    rho_init = 1,
                    inner_tol = 1,
@@ -195,7 +202,8 @@ emuFit <- function(Y,
                    return_score_components = FALSE,
                    return_both_score_pvals = FALSE,
                    remove_zero_comparison_pvals = 0.01,
-                   unobserved_taxon_error = TRUE) {
+                   unobserved_taxon_error = TRUE,
+                   est_par = FALSE) {
 
   # Record call
   call <- match.call(expand.dots = FALSE)
@@ -234,7 +242,7 @@ emuFit <- function(Y,
   # check for zero-comparison parameters
   zero_comparison_res <- zero_comparison_check(X = X, Y = Y)
 
-  X_cup <- X_cup_from_X(X,J)
+  X_cup <- X_cup_from_X_fast(X,J)
 
 
 
@@ -263,7 +271,9 @@ emuFit <- function(Y,
                                max_step = max_step,
                                tolerance = tolerance,
                                verbose = (verbose == "development"),
-                               j_ref = j_ref)
+                               max_abs_B = max_abs_B,
+                               j_ref = j_ref,
+                               par = est_par)
       Y_test <- fitted_model$Y_augmented
       fitted_B <- fitted_model$B
       converged_estimates <- fitted_model$convergence
@@ -281,11 +291,17 @@ emuFit <- function(Y,
                      max_stepsize = max_step,
                      tolerance = tolerance,
                      j_ref = j_ref,
+                     max_abs_B = max_abs_B,
                      verbose = (verbose == "development"))
       fitted_B <- fitted_model
       Y_test <- Y
     }
 
+    max_est_B <- max(abs(fitted_B))
+    if (max_est_B >= 0.9 * max_abs_B) {
+      warning("At least one estimated B value is within 10% of your `max_abs_B` boundary. We suggest that you rerun estimation with a larger `max_abs_B` value.")
+    }
+
   } else {
     if (is.null(B) & is.null(fitted_model)) {
       stop("If refit is FALSE, B or fitted_model must be provided")
@@ -307,7 +323,7 @@ emuFit <- function(Y,
     } else {
       fitted_B <- B
       if (penalize) {
-        G <- get_G_for_augmentations(X, J, n, X_cup)
+        G <- get_G_for_augmentations_fast(X, J, n, X_cup)
         Y_test <- Y_augmented <- Y + 
           get_augmentations(X = X, G = G, Y = Y, B = fitted_B)
       } else {

diff --git a/R/emuFit_micro_penalized.R b/R/emuFit_micro_penalized.R
@@ -19,14 +19,18 @@
 #' @param max_abs_B numeric: maximum allowed value for elements of B (in absolute value). In
 #' most cases this is not needed as Firth penalty will prevent infinite estimates
 #' under separation. However, such a threshold may be helpful in very poorly conditioned problems (e.g., with many
-#' nearly collinear regressors). Default is 50.
+#' nearly collinear regressors). Default is 250.
 #' @param use_legacy_augmentation logical: should an older (slower) implementation of
 #' data augmentation be used? Only used for testing - there is no advantage to using
 #' the older implementation in applied settings.
 #' @param j_ref which column of B to set to zero as a convenience identifiability
 #' during optimization. Default is NULL, in which case this column is chosen based
 #' on characteristics of Y (i.e., j_ref chosen to maximize number of entries of
 #' Y_j_ref greater than zero).
+#' @param par ogical: when possible should computation be parallelized for computing augmentation values. 
+#' This is suggested for large \code{n}, especially if when running with \code{verbose = "development"}, 
+#' the augmentation steps appear to be taking a long time.
+#'
 #' @return A p x J matrix containing regression coefficients (under constraint
 #' g(B_k) = 0)
 #'
@@ -43,14 +47,15 @@ emuFit_micro_penalized <-
            verbose = TRUE,
            max_abs_B = 250,
            use_legacy_augmentation = FALSE,
-           j_ref = NULL
+           j_ref = NULL,
+           par = FALSE
   ){
 
     J <- ncol(Y)
     p <- ncol(X)
     n <- nrow(Y)
     if(use_legacy_augmentation){
-      X_tilde <- X_cup_from_X(X,J)
+      X_tilde <- X_cup_from_X_fast(X,J)
     }
     Y_augmented <- Y
     if (is.null(B)) {
@@ -67,9 +72,9 @@ emuFit_micro_penalized <-
 may take a moment.")
       }
       if(is.null(X_cup)){
-        X_cup <- X_cup_from_X(X,J)
+        X_cup <- X_cup_from_X_fast(X,J)
       }
-      G <- get_G_for_augmentations(X,J,n,X_cup)
+      G <- get_G_for_augmentations_fast(X,J,n,X_cup)
     }
     while(!converged){
       # print(counter)
@@ -99,10 +104,11 @@ maintained only for testing purposes.")
                         J = J,
                         verbose = verbose)
         } else{
-          augmentations <- get_augmentations(X = X,
-                                             G = G,
-                                             Y = Y,
-                                             B = fitted_model)
+          augmentations <- get_augmentations_par(X = X,
+                                                 G = G,
+                                                 Y = Y,
+                                                 B = fitted_model, 
+                                                 par = par)
           Y_augmented <- Y + augmentations
         }
       }

diff --git a/R/fit_null.R b/R/fit_null.R
@@ -87,7 +87,7 @@ fit_null <- function(B,
 
   #get X_cup for later use
   if (is.null(X_cup)) {
-    X_cup = X_cup_from_X(X,J)
+    X_cup = X_cup_from_X_fast(X,J)
   }
 
   #set iteration to zero

diff --git a/R/get_G_for_augmentations.R b/R/get_G_for_augmentations.R
@@ -7,7 +7,6 @@ get_G_for_augmentations <- function(X,
   p <- ncol(X)
   #calculate indices of columns to remove from X_cup (aka X_tilde, the expanded design matrix)
   to_delete <- p*(J - 1) + 1:p #col indices for elements of B^J in long B format
-  # X_tilde_J <- X_cup_from_X(X,J)
   X_tilde_J <- X_cup
   for(i in 1:n){
     X_tilde_J[(i - 1)*J  + J,] <- 0
@@ -22,3 +21,21 @@ get_G_for_augmentations <- function(X,
   G <- cbind(X_tilde_J,Z)
   return(G)
 }
+
+get_G_for_augmentations_fast <- function(X, J, n, X_cup) {
+  p <- ncol(X)
+  #calculate indices of columns to remove from X_cup (aka X_tilde, the expanded design matrix)
+  to_delete <- p*(J - 1) + 1:p #col indices for elements of B^J in long B format
+  last_class_rows <- seq(from = J, to = n * J, by = J)
+  X_cup[last_class_rows, ] <- 0
+
+  X_cup <- X_cup[, -to_delete]
+
+  #design matrix in z
+  Z <- Matrix::sparseMatrix(i = 1:(n*J),
+                            j = rep(1:n, each = J),
+                            x = 1,
+                            dims = c(n * J, n))
+  G <- cbind(X_cup,Z)
+  return(G)
+}
diff --git a/R/get_augmentations.R b/R/get_augmentations.R
@@ -63,3 +63,74 @@ get_augmentations <- function(X,
 
 
 }
+
+# this actually isn't faster in tests
+get_augmentations_par <- function(X,
+                                  G, #design matrix in beta_vec and z
+                                  Y,
+                                  B,
+                                  par = FALSE){
+
+
+  #dimensions
+  n <- nrow(Y)
+  p <- nrow(B)
+  J <- ncol(Y)
+
+  #parametrize so last column of B is / can be dropped
+  for(k in 1:p){
+    B[k, ] <- B[k, ] - B[k, J]
+  }
+  B_tilde_J <- B_cup_from_B(B)
+  to_delete <- p*(J - 1) + 1:p
+  B_tilde_J <- B_tilde_J[-to_delete, , drop = FALSE]
+
+
+  Y_rowsums <- rowSums(Y)
+  z_tilde <- numeric(n)
+  for(i in 1:n){
+    z_tilde[i] <- log(Y_rowsums[i]) - sum_of_logs(X[i, , drop= FALSE] %*% B)
+  }
+
+  theta_tilde <- rbind(B_tilde_J, matrix(z_tilde, ncol = 1))
+
+  log_means <- G %*% theta_tilde
+
+  W <- Matrix::Diagonal(x = exp(log_means@x))
+
+  #information matrix (we're using tricky equivalency to poisson model)
+  info <- Matrix::crossprod(G, W) %*% G
+  # info <- methods::as(info, "symmetricMatrix")
+  info <- Matrix::forceSymmetric(info)
+  #cholesky decomposition
+  info_chol <- Matrix::chol(info, pivot = FALSE)
+  #info^(-0.5) by inverting cholesky decomp
+  info_half_inv <- Matrix::solve(info_chol)
+
+
+
+  if (.Platform$OS.type == "unix" & par) {
+
+    cores <- parallel::detectCores()
+    aug_res <- parallel::mclapply(1:n, function(i) {
+      G_i <- G[1:J + (i - 1)*J, , drop = FALSE]
+      G_i <- G_i %*% info_half_inv
+      Matrix::rowSums(Matrix::Diagonal(x = exp(log_means[(i - 1)*J + 1:J, ])) %*% (G_i^2) )/2
+    }, mc.cores = cores - 1)
+    augmentations <- do.call(rbind, aug_res)
+
+  } else {
+    augmentations <- matrix(0, nrow = n, ncol = J)
+    for(i in 1:n){
+      G_i <- G[1:J + (i - 1)*J, , drop = FALSE]
+      G_i <- G_i %*% info_half_inv
+
+      augmentations[i, ] <-
+        Matrix::rowSums(Matrix::Diagonal(x = exp(log_means[(i - 1)*J + 1:J, ])) %*% (G_i^2) )/2
+
+    }
+  }
+
+  return(augmentations)
+
+}
diff --git a/man/emuFit.Rd b/man/emuFit.Rd
diff --git a/man/emuFit_micro_penalized.Rd b/man/emuFit_micro_penalized.Rd
diff --git a/tests/testthat/test-X_cup_from_X.R b/tests/testthat/test-X_cup_from_X.R
@@ -0,0 +1,13 @@
+test_that("X_cup_from_X_fast works and is faster than X_cup_from_X", {
+  X <- cbind(1, rnorm(100), runif(100))
+  J <- 200
+  start1 <- proc.time()
+  Xcup1 <- X_cup_from_X(X, J)
+  end1 <- proc.time() - start1
+  start2 <- proc.time()
+  Xcup2 <- X_cup_from_X_fast(X, J)
+  end2 <- proc.time() - start2
+
+  expect_true(all.equal(Xcup1, Xcup2))
+  expect_true(end2[3] < end1[3])
+})