sped up matrix (pseudo)inversion throughout -- related to #241 and #246

jr-leary7 · Oct 13, 2024 · 071e2bb · 071e2bb
1 parent d51b5d9
commit 071e2bb
Show file tree

Hide file tree

Showing 15 changed files with 73 additions and 38 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -58,6 +58,7 @@ Suggests:
     Seurat, 
     bluster, 
     cluster, 
+    speedglm, 
     rmarkdown,
     gridExtra, 
     BiocStyle, 

diff --git a/NAMESPACE b/NAMESPACE
@@ -39,9 +39,7 @@ importFrom(MASS,theta.mm)
 importFrom(Matrix,Matrix)
 importFrom(Matrix,bdiag)
 importFrom(Matrix,colSums)
-importFrom(Matrix,diag)
 importFrom(Matrix,rowMeans)
-importFrom(Matrix,solve)
 importFrom(Matrix,t)
 importFrom(Rcpp,sourceCpp)
 importFrom(RcppEigen,fastLmPure)

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -9,3 +9,7 @@ eigenMapMatMult <- function(A, B, n_cores = 1L) {
     .Call(`_scLANE_eigenMapMatMult`, A, B, n_cores)
 }
 
+eigenMapPseudoInverse <- function(A, tolerance = 1e-6, n_cores = 1L) {
+    .Call(`_scLANE_eigenMapPseudoInverse`, A, tolerance, n_cores)
+}
+
diff --git a/R/biasCorrectGEE.R b/R/biasCorrectGEE.R
@@ -5,7 +5,6 @@
 #' @description This functions implements several bias-correction methods for the GEE sandwich variance-covariance matrix; they are to be used when the number of subjects is small or the numer of timepoints per-subject is very large.
 #' @importFrom stats fitted.values cor
 #' @importFrom dplyr with_groups summarise
-#' @importFrom MASS ginv
 #' @importFrom Matrix bdiag
 #' @param fitted.model The fitted model of class \code{geem} returned by \code{\link{marge2}}. Defaults to NULL.
 #' @param correction.method A string specifying the correction method to be used. Currently supported options are "df" and "kc". Defaults to "kc".
@@ -121,9 +120,10 @@ biasCorrectGEE <- function(fitted.model = NULL,
       }
       sigma2 <- fitted.model$phi
       Var_Yi <- sigma2 * R_i
-      Var_Yi_inv <- try({
-        MASS::ginv(Var_Yi)
-      }, silent = TRUE)
+      Var_Yi_inv <- try({ eigenMapMatrixInvert(Var_Yi, n_cores = 1L) }, silent = TRUE)
+      if (inherits(Var_Yi_inv, "try-error")) {
+        Var_Yi_inv <- try({ eigenMapPseudoInverse(Var_Yi, n_cores = 1L) }, silent = TRUE)
+      }
       if (inherits(Var_Yi_inv, "try-error")) {
         if (verbose) {
           warning(paste0("Covariance matrix inversion failed for subject ",
@@ -137,9 +137,9 @@ biasCorrectGEE <- function(fitted.model = NULL,
     W <- as.matrix(Matrix::bdiag(cov_matrices))
     X <- fitted.model$X
     XWX <- t(X) %*% W %*% X
-    XWX_inv <- try({ eigenMapMatrixInvert(XWX, n_cores = 1) }, silent = TRUE)
+    XWX_inv <- try({ eigenMapMatrixInvert(XWX, n_cores = 1L) }, silent = TRUE)
     if (inherits(XWX_inv, "try-error")) {
-      XWX_inv <- MASS::ginv(XWX)
+      XWX_inv <- eigenMapPseudoInverse(XWX, n_cores = 1L)
     }
     H <- X %*% XWX_inv %*% t(X) %*% W
     tr_H <- sum(diag(H))

diff --git a/R/marge2.R b/R/marge2.R
@@ -853,7 +853,7 @@ marge2 <- function(X_pred = NULL,
                                         data = model_df, 
                                         family = MASS::negative.binomial(theta_hat, link = "log"), 
                                         trace = FALSE, 
-                                        model = TRUE, 
+                                        model = FALSE, 
                                         y = FALSE, 
                                         fitted = TRUE)
       }

diff --git a/R/plotModels.R b/R/plotModels.R
@@ -64,6 +64,8 @@ plotModels <- function(test.dyn.res = NULL,
   # check inputs
   if (is.null(expr.mat) || is.null(pt) || is.null(gene) || is.null(test.dyn.res)) { stop("You forgot one or more of the arguments to plotModels().") }
   if ((is.gee || is.glmm) && is.null(id.vec)) { stop("id.vec must be provided to plotModels() when running in GEE or GLMM mode.") }
+  cor.structure <- tolower(cor.structure)
+  if (is.gee && !(cor.structure %in% c("ar1", "independence", "exchangeable"))) { stop("GEE models require a specified correlation structure.") }
   # get raw counts from SingleCellExperiment, Seurat, or CellDataSets object
   if (inherits(expr.mat, "SingleCellExperiment")) {
     expr.mat <- BiocGenerics::counts(expr.mat)

diff --git a/R/score_fun_gee.R b/R/score_fun_gee.R
@@ -5,7 +5,6 @@
 #' @author David I. Warton
 #' @author Jack R. Leary
 #' @importFrom RcppEigen fastLmPure
-#' @importFrom Matrix solve
 #' @importFrom MASS ginv
 #' @description Calculate the score statistic for a GEE model.
 #' @param Y The response variable. Defaults to NULL.
@@ -98,7 +97,7 @@ score_fun_gee <- function(Y = NULL,
                                    B = J21_transpose,
                                    n_cores = 1)
     Sigma <- Sigma22 - temp_prod_1 - temp_prod_2 + temp_prod_3
-    Sigma_inv <- try({ Matrix::solve(Sigma) }, silent = TRUE)
+    Sigma_inv <- try({ eigenMapMatrixInvert(Sigma, n_cores = 1L) }, silent = TRUE)
     if (inherits(Sigma_inv, "try-error")) {
       Sigma_inv <- MASS::ginv(Sigma)
     }

diff --git a/R/score_fun_glm.R b/R/score_fun_glm.R
@@ -5,8 +5,6 @@
 #' @author David I. Warton
 #' @author Jack R. Leary
 #' @importFrom RcppEigen fastLmPure
-#' @importFrom Matrix solve
-#' @importFrom MASS ginv
 #' @description Calculate the score statistic for a GLM model.
 #' @param Y The response variable. Defaults to NULL.
 #' @param VS.est_list A product of matrices. Defaults to NULL.
@@ -54,9 +52,9 @@ score_fun_glm <- function(Y = NULL,
     B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i),
                              B = XA,
                              n_cores = 1)
-    XVX_22 <- try({ Matrix::solve(inv.XVX_22) }, silent = TRUE)
+    XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22, n_cores = 1L) }, silent = TRUE)
     if (inherits(XVX_22, "try-error")) {
-      XVX_22 <- MASS::ginv(inv.XVX_22)
+      XVX_22 <- eigenMapPseudoInverse(inv.XVX_22, n_cores = 1L)
     }
     temp_prod <- eigenMapMatMult(A = B.est,
                                  B = XVX_22,

diff --git a/R/stat_out_score_gee_null.R b/R/stat_out_score_gee_null.R
@@ -5,9 +5,8 @@
 #' @author David I. Warton
 #' @author Jack R. Leary
 #' @importFrom geeM geem
-#' @importFrom MASS negative.binomial ginv
+#' @importFrom MASS negative.binomial
 #' @importFrom stats fitted.values
-#' @importFrom Matrix solve
 #' @description A function that calculates parts of the score statistic for GEEs only (it is used for the full path for forward selection).
 #' @param Y The response variable Defaults to NULL.
 #' @param B_null The design matrix matrix under the null model Defaults to NULL.
@@ -73,9 +72,9 @@ stat_out_score_gee_null <- function(Y = NULL,
     V_est_i <- eigenMapMatMult(A = temp_prod,
                                B = diag_sqrt_V_est,
                                n_cores = 1)
-    V_est_i_inv <- try({ Matrix::solve(V_est_i) }, silent = TRUE)
+    V_est_i_inv <- try({ eigenMapMatrixInvert(V_est_i, n_cores = 1L) }, silent = TRUE)
     if (inherits(V_est_i_inv, "try-error")) {
-      V_est_i_inv <- MASS::ginv(V_est_i)
+      V_est_i_inv <- eigenMapPseudoInverse(V_est_i, n_cores = 1L)
     }
     S_est_i <- t(Y)[temp_seq_n] - mu_est_sub
     temp_prod <- eigenMapMatMult(A = S_est_i,
@@ -121,9 +120,9 @@ stat_out_score_gee_null <- function(Y = NULL,
   if (nrow(J11) == 1 && ncol(J11) == 1) {
     J11_inv <- 1 / J11
   } else {
-    J11_inv <- try({ Matrix::solve(J11) }, silent = TRUE)
+    J11_inv <- try({ eigenMapMatrixInvert(J11, n_cores = 1L) }, silent = TRUE)
     if (inherits(J11_inv, "try-error")) {
-      J11_inv <- MASS::ginv(J11)
+      J11_inv <- eigenMapPseudoInverse(J11, n_cores = 1L)
     }
   }
   temp_prod <- eigenMapMatMult(A = J11_inv,

diff --git a/R/stat_out_score_glm_null.R b/R/stat_out_score_glm_null.R
@@ -6,8 +6,6 @@
 #' @author Jack R. Leary
 #' @importFrom gamlss gamlss
 #' @importFrom stats fitted.values
-#' @importFrom Matrix diag t
-#' @importFrom MASS ginv
 #' @description A function that calculates parts of the score statistic for GLMs only (it is used for the full path for forward selection).
 #' @param Y : The response variable Defaults to NULL.
 #' @param B_null : Design matrix under the null model. Defaults to NULL.
@@ -29,8 +27,8 @@ stat_out_score_glm_null <- function(Y = NULL, B_null = NULL) {
   mu_est <- as.matrix(stats::fitted.values(ests))
   V_est <- mu_est * (1 + mu_est * sigma_est)  # Type I NB variance = mu (1 + mu * sigma); sigma = 1 / theta
   VS_est_list <- (Y - mu_est) / V_est
-  mu_V_diag <- Matrix::diag(c(mu_est^2 / V_est))
-  temp_prod <- eigenMapMatMult(A = Matrix::t(B_null),
+  mu_V_diag <- diag(c(mu_est^2 / V_est))
+  temp_prod <- eigenMapMatMult(A = t(B_null),
                                B = mu_V_diag,
                                n_cores = 1)
   A_list_inv <- eigenMapMatMult(A = temp_prod,
@@ -39,12 +37,12 @@ stat_out_score_glm_null <- function(Y = NULL, B_null = NULL) {
   if (ncol(A_list_inv) == 1 && nrow(A_list_inv) == 1) {
     A_list <- 1 / A_list_inv
   } else {
-    A_list <- try({ solve(A_list_inv) }, silent = TRUE)
+    A_list <- try({ eigenMapMatrixInvert(A_list_inv, n_cores = 1L) }, silent = TRUE)
     if (inherits(A_list, "try-error")) {
-      A_list <- MASS::ginv(A_list_inv)
+      A_list <- eigenMapPseudoInverse(A_list_inv, n_cores = 1L)
     }
   }
-  B1_list <- eigenMapMatMult(A = Matrix::t(B_null),
+  B1_list <- eigenMapMatMult(A = t(B_null),
                              B = mu_V_diag,
                              n_cores = 1)
   res <- list(VS.est_list = VS_est_list,

diff --git a/R/waldTestGEE.R b/R/waldTestGEE.R
@@ -3,7 +3,6 @@
 #' @name waldTestGEE
 #' @author Jack R. Leary
 #' @description Performs a basic Wald test to determine whether an alternate model is significantly better than a nested null model. This is the GEE equivalent (kind of) of \code{\link{modelLRT}}. Be careful with small sample sizes.
-#' @importFrom MASS ginv
 #' @importFrom stats pchisq
 #' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL.
 #' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL.
@@ -74,7 +73,10 @@ waldTestGEE <- function(mod.1 = NULL,
     }
     vcov_mat <- vcov_mat[coef_idx, coef_idx]
     wald_test_stat <- try({
-      vcov_mat_inv <- eigenMapMatrixInvert(vcov_mat, n_cores = 1)
+      vcov_mat_inv <- eigenMapMatrixInvert(vcov_mat, n_cores = 1L)
+      if (inherits(vcov_mat_inv, "try-error")) {
+        vcov_mat_inv <- eigenMapPseudoInverse(vcov_mat, n_cores = 1L)
+      }
       as.numeric(crossprod(coef_vals, vcov_mat_inv) %*% coef_vals)
     }, silent = TRUE)
     if (inherits(wald_test_stat, "try-error")) {

diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -12,7 +12,7 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 #endif
 
 // eigenMapMatrixInvert
-SEXP eigenMapMatrixInvert(const Eigen::Map<Eigen::MatrixXd> A, int n_cores);
+Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map<Eigen::MatrixXd> A, int n_cores);
 RcppExport SEXP _scLANE_eigenMapMatrixInvert(SEXP ASEXP, SEXP n_coresSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
@@ -24,7 +24,7 @@ BEGIN_RCPP
 END_RCPP
 }
 // eigenMapMatMult
-SEXP eigenMapMatMult(const Eigen::Map<Eigen::MatrixXd> A, const Eigen::Map<Eigen::MatrixXd> B, int n_cores);
+Eigen::MatrixXd eigenMapMatMult(const Eigen::Map<Eigen::MatrixXd> A, const Eigen::Map<Eigen::MatrixXd> B, int n_cores);
 RcppExport SEXP _scLANE_eigenMapMatMult(SEXP ASEXP, SEXP BSEXP, SEXP n_coresSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
@@ -36,10 +36,24 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// eigenMapPseudoInverse
+Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map<Eigen::MatrixXd> A, double tolerance, int n_cores);
+RcppExport SEXP _scLANE_eigenMapPseudoInverse(SEXP ASEXP, SEXP toleranceSEXP, SEXP n_coresSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const Eigen::Map<Eigen::MatrixXd> >::type A(ASEXP);
+    Rcpp::traits::input_parameter< double >::type tolerance(toleranceSEXP);
+    Rcpp::traits::input_parameter< int >::type n_cores(n_coresSEXP);
+    rcpp_result_gen = Rcpp::wrap(eigenMapPseudoInverse(A, tolerance, n_cores));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_scLANE_eigenMapMatrixInvert", (DL_FUNC) &_scLANE_eigenMapMatrixInvert, 2},
     {"_scLANE_eigenMapMatMult", (DL_FUNC) &_scLANE_eigenMapMatMult, 3},
+    {"_scLANE_eigenMapPseudoInverse", (DL_FUNC) &_scLANE_eigenMapPseudoInverse, 3},
     {NULL, NULL, 0}
 };
 

diff --git a/src/eigenMapInvert.cpp b/src/eigenMapInvert.cpp
@@ -3,8 +3,8 @@
 // [[Rcpp::depends(RcppEigen)]]
 // [[Rcpp::export]]
 
-SEXP eigenMapMatrixInvert(const Eigen::Map<Eigen::MatrixXd> A, int n_cores = 1){
+Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map<Eigen::MatrixXd> A, int n_cores = 1){
   Eigen::setNbThreads(n_cores);
   Eigen::MatrixXd B = A.llt().solve(Eigen::MatrixXd::Identity(A.rows(), A.cols()));
-  return Rcpp::wrap(B);
+  return B;
 }
diff --git a/src/eigenMapMatMult.cpp b/src/eigenMapMatMult.cpp
@@ -3,11 +3,11 @@
 // [[Rcpp::depends(RcppEigen)]]
 // [[Rcpp::export]]
 
-SEXP eigenMapMatMult(const Eigen::Map<Eigen::MatrixXd> A,
-                     const Eigen::Map<Eigen::MatrixXd> B,
-                     int n_cores = 1) {
+Eigen::MatrixXd eigenMapMatMult(const Eigen::Map<Eigen::MatrixXd> A,
+                                const Eigen::Map<Eigen::MatrixXd> B,
+                                int n_cores = 1) {
   Eigen::setNbThreads(n_cores);
   Eigen::MatrixXd C;
   C.noalias() = A * B;
-  return Rcpp::wrap(C);
+  return C;
 }
diff --git a/src/eigenMapPseudoInverse.cpp b/src/eigenMapPseudoInverse.cpp
@@ -0,0 +1,20 @@
+#include <RcppEigen.h>
+
+// [[Rcpp::depends(RcppEigen)]]
+// [[Rcpp::export]]
+
+Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map<Eigen::MatrixXd> A, 
+                                      double tolerance = 1e-6, 
+                                      int n_cores = 1) {
+  Eigen::setNbThreads(n_cores);
+  Eigen::JacobiSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
+  Eigen::MatrixXd singularValues = svd.singularValues();
+  Eigen::MatrixXd singularValuesInv(A.cols(), A.rows());
+  singularValuesInv.setZero();
+  for (int i = 0; i < singularValues.size(); ++i) {
+    if (singularValues(i) > tolerance) {
+      singularValuesInv(i, i) = 1.0 / singularValues(i);
+    }
+  }
+  return svd.matrixV() * singularValuesInv * svd.matrixU().transpose();
+}