updated documentation & test coverage

jr-leary7 · Sep 27, 2023 · 801b508 · 801b508
1 parent d94cdde
commit 801b508
Show file tree

Hide file tree

Showing 8 changed files with 65 additions and 123 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -4,6 +4,7 @@
 * Updated the `clusterGenes()` function to be much more efficient as well as changing the distance metric used to be cosine dissimilarity. 
 * Added `theme_scLANE()` for output plots. 
 * Enhanced documentation. 
+* Increased test coverage.
 
 # `scLANE` 0.7.2 
 

diff --git a/R/backward_sel.R b/R/backward_sel.R
diff --git a/R/backward_sel_WIC.R b/R/backward_sel_WIC.R
@@ -1,25 +1,31 @@
 #' Backward selection function for MARGE - uses the Wald information criterion (WIC).
 #'
 #' @name backward_sel_WIC
-#' @param Y : the response variable. Defaults to NULL.
-#' @param B_new : the model matrix. Defaults to NULL.
+#' @author Jakub Stoklosa
+#' @author David I. Warton
+#' @author Jack Leary
+#' @importFrom gamlss gamlss
+#' @importFrom geeM geem
+#' @importFrom MASS negative.binomial
+#' @importFrom withr with_output_sink
+#' @param Y The response variable. Defaults to NULL.
+#' @param B_new The model matrix. Defaults to NULL.
 #' @param is.gee Is the model a GEE? Defaults to FALSE.
 #' @param id.vec A vector of observation IDs that is necessary for fitting a GEE model. Defaults to NULL.
 #' @param cor.structure The specified working correlation structure of the GEE model. Must be one of "independence", "ar1", or "exchangeable". Defaults to NULL.
 #' @return \code{backward_sel_WIC} returns the Wald statistic from the fitted model (the penalty is applied later on).
-#' @author Jakub Stoklosa and David I. Warton
 #' @references Stoklosa, J. Gibb, H. Warton, D.I. Fast forward selection for Generalized Estimating Equations With a Large Number of Predictor Variables. \emph{Biometrics}, \strong{70}, 110--120.
 #' @references Stoklosa, J. and Warton, D.I. (2018). A generalized estimating equation approach to multivariate adaptive regression splines. \emph{Journal of Computational and Graphical Statistics}, \strong{27}, 245--253.
-#' @importFrom gamlss gamlss
-#' @importFrom geeM geem
-#' @importFrom MASS negative.binomial
-#' @seealso \code{\link{backward_sel}}
 
-backward_sel_WIC <- function(Y = NULL, B_new = NULL, is.gee = FALSE, id.vec = NULL, cor.structure = NULL) {
+backward_sel_WIC <- function(Y = NULL,
+                             B_new = NULL,
+                             is.gee = FALSE,
+                             id.vec = NULL,
+                             cor.structure = NULL) {
   # check inputs
   if (is.null(Y) || is.null(B_new)) { stop("Some inputs are missing from backward_sel_WIC().") }
-  if (is.gee & is.null(id.vec)) { stop("GEEs require a vector of observation IDs in backward_sel_WIC().") }
-  if (is.gee & is.null(cor.structure)) { stop("GEEs require a working correlation structure in backward_sel_WIC().") }
+  if (is.gee && is.null(id.vec)) { stop("GEEs require a vector of observation IDs in backward_sel_WIC().") }
+  if (is.gee && is.null(cor.structure)) { stop("GEEs require a working correlation structure in backward_sel_WIC().") }
   cor.structure <- tolower(cor.structure)
   if (is.gee) {
     fit <- geeM::geem(Y ~ B_new - 1,
@@ -32,10 +38,9 @@ backward_sel_WIC <- function(Y = NULL, B_new = NULL, is.gee = FALSE, id.vec = NU
     fit <- gamlss::gamlss(Y ~ B_new - 1,
                           family = "NBI",
                           trace = FALSE)
-    sink(tempfile())
-    fit_sum <- summary(fit)
-    sink()
-    fit_sum_mat <- as.matrix(fit_sum)
+    withr::with_output_sink(tempfile(), {
+      fit_sum_mat <- as.matrix(summary(fit))
+    })
     wald_stat <- ((fit_sum_mat[, 3])[-c(1, nrow(fit_sum_mat))])^2
   }
   return(wald_stat)

diff --git a/R/embedGenes.R b/R/embedGenes.R
@@ -48,33 +48,38 @@ embedGenes <- function(smoothed.counts = NULL,
                                      n_neighbors = k.param,
                                      init = "spectral")
   # clustering w/ silhouette score parameter tuning
-  smoothed_counts_snn <- bluster::makeSNNGraph(smoothed_counts_pca$x,
-                                               k = k.param,
-                                               type = "jaccard",
-                                               BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
-  dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed_counts_pca$x))
-  clust_runs <- purrr::map(c(0.1, 0.2, 0.3, 0.4, 0.5), \(r) {
-    smoothed_counts_clust <- igraph::cluster_leiden(smoothed_counts_snn,
-                                                    objective_function = "modularity",
-                                                    resolution_parameter = r)
-    if (smoothed_counts_clust$nb_clusters == 1) {
-      sil_val <- 0
-    } else {
-      sil_val <- mean(cluster::silhouette(as.integer(smoothed_counts_clust$membership - 1L), dist_matrix)[, 3])
-    }
-    clust_res <- list(clusters = as.factor(smoothed_counts_clust$membership - 1L),
-                      resolution = r,
-                      silhouette = sil_val)
-    return(clust_res)
-  })
-  best_clustering <- which.max(purrr::map_dbl(clust_runs, \(x) x$silhouette))
+  if (cluster.genes) {
+    smoothed_counts_snn <- bluster::makeSNNGraph(smoothed_counts_pca$x,
+                                                 k = k.param,
+                                                 type = "jaccard",
+                                                 BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
+    dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed_counts_pca$x))
+    clust_runs <- purrr::map(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7), \(r) {
+      smoothed_counts_clust <- igraph::cluster_leiden(smoothed_counts_snn,
+                                                      objective_function = "modularity",
+                                                      resolution_parameter = r)
+      if (smoothed_counts_clust$nb_clusters == 1) {
+        sil_val <- 0
+      } else {
+        sil_val <- mean(cluster::silhouette(as.integer(smoothed_counts_clust$membership - 1L), dist_matrix)[, 3])
+      }
+      clust_res <- list(clusters = as.factor(smoothed_counts_clust$membership - 1L),
+                        resolution = r,
+                        silhouette = sil_val)
+      return(clust_res)
+    })
+    best_clustering <- which.max(purrr::map_dbl(clust_runs, \(x) x$silhouette))
+    cluster_vec <- clust_runs[[best_clustering]]$clusters
+  } else {
+    cluster_vec <- NA_integer_
+  }
   # prepare results
   pca_df <- as.data.frame(smoothed_counts_pca$x[, seq(pcs.return)])
   colnames(pca_df) <- paste0("pc", seq(pcs.return))
-  gene_clust_df <- data.frame(gene = genes,
-                              leiden = clust_runs[[best_clustering]]$clusters,
-                              umap1 = smoothed_counts_umap[, 1],
-                              umap2 = smoothed_counts_umap[, 2])
-  gene_clust_df <- dplyr::bind_cols(gene_clust_df, pca_df)
-  return(gene_clust_df)
+  gene_df <- data.frame(gene = genes,
+                        leiden = cluster_vec,
+                        umap1 = smoothed_counts_umap[, 1],
+                        umap2 = smoothed_counts_umap[, 2])
+  gene_df <- dplyr::bind_cols(gene_df, pca_df)
+  return(gene_df)
 }
diff --git a/R/stripGLM.R b/R/stripGLM.R
@@ -17,8 +17,6 @@ stripGLM <- function(glm.obj = NULL) {
   if (inherits(glm.obj, "try-error")) {return(glm.obj)}
   if (is.null(glm.obj)) { stop("You forgot to supply inputs to stripGLM().") }
   if (!inherits(glm.obj, "glm")) { stop("Input to stripGLM() must be of class glm.") }
-
-
 
   # strip out unnecessary glm pieces
   glm.obj$effects <- c()

diff --git a/man/backward_sel.Rd b/man/backward_sel.Rd
diff --git a/man/backward_sel_WIC.Rd b/man/backward_sel_WIC.Rd
diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R
@@ -1,6 +1,8 @@
 # load data & prepare for testing
 load(system.file("testdata/sim_test_data.RData", package = "scLANE"))
+sim_data_seu <- Seurat::as.Seurat(sim_data)
 cell_offset <- createCellOffset(sim_data)
+cell_offset_seu <- createCellOffset(sim_data_seu)
 genes_to_test <- c(rownames(sim_data)[SummarizedExperiment::rowData(sim_data)$geneStatus_overall == "Dynamic"][1:10],
                    rownames(sim_data)[SummarizedExperiment::rowData(sim_data)$geneStatus_overall == "NotDynamic"][1:10])
 counts_test <- t(as.matrix(SingleCellExperiment::counts(sim_data)[genes_to_test, ]))
@@ -80,6 +82,7 @@ withr::with_output_sink(tempfile(), {
                       return.basis = TRUE,
                       return.GCV = TRUE,
                       return.WIC = TRUE)
+  marge_mod_stripped <- stripGLM(marge_mod$final_mod)
   # run GLM model -- with offset
   marge_mod_offset <- marge2(X_pred = pt_test,
                              Y = counts_test[, 3],
@@ -206,8 +209,11 @@ test_that("internal marge functions", {
 
 test_that("createCellOffset() output", {
   expect_type(cell_offset, "double")
+  expect_type(cell_offset_seu, "double")
   expect_length(cell_offset, 300)
+  expect_length(cell_offset_seu, 300)
   expect_false(any(is.na(cell_offset)))
+  expect_false(any(is.na(cell_offset_seu)))
 })
 
 test_that("testDynamic() output", {
@@ -266,6 +272,7 @@ test_that("marge2() output -- GLM backend", {
   expect_s3_class(marge_mod_offset, "marge")
   expect_s3_class(marge_mod$final_mod, "negbin")
   expect_s3_class(marge_mod_offset$final_mod, "negbin")
+  expect_s3_class(marge_mod_stripped, "negbin")
   expect_equal(marge_mod$model_type, "GLM")
   expect_equal(marge_mod_offset$model_type, "GLM")
   expect_true(marge_mod$final_mod$converged)