From 3e83ffa1a6a608dcc8937222212347f7555ce2fe Mon Sep 17 00:00:00 2001
From: Jack Leary <j.leary@ufl.edu>
Date: Mon, 25 Sep 2023 13:20:54 -0400
Subject: [PATCH] updated tests, docs, and added gene embedding fn -- closes
 #126

---
 DESCRIPTION                  |  2 +
 NAMESPACE                    |  3 +-
 NEWS.md                      | 15 +++++--
 R/clusterGenes.R             | 71 ++++++++++++--------------------
 R/embedGenes.R               | 78 ++++++++++++++++++++++++++++++++++++
 man/clusterGenes.Rd          |  4 +-
 man/embedGenes.Rd            | 50 +++++++++++++++++++++++
 tests/testthat/test_scLANE.R | 48 ++++++++++++++++------
 8 files changed, 206 insertions(+), 65 deletions(-)
 create mode 100644 R/embedGenes.R
 create mode 100644 man/embedGenes.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 8356220..fd37722 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -43,6 +43,8 @@ URL: https://github.com/jr-leary7/scLANE
 BugReports: https://github.com/jr-leary7/scLANE/issues
 Suggests: 
     covr, 
+    coop,
+    uwot, 
     ggh4x, 
     knitr,
     irlba, 
diff --git a/NAMESPACE b/NAMESPACE
index 980ab85..dbf4c7e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(clusterGenes)
 export(createCellOffset)
+export(embedGenes)
 export(enrichDynamicGenes)
 export(extractBreakpoints)
 export(fitGLMM)
@@ -95,11 +96,11 @@ importFrom(purrr,reduce)
 importFrom(scales,label_comma)
 importFrom(scales,label_number)
 importFrom(splines,bs)
+importFrom(stats,as.dist)
 importFrom(stats,as.formula)
 importFrom(stats,coef)
 importFrom(stats,cutree)
 importFrom(stats,deviance)
-importFrom(stats,dist)
 importFrom(stats,fitted)
 importFrom(stats,fitted.values)
 importFrom(stats,hclust)
diff --git a/NEWS.md b/NEWS.md
index 066e0bc..4d9e81b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,14 +1,21 @@
-# scLANE 0.7.2 
+# `scLANE` 0.7.3
+
+* Added a function named `embedGenes()` that takes a smoothed counts matrix as input & returns PCA & UMAP embeddings along with a graph-based clustering. 
+* Updated the `clusterGenes()` function to be much more efficient as well as changing the distance metric used to be cosine dissimilarity. 
+* Added `theme_scLANE()` for output plots. 
+* Enhanced documentation. 
+
+# `scLANE` 0.7.2 
 
 * Added a function named `sortGenesHeatmap()` that aids in the creation of expression cascade heatmaps by sorting genes according to where in pseudotime their peak expression is. 
 * Changed the parameter `approx.knot` in the `testDynamic()` function to use (stochasticity-controlled) subsampling instead of `seq()` to reduce candidate knot space. 
-* Added `summarizeModels()` to sum up slopes across pseudotime intervals
+* Added `summarizeModels()` to sum up slopes across pseudotime intervals. 
 
-# scLANE 0.7.1
+# `scLANE` 0.7.1
 
 * Changed input format of all functions to allow counts matrices formatted as `SingleCellExperiment` or `Seurat` objects, sparse matrices, or dense matrices.
 * Updated visualization functions to reflect changes made in `ggplot2` v3.4 (mostly changing the `size` parameter in line-based geoms to be `linewidth` instead). 
 
-# scLANE 0.6.3
+# `scLANE` 0.6.3
 
 * Added a `NEWS.md` file to track changes to the package.
diff --git a/R/clusterGenes.R b/R/clusterGenes.R
index 30a15e2..ff81b59 100644
--- a/R/clusterGenes.R
+++ b/R/clusterGenes.R
@@ -2,10 +2,10 @@
 #'
 #' @name clusterGenes
 #' @author Jack Leary
-#' @description This function takes as input the output from \code{\link{testDynamic}} and clusters the fitted values from the model for each gene using one of several user-chosen algorithms. An approximately optimal clustering is determined by iterating over reasonable hyperparameter values & choosing the value with the highest mean silhouette score.
+#' @description This function takes as input the output from \code{\link{testDynamic}} and clusters the fitted values from the model for each gene using one of several user-chosen algorithms. An approximately optimal clustering is determined by iterating over reasonable hyperparameter values & choosing the value with the highest mean silhouette score based on the cosine distance.
 #' @import magrittr
 #' @importFrom purrr map discard map2 reduce
-#' @importFrom stats setNames hclust cutree kmeans dist
+#' @importFrom stats setNames hclust cutree kmeans as.dist
 #' @param test.dyn.res The list returned by \code{\link{testDynamic}} - no extra processing required. Defaults to NULL.
 #' @param pt A data.frame containing the pseudotime or latent time estimates for each cell. Defaults to NULL.
 #' @param size.factor.offset (Optional) An offset to be used to rescale the fitted values. Can be generated easily with \code{\link{createCellOffset}}. No need to provide if the GEE backend was used. Defaults to NULL.
@@ -19,6 +19,7 @@
 #' }
 #' @return A data.frame of with three columns: \code{Gene}, \code{Lineage}, and \code{Cluster}.
 #' @seealso \code{\link{testDynamic}}
+#' @seealso \code{\link{embedGenes}}
 #' @seealso \code{\link{plotClusteredGenes}}
 #' @export
 #' @examples
@@ -74,80 +75,65 @@ clusterGenes <- function(test.dyn.res = NULL,
                                              n = n.PC,
                                              center = TRUE,
                                              scale. = TRUE)
+      dist_matrix <- stats::as.dist(1 - coop::tcosine(x = fitted_vals_pca$x))
+    } else {
+      dist_matrix <- stats::as.dist(1 - coop::tcosine(x = fitted_vals_mat))
     }
     # hierarchical clustering routine w/ Ward's linkage
     if (clust.algo  == "hclust") {
-      if (use.pca) {
-        hclust_tree <- stats::hclust(stats::dist(fitted_vals_pca$x), method = "ward.D2")
-      } else {
-        hclust_tree <- stats::hclust(stats::dist(fitted_vals_mat), method = "ward.D2")
-      }
+      hclust_tree <- stats::hclust(dist_matrix, method = "ward.D2")
       k_vals <- c(2:10)
       sil_vals <- vector("numeric", 9L)
+      clust_list <- vector("list", 9L)
       for (k in seq_along(k_vals)) {
         clust_res <- stats::cutree(hclust_tree, k = k_vals[k])
-        if (use.pca) {
-          sil_res <- cluster::silhouette(clust_res, stats::dist(fitted_vals_pca$x))
-        } else {
-          sil_res <- cluster::silhouette(clust_res, stats::dist(fitted_vals_mat))
-        }
-        sil_vals[k] <- mean(sil_res[, 3])  # silhouette widths stored in third column
+        sil_res <- cluster::silhouette(clust_res, dist_matrix)
+        sil_vals[k] <- mean(sil_res[, 3])
+        clust_list[[k]] <- clust_res
       }
-      k_to_use <- k_vals[which.max(sil_vals)]
-      clust_res <- stats::cutree(hclust_tree, k = k_to_use)
       gene_clusters <- data.frame(Gene = rownames(fitted_vals_mat),
                                   Lineage = lineages[l],
-                                  Cluster = clust_res)
+                                  Cluster = clust_list[[which.max(sil_vals)]])
     # k-means clustering routine w/ Hartigan-Wong algorithm
     } else if (clust.algo == "kmeans") {
       k_vals <- c(2:10)
       sil_vals <- vector("numeric", 9L)
+      clust_list <- vector("list", 9L)
       for (k in seq_along(k_vals)) {
         if (use.pca) {
           clust_res <- stats::kmeans(fitted_vals_pca$x,
                                      centers = k_vals[k],
                                      nstart = 5,
                                      algorithm = "Hartigan-Wong")
-          sil_res <- cluster::silhouette(clust_res$cluster, stats::dist(fitted_vals_pca$x))
         } else {
           clust_res <- stats::kmeans(fitted_vals_mat,
                                      centers = k_vals[k],
                                      nstart = 5,
                                      algorithm = "Hartigan-Wong")
-          sil_res <- cluster::silhouette(clust_res$cluster, stats::dist(fitted_vals_mat))
         }
-        sil_vals[k] <- mean(sil_res[, 3])  # silhouette widths stored in third column
-      }
-      k_to_use <- k_vals[which.max(sil_vals)]
-      if (use.pca) {
-        clust_res <- stats::kmeans(fitted_vals_pca$x,
-                                   centers = k_to_use,
-                                   nstart = 10,
-                                   algorithm = "Hartigan-Wong")
-      } else {
-        clust_res <- stats::kmeans(fitted_vals_mat,
-                                   centers = k_to_use,
-                                   nstart = 10,
-                                   algorithm = "Hartigan-Wong")
+        clust_list[[k]] <- clust_res
+        sil_res <- cluster::silhouette(clust_res$cluster, dist_matrix)
+        sil_vals[k] <- mean(sil_res[, 3])
       }
       gene_clusters <- data.frame(Gene = rownames(fitted_vals_mat),
                                   Lineage = lineages[l],
-                                  Cluster = clust_res$cluster)
+                                  Cluster = clust_list[[which.max(sil_vals)]]$cluster)
     # Leiden clustering routine
     } else if (clust.algo == "leiden") {
       if (use.pca) {
         fitted_vals_graph <- bluster::makeSNNGraph(x = fitted_vals_pca$x,
-                                                   k = 30,
+                                                   k = 20,
                                                    type = "jaccard",
-                                                   BNPARAM = BiocNeighbors::AnnoyParam(distance = "Euclidean"))
+                                                   BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
       } else {
         fitted_vals_graph <- bluster::makeSNNGraph(x = fitted_vals_mat,
-                                                   k = 30,
+                                                   k = 20,
                                                    type = "jaccard",
-                                                   BNPARAM = BiocNeighbors::AnnoyParam(distance = "Euclidean"))
+                                                   BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
       }
       res_vals <- seq(0.1, 1, by = 0.1)
       sil_vals <- vector("numeric", 10L)
+      clust_list <- vector("list", 10L)
       for (r in seq_along(res_vals)) {
         clust_res <- igraph::cluster_leiden(graph = fitted_vals_graph,
                                             objective_function = "modularity",
@@ -155,21 +141,14 @@ clusterGenes <- function(test.dyn.res = NULL,
         if (clust_res$nb_clusters == 1) {
           sil_vals[r] <- 0
         } else {
-          if (use.pca) {
-            sil_res <- cluster::silhouette(clust_res$membership, stats::dist(fitted_vals_pca$x))
-          } else {
-            sil_res <- cluster::silhouette(clust_res$membership, stats::dist(fitted_vals_mat))
-          }
+          sil_res <- cluster::silhouette(clust_res$membership, dist_matrix)
           sil_vals[r] <- mean(sil_res[, 3])
         }
+        clust_list[[r]] <- clust_res$membership
       }
-      res_to_use <- res_vals[which.max(sil_vals)]
-      clust_res <- igraph::cluster_leiden(graph = fitted_vals_graph,
-                                          objective_function = "modularity",
-                                          resolution_parameter = res_to_use)
       gene_clusters <- data.frame(Gene = rownames(fitted_vals_mat),
                                   Lineage = lineages[l],
-                                  Cluster = clust_res$membership)
+                                  Cluster = clust_list[[which.max(sil_vals)]])
     }
     gene_cluster_list[[l]] <- gene_clusters
   }
diff --git a/R/embedGenes.R b/R/embedGenes.R
new file mode 100644
index 0000000..f41b307
--- /dev/null
+++ b/R/embedGenes.R
@@ -0,0 +1,78 @@
+#' Generate a table of fitted values and celltype metadata for genes of interest.
+#'
+#' @name embedGenes
+#' @author Jack Leary
+#' @import magrittr
+#' @importFrom purrr map map_dbl
+#' @importFrom dplyr bind_cols
+#' @importFrom stats as.dist
+#' @description Embed genes in dimension-reduced space given a smoothed counts matrix.
+#' @param smoothed.counts The output from \code{\link{smoothedCountsMatrix}}. Defaults to NULL.
+#' @param genes A character vector of genes to embed. If not specified, all genes in \code{test.dyn.res} are used. Defaults to NULL.
+#' @param pc.embed (Optional) How many PCs should be used to cluster the genes and run UMAP? Defaults to 30.
+#' @param pcs.return (Optional) How many principal components should be included in the output? Defaults to 2.
+#' @param cluster.genes (Optional) Should genes be clustered in PCA space using the Leiden algorithm? Defaults to TRUE.
+#' @param gene.meta.data (Optional) A data.frame of metadata values for each gene (HVG status, Ensembl ID, gene biotype, etc.) that will be included in the result table. Defaults to NULL.
+#' @param k.param (Optional) The value of nearest-neighbors used in creating the SNN graph prior to clustering & in running UMAP. Defaults to 20.
+#' @param random.seed (Optional) The random seed used to control stochasticity in the clustering algorithm. Defaults to 312.
+#' @return A data.frame containing embedding coordinates, cluster IDs, and metadata for each gene.
+#' @export
+#' @examples
+#' \dontrun{
+#' embedGenes(smoothed_counts$Lineage_A,
+#'            pcs.return = 3,
+#'            cluster.genes = TRUE)
+#' }
+
+embedGenes <- function(smoothed.counts = NULL,
+                       genes = NULL,
+                       pc.embed = 30,
+                       pcs.return = 2,
+                       cluster.genes = TRUE,
+                       gene.meta.data = NULL,
+                       k.param = 20,
+                       random.seed = 312) {
+  # check inputs
+  if (is.null(smoothed.counts)) { stop("You forgot to provide a smoothed counts matrix to embedGenes().") }
+  # embeddings
+  set.seed(random.seed)
+  smoothed_counts_pca <- irlba::prcomp_irlba(t(smoothed.counts),
+                                             pc.embed = pc.embed,
+                                             center = TRUE,
+                                             scale. = TRUE)
+  smoothed_counts_umap <- uwot::umap(smoothed_counts_pca$x,
+                                     n_components = 2,
+                                     metric = "cosine",
+                                     n_neighbors = k.param,
+                                     init = "spectral")
+  # clustering w/ silhouette score parameter tuning
+  smoothed_counts_snn <- bluster::makeSNNGraph(smoothed_counts_pca$x,
+                                               k = k.param,
+                                               type = "jaccard",
+                                               BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
+  dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed_counts_pca$x))
+  clust_runs <- purrr::map(c(0.1, 0.2, 0.3, 0.4, 0.5), \(r) {
+    smoothed_counts_clust <- igraph::cluster_leiden(smoothed_counts_snn,
+                                                    objective_function = "modularity",
+                                                    resolution_parameter = r)
+    if (smoothed_counts_clust$nb_clusters == 1) {
+      sil_val <- 0
+    } else {
+      sil_val <- mean(cluster::silhouette(as.integer(smoothed_counts_clust$membership - 1L), dist_matrix)[, 3])
+    }
+    clust_res <- list(clusters = as.factor(smoothed_counts_clust$membership - 1L),
+                      resolution = r,
+                      silhouette = sil_val)
+    return(clust_res)
+  })
+  best_clustering <- which.max(purrr::map_dbl(clust_runs, \(x) x$silhouette))
+  # prepare results
+  pca_df <- as.data.frame(smoothed_counts_pca$x[, seq(pcs.return)])
+  colnames(pca_df) <- paste0("pc", seq(pcs.return))
+  gene_clust_df <- data.frame(gene = colnames(smoothed.counts),
+                              leiden = clust_runs[[best_clustering]]$clusters,
+                              umap1 = smoothed_counts_umap[, 1],
+                              umap2 = smoothed_counts_umap[, 2])
+  gene_clust_df <- dplyr::bind_cols(gene_clust_df, pca_df)
+  return(gene_clust_df)
+}
diff --git a/man/clusterGenes.Rd b/man/clusterGenes.Rd
index 0637e33..135c081 100644
--- a/man/clusterGenes.Rd
+++ b/man/clusterGenes.Rd
@@ -33,7 +33,7 @@ clusterGenes(
 A data.frame of with three columns: \code{Gene}, \code{Lineage}, and \code{Cluster}.
 }
 \description{
-This function takes as input the output from \code{\link{testDynamic}} and clusters the fitted values from the model for each gene using one of several user-chosen algorithms. An approximately optimal clustering is determined by iterating over reasonable hyperparameter values & choosing the value with the highest mean silhouette score.
+This function takes as input the output from \code{\link{testDynamic}} and clusters the fitted values from the model for each gene using one of several user-chosen algorithms. An approximately optimal clustering is determined by iterating over reasonable hyperparameter values & choosing the value with the highest mean silhouette score based on the cosine distance.
 }
 \details{
 \itemize{
@@ -58,6 +58,8 @@ clusterGenes(gene_stats,
 \seealso{
 \code{\link{testDynamic}}
 
+\code{\link{embedGenes}}
+
 \code{\link{plotClusteredGenes}}
 }
 \author{
diff --git a/man/embedGenes.Rd b/man/embedGenes.Rd
new file mode 100644
index 0000000..9743bf6
--- /dev/null
+++ b/man/embedGenes.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/embedGenes.R
+\name{embedGenes}
+\alias{embedGenes}
+\title{Generate a table of fitted values and celltype metadata for genes of interest.}
+\usage{
+embedGenes(
+  smoothed.counts = NULL,
+  genes = NULL,
+  pc.embed = 30,
+  pcs.return = 2,
+  cluster.genes = TRUE,
+  gene.meta.data = NULL,
+  k.param = 20,
+  random.seed = 312
+)
+}
+\arguments{
+\item{smoothed.counts}{The output from \code{\link{smoothedCountsMatrix}}. Defaults to NULL.}
+
+\item{genes}{A character vector of genes to embed. If not specified, all genes in \code{test.dyn.res} are used. Defaults to NULL.}
+
+\item{pc.embed}{(Optional) How many PCs should be used to cluster the genes and run UMAP? Defaults to 30.}
+
+\item{pcs.return}{(Optional) How many principal components should be included in the output? Defaults to 2.}
+
+\item{cluster.genes}{(Optional) Should genes be clustered in PCA space using the Leiden algorithm? Defaults to TRUE.}
+
+\item{gene.meta.data}{(Optional) A data.frame of metadata values for each gene (HVG status, Ensembl ID, gene biotype, etc.) that will be included in the result table. Defaults to NULL.}
+
+\item{k.param}{(Optional) The value of nearest-neighbors used in creating the SNN graph prior to clustering & in running UMAP. Defaults to 20.}
+
+\item{random.seed}{(Optional) The random seed used to control stochasticity in the clustering algorithm. Defaults to 312.}
+}
+\value{
+A data.frame containing embedding coordinates, cluster IDs, and metadata for each gene.
+}
+\description{
+Embed genes in dimension-reduced space given a smoothed counts matrix.
+}
+\examples{
+\dontrun{
+embedGenes(smoothed_counts$Lineage_A,
+           pcs.return = 3,
+           cluster.genes = TRUE)
+}
+}
+\author{
+Jack Leary
+}
diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R
index 24ab3db..1b57648 100644
--- a/tests/testthat/test_scLANE.R
+++ b/tests/testthat/test_scLANE.R
@@ -1,8 +1,8 @@
 # load data & prepare for testing
 load(system.file("testdata/sim_test_data.RData", package = "scLANE"))
 cell_offset <- createCellOffset(sim_data)
-genes_to_test <- c(rownames(sim_data)[SummarizedExperiment::rowData(sim_data)$geneStatus_overall == "Dynamic"][1:5],
-                   rownames(sim_data)[SummarizedExperiment::rowData(sim_data)$geneStatus_overall == "NotDynamic"][1:5])
+genes_to_test <- c(rownames(sim_data)[SummarizedExperiment::rowData(sim_data)$geneStatus_overall == "Dynamic"][1:10],
+                   rownames(sim_data)[SummarizedExperiment::rowData(sim_data)$geneStatus_overall == "NotDynamic"][1:10])
 counts_test <- t(as.matrix(SingleCellExperiment::counts(sim_data)[genes_to_test, ]))
 pt_test <- data.frame(PT = sim_data$cell_time_normed)
 
@@ -41,7 +41,7 @@ withr::with_output_sink(tempfile(), {
                                 cor.structure = "ar1",
                                 id.vec = sim_data$subject,
                                 n.cores = 2,
-                                track.time = TRUE)
+                                track.time = FALSE)
   glmm_gene_stats <- testDynamic(sim_data,
                                  pt = pt_test,
                                  genes = genes_to_test,
@@ -149,12 +149,20 @@ withr::with_output_sink(tempfile(), {
                           id.vec = sim_data$subject)
   # downstream analysis
   set.seed(312)
-  gene_clusters <- clusterGenes(test.dyn.res = glm_gene_stats,
-                                pt = pt_test,
-                                size.factor.offset = cell_offset,
-                                clust.algo = "leiden")
+  gene_clusters_leiden <- clusterGenes(test.dyn.res = glm_gene_stats,
+                                       pt = pt_test,
+                                       size.factor.offset = cell_offset,
+                                       clust.algo = "leiden")
+  gene_clusters_kmeans <- clusterGenes(test.dyn.res = glm_gene_stats,
+                                       pt = pt_test,
+                                       size.factor.offset = cell_offset,
+                                       clust.algo = "kmeans")
+  gene_clusters_hclust <- clusterGenes(test.dyn.res = glm_gene_stats,
+                                       pt = pt_test,
+                                       size.factor.offset = cell_offset,
+                                       clust.algo = "hclust")
   gene_clust_table <- plotClusteredGenes(glm_gene_stats,
-                                         gene.clusters = gene_clusters,
+                                         gene.clusters = gene_clusters_leiden,
                                          size.factor.offset = cell_offset,
                                          pt = pt_test,
                                          n.cores = 2)
@@ -163,6 +171,11 @@ withr::with_output_sink(tempfile(), {
                                           size.factor.offset = cell_offset,
                                           parallel.exec = TRUE,
                                           n.cores = 2)
+  gene_embedding <- embedGenes(smoothed.counts = smoothed_counts$Lineage_A,
+                               pc.embed = 5,
+                               pcs.return = 2,
+                               k.param = 5,
+                               random.seed = 312)
   sorted_genes <- sortGenesHeatmap(heatmap.mat = smoothed_counts$Lineage_A,
                                    pt.vec = pt_test$PT)
   fitted_values_table <- getFittedValues(test.dyn.res = glm_gene_stats,
@@ -201,9 +214,9 @@ test_that("testDynamic() output", {
   expect_s3_class(glm_gene_stats, "scLANE")
   expect_s3_class(gee_gene_stats, "scLANE")
   expect_s3_class(glmm_gene_stats, "scLANE")
-  expect_length(glm_gene_stats, 10)
-  expect_length(gee_gene_stats, 10)
-  expect_length(glmm_gene_stats, 10)
+  expect_length(glm_gene_stats, 20)
+  expect_length(gee_gene_stats, 20)
+  expect_length(glmm_gene_stats, 20)
   expect_s3_class(glm_gene_stats$ABCF1$Lineage_A$MARGE_Summary, "data.frame")
   expect_s3_class(gee_gene_stats$ABCF1$Lineage_A$MARGE_Summary, "data.frame")
   expect_s3_class(glmm_gene_stats$ABCF1$Lineage_A$MARGE_Summary, "data.frame")
@@ -293,8 +306,12 @@ test_that("plotModels() output", {
 })
 
 test_that("clusterGenes() output", {
-  expect_s3_class(gene_clusters, "data.frame")
-  expect_equal(ncol(gene_clusters), 3)
+  expect_s3_class(gene_clusters_leiden, "data.frame")
+  expect_s3_class(gene_clusters_kmeans, "data.frame")
+  expect_s3_class(gene_clusters_hclust, "data.frame")
+  expect_equal(ncol(gene_clusters_leiden), 3)
+  expect_equal(ncol(gene_clusters_kmeans), 3)
+  expect_equal(ncol(gene_clusters_hclust), 3)
 })
 
 test_that("plotClusteredGenes() output", {
@@ -308,6 +325,11 @@ test_that("smoothedCountsMatrix() output", {
   expect_type(smoothed_counts$Lineage_A, "double")
 })
 
+test_that("embedGenes() output", {
+  expect_s3_class(gene_embedding, "data.frame")
+  expect_equal(ncol(gene_embedding), 6)
+})
+
 test_that("sortGenesHeatmap() output", {
   expect_type(sorted_genes, "character")
   expect_length(sorted_genes, ncol(smoothed_counts$Lineage_A))