diff --git a/.github/workflows/bioc-check.yaml b/.github/workflows/bioc-check.yaml new file mode 100644 index 0000000..13a952e --- /dev/null +++ b/.github/workflows/bioc-check.yaml @@ -0,0 +1,26 @@ +# workflow derived from: https://github.com/insightsengineering/bioc-check-action + + +on: + push: + branches: main + pull_request: + branches: main + +name: bioc-check + +jobs: + bioc-check: + runs-on: ubuntu-latest + name: BiocCheck + steps: + - uses: actions/checkout@v3 + - uses: r-lib/actions/setup-pandoc@v2 + - uses: r-lib/actions/setup-r@v2 + with: + r-version: release + http-user-agent: release + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + - name: Run BiocCheck + uses: insightsengineering/bioc-check-action@v1 diff --git a/.github/workflows/render-README.yaml b/.github/workflows/render-README.yaml index b7a250d..9d46354 100644 --- a/.github/workflows/render-README.yaml +++ b/.github/workflows/render-README.yaml @@ -10,7 +10,7 @@ name: render-README jobs: render: - runs-on: ubuntu-latest + runs-on: macos-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: @@ -22,9 +22,9 @@ jobs: - uses: r-lib/actions/setup-pandoc@v2 - name: install CRAN packages - run: Rscript -e 'install.packages(c("rmarkdown","ggplot2", "dplyr", "purrr", "remotes", "devtools", "BiocManager", "Seurat"), dependencies = TRUE)' + run: Rscript -e 'install.packages(c("rmarkdown","ggplot2", "dplyr", "purrr", "remotes", "devtools", "BiocManager", "Seurat"), force = TRUE)' - name: install BioConductor packages - run: Rscript -e 'BiocManager::install(c("SingleCellExperiment", "scater", "scran", "scuttle", "bluster"))' + run: Rscript -e 'BiocManager::install(c("SingleCellExperiment", "scater", "scran", "scuttle", "bluster"), force = TRUE)' - name: install GitHub packages run: Rscript -e 'remotes::install_github("jr-leary7/scLANE")' - name: render README diff --git a/DESCRIPTION b/DESCRIPTION index 5529133..ef4685d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -63,6 +63,7 @@ Suggests: BiocStyle, slingshot, gprofiler2, + BiocParallel, BiocGenerics, BiocNeighbors, testthat (>= 3.0.0), diff --git a/NEWS.md b/NEWS.md index e658ebb..e55e4c2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * Better compression of included datasets. * Added `geneProgramScoring()` for module scoring of dynamic gene clusters. * Added `plotModelCoefs()` to annotate gene dynamics plots with a table of model coefficients. +* Added citation file with link to Zenodo repository (until preprint is up). +* Added runnable examples to most functions. # `scLANE` v0.7.6 diff --git a/R/GetResultsDE.R b/R/GetResultsDE.R index 5b0ac01..22c0c41 100644 --- a/R/GetResultsDE.R +++ b/R/GetResultsDE.R @@ -16,12 +16,8 @@ #' @seealso \code{\link{testDynamic}} #' @seealso \code{\link[stats]{p.adjust}} #' @examples -#' \dontrun{ -#' getResultsDE(gene_stats) -#' getResultsDE(gene_stats, -#' p.adj.method = "BH", -#' fdr.cutoff = 5e-3) -#' } +#' data(scLANE_models) +#' scLANE_de_res <- getResultsDE(scLANE_models) getResultsDE <- function(test.dyn.res = NULL, p.adj.method = "holm", diff --git a/R/clusterGenes.R b/R/clusterGenes.R index 5833918..42a61fb 100644 --- a/R/clusterGenes.R +++ b/R/clusterGenes.R @@ -23,19 +23,12 @@ #' @seealso \code{\link{plotClusteredGenes}} #' @export #' @examples -#' \dontrun{ -#' clusterGenes(gene_stats, pt = pt_df) -#' clusterGenes(gene_stats, -#' pt = pt_df, -#' size.factor.offset = createCellOffset(sce_obj), -#' clust.algo = "kmeans", -#' use.pca = TRUE, -#' n.PC = 10, -#' lineages = "B") -#' clusterGenes(gene_stats, -#' pt = pt_df, -#' lineages = c("A", "C")) -#' } +#' data(sim_pseudotime) +#' data(scLANE_models) +#' cell_offset <- createCellOffset(sim_counts) +#' gene_clusters <- clusterGenes(scLANE_models, +#' pt = sim_pseudotime, +#' size.factor.offset = cell_offset) clusterGenes <- function(test.dyn.res = NULL, pt = NULL, diff --git a/R/createCellOffset.R b/R/createCellOffset.R index ecaa277..4576e3c 100644 --- a/R/createCellOffset.R +++ b/R/createCellOffset.R @@ -13,11 +13,8 @@ #' @seealso \code{\link[scuttle]{computeLibraryFactors}} #' @export #' @examples -#' \dontrun{ -#' createCellOffset(expr.mat = sce_obj) -#' createCellOffset(expr.mat = counts(sce_obj)) -#' createCellOffset(expr.mat = seu_obj, scale.factor = 1e5) -#' } +#' data(sim_counts) +#' cell_offset <- createCellOffset(sim_counts) createCellOffset <- function(expr.mat = NULL, scale.factor = 1e4) { # check inputs diff --git a/R/createSlopeTestData.R b/R/createSlopeTestData.R index 339213d..524c827 100644 --- a/R/createSlopeTestData.R +++ b/R/createSlopeTestData.R @@ -11,13 +11,6 @@ #' @return A data.frame containing model data. #' @seealso \code{\link{marge2}} #' @seealso \code{\link{testSlope}} -#' @examples -#' \dontrun{ -#' createSlopeTestData(marge_mod, pt_df) -#' createSlopeTestData(marge_mod, -#' pt = pt_df, -#' is.glmm = TRUE) -#' } createSlopeTestData <- function(marge.model = NULL, pt = NULL, diff --git a/R/data.R b/R/data.R index 5ef1677..6e59afb 100644 --- a/R/data.R +++ b/R/data.R @@ -17,3 +17,11 @@ #' } #' @usage data(sim_pseudotime) "sim_pseudotime" + +#' An object of class \code{scLANE}. +#' +#' Contains the results from running \code{\link{testDynamic}} on \code{\link{sim_counts}}. +#' +#' @format An object of class \code{scLANE}. +#' @usage data(scLANE_models) +"scLANE_models" diff --git a/R/embedGenes.R b/R/embedGenes.R index 7bb7eed..8af54f5 100644 --- a/R/embedGenes.R +++ b/R/embedGenes.R @@ -17,14 +17,16 @@ #' @param k.param (Optional) The value of nearest-neighbors used in creating the SNN graph prior to clustering & in running UMAP. Defaults to 20. #' @param resolution.param (Optional) The value of the resolution parameter for the Leiden algorithm. If unspecified, silhouette scoring is used to select an optimal value. Defaults to NULL. #' @param random.seed (Optional) The random seed used to control stochasticity in the clustering algorithm. Defaults to 312. +#' @param n.cores (Optional) Integer specifying the number of threads used by \code{\link[uwot]{umap}} and in \code{\link[bluster]{makeSNNGraph}}. Defaults to 2. #' @return A data.frame containing embedding coordinates, cluster IDs, and metadata for each gene. #' @export #' @examples -#' \dontrun{ -#' embedGenes(smoothed_counts$Lineage_A, -#' pcs.return = 3, -#' cluster.genes = TRUE) -#' } +#' data(sim_pseudotime) +#' data(scLANE_models) +#' smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, +#' pt = sim_pseudotime, +#' n.cores = 1L) +#' gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L) embedGenes <- function(smoothed.counts = NULL, genes = NULL, @@ -35,7 +37,8 @@ embedGenes <- function(smoothed.counts = NULL, gene.meta.data = NULL, k.param = 20, resolution.param = NULL, - random.seed = 312) { + random.seed = 312, + n.cores = 2L) { # check inputs if (is.null(smoothed.counts)) { stop("You forgot to provide a smoothed counts matrix to embedGenes().") } genes <- colnames(smoothed.counts) @@ -52,7 +55,8 @@ embedGenes <- function(smoothed.counts = NULL, n_neighbors = k.param, init = "spectral", nn_method = "annoy", - seed = random.seed) + seed = random.seed, + n_threads = n.cores) } else { smoothed_counts_umap <- uwot::umap(smoothed.counts, n_components = 2, @@ -60,7 +64,8 @@ embedGenes <- function(smoothed.counts = NULL, n_neighbors = k.param, init = "spectral", nn_method = "annoy", - seed = random.seed) + seed = random.seed, + n_threads = n.cores) } # clustering w/ silhouette score parameter tuning if (cluster.genes) { @@ -68,12 +73,14 @@ embedGenes <- function(smoothed.counts = NULL, smoothed_counts_snn <- bluster::makeSNNGraph(smoothed_counts_pca$x, k = k.param, type = "jaccard", - BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine")) + BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"), + BPPARAM = BiocParallel::SnowParam(workers = n.cores)) } else { smoothed_counts_snn <- bluster::makeSNNGraph(smoothed.counts, k = k.param, type = "jaccard", - BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine")) + BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"), + BPPARAM = BiocParallel::SnowParam(workers = n.cores)) } if (is.null(resolution.param)) { if (pca.init) { diff --git a/R/enrichDynamicGenes.R b/R/enrichDynamicGenes.R index 4acb05b..654e60d 100644 --- a/R/enrichDynamicGenes.R +++ b/R/enrichDynamicGenes.R @@ -12,12 +12,9 @@ #' @seealso \code{\link[gprofiler2]{gost}} #' @export #' @examples -#' \dontrun{ -#' enrichDynamicGenes(scLANE.de.res = de_stats) -#' enrichDynamicGenes(scLANE.de.res = de_stats, -#' lineage = "B", -#' species = "mmusculus") -#' } +#' data(scLANE_models) +#' scLANE_de_res <- getResultsDE(scLANE_models) +#' enr_res <- enrichDynamicGenes(scLANE_de_res) enrichDynamicGenes <- function(scLANE.de.res = NULL, lineage = NULL, diff --git a/R/extractBreakpoints.R b/R/extractBreakpoints.R index 3bb1871..4e19b00 100644 --- a/R/extractBreakpoints.R +++ b/R/extractBreakpoints.R @@ -8,9 +8,13 @@ #' @return A data.frame of breakpoints & their directions. #' @export #' @examples -#' \dontrun{ -#' extractBreakpoints(model = marge_mod) -#' } +#' data(sim_counts) +#' data(sim_pseudotime) +#' cell_offset <- createCellOffset(sim_counts) +#' marge_model <- marge2(sim_pseudotime, +#' Y = BiocGenerics::counts(sim_counts)[4, ], +#' Y.offset = cell_offset) +#' breakpoint_df <- extractBreakpoints(model = marge_model) extractBreakpoints <- function(model = NULL, directions = TRUE) { # check inputs diff --git a/R/fitGLMM.R b/R/fitGLMM.R index 3549575..8473967 100644 --- a/R/fitGLMM.R +++ b/R/fitGLMM.R @@ -22,12 +22,13 @@ #' @seealso \code{\link{modelLRT}} #' @export #' @examples -#' \dontrun{ -#' fitGLMM(X_pred = pt_df, -#' Y = raw_counts, -#' Y.offset = size_factor_vec, -#' id.vec = subject_vec) -#' } +#' data(sim_counts) +#' data(sim_pseudotime) +#' cell_offset <- createCellOffset(sim_counts) +#' glmm_mod <- fitGLMM(X_pred = sim_pseudotime, +#' Y = BiocGenerics::counts(sim_counts)[4, ], +#' Y.offset = cell_offset, +#' id.vec = sim_counts$subject) fitGLMM <- function(X_pred = NULL, Y = NULL, diff --git a/R/geneProgramScoring.R b/R/geneProgramScoring.R index 49a6102..1404ce1 100644 --- a/R/geneProgramScoring.R +++ b/R/geneProgramScoring.R @@ -15,18 +15,22 @@ #' @return Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix. #' @export #' @examples -#' \dontrun{ -#' geneProgramScoring(seu_obj, -#' genes = gene_embed$gene, -#' gene.clusters = gene_embed$leiden, -#' program.labels = c("cell cycle", "organogenesis")) -#' } +#' data(sim_pseudotime) +#' data(scLANE_models) +#' smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, +#' pt = sim_pseudotime, +#' n.cores = 1L) +#' gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L) +#' sim_counts <- geneProgramScoring(sim_counts, +#' genes = gene_embed$gene, +#' gene.clusters = gene_embed$leiden, +#' n.cores = 1L) geneProgramScoring <- function(expr.mat = NULL, genes = NULL, gene.clusters = NULL, program.labels = NULL, - n.cores = 2) { + n.cores = 2L) { # check inputs if (is.null(expr.mat) || is.null(genes) || is.null(gene.clusters)) { stop("Arguments to geneProgramScoring() are missing.") } if (!is.factor(gene.clusters)) { diff --git a/R/getFittedValues.R b/R/getFittedValues.R index 738f325..1364a39 100644 --- a/R/getFittedValues.R +++ b/R/getFittedValues.R @@ -20,14 +20,13 @@ #' @return A data.frame containing depth- and log1p-normalized expression, model predictions, and cell-level metadata. #' @export #' @examples -#' \dontrun{ -#' getFittedValues(gene_stats, -#' genes = c("Neurog3", "Epcam", "Krt19"), -#' pt = pt_df, -#' expr.mat = seu_obj, -#' cell.meta.data = seu_obj@meta.data, -#' ci.alpha = 0.05) -#' } +#' data(sim_counts) +#' data(sim_pseudotime) +#' data(scLANE_models) +#' fitted_vals <- getFittedValues(scLANE_models, +#' genes = sample(names(scLANE_models), 5), +#' pt = sim_pseudotime, +#' expr.mat = sim_counts) getFittedValues <- function(test.dyn.res = NULL, genes = NULL, diff --git a/R/getKnotDist.R b/R/getKnotDist.R index c88fc4b..502f226 100644 --- a/R/getKnotDist.R +++ b/R/getKnotDist.R @@ -11,9 +11,8 @@ #' @return A data.frame containing gene name, lineage ID, and knot location in pseudotime. #' @export #' @examples -#' \dontrun{ -#' getKnotDist(gene_stats) -#' } +#' data(scLANE_models) +#' knot_dist <- getKnotDist(scLANE_models) getKnotDist <- function(test.dyn.res = NULL, dyn.genes = NULL) { # check inputs diff --git a/R/marge2.R b/R/marge2.R index 73dfedc..7f87a2b 100644 --- a/R/marge2.R +++ b/R/marge2.R @@ -42,24 +42,12 @@ #' @seealso \code{\link[geeM]{geem}} #' @export #' @examples -#' \dontrun{ -#' marge2(pseudotime_df, -#' Y = expr_vec, -#' M = 3) -#' marge2(pseudotime_df, -#' Y = expr_vec, -#' Y.offset = size_factor_vec, -#' is.gee = TRUE, -#' id.vec = subject_vec, -#' cor.structure = "exchangeable") -#' marge2(pseudotime_df, -#' Y = expr_vec, -#' is.gee = TRUE, -#' id.vec = subject_vec, -#' cor.structure = "ar1", -#' n.knot.max = 10, -#' return.basis = TRUE) -#' } +#' data(sim_counts) +#' data(sim_pseudotime) +#' cell_offset <- createCellOffset(sim_counts) +#' marge_model <- marge2(sim_pseudotime, +#' Y = BiocGenerics::counts(sim_counts)[4, ], +#' Y.offset = cell_offset) marge2 <- function(X_pred = NULL, Y = NULL, diff --git a/R/nbGAM.R b/R/nbGAM.R index 0811985..1462559 100644 --- a/R/nbGAM.R +++ b/R/nbGAM.R @@ -21,18 +21,14 @@ #' @seealso \code{\link[gamlss.dist]{NBI}} #' @export #' @examples -#' \dontrun{ -#' nbGAM(expr_vec, pt_df) -#' nbGAM(expr_vec, -#' pt = pt_df, -#' id.vec = subject_ids, -#' random.slopes = TRUE) -#' nbGAM(expr_vec, -#' pt = pt_df, -#' Y.offset = size_factor_vec, -#' penalize.spline = TRUE, -#' spline.df = 10) -#' } +#' data(sim_counts) +#' data(sim_pseudotime) +#' cell_offset <- createCellOffset(sim_counts) +#' gam_mod <- nbGAM(BiocGenerics::counts(sim_counts)[4, ], +#' pt = sim_pseudotime, +#' Y.offset = cell_offset, +#' penalize.spline = TRUE, +#' spline.df = 10) nbGAM <- function(expr = NULL, pt = NULL, diff --git a/R/npConvolve.R b/R/npConvolve.R index 3b76f81..f3e6159 100644 --- a/R/npConvolve.R +++ b/R/npConvolve.R @@ -9,12 +9,12 @@ #' @return A convolution with same length as the input vector. #' @details #' \itemize{ -#' \item The convolution here uses \code{\link[stats]{convolve}}, but creates the kernel and padding in such a way that it matches the output from `np.convolve` in Python's \code{numpy} matrix algebra package. +#' \item The convolution here uses \code{\link[stats]{convolve}}, but creates the kernel and padding in such a way that it matches the output from \code{np.convolve} in Python's \code{numpy} matrix algebra package. #' } #' @seealso \code{\link[stats]{convolve}} #' @export #' @examples -#' npConvolve(x = rnorm(20), conv.kernel = rep(1/5, 5)) +#' convolved_vec <- npConvolve(x = rnorm(20), conv.kernel = rep(1/5, 5)) #' npConvolve <- function(x = NULL, conv.kernel = NULL) { diff --git a/R/plotClusteredGenes.R b/R/plotClusteredGenes.R index a7e6cd6..1d7e6d6 100644 --- a/R/plotClusteredGenes.R +++ b/R/plotClusteredGenes.R @@ -37,7 +37,7 @@ plotClusteredGenes <- function(test.dyn.res = NULL, pt = NULL, size.factor.offset = NULL, parallel.exec = TRUE, - n.cores = 2) { + n.cores = 2L) { # check inputs if (is.null(test.dyn.res) || is.null(gene.clusters) || is.null(pt)) { stop("Arguments to plotClusteredGenes() are missing.") } colnames(pt) <- paste0("Lineage_", LETTERS[seq_len(ncol(pt))]) diff --git a/R/plotModelCoefs.R b/R/plotModelCoefs.R index 052f347..2915b42 100644 --- a/R/plotModelCoefs.R +++ b/R/plotModelCoefs.R @@ -18,13 +18,16 @@ #' @return A \code{ggplot2} object displaying a gene dynamics plot & a table of coefficients across pseudotime intervals. #' @export #' @examples -#' \dontrun{ -#' plotModelCoefs(gene_stats, -#' gene = "BRCA2", -#' pt = pt_df, -#' expr.mat = seu_obj, +#' data(sim_counts) +#' data(sim_pseudotime) +#' data(scLANE_models) +#' cell_offset <- createCellOffset(sim_counts) +#' scLANE_de_res <- getResultsDE(scLANE_models) +#' plotModelCoefs(scLANE_models, +#' gene = scLANE_de_res$Gene[1], +#' pt = sim_pseudotime, +#' expr.mat = sim_counts, #' size.factor.offset = cell_offset) -#' } plotModelCoefs <- function(test.dyn.res = NULL, gene = NULL, diff --git a/R/plotModels.R b/R/plotModels.R index 4e01a1f..4ad13f7 100644 --- a/R/plotModels.R +++ b/R/plotModels.R @@ -34,30 +34,15 @@ #' @return A \code{ggplot} object. #' @export #' @examples -#' \dontrun{ -#' plotModels(gene_stats, -#' gene = "AURKA", -#' pt = pt_df, -#' expr.mat = count_mat, -#' size.factor.offset = cell_offset) -#' plotModels(gene_stats, -#' gene = "CD3E", -#' pt = pt_df, -#' expr.mat = seu_obj, -#' size.factor.offset = cell_offset, -#' ci.alpha = 0.1, -#' filter.lineage = c("A", "C")) -#' plotModels(gene_stats, -#' gene = "CD14", -#' pt = pt_df, -#' expr.mat = sce_obj, -#' size.factor.offset = cell_offset, -#' is.glmm = TRUE, -#' id.vec = subject_ids, -#' plot.glm = TRUE, # plots an NB GLMM with random intercepts & slopes per-subject -#' plot.gam = TRUE, # plots an NB GAMM with random intercepts per-subject -#' gg.theme = ggplot2::theme_minimal()) -#' } +#' data(sim_counts) +#' data(scLANE_models) +#' data(sim_pseudotime) +#' cell_offset <- createCellOffset(sim_counts) +#' model_plot <- plotModels(scLANE_models, +#' gene = names(scLANE_models)[2], +#' pt = sim_pseudotime, +#' expr.mat = sim_counts, +#' size.factor.offset = cell_offset) plotModels <- function(test.dyn.res = NULL, gene = NULL, @@ -315,7 +300,7 @@ plotModels <- function(test.dyn.res = NULL, ggplot2::scale_y_continuous(labels = scales::label_comma()) + ggplot2::scale_x_continuous(labels = scales::label_number(accuracy = 0.1)) + ggplot2::labs(x = "Pseudotime", - y = "Expression", + y = ifelse(log1p.norm, "Normalized Expression", "Expression"), color = "Subject", fill = "Subject", title = gene) + @@ -347,7 +332,7 @@ plotModels <- function(test.dyn.res = NULL, ggplot2::scale_y_continuous(labels = scales::label_comma()) + ggplot2::scale_x_continuous(labels = scales::label_number(accuracy = 0.1)) + ggplot2::labs(x = "Pseudotime", - y = "Expression", + y = ifelse(log1p.norm, "Normalized Expression", "Expression"), color = "Lineage", fill = "Lineage", title = gene) + diff --git a/R/smoothedCountsMatrix.R b/R/smoothedCountsMatrix.R index 620006c..daa124f 100644 --- a/R/smoothedCountsMatrix.R +++ b/R/smoothedCountsMatrix.R @@ -19,14 +19,11 @@ #' @seealso \code{\link{testDynamic}} #' @export #' @examples -#' \dontrun{ -#' smoothedCountsMatrix(gene_stats, pt = pt_df) -#' smoothedCountsMatrix(gene_stats, -#' pt = pt_df, -#' genes = c("AURKA", "KRT19", "EPCAM", "GATA6"), -#' parallel.exec = TRUE, -#' n.cores = 2) -#' } +#' data(sim_pseudotime) +#' data(scLANE_models) +#' smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, +#' pt = sim_pseudotime, +#' n.cores = 1L) smoothedCountsMatrix <- function(test.dyn.res = NULL, size.factor.offset = NULL, @@ -34,7 +31,7 @@ smoothedCountsMatrix <- function(test.dyn.res = NULL, genes = NULL, log1p.norm = FALSE, parallel.exec = TRUE, - n.cores = 2) { + n.cores = 2L) { # check inputs if (is.null(test.dyn.res) || is.null(pt)) { stop("Please provide the scLANE output from testDynamic().") } # set up parallel execution @@ -58,7 +55,7 @@ smoothedCountsMatrix <- function(test.dyn.res = NULL, purrr::discard(rlang::is_na) %>% purrr::discard(rlang::is_null) %>% purrr::discard(\(p) rlang::inherits_only(p, "try-error")) - fitted_vals_mat <- purrr::map(fitted_vals_list, \(z) { + fitted_vals_mat <- furrr::future_map(fitted_vals_list, \(z) { if (is.null(size.factor.offset)) { exp(z$marge_link_fit) } else { diff --git a/R/sortGenesHeatmap.R b/R/sortGenesHeatmap.R index 1565280..7135177 100644 --- a/R/sortGenesHeatmap.R +++ b/R/sortGenesHeatmap.R @@ -12,11 +12,13 @@ #' @seealso \code{\link{smoothedCountsMatrix}} #' @export #' @examples -#' \dontrun{ -#' smoothed_counts <- smoothedCountsMatrix(gene_stats, pt = pt_df) -#' sortGenesheatmap(heatmap.mat = smoothed_counts$Lineage_A, -#' pt.vec = pt_df[!is.na(pt_df$Lineage_A), ]$Lineage_A) -#' } +#' data(sim_pseudotime) +#' data(scLANE_models) +#' smoothed_counts <- smoothedCountsMatrix(scLANE_models, +#' pt = sim_pseudotime, +#' n.cores = 1L) +#' sorted_genes <- sortGenesHeatmap(smoothed_counts$Lineage_A, +#' pt.vec = sim_pseudotime$PT) sortGenesHeatmap <- function(heatmap.mat = NULL, pt.vec = NULL) { # check inputs diff --git a/R/stripGLM.R b/R/stripGLM.R index a18b0ea..72a0923 100644 --- a/R/stripGLM.R +++ b/R/stripGLM.R @@ -8,9 +8,13 @@ #' @export #' @seealso \code{\link{glm}} #' @examples -#' \dontrun{ -#' stripGLM(marge_model) -#' } +#' data(sim_counts) +#' data(sim_pseudotime) +#' cell_offset <- createCellOffset(sim_counts) +#' marge_model <- marge2(sim_pseudotime, +#' Y = BiocGenerics::counts(sim_counts)[4, ], +#' Y.offset = cell_offset) +#' smaller_model <- stripGLM(marge_model$final_mod) stripGLM <- function(glm.obj = NULL) { # check inputs diff --git a/R/summarizeModel.R b/R/summarizeModel.R index 744e9e9..fa5809f 100644 --- a/R/summarizeModel.R +++ b/R/summarizeModel.R @@ -12,9 +12,13 @@ #' @seealso \code{\link{marge2}} #' @export #' @examples -#' \dontrun{ -#' summarizeModel(marge.model = marge_mod, pt = pt_df) -#' } +#' data(sim_counts) +#' data(sim_pseudotime) +#' cell_offset <- createCellOffset(sim_counts) +#' marge_model <- marge2(sim_pseudotime, +#' Y = BiocGenerics::counts(sim_counts)[4, ], +#' Y.offset = cell_offset) +#' model_summary <- summarizeModel(marge.model = marge_model, pt = sim_pseudotime) summarizeModel <- function(marge.model = NULL, pt = NULL) { # check inputs diff --git a/R/testDynamic.R b/R/testDynamic.R index 1a1b8cc..1b96e71 100644 --- a/R/testDynamic.R +++ b/R/testDynamic.R @@ -46,29 +46,12 @@ #' @seealso \code{\link[glmmTMB]{glmmTMB}} #' @export #' @examples -#' \donttest{ #' data(sim_counts) #' data(sim_pseudotime) #' cell_offset <- createCellOffset(sim_counts) -#' testDynamic(sim_counts, -#' pt = sim_pseudotime, -#' size.factor.offset = cell_offset, -#' genes = sample(rownames(sim_counts), 20)) -#' testDynamic(sim_counts, -#' pt = sim_pseudotime, -#' size.factor.offset = cell_offset, -#' is.gee = TRUE, -#' id.vec = sim_counts$subject, -#' cor.structure = "ar1", -#' genes = sample(rownames(sim_counts), 20)) -#' testDynamic(sim_counts, -#' pt = sim_pseudotime, -#' size.factor.offset = cell_offset, -#' is.glmm = TRUE, -#' glmm.adaptive = TRUE, -#' id.vec = sim_counts$subject, -#' genes = sample(rownames(sim_counts), 20)) -#'} +#' scLANE_models <- testDynamic(sim_counts, +#' pt = sim_pseudotime, +#' size.factor.offset = cell_offset) testDynamic <- function(expr.mat = NULL, pt = NULL, @@ -172,6 +155,7 @@ testDynamic <- function(expr.mat = NULL, .packages = package_list, .noexport = no_export, .errorhandling = "pass", + .inorder = TRUE, .verbose = FALSE) %dopar% { lineage_list <- vector("list", n_lineages) for (j in seq(n_lineages)) { @@ -256,7 +240,7 @@ testDynamic <- function(expr.mat = NULL, # slim down GLM object if not a GEE / GLMM model (which are much smaller for some reason) if (!(is.gee || is.glmm)) { - null_mod <- scLANE::stripGLM(glm.obj = null_mod) + null_mod <- stripGLM(glm.obj = null_mod) } # record model fit status @@ -275,21 +259,21 @@ testDynamic <- function(expr.mat = NULL, } # summarize hinge function coefficients - null_sumy <- scLANE:::pull.null.sumy(null_mod, is.gee, is.glmm) - marge_sumy <- scLANE:::pull.marge.sumy(marge_mod, is.gee, is.glmm) + null_sumy <- pull.null.sumy(null_mod, is.gee, is.glmm) + marge_sumy <- pull.marge.sumy(marge_mod, is.gee, is.glmm) # perform slope test - marge_slope_df <- scLANE:::createSlopeTestData(marge.model = marge_mod, - pt = pt[lineage_cells, j, drop = FALSE], - is.gee = is.gee, - is.glmm = is.glmm) + marge_slope_df <- createSlopeTestData(marge.model = marge_mod, + pt = pt[lineage_cells, j, drop = FALSE], + is.gee = is.gee, + is.glmm = is.glmm) marge_slope_df <- dplyr::mutate(marge_slope_df, Gene = genes[i], Lineage = LETTERS[j], .before = 1) # solve values of slopes across pseudotime intervals -- TODO: add support for GLMM backend if (!is.glmm) { - marge_dynamic_df <- scLANE:::summarizeModel(marge.model = marge_mod, + marge_dynamic_df <- summarizeModel(marge.model = marge_mod, pt = pt[lineage_cells, j, drop = FALSE]) marge_dynamic_df <- data.frame(t(unlist(marge_dynamic_df))) %>% dplyr::mutate(Gene = genes[i], diff --git a/R/testSlope.R b/R/testSlope.R index 0544216..c155157 100644 --- a/R/testSlope.R +++ b/R/testSlope.R @@ -15,12 +15,8 @@ #' @seealso \code{\link[stats]{p.adjust}} #' @export #' @examples -#' \dontrun{ -#' testSlope(gene_stats) -#' testSlope(gene_stats, -#' method = "BH", -#' fdr.cutoff = 0.05) -#' } +#' data(scLANE_models) +#' slope_test_res <- testSlope(scLANE_models) testSlope <- function(test.dyn.res = NULL, p.adj.method = "holm", diff --git a/R/theme_scLANE.R b/R/theme_scLANE.R index 9908a11..84a7142 100644 --- a/R/theme_scLANE.R +++ b/R/theme_scLANE.R @@ -11,13 +11,16 @@ #' @return A \code{ggplot2} theme. #' @export #' @examples -#' \dontrun{ -#' plotModels(gene_stats, -#' gene = "CD14", -#' pt = pt_df, -#' expr.mat = count_mat -#' ) + theme_scLANE() -#' } +#' data(sim_counts) +#' data(scLANE_models) +#' data(sim_pseudotime) +#' cell_offset <- createCellOffset(sim_counts) +#' plotModels(scLANE_models, +#' gene = names(scLANE_models)[1], +#' pt = sim_pseudotime, +#' expr.mat = sim_counts, +#' size.factor.offset = cell_offset) + +#' theme_scLANE() theme_scLANE <- function(base.size = 12, base.lwd = 0.75, diff --git a/R/tp1.R b/R/tp1.R index de9b18a..cf1e90b 100644 --- a/R/tp1.R +++ b/R/tp1.R @@ -1,16 +1,18 @@ #' Truncated p-th power function (positive part). #' #' @name tp1 -#' @param x : a vector of predictor variable values. -#' @param t : a specified knot value. -#' @param p : the pth degree of the polynomial considered. -#' @return \code{tp1} returns a vector of values that have been transformed using a truncated p-th power function (positive part) for a specified knot value. +#' @param x A predictor variable value. Defaults to NULL. #' @author Jakub Stoklosa and David I. Warton +#' @param t A specified knot value. Defaults to NULL. +#' @param p The \eqn{p^{th}} degree of the polynomial considered. Defaults to 1. +#' @return A vector of values that have been transformed using a truncated \eqn{p^{th}} power function (positive part) for a specified knot value. #' @references Friedman, J. (1991). Multivariate adaptive regression splines. \emph{The Annals of Statistics}, \strong{19}, 1--67. #' @references Stoklosa, J. and Warton, D.I. (2018). A generalized estimating equation approach to multivariate adaptive regression splines. \emph{Journal of Computational and Graphical Statistics}, \strong{27}, 245--253. #' @seealso \code{\link{tp2}} -tp1 <- function(x, t, p = 1) { +tp1 <- function(x = NULL, + t = NULL, + p = 1) { res <- ((x - t)^p) * (x > t) return(res) } diff --git a/R/tp2.R b/R/tp2.R index 40946ad..cc62dd3 100644 --- a/R/tp2.R +++ b/R/tp2.R @@ -1,16 +1,18 @@ #' Truncated p-th power function (negative part). #' #' @name tp2 -#' @param x : a predictor variable value. -#' @param t : a specified knot value. -#' @param p : the pth degree of the polynomial considered. -#' @return \code{tp2} returns a vector of values that have been transformed using a truncated p-th power function (negative part) for a specified knot value. #' @author Jakub Stoklosa and David I. Warton +#' @param x A predictor variable value. Defaults to NULL. +#' @param t A specified knot value. Defaults to NULL. +#' @param p The \eqn{p^{th}} degree of the polynomial considered. Defaults to 1. +#' @return A vector of values that have been transformed using a truncated \eqn{p^{th}} power function (negative part) for a specified knot value. #' @references Friedman, J. (1991). Multivariate adaptive regression splines. \emph{The Annals of Statistics}, \strong{19}, 1--67. #' @references Stoklosa, J. and Warton, D.I. (2018). A generalized estimating equation approach to multivariate adaptive regression splines. \emph{Journal of Computational and Graphical Statistics}, \strong{27}, 245--253. #' @seealso \code{\link{tp1}} -tp2 <- function(x, t, p = 1) { +tp2 <- function(x = NULL, + t = NULL, + p = 1) { res <- ((t - x)^p) * (x < t) return(res) } diff --git a/README.Rmd b/README.Rmd index 975a0a7..28f9b7a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -28,7 +28,7 @@ knitr::opts_chunk$set(warning = FALSE, ![last commit](https://img.shields.io/github/last-commit/jr-leary7/scLANE/main?color=darkgreen) [![codecov](https://codecov.io/gh/jr-leary7/scLANE/branch/main/graph/badge.svg?token=U2U5RTF2VW)](https://codecov.io/gh/jr-leary7/scLANE) [![CodeFactor](https://www.codefactor.io/repository/github/jr-leary7/sclane/badge)](https://www.codefactor.io/repository/github/jr-leary7/sclane) -[![DOI](https://img.shields.io/static/v1?label=DOI&message=10.5281/zenodo.10030621&color=blue)](https://doi.org/10.5281/zenodo.10030621) +[![DOI](https://img.shields.io/static/v1?label=DOI&message=10.5281/zenodo.10106689&color=blue)](https://doi.org/10.5281/zenodo.10106689) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) @@ -90,7 +90,9 @@ plotPCA(sim_data, colour_by = "subject") + Since we have multi-subject data, we can use any of the three model modes to run our DE testing. We'll start with the simplest model, the GLM, then work our way through the other options in order of increasing complexity. We first prepare our inputs - a dataframe containing our cell ordering, a set of genes to build models for, and a vector of per-cell size factors to be used as offsets during estimation. In reality, it's usually unnecessary to fit a model for every single gene in a dataset, as trajectories are usually estimated using a subset of the entire set of genes (usually a few thousand most highly variable genes). For the purpose of demonstration, we'll select 50 genes each from the dynamic and non-dynamic populations. {% note %} -**Note** In this case we're working with a single pseudotime lineage, though in real datasets several lineages often exist; in order to fit models for a subset of lineages simply remove the corresponding columns from the cell ordering dataframe passed as input to `testDynamic()`. + +**Note:** In this case we're working with a single pseudotime lineage, though in real datasets several lineages often exist; in order to fit models for a subset of lineages simply remove the corresponding columns from the cell ordering dataframe passed as input to `testDynamic()`. + {% endnote %} ```{r prep-data} @@ -167,7 +169,9 @@ scLANE_models_glmm <- testDynamic(sim_data, ``` {% note %} -**Note** The GLMM mode is still under development, as we are working on further reducing runtime and increasing the odds of the underlying optimization process converging successfully. As such, updates will be frequent and functionality / results may shift slightly. + +**Note:** The GLMM mode is still under development, as we are working on further reducing runtime and increasing the odds of the underlying optimization process converging successfully. As such, updates will be frequent and functionality / results may shift slightly. + {% endnote %} Like the GLM mode, the GLMM mode uses a likelihood ratio test to compare the null & alternate models. We fit the two nested models using maximum likelihood estimation instead of [REML](https://en.wikipedia.org/wiki/Restricted_maximum_likelihood) in order to perform this test; the null model in this case is a negative binomial GLMM with a random intercept for each subject. diff --git a/README.md b/README.md index 986017e..587b7fc 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,21 @@ -- [`scLANE`](#sclane) - - [Installation](#installation) - - [Model structure](#model-structure) -- [Usage](#usage) - - [Libraries](#libraries) - - [Input data](#input-data) - - [Trajectory DE testing](#trajectory-de-testing) - - [Downstream analysis & - visualization](#downstream-analysis--visualization) -- [Conclusions & best practices](#conclusions--best-practices) -- [Contact information](#contact-information) -- [References](#references) +- scLANE + - Installation + - Model structure +- Usage + - Libraries + - Input data + - Trajectory DE testing + - Downstream analysis & + visualization +- Conclusions & best + practices +- Contact + information +- References @@ -24,7 +29,7 @@ commit](https://img.shields.io/github/last-commit/jr-leary7/scLANE/main?color=darkgreen) [![codecov](https://codecov.io/gh/jr-leary7/scLANE/branch/main/graph/badge.svg?token=U2U5RTF2VW)](https://codecov.io/gh/jr-leary7/scLANE) [![CodeFactor](https://www.codefactor.io/repository/github/jr-leary7/sclane/badge)](https://www.codefactor.io/repository/github/jr-leary7/sclane) -[![DOI](https://img.shields.io/static/v1?label=DOI&message=10.5281/zenodo.10030621&color=blue)](https://doi.org/10.5281/zenodo.10030621) +[![DOI](https://img.shields.io/static/v1?label=DOI&message=10.5281/zenodo.10106689&color=blue)](https://doi.org/10.5281/zenodo.10106689) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) @@ -142,11 +147,14 @@ the entire set of genes (usually a few thousand most highly variable genes). For the purpose of demonstration, we’ll select 50 genes each from the dynamic and non-dynamic populations. -> \[!NOTE\] In this case we’re working with a single pseudotime lineage, -> though in real datasets several lineages often exist; in order to fit -> models for a subset of lineages simply remove the corresponding -> columns from the cell ordering dataframe passed as input to -> `testDynamic()`. +{% note %} + +**Note:** In this case we’re working with a single pseudotime lineage, +though in real datasets several lineages often exist; in order to fit +models for a subset of lineages simply remove the corresponding columns +from the cell ordering dataframe passed as input to `testDynamic()`. + +{% endnote %} ``` r set.seed(312) @@ -170,7 +178,10 @@ scLANE_models_glm <- testDynamic(sim_data, genes = gene_sample, size.factor.offset = cell_offset, n.cores = 4) -#> scLANE testing completed for 100 genes across 1 lineage in 24.807 secs +#> Registered S3 method overwritten by 'bit': +#> method from +#> print.ri gamlss +#> scLANE testing completed for 100 genes across 1 lineage in 35.114 secs ``` After the function finishes running, we use `getResultsDE()` to generate @@ -189,13 +200,13 @@ select(scLANE_res_glm, Gene, Lineage, Test_Stat, P_Val, P_Val_Adj, Gene_Dynamic_ col.names = c("Gene", "Lineage", "LRT stat.", "P-value", "Adj. p-value", "Predicted dynamic status")) ``` -| Gene | Lineage | LRT stat. | P-value | Adj. p-value | Predicted dynamic status | -|:-------|:--------|----------:|--------:|-------------:|-------------------------:| -| MFSD2B | A | 209.755 | 0.000 | 0.000 | 1 | -| FOXD3 | A | 4.276 | 0.039 | 0.451 | 0 | -| NOP14 | A | 7.712 | 0.005 | 0.115 | 0 | -| TMCO3 | A | 167.759 | 0.000 | 0.000 | 1 | -| DLC1 | A | 2.803 | 0.094 | 0.451 | 0 | +| Gene | Lineage | LRT stat. | P-value | Adj. p-value | Predicted dynamic status | +|:---------|:--------|----------:|--------:|-------------:|-------------------------:| +| MFSD2B | A | 209.755 | 0.000 | 0.000 | 1 | +| GOLGA8EP | A | 4.390 | 0.036 | 0.537 | 0 | +| NOP14 | A | 7.712 | 0.005 | 0.132 | 0 | +| TMCO3 | A | 167.582 | 0.000 | 0.000 | 1 | +| BATF2 | A | 5.216 | 0.074 | 0.537 | 0 | ### GEE mode @@ -218,7 +229,7 @@ scLANE_models_gee <- testDynamic(sim_data, id.vec = sim_data$subject, cor.structure = "ar1", n.cores = 4) -#> scLANE testing completed for 100 genes across 1 lineage in 2.343 mins +#> scLANE testing completed for 100 genes across 1 lineage in 2.201 mins ``` We again generate the table of DE test results. The variance of the @@ -237,11 +248,11 @@ select(scLANE_res_gee, Gene, Lineage, Test_Stat, P_Val, P_Val_Adj, Gene_Dynamic_ | Gene | Lineage | Wald stat. | P-value | Adj. p-value | Predicted dynamic status | |:---------|:--------|-----------:|--------:|-------------:|-------------------------:| -| BAD | A | 301009.051 | 0 | 0.000 | 1 | -| SPCS3 | A | 18.352 | 0 | 0.002 | 1 | +| DGUOK | A | 200675.460 | 0.000 | 0.000 | 1 | +| VDAC1 | A | 13.025 | 0.001 | 0.025 | 0 | | GOLGA8EP | A | NA | NA | NA | 0 | -| WAPAL | A | 3168.110 | 0 | 0.000 | 1 | -| MPG | A | 924.419 | 0 | 0.000 | 1 | +| WAPAL | A | 3254.351 | 0.000 | 0.000 | 1 | +| IDH3G | A | 863.479 | 0.000 | 0.000 | 1 | ### GLMM mode @@ -265,13 +276,17 @@ scLANE_models_glmm <- testDynamic(sim_data, glmm.adaptive = TRUE, id.vec = sim_data$subject, n.cores = 4) -#> scLANE testing completed for 100 genes across 1 lineage in 2.37 mins +#> scLANE testing completed for 100 genes across 1 lineage in 3.427 mins ``` -> \[!NOTE\] The GLMM mode is still under development, as we are working -> on further reducing runtime and increasing the odds of the underlying -> optimization process converging successfully. As such, updates will be -> frequent and functionality / results may shift slightly. +{% note %} + +**Note:** The GLMM mode is still under development, as we are working on +further reducing runtime and increasing the odds of the underlying +optimization process converging successfully. As such, updates will be +frequent and functionality / results may shift slightly. + +{% endnote %} Like the GLM mode, the GLMM mode uses a likelihood ratio test to compare the null & alternate models. We fit the two nested models using maximum @@ -291,8 +306,8 @@ select(scLANE_res_glmm, Gene, Lineage, Test_Stat, P_Val, P_Val_Adj, Gene_Dynamic | Gene | Lineage | LRT stat. | P-value | Adj. p-value | Predicted dynamic status | |:--------|:--------|----------:|--------:|-------------:|-------------------------:| -| KLHDC10 | A | 123.059 | 0.000 | 0 | 1 | -| DDX41 | A | 74.493 | 0.000 | 0 | 1 | +| DDX1 | A | 131.707 | 0.000 | 0 | 1 | +| TSPAN1 | A | 78.616 | 0.000 | 0 | 1 | | WDSUB1 | A | NA | NA | NA | 0 | | FAM135B | A | NA | NA | NA | 0 | | NLGN4Y | A | 9.878 | 0.627 | 1 | 0 | diff --git a/data/scLANE_models.rda b/data/scLANE_models.rda new file mode 100644 index 0000000..9c6f758 Binary files /dev/null and b/data/scLANE_models.rda differ diff --git a/data/sim_pseudotime.rda b/data/sim_pseudotime.rda index 965bc16..15dd974 100644 Binary files a/data/sim_pseudotime.rda and b/data/sim_pseudotime.rda differ diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000..aff2571 --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,13 @@ +citHeader("To cite scLANE in publications use:") + +citEntry( + entry = "Manual", + title = "Single Cell Linear Adaptive Expression Testing (scLANE)", + author = "Jack Leary and Rhonda Bacher", + year = "2023", + note = "R package version 0.7.7", + url = "https://doi.org/10.5281/zenodo.10106689", + textVersion = paste( + + ) +) diff --git a/man/clusterGenes.Rd b/man/clusterGenes.Rd index a0d770d..67ca7cf 100644 --- a/man/clusterGenes.Rd +++ b/man/clusterGenes.Rd @@ -41,19 +41,12 @@ This function takes as input the output from \code{\link{testDynamic}} and clust } } \examples{ -\dontrun{ -clusterGenes(gene_stats, pt = pt_df) -clusterGenes(gene_stats, - pt = pt_df, - size.factor.offset = createCellOffset(sce_obj), - clust.algo = "kmeans", - use.pca = TRUE, - n.PC = 10, - lineages = "B") -clusterGenes(gene_stats, - pt = pt_df, - lineages = c("A", "C")) -} +data(sim_pseudotime) +data(scLANE_models) +cell_offset <- createCellOffset(sim_counts) +gene_clusters <- clusterGenes(scLANE_models, + pt = sim_pseudotime, + size.factor.offset = cell_offset) } \seealso{ \code{\link{testDynamic}} diff --git a/man/createCellOffset.Rd b/man/createCellOffset.Rd index 0085236..6e72ada 100644 --- a/man/createCellOffset.Rd +++ b/man/createCellOffset.Rd @@ -18,11 +18,8 @@ A named numeric vector containing the computed size factor for each cell. Creates a vector of per-cell size factors to be used as input to \code{\link{testDynamic}} as a model offset given a variety of inputs. } \examples{ -\dontrun{ -createCellOffset(expr.mat = sce_obj) -createCellOffset(expr.mat = counts(sce_obj)) -createCellOffset(expr.mat = seu_obj, scale.factor = 1e5) -} +data(sim_counts) +cell_offset <- createCellOffset(sim_counts) } \seealso{ \code{\link{testDynamic}} diff --git a/man/createSlopeTestData.Rd b/man/createSlopeTestData.Rd index 530aa85..ce53462 100644 --- a/man/createSlopeTestData.Rd +++ b/man/createSlopeTestData.Rd @@ -26,14 +26,6 @@ A data.frame containing model data. \description{ Creates a data.frame of \code{marge} model breakpoints, \emph{p}-values, and other info. } -\examples{ -\dontrun{ -createSlopeTestData(marge_mod, pt_df) -createSlopeTestData(marge_mod, - pt = pt_df, - is.glmm = TRUE) -} -} \seealso{ \code{\link{marge2}} diff --git a/man/embedGenes.Rd b/man/embedGenes.Rd index cfeb947..2380b47 100644 --- a/man/embedGenes.Rd +++ b/man/embedGenes.Rd @@ -14,7 +14,8 @@ embedGenes( gene.meta.data = NULL, k.param = 20, resolution.param = NULL, - random.seed = 312 + random.seed = 312, + n.cores = 2L ) } \arguments{ @@ -37,6 +38,8 @@ embedGenes( \item{resolution.param}{(Optional) The value of the resolution parameter for the Leiden algorithm. If unspecified, silhouette scoring is used to select an optimal value. Defaults to NULL.} \item{random.seed}{(Optional) The random seed used to control stochasticity in the clustering algorithm. Defaults to 312.} + +\item{n.cores}{(Optional) Integer specifying the number of threads used by \code{\link[uwot]{umap}} and in \code{\link[bluster]{makeSNNGraph}}. Defaults to 2.} } \value{ A data.frame containing embedding coordinates, cluster IDs, and metadata for each gene. @@ -45,11 +48,12 @@ A data.frame containing embedding coordinates, cluster IDs, and metadata for eac Embed genes in dimension-reduced space given a smoothed counts matrix. } \examples{ -\dontrun{ -embedGenes(smoothed_counts$Lineage_A, - pcs.return = 3, - cluster.genes = TRUE) -} +data(sim_pseudotime) +data(scLANE_models) +smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, + pt = sim_pseudotime, + n.cores = 1L) +gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L) } \author{ Jack Leary diff --git a/man/enrichDynamicGenes.Rd b/man/enrichDynamicGenes.Rd index 58cefcf..8d3e01e 100644 --- a/man/enrichDynamicGenes.Rd +++ b/man/enrichDynamicGenes.Rd @@ -20,12 +20,9 @@ The output from \code{\link[gprofiler2]{gost}}. This function uses the \code{gprofiler2} package to perform pathway analysis on a set of genes from one or more lineages that were determined to be dynamic with \code{\link{testDynamic}}. } \examples{ -\dontrun{ -enrichDynamicGenes(scLANE.de.res = de_stats) -enrichDynamicGenes(scLANE.de.res = de_stats, - lineage = "B", - species = "mmusculus") -} +data(scLANE_models) +scLANE_de_res <- getResultsDE(scLANE_models) +enr_res <- enrichDynamicGenes(scLANE_de_res) } \seealso{ \code{\link[gprofiler2]{gost}} diff --git a/man/extractBreakpoints.Rd b/man/extractBreakpoints.Rd index 0e1322b..a7ca637 100644 --- a/man/extractBreakpoints.Rd +++ b/man/extractBreakpoints.Rd @@ -18,9 +18,13 @@ A data.frame of breakpoints & their directions. Extracts the breakpoints from a fitted \code{marge} model. Note - this function relies on the name of the pseudotime variable not having any numeric characters in it e.g., "pseudotime" or "PT" would be fine but "pseudotime1" would not. If multiple lineages exist, use letters to denote lineages instead of numbers e.g., "Lineage_A" and "Lineage_B". This is currently handled automatically in \code{\link{testDynamic}}, so don't change anything. } \examples{ -\dontrun{ -extractBreakpoints(model = marge_mod) -} +data(sim_counts) +data(sim_pseudotime) +cell_offset <- createCellOffset(sim_counts) +marge_model <- marge2(sim_pseudotime, + Y = BiocGenerics::counts(sim_counts)[4, ], + Y.offset = cell_offset) +breakpoint_df <- extractBreakpoints(model = marge_model) } \author{ Jack Leary diff --git a/man/figures/README-plot-models-gee-1.png b/man/figures/README-plot-models-gee-1.png index 27383e0..c1d170c 100644 Binary files a/man/figures/README-plot-models-gee-1.png and b/man/figures/README-plot-models-gee-1.png differ diff --git a/man/figures/README-plot-models-glmm-1.png b/man/figures/README-plot-models-glmm-1.png index bb85e97..0bc197b 100644 Binary files a/man/figures/README-plot-models-glmm-1.png and b/man/figures/README-plot-models-glmm-1.png differ diff --git a/man/figures/README-plot-sims-pt-1.png b/man/figures/README-plot-sims-pt-1.png index cd80de7..0734055 100644 Binary files a/man/figures/README-plot-sims-pt-1.png and b/man/figures/README-plot-sims-pt-1.png differ diff --git a/man/figures/README-unnamed-chunk-2-1.png b/man/figures/README-unnamed-chunk-2-1.png index f70fdb3..3666dea 100644 Binary files a/man/figures/README-unnamed-chunk-2-1.png and b/man/figures/README-unnamed-chunk-2-1.png differ diff --git a/man/fitGLMM.Rd b/man/fitGLMM.Rd index beb8e37..f49653e 100644 --- a/man/fitGLMM.Rd +++ b/man/fitGLMM.Rd @@ -42,12 +42,13 @@ An object of class \code{marge} containing the fitted model & other optional qua Fits a negative binomial generalized linear mixed model using truncated power basis function splines as input. The basis matrix can be created adaptively using subject-specific estimation of optimal knots using \code{\link{marge2}}, or basis functions can be evenly space across quantiles. The resulting model can output subject-specific and population-level fitted values. } \examples{ -\dontrun{ -fitGLMM(X_pred = pt_df, - Y = raw_counts, - Y.offset = size_factor_vec, - id.vec = subject_vec) -} +data(sim_counts) +data(sim_pseudotime) +cell_offset <- createCellOffset(sim_counts) +glmm_mod <- fitGLMM(X_pred = sim_pseudotime, + Y = BiocGenerics::counts(sim_counts)[4, ], + Y.offset = cell_offset, + id.vec = sim_counts$subject) } \seealso{ \code{\link[glmmTMB]{glmmTMB}} diff --git a/man/geneProgramScoring.Rd b/man/geneProgramScoring.Rd index 0dc2711..d28e721 100644 --- a/man/geneProgramScoring.Rd +++ b/man/geneProgramScoring.Rd @@ -9,7 +9,7 @@ geneProgramScoring( genes = NULL, gene.clusters = NULL, program.labels = NULL, - n.cores = 2 + n.cores = 2L ) } \arguments{ @@ -30,12 +30,16 @@ Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the } \examples{ -\dontrun{ -geneProgramScoring(seu_obj, - genes = gene_embed$gene, - gene.clusters = gene_embed$leiden, - program.labels = c("cell cycle", "organogenesis")) -} +data(sim_pseudotime) +data(scLANE_models) +smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, + pt = sim_pseudotime, + n.cores = 1L) +gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L) +sim_counts <- geneProgramScoring(sim_counts, + genes = gene_embed$gene, + gene.clusters = gene_embed$leiden, + n.cores = 1L) } \author{ Jack Leary diff --git a/man/getFittedValues.Rd b/man/getFittedValues.Rd index f1aba91..8e6a8f4 100644 --- a/man/getFittedValues.Rd +++ b/man/getFittedValues.Rd @@ -45,14 +45,13 @@ A data.frame containing depth- and log1p-normalized expression, model prediction Generate a table of expression counts, model fitted values, celltype metadata, etc. in order to create custom plots of gene dynamics. } \examples{ -\dontrun{ -getFittedValues(gene_stats, - genes = c("Neurog3", "Epcam", "Krt19"), - pt = pt_df, - expr.mat = seu_obj, - cell.meta.data = seu_obj@meta.data, - ci.alpha = 0.05) -} +data(sim_counts) +data(sim_pseudotime) +data(scLANE_models) +fitted_vals <- getFittedValues(scLANE_models, + genes = sample(names(scLANE_models), 5), + pt = sim_pseudotime, + expr.mat = sim_counts) } \author{ Jack Leary diff --git a/man/getKnotDist.Rd b/man/getKnotDist.Rd index 353cf6e..54b79a9 100644 --- a/man/getKnotDist.Rd +++ b/man/getKnotDist.Rd @@ -18,9 +18,8 @@ A data.frame containing gene name, lineage ID, and knot location in pseudotime. Pulls knot locations for dynamic genes across each lineage, allowing comparisons of where transcriptional switches occur between lineages. } \examples{ -\dontrun{ -getKnotDist(gene_stats) -} +data(scLANE_models) +knot_dist <- getKnotDist(scLANE_models) } \author{ Jack Leary diff --git a/man/getResultsDE.Rd b/man/getResultsDE.Rd index 64b6d98..0d1278c 100644 --- a/man/getResultsDE.Rd +++ b/man/getResultsDE.Rd @@ -20,12 +20,8 @@ A data.frame containing differential expression results & modeling statistics fo This function turns the nested list differential expression results of \code{\link{testDynamic}} and turns them into a tidy \code{data.frame}. } \examples{ -\dontrun{ -getResultsDE(gene_stats) -getResultsDE(gene_stats, - p.adj.method = "BH", - fdr.cutoff = 5e-3) -} +data(scLANE_models) +scLANE_de_res <- getResultsDE(scLANE_models) } \seealso{ \code{\link{testDynamic}} diff --git a/man/marge2.Rd b/man/marge2.Rd index 0a13d3e..a24fcb6 100644 --- a/man/marge2.Rd +++ b/man/marge2.Rd @@ -62,24 +62,12 @@ MARS fitting function for negative binomial generalized linear models (GLMs) & g } } \examples{ -\dontrun{ - marge2(pseudotime_df, - Y = expr_vec, - M = 3) - marge2(pseudotime_df, - Y = expr_vec, - Y.offset = size_factor_vec, - is.gee = TRUE, - id.vec = subject_vec, - cor.structure = "exchangeable") - marge2(pseudotime_df, - Y = expr_vec, - is.gee = TRUE, - id.vec = subject_vec, - cor.structure = "ar1", - n.knot.max = 10, - return.basis = TRUE) -} +data(sim_counts) +data(sim_pseudotime) +cell_offset <- createCellOffset(sim_counts) +marge_model <- marge2(sim_pseudotime, + Y = BiocGenerics::counts(sim_counts)[4, ], + Y.offset = cell_offset) } \references{ Friedman, J. (1991). Multivariate adaptive regression splines. \emph{The Annals of Statistics}, \strong{19}, 1--67. diff --git a/man/nbGAM.Rd b/man/nbGAM.Rd index 23656d9..2840be7 100644 --- a/man/nbGAM.Rd +++ b/man/nbGAM.Rd @@ -33,18 +33,14 @@ An object of class \code{gamlss} Fits a negative-binomial family GAM using a cubic basis spline on pseudotime. If data are multi-subject in nature, a random intercept is included for each subject. } \examples{ -\dontrun{ -nbGAM(expr_vec, pt_df) -nbGAM(expr_vec, - pt = pt_df, - id.vec = subject_ids, - random.slopes = TRUE) -nbGAM(expr_vec, - pt = pt_df, - Y.offset = size_factor_vec, - penalize.spline = TRUE, - spline.df = 10) -} +data(sim_counts) +data(sim_pseudotime) +cell_offset <- createCellOffset(sim_counts) +gam_mod <- nbGAM(BiocGenerics::counts(sim_counts)[4, ], + pt = sim_pseudotime, + Y.offset = cell_offset, + penalize.spline = TRUE, + spline.df = 10) } \seealso{ \code{\link[gamlss]{gamlss}} diff --git a/man/npConvolve.Rd b/man/npConvolve.Rd index fe0a7c9..e9b58ec 100644 --- a/man/npConvolve.Rd +++ b/man/npConvolve.Rd @@ -19,11 +19,11 @@ Convolve a vector with a user-specified kernel. Can be useful for heatmap smooth } \details{ \itemize{ -\item The convolution here uses \code{\link[stats]{convolve}}, but creates the kernel and padding in such a way that it matches the output from `np.convolve` in Python's \code{numpy} matrix algebra package. +\item The convolution here uses \code{\link[stats]{convolve}}, but creates the kernel and padding in such a way that it matches the output from \code{np.convolve} in Python's \code{numpy} matrix algebra package. } } \examples{ -npConvolve(x = rnorm(20), conv.kernel = rep(1/5, 5)) +convolved_vec <- npConvolve(x = rnorm(20), conv.kernel = rep(1/5, 5)) } \seealso{ diff --git a/man/plotClusteredGenes.Rd b/man/plotClusteredGenes.Rd index a00359c..383bc67 100644 --- a/man/plotClusteredGenes.Rd +++ b/man/plotClusteredGenes.Rd @@ -10,7 +10,7 @@ plotClusteredGenes( pt = NULL, size.factor.offset = NULL, parallel.exec = TRUE, - n.cores = 2 + n.cores = 2L ) } \arguments{ diff --git a/man/plotModelCoefs.Rd b/man/plotModelCoefs.Rd index 87302ac..b2fc81b 100644 --- a/man/plotModelCoefs.Rd +++ b/man/plotModelCoefs.Rd @@ -36,14 +36,17 @@ A \code{ggplot2} object displaying a gene dynamics plot & a table of coefficient Generate a plot of gene dynamics over a single pseudotime lineage, along with a table of coefficients across pseudotime intervals. } \examples{ -\dontrun{ -plotModelCoefs(gene_stats, - gene = "BRCA2", - pt = pt_df, - expr.mat = seu_obj, +data(sim_counts) +data(sim_pseudotime) +data(scLANE_models) +cell_offset <- createCellOffset(sim_counts) +scLANE_de_res <- getResultsDE(scLANE_models) +plotModelCoefs(scLANE_models, + gene = scLANE_de_res$Gene[1], + pt = sim_pseudotime, + expr.mat = sim_counts, size.factor.offset = cell_offset) } -} \author{ Jack Leary } diff --git a/man/plotModels.Rd b/man/plotModels.Rd index 7b5431a..4611656 100644 --- a/man/plotModels.Rd +++ b/man/plotModels.Rd @@ -66,30 +66,15 @@ A \code{ggplot} object. This function visualizes the fitted values of several types of models over the expression and pseudotime values of each cell. } \examples{ -\dontrun{ -plotModels(gene_stats, - gene = "AURKA", - pt = pt_df, - expr.mat = count_mat, - size.factor.offset = cell_offset) -plotModels(gene_stats, - gene = "CD3E", - pt = pt_df, - expr.mat = seu_obj, - size.factor.offset = cell_offset, - ci.alpha = 0.1, - filter.lineage = c("A", "C")) -plotModels(gene_stats, - gene = "CD14", - pt = pt_df, - expr.mat = sce_obj, - size.factor.offset = cell_offset, - is.glmm = TRUE, - id.vec = subject_ids, - plot.glm = TRUE, # plots an NB GLMM with random intercepts & slopes per-subject - plot.gam = TRUE, # plots an NB GAMM with random intercepts per-subject - gg.theme = ggplot2::theme_minimal()) -} +data(sim_counts) +data(scLANE_models) +data(sim_pseudotime) +cell_offset <- createCellOffset(sim_counts) +model_plot <- plotModels(scLANE_models, + gene = names(scLANE_models)[2], + pt = sim_pseudotime, + expr.mat = sim_counts, + size.factor.offset = cell_offset) } \author{ Jack Leary diff --git a/man/scLANE_models.Rd b/man/scLANE_models.Rd new file mode 100644 index 0000000..ce759a8 --- /dev/null +++ b/man/scLANE_models.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{scLANE_models} +\alias{scLANE_models} +\title{An object of class \code{scLANE}.} +\format{ +An object of class \code{scLANE}. +} +\usage{ +data(scLANE_models) +} +\description{ +Contains the results from running \code{\link{testDynamic}} on \code{\link{sim_counts}}. +} +\keyword{datasets} diff --git a/man/smoothedCountsMatrix.Rd b/man/smoothedCountsMatrix.Rd index c563c6b..5ee0881 100644 --- a/man/smoothedCountsMatrix.Rd +++ b/man/smoothedCountsMatrix.Rd @@ -11,7 +11,7 @@ smoothedCountsMatrix( genes = NULL, log1p.norm = FALSE, parallel.exec = TRUE, - n.cores = 2 + n.cores = 2L ) } \arguments{ @@ -36,14 +36,11 @@ A list of matrices of smoothed counts, with each element of the list being a sin This function takes as input the output from \code{\link{testDynamic}} and returns the fitted values from each model in a wide format, with one column per-gene and one row-per cell. This matrix can be use as input to cell or gene clustering and / or visualizations such as heatmaps. } \examples{ -\dontrun{ -smoothedCountsMatrix(gene_stats, pt = pt_df) -smoothedCountsMatrix(gene_stats, - pt = pt_df, - genes = c("AURKA", "KRT19", "EPCAM", "GATA6"), - parallel.exec = TRUE, - n.cores = 2) -} +data(sim_pseudotime) +data(scLANE_models) +smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, + pt = sim_pseudotime, + n.cores = 1L) } \seealso{ \code{\link{testDynamic}} diff --git a/man/sortGenesHeatmap.Rd b/man/sortGenesHeatmap.Rd index 9f64c07..9eef594 100644 --- a/man/sortGenesHeatmap.Rd +++ b/man/sortGenesHeatmap.Rd @@ -18,11 +18,13 @@ A character vector of genes sorted by their peak expression values over pseudoti Sort genes such that genes with peak expression occurring earlier in pseudotime are first, and vice versa for genes with late peak expression. Useful for ordering genes in order to create heatmaps of expression cascades. } \examples{ -\dontrun{ -smoothed_counts <- smoothedCountsMatrix(gene_stats, pt = pt_df) -sortGenesheatmap(heatmap.mat = smoothed_counts$Lineage_A, - pt.vec = pt_df[!is.na(pt_df$Lineage_A), ]$Lineage_A) -} +data(sim_pseudotime) +data(scLANE_models) +smoothed_counts <- smoothedCountsMatrix(scLANE_models, + pt = sim_pseudotime, + n.cores = 1L) +sorted_genes <- sortGenesHeatmap(smoothed_counts$Lineage_A, + pt.vec = sim_pseudotime$PT) } \seealso{ \code{\link{smoothedCountsMatrix}} diff --git a/man/stripGLM.Rd b/man/stripGLM.Rd index c82aded..a4f1f4f 100644 --- a/man/stripGLM.Rd +++ b/man/stripGLM.Rd @@ -16,9 +16,13 @@ A slimmed-down \code{glm} object. This function removes a \emph{lot} of components from the default GLM object in order to make it take up less memory. It does however retain enough pieces for \code{predict()} to still work. No promises beyond that. } \examples{ -\dontrun{ -stripGLM(marge_model) -} +data(sim_counts) +data(sim_pseudotime) +cell_offset <- createCellOffset(sim_counts) +marge_model <- marge2(sim_pseudotime, + Y = BiocGenerics::counts(sim_counts)[4, ], + Y.offset = cell_offset) +smaller_model <- stripGLM(marge_model$final_mod) } \seealso{ \code{\link{glm}} diff --git a/man/summarizeModel.Rd b/man/summarizeModel.Rd index 2450a41..8778ee9 100644 --- a/man/summarizeModel.Rd +++ b/man/summarizeModel.Rd @@ -18,9 +18,13 @@ A data.frame of the model coefficients, cutpoint intervals, and formatted equati This function summarizes the model for each gene and allows for quantiative interpretation of fitted gene dynamics. } \examples{ -\dontrun{ -summarizeModel(marge.model = marge_mod, pt = pt_df) -} +data(sim_counts) +data(sim_pseudotime) +cell_offset <- createCellOffset(sim_counts) +marge_model <- marge2(sim_pseudotime, + Y = BiocGenerics::counts(sim_counts)[4, ], + Y.offset = cell_offset) +model_summary <- summarizeModel(marge.model = marge_model, pt = sim_pseudotime) } \seealso{ \code{\link{marge2}} diff --git a/man/testDynamic.Rd b/man/testDynamic.Rd index 37e034c..0ec53cf 100644 --- a/man/testDynamic.Rd +++ b/man/testDynamic.Rd @@ -66,29 +66,12 @@ This function tests whether a NB \code{marge} model is better than a null (inter } } \examples{ -\donttest{ data(sim_counts) data(sim_pseudotime) cell_offset <- createCellOffset(sim_counts) -testDynamic(sim_counts, - pt = sim_pseudotime, - size.factor.offset = cell_offset, - genes = sample(rownames(sim_counts), 20)) -testDynamic(sim_counts, - pt = sim_pseudotime, - size.factor.offset = cell_offset, - is.gee = TRUE, - id.vec = sim_counts$subject, - cor.structure = "ar1", - genes = sample(rownames(sim_counts), 20)) -testDynamic(sim_counts, - pt = sim_pseudotime, - size.factor.offset = cell_offset, - is.glmm = TRUE, - glmm.adaptive = TRUE, - id.vec = sim_counts$subject, - genes = sample(rownames(sim_counts), 20)) -} +scLANE_models <- testDynamic(sim_counts, + pt = sim_pseudotime, + size.factor.offset = cell_offset) } \seealso{ \code{\link{getResultsDE}} diff --git a/man/testSlope.Rd b/man/testSlope.Rd index 814e6ce..624ffd3 100644 --- a/man/testSlope.Rd +++ b/man/testSlope.Rd @@ -20,12 +20,8 @@ A dataframe containing the genes, breakpoints, and slope \emph{p}-values from ea This function tests whether each gene's estimated \eqn{\beta} for pseudotime differs significantly from 0 over each empirically estimated sets of knots / pseudotime interval using a Wald test. } \examples{ -\dontrun{ -testSlope(gene_stats) -testSlope(gene_stats, - method = "BH", - fdr.cutoff = 0.05) -} +data(scLANE_models) +slope_test_res <- testSlope(scLANE_models) } \seealso{ \code{\link{testDynamic}} diff --git a/man/theme_scLANE.Rd b/man/theme_scLANE.Rd index 9e4b53e..b023210 100644 --- a/man/theme_scLANE.Rd +++ b/man/theme_scLANE.Rd @@ -27,13 +27,16 @@ A \code{ggplot2} theme. A publication-ready theme for creating gene dynamics plots, embedding plots, etc. } \examples{ -\dontrun{ -plotModels(gene_stats, - gene = "CD14", - pt = pt_df, - expr.mat = count_mat -) + theme_scLANE() -} +data(sim_counts) +data(scLANE_models) +data(sim_pseudotime) +cell_offset <- createCellOffset(sim_counts) +plotModels(scLANE_models, + gene = names(scLANE_models)[1], + pt = sim_pseudotime, + expr.mat = sim_counts, + size.factor.offset = cell_offset) + +theme_scLANE() } \author{ Jack Leary diff --git a/man/tp1.Rd b/man/tp1.Rd index 16f2acb..814cdce 100644 --- a/man/tp1.Rd +++ b/man/tp1.Rd @@ -4,17 +4,17 @@ \alias{tp1} \title{Truncated p-th power function (positive part).} \usage{ -tp1(x, t, p = 1) +tp1(x = NULL, t = NULL, p = 1) } \arguments{ -\item{x}{: a vector of predictor variable values.} +\item{x}{A predictor variable value. Defaults to NULL.} -\item{t}{: a specified knot value.} +\item{t}{A specified knot value. Defaults to NULL.} -\item{p}{: the pth degree of the polynomial considered.} +\item{p}{The \eqn{p^{th}} degree of the polynomial considered. Defaults to 1.} } \value{ -\code{tp1} returns a vector of values that have been transformed using a truncated p-th power function (positive part) for a specified knot value. +A vector of values that have been transformed using a truncated \eqn{p^{th}} power function (positive part) for a specified knot value. } \description{ Truncated p-th power function (positive part). diff --git a/man/tp2.Rd b/man/tp2.Rd index 49658c6..2a9c651 100644 --- a/man/tp2.Rd +++ b/man/tp2.Rd @@ -4,17 +4,17 @@ \alias{tp2} \title{Truncated p-th power function (negative part).} \usage{ -tp2(x, t, p = 1) +tp2(x = NULL, t = NULL, p = 1) } \arguments{ -\item{x}{: a predictor variable value.} +\item{x}{A predictor variable value. Defaults to NULL.} -\item{t}{: a specified knot value.} +\item{t}{A specified knot value. Defaults to NULL.} -\item{p}{: the pth degree of the polynomial considered.} +\item{p}{The \eqn{p^{th}} degree of the polynomial considered. Defaults to 1.} } \value{ -\code{tp2} returns a vector of values that have been transformed using a truncated p-th power function (negative part) for a specified knot value. +A vector of values that have been transformed using a truncated \eqn{p^{th}} power function (negative part) for a specified knot value. } \description{ Truncated p-th power function (negative part). diff --git a/vignettes/scLANE.Rmd b/vignettes/scLANE.Rmd index c963f27..1c51420 100644 --- a/vignettes/scLANE.Rmd +++ b/vignettes/scLANE.Rmd @@ -247,7 +247,7 @@ sim_data <- geneProgramScoring(sim_data, Plotting the program scores for gene cluster 1 shows that cells at the end of the trajectory have overall high expression of genes in that cluster. -```{r, fig.cap="Module scores for gene cluster 0"} +```{r, fig.cap="Module scores for gene cluster 1"} plotPCA(sim_data, colour_by = "cluster_1") + theme_scLANE(umap = TRUE) ```