From bf9cd1b144ee7c9a4fec7dab6249999d055ef658 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Thu, 5 Oct 2023 15:25:21 -0400 Subject: [PATCH] more efficient offset creation and random seed in testDynamic -- closes #132 --- DESCRIPTION | 1 + NAMESPACE | 1 + R/createCellOffset.R | 21 +++++++++++---------- R/testDynamic.R | 8 +++++--- man/createCellOffset.Rd | 4 ++-- man/testDynamic.Rd | 5 ++++- 6 files changed, 24 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d50624e..12694df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,7 @@ Imports: gamlss, scales, future, + Matrix, ggplot2, splines, foreach, diff --git a/NAMESPACE b/NAMESPACE index 60275d4..fc7c8f6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ importFrom(MASS,ginv) importFrom(MASS,glm.nb) importFrom(MASS,negative.binomial) importFrom(MASS,theta.mm) +importFrom(Matrix,colSums) importFrom(Rcpp,sourceCpp) importFrom(bigstatsr,as_FBM) importFrom(broom.mixed,tidy) diff --git a/R/createCellOffset.R b/R/createCellOffset.R index 69befc5..d200e1c 100644 --- a/R/createCellOffset.R +++ b/R/createCellOffset.R @@ -1,9 +1,10 @@ -#' Create an offset vector before modeling +#' Create an offset vector before modeling. #' #' @name createCellOffset #' @author Jack Leary +#' @importFrom Matrix colSums #' @description Creates a vector of per-cell size factors to be used as input to \code{\link{testDynamic}} as a model offset given a variety of inputs. -#' @param expr.mat Either a matrix of raw integer counts (cells as columns), a \code{Seurat} object, or a \code{SingleCellExperiment} object. Defaults to NULL. +#' @param expr.mat Either a (sparse or dense) matrix of raw integer counts (cells as columns), a \code{Seurat} object, or a \code{SingleCellExperiment} object. Defaults to NULL. #' @param scale.factor The scaling factor use to multiply the sequencing depth factor for each cell. The default value is 1e4, which returns counts-per-10k. #' @return A named numeric vector containing the computed size factor for each cell. #' @seealso \code{\link{testDynamic}} @@ -21,18 +22,18 @@ createCellOffset <- function(expr.mat = NULL, scale.factor = 1e4) { # check inputs if (is.null(expr.mat)) { stop("Please provide expr.mat to createCellOffset().") } if (inherits(expr.mat, "SingleCellExperiment")) { - expr.mat <- as.matrix(BiocGenerics::counts(expr.mat)) + expr.mat <- BiocGenerics::counts(expr.mat) } else if (inherits(expr.mat, "Seurat")) { - expr.mat <- as.matrix(Seurat::GetAssayData(expr.mat, - slot = "counts", - assay = Seurat::DefaultAssay(expr.mat))) - } else if (inherits(expr.mat, "dgCMatrix")) { - expr.mat <- as.matrix(expr.mat) + expr.mat <- Seurat::GetAssayData(expr.mat, + slot = "counts", + assay = Seurat::DefaultAssay(expr.mat)) + } + if (!(inherits(expr.mat, "matrix") || inherits(expr.mat, "array") || inherits(expr.mat, "dgCMatrix"))) { + stop("Input expr.mat must be coerceable to a matrix of integer counts.") } - if (!(inherits(expr.mat, "matrix") || inherits(expr.mat, "array"))) { stop("Input expr.mat must be coerceable to a matrix of integer counts.") } # compute per-cell size factors cell_names <- colnames(expr.mat) - seq_depths <- colSums(expr.mat) + seq_depths <- Matrix::colSums(expr.mat) lib_size_factors <- scale.factor / seq_depths names(lib_size_factors) <- cell_names return(lib_size_factors) diff --git a/R/testDynamic.R b/R/testDynamic.R index 469ab67..06e9217 100644 --- a/R/testDynamic.R +++ b/R/testDynamic.R @@ -30,6 +30,7 @@ #' @param approx.knot (Optional) Should the knot space be reduced in order to improve computation time? Defaults to TRUE. #' @param glmm.adaptive (Optional) Should the basis functions for the GLMM be chosen adaptively? If not, uses 4 evenly spaced knots. Defaults to FALSE. #' @param track.time (Optional) A boolean indicating whether the amount of time the function takes to run should be tracked and printed to the console. Useful for debugging. Defaults to FALSE. +#' @param random.seed (Optional) The random seed used to initialize RNG streams in parallel. Defaults to 312. #' @details #' \itemize{ #' \item If \code{expr.mat} is a \code{Seurat} object, counts will be extracted from the output of \code{\link[SeuratObject]{DefaultAssay}}. If using this functionality, check to ensure the specified assay is correct before running the function. If the input is a \code{SingleCellExperiment} object, the raw counts will be extracted with \code{\link[BiocGenerics]{counts}}. @@ -86,7 +87,8 @@ testDynamic <- function(expr.mat = NULL, parallel.exec = TRUE, n.cores = 2, approx.knot = TRUE, - track.time = FALSE) { + track.time = FALSE, + random.seed = 312) { # check inputs if (is.null(expr.mat) || is.null(pt)) { stop("You forgot some inputs to testDynamic().") } @@ -138,10 +140,10 @@ testDynamic <- function(expr.mat = NULL, if (parallel.exec) { cl <- parallel::makeCluster(n.cores) doParallel::registerDoParallel(cl) - parallel::clusterSetRNGStream(cl, iseed = 312) + parallel::clusterSetRNGStream(cl, iseed = random.seed) } else { cl <- foreach::registerDoSEQ() - set.seed(312) + set.seed(random.seed) } # convert dense counts matrix to file-backed matrix diff --git a/man/createCellOffset.Rd b/man/createCellOffset.Rd index 0482d5f..2b077b2 100644 --- a/man/createCellOffset.Rd +++ b/man/createCellOffset.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/createCellOffset.R \name{createCellOffset} \alias{createCellOffset} -\title{Create an offset vector before modeling} +\title{Create an offset vector before modeling.} \usage{ createCellOffset(expr.mat = NULL, scale.factor = 10000) } \arguments{ -\item{expr.mat}{Either a matrix of raw integer counts (cells as columns), a \code{Seurat} object, or a \code{SingleCellExperiment} object. Defaults to NULL.} +\item{expr.mat}{Either a (sparse or dense) matrix of raw integer counts (cells as columns), a \code{Seurat} object, or a \code{SingleCellExperiment} object. Defaults to NULL.} \item{scale.factor}{The scaling factor use to multiply the sequencing depth factor for each cell. The default value is 1e4, which returns counts-per-10k.} } diff --git a/man/testDynamic.Rd b/man/testDynamic.Rd index d88549a..68a6e86 100644 --- a/man/testDynamic.Rd +++ b/man/testDynamic.Rd @@ -18,7 +18,8 @@ testDynamic( parallel.exec = TRUE, n.cores = 2, approx.knot = TRUE, - track.time = FALSE + track.time = FALSE, + random.seed = 312 ) } \arguments{ @@ -49,6 +50,8 @@ testDynamic( \item{approx.knot}{(Optional) Should the knot space be reduced in order to improve computation time? Defaults to TRUE.} \item{track.time}{(Optional) A boolean indicating whether the amount of time the function takes to run should be tracked and printed to the console. Useful for debugging. Defaults to FALSE.} + +\item{random.seed}{(Optional) The random seed used to initialize RNG streams in parallel. Defaults to 312.} } \value{ A list of lists, where each element is a gene and each gene contains sublists for each element. Each gene-lineage sublist contains a gene name, lineage number, default \code{marge} vs. null model test results, model statistics, and fitted values. Use \code{\link{getResultsDE}} to tidy the results.