diff --git a/DESCRIPTION b/DESCRIPTION index 12694df..6f8ed4b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: scLANE Type: Package Title: Model gene expression dynamics with spline-based NB GLMs, GEEs, & GLMMs Version: 0.7.3 -Authors@R: c(person(given = "Jack", family = "Leary", email = "j.leary@ufl.edu", role = c("aut", "cre")), - person(given = "Rhonda", family = "Bacher", email = "rbacher@ufl.edu", role = c("ctb", "fnd"))) +Authors@R: c(person(given = "Jack", family = "Leary", email = "j.leary@ufl.edu", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-8821-3269")), + person(given = "Rhonda", family = "Bacher", email = "rbacher@ufl.edu", role = c("ctb", "fnd"), comment = c(ORCID = "0000-0001-5787-476X"))) Description: This package uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime or latent time. The modeling architectures currently supported are negative binomial GLMs, GEEs, & GLMMs. Downstream analysis functionalities include model comparison, dynamic gene clustering, smoothed counts generation, gene set enrichment testing, & visualization. diff --git a/NAMESPACE b/NAMESPACE index fc7c8f6..c022fc3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(enrichDynamicGenes) export(extractBreakpoints) export(fitGLMM) export(getFittedValues) +export(getKnotDist) export(getResultsDE) export(marge2) export(modelLRT) @@ -48,9 +49,11 @@ importFrom(dplyr,ntile) importFrom(dplyr,pull) importFrom(dplyr,relocate) importFrom(dplyr,rename) +importFrom(dplyr,rowwise) importFrom(dplyr,sample_frac) importFrom(dplyr,select) importFrom(dplyr,summarise) +importFrom(dplyr,ungroup) importFrom(dplyr,with_groups) importFrom(foreach,"%dopar%") importFrom(foreach,foreach) @@ -84,10 +87,10 @@ importFrom(glmmTMB,nbinom2) importFrom(parallel,clusterEvalQ) importFrom(parallel,clusterExport) importFrom(parallel,clusterSetRNGStream) -importFrom(parallel,detectCores) importFrom(parallel,makeCluster) importFrom(parallel,stopCluster) importFrom(purrr,discard) +importFrom(purrr,imap) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_chr) diff --git a/R/createCellOffset.R b/R/createCellOffset.R index d200e1c..ecaa277 100644 --- a/R/createCellOffset.R +++ b/R/createCellOffset.R @@ -14,6 +14,7 @@ #' @export #' @examples #' \dontrun{ +#' createCellOffset(expr.mat = sce_obj) #' createCellOffset(expr.mat = counts(sce_obj)) #' createCellOffset(expr.mat = seu_obj, scale.factor = 1e5) #' } diff --git a/R/getKnotDist.R b/R/getKnotDist.R new file mode 100644 index 0000000..5b1547c --- /dev/null +++ b/R/getKnotDist.R @@ -0,0 +1,34 @@ +#' Pull the set of knots for dynamic genes across each lineage. +#' +#' @name getKnotDist +#' @author Jack Leary +#' @import magrittr +#' @importFrom purrr imap reduce +#' @description Pulls knot locations for dynamic genes across each lineage, allowing comparisons of where transcriptional switches occur between lineages. +#' @param test.dyn.res The output from \code{\link{testDynamic}}. Defaults to NULL. +#' @param dyn.genes The set of genes to pull knots for. If unspecified, pulls knots for all modeled genes. Defaults to NULL. +#' @return A data.frame containing gene name, lineage ID, and knot location in pseudotime. +#' @export +#' @examples +#' \dontrun{ +#' getKnotDist(gene_stats) +#' } + +getKnotDist <- function(test.dyn.res = NULL, dyn.genes = NULL) { + # check inputs + if (is.null(test.dyn.res)) { stop("You forgot one of the arguments to getKnotDist().") } + if (is.null(dyn.genes)) { + dyn.genes <- names(test.dyn.res) + } + # pull knots per-lineage + knot_df <- purrr::imap(test.dyn.res[dyn.genes], \(x, y) { + purrr::imap(x, \(z, w) { + data.frame(gene = y, + lineage = w, + knot = z$MARGE_Slope_Data$Breakpoint) + }) %>% + purrr::reduce(rbind) + }) %>% + purrr::reduce(rbind) + return(knot_df) +} diff --git a/R/plotModels.R b/R/plotModels.R index 490eddf..e8fe381 100644 --- a/R/plotModels.R +++ b/R/plotModels.R @@ -6,27 +6,28 @@ #' @import glm2 #' @importFrom stats qnorm predict as.formula #' @importFrom purrr map map2 reduce -#' @importFrom dplyr relocate mutate select contains case_when filter if_else +#' @importFrom dplyr relocate mutate select contains case_when filter if_else rowwise ungroup #' @importFrom geeM geem #' @importFrom glmmTMB glmmTMB nbinom2 #' @importFrom MASS negative.binomial theta.mm #' @importFrom tidyr pivot_longer #' @importFrom scales label_comma label_number -#' @importFrom ggplot2 theme_classic ggplot aes geom_point geom_line geom_ribbon facet_wrap scale_y_continuous labs theme element_text guides guide_legend +#' @importFrom ggplot2 ggplot aes geom_point geom_line geom_ribbon facet_wrap scale_y_continuous labs theme element_text guides guide_legend #' @description This function visualizes the fitted values of several types of models over the expression and pseudotime values of each cell. #' @param test.dyn.res The output from \code{\link{testDynamic}}. Defaults to NULL. #' @param gene The name of the gene that's being analyzed. Used as the title of the \code{ggplot} object & to subset the counts matrix. Defaults to NULL. #' @param pt A data.frame of pseudotime values for each cell. Defaults to NULL. -#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a dense matrix of integer-valued counts. Defaults to NULL. +#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of integer-valued counts. Defaults to NULL. #' @param size.factor.offset (Optional) An offset to be included in the final model fit. Can be generated easily with \code{\link{createCellOffset}}. Defaults to NULL. +#' @param log1p.norm (Optional) Should log1p-normalized versions of expression & model predictions be returned instead of raw counts? Defaults to TRUE. #' @param is.gee Should a GEE framework be used instead of the default GLM? Defaults to FALSE. #' @param is.glmm Should a GLMM framework be used instead of the default GLM? Defaults to FALSE. #' @param id.vec If the GEE or GLMM framework is being used, a vector of subject IDs to use as input to \code{\link[geeM]{geem}} or \code{\link[glmmTMB]{glmmTMB}}. Defaults to NULL. #' @param cor.structure If the GEE framework is used, specifies the desired working correlation structure. Must be one of "ar1", "independence", or "exchangeable". Defaults to "ar1". #' @param ci.alpha (Optional) The pre-specified Type I Error rate used in generating (\eqn{1 - \alpha})\% CIs. Defaults to good old 0.05. -#' @param plot.null (Optional) Should the fitted values from the intercept-only null model be plotted? Defaults to TRUE. -#' @param plot.glm (Optional) Should the fitted values from an NB GLM be plotted? If the data are multi-subject, the "GLM" model can be a GEE or GLMM depending on the desired framework. See Examples for more detail. Defaults to TRUE. -#' @param plot.gam (Optional) Should the fitted values from an NB GAM be plotted? Defaults to TRUE. +#' @param plot.null (Optional) Should the fitted values from the intercept-only null model be plotted? Defaults to FALSE. +#' @param plot.glm (Optional) Should the fitted values from an NB GLM be plotted? If the data are multi-subject, the "GLM" model can be a GEE or GLMM depending on the desired framework. See Examples for more detail. Defaults to FALSE. +#' @param plot.gam (Optional) Should the fitted values from an NB GAM be plotted? Defaults to FALSE. #' @param plot.scLANE (Optional) Should the fitted values from the \code{scLANE} model be plotted? Defaults to TRUE. #' @param filter.lineage (Optional) A character vector of lineages to filter out before generating the final plot. Should be letters, i.e. lineage "A" or "B". Defaults to NULL. #' @param gg.theme (Optional) A \code{ggplot2} theme to be added to the plot. Defaults to \code{\link{theme_scLANE}}. @@ -37,17 +38,20 @@ #' plotModels(gene_stats, #' gene = "AURKA", #' pt = pt_df, -#' expr.mat = count_mat) +#' expr.mat = count_mat, +#' size.factor.offset = cell_offset) #' plotModels(gene_stats, #' gene = "CD3E", #' pt = pt_df, -#' expr.mat = count_mat, +#' expr.mat = seu_obj, +#' size.factor.offset = cell_offset, #' ci.alpha = 0.1, #' filter.lineage = c("A", "C")) #' plotModels(gene_stats, #' gene = "CD14", #' pt = pt_df, -#' expr.mat = count_mat, +#' expr.mat = sce_obj, +#' size.factor.offset = cell_offset, #' is.glmm = TRUE, #' id.vec = subject_ids, #' plot.glm = TRUE, # plots an NB GLMM with random intercepts & slopes per-subject @@ -60,14 +64,15 @@ plotModels <- function(test.dyn.res = NULL, pt = NULL, expr.mat = NULL, size.factor.offset = NULL, + log1p.norm = TRUE, is.gee = FALSE, is.glmm = FALSE, id.vec = NULL, cor.structure = "ar1", ci.alpha = 0.05, - plot.null = TRUE, - plot.glm = TRUE, - plot.gam = TRUE, + plot.null = FALSE, + plot.glm = FALSE, + plot.gam = FALSE, plot.scLANE = TRUE, filter.lineage = NULL, gg.theme = theme_scLANE()) { @@ -75,16 +80,13 @@ plotModels <- function(test.dyn.res = NULL, if (is.null(expr.mat) || is.null(pt) || is.null(gene) || is.null(test.dyn.res)) { stop("You forgot one or more of the arguments to plotModels().") } # get raw counts from SingleCellExperiment or Seurat object & transpose to cell x gene dense matrix if (inherits(expr.mat, "SingleCellExperiment")) { - expr.mat <- as.matrix(BiocGenerics::counts(expr.mat)) + expr.mat <- BiocGenerics::counts(expr.mat) } else if (inherits(expr.mat, "Seurat")) { - expr.mat <- as.matrix(Seurat::GetAssayData(expr.mat, - slot = "counts", - assay = Seurat::DefaultAssay(expr.mat))) - } else if (inherits(expr.mat, "dgCMatrix")) { - expr.mat <- as.matrix(expr.mat) + expr.mat <- Seurat::GetAssayData(expr.mat, + slot = "counts", + assay = Seurat::DefaultAssay(expr.mat)) } - if (!(inherits(expr.mat, "matrix") || inherits(expr.mat, "array"))) { stop("Input expr.mat must be coerceable to a matrix of integer counts.") } - expr.mat <- t(expr.mat) + if (!(inherits(expr.mat, "matrix") || inherits(expr.mat, "array") || inherits(expr.mat, "dgCMatrix"))) { stop("Input expr.mat must be coerceable to a matrix of integer counts.") } # generate parameters for CIs Z <- stats::qnorm(ci.alpha / 2, lower.tail = FALSE) # select sublist for gene of interest @@ -98,7 +100,7 @@ plotModels <- function(test.dyn.res = NULL, mod_df <- data.frame(CELL = rownames(pt)[!is.na(x)], LINEAGE = y, PT = x[!is.na(x)], - COUNT = expr.mat[!is.na(x), gene]) + COUNT = as.numeric(expr.mat[gene, !is.na(x)])) if (is.gee || is.glmm) { mod_df <- dplyr::mutate(mod_df, ID = id.vec[!is.na(x)]) } else { @@ -117,85 +119,99 @@ plotModels <- function(test.dyn.res = NULL, RESP_NULL = .y$Null_Preds$null_link_fit, SE_NULL = .y$Null_Preds$null_link_se)) %>% purrr::map(function(x) { - if (is.gee) { - theta_hat <- MASS::theta.mm(y = x$COUNT, - mu = mean(x$COUNT), - dfr = nrow(x) - 1) - gee_formula <- "COUNT ~ PT" - if (!is.null(size.factor.offset)) { - gee_formula <- paste0(gee_formula, " + offset(log(1 / CELL_OFFSET))") - } - gee_formula <- stats::as.formula(gee_formula) - glm_mod <- geeM::geem(gee_formula, - id = ID, - data = x, - family = MASS::negative.binomial(theta_hat, link = log), - corstr = cor.structure, - scale.fix = FALSE, - sandwich = TRUE) - robust_vcov_mat <- as.matrix(glm_mod$var) - glm_preds <- data.frame(fit = predict(glm_mod), - se.fit = sqrt(apply((tcrossprod(glm_mod$X, robust_vcov_mat)) * glm_mod$X, 1, sum))) - } else if (is.glmm) { - if (is.null(size.factor.offset)) { - glm_mod <- glmmTMB::glmmTMB(COUNT ~ PT + (1 | ID) + (1 + PT | ID), - data = x, - family = glmmTMB::nbinom2(link = "log"), - se = TRUE, - REML = FALSE) + if (plot.glm) { + if (is.gee) { + theta_hat <- MASS::theta.mm(y = x$COUNT, + mu = mean(x$COUNT), + dfr = nrow(x) - 1) + gee_formula <- "COUNT ~ PT" + if (!is.null(size.factor.offset)) { + gee_formula <- paste0(gee_formula, " + offset(log(1 / CELL_OFFSET))") + } + gee_formula <- stats::as.formula(gee_formula) + glm_mod <- geeM::geem(gee_formula, + id = ID, + data = x, + family = MASS::negative.binomial(theta_hat, link = log), + corstr = cor.structure, + scale.fix = FALSE, + sandwich = TRUE) + robust_vcov_mat <- as.matrix(glm_mod$var) + glm_preds <- data.frame(fit = predict(glm_mod), + se.fit = sqrt(apply((tcrossprod(glm_mod$X, robust_vcov_mat)) * glm_mod$X, 1, sum))) + } else if (is.glmm) { + if (is.null(size.factor.offset)) { + glm_mod <- glmmTMB::glmmTMB(COUNT ~ PT + (1 + PT | ID), + data = x, + family = glmmTMB::nbinom2(link = "log"), + se = TRUE, + REML = FALSE) + } else { + glm_mod <- glmmTMB::glmmTMB(COUNT ~ PT + (1 + PT | ID), + data = x, + family = glmmTMB::nbinom2(link = "log"), + offset = log(1 / x$CELL_OFFSET), + se = TRUE, + REML = FALSE) + } + glm_preds <- data.frame(predict(glm_mod, type = "link", se.fit = TRUE)[1:2]) } else { - glm_mod <- glmmTMB::glmmTMB(COUNT ~ PT + (1 | ID) + (1 + PT | ID), - data = x, - family = glmmTMB::nbinom2(link = "log"), - offset = log(1 / x$CELL_OFFSET), - se = TRUE, - REML = FALSE) + glm_formula <- "COUNT ~ PT" + if (!is.null(size.factor.offset)) { + glm_formula <- paste0(glm_formula, " + offset(log(1 / CELL_OFFSET))") + } + glm_formula <- stats::as.formula(glm_formula) + glm_mod <- MASS::glm.nb(glm_formula, + data = x, + x = FALSE, + y = FALSE, + method = "glm.fit2", + link = log, + init.theta = 1) + glm_preds <- data.frame(stats::predict(glm_mod, type = "link", se.fit = TRUE)[1:2]) } - glm_preds <- data.frame(predict(glm_mod, type = "link", se.fit = TRUE)[1:2]) + x <- dplyr::mutate(x, + RESP_GLM = glm_preds$fit, + SE_GLM = glm_preds$se.fit) } else { - glm_formula <- "COUNT ~ PT" - if (!is.null(size.factor.offset)) { - glm_formula <- paste0(glm_formula, " + offset(log(1 / CELL_OFFSET))") - } - glm_formula <- stats::as.formula(glm_formula) - glm_mod <- MASS::glm.nb(glm_formula, - data = x, - x = FALSE, - y = FALSE, - method = "glm.fit2", - link = log, - init.theta = 1) - glm_preds <- data.frame(stats::predict(glm_mod, type = "link", se.fit = TRUE)[1:2]) + x <- dplyr::mutate(x, + RESP_GLM = NA_real_, + SE_GLM = NA_real_) } - x %<>% dplyr::mutate(RESP_GLM = glm_preds$fit, - SE_GLM = glm_preds$se.fit) return(x) }) %>% purrr::map(function(x) { - if (is.null(size.factor.offset)) { - if (is.glmm) { - gam_mod <- nbGAM(expr = x$COUNT, - pt = x$PT, - id.vec = x$ID) + if (plot.gam) { + if (is.null(size.factor.offset)) { + if (is.glmm) { + gam_mod <- nbGAM(expr = x$COUNT, + pt = x$PT, + id.vec = x$ID) + } else { + gam_mod <- nbGAM(expr = x$COUNT, pt = x$PT) + } } else { - gam_mod <- nbGAM(expr = x$COUNT, pt = x$PT) + if (is.glmm) { + gam_mod <- nbGAM(expr = x$COUNT, + Y.offset = x$CELL_OFFSET, + pt = x$PT, + id.vec = x$ID) + } else { + gam_mod <- nbGAM(expr = x$COUNT, + Y.offset = x$CELL_OFFSET, + pt = x$PT) + } } + gam_preds <- data.frame(predict(gam_mod, type = "link", se.fit = TRUE)[1:2]) + x <- dplyr::mutate(x, + RESP_GAM = gam_preds$fit, + SE_GAM = gam_preds$se.fit) } else { - if (is.glmm) { - gam_mod <- nbGAM(expr = x$COUNT, - Y.offset = x$CELL_OFFSET, - pt = x$PT, - id.vec = x$ID) - } else { - gam_mod <- nbGAM(expr = x$COUNT, - Y.offset = x$CELL_OFFSET, - pt = x$PT) - } + x <- dplyr::mutate(x, + RESP_GAM = NA_real_, + SE_GAM = NA_real_) } - gam_preds <- data.frame(predict(gam_mod, type = "link", se.fit = TRUE)[1:2]) - x <- dplyr::mutate(x, - RESP_GAM = gam_preds$fit, - SE_GAM = gam_preds$se.fit) + return(x) }) %>% purrr::map(function(x) { @@ -236,35 +252,41 @@ plotModels <- function(test.dyn.res = NULL, } else { x } - })) + })) %>% + dplyr::ungroup() } else { counts_df <- dplyr::mutate(counts_df, dplyr::across(c(COUNT, PRED, CI_LL, CI_UL), \(x) x * CELL_OFFSET)) } } # add conditional filters here if (!plot.null) { - counts_df %<>% dplyr::filter(MODEL != "Intercept-only") + counts_df <- dplyr::filter(counts_df, MODEL != "Intercept-only") } if (!plot.glm) { - counts_df %<>% dplyr::filter(MODEL != "GLM") + counts_df <- dplyr::filter(counts_df, MODEL != "GLM") } if (!plot.gam) { - counts_df %<>% dplyr::filter(MODEL != "GAM") + counts_df <- dplyr::filter(counts_df, MODEL != "GAM") } if (!plot.scLANE) { - counts_df %<>% dplyr::filter(MODEL != "scLANE") + counts_df <- dplyr::filter(counts_df, MODEL != "scLANE") } if (!is.null(filter.lineage)) { - counts_df %<>% dplyr::filter(!LINEAGE %in% filter.lineage) + counts_df <- dplyr::filter(counts_df, !LINEAGE %in% filter.lineage) } # change model labels as necessary if (is.gee) { - counts_df %<>% dplyr::mutate(MODEL = dplyr::if_else(as.character(MODEL) == "GLM", "GEE", as.character(MODEL)), - MODEL = factor(MODEL, levels = c("Intercept-only", "GEE", "GAM", "scLANE"))) + counts_df <- dplyr::mutate(counts_df, + MODEL = dplyr::if_else(as.character(MODEL) == "GLM", "GEE", as.character(MODEL)), + MODEL = factor(MODEL, levels = c("Intercept-only", "GEE", "GAM", "scLANE"))) } if (is.glmm) { - counts_df %<>% dplyr::mutate(MODEL = dplyr::if_else(as.character(MODEL) == "GLM", "GLMM", as.character(MODEL)), - MODEL = factor(MODEL, levels = c("Intercept-only", "GLMM", "GAM", "scLANE"))) + counts_df <- dplyr::mutate(counts_df, + MODEL = dplyr::if_else(as.character(MODEL) == "GLM", "GLMM", as.character(MODEL)), + MODEL = factor(MODEL, levels = c("Intercept-only", "GLMM", "GAM", "scLANE"))) + } + if (log1p.norm) { + counts_df <- dplyr::mutate(counts_df, dplyr::across(c(COUNT, PRED, CI_LL, CI_UL), log1p)) } # generate plot if (is.glmm) { diff --git a/R/summarizeModel.R b/R/summarizeModel.R index ff25e09..5282b21 100644 --- a/R/summarizeModel.R +++ b/R/summarizeModel.R @@ -23,9 +23,15 @@ summarizeModel <- function(marge.model = NULL, pt = NULL) { Slope.Segment = NA_real_, Trend.Segment = NA_real_) } else { - # extract model equation & slopes + # extract model equation, slopes, & covariances + if (marge.model$model_type == "GEE") { + coef_vcov <- as.matrix(marge.model$final_mod$var) + } else { + coef_vcov <- vcov(marge.model$final_mod) + } coef_df <- data.frame(coef_name = names(coef(marge.model$final_mod)), - coef_value = unname(coef(marge.model$final_mod))) + coef_value = unname(coef(marge.model$final_mod)), + coef_var = unname(diag(coef_vcov))) coef_df <- coef_df[-which(coef_df$coef_name == "Intercept"), ] diff --git a/R/testDynamic.R b/R/testDynamic.R index 06e9217..1111db5 100644 --- a/R/testDynamic.R +++ b/R/testDynamic.R @@ -8,7 +8,7 @@ #' @importFrom bigstatsr as_FBM #' @importFrom foreach foreach %dopar% registerDoSEQ #' @importFrom doParallel registerDoParallel -#' @importFrom parallel makeCluster detectCores stopCluster clusterEvalQ clusterExport clusterSetRNGStream +#' @importFrom parallel makeCluster stopCluster clusterEvalQ clusterExport clusterSetRNGStream #' @importFrom withr with_output_sink #' @importFrom MASS glm.nb negative.binomial theta.mm #' @importFrom dplyr rename mutate relocate @@ -93,6 +93,9 @@ testDynamic <- function(expr.mat = NULL, if (is.null(expr.mat) || is.null(pt)) { stop("You forgot some inputs to testDynamic().") } # get raw counts from SingleCellExperiment or Seurat object & transpose to cell x gene dense matrix + if (is.null(genes)) { + genes <- rownames(expr.mat) + } if (inherits(expr.mat, "SingleCellExperiment")) { expr.mat <- BiocGenerics::counts(expr.mat)[genes, ] expr.mat <- as.matrix(expr.mat) @@ -102,15 +105,12 @@ testDynamic <- function(expr.mat = NULL, assay = Seurat::DefaultAssay(expr.mat)) expr.mat <- as.matrix(expr.mat[genes, ]) } else if (inherits(expr.mat, "dgCMatrix")) { - expr.mat <- as.matrix(expr.mat) + expr.mat <- as.matrix(expr.mat[genes, ]) + } else { + expr.mat <- expr.mat[genes, ] } if (!(inherits(expr.mat, "matrix") || inherits(expr.mat, "array"))) { stop("Input expr.mat must be coerceable to a matrix of integer counts.") } expr.mat <- t(expr.mat) # transpose to cell x gene matrix - if (is.null(genes)) { - genes <- colnames(expr.mat) - } else { - expr.mat <- expr.mat[, genes] - } # extract pseudotime dataframe if input is results from Slingshot if (inherits(pt, "SlingshotDataSet")) { @@ -165,8 +165,7 @@ testDynamic <- function(expr.mat = NULL, package_list <- c("glm2", "scLANE", "MASS", "bigstatsr", "broom.mixed", "dplyr", "stats") if (is.gee) { package_list <- c(package_list, "geeM") - } - if (is.glmm) { + } else if (is.glmm) { package_list <- c(package_list, "glmmTMB") } diff --git a/inst/rmarkdown/templates/Bacher_Group_HTML/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/Bacher_Group_HTML/skeleton/skeleton.Rmd index 196799e..628c5de 100644 --- a/inst/rmarkdown/templates/Bacher_Group_HTML/skeleton/skeleton.Rmd +++ b/inst/rmarkdown/templates/Bacher_Group_HTML/skeleton/skeleton.Rmd @@ -1,6 +1,6 @@ --- title: "Title" -subtitle: "University of Florida - Dept. of Biostatistics - Bacher Group" +subtitle: "UF Dept. of Biostatistics - Bacher Group" author: "Name" date: "`r Sys.Date()`" output: @@ -20,29 +20,40 @@ knitr::opts_chunk$set(echo = TRUE, comment = NA, message = FALSE, warning = FALSE, - fig.align = "center") + fig.align = "center", + dev = "png", + dpi = 300) set.seed(312) # lucky seed ``` # Libraries ```{r} -library(glm2) -library(mgcv) library(dplyr) -library(scran) library(scLANE) -library(scater) library(ggplot2) -library(tradeSeq) -library(slingshot) -library(doParallel) -library(kableExtra) -library(SingleCellExperiment) ``` # Data +```{r} + +``` + # Analysis +```{r} + +``` + # Conclusions + +```{r} + +``` + +# Session info + +```{r} +sessioninfo::session_info() +``` diff --git a/man/createCellOffset.Rd b/man/createCellOffset.Rd index 2b077b2..0085236 100644 --- a/man/createCellOffset.Rd +++ b/man/createCellOffset.Rd @@ -19,6 +19,7 @@ Creates a vector of per-cell size factors to be used as input to \code{\link{tes } \examples{ \dontrun{ +createCellOffset(expr.mat = sce_obj) createCellOffset(expr.mat = counts(sce_obj)) createCellOffset(expr.mat = seu_obj, scale.factor = 1e5) } diff --git a/man/getKnotDist.Rd b/man/getKnotDist.Rd new file mode 100644 index 0000000..353cf6e --- /dev/null +++ b/man/getKnotDist.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getKnotDist.R +\name{getKnotDist} +\alias{getKnotDist} +\title{Pull the set of knots for dynamic genes across each lineage.} +\usage{ +getKnotDist(test.dyn.res = NULL, dyn.genes = NULL) +} +\arguments{ +\item{test.dyn.res}{The output from \code{\link{testDynamic}}. Defaults to NULL.} + +\item{dyn.genes}{The set of genes to pull knots for. If unspecified, pulls knots for all modeled genes. Defaults to NULL.} +} +\value{ +A data.frame containing gene name, lineage ID, and knot location in pseudotime. +} +\description{ +Pulls knot locations for dynamic genes across each lineage, allowing comparisons of where transcriptional switches occur between lineages. +} +\examples{ +\dontrun{ +getKnotDist(gene_stats) +} +} +\author{ +Jack Leary +} diff --git a/man/plotModels.Rd b/man/plotModels.Rd index 773507c..7b5431a 100644 --- a/man/plotModels.Rd +++ b/man/plotModels.Rd @@ -10,14 +10,15 @@ plotModels( pt = NULL, expr.mat = NULL, size.factor.offset = NULL, + log1p.norm = TRUE, is.gee = FALSE, is.glmm = FALSE, id.vec = NULL, cor.structure = "ar1", ci.alpha = 0.05, - plot.null = TRUE, - plot.glm = TRUE, - plot.gam = TRUE, + plot.null = FALSE, + plot.glm = FALSE, + plot.gam = FALSE, plot.scLANE = TRUE, filter.lineage = NULL, gg.theme = theme_scLANE() @@ -30,10 +31,12 @@ plotModels( \item{pt}{A data.frame of pseudotime values for each cell. Defaults to NULL.} -\item{expr.mat}{Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a dense matrix of integer-valued counts. Defaults to NULL.} +\item{expr.mat}{Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of integer-valued counts. Defaults to NULL.} \item{size.factor.offset}{(Optional) An offset to be included in the final model fit. Can be generated easily with \code{\link{createCellOffset}}. Defaults to NULL.} +\item{log1p.norm}{(Optional) Should log1p-normalized versions of expression & model predictions be returned instead of raw counts? Defaults to TRUE.} + \item{is.gee}{Should a GEE framework be used instead of the default GLM? Defaults to FALSE.} \item{is.glmm}{Should a GLMM framework be used instead of the default GLM? Defaults to FALSE.} @@ -44,11 +47,11 @@ plotModels( \item{ci.alpha}{(Optional) The pre-specified Type I Error rate used in generating (\eqn{1 - \alpha})\% CIs. Defaults to good old 0.05.} -\item{plot.null}{(Optional) Should the fitted values from the intercept-only null model be plotted? Defaults to TRUE.} +\item{plot.null}{(Optional) Should the fitted values from the intercept-only null model be plotted? Defaults to FALSE.} -\item{plot.glm}{(Optional) Should the fitted values from an NB GLM be plotted? If the data are multi-subject, the "GLM" model can be a GEE or GLMM depending on the desired framework. See Examples for more detail. Defaults to TRUE.} +\item{plot.glm}{(Optional) Should the fitted values from an NB GLM be plotted? If the data are multi-subject, the "GLM" model can be a GEE or GLMM depending on the desired framework. See Examples for more detail. Defaults to FALSE.} -\item{plot.gam}{(Optional) Should the fitted values from an NB GAM be plotted? Defaults to TRUE.} +\item{plot.gam}{(Optional) Should the fitted values from an NB GAM be plotted? Defaults to FALSE.} \item{plot.scLANE}{(Optional) Should the fitted values from the \code{scLANE} model be plotted? Defaults to TRUE.} @@ -67,17 +70,20 @@ This function visualizes the fitted values of several types of models over the e plotModels(gene_stats, gene = "AURKA", pt = pt_df, - expr.mat = count_mat) + expr.mat = count_mat, + size.factor.offset = cell_offset) plotModels(gene_stats, gene = "CD3E", pt = pt_df, - expr.mat = count_mat, + expr.mat = seu_obj, + size.factor.offset = cell_offset, ci.alpha = 0.1, filter.lineage = c("A", "C")) plotModels(gene_stats, gene = "CD14", pt = pt_df, - expr.mat = count_mat, + expr.mat = sce_obj, + size.factor.offset = cell_offset, is.glmm = TRUE, id.vec = subject_ids, plot.glm = TRUE, # plots an NB GLMM with random intercepts & slopes per-subject diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R index 29bd595..f07554b 100644 --- a/tests/testthat/test_scLANE.R +++ b/tests/testthat/test_scLANE.R @@ -34,7 +34,7 @@ withr::with_output_sink(tempfile(), { size.factor.offset = cell_offset, n.cores = 2, track.time = TRUE) - gee_gene_stats <- testDynamic(sim_data, + gee_gene_stats <- testDynamic(expr.mat = sim_data, pt = pt_test, genes = genes_to_test, n.potential.basis.fns = 5, @@ -53,7 +53,7 @@ withr::with_output_sink(tempfile(), { glmm.adaptive = TRUE, id.vec = sim_data$subject, n.cores = 2, - track.time = TRUE) + track.time = FALSE) # get results tables overall glm_test_results <- getResultsDE(glm_gene_stats) gee_test_results <- getResultsDE(gee_gene_stats) @@ -191,6 +191,7 @@ withr::with_output_sink(tempfile(), { gsea_res <- enrichDynamicGenes(glm_test_results, species = "hsapiens") coef_summary_glm <- summarizeModel(marge_mod_offset, pt = pt_test) coef_summary_gee <- summarizeModel(marge_mod_GEE_offset, pt = pt_test) + knot_df <- getKnotDist(glm_gene_stats) }) # run tests @@ -359,3 +360,8 @@ test_that("summarizeModels() output", { expect_type(coef_summary_glm$Slope.Segment, "double") expect_type(coef_summary_gee$Slope.Segment, "double") }) + +test_that("getKnotDist() output", { + expect_s3_class(knot_df, "data.frame") + expect_equal(ncol(knot_df), 3) +})