From 0c6ba94ea638b7edbdca884404f34647aa368dc6 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Mon, 9 Oct 2023 14:19:57 -0400 Subject: [PATCH 1/5] updated metadata docs --- DESCRIPTION | 4 ++-- NAMESPACE | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 12694df..6f8ed4b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: scLANE Type: Package Title: Model gene expression dynamics with spline-based NB GLMs, GEEs, & GLMMs Version: 0.7.3 -Authors@R: c(person(given = "Jack", family = "Leary", email = "j.leary@ufl.edu", role = c("aut", "cre")), - person(given = "Rhonda", family = "Bacher", email = "rbacher@ufl.edu", role = c("ctb", "fnd"))) +Authors@R: c(person(given = "Jack", family = "Leary", email = "j.leary@ufl.edu", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-8821-3269")), + person(given = "Rhonda", family = "Bacher", email = "rbacher@ufl.edu", role = c("ctb", "fnd"), comment = c(ORCID = "0000-0001-5787-476X"))) Description: This package uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime or latent time. The modeling architectures currently supported are negative binomial GLMs, GEEs, & GLMMs. Downstream analysis functionalities include model comparison, dynamic gene clustering, smoothed counts generation, gene set enrichment testing, & visualization. diff --git a/NAMESPACE b/NAMESPACE index fc7c8f6..95f1dcb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,9 +48,11 @@ importFrom(dplyr,ntile) importFrom(dplyr,pull) importFrom(dplyr,relocate) importFrom(dplyr,rename) +importFrom(dplyr,rowwise) importFrom(dplyr,sample_frac) importFrom(dplyr,select) importFrom(dplyr,summarise) +importFrom(dplyr,ungroup) importFrom(dplyr,with_groups) importFrom(foreach,"%dopar%") importFrom(foreach,foreach) From 2611cc188a201cb28b2bddfe65771b64928e9006 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Mon, 9 Oct 2023 14:20:37 -0400 Subject: [PATCH 2/5] sped up plotModels -- closes #131 --- R/plotModels.R | 220 +++++++++++++++++++++++++--------------------- man/plotModels.Rd | 26 +++--- 2 files changed, 137 insertions(+), 109 deletions(-) diff --git a/R/plotModels.R b/R/plotModels.R index 490eddf..e8fe381 100644 --- a/R/plotModels.R +++ b/R/plotModels.R @@ -6,27 +6,28 @@ #' @import glm2 #' @importFrom stats qnorm predict as.formula #' @importFrom purrr map map2 reduce -#' @importFrom dplyr relocate mutate select contains case_when filter if_else +#' @importFrom dplyr relocate mutate select contains case_when filter if_else rowwise ungroup #' @importFrom geeM geem #' @importFrom glmmTMB glmmTMB nbinom2 #' @importFrom MASS negative.binomial theta.mm #' @importFrom tidyr pivot_longer #' @importFrom scales label_comma label_number -#' @importFrom ggplot2 theme_classic ggplot aes geom_point geom_line geom_ribbon facet_wrap scale_y_continuous labs theme element_text guides guide_legend +#' @importFrom ggplot2 ggplot aes geom_point geom_line geom_ribbon facet_wrap scale_y_continuous labs theme element_text guides guide_legend #' @description This function visualizes the fitted values of several types of models over the expression and pseudotime values of each cell. #' @param test.dyn.res The output from \code{\link{testDynamic}}. Defaults to NULL. #' @param gene The name of the gene that's being analyzed. Used as the title of the \code{ggplot} object & to subset the counts matrix. Defaults to NULL. #' @param pt A data.frame of pseudotime values for each cell. Defaults to NULL. -#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a dense matrix of integer-valued counts. Defaults to NULL. +#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of integer-valued counts. Defaults to NULL. #' @param size.factor.offset (Optional) An offset to be included in the final model fit. Can be generated easily with \code{\link{createCellOffset}}. Defaults to NULL. +#' @param log1p.norm (Optional) Should log1p-normalized versions of expression & model predictions be returned instead of raw counts? Defaults to TRUE. #' @param is.gee Should a GEE framework be used instead of the default GLM? Defaults to FALSE. #' @param is.glmm Should a GLMM framework be used instead of the default GLM? Defaults to FALSE. #' @param id.vec If the GEE or GLMM framework is being used, a vector of subject IDs to use as input to \code{\link[geeM]{geem}} or \code{\link[glmmTMB]{glmmTMB}}. Defaults to NULL. #' @param cor.structure If the GEE framework is used, specifies the desired working correlation structure. Must be one of "ar1", "independence", or "exchangeable". Defaults to "ar1". #' @param ci.alpha (Optional) The pre-specified Type I Error rate used in generating (\eqn{1 - \alpha})\% CIs. Defaults to good old 0.05. -#' @param plot.null (Optional) Should the fitted values from the intercept-only null model be plotted? Defaults to TRUE. -#' @param plot.glm (Optional) Should the fitted values from an NB GLM be plotted? If the data are multi-subject, the "GLM" model can be a GEE or GLMM depending on the desired framework. See Examples for more detail. Defaults to TRUE. -#' @param plot.gam (Optional) Should the fitted values from an NB GAM be plotted? Defaults to TRUE. +#' @param plot.null (Optional) Should the fitted values from the intercept-only null model be plotted? Defaults to FALSE. +#' @param plot.glm (Optional) Should the fitted values from an NB GLM be plotted? If the data are multi-subject, the "GLM" model can be a GEE or GLMM depending on the desired framework. See Examples for more detail. Defaults to FALSE. +#' @param plot.gam (Optional) Should the fitted values from an NB GAM be plotted? Defaults to FALSE. #' @param plot.scLANE (Optional) Should the fitted values from the \code{scLANE} model be plotted? Defaults to TRUE. #' @param filter.lineage (Optional) A character vector of lineages to filter out before generating the final plot. Should be letters, i.e. lineage "A" or "B". Defaults to NULL. #' @param gg.theme (Optional) A \code{ggplot2} theme to be added to the plot. Defaults to \code{\link{theme_scLANE}}. @@ -37,17 +38,20 @@ #' plotModels(gene_stats, #' gene = "AURKA", #' pt = pt_df, -#' expr.mat = count_mat) +#' expr.mat = count_mat, +#' size.factor.offset = cell_offset) #' plotModels(gene_stats, #' gene = "CD3E", #' pt = pt_df, -#' expr.mat = count_mat, +#' expr.mat = seu_obj, +#' size.factor.offset = cell_offset, #' ci.alpha = 0.1, #' filter.lineage = c("A", "C")) #' plotModels(gene_stats, #' gene = "CD14", #' pt = pt_df, -#' expr.mat = count_mat, +#' expr.mat = sce_obj, +#' size.factor.offset = cell_offset, #' is.glmm = TRUE, #' id.vec = subject_ids, #' plot.glm = TRUE, # plots an NB GLMM with random intercepts & slopes per-subject @@ -60,14 +64,15 @@ plotModels <- function(test.dyn.res = NULL, pt = NULL, expr.mat = NULL, size.factor.offset = NULL, + log1p.norm = TRUE, is.gee = FALSE, is.glmm = FALSE, id.vec = NULL, cor.structure = "ar1", ci.alpha = 0.05, - plot.null = TRUE, - plot.glm = TRUE, - plot.gam = TRUE, + plot.null = FALSE, + plot.glm = FALSE, + plot.gam = FALSE, plot.scLANE = TRUE, filter.lineage = NULL, gg.theme = theme_scLANE()) { @@ -75,16 +80,13 @@ plotModels <- function(test.dyn.res = NULL, if (is.null(expr.mat) || is.null(pt) || is.null(gene) || is.null(test.dyn.res)) { stop("You forgot one or more of the arguments to plotModels().") } # get raw counts from SingleCellExperiment or Seurat object & transpose to cell x gene dense matrix if (inherits(expr.mat, "SingleCellExperiment")) { - expr.mat <- as.matrix(BiocGenerics::counts(expr.mat)) + expr.mat <- BiocGenerics::counts(expr.mat) } else if (inherits(expr.mat, "Seurat")) { - expr.mat <- as.matrix(Seurat::GetAssayData(expr.mat, - slot = "counts", - assay = Seurat::DefaultAssay(expr.mat))) - } else if (inherits(expr.mat, "dgCMatrix")) { - expr.mat <- as.matrix(expr.mat) + expr.mat <- Seurat::GetAssayData(expr.mat, + slot = "counts", + assay = Seurat::DefaultAssay(expr.mat)) } - if (!(inherits(expr.mat, "matrix") || inherits(expr.mat, "array"))) { stop("Input expr.mat must be coerceable to a matrix of integer counts.") } - expr.mat <- t(expr.mat) + if (!(inherits(expr.mat, "matrix") || inherits(expr.mat, "array") || inherits(expr.mat, "dgCMatrix"))) { stop("Input expr.mat must be coerceable to a matrix of integer counts.") } # generate parameters for CIs Z <- stats::qnorm(ci.alpha / 2, lower.tail = FALSE) # select sublist for gene of interest @@ -98,7 +100,7 @@ plotModels <- function(test.dyn.res = NULL, mod_df <- data.frame(CELL = rownames(pt)[!is.na(x)], LINEAGE = y, PT = x[!is.na(x)], - COUNT = expr.mat[!is.na(x), gene]) + COUNT = as.numeric(expr.mat[gene, !is.na(x)])) if (is.gee || is.glmm) { mod_df <- dplyr::mutate(mod_df, ID = id.vec[!is.na(x)]) } else { @@ -117,85 +119,99 @@ plotModels <- function(test.dyn.res = NULL, RESP_NULL = .y$Null_Preds$null_link_fit, SE_NULL = .y$Null_Preds$null_link_se)) %>% purrr::map(function(x) { - if (is.gee) { - theta_hat <- MASS::theta.mm(y = x$COUNT, - mu = mean(x$COUNT), - dfr = nrow(x) - 1) - gee_formula <- "COUNT ~ PT" - if (!is.null(size.factor.offset)) { - gee_formula <- paste0(gee_formula, " + offset(log(1 / CELL_OFFSET))") - } - gee_formula <- stats::as.formula(gee_formula) - glm_mod <- geeM::geem(gee_formula, - id = ID, - data = x, - family = MASS::negative.binomial(theta_hat, link = log), - corstr = cor.structure, - scale.fix = FALSE, - sandwich = TRUE) - robust_vcov_mat <- as.matrix(glm_mod$var) - glm_preds <- data.frame(fit = predict(glm_mod), - se.fit = sqrt(apply((tcrossprod(glm_mod$X, robust_vcov_mat)) * glm_mod$X, 1, sum))) - } else if (is.glmm) { - if (is.null(size.factor.offset)) { - glm_mod <- glmmTMB::glmmTMB(COUNT ~ PT + (1 | ID) + (1 + PT | ID), - data = x, - family = glmmTMB::nbinom2(link = "log"), - se = TRUE, - REML = FALSE) + if (plot.glm) { + if (is.gee) { + theta_hat <- MASS::theta.mm(y = x$COUNT, + mu = mean(x$COUNT), + dfr = nrow(x) - 1) + gee_formula <- "COUNT ~ PT" + if (!is.null(size.factor.offset)) { + gee_formula <- paste0(gee_formula, " + offset(log(1 / CELL_OFFSET))") + } + gee_formula <- stats::as.formula(gee_formula) + glm_mod <- geeM::geem(gee_formula, + id = ID, + data = x, + family = MASS::negative.binomial(theta_hat, link = log), + corstr = cor.structure, + scale.fix = FALSE, + sandwich = TRUE) + robust_vcov_mat <- as.matrix(glm_mod$var) + glm_preds <- data.frame(fit = predict(glm_mod), + se.fit = sqrt(apply((tcrossprod(glm_mod$X, robust_vcov_mat)) * glm_mod$X, 1, sum))) + } else if (is.glmm) { + if (is.null(size.factor.offset)) { + glm_mod <- glmmTMB::glmmTMB(COUNT ~ PT + (1 + PT | ID), + data = x, + family = glmmTMB::nbinom2(link = "log"), + se = TRUE, + REML = FALSE) + } else { + glm_mod <- glmmTMB::glmmTMB(COUNT ~ PT + (1 + PT | ID), + data = x, + family = glmmTMB::nbinom2(link = "log"), + offset = log(1 / x$CELL_OFFSET), + se = TRUE, + REML = FALSE) + } + glm_preds <- data.frame(predict(glm_mod, type = "link", se.fit = TRUE)[1:2]) } else { - glm_mod <- glmmTMB::glmmTMB(COUNT ~ PT + (1 | ID) + (1 + PT | ID), - data = x, - family = glmmTMB::nbinom2(link = "log"), - offset = log(1 / x$CELL_OFFSET), - se = TRUE, - REML = FALSE) + glm_formula <- "COUNT ~ PT" + if (!is.null(size.factor.offset)) { + glm_formula <- paste0(glm_formula, " + offset(log(1 / CELL_OFFSET))") + } + glm_formula <- stats::as.formula(glm_formula) + glm_mod <- MASS::glm.nb(glm_formula, + data = x, + x = FALSE, + y = FALSE, + method = "glm.fit2", + link = log, + init.theta = 1) + glm_preds <- data.frame(stats::predict(glm_mod, type = "link", se.fit = TRUE)[1:2]) } - glm_preds <- data.frame(predict(glm_mod, type = "link", se.fit = TRUE)[1:2]) + x <- dplyr::mutate(x, + RESP_GLM = glm_preds$fit, + SE_GLM = glm_preds$se.fit) } else { - glm_formula <- "COUNT ~ PT" - if (!is.null(size.factor.offset)) { - glm_formula <- paste0(glm_formula, " + offset(log(1 / CELL_OFFSET))") - } - glm_formula <- stats::as.formula(glm_formula) - glm_mod <- MASS::glm.nb(glm_formula, - data = x, - x = FALSE, - y = FALSE, - method = "glm.fit2", - link = log, - init.theta = 1) - glm_preds <- data.frame(stats::predict(glm_mod, type = "link", se.fit = TRUE)[1:2]) + x <- dplyr::mutate(x, + RESP_GLM = NA_real_, + SE_GLM = NA_real_) } - x %<>% dplyr::mutate(RESP_GLM = glm_preds$fit, - SE_GLM = glm_preds$se.fit) return(x) }) %>% purrr::map(function(x) { - if (is.null(size.factor.offset)) { - if (is.glmm) { - gam_mod <- nbGAM(expr = x$COUNT, - pt = x$PT, - id.vec = x$ID) + if (plot.gam) { + if (is.null(size.factor.offset)) { + if (is.glmm) { + gam_mod <- nbGAM(expr = x$COUNT, + pt = x$PT, + id.vec = x$ID) + } else { + gam_mod <- nbGAM(expr = x$COUNT, pt = x$PT) + } } else { - gam_mod <- nbGAM(expr = x$COUNT, pt = x$PT) + if (is.glmm) { + gam_mod <- nbGAM(expr = x$COUNT, + Y.offset = x$CELL_OFFSET, + pt = x$PT, + id.vec = x$ID) + } else { + gam_mod <- nbGAM(expr = x$COUNT, + Y.offset = x$CELL_OFFSET, + pt = x$PT) + } } + gam_preds <- data.frame(predict(gam_mod, type = "link", se.fit = TRUE)[1:2]) + x <- dplyr::mutate(x, + RESP_GAM = gam_preds$fit, + SE_GAM = gam_preds$se.fit) } else { - if (is.glmm) { - gam_mod <- nbGAM(expr = x$COUNT, - Y.offset = x$CELL_OFFSET, - pt = x$PT, - id.vec = x$ID) - } else { - gam_mod <- nbGAM(expr = x$COUNT, - Y.offset = x$CELL_OFFSET, - pt = x$PT) - } + x <- dplyr::mutate(x, + RESP_GAM = NA_real_, + SE_GAM = NA_real_) } - gam_preds <- data.frame(predict(gam_mod, type = "link", se.fit = TRUE)[1:2]) - x <- dplyr::mutate(x, - RESP_GAM = gam_preds$fit, - SE_GAM = gam_preds$se.fit) + return(x) }) %>% purrr::map(function(x) { @@ -236,35 +252,41 @@ plotModels <- function(test.dyn.res = NULL, } else { x } - })) + })) %>% + dplyr::ungroup() } else { counts_df <- dplyr::mutate(counts_df, dplyr::across(c(COUNT, PRED, CI_LL, CI_UL), \(x) x * CELL_OFFSET)) } } # add conditional filters here if (!plot.null) { - counts_df %<>% dplyr::filter(MODEL != "Intercept-only") + counts_df <- dplyr::filter(counts_df, MODEL != "Intercept-only") } if (!plot.glm) { - counts_df %<>% dplyr::filter(MODEL != "GLM") + counts_df <- dplyr::filter(counts_df, MODEL != "GLM") } if (!plot.gam) { - counts_df %<>% dplyr::filter(MODEL != "GAM") + counts_df <- dplyr::filter(counts_df, MODEL != "GAM") } if (!plot.scLANE) { - counts_df %<>% dplyr::filter(MODEL != "scLANE") + counts_df <- dplyr::filter(counts_df, MODEL != "scLANE") } if (!is.null(filter.lineage)) { - counts_df %<>% dplyr::filter(!LINEAGE %in% filter.lineage) + counts_df <- dplyr::filter(counts_df, !LINEAGE %in% filter.lineage) } # change model labels as necessary if (is.gee) { - counts_df %<>% dplyr::mutate(MODEL = dplyr::if_else(as.character(MODEL) == "GLM", "GEE", as.character(MODEL)), - MODEL = factor(MODEL, levels = c("Intercept-only", "GEE", "GAM", "scLANE"))) + counts_df <- dplyr::mutate(counts_df, + MODEL = dplyr::if_else(as.character(MODEL) == "GLM", "GEE", as.character(MODEL)), + MODEL = factor(MODEL, levels = c("Intercept-only", "GEE", "GAM", "scLANE"))) } if (is.glmm) { - counts_df %<>% dplyr::mutate(MODEL = dplyr::if_else(as.character(MODEL) == "GLM", "GLMM", as.character(MODEL)), - MODEL = factor(MODEL, levels = c("Intercept-only", "GLMM", "GAM", "scLANE"))) + counts_df <- dplyr::mutate(counts_df, + MODEL = dplyr::if_else(as.character(MODEL) == "GLM", "GLMM", as.character(MODEL)), + MODEL = factor(MODEL, levels = c("Intercept-only", "GLMM", "GAM", "scLANE"))) + } + if (log1p.norm) { + counts_df <- dplyr::mutate(counts_df, dplyr::across(c(COUNT, PRED, CI_LL, CI_UL), log1p)) } # generate plot if (is.glmm) { diff --git a/man/plotModels.Rd b/man/plotModels.Rd index 773507c..7b5431a 100644 --- a/man/plotModels.Rd +++ b/man/plotModels.Rd @@ -10,14 +10,15 @@ plotModels( pt = NULL, expr.mat = NULL, size.factor.offset = NULL, + log1p.norm = TRUE, is.gee = FALSE, is.glmm = FALSE, id.vec = NULL, cor.structure = "ar1", ci.alpha = 0.05, - plot.null = TRUE, - plot.glm = TRUE, - plot.gam = TRUE, + plot.null = FALSE, + plot.glm = FALSE, + plot.gam = FALSE, plot.scLANE = TRUE, filter.lineage = NULL, gg.theme = theme_scLANE() @@ -30,10 +31,12 @@ plotModels( \item{pt}{A data.frame of pseudotime values for each cell. Defaults to NULL.} -\item{expr.mat}{Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a dense matrix of integer-valued counts. Defaults to NULL.} +\item{expr.mat}{Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of integer-valued counts. Defaults to NULL.} \item{size.factor.offset}{(Optional) An offset to be included in the final model fit. Can be generated easily with \code{\link{createCellOffset}}. Defaults to NULL.} +\item{log1p.norm}{(Optional) Should log1p-normalized versions of expression & model predictions be returned instead of raw counts? Defaults to TRUE.} + \item{is.gee}{Should a GEE framework be used instead of the default GLM? Defaults to FALSE.} \item{is.glmm}{Should a GLMM framework be used instead of the default GLM? Defaults to FALSE.} @@ -44,11 +47,11 @@ plotModels( \item{ci.alpha}{(Optional) The pre-specified Type I Error rate used in generating (\eqn{1 - \alpha})\% CIs. Defaults to good old 0.05.} -\item{plot.null}{(Optional) Should the fitted values from the intercept-only null model be plotted? Defaults to TRUE.} +\item{plot.null}{(Optional) Should the fitted values from the intercept-only null model be plotted? Defaults to FALSE.} -\item{plot.glm}{(Optional) Should the fitted values from an NB GLM be plotted? If the data are multi-subject, the "GLM" model can be a GEE or GLMM depending on the desired framework. See Examples for more detail. Defaults to TRUE.} +\item{plot.glm}{(Optional) Should the fitted values from an NB GLM be plotted? If the data are multi-subject, the "GLM" model can be a GEE or GLMM depending on the desired framework. See Examples for more detail. Defaults to FALSE.} -\item{plot.gam}{(Optional) Should the fitted values from an NB GAM be plotted? Defaults to TRUE.} +\item{plot.gam}{(Optional) Should the fitted values from an NB GAM be plotted? Defaults to FALSE.} \item{plot.scLANE}{(Optional) Should the fitted values from the \code{scLANE} model be plotted? Defaults to TRUE.} @@ -67,17 +70,20 @@ This function visualizes the fitted values of several types of models over the e plotModels(gene_stats, gene = "AURKA", pt = pt_df, - expr.mat = count_mat) + expr.mat = count_mat, + size.factor.offset = cell_offset) plotModels(gene_stats, gene = "CD3E", pt = pt_df, - expr.mat = count_mat, + expr.mat = seu_obj, + size.factor.offset = cell_offset, ci.alpha = 0.1, filter.lineage = c("A", "C")) plotModels(gene_stats, gene = "CD14", pt = pt_df, - expr.mat = count_mat, + expr.mat = sce_obj, + size.factor.offset = cell_offset, is.glmm = TRUE, id.vec = subject_ids, plot.glm = TRUE, # plots an NB GLMM with random intercepts & slopes per-subject From 5d2cf93a1d01162faba8f6d5b6feb56cb86f82fa Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Mon, 9 Oct 2023 14:20:54 -0400 Subject: [PATCH 3/5] other small tweaks --- R/createCellOffset.R | 1 + R/summarizeModel.R | 6 ++-- .../Bacher_Group_HTML/skeleton/skeleton.Rmd | 33 ++++++++++++------- man/createCellOffset.Rd | 1 + 4 files changed, 28 insertions(+), 13 deletions(-) diff --git a/R/createCellOffset.R b/R/createCellOffset.R index d200e1c..ecaa277 100644 --- a/R/createCellOffset.R +++ b/R/createCellOffset.R @@ -14,6 +14,7 @@ #' @export #' @examples #' \dontrun{ +#' createCellOffset(expr.mat = sce_obj) #' createCellOffset(expr.mat = counts(sce_obj)) #' createCellOffset(expr.mat = seu_obj, scale.factor = 1e5) #' } diff --git a/R/summarizeModel.R b/R/summarizeModel.R index ff25e09..e54e994 100644 --- a/R/summarizeModel.R +++ b/R/summarizeModel.R @@ -23,9 +23,11 @@ summarizeModel <- function(marge.model = NULL, pt = NULL) { Slope.Segment = NA_real_, Trend.Segment = NA_real_) } else { - # extract model equation & slopes + # extract model equation, slopes, & covariances + coef_vcov <- vcov(marge.model$final_mod) coef_df <- data.frame(coef_name = names(coef(marge.model$final_mod)), - coef_value = unname(coef(marge.model$final_mod))) + coef_value = unname(coef(marge.model$final_mod)), + coef_var = unname(diag(coef_vcov))) coef_df <- coef_df[-which(coef_df$coef_name == "Intercept"), ] diff --git a/inst/rmarkdown/templates/Bacher_Group_HTML/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/Bacher_Group_HTML/skeleton/skeleton.Rmd index 196799e..628c5de 100644 --- a/inst/rmarkdown/templates/Bacher_Group_HTML/skeleton/skeleton.Rmd +++ b/inst/rmarkdown/templates/Bacher_Group_HTML/skeleton/skeleton.Rmd @@ -1,6 +1,6 @@ --- title: "Title" -subtitle: "University of Florida - Dept. of Biostatistics - Bacher Group" +subtitle: "UF Dept. of Biostatistics - Bacher Group" author: "Name" date: "`r Sys.Date()`" output: @@ -20,29 +20,40 @@ knitr::opts_chunk$set(echo = TRUE, comment = NA, message = FALSE, warning = FALSE, - fig.align = "center") + fig.align = "center", + dev = "png", + dpi = 300) set.seed(312) # lucky seed ``` # Libraries ```{r} -library(glm2) -library(mgcv) library(dplyr) -library(scran) library(scLANE) -library(scater) library(ggplot2) -library(tradeSeq) -library(slingshot) -library(doParallel) -library(kableExtra) -library(SingleCellExperiment) ``` # Data +```{r} + +``` + # Analysis +```{r} + +``` + # Conclusions + +```{r} + +``` + +# Session info + +```{r} +sessioninfo::session_info() +``` diff --git a/man/createCellOffset.Rd b/man/createCellOffset.Rd index 2b077b2..0085236 100644 --- a/man/createCellOffset.Rd +++ b/man/createCellOffset.Rd @@ -19,6 +19,7 @@ Creates a vector of per-cell size factors to be used as input to \code{\link{tes } \examples{ \dontrun{ +createCellOffset(expr.mat = sce_obj) createCellOffset(expr.mat = counts(sce_obj)) createCellOffset(expr.mat = seu_obj, scale.factor = 1e5) } From 82973ab4909068680f0b577abd82ea031a688ebb Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Tue, 10 Oct 2023 16:26:35 -0400 Subject: [PATCH 4/5] added function to pull knot distribution -- closes #130 --- R/getKnotDist.R | 34 ++++++++++++++++++++++++++++++++++ man/getKnotDist.Rd | 27 +++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 R/getKnotDist.R create mode 100644 man/getKnotDist.Rd diff --git a/R/getKnotDist.R b/R/getKnotDist.R new file mode 100644 index 0000000..5b1547c --- /dev/null +++ b/R/getKnotDist.R @@ -0,0 +1,34 @@ +#' Pull the set of knots for dynamic genes across each lineage. +#' +#' @name getKnotDist +#' @author Jack Leary +#' @import magrittr +#' @importFrom purrr imap reduce +#' @description Pulls knot locations for dynamic genes across each lineage, allowing comparisons of where transcriptional switches occur between lineages. +#' @param test.dyn.res The output from \code{\link{testDynamic}}. Defaults to NULL. +#' @param dyn.genes The set of genes to pull knots for. If unspecified, pulls knots for all modeled genes. Defaults to NULL. +#' @return A data.frame containing gene name, lineage ID, and knot location in pseudotime. +#' @export +#' @examples +#' \dontrun{ +#' getKnotDist(gene_stats) +#' } + +getKnotDist <- function(test.dyn.res = NULL, dyn.genes = NULL) { + # check inputs + if (is.null(test.dyn.res)) { stop("You forgot one of the arguments to getKnotDist().") } + if (is.null(dyn.genes)) { + dyn.genes <- names(test.dyn.res) + } + # pull knots per-lineage + knot_df <- purrr::imap(test.dyn.res[dyn.genes], \(x, y) { + purrr::imap(x, \(z, w) { + data.frame(gene = y, + lineage = w, + knot = z$MARGE_Slope_Data$Breakpoint) + }) %>% + purrr::reduce(rbind) + }) %>% + purrr::reduce(rbind) + return(knot_df) +} diff --git a/man/getKnotDist.Rd b/man/getKnotDist.Rd new file mode 100644 index 0000000..353cf6e --- /dev/null +++ b/man/getKnotDist.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getKnotDist.R +\name{getKnotDist} +\alias{getKnotDist} +\title{Pull the set of knots for dynamic genes across each lineage.} +\usage{ +getKnotDist(test.dyn.res = NULL, dyn.genes = NULL) +} +\arguments{ +\item{test.dyn.res}{The output from \code{\link{testDynamic}}. Defaults to NULL.} + +\item{dyn.genes}{The set of genes to pull knots for. If unspecified, pulls knots for all modeled genes. Defaults to NULL.} +} +\value{ +A data.frame containing gene name, lineage ID, and knot location in pseudotime. +} +\description{ +Pulls knot locations for dynamic genes across each lineage, allowing comparisons of where transcriptional switches occur between lineages. +} +\examples{ +\dontrun{ +getKnotDist(gene_stats) +} +} +\author{ +Jack Leary +} From 6132e8dcb244c3a4ba3d62e18ce933d35d036480 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Tue, 10 Oct 2023 16:26:53 -0400 Subject: [PATCH 5/5] updated tests and such --- NAMESPACE | 3 ++- R/summarizeModel.R | 6 +++++- R/testDynamic.R | 17 ++++++++--------- tests/testthat/test_scLANE.R | 10 ++++++++-- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 95f1dcb..c022fc3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(enrichDynamicGenes) export(extractBreakpoints) export(fitGLMM) export(getFittedValues) +export(getKnotDist) export(getResultsDE) export(marge2) export(modelLRT) @@ -86,10 +87,10 @@ importFrom(glmmTMB,nbinom2) importFrom(parallel,clusterEvalQ) importFrom(parallel,clusterExport) importFrom(parallel,clusterSetRNGStream) -importFrom(parallel,detectCores) importFrom(parallel,makeCluster) importFrom(parallel,stopCluster) importFrom(purrr,discard) +importFrom(purrr,imap) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_chr) diff --git a/R/summarizeModel.R b/R/summarizeModel.R index e54e994..5282b21 100644 --- a/R/summarizeModel.R +++ b/R/summarizeModel.R @@ -24,7 +24,11 @@ summarizeModel <- function(marge.model = NULL, pt = NULL) { Trend.Segment = NA_real_) } else { # extract model equation, slopes, & covariances - coef_vcov <- vcov(marge.model$final_mod) + if (marge.model$model_type == "GEE") { + coef_vcov <- as.matrix(marge.model$final_mod$var) + } else { + coef_vcov <- vcov(marge.model$final_mod) + } coef_df <- data.frame(coef_name = names(coef(marge.model$final_mod)), coef_value = unname(coef(marge.model$final_mod)), coef_var = unname(diag(coef_vcov))) diff --git a/R/testDynamic.R b/R/testDynamic.R index 06e9217..1111db5 100644 --- a/R/testDynamic.R +++ b/R/testDynamic.R @@ -8,7 +8,7 @@ #' @importFrom bigstatsr as_FBM #' @importFrom foreach foreach %dopar% registerDoSEQ #' @importFrom doParallel registerDoParallel -#' @importFrom parallel makeCluster detectCores stopCluster clusterEvalQ clusterExport clusterSetRNGStream +#' @importFrom parallel makeCluster stopCluster clusterEvalQ clusterExport clusterSetRNGStream #' @importFrom withr with_output_sink #' @importFrom MASS glm.nb negative.binomial theta.mm #' @importFrom dplyr rename mutate relocate @@ -93,6 +93,9 @@ testDynamic <- function(expr.mat = NULL, if (is.null(expr.mat) || is.null(pt)) { stop("You forgot some inputs to testDynamic().") } # get raw counts from SingleCellExperiment or Seurat object & transpose to cell x gene dense matrix + if (is.null(genes)) { + genes <- rownames(expr.mat) + } if (inherits(expr.mat, "SingleCellExperiment")) { expr.mat <- BiocGenerics::counts(expr.mat)[genes, ] expr.mat <- as.matrix(expr.mat) @@ -102,15 +105,12 @@ testDynamic <- function(expr.mat = NULL, assay = Seurat::DefaultAssay(expr.mat)) expr.mat <- as.matrix(expr.mat[genes, ]) } else if (inherits(expr.mat, "dgCMatrix")) { - expr.mat <- as.matrix(expr.mat) + expr.mat <- as.matrix(expr.mat[genes, ]) + } else { + expr.mat <- expr.mat[genes, ] } if (!(inherits(expr.mat, "matrix") || inherits(expr.mat, "array"))) { stop("Input expr.mat must be coerceable to a matrix of integer counts.") } expr.mat <- t(expr.mat) # transpose to cell x gene matrix - if (is.null(genes)) { - genes <- colnames(expr.mat) - } else { - expr.mat <- expr.mat[, genes] - } # extract pseudotime dataframe if input is results from Slingshot if (inherits(pt, "SlingshotDataSet")) { @@ -165,8 +165,7 @@ testDynamic <- function(expr.mat = NULL, package_list <- c("glm2", "scLANE", "MASS", "bigstatsr", "broom.mixed", "dplyr", "stats") if (is.gee) { package_list <- c(package_list, "geeM") - } - if (is.glmm) { + } else if (is.glmm) { package_list <- c(package_list, "glmmTMB") } diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R index 29bd595..f07554b 100644 --- a/tests/testthat/test_scLANE.R +++ b/tests/testthat/test_scLANE.R @@ -34,7 +34,7 @@ withr::with_output_sink(tempfile(), { size.factor.offset = cell_offset, n.cores = 2, track.time = TRUE) - gee_gene_stats <- testDynamic(sim_data, + gee_gene_stats <- testDynamic(expr.mat = sim_data, pt = pt_test, genes = genes_to_test, n.potential.basis.fns = 5, @@ -53,7 +53,7 @@ withr::with_output_sink(tempfile(), { glmm.adaptive = TRUE, id.vec = sim_data$subject, n.cores = 2, - track.time = TRUE) + track.time = FALSE) # get results tables overall glm_test_results <- getResultsDE(glm_gene_stats) gee_test_results <- getResultsDE(gee_gene_stats) @@ -191,6 +191,7 @@ withr::with_output_sink(tempfile(), { gsea_res <- enrichDynamicGenes(glm_test_results, species = "hsapiens") coef_summary_glm <- summarizeModel(marge_mod_offset, pt = pt_test) coef_summary_gee <- summarizeModel(marge_mod_GEE_offset, pt = pt_test) + knot_df <- getKnotDist(glm_gene_stats) }) # run tests @@ -359,3 +360,8 @@ test_that("summarizeModels() output", { expect_type(coef_summary_glm$Slope.Segment, "double") expect_type(coef_summary_gee$Slope.Segment, "double") }) + +test_that("getKnotDist() output", { + expect_s3_class(knot_df, "data.frame") + expect_equal(ncol(knot_df), 3) +})