diff --git a/DESCRIPTION b/DESCRIPTION index fd37722..d50624e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,12 +53,11 @@ Suggests: Seurat, bluster, cluster, - msigdbr, rmarkdown, slingshot, + gprofiler2, BiocGenerics, BiocNeighbors, - clusterProfiler, testthat (>= 3.0.0), SingleCellExperiment, SummarizedExperiment diff --git a/NAMESPACE b/NAMESPACE index dbf4c7e..60275d4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,6 +36,7 @@ importFrom(dplyr,arrange) importFrom(dplyr,bind_cols) importFrom(dplyr,case_when) importFrom(dplyr,contains) +importFrom(dplyr,desc) importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,group_by) diff --git a/R/enrichDynamicGenes.R b/R/enrichDynamicGenes.R index 014833c..4acb05b 100644 --- a/R/enrichDynamicGenes.R +++ b/R/enrichDynamicGenes.R @@ -2,40 +2,29 @@ #' #' @name enrichDynamicGenes #' @author Jack Leary -#' @description This function uses the \code{msigdbr} and \code{clusterProfiler} packages to perform GSEA on a set of genes from one or more lineages that were determined to be dynamic with \code{\link{testDynamic}}. +#' @description This function uses the \code{gprofiler2} package to perform pathway analysis on a set of genes from one or more lineages that were determined to be dynamic with \code{\link{testDynamic}}. #' @import magrittr -#' @importFrom dplyr filter distinct pull +#' @importFrom dplyr filter arrange desc distinct pull #' @param scLANE.de.res The output from \code{\link{getResultsDE}}. Defaults to NULL. #' @param lineage A character vector specifying lineages to isolate. Defaults to NULL. -#' @param species One of "hs" or "mm", specifying whether the gene sets are human or murine in origin. -#' @param gene.set.cat Corresponds to the \code{category} parameter of \code{\link[msigdbr]{msigdbr}}. Defaults to NULL. -#' @param gene.set.subcat Corresponds to the \code{subcategory} parameter of \code{\link[msigdbr]{msigdbr}}. Defaults to NULL. -#' @return The output from \code{\link[clusterProfiler]{enricher}}. -#' @seealso \code{\link[msigdbr]{msigdbr}} -#' @seealso \code{\link[msigdbr]{msigdbr_collections}} -#' @seealso \code{\link[clusterProfiler]{enricher}} +#' @param species The species against which to run enrichment analysis. Defaults to "hsapiens". +#' @return The output from \code{\link[gprofiler2]{gost}}. +#' @seealso \code{\link[gprofiler2]{gost}} #' @export #' @examples #' \dontrun{ +#' enrichDynamicGenes(scLANE.de.res = de_stats) #' enrichDynamicGenes(scLANE.de.res = de_stats, -#' lineage = "A", -#' gene.set.cat = "C1", -#' gene.set.subcat = "CGP") -#' enrichDynamicGenes(scLANE.de.res = de_stats, -#' species = "mm", -#' gene.set.cat = "C2", -#' gene.set.subcat = "CP:REACTOME") +#' lineage = "B", +#' species = "mmusculus") #' } enrichDynamicGenes <- function(scLANE.de.res = NULL, lineage = NULL, - species = "hs", - gene.set.cat = NULL, - gene.set.subcat = NULL) { + species = "hsapiens") { # check inputs - if (is.null(scLANE.de.res) || is.null(gene.set.cat)) { stop("Arguments to enrichDynamicGenes() are missing.") } + if (is.null(scLANE.de.res)) { stop("Arguments to enrichDynamicGenes() are missing.") } species <- tolower(species) - if (!species %in% c("hs", "mm")) { stop("species must be one of 'hs' or 'mm' at this time.") } if (!is.null(lineage)) { scLANE.de.res <- dplyr::filter(scLANE.de.res, Lineage %in% lineage, @@ -43,21 +32,13 @@ enrichDynamicGenes <- function(scLANE.de.res = NULL, } else { scLANE.de.res <- dplyr::filter(scLANE.de.res, Gene_Dynamic_Overall == 1) } - genes <- dplyr::distinct(scLANE.de.res, Gene) %>% + genes <- dplyr::arrange(scLANE.de.res, dplyr::desc(Test_Stat)) %>% + dplyr::distinct(Gene) %>% dplyr::pull(Gene) - if (species == "hs") { - gene_sets <- msigdbr::msigdbr(species = "human", - category = gene.set.cat, - subcategory = gene.set.subcat) - } else if (species == "mm") { - gene_sets <- msigdbr::msigdbr(species = "mouse", - category = gene.set.cat, - subcategory = gene.set.subcat) - } - term_to_gene <- dplyr::distinct(gene_sets, gs_name, gene_symbol) - gsea_res <- clusterProfiler::enricher(gene = genes, - TERM2GENE = term_to_gene, - universe = unique(term_to_gene$gene_symbol), - qvalueCutoff = 0.05) - return(gsea_res) + pathway_enr_res <- gprofiler2::gost(query = genes, + organism = species, + ordered_query = TRUE, + multi_query = FALSE, + significant = FALSE) + return(pathway_enr_res) } diff --git a/R/marge2.R b/R/marge2.R index af0ba58..a202c11 100644 --- a/R/marge2.R +++ b/R/marge2.R @@ -12,7 +12,7 @@ #' @importFrom MASS glm.nb negative.binomial theta.mm #' @importFrom utils tail #' @importFrom purrr map_chr -#' @importFrom stats fitted coef offset as.formula +#' @importFrom stats quantile fitted coef offset as.formula #' @param X_pred A matrix of the predictor variables. Defaults to NULL. #' @param Y The response variable. Defaults to NULL. #' @param Y.offset (Optional) An vector of per-cell size factors to be included in the final model fit as an offset. Defaults to NULL. @@ -165,8 +165,10 @@ marge2 <- function(X_pred = NULL, X_red1 <- min_span(X_red = X, q = q, minspan = minspan) X_red2 <- max_span(X_red = X, q = q) X_red <- intersect(X_red1, X_red2) + q05 <- stats::quantile(X_pred[, v], 0.05) + q95 <- stats::quantile(X_pred[, v], 0.95) + X_red <- X_red[X_red > q05 & X_red < q95] if (length(X_red) > n.knot.max) { - # set.seed(as.integer(length(X_red))) X_red <- sample(X_red, size = n.knot.max) } } else { diff --git a/man/enrichDynamicGenes.Rd b/man/enrichDynamicGenes.Rd index e24387e..58cefcf 100644 --- a/man/enrichDynamicGenes.Rd +++ b/man/enrichDynamicGenes.Rd @@ -4,49 +4,31 @@ \alias{enrichDynamicGenes} \title{Perform GSEA on dynamic genes identified by \code{scLANE}.} \usage{ -enrichDynamicGenes( - scLANE.de.res = NULL, - lineage = NULL, - species = "hs", - gene.set.cat = NULL, - gene.set.subcat = NULL -) +enrichDynamicGenes(scLANE.de.res = NULL, lineage = NULL, species = "hsapiens") } \arguments{ \item{scLANE.de.res}{The output from \code{\link{getResultsDE}}. Defaults to NULL.} \item{lineage}{A character vector specifying lineages to isolate. Defaults to NULL.} -\item{species}{One of "hs" or "mm", specifying whether the gene sets are human or murine in origin.} - -\item{gene.set.cat}{Corresponds to the \code{category} parameter of \code{\link[msigdbr]{msigdbr}}. Defaults to NULL.} - -\item{gene.set.subcat}{Corresponds to the \code{subcategory} parameter of \code{\link[msigdbr]{msigdbr}}. Defaults to NULL.} +\item{species}{The species against which to run enrichment analysis. Defaults to "hsapiens".} } \value{ -The output from \code{\link[clusterProfiler]{enricher}}. +The output from \code{\link[gprofiler2]{gost}}. } \description{ -This function uses the \code{msigdbr} and \code{clusterProfiler} packages to perform GSEA on a set of genes from one or more lineages that were determined to be dynamic with \code{\link{testDynamic}}. +This function uses the \code{gprofiler2} package to perform pathway analysis on a set of genes from one or more lineages that were determined to be dynamic with \code{\link{testDynamic}}. } \examples{ \dontrun{ +enrichDynamicGenes(scLANE.de.res = de_stats) enrichDynamicGenes(scLANE.de.res = de_stats, - lineage = "A", - gene.set.cat = "C1", - gene.set.subcat = "CGP") -enrichDynamicGenes(scLANE.de.res = de_stats, - species = "mm", - gene.set.cat = "C2", - gene.set.subcat = "CP:REACTOME") + lineage = "B", + species = "mmusculus") } } \seealso{ -\code{\link[msigdbr]{msigdbr}} - -\code{\link[msigdbr]{msigdbr_collections}} - -\code{\link[clusterProfiler]{enricher}} +\code{\link[gprofiler2]{gost}} } \author{ Jack Leary diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R index 3f1ac41..29bd595 100644 --- a/tests/testthat/test_scLANE.R +++ b/tests/testthat/test_scLANE.R @@ -188,9 +188,7 @@ withr::with_output_sink(tempfile(), { expr.mat = sim_data, cell.meta.data = as.data.frame(SummarizedExperiment::colData(sim_data)), id.vec = sim_data$subject) - gsea_res <- enrichDynamicGenes(scLANE.de.res = glm_test_results, - gene.set.cat = "C2", - species = "hs") + gsea_res <- enrichDynamicGenes(glm_test_results, species = "hsapiens") coef_summary_glm <- summarizeModel(marge_mod_offset, pt = pt_test) coef_summary_gee <- summarizeModel(marge_mod_GEE_offset, pt = pt_test) }) @@ -348,9 +346,9 @@ test_that("getFittedValues() output", { }) test_that("enrichDynamicGenes() output", { - expect_s4_class(gsea_res, "enrichResult") - expect_s3_class(gsea_res@result, "data.frame") - expect_equal(ncol(gsea_res@result), 9) + expect_type(gsea_res, "list") + expect_length(gsea_res, 2) + expect_s3_class(gsea_res$result, "data.frame") }) test_that("summarizeModels() output", {