Skip to content

Commit

Permalink
Merge pull request #138 from jr-leary7/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
jr-leary7 authored Oct 11, 2023
2 parents 226b31b + 62f8b0f commit 31f1002
Show file tree
Hide file tree
Showing 19 changed files with 345 additions and 279 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.github$
36 changes: 36 additions & 0 deletions .github/workflows/render-README.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Additional help taken from https://fromthebottomoftheheap.net/2020/04/30/rendering-your-readme-with-github-actions/

on:
push:
branches: [main, master]
paths: README.Rmd

name: render-README

jobs:
render:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v3
- uses: r-lib/actions/setup-r@v2
- uses: r-lib/actions/setup-r-dependencies@v2
- uses: r-lib/actions/setup-pandoc@v2

- name: install CRAN packages
run: Rscript -e 'install.packages(c("rmarkdown","ggplot2", "dplyr", "purrr", "remotes"))'
- name: install BioConductor packages
run: Rscript -e 'install.packages("BiocManager"); BiocManager::install(c("SingleCellExperiment", "scater", "scran", "scaffold"))'
- name: install GitHub packages
run: Rscript -e 'remotes::install_github("jr-leary7/scLANE")'

- name: render README
run: Rscript -e 'rmarkdown::render("README.Rmd", output_format = "md_document")'

- name: commit rendered README
run: |
git add README.md man/figures/README-*
git commit -m "Recompiled README.Rmd" || echo "No changes to commit"
git push origin main || echo "No changes to commit"
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Description: This package uses truncated power basis spline models to build flex
Downstream analysis functionalities include model comparison, dynamic gene clustering, smoothed counts generation, gene set enrichment testing, & visualization.
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
LazyData: false
RoxygenNote: 7.2.1
Depends:
glm2,
Expand Down Expand Up @@ -70,11 +70,13 @@ LinkingTo:
biocViews:
RNASeq,
Software,
Clustering,
TimeCourse,
Sequencing,
Regression,
SingleCell,
Visualization,
GeneExpression,
Transcriptomics,
GeneSetEnrichment,
DifferentialExpression
4 changes: 2 additions & 2 deletions R/GetResultsDE.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' @description This function turns the nested list differential expression results of \code{\link{testDynamic}} and turns them into a tidy \code{data.frame}.
#' @import magrittr
#' @importFrom purrr map_dfr
#' @importFrom dplyr arrange mutate across if_else with_groups relocate
#' @importFrom dplyr arrange desc mutate across if_else with_groups relocate
#' @importFrom tidyselect everything
#' @importFrom stats p.adjust p.adjust.methods
#' @param test.dyn.res The nested list returned by \code{\link{testDynamic}}. Defaults to NULL.
Expand Down Expand Up @@ -38,7 +38,7 @@ getResultsDE <- function(test.dyn.res = NULL,
dplyr::mutate(dplyr::across(tidyselect::everything(), \(z) unname(unlist(z))))
})
}) %>%
dplyr::arrange(P_Val, Test_Stat) %>%
dplyr::arrange(P_Val, dplyr::desc(Test_Stat)) %>%
dplyr::mutate(P_Val_Adj = stats::p.adjust(P_Val, method = p.adj.method),
Gene_Dynamic_Lineage = dplyr::if_else(P_Val_Adj < fdr.cutoff, 1, 0, missing = 0)) %>%
dplyr::with_groups(Gene,
Expand Down
72 changes: 35 additions & 37 deletions R/ModelLRT.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,45 +29,43 @@ modelLRT <- function(mod.1 = NULL,
P_Val = NA,
Model_Type = NA,
Notes = "No test performed due to model failure.")
return(res)
}

mod.1 <- mod.1$final_mod

if (is.null(mod.1) || is.null(mod.0)) { stop("You must provide two models to compare.") }
LRT_notes <- NA_character_
# compute LRT
if (is.glmm) {
if (!(inherits(mod.1, "glmmTMB") && inherits(mod.0, "glmmTMB"))) { stop("Models must be of class glmmTMB.") }
mod.1_ll <- -mod.1$fit$objective
mod.0_ll <- -mod.0$fit$objective
lrt_stat <- 2 * (mod.1_ll - mod.0_ll)
if (is.na(logLik(mod.1))) {
if (!mod.1$sdr$pdHess) {
LRT_notes <- "Non-positive definite Hessian in alternate GLMM, probably because of shallow log-likelihood."
} else {
LRT_notes <- "Log-likelihood for alternate GLMM is very shallow / flat, exercise caution."
} else {
mod.1 <- mod.1$final_mod
if (is.null(mod.1) || is.null(mod.0)) { stop("You must provide two models to compare.") }
LRT_notes <- NA_character_
# compute LRT
if (is.glmm) {
if (!(inherits(mod.1, "glmmTMB") && inherits(mod.0, "glmmTMB"))) { stop("Models must be of class glmmTMB.") }
mod.1_ll <- -mod.1$fit$objective
mod.0_ll <- -mod.0$fit$objective
lrt_stat <- 2 * (mod.1_ll - mod.0_ll)
if (is.na(logLik(mod.1))) {
if (!mod.1$sdr$pdHess) {
LRT_notes <- "Non-positive definite Hessian in alternate GLMM, probably because of shallow log-likelihood."
} else {
LRT_notes <- "Log-likelihood for alternate GLMM is very shallow / flat, exercise caution."
}
}
} else {
if (!(inherits(mod.1, "glm") || inherits(mod.1, "lm"))) { stop("Models must be of class lm or glm.") }
mod.1_ll <- stats::logLik(mod.1)
mod.0_ll <- stats::logLik(mod.0)
lrt_stat <- as.numeric(2 * (mod.1_ll - mod.0_ll))
}
} else {
if (!(inherits(mod.1, "glm") || inherits(mod.1, "lm"))) { stop("Models must be of class lm or glm.") }
mod.1_ll <- stats::logLik(mod.1)
mod.0_ll <- stats::logLik(mod.0)
lrt_stat <- as.numeric(2 * (mod.1_ll - mod.0_ll))
}
dgr_free <- attributes(stats::logLik(mod.1))$df - attributes(stats::logLik(mod.0))$df
if (dgr_free == 0) {
dgr_free <- 1
dgr_free <- attributes(stats::logLik(mod.1))$df - attributes(stats::logLik(mod.0))$df
if (dgr_free == 0) {
dgr_free <- 1
}
p_val <- stats::pchisq(lrt_stat,
dgr_free,
lower = FALSE)
res <- list(Alt_Mod_LL = as.numeric(mod.1_ll),
Null_Mod_LL = as.numeric(mod.0_ll),
LRT_Stat = lrt_stat,
DF = dgr_free,
P_Val = p_val,
Model_Type = ifelse(is.glmm, "GLMM", "GLM"),
Notes = LRT_notes)
}
p_val <- stats::pchisq(lrt_stat,
dgr_free,
lower = FALSE)
res <- list(Alt_Mod_LL = as.numeric(mod.1_ll),
Null_Mod_LL = as.numeric(mod.0_ll),
LRT_Stat = lrt_stat,
DF = dgr_free,
P_Val = p_val,
Model_Type = ifelse(is.glmm, "GLMM", "GLM"),
Notes = LRT_notes)
return(res)
}
6 changes: 3 additions & 3 deletions R/clusterGenes.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ clusterGenes <- function(test.dyn.res = NULL,
if (!clust.algo %in% c("hclust", "kmeans", "leiden")) { stop("clust.algo must be one of 'hclust', 'kmeans', or 'leiden'.") }
if ((use.pca & is.null(n.PC)) || (use.pca & n.PC <= 0)) { stop("n.PC must be a non-zero integer when clustering on principal components.") }
if (is.null(lineages)) {
lineages <- LETTERS[1:length(test.dyn.res[[1]])]
lineages <- LETTERS[seq(length(test.dyn.res[[1]]))]
}
gene_cluster_list <- vector("list", length = length(lineages))
for (l in seq_along(lineages)) {
Expand Down Expand Up @@ -82,7 +82,7 @@ clusterGenes <- function(test.dyn.res = NULL,
# hierarchical clustering routine w/ Ward's linkage
if (clust.algo == "hclust") {
hclust_tree <- stats::hclust(dist_matrix, method = "ward.D2")
k_vals <- c(2:10)
k_vals <- seq(2, 10)
sil_vals <- vector("numeric", 9L)
clust_list <- vector("list", 9L)
for (k in seq_along(k_vals)) {
Expand All @@ -96,7 +96,7 @@ clusterGenes <- function(test.dyn.res = NULL,
Cluster = clust_list[[which.max(sil_vals)]])
# k-means clustering routine w/ Hartigan-Wong algorithm
} else if (clust.algo == "kmeans") {
k_vals <- c(2:10)
k_vals <- seq(2, 10)
sil_vals <- vector("numeric", 9L)
clust_list <- vector("list", 9L)
for (k in seq_along(k_vals)) {
Expand Down
39 changes: 24 additions & 15 deletions R/embedGenes.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#' @param cluster.genes (Optional) Should genes be clustered in PCA space using the Leiden algorithm? Defaults to TRUE.
#' @param gene.meta.data (Optional) A data.frame of metadata values for each gene (HVG status, Ensembl ID, gene biotype, etc.) that will be included in the result table. Defaults to NULL.
#' @param k.param (Optional) The value of nearest-neighbors used in creating the SNN graph prior to clustering & in running UMAP. Defaults to 20.
#' @param resolution.param (Optional) The value of the resolution parameter for the Leiden algorithm. If unspecified, silhouette scoring is used to select an optimal value. Defaults to NULL.
#' @param random.seed (Optional) The random seed used to control stochasticity in the clustering algorithm. Defaults to 312.
#' @return A data.frame containing embedding coordinates, cluster IDs, and metadata for each gene.
#' @export
Expand All @@ -31,6 +32,7 @@ embedGenes <- function(smoothed.counts = NULL,
cluster.genes = TRUE,
gene.meta.data = NULL,
k.param = 20,
resolution.param = NULL,
random.seed = 312) {
# check inputs
if (is.null(smoothed.counts)) { stop("You forgot to provide a smoothed counts matrix to embedGenes().") }
Expand All @@ -53,23 +55,30 @@ embedGenes <- function(smoothed.counts = NULL,
k = k.param,
type = "jaccard",
BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed_counts_pca$x))
clust_runs <- purrr::map(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7), \(r) {
if (is.null(resolution.param)) {
dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed_counts_pca$x))
clust_runs <- purrr::map(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7), \(r) {
smoothed_counts_clust <- igraph::cluster_leiden(smoothed_counts_snn,
objective_function = "modularity",
resolution_parameter = r)
if (smoothed_counts_clust$nb_clusters == 1) {
sil_val <- 0
} else {
sil_val <- mean(cluster::silhouette(as.integer(smoothed_counts_clust$membership - 1L), dist_matrix)[, 3])
}
clust_res <- list(clusters = as.factor(smoothed_counts_clust$membership - 1L),
resolution = r,
silhouette = sil_val)
return(clust_res)
})
best_clustering <- which.max(purrr::map_dbl(clust_runs, \(x) x$silhouette))
cluster_vec <- clust_runs[[best_clustering]]$clusters
} else {
smoothed_counts_clust <- igraph::cluster_leiden(smoothed_counts_snn,
objective_function = "modularity",
resolution_parameter = r)
if (smoothed_counts_clust$nb_clusters == 1) {
sil_val <- 0
} else {
sil_val <- mean(cluster::silhouette(as.integer(smoothed_counts_clust$membership - 1L), dist_matrix)[, 3])
}
clust_res <- list(clusters = as.factor(smoothed_counts_clust$membership - 1L),
resolution = r,
silhouette = sil_val)
return(clust_res)
})
best_clustering <- which.max(purrr::map_dbl(clust_runs, \(x) x$silhouette))
cluster_vec <- clust_runs[[best_clustering]]$clusters
resolution_parameter = resolution.param)
cluster_vec <- as.factor(smoothed_counts_clust$membership - 1L)
}
} else {
cluster_vec <- NA_integer_
}
Expand Down
Loading

0 comments on commit 31f1002

Please sign in to comment.