Skip to content

Commit

Permalink
updated documentation & test coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
jr-leary7 committed Sep 27, 2023
1 parent d94cdde commit 801b508
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 123 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* Updated the `clusterGenes()` function to be much more efficient as well as changing the distance metric used to be cosine dissimilarity.
* Added `theme_scLANE()` for output plots.
* Enhanced documentation.
* Increased test coverage.

# `scLANE` 0.7.2

Expand Down
37 changes: 0 additions & 37 deletions R/backward_sel.R

This file was deleted.

33 changes: 19 additions & 14 deletions R/backward_sel_WIC.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
#' Backward selection function for MARGE - uses the Wald information criterion (WIC).
#'
#' @name backward_sel_WIC
#' @param Y : the response variable. Defaults to NULL.
#' @param B_new : the model matrix. Defaults to NULL.
#' @author Jakub Stoklosa
#' @author David I. Warton
#' @author Jack Leary
#' @importFrom gamlss gamlss
#' @importFrom geeM geem
#' @importFrom MASS negative.binomial
#' @importFrom withr with_output_sink
#' @param Y The response variable. Defaults to NULL.
#' @param B_new The model matrix. Defaults to NULL.
#' @param is.gee Is the model a GEE? Defaults to FALSE.
#' @param id.vec A vector of observation IDs that is necessary for fitting a GEE model. Defaults to NULL.
#' @param cor.structure The specified working correlation structure of the GEE model. Must be one of "independence", "ar1", or "exchangeable". Defaults to NULL.
#' @return \code{backward_sel_WIC} returns the Wald statistic from the fitted model (the penalty is applied later on).
#' @author Jakub Stoklosa and David I. Warton
#' @references Stoklosa, J. Gibb, H. Warton, D.I. Fast forward selection for Generalized Estimating Equations With a Large Number of Predictor Variables. \emph{Biometrics}, \strong{70}, 110--120.
#' @references Stoklosa, J. and Warton, D.I. (2018). A generalized estimating equation approach to multivariate adaptive regression splines. \emph{Journal of Computational and Graphical Statistics}, \strong{27}, 245--253.
#' @importFrom gamlss gamlss
#' @importFrom geeM geem
#' @importFrom MASS negative.binomial
#' @seealso \code{\link{backward_sel}}

backward_sel_WIC <- function(Y = NULL, B_new = NULL, is.gee = FALSE, id.vec = NULL, cor.structure = NULL) {
backward_sel_WIC <- function(Y = NULL,
B_new = NULL,
is.gee = FALSE,
id.vec = NULL,
cor.structure = NULL) {
# check inputs
if (is.null(Y) || is.null(B_new)) { stop("Some inputs are missing from backward_sel_WIC().") }
if (is.gee & is.null(id.vec)) { stop("GEEs require a vector of observation IDs in backward_sel_WIC().") }
if (is.gee & is.null(cor.structure)) { stop("GEEs require a working correlation structure in backward_sel_WIC().") }
if (is.gee && is.null(id.vec)) { stop("GEEs require a vector of observation IDs in backward_sel_WIC().") }
if (is.gee && is.null(cor.structure)) { stop("GEEs require a working correlation structure in backward_sel_WIC().") }
cor.structure <- tolower(cor.structure)
if (is.gee) {
fit <- geeM::geem(Y ~ B_new - 1,
Expand All @@ -32,10 +38,9 @@ backward_sel_WIC <- function(Y = NULL, B_new = NULL, is.gee = FALSE, id.vec = NU
fit <- gamlss::gamlss(Y ~ B_new - 1,
family = "NBI",
trace = FALSE)
sink(tempfile())
fit_sum <- summary(fit)
sink()
fit_sum_mat <- as.matrix(fit_sum)
withr::with_output_sink(tempfile(), {
fit_sum_mat <- as.matrix(summary(fit))
})
wald_stat <- ((fit_sum_mat[, 3])[-c(1, nrow(fit_sum_mat))])^2
}
return(wald_stat)
Expand Down
57 changes: 31 additions & 26 deletions R/embedGenes.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,33 +48,38 @@ embedGenes <- function(smoothed.counts = NULL,
n_neighbors = k.param,
init = "spectral")
# clustering w/ silhouette score parameter tuning
smoothed_counts_snn <- bluster::makeSNNGraph(smoothed_counts_pca$x,
k = k.param,
type = "jaccard",
BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed_counts_pca$x))
clust_runs <- purrr::map(c(0.1, 0.2, 0.3, 0.4, 0.5), \(r) {
smoothed_counts_clust <- igraph::cluster_leiden(smoothed_counts_snn,
objective_function = "modularity",
resolution_parameter = r)
if (smoothed_counts_clust$nb_clusters == 1) {
sil_val <- 0
} else {
sil_val <- mean(cluster::silhouette(as.integer(smoothed_counts_clust$membership - 1L), dist_matrix)[, 3])
}
clust_res <- list(clusters = as.factor(smoothed_counts_clust$membership - 1L),
resolution = r,
silhouette = sil_val)
return(clust_res)
})
best_clustering <- which.max(purrr::map_dbl(clust_runs, \(x) x$silhouette))
if (cluster.genes) {
smoothed_counts_snn <- bluster::makeSNNGraph(smoothed_counts_pca$x,
k = k.param,
type = "jaccard",
BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed_counts_pca$x))
clust_runs <- purrr::map(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7), \(r) {
smoothed_counts_clust <- igraph::cluster_leiden(smoothed_counts_snn,
objective_function = "modularity",
resolution_parameter = r)
if (smoothed_counts_clust$nb_clusters == 1) {
sil_val <- 0
} else {
sil_val <- mean(cluster::silhouette(as.integer(smoothed_counts_clust$membership - 1L), dist_matrix)[, 3])
}
clust_res <- list(clusters = as.factor(smoothed_counts_clust$membership - 1L),
resolution = r,
silhouette = sil_val)
return(clust_res)
})
best_clustering <- which.max(purrr::map_dbl(clust_runs, \(x) x$silhouette))
cluster_vec <- clust_runs[[best_clustering]]$clusters
} else {
cluster_vec <- NA_integer_
}
# prepare results
pca_df <- as.data.frame(smoothed_counts_pca$x[, seq(pcs.return)])
colnames(pca_df) <- paste0("pc", seq(pcs.return))
gene_clust_df <- data.frame(gene = genes,
leiden = clust_runs[[best_clustering]]$clusters,
umap1 = smoothed_counts_umap[, 1],
umap2 = smoothed_counts_umap[, 2])
gene_clust_df <- dplyr::bind_cols(gene_clust_df, pca_df)
return(gene_clust_df)
gene_df <- data.frame(gene = genes,
leiden = cluster_vec,
umap1 = smoothed_counts_umap[, 1],
umap2 = smoothed_counts_umap[, 2])
gene_df <- dplyr::bind_cols(gene_df, pca_df)
return(gene_df)
}
2 changes: 0 additions & 2 deletions R/stripGLM.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ stripGLM <- function(glm.obj = NULL) {
if (inherits(glm.obj, "try-error")) {return(glm.obj)}
if (is.null(glm.obj)) { stop("You forgot to supply inputs to stripGLM().") }
if (!inherits(glm.obj, "glm")) { stop("Input to stripGLM() must be of class glm.") }



# strip out unnecessary glm pieces
glm.obj$effects <- c()
Expand Down
38 changes: 0 additions & 38 deletions man/backward_sel.Rd

This file was deleted.

13 changes: 7 additions & 6 deletions man/backward_sel_WIC.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions tests/testthat/test_scLANE.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# load data & prepare for testing
load(system.file("testdata/sim_test_data.RData", package = "scLANE"))
sim_data_seu <- Seurat::as.Seurat(sim_data)
cell_offset <- createCellOffset(sim_data)
cell_offset_seu <- createCellOffset(sim_data_seu)
genes_to_test <- c(rownames(sim_data)[SummarizedExperiment::rowData(sim_data)$geneStatus_overall == "Dynamic"][1:10],
rownames(sim_data)[SummarizedExperiment::rowData(sim_data)$geneStatus_overall == "NotDynamic"][1:10])
counts_test <- t(as.matrix(SingleCellExperiment::counts(sim_data)[genes_to_test, ]))
Expand Down Expand Up @@ -80,6 +82,7 @@ withr::with_output_sink(tempfile(), {
return.basis = TRUE,
return.GCV = TRUE,
return.WIC = TRUE)
marge_mod_stripped <- stripGLM(marge_mod$final_mod)
# run GLM model -- with offset
marge_mod_offset <- marge2(X_pred = pt_test,
Y = counts_test[, 3],
Expand Down Expand Up @@ -206,8 +209,11 @@ test_that("internal marge functions", {

test_that("createCellOffset() output", {
expect_type(cell_offset, "double")
expect_type(cell_offset_seu, "double")
expect_length(cell_offset, 300)
expect_length(cell_offset_seu, 300)
expect_false(any(is.na(cell_offset)))
expect_false(any(is.na(cell_offset_seu)))
})

test_that("testDynamic() output", {
Expand Down Expand Up @@ -266,6 +272,7 @@ test_that("marge2() output -- GLM backend", {
expect_s3_class(marge_mod_offset, "marge")
expect_s3_class(marge_mod$final_mod, "negbin")
expect_s3_class(marge_mod_offset$final_mod, "negbin")
expect_s3_class(marge_mod_stripped, "negbin")
expect_equal(marge_mod$model_type, "GLM")
expect_equal(marge_mod_offset$model_type, "GLM")
expect_true(marge_mod$final_mod$converged)
Expand Down

0 comments on commit 801b508

Please sign in to comment.