Skip to content

Commit

Permalink
minor code organization
Browse files Browse the repository at this point in the history
  • Loading branch information
mvfki committed Oct 30, 2023
1 parent 8a8e4bf commit d00bd5d
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 97 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ S3method(scaleNotCenter,liger)
S3method(scaleNotCenter,ligerDataset)
S3method(selectGenes,Seurat)
S3method(selectGenes,liger)
export("%>%")
export("cellMeta<-")
export("dataset<-")
export("datasets<-")
Expand Down
107 changes: 105 additions & 2 deletions R/DEG_marker.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ runPairwiseDEG <- function(
#' @export
#' @param conditionBy \code{cellMeta} variable(s). Marker detection will be
#' performed for each level of this variable. Multiple variables will be
#' combined.
#' combined. Default \code{NULL} uses default cluster.
#' @param splitBy Split data by \code{cellMeta} variable(s) here and identify
#' markers for \code{conditionBy} within each chunk. Default \code{NULL}.
#' @param useDatasets Datasets to perform marker detection within. Default
Expand All @@ -140,7 +140,7 @@ runPairwiseDEG <- function(
#' splitBy = "leiden_cluster")
runMarkerDEG <- function(
object,
conditionBy,
conditionBy = NULL,
splitBy = NULL, # The previous by dataset strategy
method = c("wilcoxon", "pseudoBulk"),
useDatasets = NULL,
Expand All @@ -152,6 +152,10 @@ runMarkerDEG <- function(
) {
useDatasets <- .checkUseDatasets(object, useDatasets)
allCellIdx <- seq(ncol(object))[object$dataset %in% useDatasets]
conditionBy <- conditionBy %||% object@uns$defaultCluster
if (is.null(conditionBy)) {
stop("No `conditionBy` given or default cluster not set.")
}
conditionBy <- .fetchCellMetaVar(
object, conditionBy, cellIdx = allCellIdx,
checkCategorical = TRUE, drop = FALSE, droplevels = TRUE
Expand Down Expand Up @@ -271,6 +275,9 @@ runMarkerDEG <- function(
return(slot)
}


###################### Pseudo-bulk Method helper ###############################

setupPseudoRep2 <- function(groups, nRep = 3, seed = 1) {
# The output data.frame should be cell per row by variable per col
set.seed(seed)
Expand Down Expand Up @@ -331,3 +338,99 @@ makePseudoBulk2 <- function(mat, replicateAnn, verbose = TRUE) {
# res <- res[order(res$padj),]
return(res)
}


####################### Wilcoxon rank-sum test helper ##########################

# X: matrix of data to be tested
# y: grouping label of columns of X
# Rcpp source code located in src/wilcoxon.cpp
wilcoxauc <- function(x, clusterVar) {
if (methods::is(x, 'dgTMatrix')) x <- methods::as(x, 'CsparseMatrix')
if (methods::is(x, 'TsparseMatrix')) x <- methods::as(x, 'CsparseMatrix')
if (is.null(row.names(x))) {
rownames(x) <- paste0('Feature', seq(nrow(x)))
}
groupSize <- as.numeric(table(clusterVar))

## Compute primary statistics
n1n2 <- groupSize * (ncol(x) - groupSize)
# rankRes - list(X_ranked, ties), where X_ranked is obs x feature
xRanked <- Matrix::t(x)
# This computes the ranking of non-zero values and the ties
ties <- cpp_rank_matrix_dgc(xRanked@x, xRanked@p,
nrow(xRanked), ncol(xRanked))
# ranksRes <- list(X_ranked = xT, ties = ties)

# rankRes <- colRanking(x)
ustat <- computeUstat(xRanked, clusterVar, n1n2, groupSize)
auc <- t(ustat / n1n2)
pvals <- computePval(ustat, ties, ncol(x), n1n2)
fdr <- apply(pvals, 2, function(p) stats::p.adjust(p, 'BH'))

### Auxiliary Statistics (AvgExpr, PctIn, LFC, etc)
groupSums <- colAggregateSum_sparse(x, as.integer(clusterVar) - 1, length(unique(clusterVar)))
# groupSums <- colAggregateSum(x, clusterVar)
group_nnz <- colNNZAggr_sparse(x, as.integer(clusterVar) - 1, length(unique(clusterVar)))
# group_nnz <- colNNZAggr(x, clusterVar)
group_pct <- t(sweep(group_nnz, 1, as.numeric(table(clusterVar)), "/"))

group_pct_out <- sweep(-group_nnz, 2, colSums(group_nnz), "+")
group_pct_out <- sweep(group_pct_out, 1,
as.numeric(length(clusterVar) - table(clusterVar)),
"/")
group_pct_out <- t(group_pct_out)

groupMeans <- t(sweep(groupSums, 1, as.numeric(table(clusterVar)), "/"))

cs <- colSums(groupSums)
gs <- as.numeric(table(clusterVar))
lfc <- Reduce(cbind, lapply(seq_along(levels(clusterVar)), function(g) {
groupMeans[, g] - (cs - groupSums[g, ])/(length(clusterVar) - gs[g])
}))

data.frame(
feature = rep(row.names(x), times = length(levels(clusterVar))),
group = factor(rep(levels(clusterVar), each = nrow(x)),
levels = levels(clusterVar)),
avgExpr = as.numeric(groupMeans),
logFC = as.numeric(lfc),
statistic = as.numeric(t(ustat)),
auc = as.numeric(auc),
pval = as.numeric(pvals),
padj = as.numeric(fdr),
pct_in = as.numeric(100 * group_pct),
pct_out = as.numeric(100 * group_pct_out)
)
}

computeUstat <- function(Xr, cols, n1n2, groupSize) {
grs <- rowAggregateSum_sparse(Xr, as.integer(cols) - 1, length(unique(cols)))
# grs <- rowAggregateSum(Xr, cols)

# if (inherits(Xr, 'dgCMatrix')) {
# With the ranking of only non-zero features, here the tie-ranking of
# zeros need to be added.
nnz <- rowNNZAggr_sparse(Xr, as.integer(cols) - 1, length(unique(cols)))
gnz <- groupSize - nnz
zero.ranks <- (nrow(Xr) - diff(Xr@p) + 1) / 2
ustat <- t((t(gnz) * zero.ranks)) + grs - groupSize*(groupSize + 1)/2
# } else {
# ustat <- grs - groupSize * (groupSize + 1) / 2
# }
return(ustat)
}

computePval <- function(ustat, ties, N, n1n2) {
z <- ustat - .5 * n1n2
z <- z - sign(z) * .5
.x1 <- N ^ 3 - N
.x2 <- 1 / (12 * (N ^ 2 - N))
rhs <- unlist(lapply(ties, function(tvals) {
(.x1 - sum(tvals ^ 3 - tvals)) * .x2
}))
usigma <- sqrt(matrix(n1n2, ncol = 1) %*% matrix(rhs, nrow = 1))
z <- t(z / usigma)
pvals <- matrix(2 * stats::pnorm(-abs(as.numeric(z))), ncol = ncol(z))
return(pvals)
}
91 changes: 0 additions & 91 deletions R/wilcoxon.R
Original file line number Diff line number Diff line change
Expand Up @@ -396,98 +396,7 @@ calcDatasetSpecificity <- function(
############################# For fast Wilcoxon test ###########################
# helper function for wilcoxon tests on general variables like matrix and
# dgCMatrix
# X: matrix of data to be tested
# y: grouping label of columns of X
# Rcpp source code located in src/wilcoxon.cpp
wilcoxauc <- function(x, clusterVar) {
if (methods::is(x, 'dgTMatrix')) x <- methods::as(x, 'CsparseMatrix')
if (methods::is(x, 'TsparseMatrix')) x <- methods::as(x, 'CsparseMatrix')
if (is.null(row.names(x))) {
rownames(x) <- paste0('Feature', seq(nrow(x)))
}
groupSize <- as.numeric(table(clusterVar))

## Compute primary statistics
n1n2 <- groupSize * (ncol(x) - groupSize)
# rankRes - list(X_ranked, ties), where X_ranked is obs x feature
xRanked <- Matrix::t(x)
# This computes the ranking of non-zero values and the ties
ties <- cpp_rank_matrix_dgc(xRanked@x, xRanked@p,
nrow(xRanked), ncol(xRanked))
# ranksRes <- list(X_ranked = xT, ties = ties)

# rankRes <- colRanking(x)
ustat <- computeUstat(xRanked, clusterVar, n1n2, groupSize)
auc <- t(ustat / n1n2)
pvals <- computePval(ustat, ties, ncol(x), n1n2)
fdr <- apply(pvals, 2, function(p) stats::p.adjust(p, 'BH'))

### Auxiliary Statistics (AvgExpr, PctIn, LFC, etc)
groupSums <- colAggregateSum_sparse(x, as.integer(clusterVar) - 1, length(unique(clusterVar)))
# groupSums <- colAggregateSum(x, clusterVar)
group_nnz <- colNNZAggr_sparse(x, as.integer(clusterVar) - 1, length(unique(clusterVar)))
# group_nnz <- colNNZAggr(x, clusterVar)
group_pct <- t(sweep(group_nnz, 1, as.numeric(table(clusterVar)), "/"))

group_pct_out <- sweep(-group_nnz, 2, colSums(group_nnz), "+")
group_pct_out <- sweep(group_pct_out, 1,
as.numeric(length(clusterVar) - table(clusterVar)),
"/")
group_pct_out <- t(group_pct_out)

groupMeans <- t(sweep(groupSums, 1, as.numeric(table(clusterVar)), "/"))

cs <- colSums(groupSums)
gs <- as.numeric(table(clusterVar))
lfc <- Reduce(cbind, lapply(seq_along(levels(clusterVar)), function(g) {
groupMeans[, g] - (cs - groupSums[g, ])/(length(clusterVar) - gs[g])
}))

data.frame(
feature = rep(row.names(x), times = length(levels(clusterVar))),
group = factor(rep(levels(clusterVar), each = nrow(x)),
levels = levels(clusterVar)),
avgExpr = as.numeric(groupMeans),
logFC = as.numeric(lfc),
statistic = as.numeric(t(ustat)),
auc = as.numeric(auc),
pval = as.numeric(pvals),
padj = as.numeric(fdr),
pct_in = as.numeric(100 * group_pct),
pct_out = as.numeric(100 * group_pct_out)
)
}

computeUstat <- function(Xr, cols, n1n2, groupSize) {
grs <- rowAggregateSum_sparse(Xr, as.integer(cols) - 1, length(unique(cols)))
# grs <- rowAggregateSum(Xr, cols)

# if (inherits(Xr, 'dgCMatrix')) {
# With the ranking of only non-zero features, here the tie-ranking of
# zeros need to be added.
nnz <- rowNNZAggr_sparse(Xr, as.integer(cols) - 1, length(unique(cols)))
gnz <- groupSize - nnz
zero.ranks <- (nrow(Xr) - diff(Xr@p) + 1) / 2
ustat <- t((t(gnz) * zero.ranks)) + grs - groupSize*(groupSize + 1)/2
# } else {
# ustat <- grs - groupSize * (groupSize + 1) / 2
# }
return(ustat)
}

computePval <- function(ustat, ties, N, n1n2) {
z <- ustat - .5 * n1n2
z <- z - sign(z) * .5
.x1 <- N ^ 3 - N
.x2 <- 1 / (12 * (N ^ 2 - N))
rhs <- unlist(lapply(ties, function(tvals) {
(.x1 - sum(tvals ^ 3 - tvals)) * .x2
}))
usigma <- sqrt(matrix(n1n2, ncol = 1) %*% matrix(rhs, nrow = 1))
z <- t(z / usigma)
pvals <- matrix(2 * stats::pnorm(-abs(as.numeric(z))), ncol = ncol(z))
return(pvals)
}

################################################################################
# Visualization ####
Expand Down
5 changes: 3 additions & 2 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#' @importFrom Rcpp evalCpp
#' @importFrom Matrix colSums rowSums t summary
#' @importFrom magrittr %>%
#' @importFrom rlang .data %||%
#' @importFrom methods new show
#' @importFrom utils .DollarNames
#' @useDynLib rliger2, .registration = TRUE
NULL


#' @importFrom magrittr %>%
#' @export
magrittr::`%>%`

.modalClassDict <- list(
default = "ligerDataset",
Expand Down
4 changes: 2 additions & 2 deletions man/liger-DEG.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions man/reexports.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d00bd5d

Please sign in to comment.