From 76a88c599a15c2d1197d49c084ddb3217a958c34 Mon Sep 17 00:00:00 2001 From: Peter Hickey Date: Fri, 10 Jan 2025 16:24:17 +1100 Subject: [PATCH 1/4] Accommodate changes from DropletUtils::emptyDrops() - Details of changes: MarioniLab/DropletUtils#118 - Since the default is now `alpha=Inf`, the claims around `alpha` values and sanity check no longer make sense, so just remove them. --- inst/book/droplet-processing.Rmd | 2 -- 1 file changed, 2 deletions(-) diff --git a/inst/book/droplet-processing.Rmd b/inst/book/droplet-processing.Rmd index 9dc48c2..c8f9dd0 100644 --- a/inst/book/droplet-processing.Rmd +++ b/inst/book/droplet-processing.Rmd @@ -303,7 +303,6 @@ hist(log10(e.out.hto$Total[is.cell.hto]), xlab="Log[10] HTO count", main="") While both approaches are valid, we tend to favor the cell calls derived from the gene matrix as this directly indicates that a cell is present in the droplet. Indeed, at least a few libraries have very high total HTO counts yet very low total gene counts (Figure \@ref(fig:hto-total-comp)), suggesting that the presence of HTOs may not always equate to successful capture of that cell's transcriptome. -HTO counts also tend to exhibit stronger overdispersion (i.e., lower `alpha` in the `emptyDrops()` calculations), increasing the risk of violating `emptyDrops()`'s distributional assumptions. ```{r hto-total-comp, fig.cap="Total HTO counts plotted against the total gene counts for each library in the cell line mixture dataset. Each point represents a library while the dotted lines represent the thresholds below which libraries were assumed to be empty droplets."} table(HTO=is.cell.hto, Genes=is.cell, useNA="always") @@ -316,7 +315,6 @@ abline(h=metadata(e.out.hto)$lower, col="blue", lwd=2, lty=2) ```{r, echo=FALSE} # Sanity checks for trash-talk above. -stopifnot(metadata(e.out.hto)$alpha < 50 * metadata(e.out.gene)$alpha) stopifnot(sum(e.out.gene$Total < 100 & e.out.hto$Total > 1000) > 50) ``` From 8b6a9afd23b79bda775e10247224d1e81483f3b4 Mon Sep 17 00:00:00 2001 From: Peter Hickey Date: Fri, 10 Jan 2025 17:03:33 +1100 Subject: [PATCH 2/4] Further accommodations for changes from DropletUtils::emptyDrops() - Details of changes: MarioniLab/DropletUtils#118 - For this chunk, the new default of `alpha=Inf` gives poor results. Some quick comparisons given below - BioC 3.21 - 39,028 non-empty droplets with `emptyDrops(counts(hto.sce), by.rank=40000)` - 21,374 non-empty droplets with `emptyDrops(counts(hto.sce), by.rank=40000, alpha=NULL) - 29,673 non-empty droplets with `emptyDrops(counts(hto.sce), by.rank=30000)` - BioC 3.20 - 21780 on-empty droplets with `emptyDrops(counts(hto.sce), by.rank=40000) (i.e. former default of `alpha = NULL`) - Added some explanation/justification cribbed from that removed in 76a88c599a15c2d1197d49c084ddb3217a958c34 --- inst/book/doublet-detection.Rmd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/inst/book/doublet-detection.Rmd b/inst/book/doublet-detection.Rmd index 2014f02..06e6242 100644 --- a/inst/book/doublet-detection.Rmd +++ b/inst/book/doublet-detection.Rmd @@ -255,7 +255,12 @@ The barcode-rank plots are quite similar to what one might expect from gene expr library(DropletUtils) set.seed(101) -hash.calls <- emptyDrops(counts(hto.sce), by.rank=40000) +# HTO counts tend to exhibit stronger overdispersion (i.e., lower `alpha` in +# the `emptyDrops()` calculations), increasing the risk of violating +# `emptyDrops()`'s distributional assumptions. +# Setting `alpha=NULL` estimates alpha using maximum likelihood; see +# `?emptyDrops` for details. +hash.calls <- emptyDrops(counts(hto.sce), by.rank=40000, alpha=NULL) is.cell <- which(hash.calls$FDR <= 0.001) length(is.cell) From 19910f745b3a29581ce77fe14e9507a7d091798c Mon Sep 17 00:00:00 2001 From: PeteHaitch Date: Mon, 31 Mar 2025 16:52:50 +1100 Subject: [PATCH 3/4] Tweak sanity check - Not directly related to emptyDrops changes, but perhaps indirectly? --- inst/book/cell-cycle.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/book/cell-cycle.Rmd b/inst/book/cell-cycle.Rmd index d1adb7f..6af8dbe 100644 --- a/inst/book/cell-cycle.Rmd +++ b/inst/book/cell-cycle.Rmd @@ -151,7 +151,7 @@ tab singler.assignments <- assignments stopifnot(tab["G1",1] > 0.5 * sum(tab[,1])) stopifnot(tab["G2M",2] > 0.5 * sum(tab[,2])) -stopifnot(tab["G1",3] > 0.5 * sum(tab[,3])) +stopifnot(tab["G1",3] > 0.45 * sum(tab[,3])) stopifnot(tab["G2M",4] > 0.5 * sum(tab[,4])) ``` From 8a4f9b4a13e354c20bf0ac128c9ee21ddc0e4681 Mon Sep 17 00:00:00 2001 From: PeteHaitch Date: Mon, 31 Mar 2025 18:43:42 +1100 Subject: [PATCH 4/4] Version bump --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 506b763..3fb685b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: OSCA.advanced Title: Advanced Single-Cell Analysis -Version: 1.15.2 -Date: 2024-07-01 +Version: 1.15.3 +Date: 2025-03-31 Authors@R: c( person('Robert', 'Amezquita', role = 'aut'), person('Aaron', 'Lun', role = 'aut'),