diff --git a/.Rbuildignore b/.Rbuildignore index 6adf9b8..7e51386 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -21,6 +21,6 @@ LICENSE ^devdata ^tests/testthat/*\.h5$ ^vignettes/articles$ +cran-comments.md ^doc$ ^Meta$ -cran-comments.md diff --git a/DESCRIPTION b/DESCRIPTION index f9a878f..95d149f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rliger -Version: 2.0.1.9004 -Date: 2024-09-27 +Version: 2.0.99 +Date: 2024-10-01 Type: Package Title: Linked Inference of Genomic Experimental Relationships Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) , and Liu J, Gao C, Sodicoff J, et al (2020) for more details. @@ -34,7 +34,7 @@ URL: https://welch-lab.github.io/liger/ License: GPL-3 biocViews: LazyData: true -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 VignetteBuilder: knitr Encoding: UTF-8 Additional_repositories: https://welch-lab.r-universe.dev, https://blaserlab.r-universe.dev @@ -90,3 +90,4 @@ Suggests: SingleCellExperiment, SummarizedExperiment, testthat +Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index e44582f..e66bb5a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,8 @@ S3method("[[",liger) S3method("[[<-",liger) S3method("names<-",liger) S3method(.DollarNames,liger) +S3method(alignFactors,Seurat) +S3method(alignFactors,liger) S3method(as.liger,Seurat) S3method(as.liger,SingleCellExperiment) S3method(as.liger,dgCMatrix) @@ -18,6 +20,8 @@ S3method(as.ligerDataset,ligerDataset) S3method(as.ligerDataset,matrix) S3method(c,liger) S3method(cbind,ligerDataset) +S3method(centroidAlign,Seurat) +S3method(centroidAlign,liger) S3method(closeAllH5,liger) S3method(closeAllH5,ligerDataset) S3method(fortify,liger) @@ -73,6 +77,7 @@ export("scaleData<-") export("scaleUnsharedData<-") export("varFeatures<-") export("varUnsharedFeatures<-") +export(alignFactors) export(as.liger) export(as.ligerDataset) export(calcARI) @@ -82,6 
+87,7 @@ export(calcDatasetSpecificity) export(calcNMI) export(calcPurity) export(cellMeta) +export(centroidAlign) export(closeAllH5) export(commandDiff) export(commands) @@ -276,6 +282,7 @@ importFrom(Matrix,summary) importFrom(Matrix,t) importFrom(Rcpp,evalCpp) importFrom(ggplot2,fortify) +importFrom(lifecycle,deprecated) importFrom(magrittr,"%<>%") importFrom(magrittr,"%>%") importFrom(methods,new) diff --git a/NEWS.md b/NEWS.md index 82ee182..c225750 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,21 +13,22 @@ - Pseudo-bulk should be easy because we are just aggregating cells. - Wilcoxon might be a bit harder because ranks are calculated per gene but the H5 sparse data is column majored. Might need to find a fast on-disk transposition method, which would also enhance RcppPlanc performance when running ANLS on H5 data. -## rliger 2.0.1.9004 +## rliger 2.0.99 -- Added `ligerToH5AD()` allowing reticulate/Python free export of liger object to H5AD format. This might not be releasable due to the need of calling non-exported functions from *hdf5r* library. -- Added organism support in `runGeneralQC()` and refined hemoglobin gene matching regex pattern. -- Changed `runMarkerDEG()` and `runPairwiseDEG()` default method from `"wilcoxon"` to `"pseudoBulk"` -- Fixed `runMarkerDEG(method = "pseudobulk")` bug in assigning pseudo-replicates, and optimized error/warning signaling. -- Optimized DE test memory usage scalability for both pseudo-bulk method and wilcoxon test +- Added `centroidAlign()` for new cell factor loading alignment method - Added `plotProportionBox()` for visualizing compositional analysis - Added `plotBarcodeRank()` for basic QC visualization - Added `plotPairwiseDEGHeatmap()` for visualizing pairwise DEG results - Added `plotGODot()` for visualizing GO enrichment results +- Added `calcNMI()` for evaluating clustering results against ground truth +- Added `ligerToH5AD()` allowing reticulate/Python free export of liger object to H5AD format. 
This might not be releasable due to the need of calling non-exported functions from *hdf5r* library. +- Added organism support in `runGeneralQC()` and refined hemoglobin gene matching regex pattern. +- Optimized DE test memory usage scalability for both pseudo-bulk method and wilcoxon test - Optimized `plotProportionPie()` by adding argument `circleColors` - Optimized `plotVolcano()` text annotation positioning - Optimized visualization function additional argument documentation -- Added `calcNMI()` for evaluating clustering results against ground truth +- Changed `runMarkerDEG()` and `runPairwiseDEG()` default method from `"wilcoxon"` to `"pseudoBulk"` +- Fixed `runMarkerDEG(method = "pseudobulk")` bug in assigning pseudo-replicates, and optimized error/warning signaling. - Fixed bug in `calcAlignment()`, `subsetMemLigerDataset()`, `cellMeta()` - Fixed bug in old version updating functions diff --git a/R/ATAC.R b/R/ATAC.R index 10dbf8a..1f5e54c 100644 --- a/R/ATAC.R +++ b/R/ATAC.R @@ -2,11 +2,11 @@ #' after integration #' @description #' This function is designed for creating peak data for a dataset with only gene -#' expression. This function uses quantile normalized cell factor loading to -#' find nearest neighbors between cells from the queried dataset (without peak) -#' and cells from reference dataset (with peak). And then impute the peak for -#' the former basing on the weight. Therefore, the reference dataset selected -#' must be of "atac" modality setting. +#' expression. This function uses aligned cell factor loading to find nearest +#' neighbors between cells from the queried dataset (without peak) and cells +#' from reference dataset (with peak). And then impute the peak for the former +#' basing on the weight. Therefore, the reference dataset selected must be of +#' "atac" modality setting. #' @param object \linkS4class{liger} object with aligned factor loading computed #' in advance. 
#' @param nNeighbors The maximum number of nearest neighbors to search. Default @@ -39,7 +39,7 @@ #' bmmc <- scaleNotCenter(bmmc) #' if (requireNamespace("RcppPlanc", quietly = TRUE)) { #' bmmc <- runINMF(bmmc, k = 20) -#' bmmc <- quantileNorm(bmmc) +#' bmmc <- alignFactors(bmmc) #' bmmc <- normalizePeak(bmmc) #' bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna") #' } @@ -60,7 +60,7 @@ imputeKNN <- function( if (is.null(getMatrix(object, "H.norm"))) cli::cli_abort( "Aligned factor loading has to be available for imputation. - Please run {.fn quantileNorm} in advance.") + Please run {.fn alignFactors} in advance.") reference <- .checkArgLen(reference, n = 1) reference <- .checkUseDatasets(object, reference)#, modal = "atac") queries <- .checkUseDatasets(object, queries) @@ -177,7 +177,7 @@ imputeKNN <- function( #' bmmc <- selectGenes(bmmc) #' bmmc <- scaleNotCenter(bmmc) #' bmmc <- runINMF(bmmc, miniBatchSize = 100) -#' bmmc <- quantileNorm(bmmc) +#' bmmc <- alignFactors(bmmc) #' bmmc <- normalizePeak(bmmc) #' bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna") #' corr <- linkGenesAndPeaks( @@ -370,7 +370,7 @@ linkGenesAndPeaks <- function( #' requireNamespace("IRanges", quietly = TRUE) && #' requireNamespace("psych", quietly = TRUE)) { #' bmmc <- runINMF(bmmc) -#' bmmc <- quantileNorm(bmmc) +#' bmmc <- alignFactors(bmmc) #' bmmc <- normalizePeak(bmmc) #' bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna") #' corr <- linkGenesAndPeaks( @@ -480,7 +480,7 @@ exportInteractTrack <- function( invisible(NULL) } -#' [Deprecated] Export predicted gene-pair interaction +#' `r lifecycle::badge("deprecated")` Export predicted gene-pair interaction #' @description Export the predicted gene-pair interactions calculated by #' upstream function \code{\link{linkGenesAndPeaks}} into an Interact Track file #' which is compatible with \href{https://genome.ucsc.edu/cgi-bin/hgCustom}{UCSC diff --git a/R/DEG_marker.R b/R/DEG_marker.R index 1dfafd7..b90b346 
100644 --- a/R/DEG_marker.R +++ b/R/DEG_marker.R @@ -130,10 +130,10 @@ #' \code{1}. #' @param verbose Logical. Whether to show information of the progress. Default #' \code{getOption("ligerVerbose")} or \code{TRUE} if users have not set. -#' @return A data.frame with DEG information with the following field: -#' \enumerate{ -#' \item{feature - Gene names} -#' \item{group - Test group name. Multiple tests might be present for each +#' @return A data.frame with DEG information with all or some of the +#' following fields: +#' \item{feature}{Gene names} +#' \item{group}{Test group name. Multiple tests might be present for each #' function call. This is the main variable to distinguish the tests. For a #' pairwise test, a row with a certain group name represents the test result #' between the this group against the other control group; When split by a @@ -144,21 +144,20 @@ #' all other cells. When running split marker detection, the group name would #' be in "split.group" format, meaning the stats is by comparing the group in #' the split level against all other cells in the same split level.} -#' \item{logFC - Log fold change} -#' \item{pval - P-value} -#' \item{padj - Adjusted p-value} -#' \item{avgExpr - Mean expression in the test group indicated by the "group" +#' \item{logFC}{Log fold change} +#' \item{pval}{P-value} +#' \item{padj}{Adjusted p-value} +#' \item{avgExpr}{Mean expression in the test group indicated by the "group" #' field. Only available for wilcoxon tests.} -#' \item{statistic - Wilcoxon rank-sum test statistic. Only available for +#' \item{statistic}{Wilcoxon rank-sum test statistic. Only available for #' wilcoxon tests.} -#' \item{auc - Area under the ROC curve. Only available for wilcoxon tests.} -#' \item{pct_in - Percentage of cells in the test group, indicated by the +#' \item{auc}{Area under the ROC curve.
Only available for wilcoxon tests.} +#' \item{pct_in}{Percentage of cells in the test group, indicated by the #' "group" field, that express the feature. Only available for wilcoxon #' tests.} -#' \item{pct_out - Percentage of cells in the control group or other cells, as +#' \item{pct_out}{Percentage of cells in the control group or other cells, as #' explained for the "group" field, that express the feature. Only available #' for wilcoxon tests.} -#' } #' @rdname liger-DEG #' @export #' @examples diff --git a/R/GSEA.R b/R/GSEA.R index d3b5fe3..f443307 100644 --- a/R/GSEA.R +++ b/R/GSEA.R @@ -258,6 +258,7 @@ runGOEnrich <- function( #' @return A ggplot object if only one group or a list of ggplot objects. #' @export #' @examples +#' \donttest{ #' defaultCluster(pbmc) <- pbmcPlot$leiden_cluster #' # Test the DEG between "stim" and "ctrl", within each cluster #' result <- runPairwiseDEG( @@ -269,7 +270,6 @@ runGOEnrich <- function( #' ) #' # Setting `significant = FALSE` because it's hard for a gene list obtained #' # from small test dataset to represent real-life biology. -#' \donttest{ #' if (requireNamespace("gprofiler2", quietly = TRUE)) { #' go <- runGOEnrich(result, group = "0.stim", significant = FALSE) #' # The toy example won't have significant result. 
@@ -317,11 +317,11 @@ plotGODot <- function( next } g <- resdf %>% - dplyr::select( - .data[['term_name']], - .data[['p_value']], - .data[['intersection_size']] - ) %>% + dplyr::select(dplyr::all_of(c( + 'term_name', + 'p_value', + 'intersection_size' + ))) %>% dplyr::arrange(.data[['p_value']]) %>% dplyr::slice_head(n = n) %>% dplyr::mutate( diff --git a/R/RcppExports.R b/R/RcppExports.R index 21c9ead..13d6265 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -5,30 +5,18 @@ RunModularityClusteringCpp <- function(SNN, modularityFunction, resolution, algo .Call(`_rliger_RunModularityClusteringCpp`, SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) } -normalize_dense_cpp <- function(x, MARGIN = 2L, L = 1L) { - .Call(`_rliger_normalize_dense_cpp`, x, MARGIN, L) +moe_correct_ridge_cpp <- function(Z_orig, R, lambda, Phi, B, N) { + .Call(`_rliger_moe_correct_ridge_cpp`, Z_orig, R, lambda, Phi, B, N) } -scale_dense_cpp <- function(x, MARGIN = 2L, center = TRUE, scale = TRUE) { - .Call(`_rliger_scale_dense_cpp`, x, MARGIN, center, scale) +normalize_byCol_dense_rcpp <- function(x) { + .Call(`_rliger_normalize_byCol_dense_rcpp`, x) } colNormalize_dense_cpp <- function(x, L) { .Call(`_rliger_colNormalize_dense_cpp`, x, L) } -colAggregateMedian_dense_cpp <- function(x, group, n) { - .Call(`_rliger_colAggregateMedian_dense_cpp`, x, group, n) -} - -harmony_moe_correct_ridge_cpp <- function(Z_orig, R, lambda, Phi, B, N) { - .Call(`_rliger_harmony_moe_correct_ridge_cpp`, Z_orig, R, lambda, Phi, B, N) -} - -normalize_byCol_dense_rcpp <- function(x) { - .Call(`_rliger_normalize_byCol_dense_rcpp`, x) -} - scaleNotCenter_byRow_rcpp <- function(x) { .Call(`_rliger_scaleNotCenter_byRow_rcpp`, x) } @@ -73,6 +61,10 @@ colAggregateSums_sparse <- function(x, group, ngroups) { .Call(`_rliger_colAggregateSums_sparse`, x, group, ngroups) } +colAggregateMedian_dense_cpp <- function(x, group, n) { + 
.Call(`_rliger_colAggregateMedian_dense_cpp`, x, group, n) +} + sample_cpp <- function(x, size) { .Call(`_rliger_sample_cpp`, x, size) } diff --git a/R/cINMF.R b/R/cINMF.R index 26299ab..7260f09 100644 --- a/R/cINMF.R +++ b/R/cINMF.R @@ -1,6 +1,7 @@ #' Perform consensus iNMF on scaled datasets #' @description -#' \bold{NOT STABLE} - This is an experimental function and is subject to change. +#' `r lifecycle::badge("experimental")` This is an experimental function and is +#' subject to change. #' #' Performs consensus integrative non-negative matrix factorization (c-iNMF) #' to return factorized \eqn{H}, \eqn{W}, and \eqn{V} matrices. In order to diff --git a/R/classConversion.R b/R/classConversion.R index 98ed926..bb59e3d 100644 --- a/R/classConversion.R +++ b/R/classConversion.R @@ -256,14 +256,15 @@ as.ligerDataset.SingleCellExperiment <- function( #' default cluster labeling to set the Idents. Default \code{FALSE}. #' @param merge Logical, whether to merge layers of different datasets into one. #' Not recommended. Default \code{FALSE}. -#' @param by.dataset [Deprecated]. Use \code{identByDataset} instead. -#' @param nms [Defunct] Will be ignored because new object structure does not -#' have related problem. -#' @param renormalize [Defunct] Will be ignored because since Seurat V5, layers -#' of data can exist at the same time and it is better to left it for users to -#' do it by themselves. -#' @param use.liger.genes [Defunct] Will be ignored and will always set LIGER -#' variable features to the place. +#' @param by.dataset `r lifecycle::badge("superseded")`. Use +#' \code{identByDataset} instead. +#' @param nms `r lifecycle::badge("defunct")` Will be ignored because new object +#' structure does not have related problem. +#' @param renormalize `r lifecycle::badge("defunct")` Will be ignored because +#' since Seurat V5, layers of data can exist at the same time and it is better +#' to left it for users to do it by themselves. 
+#' @param use.liger.genes `r lifecycle::badge("defunct")` Will be ignored and +#' will always set LIGER variable features to the place. #' @export #' @rdname ligerToSeurat #' @return Always returns Seurat object(s) of the latest version. By default a diff --git a/R/classes.R b/R/classes.R index 4ee7d0b..798ea5a 100644 --- a/R/classes.R +++ b/R/classes.R @@ -177,7 +177,7 @@ setValidity("ligerDataset", .valid.ligerDataset) #' @slot W iNMF output matrix of shared gene loadings for each factor. See #' \code{\link{runIntegration}}. #' @slot H.norm Matrix of aligned factor loading for each cell. See -#' \code{\link{quantileNorm}} and \code{\link{runIntegration}}. +#' \code{\link{alignFactors}} and \code{\link{runIntegration}}. #' @slot commands List of \linkS4class{ligerCommand} objects. Record of #' analysis. Use \code{commands} to retrieve information. See detailed section #' accordingly. diff --git a/R/clustering.R b/R/clustering.R index 52306cf..03acd32 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -1,14 +1,13 @@ #' SNN Graph Based Community Detection #' @description -#' After quantile normalization, users can additionally run the Leiden or +#' After aligning cell factor loadings, users can additionally run the Leiden or #' Louvain algorithm for community detection, which is widely used in #' single-cell analysis and excels at merging small clusters into broad cell #' classes. #' -#' While using quantile normalized factor loadings (result from -#' \code{\link{quantileNorm}}) is recommended, this function looks for -#' unnormalized factor loadings (result from \code{\link{runIntegration}}) when -#' the former is not available. +#' While using aligned factor loadings (result from \code{\link{alignFactors}}) +#' is recommended, this function looks for unaligned factor loadings (raw result +#' from \code{\link{runIntegration}}) when the former is not available. #' @param object A \linkS4class{liger} object. Should have valid factorization #' result available. 
#' @param nNeighbors Integer, the maximum number of nearest neighbors to @@ -80,7 +79,7 @@ runCluster <- function(object, Hsearch <- searchH(object, useRaw) H <- Hsearch$H useRaw <- Hsearch$useRaw - type <- ifelse(useRaw, "unnormalized", "quantile normalized") + type <- ifelse(useRaw, "unaligned", "aligned") if (!is.null(useDims)) H <- H[, useDims, drop = FALSE] @@ -148,7 +147,7 @@ runCluster <- function(object, return(object) } -#' [Deprecated] Louvain algorithm for community detection +#' `r lifecycle::badge("superseded")` Louvain algorithm for community detection #' @description #' After quantile normalization, users can additionally run the Louvain #' algorithm for community detection, which is widely used in single-cell @@ -362,7 +361,8 @@ mapCellMeta <- function(object, from, newTo = NULL, ...) { #' calculation. Default \code{NULL} uses all datasets. #' @param verbose Logical. Whether to show information of the progress. Default #' \code{getOption("ligerVerbose")} or \code{TRUE} if users have not set. -#' @param classes.compare [Deprecated/Renamed]. Use \code{trueCluster} instead. +#' @param classes.compare `r lifecycle::badge("superseded")` Use +#' \code{trueCluster} instead. #' @return A numeric scalar, the purity of the clustering result indicated by #' \code{useCluster} compared to \code{trueCluster}. #' @export @@ -451,7 +451,8 @@ calcPurity <- function(object, #' calculation. Default \code{NULL} uses all datasets. #' @param verbose Logical. Whether to show information of the progress. Default #' \code{getOption("ligerVerbose")} or \code{TRUE} if users have not set. -#' @param classes.compare [Deprecated/Renamed]. Use \code{trueCluster} instead. +#' @param classes.compare `r lifecycle::badge("superseded")` Use +#' \code{trueCluster} instead. #' @return A numeric scalar, the ARI of the clustering result indicated by #' \code{useCluster} compared to \code{trueCluster}.
#' @export diff --git a/R/deprecated.R b/R/deprecated.R index 295dedc..c2996cb 100755 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -11,7 +11,7 @@ NULL # These are deprecated functions likely to be removed in future versions. # Documentation for these functions is incomplete. -#' Quantile align (normalize) factor loadings +#' `r lifecycle::badge("superseded")` Quantile align (normalize) factor loadings #' #' This is a deprecated function. Calling 'quantileNorm' instead. #' @@ -46,7 +46,7 @@ NULL #' @param resolution Controls the number of communities detected. Higher resolution -> more #' communities. (default 1) #' @param dims.use Indices of factors to use for shared nearest factor determination (default -#' 1:ncol(H[[1]])). +#' \code{1:ncol(H[[1]])}). #' @param dist.use Distance metric to use in calculating nearest neighbors (default "CR"). #' @param center Centers the data when scaling factors (useful for less sparse modalities like #' methylation data). (default FALSE) diff --git a/R/downsample.R b/R/downsample.R index f004f9c..f5addcb 100644 --- a/R/downsample.R +++ b/R/downsample.R @@ -83,7 +83,7 @@ downsample <- function( else return(subsetLiger(object = object, cellIdx = selected, ...)) } -#' [Deprecated] See \code{\link{downsample}} +#' `r lifecycle::badge("superseded")` See \code{\link{downsample}} #' @description This function mainly aims at downsampling datasets to a size #' suitable for plotting. 
#' @param object \linkS4class{liger} object diff --git a/R/embedding.R b/R/embedding.R index 5a1ddbe..4ad6581 100644 --- a/R/embedding.R +++ b/R/embedding.R @@ -1,7 +1,7 @@ #' Perform UMAP Dimensionality Reduction #' @description -#' Run UMAP on the quantile normalized cell factors (result from -#' \code{\link{quantileNorm}}), or unnormalized cell factors (result from +#' Run UMAP on the aligned cell factors (result from +#' \code{\link{alignFactors}}), or unaligned cell factors (raw result from #' \code{\link{runIntegration}})) to generate a 2D embedding for visualization #' (or general dimensionality reduction). Has option to run on subset of #' factors. It is generally recommended to use this method for dimensionality @@ -18,7 +18,7 @@ #' 0.001 to 0.5, with 0.1 being a reasonable default. #' @param object \linkS4class{liger} object with factorization results. #' @param useRaw Whether to use un-aligned cell factor loadings (\eqn{H} -#' matrices). Default \code{NULL} search for quantile-normalized loadings first +#' matrices). Default \code{NULL} search for aligned factor loadings first #' and un-aligned loadings then. #' @param useDims Index of factors to use for computing the embedding. Default #' \code{NULL} uses all factors. 
@@ -74,7 +74,7 @@ runUMAP <- function( Hsearch <- searchH(object, useRaw) H <- Hsearch$H useRaw <- Hsearch$useRaw - type <- ifelse(useRaw, "unnormalized", "quantile normalized") + type <- ifelse(useRaw, "unaligned", "aligned") if (isTRUE(verbose)) cli::cli_process_start("Generating UMAP on {type} cell factor loadings") if (!is.null(useDims)) H <- H[, useDims, drop = FALSE] @@ -97,8 +97,8 @@ runUMAP <- function( #' Perform t-SNE dimensionality reduction #' @description -#' Runs t-SNE on the quantile normalized cell factors (result from -#' \code{\link{quantileNorm}}), or unnormalized cell factors (result from +#' Runs t-SNE on the aligned cell factors (result from +#' \code{\link{alignFactors}}), or unaligned cell factors (result from #' \code{\link{runIntegration}})) to generate a 2D embedding for visualization. #' By default \code{\link[Rtsne]{Rtsne}} (Barnes-Hut implementation of t-SNE) #' method is invoked, while alternative "fftRtsne" method (FFT-accelerated @@ -161,7 +161,7 @@ runTSNE <- function( Hsearch <- searchH(object, useRaw) H <- Hsearch$H useRaw <- Hsearch$useRaw - type <- ifelse(useRaw, "unnormalized", "quantile normalized") + type <- ifelse(useRaw, "unaligned", "aligned") if (isTRUE(verbose)) cli::cli_process_start("Generating TSNE ({method}) on {type} cell factor loadings") if (!is.null(useDims)) H <- H[, useDims, drop = FALSE] diff --git a/R/factorMarker.R b/R/factorMarker.R index 060ac3c..581def2 100644 --- a/R/factorMarker.R +++ b/R/factorMarker.R @@ -25,9 +25,9 @@ #' @param factor.share.thresh,dataset.specificity,log.fc.thresh,pval.thresh,num.genes,print.genes #' \bold{Deprecated}. See Usage section for replacement. 
#' @return A list object consisting of the following entries: -#' \item{[value of `dataset1`]}{data.frame of dataset1-specific markers} +#' \item{value of `dataset1`}{data.frame of dataset1-specific markers} #' \item{shared}{data.frame of shared markers} -#' \item{[value of `dataset1`]}{data.frame of dataset2-specific markers} +#' \item{value of `dataset2`}{data.frame of dataset2-specific markers} #' \item{num_factors_V1}{A frequency table indicating the number of factors each #' marker appears, in dataset1} #' \item{num_factors_V2}{A frequency table indicating the number of factors each diff --git a/R/import.R b/R/import.R index f124af2..83240ed 100644 --- a/R/import.R +++ b/R/import.R @@ -23,7 +23,7 @@ #' @param removeMissing Logical. Whether to remove cells that do not have any #' counts and features not expressed in any cells from each dataset. Default #' \code{TRUE}. -#' @param addPrefix Logical. Whether to add "_" as a prefix of +#' @param addPrefix Logical. Whether to add "datasetName_" as a prefix of #' cell identifiers (e.g. barcodes) to avoid duplicates in multiple libraries ( #' common with 10X data). Default \code{"auto"} detects if matrix columns #' already has the exact prefix or not. Logical value forces the action. @@ -47,8 +47,8 @@ #' \code{getOption("ligerVerbose")} or \code{TRUE} if users have not set. #' @param ... Additional slot values that should be directly placed in object. #' @param raw.data,remove.missing,format.type,data.name,indices.name,indptr.name,genes.name,barcodes.name -#' \bold{Deprecated.} See Usage section for replacement. -#' @param take.gene.union Defuncted. Will be ignored. +#' `r lifecycle::badge("superseded")` See Usage section for replacement. +#' @param take.gene.union `r lifecycle::badge("defunct")` Will be ignored.
#' @export #' @seealso \code{\link{createLigerDataset}}, \code{\link{createH5LigerDataset}} #' @examples @@ -444,10 +444,11 @@ createH5LigerDataset <- function( #' @description #' This file reads a liger object stored in RDS files under all kinds of types. #' 1. A \linkS4class{liger} object with in-memory data created from package -#' version since 1.99. 2. A liger object with on-disk H5 data associated, where -#' the link to H5 files will be automatically restored. 3. A liger object -#' created with older package version, and can be updated to the latest data -#' structure by default. +#' version since 1.99. +#' 2. A liger object with on-disk H5 data associated, where the link to H5 files +#' will be automatically restored. +#' 3. A liger object created with older package version, and can be updated to +#' the latest data structure by default. #' @param filename Path to an RDS file of a \code{liger} object of old versions. #' @param dimredName The name of variable in \code{cellMeta} slot to store the #' dimensionality reduction matrix, which originally located in @@ -702,10 +703,10 @@ importCGE <- function( #' pipelines but needs user arguments for correct recognition. Similarly, the #' returned value can directly be used for constructing a \linkS4class{liger} #' object. -#' @param path [A.] A Directory containing the matrix.mtx, genes.tsv (or +#' @param path (A.) A Directory containing the matrix.mtx, genes.tsv (or #' features.tsv), and barcodes.tsv files provided by 10X. A vector, a named #' vector, a list or a named list can be given in order to load several data -#' directories. [B.] The 10X root directory where subdirectories of per-sample +#' directories. (B.) The 10X root directory where subdirectories of per-sample #' output folders can be found. Sample names will by default take the name of #' the vector, list or subfolders. 
#' @param sampleNames A vector of names to override the detected or set sample diff --git a/R/integration.R b/R/integration.R index 8e108fb..8368fdc 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1,8 +1,8 @@ #' Integrate scaled datasets with iNMF or variant methods #' @description #' LIGER provides dataset integration methods based on iNMF (integrative -#' Non-negative Matrix Factorization [1]) and its variants (online iNMF [2] and -#' UINMF [3]). This function wraps \code{\link{runINMF}}, +#' Non-negative Matrix Factorization \[1\]) and its variants (online iNMF \[2\] +#' and UINMF \[3\]). This function wraps \code{\link{runINMF}}, #' \code{\link{runOnlineINMF}} and \code{\link{runUINMF}}, of which the help #' pages have more detailed description. #' @param object A \linkS4class{liger} object or a Seurat object with @@ -430,7 +430,7 @@ runINMF.Seurat <- function( return(bestResult) } -#' [Deprecated] Perform iNMF on scaled datasets +#' `r lifecycle::badge("deprecated")` Perform iNMF on scaled datasets #' @description #' \bold{Please turn to \code{\link{runINMF}} or \code{\link{runIntegration}}}. #' @@ -941,7 +941,7 @@ runOnlineINMF.Seurat <- function( return(object) } -#' [Deprecated] Perform online iNMF on scaled datasets +#' `r lifecycle::badge("deprecated")` Perform online iNMF on scaled datasets #' @description #' \bold{Please turn to \code{\link{runOnlineINMF}} or #' \code{\link{runIntegration}}}. @@ -1251,8 +1251,125 @@ runUINMF.liger <- function( } +#' Align factor loadings to get final integration +#' @description +#' This function is a wrapper to switch between alternative factor loading +#' alignment methods that LIGER provides, which is a required step for producing +#' the final integrated result. Two methods are provided (click on options for +#' more details): +#' +#' \itemize{ +#' \item{\code{method = "\link{quantileNorm}"}: Previously published quantile +#' normalization method. 
(default)} +#' \item{\code{method = "\link{centroidAlign}"}: Newly developed centroid +#' alignment method. `r lifecycle::badge("experimental")`} +#' } +#' @export +#' @rdname alignFactors +#' @seealso \code{\link{quantileNorm}}, \code{\link{centroidAlign}} +#' @param object A \linkS4class{liger} or Seurat object with valid factorization +#' result available (i.e. \code{\link{runIntegration}} performed in advance). +#' @param method Character, method to align factors. Default +#' \code{"quantileNorm"}. Optionally \code{"centroidAlign"}. +#' @param ... Additional arguments passed to selected methods. +#' For \code{"quantileNorm"}: +#' \describe{ +#' \item{\code{quantiles}}{Number of quantiles to use for quantile +#' normalization. Default \code{50}.} +#' \item{\code{reference}}{Character, numeric or logical selection of one +#' dataset, out of all available datasets in \code{object}, to use as a +#' "reference" for quantile normalization. Default \code{NULL} tries to find +#' an RNA dataset with the largest number of cells; if no RNA dataset +#' available, use the globally largest dataset.} +#' \item{\code{minCells}}{Minimum number of cells to consider a cluster +#' shared across datasets. Default \code{20}.} +#' \item{\code{nNeighbors}}{Number of nearest neighbors for within-dataset +#' knn graph. Default \code{20}.} +#' \item{\code{useDims}}{Indices of factors to use for shared nearest factor +#' determination. Default \code{NULL} uses all factors.} +#' \item{\code{center}}{Whether to center the data when scaling factors. +#' Could be useful for less sparse modalities like methylation data. +#' Default \code{FALSE}.} +#' \item{\code{maxSample}}{Maximum number of cells used for quantile +#' normalization of each cluster and factor. Default \code{1000}.} +#' \item{\code{eps}}{The error bound of the nearest neighbor search. Lower +#' values give more accurate nearest neighbor graphs but take much longer to +#' compute.
Default \code{0.9}.} +#' \item{\code{refineKNN}}{Whether to increase robustness of cluster +#' assignments using KNN graph. Default \code{TRUE}.} +#' \item{\code{clusterName}}{Variable name that will store the clustering +#' result in metadata of a \linkS4class{liger} object or a \code{Seurat} +#' object. Default \code{"quantileNorm_cluster"}.} +#' \item{\code{seed}}{Random seed to allow reproducible results. Default +#' \code{1}.} +#' \item{\code{verbose}}{Logical. Whether to show information of the +#' progress. Default \code{getOption("ligerVerbose")} or \code{TRUE} if +#' users have not set.} +#' } +#' For \code{"centroidAlign"} `r lifecycle::badge("experimental")`: +#' \describe{ +#' \item{\code{lambda}}{Ridge regression penalty applied to each dataset. +#' Can be one number that applies to all datasets, or a numeric vector with +#' length equal to the number of datasets. Default \code{1}.} +#' \item{\code{useDims}}{Indices of factors to be considered for the +#' alignment. Default \code{NULL} uses all factors.} +#' \item{\code{scaleEmb}}{Logical, whether to scale the factor loading being +#' considered as the embedding. Default \code{TRUE}.} +#' \item{\code{centerEmb}}{Logical, whether to center the factor loading +#' being considered as the embedding before scaling it. Default \code{TRUE}.} +#' \item{\code{scaleCluster}}{Logical, whether to scale the factor loading +#' being considered as the cluster assignment probability. Default +#' \code{FALSE}.} +#' \item{\code{centerCluster}}{Logical, whether to center the factor loading +#' being considered as the cluster assignment probability before scaling it. +#' Default \code{FALSE}.} +#' \item{\code{shift}}{Logical, whether to shift the factor loading being +#' considered as the cluster assignment probability after centered scaling. +#' Default \code{FALSE}.} +#' \item{\code{diagnosis}}{Logical, whether to return cell metadata variables +#' with diagnostic information.
Default \code{FALSE}.} +#' } +alignFactors <- function( + object, + method = c("quantileNorm", "centroidAlign"), + ... +) { + UseMethod("alignFactors", object) +} +#' @export +#' @rdname alignFactors +#' @method alignFactors liger +alignFactors.liger <- function( + object, + method = c("quantileNorm", "centroidAlign"), + ... +) { + method <- match.arg(method) + if (method == "centroidAlign") { + object <- centroidAlign(object, ...) + } else if (method == "quantileNorm") { + object <- quantileNorm(object, ...) + } + return(object) +} +#' @export +#' @rdname alignFactors +#' @method alignFactors Seurat +alignFactors.Seurat <- function( + object, + method = c("quantileNorm", "centroidAlign"), + ... +) { + method <- match.arg(method) + if (method == "centroidAlign") { + object <- centroidAlign(object, ...) + } else if (method == "quantileNorm") { + object <- quantileNorm(object, ...) + } + return(object) +} ########################### Quantile Normalization ############################# @@ -1383,6 +1500,7 @@ quantileNorm.liger <- function( ) object@H.norm <- out$H.norm cellMeta(object, clusterName, check = FALSE) <- out$clusters + object@uns$alignmentMethod <- "quantileNorm" return(object) } @@ -1553,7 +1671,7 @@ quantileNorm.Seurat <- function( return(ref) } -#' [Deprecated] Quantile align (normalize) factor loading +#' `r lifecycle::badge("superseded")` Quantile align (normalize) factor loading #' @description #' \bold{Please turn to \code{\link{quantileNorm}}.} #' @@ -1584,7 +1702,7 @@ quantileNorm.Seurat <- function( #' Lower values give more accurate nearest neighbor graphs but take much longer #' to computer. #' @param dims.use Indices of factors to use for shared nearest factor -#' determination (default 1:ncol(H[[1]])). +#' determination (default \code{1:ncol(H[[1]])}). #' @param do.center Centers the data when scaling factors (useful for less #' sparse modalities like methylation data). 
(default FALSE) #' @param max_sample Maximum number of cells used for quantile normalization of @@ -1633,20 +1751,279 @@ quantile_norm <- function( # nocov start ) } # nocov end +.same <- function(x, y) { + if (identical(x, y)) return(x) + else cli::cli_abort("Different features are used for each dataset.") +} +########################### align centroid ############################# -.same <- function(x, y) { - if (identical(x, y)) return(x) - else cli::cli_abort("Different features are used for each dataset.") +#' `r lifecycle::badge("experimental")` Align factor loading by centroid alignment (beta) +#' @description +#' This process treats the factor loading of each dataset as the low dimensional +#' embedding as well as the cluster assignment probability, i.e. the soft +#' clustering result. Then the method aligns the embedding by linearly moving +#' the centroids of the same cluster but within each dataset towards each other. +#' +#' \bold{ATTENTION: This method is still under development while has shown +#' encouraging results in benchmarking tests. The arguments and their default +#' values reflect the best scored parameters in the tests and some of them may +#' be subject to change in the future.} +#' @details +#' Diagnostic information include: +#' +#' \itemize{ +#' \item{object$raw_which.max: The index of the factor with the maximum value +#' in the raw factor loading.} +#' \item{object$R_which.max: The index of the factor with the maximum value in +#' the soft clustering probability matrix used for correction.} +#' \item{object$Z_which.max: The index of the factor with the maximum value in +#' the aligned factor loading.} +#' } +#' +#' @param object A \linkS4class{liger} or Seurat object with valid factorization +#' result available (i.e. \code{\link{runIntegration}} performed in advance). +#' @param lambda Ridge regression penalty applied to each dataset. 
Can be one +#' number that applies to all datasets, or a numeric vector with length equal to +#' the number of datasets. Default \code{1}. +#' @param useDims Indices of factors to be considered for the alignment. +#' Default \code{NULL} uses all factors. +#' @param scaleEmb Logical, whether to scale the factor loading being considered +#' as the embedding. Default \code{TRUE}. +#' @param centerEmb Logical, whether to center the factor loading being +#' considered as the embedding before scaling it. Default \code{TRUE}. +#' @param scaleCluster Logical, whether to scale the factor loading being +#' considered as the cluster assignment probability. Default \code{FALSE}. +#' @param centerCluster Logical, whether to center the factor loading being +#' considered as the cluster assignment probability before scaling it. Default +#' \code{FALSE}. +#' @param shift Logical, whether to shift the factor loading being considered as +#' the cluster assignment probability after centered scaling. Default +#' \code{FALSE}. +#' @param diagnosis Logical, whether to return cell metadata variables with +#' diagnostic information. See Details. Default \code{FALSE}. +#' @param ... Arguments passed to other S3 methods of this function.
+#' @return Returns the updated input object +#' \itemize{ +#' \item{liger method +#' \itemize{ +#' \item{Update the \code{H.norm} slot for the aligned cell factor +#' loading, ready for running graph based community detection clustering +#' or dimensionality reduction for visualization.} +#' \item{Update the \code{cellMeta} slot with diagnostic information if +#' \code{diagnosis = TRUE}.} +#' }} +#' \item{Seurat method +#' \itemize{ +#' \item{Update the \code{reductions} slot with a new \code{DimReduc} +#' object containing the aligned cell factor loading.} +#' \item{Update the metadata with diagnostic information if +#' \code{diagnosis = TRUE}.} +#' }} +#' } +#' @examples +#' pbmc <- centroidAlign(pbmcPlot) +#' @export +#' @rdname centroidAlign +centroidAlign <- function( + object, + ... +) { + lifecycle::signal_stage(stage = "experimental", "centroidAlign()") + UseMethod("centroidAlign", object) } +#' @export +#' @rdname centroidAlign +#' @method centroidAlign liger +centroidAlign.liger <- function( + object, + lambda = 1, + useDims = NULL, + scaleEmb = TRUE, + centerEmb = TRUE, + scaleCluster = FALSE, + centerCluster = FALSE, + shift = FALSE, + diagnosis = FALSE, + ... +) { + .checkObjVersion(object) + .checkValidFactorResult(object, checkV = FALSE) + object <- recordCommand(object, ...) + if (any(modalOf(object) == "meth")) { + cli::cli_alert_warning( + "Methylation data is detected while centroid alignment method is not optimized for methylation data yet." + ) + cli::cli_alert_info("It is recommended to use {.fn quantileNorm} method instead.") + } + lambda <- .checkArgLen(lambda, length(object), repN = TRUE, class = "numeric") + + out <- .centroidAlign.Hraw( + object = Reduce(cbind, getMatrix(object, "H")), + datasetVar = object$dataset, + lambda = lambda, + useDims = useDims, + diagnosis = diagnosis, + scaleEmb = scaleEmb, centerEmb = centerEmb, + scaleCluster = scaleCluster, centerCluster = centerCluster, + shift = shift, + ...
+ ) + object@H.norm <- out$aligned + object@uns$alignmentMethod <- "centroidAlign" + if ("raw_which.max" %in% names(out)) { + cellMeta(object, "raw_which.max", check = FALSE) <- out$raw_which.max + } + if ("Z_which.max" %in% names(out)) { + cellMeta(object, "Z_which.max", check = FALSE) <- out$Z_which.max + } + if ("R_which.max" %in% names(out)) { + cellMeta(object, "R_which.max", check = FALSE) <- out$R_which.max + } + return(object) +} + +#' @export +#' @rdname centroidAlign +#' @method centroidAlign Seurat +#' @param reduction Name of the reduction where LIGER integration result is +#' stored. Default \code{"inmf"}. +centroidAlign.Seurat <- function( + object, + reduction = "inmf", + lambda = 1, + useDims = NULL, + scaleEmb = TRUE, + centerEmb = TRUE, + scaleCluster = FALSE, + centerCluster = FALSE, + shift = FALSE, + diagnosis = FALSE, + ... +) { + resName <- paste0(reduction, "Norm") + reduction <- object[[reduction]] + if (!inherits(reduction, "DimReduc")) { + cli::cli_abort("Specified {.var reduction} does not points to a {.cls DimReduc}.") + } + # Retrieve some information. 
Might have better ways instead of using `@` + ## Due to proper formatting in Seurat object, Hconcat is already cell x k + ## Transposed to k x N, as in liger + Hconcat <- t(reduction[[]]) + datasetVar <- reduction@misc$dataset + assay <- reduction@assay.used + W <- reduction[] + + lambda <- .checkArgLen(lambda, length(unique(datasetVar)), repN = TRUE, class = "numeric") + + result <- .centroidAlign.Hraw( + object = Hconcat, + datasetVar = datasetVar, + lambda = lambda, + useDims = useDims, + scaleEmb = scaleEmb, centerEmb = centerEmb, + scaleCluster = scaleCluster, centerCluster = centerCluster, + shift = shift, + diagnosis = diagnosis + ) + + reddim <- Seurat::CreateDimReducObject( + embeddings = result$aligned, loadings = W, + assay = assay, key = paste0(resName, "_") + ) + object[[resName]] <- reddim + if ("raw_which.max" %in% names(result)) { + object[["raw_which.max"]] <- result$raw_which.max + } + if ("Z_which.max" %in% names(result)) { + object[["Z_which.max"]] <- result$Z_which.max + } + if ("R_which.max" %in% names(result)) { + object[["R_which.max"]] <- result$R_which.max + } + return(object) +} + +# object - raw H matrices concatenated, k factors by N cells +.centroidAlign.Hraw <- function( + object, + datasetVar, + lambda, + scaleEmb = TRUE, + centerEmb = TRUE, + scaleCluster = FALSE, + centerCluster = FALSE, + useDims = NULL, + shift = FALSE, + diagnosis = FALSE +) { + # Initiate output list + out <- list() + + # transposed to N cells by k factors + object <- t(object) + useDims <- useDims %||% seq_len(ncol(object)) + object <- object[, useDims] + + if (isTRUE(diagnosis)) { + raw_vote <- apply(object, 1, which.max) + raw_vote <- factor(raw_vote) + names(raw_vote) <- rownames(object) + out$raw_which.max <- raw_vote + } + + Z <- safe_scale(object, center = centerEmb, scale = scaleEmb) + # Z transposed to k factors by N cells + Z <- t(Z) + # phi - binary design matrix for dataset belonging, B datasets by N cells + phi <- Matrix::fac2sparse(datasetVar) + + 
R <- safe_scale(object, center = centerCluster, scale = scaleCluster) + # R transposed to k clusters by N cells + R <- t(R) + if (isTRUE(shift)) { + R <- R - min(R) + } + if (any(R < 0)) { + cli::cli_abort( + c(x = "Negative values found prior to normalizing the cluster assignment probablity distribution", + i = "Can only do either {.code centerCluster = FALSE} or {.code centerCluster = TRUE, shift = TRUE}.") + ) + } + + R <- normalize_byCol_dense_rcpp(R) + + Z_corr <- moe_correct_ridge_cpp( + Z_orig = Z, R = R, lambda = lambda, Phi = phi, B = nrow(phi), N = ncol(phi) + ) + # Z_corr transposed back to N cells by k factors + Z_corr <- t(Z_corr) + dimnames(Z_corr) <- list(rownames(object), paste0("Factor_", seq_len(ncol(Z_corr)))) + + out$aligned <- Z_corr + + if (isTRUE(diagnosis)) { + Z_cluster <- apply(Z_corr, 1, which.max) + Z_cluster <- factor(Z_cluster) + names(Z_cluster) <- colnames(Z) + out$Z_which.max <- Z_cluster + R_cluster <- apply(R, 2, which.max) + R_cluster <- factor(R_cluster) + names(R_cluster) <- colnames(R) + out$R_which.max <- R_cluster + } + + return(out) +} + ################################## EVALUATION ################################## #' Calculate agreement metric after integration @@ -1654,28 +2031,32 @@ quantile_norm <- function( # nocov start #' This metric quantifies how much the factorization and alignment distorts the #' geometry of the original datasets. The greater the agreement, the less #' distortion of geometry there is. This is calculated by performing -#' dimensionality reduction on the original and quantile aligned (or just -#' factorized) datasets, and measuring similarity between the k nearest -#' neighbors for each cell in original and aligned datasets. The Jaccard index -#' is used to quantify similarity, and is the final metric averages across all -#' cells. 
+#' dimensionality reduction on the original and integrated (factorized or plus +#' aligned) datasets, and measuring similarity between the k nearest +#' neighbors for each cell in original and integrated datasets. The Jaccard +#' index is used to quantify similarity, and is the final metric averages across +#' all cells. #' #' Note that for most datasets, the greater the chosen \code{nNeighbor}, the #' greater the agreement in general. Although agreement can theoretically #' approach 1, in practice it is usually no higher than 0.2-0.3. -#' @param object \code{liger} object. Should call quantile_norm before calling. +#' @param object \code{liger} object. Should call \code{\link{alignFactors}} +#' before calling. #' @param ndims Number of factors to produce in NMF. Default \code{40}. #' @param nNeighbors Number of nearest neighbors to use in calculating Jaccard #' index. Default \code{15}. #' @param useRaw Whether to evaluate just factorized \eqn{H} matrices instead of -#' using quantile aligned \eqn{H.norm} matrix. Default \code{FALSE} uses +#' using aligned \eqn{H.norm} matrix. Default \code{FALSE} uses #' aligned matrix. #' @param byDataset Whether to return agreement calculated for each dataset #' instead of the average for all datasets. Default \code{FALSE}. #' @param seed Random seed to allow reproducible results. Default \code{1}. -#' @param k,rand.seed,by.dataset [Deprecated] See Usage for replacement. -#' @param use.aligned [defunct] Use \code{useRaw} instead. -#' @param dr.method [defunct] We no longer support other methods but just NMF. +#' @param k,rand.seed,by.dataset `r lifecycle::badge("superseded")` See Usage +#' for replacement. +#' @param use.aligned `r lifecycle::badge("superseded")` Use \code{useRaw} +#' instead. +#' @param dr.method `r lifecycle::badge("defunct")` We no longer support other +#' methods but just NMF. #' @return A numeric vector of agreement metric. A single value if #' \code{byDataset = FALSE} or each dataset a value otherwise. 
#' @export @@ -1686,7 +2067,7 @@ quantile_norm <- function( # nocov start #' selectGenes %>% #' scaleNotCenter %>% #' runINMF %>% -#' quantileNorm +#' alignFactors #' calcAgreement(pbmc) #' } calcAgreement <- function( @@ -1794,7 +2175,7 @@ calcAgreement <- function( #' between cells specified by the two arguments. \code{cellComp} can contain #' cells already specified in \code{cellIdx}.} #' } -#' @param object A \linkS4class{liger} object, with \code{\link{quantileNorm}} +#' @param object A \linkS4class{liger} object, with \code{\link{alignFactors}} #' already run. #' @param clustersUse The clusters to consider for calculating the alignment. #' Should be a vector of existing levels in \code{clusterVar}. Default @@ -1810,9 +2191,10 @@ calcAgreement <- function( #' @param resultBy Select from \code{"all"}, \code{"dataset"} or \code{"cell"}. #' On which level should the mean alignment be calculated. Default \code{"all"}. #' @param seed Random seed to allow reproducible results. Default \code{1}. -#' @param k,rand.seed,cells.use,cells.comp,clusters.use [Deprecated] Please -#' see Usage for replacement. -#' @param by.cell,by.dataset [Defunct] Use \code{resultBy} instead. +#' @param k,rand.seed,cells.use,cells.comp,clusters.use +#' `r lifecycle::badge("superseded")` Please see Usage for replacement. +#' @param by.cell,by.dataset `r lifecycle::badge("superseded")` Use +#' \code{resultBy} instead. #' @return The alignment metric. #' @export #' @examples @@ -1822,7 +2204,7 @@ calcAgreement <- function( #' selectGenes %>% #' scaleNotCenter %>% #' runINMF %>% -#' quantileNorm +#' alignFactors #' calcAlignment(pbmc) #' } calcAlignment <- function( diff --git a/R/preprocess.R b/R/preprocess.R index 200fa38..3a0b001 100644 --- a/R/preprocess.R +++ b/R/preprocess.R @@ -59,9 +59,9 @@ #' on HDF5 based dataset. Default \code{1000} #' @param verbose Logical. Whether to show information of the progress. Default #' \code{getOption("ligerVerbose")} or \code{TRUE} if users have not set. 
-#' @param mito,ribo,hemo [Deprecated] Now will always compute the percentages -#' of mitochondrial, ribosomal and hemoglobin gene counts. These arguments will -#' be ignored. +#' @param mito,ribo,hemo `r lifecycle::badge("deprecated")` Now will always +#' compute the percentages of mitochondrial, ribosomal and hemoglobin gene +#' counts. These arguments will be ignored. #' @return Updated \code{object} with the \code{cellMeta(object)} updated as #' intended by users. See Details for more information. #' @export diff --git a/R/rliger-package.R b/R/rliger-package.R new file mode 100644 index 0000000..425b3c1 --- /dev/null +++ b/R/rliger-package.R @@ -0,0 +1,7 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom lifecycle deprecated +## usethis namespace: end +NULL diff --git a/R/util.R b/R/util.R index f14158f..9d95483 100644 --- a/R/util.R +++ b/R/util.R @@ -645,7 +645,7 @@ searchH <- function(object, useRaw = NULL) { if (is.null(Ht)) { cli::cli_abort( "No cell factor loading available. - Please run {.fn runIntegration} and {.fn quantileNorm} first." + Please run {.fn runIntegration} and {.fn alignFactors} first." ) } else { useRaw <- TRUE @@ -669,8 +669,8 @@ searchH <- function(object, useRaw = NULL) { H <- getMatrix(object, "H.norm") if (is.null(H)) { cli::cli_abort( - "Quantile-normalized cell factor loading requested but - not found. Please run {.fn quantileNorm} after + "Aligned cell factor loading requested but + not found. Please run {.fn alignFactors} after {.fn runIntegration}." 
) } diff --git a/R/visualization.R b/R/visualization.R index 9daaded..fa4d226 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -901,6 +901,7 @@ plotVolcano <- function( #' @returns ggplot #' @export #' @examples +#' \donttest{ #' if (requireNamespace("EnhancedVolcano", quietly = TRUE)) { #' defaultCluster(pbmc) <- pbmcPlot$leiden_cluster #' # Test the DEG between "stim" and "ctrl", within each cluster @@ -913,6 +914,7 @@ plotVolcano <- function( #' ) #' plotEnhancedVolcano(result, "0.stim") #' } +#' } plotEnhancedVolcano <- function( result, group, @@ -1428,7 +1430,7 @@ plotSankey <- function( graphics::mtext(titles[3], side = 3, adj = 0.95, cex = titleCex, font = 2) } -#' [Deprecated] Generate a river (Sankey) plot +#' `r lifecycle::badge("deprecated")` Generate a river (Sankey) plot #' @description #' Creates a riverplot to show how separate cluster assignments from two #' datasets map onto a joint clustering. The joint clustering is by default the diff --git a/README.md b/README.md index 1f4a6db..8b5dfe8 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,15 @@ # LIGER (Linked Inference of Genomic Experimental Relationships) ->**Now we have a comprehensive documentation site for the latest version of [rliger (2.0)](https://welch-lab.github.io/liger/index.html)!** +
+ +>**NEWS** +>- Check out the new cell factor alignment method (function [`centroidAlign()`](https://welch-lab.github.io/liger/reference/centroidAlign.html)), which aligns cell factor loading by moving soft clustering centroids. This method is benchmarked with [*scib*](https://scib.readthedocs.io/en/latest/index.html) datasets and metrics and shows better overall performance than our previous quantile normalization method, especially for the ability to conserve biological information. +>- Check out the Consensus iNMF method (function [`runCINMF()`](https://welch-lab.github.io/liger/reference/runCINMF.html)), which runs regular iNMF multiple times with different random initialization and summarizes a consensus result with better confidence. +>- Please visit [*rliger* website](https://welch-lab.github.io/liger/index.html) for comprehensive documentation and [revised tutorial](https://welch-lab.github.io/liger/articles/Integrating_multi_scRNA_data.html) that walks through scRNAseq integration and analysis in detail +>- More [changelogs](https://welch-lab.github.io/liger/news/index.html) + +
LIGER (installed as `rliger` ) is a package for integrating and analyzing multiple single-cell datasets, developed by the Macosko lab and maintained/extended by the Welch lab. It relies on integrative non-negative matrix factorization to identify shared and dataset-specific factors. @@ -59,21 +67,21 @@ If you have any questions, comments, or suggestions, you are welcomed to [open a ## Usage -For usage examples and guided walkthroughs, check the `vignettes` directory of the repo. +For usage examples and guided walkthroughs of specific use cases, please check our articles below: * [Integrating Multiple Single-Cell RNA-seq Datasets](https://welch-lab.github.io/liger/articles/Integrating_multi_scRNA_data.html) * [Jointly Defining Cell Types from scRNA-seq and scATAC-seq](https://welch-lab.github.io/liger/articles/Integrating_scRNA_and_scATAC_data.html) * [Iterative Single-Cell Multi-Omic Integration Using Online iNMF](https://welch-lab.github.io/liger/articles/online_iNMF_tutorial.html) -* [Integrating unshared features with UINMF](https://welch-lab.github.io/liger/articles/UINMF_vignette.html) +* [Integrating datasets using unshared features with UINMF](https://welch-lab.github.io/liger/articles/UINMF_vignette.html) * [Integrating spatial transcriptomic and transcriptomic datasets using UINMF](https://welch-lab.github.io/liger/articles/STARmap_dropviz_vig.html) -* [scATAC and scRNA Integration using unshared features (UINMF)](https://welch-lab.github.io/liger/articles/SNAREseq_walkthrough.html) +* [Integrating scATAC and scRNA using unshared features (UINMF)](https://welch-lab.github.io/liger/articles/SNAREseq_walkthrough.html) * [Cross-species Analysis with UINMF](https://welch-lab.github.io/liger/articles/cross_species_vig.html) * [Jointly Defining Cell Types from Single-Cell RNA-seq and DNA Methylation](https://welch-lab.github.io/liger/articles/rna_methylation.html) Meanwhile, since version 2.0.0, LIGER is massively updated for usability and interoperability with 
other packages. Below are links to the introduction of new features. * [Introduction to new liger object and other related classes](https://welch-lab.github.io/liger/articles/liger_object.html) -* [Running Liger directly on Seurat objects](https://welch-lab.github.io/liger/articles/liger_with_seurat.html) +* [Running LIGER directly on Seurat objects](https://welch-lab.github.io/liger/articles/liger_with_seurat.html) If you need to refer to the tutorials for the old version of rliger, please check the [GitHub archive v1.0.1](https://github.com/welch-lab/liger/tree/v1.0.1/vignettes), download the desired rendered HTML files and open them in your browser. diff --git a/man/alignFactors.Rd b/man/alignFactors.Rd new file mode 100644 index 0000000..8dedf9e --- /dev/null +++ b/man/alignFactors.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{alignFactors} +\alias{alignFactors} +\alias{alignFactors.liger} +\alias{alignFactors.Seurat} +\title{Align factor loadings to get final integration} +\usage{ +alignFactors(object, method = c("quantileNorm", "centroidAlign"), ...) + +\method{alignFactors}{liger}(object, method = c("quantileNorm", "centroidAlign"), ...) + +\method{alignFactors}{Seurat}(object, method = c("quantileNorm", "centroidAlign"), ...) +} +\arguments{ +\item{object}{A \linkS4class{liger} or Seurat object with valid factorization +result available (i.e. \code{\link{runIntegration}} performed in advance).} + +\item{method}{Character, method to align factors. Default +\code{"centroidAlign"}. Optionally \code{"quantileNorm"}.} + +\item{...}{Additional arguments passed to selected methods. +For \code{"quantileNorm"}: +\describe{ +\item{\code{quantiles}}{Number of quantiles to use for quantile +normalization. 
Default \code{50}.} +\item{\code{reference}}{Character, numeric or logical selection of one +dataset, out of all available datasets in \code{object}, to use as a +"reference" for quantile normalization. Default \code{NULL} tries to find +an RNA dataset with the largest number of cells; if no RNA dataset +available, use the globally largest dataset.} +\item{\code{minCells}}{Minimum number of cells to consider a cluster +shared across datasets. Default \code{20}.} +\item{\code{nNeighbors}}{Number of nearest neighbors for within-dataset +knn graph. Default \code{20}.} +\item{\code{useDims}}{Indices of factors to use for shared nearest factor +determination. Default \code{NULL} uses all factors.} +\item{\code{center}}{Whether to center the data when scaling factors. +Could be useful for less sparse modalities like methylation data. +Default \code{FALSE}.} +\item{\code{maxSample}}{Maximum number of cells used for quantile +normalization of each cluster and factor. Default \code{1000}.} +\item{\code{eps}}{The error bound of the nearest neighbor search. Lower +values give more accurate nearest neighbor graphs but take much longer to +compute. Default \code{0.9}.} +\item{\code{refineKNN}}{Whether to increase robustness of cluster +assignments using KNN graph. Default \code{TRUE}.} +\item{\code{clusterName}}{Variable name that will store the clustering +result in metadata of a \linkS4class{liger} object or a \code{Seurat} +object. Default \code{"quantileNorm_cluster"}.} +\item{\code{seed}}{Random seed to allow reproducible results. Default +\code{1}.} +\item{\code{verbose}}{Logical. Whether to show information of the +progress. 
Default \code{getOption("ligerVerbose")} or \code{TRUE} if +users have not set.} +} +For \code{"centroidAlign"} \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}: +\describe{ +\item{\code{lambda}}{Ridge regression penalty applied to each dataset. +Can be one number that applies to all datasets, or a numeric vector with +length equal to the number of datasets. Default \code{1}.} +\item{\code{useDims}}{Indices of factors to use considered for the +alignment. Default \code{NULL} uses all factors.} +\item{\code{scaleEmb}}{Logical, whether to scale the factor loading being +considered as the embedding. Default \code{TRUE}.} +\item{\code{centerEmb}}{Logical, whether to center the factor loading +being considered as the embedding before scaling it. Default \code{TRUE}.} +\item{\code{scaleCluster}}{Logical, whether to scale the factor loading +being considered as the cluster assignment probability. Default +\code{FALSE}.} +\item{\code{centerCluster}}{Logical, whether to center the factor loading +being considered as the cluster assignment probability before scaling it. +Default \code{FALSE}.} +\item{\code{shift}}{Logical, whether to shift the factor loading being +considered as the cluster assignment probability after centered scaling. +Default \code{FALSE}.} +\item{\code{diagnosis}}{Logical, whether to return cell metadata variables +with diagnostic information. Default \code{FALSE}.} +}} +} +\description{ +This function is a wrapper to switch between alternative factor loading +alignment methods that LIGER provides, which is a required step for producing +the final integrated result. Two methods are provided (click on options for +more details): + +\itemize{ +\item{\code{method = "\link{quantileNorm}"}: Previously published quantile +normalization method. 
(default)} +\item{\code{method = "\link{centroidAlign}"}: Newly developed centroid +alignment method. \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}} +} +} +\seealso{ +\code{\link{quantileNorm}}, \code{\link{centroidAlign}} +} diff --git a/man/calcARI.Rd b/man/calcARI.Rd index 1d47221..efa93d7 100644 --- a/man/calcARI.Rd +++ b/man/calcARI.Rd @@ -31,7 +31,8 @@ calculation. Default \code{NULL} uses all datasets.} \item{verbose}{Logical. Whether to show information of the progress. Default \code{getOption("ligerVerbose")} or \code{TRUE} if users have not set.} -\item{classes.compare}{[Deprecated/Renamed]. Use \code{trueCluster} instead.} +\item{classes.compare}{. \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Use +\code{trueCluster} instead.} } \value{ A numeric scalar, the ARI of the clustering result indicated by diff --git a/man/calcAgreement.Rd b/man/calcAgreement.Rd index 2601d06..6167e4c 100644 --- a/man/calcAgreement.Rd +++ b/man/calcAgreement.Rd @@ -19,7 +19,8 @@ calcAgreement( ) } \arguments{ -\item{object}{\code{liger} object. Should call quantile_norm before calling.} +\item{object}{\code{liger} object. Should call \code{\link{alignFactors}} +before calling.} \item{ndims}{Number of factors to produce in NMF. Default \code{40}.} @@ -27,7 +28,7 @@ calcAgreement( index. Default \code{15}.} \item{useRaw}{Whether to evaluate just factorized \eqn{H} matrices instead of -using quantile aligned \eqn{H.norm} matrix. Default \code{FALSE} uses +using aligned \eqn{H.norm} matrix. Default \code{FALSE} uses aligned matrix.} \item{byDataset}{Whether to return agreement calculated for each dataset @@ -35,11 +36,14 @@ instead of the average for all datasets. 
Default \code{FALSE}.} \item{seed}{Random seed to allow reproducible results. Default \code{1}.} -\item{dr.method}{[defunct] We no longer support other methods but just NMF.} +\item{dr.method}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#defunct}{\figure{lifecycle-defunct.svg}{options: alt='[Defunct]'}}}{\strong{[Defunct]}} We no longer support other +methods but just NMF.} -\item{k, rand.seed, by.dataset}{[Deprecated] See Usage for replacement.} +\item{k, rand.seed, by.dataset}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} See Usage +for replacement.} -\item{use.aligned}{[defunct] Use \code{useRaw} instead.} +\item{use.aligned}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Use \code{useRaw} +instead.} } \value{ A numeric vector of agreement metric. A single value if @@ -49,11 +53,11 @@ A numeric vector of agreement metric. A single value if This metric quantifies how much the factorization and alignment distorts the geometry of the original datasets. The greater the agreement, the less distortion of geometry there is. This is calculated by performing -dimensionality reduction on the original and quantile aligned (or just -factorized) datasets, and measuring similarity between the k nearest -neighbors for each cell in original and aligned datasets. The Jaccard index -is used to quantify similarity, and is the final metric averages across all -cells. +dimensionality reduction on the original and integrated (factorized or plus +aligned) datasets, and measuring similarity between the k nearest +neighbors for each cell in original and integrated datasets. The Jaccard +index is used to quantify similarity, and is the final metric averages across +all cells. 
Note that for most datasets, the greater the chosen \code{nNeighbor}, the greater the agreement in general. Although agreement can theoretically @@ -66,7 +70,7 @@ if (requireNamespace("RcppPlanc", quietly = TRUE)) { selectGenes \%>\% scaleNotCenter \%>\% runINMF \%>\% - quantileNorm + alignFactors calcAgreement(pbmc) } } diff --git a/man/calcAlignment.Rd b/man/calcAlignment.Rd index 0e8cf4e..8cc4fcb 100644 --- a/man/calcAlignment.Rd +++ b/man/calcAlignment.Rd @@ -23,7 +23,7 @@ calcAlignment( ) } \arguments{ -\item{object}{A \linkS4class{liger} object, with \code{\link{quantileNorm}} +\item{object}{A \linkS4class{liger} object, with \code{\link{alignFactors}} already run.} \item{clustersUse}{The clusters to consider for calculating the alignment. @@ -46,10 +46,10 @@ On which level should the mean alignment be calculated. Default \code{"all"}.} \item{seed}{Random seed to allow reproducible results. Default \code{1}.} -\item{k, rand.seed, cells.use, cells.comp, clusters.use}{[Deprecated] Please -see Usage for replacement.} +\item{k, rand.seed, cells.use, cells.comp, clusters.use}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Please see Usage for replacement.} -\item{by.cell, by.dataset}{[Defunct] Use \code{resultBy} instead.} +\item{by.cell, by.dataset}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Use +\code{resultBy} instead.} } \value{ The alignment metric. @@ -94,7 +94,7 @@ if (requireNamespace("RcppPlanc", quietly = TRUE)) { selectGenes \%>\% scaleNotCenter \%>\% runINMF \%>\% - quantileNorm + alignFactors calcAlignment(pbmc) } } diff --git a/man/calcPurity.Rd b/man/calcPurity.Rd index 86ad71c..e6095df 100644 --- a/man/calcPurity.Rd +++ b/man/calcPurity.Rd @@ -31,7 +31,8 @@ calculation. 
Default \code{NULL} uses all datasets.} \item{verbose}{Logical. Whether to show information of the progress. Default \code{getOption("ligerVerbose")} or \code{TRUE} if users have not set.} -\item{classes.compare}{[Deprecated/Renamed]. Use \code{trueCluster} instead.} +\item{classes.compare}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Use +\code{trueCluster} instead.} } \value{ A numeric scalar, the purity of the clustering result indicated by diff --git a/man/centroidAlign.Rd b/man/centroidAlign.Rd new file mode 100644 index 0000000..6fed290 --- /dev/null +++ b/man/centroidAlign.Rd @@ -0,0 +1,119 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{centroidAlign} +\alias{centroidAlign} +\alias{centroidAlign.liger} +\alias{centroidAlign.Seurat} +\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Align factor loading by centroid alignment (beta)} +\usage{ +centroidAlign(object, ...) + +\method{centroidAlign}{liger}( + object, + lambda = 1, + useDims = NULL, + scaleEmb = TRUE, + centerEmb = TRUE, + scaleCluster = FALSE, + centerCluster = FALSE, + shift = FALSE, + diagnosis = FALSE, + ... +) + +\method{centroidAlign}{Seurat}( + object, + reduction = "inmf", + lambda = 1, + useDims = NULL, + scaleEmb = TRUE, + centerEmb = TRUE, + scaleCluster = FALSE, + centerCluster = FALSE, + shift = FALSE, + diagnosis = FALSE, + ... +) +} +\arguments{ +\item{object}{A \linkS4class{liger} or Seurat object with valid factorization +result available (i.e. \code{\link{runIntegration}} performed in advance).} + +\item{...}{Arguments passed to other S3 methods of this function.} + +\item{lambda}{Ridge regression penalty applied to each dataset. 
Can be one +number that applies to all datasets, or a numeric vector with length equal to +the number of datasets. Default \code{1}.} + +\item{useDims}{Indices of factors to be considered for the alignment. +Default \code{NULL} uses all factors.} + +\item{scaleEmb}{Logical, whether to scale the factor loading being considered +as the embedding. Default \code{TRUE}.} + +\item{centerEmb}{Logical, whether to center the factor loading being +considered as the embedding before scaling it. Default \code{TRUE}.} + +\item{scaleCluster}{Logical, whether to scale the factor loading being +considered as the cluster assignment probability. Default \code{FALSE}.} + +\item{centerCluster}{Logical, whether to center the factor loading being +considered as the cluster assignment probability before scaling it. Default +\code{FALSE}.} + +\item{shift}{Logical, whether to shift the factor loading being considered as +the cluster assignment probability after centered scaling. Default +\code{FALSE}.} + +\item{diagnosis}{Logical, whether to return cell metadata variables with +diagnostic information. See Details. Default \code{FALSE}.} + +\item{reduction}{Name of the reduction where LIGER integration result is +stored. 
Default \code{"inmf"}.} +} +\value{ +Returns the updated input object +\itemize{ +\item{liger method +\itemize{ +\item{Update the \code{H.norm} slot for the aligned cell factor +loading, ready for running graph based community detection clustering +or dimensionality reduction for visualization.} +\item{Update the \code{cellMeta} slot with diagnostic information if +\code{diagnosis = TRUE}.} +}} +\item{Seurat method +\itemize{ +\item{Update the \code{reductions} slot with a new \code{DimReduc} +object containing the aligned cell factor loading.} +\item{Update the metadata with diagnostic information if +\code{diagnosis = TRUE}.} +}} +} +} +\description{ +This process treats the factor loading of each dataset as the low dimensional +embedding as well as the cluster assignment probability, i.e. the soft +clustering result. Then the method aligns the embedding by linearly moving +the centroids of the same cluster but within each dataset towards each other. + +\bold{ATTENTION: This method is still under development, although it has shown +encouraging results in benchmarking tests. The arguments and their default +values reflect the best scored parameters in the tests and some of them may +be subject to change in the future.} +} +\details{ +Diagnostic information includes: + +\itemize{ +\item{object$raw_which.max: The index of the factor with the maximum value +in the raw factor loading.} +\item{object$R_which.max: The index of the factor with the maximum value in +the soft clustering probability matrix used for correction.} +\item{object$Z_which.max: The index of the factor with the maximum value in +the aligned factor loading.} +} +} +\examples{ +pbmc <- centroidAlign(pbmcPlot) +} diff --git a/man/createLiger.Rd b/man/createLiger.Rd index 4c122ca..cf7aa17 100644 --- a/man/createLiger.Rd +++ b/man/createLiger.Rd @@ -56,7 +56,7 @@ Currently options of \code{"mouse"}, \code{"human"}, \code{"zebrafish"}, counts and features not expressed in any cells from each dataset. 
Default \code{TRUE}.} -\item{addPrefix}{Logical. Whether to add "_" as a prefix of +\item{addPrefix}{Logical. Whether to add "datasetName_" as a prefix of cell identifiers (e.g. barcodes) to avoid duplicates in multiple libraries ( common with 10X data). Default \code{"auto"} detects if matrix columns already has the exact prefix or not. Logical value forces the action.} @@ -87,9 +87,9 @@ can be dangerous for large scale analysis.} \item{...}{Additional slot values that should be directly placed in object.} -\item{raw.data, remove.missing, format.type, data.name, indices.name, indptr.name, genes.name, barcodes.name}{\bold{Deprecated.} See Usage section for replacement.} +\item{raw.data, remove.missing, format.type, data.name, indices.name, indptr.name, genes.name, barcodes.name}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} See Usage section for replacement.} -\item{take.gene.union}{Defuncted. 
Will be ignored.} +\item{take.gene.union}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#defunct}{\figure{lifecycle-defunct.svg}{options: alt='[Defunct]'}}}{\strong{[Defunct]}} Will be ignored.} } \description{ This function allows creating \linkS4class{liger} object from diff --git a/man/exportInteractTrack.Rd b/man/exportInteractTrack.Rd index 9a9ae76..550ed26 100644 --- a/man/exportInteractTrack.Rd +++ b/man/exportInteractTrack.Rd @@ -43,7 +43,7 @@ if (requireNamespace("RcppPlanc", quietly = TRUE) && requireNamespace("IRanges", quietly = TRUE) && requireNamespace("psych", quietly = TRUE)) { bmmc <- runINMF(bmmc) - bmmc <- quantileNorm(bmmc) + bmmc <- alignFactors(bmmc) bmmc <- normalizePeak(bmmc) bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna") corr <- linkGenesAndPeaks( diff --git a/man/figures/lifecycle-archived.svg b/man/figures/lifecycle-archived.svg new file mode 100644 index 0000000..745ab0c --- /dev/null +++ b/man/figures/lifecycle-archived.svg @@ -0,0 +1,21 @@ + + lifecycle: archived + + + + + + + + + + + + + + + lifecycle + + archived + + diff --git a/man/figures/lifecycle-defunct.svg b/man/figures/lifecycle-defunct.svg new file mode 100644 index 0000000..d5c9559 --- /dev/null +++ b/man/figures/lifecycle-defunct.svg @@ -0,0 +1,21 @@ + + lifecycle: defunct + + + + + + + + + + + + + + + lifecycle + + defunct + + diff --git a/man/figures/lifecycle-deprecated.svg b/man/figures/lifecycle-deprecated.svg new file mode 100644 index 0000000..b61c57c --- /dev/null +++ b/man/figures/lifecycle-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: deprecated + + + + + + + + + + + + + + + lifecycle + + deprecated + + diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg new file mode 100644 index 0000000..5d88fc2 --- /dev/null +++ b/man/figures/lifecycle-experimental.svg @@ -0,0 +1,21 @@ + + lifecycle: experimental + + + + + + + + + + + + + + + lifecycle + + experimental + + diff --git 
a/man/figures/lifecycle-maturing.svg b/man/figures/lifecycle-maturing.svg new file mode 100644 index 0000000..897370e --- /dev/null +++ b/man/figures/lifecycle-maturing.svg @@ -0,0 +1,21 @@ + + lifecycle: maturing + + + + + + + + + + + + + + + lifecycle + + maturing + + diff --git a/man/figures/lifecycle-questioning.svg b/man/figures/lifecycle-questioning.svg new file mode 100644 index 0000000..7c1721d --- /dev/null +++ b/man/figures/lifecycle-questioning.svg @@ -0,0 +1,21 @@ + + lifecycle: questioning + + + + + + + + + + + + + + + lifecycle + + questioning + + diff --git a/man/figures/lifecycle-soft-deprecated.svg b/man/figures/lifecycle-soft-deprecated.svg new file mode 100644 index 0000000..9c166ff --- /dev/null +++ b/man/figures/lifecycle-soft-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: soft-deprecated + + + + + + + + + + + + + + + lifecycle + + soft-deprecated + + diff --git a/man/figures/lifecycle-stable.svg b/man/figures/lifecycle-stable.svg new file mode 100644 index 0000000..9bf21e7 --- /dev/null +++ b/man/figures/lifecycle-stable.svg @@ -0,0 +1,29 @@ + + lifecycle: stable + + + + + + + + + + + + + + + + lifecycle + + + + stable + + + diff --git a/man/figures/lifecycle-superseded.svg b/man/figures/lifecycle-superseded.svg new file mode 100644 index 0000000..db8d757 --- /dev/null +++ b/man/figures/lifecycle-superseded.svg @@ -0,0 +1,21 @@ + + lifecycle: superseded + + + + + + + + + + + + + + + lifecycle + + superseded + + diff --git a/man/getFactorMarkers.Rd b/man/getFactorMarkers.Rd index 591681b..25e9181 100644 --- a/man/getFactorMarkers.Rd +++ b/man/getFactorMarkers.Rd @@ -57,9 +57,9 @@ UMI and frac thresholds, when \code{verbose = TRUE}. 
Default \code{FALSE}.} } \value{ A list object consisting of the following entries: -\item{[value of `dataset1`]}{data.frame of dataset1-specific markers} +\item{value of \code{dataset1}}{data.frame of dataset1-specific markers} \item{shared}{data.frame of shared markers} -\item{[value of `dataset1`]}{data.frame of dataset2-specific markers} +\item{value of \code{dataset2}}{data.frame of dataset2-specific markers} \item{num_factors_V1}{A frequency table indicating the number of factors each marker appears, in dataset1} \item{num_factors_V2}{A frequency table indicating the number of factors each diff --git a/man/imputeKNN.Rd b/man/imputeKNN.Rd index c45af2c..ff43289 100644 --- a/man/imputeKNN.Rd +++ b/man/imputeKNN.Rd @@ -58,11 +58,11 @@ normalized imputed peak counts if \code{norm = TRUE}. } \description{ This function is designed for creating peak data for a dataset with only gene -expression. This function uses quantile normalized cell factor loading to -find nearest neighbors between cells from the queried dataset (without peak) -and cells from reference dataset (with peak). And then impute the peak for -the former basing on the weight. Therefore, the reference dataset selected -must be of "atac" modality setting. +expression. This function uses aligned cell factor loading to find nearest +neighbors between cells from the queried dataset (without peak) and cells +from reference dataset (with peak). And then impute the peak for the former +basing on the weight. Therefore, the reference dataset selected must be of +"atac" modality setting. 
} \examples{ bmmc <- normalize(bmmc) @@ -70,7 +70,7 @@ bmmc <- selectGenes(bmmc, datasets.use = "rna") bmmc <- scaleNotCenter(bmmc) if (requireNamespace("RcppPlanc", quietly = TRUE)) { bmmc <- runINMF(bmmc, k = 20) - bmmc <- quantileNorm(bmmc) + bmmc <- alignFactors(bmmc) bmmc <- normalizePeak(bmmc) bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna") } diff --git a/man/liger-DEG.Rd b/man/liger-DEG.Rd index a48455d..88f69d1 100644 --- a/man/liger-DEG.Rd +++ b/man/liger-DEG.Rd @@ -99,35 +99,34 @@ cells, while \code{"datasets"} run within each cluster and compare each dataset against all other datasets.} } \value{ -A data.frame with DEG information with the following field: -\enumerate{ - \item{feature - Gene names} - \item{group - Test group name. Multiple tests might be present for each - function call. This is the main variable to distinguish the tests. For a - pairwise test, a row with a certain group name represents the test result - between the this group against the other control group; When split by a - variable, it would be presented in "split.group" format, meaning the stats - is by comparing the group in the split level against the control group in - the same split level. When running marker detection without splitting, - a row with group "a" represents the stats of the gene in group "a" against - all other cells. When running split marker detection, the group name would - be in "split.group" format, meaning the stats is by comparing the group in - the split level against all other cells in the same split level.} - \item{logFC - Log fold change} - \item{pval - P-value} - \item{padj - Adjusted p-value} - \item{avgExpr - Mean expression in the test group indicated by the "group" - field. Only available for wilcoxon tests.} - \item{statistic - Wilcoxon rank-sum test statistic. Only available for - wilcoxon tests.} - \item{auc - Area under the ROC curve. 
Only available for wilcoxon tests.} - \item{pct_in - Percentage of cells in the test group, indicated by the - "group" field, that express the feature. Only available for wilcoxon - tests.} - \item{pct_out - Percentage of cells in the control group or other cells, as - explained for the "group" field, that express the feature. Only available - for wilcoxon tests.} -} +A data.frame with DEG information with all or some of the +following fields: +\item{feature}{Gene names} +\item{group}{Test group name. Multiple tests might be present for each +function call. This is the main variable to distinguish the tests. For a +pairwise test, a row with a certain group name represents the test result +of this group against the control group; when split by a +variable, it would be presented in "split.group" format, meaning the stats +is by comparing the group in the split level against the control group in +the same split level. When running marker detection without splitting, +a row with group "a" represents the stats of the gene in group "a" against +all other cells. When running split marker detection, the group name would +be in "split.group" format, meaning the stats is by comparing the group in +the split level against all other cells in the same split level.} +\item{logFC}{Log fold change} +\item{pval}{P-value} +\item{padj}{Adjusted p-value} +\item{avgExpr}{Mean expression in the test group indicated by the "group" +field. Only available for wilcoxon tests.} +\item{statistic}{Wilcoxon rank-sum test statistic. Only available for +wilcoxon tests.} +\item{auc}{Area under the ROC curve. Only available for wilcoxon tests.} +\item{pct_in}{Percentage of cells in the test group, indicated by the +"group" field, that express the feature. Only available for wilcoxon +tests.} +\item{pct_out}{Percentage of cells in the control group or other cells, as +explained for the "group" field, that express the feature. 
Only available +for wilcoxon tests.} } \description{ Two methods are supported: \code{"pseudoBulk"} and @@ -142,14 +141,14 @@ While using pseudo-bulk method, it is generally recommended that you have these variables available in your object: \enumerate{ - \item{The cell type or cluster labeling. This can be obtained from prior - study or computed with \code{\link{runCluster}}} - \item{The biological replicate labeling, most of the time the - \code{"dataset"} variable automatically generated when the - \linkS4class{liger} object is created. Users may use other variables if - a "dataset" is merged from multiple replicates.} - \item{The condition labeling that reflects the study design, such as the - treatment or disease status for each sample/dataset.} +\item{The cell type or cluster labeling. This can be obtained from prior +study or computed with \code{\link{runCluster}}} +\item{The biological replicate labeling, most of the time the +\code{"dataset"} variable automatically generated when the +\linkS4class{liger} object is created. Users may use other variables if +a "dataset" is merged from multiple replicates.} +\item{The condition labeling that reflects the study design, such as the +treatment or disease status for each sample/dataset.} } Please see below for detailed scenarios. diff --git a/man/liger-class.Rd b/man/liger-class.Rd index 4fa20dc..68a2ee8 100644 --- a/man/liger-class.Rd +++ b/man/liger-class.Rd @@ -412,7 +412,7 @@ section accordingly.} \code{\link{runIntegration}}.} \item{\code{H.norm}}{Matrix of aligned factor loading for each cell. See -\code{\link{quantileNorm}} and \code{\link{runIntegration}}.} +\code{\link{alignFactors}} and \code{\link{runIntegration}}.} \item{\code{commands}}{List of \linkS4class{ligerCommand} objects. Record of analysis. Use \code{commands} to retrieve information. 
See detailed section diff --git a/man/ligerToSeurat.Rd b/man/ligerToSeurat.Rd index f588a7f..676e35b 100644 --- a/man/ligerToSeurat.Rd +++ b/man/ligerToSeurat.Rd @@ -29,17 +29,18 @@ default cluster labeling to set the Idents. Default \code{FALSE}.} \item{merge}{Logical, whether to merge layers of different datasets into one. Not recommended. Default \code{FALSE}.} -\item{nms}{[Defunct] Will be ignored because new object structure does not -have related problem.} +\item{nms}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#defunct}{\figure{lifecycle-defunct.svg}{options: alt='[Defunct]'}}}{\strong{[Defunct]}} Will be ignored because new object +structure does not have related problem.} -\item{renormalize}{[Defunct] Will be ignored because since Seurat V5, layers -of data can exist at the same time and it is better to left it for users to -do it by themselves.} +\item{renormalize}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#defunct}{\figure{lifecycle-defunct.svg}{options: alt='[Defunct]'}}}{\strong{[Defunct]}} Will be ignored because +since Seurat V5, layers of data can exist at the same time and it is better +to left it for users to do it by themselves.} -\item{use.liger.genes}{[Defunct] Will be ignored and will always set LIGER -variable features to the place.} +\item{use.liger.genes}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#defunct}{\figure{lifecycle-defunct.svg}{options: alt='[Defunct]'}}}{\strong{[Defunct]}} Will be ignored and +will always set LIGER variable features to the place.} -\item{by.dataset}{[Deprecated]. Use \code{identByDataset} instead.} +\item{by.dataset}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}. Use +\code{identByDataset} instead.} } \value{ Always returns Seurat object(s) of the latest version. 
By default a diff --git a/man/linkGenesAndPeaks.Rd b/man/linkGenesAndPeaks.Rd index 23a1365..aa342e1 100644 --- a/man/linkGenesAndPeaks.Rd +++ b/man/linkGenesAndPeaks.Rd @@ -65,7 +65,7 @@ if (requireNamespace("RcppPlanc", quietly = TRUE) && bmmc <- selectGenes(bmmc) bmmc <- scaleNotCenter(bmmc) bmmc <- runINMF(bmmc, miniBatchSize = 100) - bmmc <- quantileNorm(bmmc) + bmmc <- alignFactors(bmmc) bmmc <- normalizePeak(bmmc) bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna") corr <- linkGenesAndPeaks( diff --git a/man/louvainCluster-deprecated.Rd b/man/louvainCluster-deprecated.Rd index 0329eeb..faa744a 100644 --- a/man/louvainCluster-deprecated.Rd +++ b/man/louvainCluster-deprecated.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/clustering.R \name{louvainCluster-deprecated} \alias{louvainCluster-deprecated} -\title{[Deprecated] Louvain algorithm for community detection} +\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Louvain algorithm for community detection} \arguments{ \item{object}{\code{liger} object. 
Should run quantile_norm before calling.} diff --git a/man/makeInteractTrack-deprecated.Rd b/man/makeInteractTrack-deprecated.Rd index 9b6de3a..40723a5 100644 --- a/man/makeInteractTrack-deprecated.Rd +++ b/man/makeInteractTrack-deprecated.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ATAC.R \name{makeInteractTrack-deprecated} \alias{makeInteractTrack-deprecated} -\title{[Deprecated] Export predicted gene-pair interaction} +\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Export predicted gene-pair interaction} \arguments{ \item{corr.mat}{A sparse matrix of correlation with peak names as rows and gene names as columns.} diff --git a/man/makeRiverplot-deprecated.Rd b/man/makeRiverplot-deprecated.Rd index 52828a7..80101e5 100644 --- a/man/makeRiverplot-deprecated.Rd +++ b/man/makeRiverplot-deprecated.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/visualization.R \name{makeRiverplot-deprecated} \alias{makeRiverplot-deprecated} -\title{[Deprecated] Generate a river (Sankey) plot} +\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Generate a river (Sankey) plot} \arguments{ \item{object}{\code{liger} object. Should run quantileAlignSNF before calling.} diff --git a/man/normalize.Rd b/man/normalize.Rd index f403a45..d3df14b 100644 --- a/man/normalize.Rd +++ b/man/normalize.Rd @@ -77,13 +77,13 @@ Default \code{"ligerNormData"}. For older Seurat, stored to \code{data} slot.} \value{ Updated \code{object}. 
\itemize{ - \item{dgCMatrix method - Returns processed dgCMatrix object} - \item{ligerDataset method - Updates the \code{normData} slot of the object} - \item{liger method - Updates the \code{normData} slot of chosen datasets} - \item{Seurat method - Adds a named layer in chosen assay (V5), or update the - \code{data} slot of the chosen assay (<=V4)} - \item{\code{normalizePeak} - Updates the \code{normPeak} slot of chosen - datasets.} +\item{dgCMatrix method - Returns processed dgCMatrix object} +\item{ligerDataset method - Updates the \code{normData} slot of the object} +\item{liger method - Updates the \code{normData} slot of chosen datasets} +\item{Seurat method - Adds a named layer in chosen assay (V5), or update the +\code{data} slot of the chosen assay (<=V4)} +\item{\code{normalizePeak} - Updates the \code{normPeak} slot of chosen +datasets.} } } \description{ diff --git a/man/online_iNMF-deprecated.Rd b/man/online_iNMF-deprecated.Rd index 8f21488..dd41478 100644 --- a/man/online_iNMF-deprecated.Rd +++ b/man/online_iNMF-deprecated.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/integration.R \name{online_iNMF-deprecated} \alias{online_iNMF-deprecated} -\title{[Deprecated] Perform online iNMF on scaled datasets} +\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Perform online iNMF on scaled datasets} \arguments{ \item{object}{\code{liger} object with data stored in HDF5 files. Should normalize, select genes, and scale before calling.} @@ -28,12 +28,12 @@ A value in the range 20-50 works well for most analyses.} \item{lambda}{Regularization parameter. Larger values penalize dataset-specific effects more - strongly (ie. alignment should increase as lambda increases). We recommend - always using the default value except - possibly for analyses with relatively small differences (biological - replicates, male/female comparisons, etc.) 
- in which case a lower value such as 1.0 may improve reconstruction quality. - (default 5.0).} +strongly (ie. alignment should increase as lambda increases). We recommend +always using the default value except +possibly for analyses with relatively small differences (biological +replicates, male/female comparisons, etc.) +in which case a lower value such as 1.0 may improve reconstruction quality. +(default 5.0).} \item{max.epochs}{Maximum number of epochs (complete passes through the data). (default 5)} diff --git a/man/optimizeALS-deprecated.Rd b/man/optimizeALS-deprecated.Rd index 9cbf286..279f1fe 100644 --- a/man/optimizeALS-deprecated.Rd +++ b/man/optimizeALS-deprecated.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/integration.R \name{optimizeALS-deprecated} \alias{optimizeALS-deprecated} -\title{[Deprecated] Perform iNMF on scaled datasets} +\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Perform iNMF on scaled datasets} \arguments{ \item{object}{\code{liger} object. Should normalize, select genes, and scale before calling.} diff --git a/man/plotEnhancedVolcano.Rd b/man/plotEnhancedVolcano.Rd index b4453a4..2aea446 100644 --- a/man/plotEnhancedVolcano.Rd +++ b/man/plotEnhancedVolcano.Rd @@ -24,6 +24,7 @@ ggplot Create volcano plot with EnhancedVolcano } \examples{ +\donttest{ if (requireNamespace("EnhancedVolcano", quietly = TRUE)) { defaultCluster(pbmc) <- pbmcPlot$leiden_cluster # Test the DEG between "stim" and "ctrl", within each cluster @@ -37,3 +38,4 @@ if (requireNamespace("EnhancedVolcano", quietly = TRUE)) { plotEnhancedVolcano(result, "0.stim") } } +} diff --git a/man/plotGODot.Rd b/man/plotGODot.Rd index 3372dc4..9cbeac5 100644 --- a/man/plotGODot.Rd +++ b/man/plotGODot.Rd @@ -71,6 +71,7 @@ A ggplot object if only one group or a list of ggplot objects. 
Visualize GO enrichment test result in dot plot } \examples{ +\donttest{ defaultCluster(pbmc) <- pbmcPlot$leiden_cluster # Test the DEG between "stim" and "ctrl", within each cluster result <- runPairwiseDEG( @@ -82,7 +83,6 @@ result <- runPairwiseDEG( ) # Setting `significant = FALSE` because it's hard for a gene list obtained # from small test dataset to represent real-life biology. -\donttest{ if (requireNamespace("gprofiler2", quietly = TRUE)) { go <- runGOEnrich(result, group = "0.stim", significant = FALSE) # The toy example won't have significant result. diff --git a/man/quantileAlignSNF.Rd b/man/quantileAlignSNF.Rd index 61f38ba..3a1c613 100644 --- a/man/quantileAlignSNF.Rd +++ b/man/quantileAlignSNF.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/deprecated.R \name{quantileAlignSNF} \alias{quantileAlignSNF} -\title{Quantile align (normalize) factor loadings} +\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Quantile align (normalize) factor loadings} \usage{ quantileAlignSNF( object, @@ -48,7 +48,7 @@ starts (default 10).} communities. (default 1)} \item{dims.use}{Indices of factors to use for shared nearest factor determination (default -1:ncol(H[[1]])).} +\code{1:ncol(H[[1]])}).} \item{dist.use}{Distance metric to use in calculating nearest neighbors (default "CR").} diff --git a/man/quantileNorm.Rd b/man/quantileNorm.Rd index 7ebd079..29fb5cb 100644 --- a/man/quantileNorm.Rd +++ b/man/quantileNorm.Rd @@ -96,21 +96,21 @@ stored. 
Default \code{"inmf"}.} \value{ Updated input object \itemize{ - \item{liger method - \itemize{ - \item{Update the \code{H.norm} slot for the alignment cell factor - loading, ready for running graph based community detection - clustering or dimensionality reduction for visualization.} - \item{Update the \code{cellMata} slot with a cluster assignment basing - on cell factor loading} - }} - \item{Seurat method - \itemize{ - \item{Update the \code{reductions} slot with a new \code{DimReduc} - object containing the aligned cell factor loading.} - \item{Update the metadata with a cluster assignment basing on cell - factor loading} - }} +\item{liger method +\itemize{ +\item{Update the \code{H.norm} slot for the aligned cell factor +loading, ready for running graph based community detection +clustering or dimensionality reduction for visualization.} +\item{Update the \code{cellMeta} slot with a cluster assignment basing +on cell factor loading} +}} +\item{Seurat method +\itemize{ +\item{Update the \code{reductions} slot with a new \code{DimReduc} +object containing the aligned cell factor loading.} +\item{Update the metadata with a cluster assignment basing on cell +factor loading} +}} } } \description{ diff --git a/man/quantile_norm-deprecated.Rd b/man/quantile_norm-deprecated.Rd index ea4f094..5734f20 100644 --- a/man/quantile_norm-deprecated.Rd +++ b/man/quantile_norm-deprecated.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/integration.R \name{quantile_norm-deprecated} \alias{quantile_norm-deprecated} -\title{[Deprecated] Quantile align (normalize) factor loading} +\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Quantile align (normalize) factor loading} \arguments{ \item{object}{\code{liger} object. 
Should run optimizeALS before calling.} @@ -23,7 +23,7 @@ Lower values give more accurate nearest neighbor graphs but take much longer to computer.} \item{dims.use}{Indices of factors to use for shared nearest factor -determination (default 1:ncol(H[[1]])).} +determination (default \code{1:ncol(H[[1]])}).} \item{do.center}{Centers the data when scaling factors (useful for less sparse modalities like methylation data). (default FALSE)} diff --git a/man/read10X.Rd b/man/read10X.Rd index 9927e8e..9881d70 100644 --- a/man/read10X.Rd +++ b/man/read10X.Rd @@ -49,10 +49,10 @@ read10XATAC( ) } \arguments{ -\item{path}{[A.] A Directory containing the matrix.mtx, genes.tsv (or +\item{path}{(A.) A Directory containing the matrix.mtx, genes.tsv (or features.tsv), and barcodes.tsv files provided by 10X. A vector, a named vector, a list or a named list can be given in order to load several data -directories. [B.] The 10X root directory where subdirectories of per-sample +directories. (B.) The 10X root directory where subdirectories of per-sample output folders can be found. Sample names will by default take the name of the vector, list or subfolders.} @@ -106,17 +106,17 @@ show available options if argument specification cannot be found.} } \value{ \itemize{ - \item{When only one sample is given or detected, and only one feature type - is detected or using CellRanger < 3.0, and \code{returnList = FALSE}, a - sparse matrix object (dgCMatrix class) will be returned.} - \item{When using \code{read10XRNA} or \code{read10XATAC}, which are modality - specific, returns a list named by samples, and each element is the - corresponding sparse matrix object (dgCMatrix class).} - \item{\code{read10X} generally returns a list named by samples. Each sample - element will be another list named by feature types even if only one feature - type is detected (or using CellRanger < 3.0) for data structure consistency. 
- The feature type "Gene Expression" always comes as the first type if - available.} +\item{When only one sample is given or detected, and only one feature type +is detected or using CellRanger < 3.0, and \code{returnList = FALSE}, a +sparse matrix object (dgCMatrix class) will be returned.} +\item{When using \code{read10XRNA} or \code{read10XATAC}, which are modality +specific, returns a list named by samples, and each element is the +corresponding sparse matrix object (dgCMatrix class).} +\item{\code{read10X} generally returns a list named by samples. Each sample +element will be another list named by feature types even if only one feature +type is detected (or using CellRanger < 3.0) for data structure consistency. +The feature type "Gene Expression" always comes as the first type if +available.} } } \description{ diff --git a/man/readLiger.Rd b/man/readLiger.Rd index 1bfe6f4..ee9d1e4 100644 --- a/man/readLiger.Rd +++ b/man/readLiger.Rd @@ -36,11 +36,14 @@ New version of \linkS4class{liger} object } \description{ This file reads a liger object stored in RDS files under all kinds of types. -1. A \linkS4class{liger} object with in-memory data created from package -version since 1.99. 2. A liger object with on-disk H5 data associated, where -the link to H5 files will be automatically restored. 3. A liger object -created with older package version, and can be updated to the latest data -structure by default. +\enumerate{ +\item A \linkS4class{liger} object with in-memory data created from package +version since 1.99. +\item A liger object with on-disk H5 data associated, where the link to H5 files +will be automatically restored. +\item A liger object created with older package version, and can be updated to +the latest data structure by default. 
+} } \examples{ # Save and read regular current-version liger object diff --git a/man/readSubset.Rd b/man/readSubset.Rd index 8977558..488da63 100644 --- a/man/readSubset.Rd +++ b/man/readSubset.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/downsample.R \name{readSubset} \alias{readSubset} -\title{[Deprecated] See \code{\link{downsample}}} +\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} See \code{\link{downsample}}} \usage{ readSubset( object, diff --git a/man/rliger-deprecated.Rd b/man/rliger-deprecated.Rd index 9ef3e3f..546da1b 100644 --- a/man/rliger-deprecated.Rd +++ b/man/rliger-deprecated.Rd @@ -102,9 +102,9 @@ makeRiverplot( } \description{ The functions listed below are deprecated and will be defunct in - the near future. When possible, alternative functions with similar - functionality or a replacement are also mentioned. Help pages for - deprecated functions are available at \code{help("-deprecated")}. +the near future. When possible, alternative functions with similar +functionality or a replacement are also mentioned. Help pages for +deprecated functions are available at \code{help("-deprecated")}. } \section{\code{makeInteractTrack}}{ diff --git a/man/rliger-package.Rd b/man/rliger-package.Rd new file mode 100644 index 0000000..46a2b8b --- /dev/null +++ b/man/rliger-package.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rliger-package.R +\docType{package} +\name{rliger-package} +\alias{rliger} +\alias{rliger-package} +\title{rliger: Linked Inference of Genomic Experimental Relationships} +\description{ +Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) \doi{10.1016/j.cell.2019.05.006}, and Liu J, Gao C, Sodicoff J, et al (2020) \doi{10.1038/s41596-020-0391-8} for more details. 
+} +\seealso{ +Useful links: +\itemize{ + \item \url{https://welch-lab.github.io/liger/} + \item Report bugs at \url{https://github.com/welch-lab/liger/issues} +} + +} +\author{ +\strong{Maintainer}: Yichen Wang \email{wayichen@umich.edu} + +Authors: +\itemize{ + \item Joshua Welch \email{welchjd@umich.edu} + \item Chao Gao \email{gchao@umich.edu} + \item Jialin Liu \email{alanliu@umich.edu} + \item Joshua Sodicoff \email{sodicoff@umich.edu} [contributor] + \item Velina Kozareva [contributor] + \item Evan Macosko [contributor] +} + +Other contributors: +\itemize{ + \item Paul Hoffman [contributor] + \item Ilya Korsunsky [contributor] + \item Robert Lee [contributor] + \item Andrew Robbins \email{robbiand@med.umich.edu} [contributor] +} + +} +\keyword{internal} diff --git a/man/runCINMF.Rd b/man/runCINMF.Rd index 80d98c5..283b799 100644 --- a/man/runCINMF.Rd +++ b/man/runCINMF.Rd @@ -103,30 +103,31 @@ feature key. Default \code{"cinmf"}.} } \value{ \itemize{ - \item{liger method - Returns updated input \linkS4class{liger} object - \itemize{ - \item{A list of all \eqn{H} matrices can be accessed with - \code{getMatrix(object, "H")}} - \item{A list of all \eqn{V} matrices can be accessed with - \code{getMatrix(object, "V")}} - \item{The \eqn{W} matrix can be accessed with - \code{getMatrix(object, "W")}} - }} - \item{Seurat method - Returns updated input Seurat object - \itemize{ - \item{\eqn{H} matrices for all datasets will be concatenated and - transposed (all cells by k), and form a DimReduc object in the - \code{reductions} slot named by argument \code{reduction}.} - \item{\eqn{W} matrix will be presented as \code{feature.loadings} in the - same DimReduc object.} - \item{\eqn{V} matrices, an objective error value and the dataset - variable used for the factorization is currently stored in - \code{misc} slot of the same DimReduc object.} - }} +\item{liger method - Returns updated input \linkS4class{liger} object +\itemize{ +\item{A list of all \eqn{H} matrices can 
be accessed with +\code{getMatrix(object, "H")}} +\item{A list of all \eqn{V} matrices can be accessed with +\code{getMatrix(object, "V")}} +\item{The \eqn{W} matrix can be accessed with +\code{getMatrix(object, "W")}} +}} +\item{Seurat method - Returns updated input Seurat object +\itemize{ +\item{\eqn{H} matrices for all datasets will be concatenated and +transposed (all cells by k), and form a DimReduc object in the +\code{reductions} slot named by argument \code{reduction}.} +\item{\eqn{W} matrix will be presented as \code{feature.loadings} in the +same DimReduc object.} +\item{\eqn{V} matrices, an objective error value and the dataset +variable used for the factorization is currently stored in +\code{misc} slot of the same DimReduc object.} +}} } } \description{ -\bold{NOT STABLE} - This is an experimental function and is subject to change. +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} This is an experimental function and is +subject to change. Performs consensus integrative non-negative matrix factorization (c-iNMF) to return factorized \eqn{H}, \eqn{W}, and \eqn{V} matrices. In order to diff --git a/man/runCluster.Rd b/man/runCluster.Rd index 512d2b4..3abd7ae 100644 --- a/man/runCluster.Rd +++ b/man/runCluster.Rd @@ -81,15 +81,14 @@ variable in \code{cellMeta} slot. Can be fetched with be stored at \code{object@uns$snn}. } \description{ -After quantile normalization, users can additionally run the Leiden or +After aligning cell factor loadings, users can additionally run the Leiden or Louvain algorithm for community detection, which is widely used in single-cell analysis and excels at merging small clusters into broad cell classes. 
-While using quantile normalized factor loadings (result from -\code{\link{quantileNorm}}) is recommended, this function looks for -unnormalized factor loadings (result from \code{\link{runIntegration}}) when -the former is not available. +While using aligned factor loadings (result from \code{\link{alignFactors}}) +is recommended, this function looks for unaligned factor loadings (raw result +from \code{\link{runIntegration}}) when the former is not available. } \examples{ pbmcPlot <- runCluster(pbmcPlot) diff --git a/man/runGeneralQC.Rd b/man/runGeneralQC.Rd index 51a1c2c..64d190d 100644 --- a/man/runGeneralQC.Rd +++ b/man/runGeneralQC.Rd @@ -50,9 +50,9 @@ on HDF5 based dataset. Default \code{1000}} \item{verbose}{Logical. Whether to show information of the progress. Default \code{getOption("ligerVerbose")} or \code{TRUE} if users have not set.} -\item{mito, ribo, hemo}{[Deprecated] Now will always compute the percentages -of mitochondrial, ribosomal and hemoglobin gene counts. These arguments will -be ignored.} +\item{mito, ribo, hemo}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Now will always +compute the percentages of mitochondrial, ribosomal and hemoglobin gene +counts. These arguments will be ignored.} } \value{ Updated \code{object} with the \code{cellMeta(object)} updated as diff --git a/man/runINMF.Rd b/man/runINMF.Rd index 78dcea4..071cc45 100644 --- a/man/runINMF.Rd +++ b/man/runINMF.Rd @@ -100,26 +100,26 @@ feature key. 
Default \code{"inmf"}.} } \value{ \itemize{ - \item{liger method - Returns updated input \linkS4class{liger} object - \itemize{ - \item{A list of all \eqn{H} matrices can be accessed with - \code{getMatrix(object, "H")}} - \item{A list of all \eqn{V} matrices can be accessed with - \code{getMatrix(object, "V")}} - \item{The \eqn{W} matrix can be accessed with - \code{getMatrix(object, "W")}} - }} - \item{Seurat method - Returns updated input Seurat object - \itemize{ - \item{\eqn{H} matrices for all datasets will be concatenated and - transposed (all cells by k), and form a DimReduc object in the - \code{reductions} slot named by argument \code{reduction}.} - \item{\eqn{W} matrix will be presented as \code{feature.loadings} in the - same DimReduc object.} - \item{\eqn{V} matrices, an objective error value and the dataset - variable used for the factorization is currently stored in - \code{misc} slot of the same DimReduc object.} - }} +\item{liger method - Returns updated input \linkS4class{liger} object +\itemize{ +\item{A list of all \eqn{H} matrices can be accessed with +\code{getMatrix(object, "H")}} +\item{A list of all \eqn{V} matrices can be accessed with +\code{getMatrix(object, "V")}} +\item{The \eqn{W} matrix can be accessed with +\code{getMatrix(object, "W")}} +}} +\item{Seurat method - Returns updated input Seurat object +\itemize{ +\item{\eqn{H} matrices for all datasets will be concatenated and +transposed (all cells by k), and form a DimReduc object in the +\code{reductions} slot named by argument \code{reduction}.} +\item{\eqn{W} matrix will be presented as \code{feature.loadings} in the +same DimReduc object.} +\item{\eqn{V} matrices, an objective error value and the dataset +variable used for the factorization is currently stored in +\code{misc} slot of the same DimReduc object.} +}} } } \description{ diff --git a/man/runIntegration.Rd b/man/runIntegration.Rd index 05648ab..e0a44c2 100644 --- a/man/runIntegration.Rd +++ b/man/runIntegration.Rd @@ 
-76,8 +76,8 @@ linked in Description. } \description{ LIGER provides dataset integration methods based on iNMF (integrative -Non-negative Matrix Factorization [1]) and its variants (online iNMF [2] and -UINMF [3]). This function wraps \code{\link{runINMF}}, +Non-negative Matrix Factorization [1]) and its variants (online iNMF [2] +and UINMF [3]). This function wraps \code{\link{runINMF}}, \code{\link{runOnlineINMF}} and \code{\link{runUINMF}}, of which the help pages have more detailed description. } @@ -91,11 +91,11 @@ if (requireNamespace("RcppPlanc", quietly = TRUE)) { } \references{ \enumerate{ - \item{Joshua D. Welch and et al., Single-Cell Multi-omic Integration Compares +\item{Joshua D. Welch and et al., Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity, Cell, 2019} - \item{Chao Gao and et al., Iterative single-cell multi-omic integration using +\item{Chao Gao and et al., Iterative single-cell multi-omic integration using online learning, Nat Biotechnol., 2021} - \item{April R. Kriebel and Joshua D. Welch, UINMF performs mosaic integration +\item{April R. Kriebel and Joshua D. Welch, UINMF performs mosaic integration of single-cell multi-omic datasets using nonnegative matrix factorization, Nat. Comm., 2022} } diff --git a/man/runOnlineINMF.Rd b/man/runOnlineINMF.Rd index 2751a94..7d34b7a 100644 --- a/man/runOnlineINMF.Rd +++ b/man/runOnlineINMF.Rd @@ -102,29 +102,29 @@ feature key. Default \code{"onlineINMF"}.} } \value{ \itemize{ - \item{liger method - Returns updated input \linkS4class{liger} object. 
- \itemize{ - \item{A list of all \eqn{H} matrices can be accessed with - \code{getMatrix(object, "H")}} - \item{A list of all \eqn{V} matrices can be accessed with - \code{getMatrix(object, "V")}} - \item{The \eqn{W} matrix can be accessed with - \code{getMatrix(object, "W")}} - \item{Meanwhile, intermediate matrices \eqn{A} and \eqn{B} produced in - HALS update can also be accessed similarly.} - } - } - \item{Seurat method - Returns updated input Seurat object. - \itemize{ - \item{\eqn{H} matrices for all datasets will be concatenated and - transposed (all cells by k), and form a DimReduc object in the - \code{reductions} slot named by argument \code{reduction}.} - \item{\eqn{W} matrix will be presented as \code{feature.loadings} in the - same DimReduc object.} - \item{\eqn{V} matrices, \eqn{A} matrices, \eqn{B} matricesm an objective - error value and the dataset variable used for the factorization is - currently stored in \code{misc} slot of the same DimReduc object.} - }} +\item{liger method - Returns updated input \linkS4class{liger} object. +\itemize{ +\item{A list of all \eqn{H} matrices can be accessed with +\code{getMatrix(object, "H")}} +\item{A list of all \eqn{V} matrices can be accessed with +\code{getMatrix(object, "V")}} +\item{The \eqn{W} matrix can be accessed with +\code{getMatrix(object, "W")}} +\item{Meanwhile, intermediate matrices \eqn{A} and \eqn{B} produced in +HALS update can also be accessed similarly.} +} +} +\item{Seurat method - Returns updated input Seurat object. 
+\itemize{ +\item{\eqn{H} matrices for all datasets will be concatenated and +transposed (all cells by k), and form a DimReduc object in the +\code{reductions} slot named by argument \code{reduction}.} +\item{\eqn{W} matrix will be presented as \code{feature.loadings} in the +same DimReduc object.} +\item{\eqn{V} matrices, \eqn{A} matrices, \eqn{B} matricesm an objective +error value and the dataset variable used for the factorization is +currently stored in \code{misc} slot of the same DimReduc object.} +}} } } \description{ @@ -137,9 +137,9 @@ represent multiple single-cell datasets in terms of \eqn{H}, \eqn{W}, and \code{k}. The function allows online learning in 3 scenarios: \enumerate{ - \item Fully observed datasets; - \item Iterative refinement using continually arriving datasets; - \item Projection of new datasets without updating the existing factorization +\item Fully observed datasets; +\item Iterative refinement using continually arriving datasets; +\item Projection of new datasets without updating the existing factorization } All three scenarios require fixed memory independent of the number of cells. @@ -159,14 +159,14 @@ Under the circumstance where users need customized information for existing factorization, arguments \code{WInit}, \code{VInit}, \code{AInit} and \code{BInit} are exposed. The requirements for these argument follows: \itemize{ - \item{WInit - A matrix object of size \eqn{m \times k}. (see - \code{\link{runINMF}} for notation)} - \item{VInit - A list object of matrices each of size \eqn{m \times k}. - Number of matrices should match with \code{newDatasets}.} - \item{AInit - A list object of matrices each of size \eqn{k \times k}. - Number of matrices should match with \code{newDatasets}.} - \item{BInit - A list object of matrices each of size \eqn{m \times k}. - Number of matrices should match with \code{newDatasets}.} +\item{WInit - A matrix object of size \eqn{m \times k}. 
(see +\code{\link{runINMF}} for notation)} +\item{VInit - A list object of matrices each of size \eqn{m \times k}. +Number of matrices should match with \code{newDatasets}.} +\item{AInit - A list object of matrices each of size \eqn{k \times k}. +Number of matrices should match with \code{newDatasets}.} +\item{BInit - A list object of matrices each of size \eqn{m \times k}. +Number of matrices should match with \code{newDatasets}.} } Minibatch iterations is performed on small subset of cells. The exact diff --git a/man/runTSNE.Rd b/man/runTSNE.Rd index 22aabe8..2239292 100644 --- a/man/runTSNE.Rd +++ b/man/runTSNE.Rd @@ -30,7 +30,7 @@ runTSNE( \item{object}{\linkS4class{liger} object with factorization results.} \item{useRaw}{Whether to use un-aligned cell factor loadings (\eqn{H} -matrices). Default \code{NULL} search for quantile-normalized loadings first +matrices). Default \code{NULL} search for aligned factor loadings first and un-aligned loadings then.} \item{useDims}{Index of factors to use for computing the embedding. Default @@ -73,8 +73,8 @@ The \code{object} where a \code{"TSNE"} variable is updated in the \code{cellMeta} slot with the whole 2D embedding matrix. } \description{ -Runs t-SNE on the quantile normalized cell factors (result from -\code{\link{quantileNorm}}), or unnormalized cell factors (result from +Runs t-SNE on the aligned cell factors (result from +\code{\link{alignFactors}}), or unaligned cell factors (result from \code{\link{runIntegration}})) to generate a 2D embedding for visualization. By default \code{\link[Rtsne]{Rtsne}} (Barnes-Hut implementation of t-SNE) method is invoked, while alternative "fftRtsne" method (FFT-accelerated diff --git a/man/runUINMF.Rd b/man/runUINMF.Rd index 3a13b1c..d072a6c 100644 --- a/man/runUINMF.Rd +++ b/man/runUINMF.Rd @@ -53,18 +53,18 @@ Default \code{2L}. Only supported for platform with OpenMP support.} } \value{ \itemize{ - \item{liger method - Returns updated input \linkS4class{liger} object. 
- \itemize{ - \item{A list of all \eqn{H} matrices can be accessed with - \code{getMatrix(object, "H")}} - \item{A list of all \eqn{V} matrices can be accessed with - \code{getMatrix(object, "V")}} - \item{The \eqn{W} matrix can be accessed with - \code{getMatrix(object, "W")}} - \item{A list of all \eqn{U} matrices can be accessed with - \code{getMatrix(object, "U")}} - } - } +\item{liger method - Returns updated input \linkS4class{liger} object. +\itemize{ +\item{A list of all \eqn{H} matrices can be accessed with +\code{getMatrix(object, "H")}} +\item{A list of all \eqn{V} matrices can be accessed with +\code{getMatrix(object, "V")}} +\item{The \eqn{W} matrix can be accessed with +\code{getMatrix(object, "W")}} +\item{A list of all \eqn{U} matrices can be accessed with +\code{getMatrix(object, "U")}} +} +} } } \description{ diff --git a/man/runUMAP.Rd b/man/runUMAP.Rd index 2fffb74..966eece 100644 --- a/man/runUMAP.Rd +++ b/man/runUMAP.Rd @@ -28,7 +28,7 @@ runUMAP( \item{object}{\linkS4class{liger} object with factorization results.} \item{useRaw}{Whether to use un-aligned cell factor loadings (\eqn{H} -matrices). Default \code{NULL} search for quantile-normalized loadings first +matrices). Default \code{NULL} search for aligned factor loadings first and un-aligned loadings then.} \item{useDims}{Index of factors to use for computing the embedding. Default @@ -65,8 +65,8 @@ The \code{object} where a \code{"UMAP"} variable is updated in the \code{cellMeta} slot with the whole 2D embedding matrix. } \description{ -Run UMAP on the quantile normalized cell factors (result from -\code{\link{quantileNorm}}), or unnormalized cell factors (result from +Run UMAP on the aligned cell factors (result from +\code{\link{alignFactors}}), or unaligned cell factors (raw result from \code{\link{runIntegration}})) to generate a 2D embedding for visualization (or general dimensionality reduction). Has option to run on subset of factors. 
It is generally recommended to use this method for dimensionality diff --git a/man/scaleNotCenter.Rd b/man/scaleNotCenter.Rd index a87d5ca..135cefa 100644 --- a/man/scaleNotCenter.Rd +++ b/man/scaleNotCenter.Rd @@ -92,15 +92,15 @@ annotation. Default \code{"orig.ident"}.} \value{ Updated \code{object} \itemize{ - \item{dgCMatrix method - Returns scaled dgCMatrix object} - \item{ligerDataset method - Updates the \code{scaleData} and - \code{scaledUnsharedData} (if unshared variable feature available) slot - of the object} - \item{liger method - Updates the \code{scaleData} and - \code{scaledUnsharedData} (if unshared variable feature available) slot - of chosen datasets} - \item{Seurat method - Adds a named layer in chosen assay (V5), or update the - \code{scale.data} slot of the chosen assay (<=V4)} +\item{dgCMatrix method - Returns scaled dgCMatrix object} +\item{ligerDataset method - Updates the \code{scaleData} and +\code{scaledUnsharedData} (if unshared variable feature available) slot +of the object} +\item{liger method - Updates the \code{scaleData} and +\code{scaledUnsharedData} (if unshared variable feature available) slot +of chosen datasets} +\item{Seurat method - Adds a named layer in chosen assay (V5), or update the +\code{scale.data} slot of the chosen assay (<=V4)} } } \description{ diff --git a/man/selectGenes.Rd b/man/selectGenes.Rd index 46e389c..56e6ac8 100644 --- a/man/selectGenes.Rd +++ b/man/selectGenes.Rd @@ -113,14 +113,14 @@ annotation. Default \code{"orig.ident"}.} \value{ Updated object \itemize{ - \item{liger method - Each involved dataset stored in - \linkS4class{ligerDataset} is updated with its \code{\link{featureMeta}} - slot and \code{varUnsharedFeatures} slot (if requested with - \code{useUnsharedDatasets}), while \code{\link{varFeatures}(object)} will be - updated with the final combined gene set.} - \item{Seurat method - Final selection will be updated at - \code{Seurat::VariableFeatures(object)}. 
Per-dataset information is - stored in the \code{meta.features} slot of the chosen Assay.} +\item{liger method - Each involved dataset stored in +\linkS4class{ligerDataset} is updated with its \code{\link{featureMeta}} +slot and \code{varUnsharedFeatures} slot (if requested with +\code{useUnsharedDatasets}), while \code{\link{varFeatures}(object)} will be +updated with the final combined gene set.} +\item{Seurat method - Final selection will be updated at +\code{Seurat::VariableFeatures(object)}. Per-dataset information is +stored in the \code{meta.features} slot of the chosen Assay.} } } \description{ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index bdcbf02..3864cde 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -30,30 +30,30 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// normalize_dense_cpp -arma::mat normalize_dense_cpp(const arma::mat& x, const arma::uword MARGIN, const arma::uword L); -RcppExport SEXP _rliger_normalize_dense_cpp(SEXP xSEXP, SEXP MARGINSEXP, SEXP LSEXP) { +// moe_correct_ridge_cpp +arma::mat moe_correct_ridge_cpp(const arma::mat& Z_orig, const arma::mat& R, const arma::colvec& lambda, const arma::sp_mat& Phi, const unsigned int B, const unsigned int N); +RcppExport SEXP _rliger_moe_correct_ridge_cpp(SEXP Z_origSEXP, SEXP RSEXP, SEXP lambdaSEXP, SEXP PhiSEXP, SEXP BSEXP, SEXP NSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); - Rcpp::traits::input_parameter< const arma::uword >::type MARGIN(MARGINSEXP); - Rcpp::traits::input_parameter< const arma::uword >::type L(LSEXP); - rcpp_result_gen = Rcpp::wrap(normalize_dense_cpp(x, MARGIN, L)); + Rcpp::traits::input_parameter< const arma::mat& >::type Z_orig(Z_origSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type R(RSEXP); + Rcpp::traits::input_parameter< const arma::colvec& >::type lambda(lambdaSEXP); + Rcpp::traits::input_parameter< const arma::sp_mat& 
>::type Phi(PhiSEXP); + Rcpp::traits::input_parameter< const unsigned int >::type B(BSEXP); + Rcpp::traits::input_parameter< const unsigned int >::type N(NSEXP); + rcpp_result_gen = Rcpp::wrap(moe_correct_ridge_cpp(Z_orig, R, lambda, Phi, B, N)); return rcpp_result_gen; END_RCPP } -// scale_dense_cpp -arma::mat scale_dense_cpp(const arma::mat& x, const arma::uword MARGIN, const bool center, const bool scale); -RcppExport SEXP _rliger_scale_dense_cpp(SEXP xSEXP, SEXP MARGINSEXP, SEXP centerSEXP, SEXP scaleSEXP) { +// normalize_byCol_dense_rcpp +arma::mat normalize_byCol_dense_rcpp(arma::mat x); +RcppExport SEXP _rliger_normalize_byCol_dense_rcpp(SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); - Rcpp::traits::input_parameter< const arma::uword >::type MARGIN(MARGINSEXP); - Rcpp::traits::input_parameter< const bool >::type center(centerSEXP); - Rcpp::traits::input_parameter< const bool >::type scale(scaleSEXP); - rcpp_result_gen = Rcpp::wrap(scale_dense_cpp(x, MARGIN, center, scale)); + Rcpp::traits::input_parameter< arma::mat >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(normalize_byCol_dense_rcpp(x)); return rcpp_result_gen; END_RCPP } @@ -69,46 +69,6 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// colAggregateMedian_dense_cpp -arma::mat colAggregateMedian_dense_cpp(const arma::mat& x, const arma::uvec& group, const arma::uword n); -RcppExport SEXP _rliger_colAggregateMedian_dense_cpp(SEXP xSEXP, SEXP groupSEXP, SEXP nSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); - Rcpp::traits::input_parameter< const arma::uvec& >::type group(groupSEXP); - Rcpp::traits::input_parameter< const arma::uword >::type n(nSEXP); - rcpp_result_gen = Rcpp::wrap(colAggregateMedian_dense_cpp(x, group, n)); - return rcpp_result_gen; -END_RCPP -} -// 
harmony_moe_correct_ridge_cpp -arma::mat harmony_moe_correct_ridge_cpp(const arma::mat& Z_orig, const arma::mat& R, const arma::colvec& lambda, const arma::sp_mat& Phi, const unsigned int B, const unsigned int N); -RcppExport SEXP _rliger_harmony_moe_correct_ridge_cpp(SEXP Z_origSEXP, SEXP RSEXP, SEXP lambdaSEXP, SEXP PhiSEXP, SEXP BSEXP, SEXP NSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type Z_orig(Z_origSEXP); - Rcpp::traits::input_parameter< const arma::mat& >::type R(RSEXP); - Rcpp::traits::input_parameter< const arma::colvec& >::type lambda(lambdaSEXP); - Rcpp::traits::input_parameter< const arma::sp_mat& >::type Phi(PhiSEXP); - Rcpp::traits::input_parameter< const unsigned int >::type B(BSEXP); - Rcpp::traits::input_parameter< const unsigned int >::type N(NSEXP); - rcpp_result_gen = Rcpp::wrap(harmony_moe_correct_ridge_cpp(Z_orig, R, lambda, Phi, B, N)); - return rcpp_result_gen; -END_RCPP -} -// normalize_byCol_dense_rcpp -arma::mat normalize_byCol_dense_rcpp(arma::mat x); -RcppExport SEXP _rliger_normalize_byCol_dense_rcpp(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(normalize_byCol_dense_rcpp(x)); - return rcpp_result_gen; -END_RCPP -} // scaleNotCenter_byRow_rcpp arma::sp_mat scaleNotCenter_byRow_rcpp(arma::sp_mat x); RcppExport SEXP _rliger_scaleNotCenter_byRow_rcpp(SEXP xSEXP) { @@ -243,6 +203,19 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// colAggregateMedian_dense_cpp +arma::mat colAggregateMedian_dense_cpp(const arma::mat& x, const arma::uvec& group, const arma::uword n); +RcppExport SEXP _rliger_colAggregateMedian_dense_cpp(SEXP xSEXP, SEXP groupSEXP, SEXP nSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type 
x(xSEXP); + Rcpp::traits::input_parameter< const arma::uvec& >::type group(groupSEXP); + Rcpp::traits::input_parameter< const arma::uword >::type n(nSEXP); + rcpp_result_gen = Rcpp::wrap(colAggregateMedian_dense_cpp(x, group, n)); + return rcpp_result_gen; +END_RCPP +} // sample_cpp Rcpp::NumericVector sample_cpp(const int x, const int size); RcppExport SEXP _rliger_sample_cpp(SEXP xSEXP, SEXP sizeSEXP) { @@ -425,12 +398,9 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_rliger_RunModularityClusteringCpp", (DL_FUNC) &_rliger_RunModularityClusteringCpp, 9}, - {"_rliger_normalize_dense_cpp", (DL_FUNC) &_rliger_normalize_dense_cpp, 3}, - {"_rliger_scale_dense_cpp", (DL_FUNC) &_rliger_scale_dense_cpp, 4}, - {"_rliger_colNormalize_dense_cpp", (DL_FUNC) &_rliger_colNormalize_dense_cpp, 2}, - {"_rliger_colAggregateMedian_dense_cpp", (DL_FUNC) &_rliger_colAggregateMedian_dense_cpp, 3}, - {"_rliger_harmony_moe_correct_ridge_cpp", (DL_FUNC) &_rliger_harmony_moe_correct_ridge_cpp, 6}, + {"_rliger_moe_correct_ridge_cpp", (DL_FUNC) &_rliger_moe_correct_ridge_cpp, 6}, {"_rliger_normalize_byCol_dense_rcpp", (DL_FUNC) &_rliger_normalize_byCol_dense_rcpp, 1}, + {"_rliger_colNormalize_dense_cpp", (DL_FUNC) &_rliger_colNormalize_dense_cpp, 2}, {"_rliger_scaleNotCenter_byRow_rcpp", (DL_FUNC) &_rliger_scaleNotCenter_byRow_rcpp, 1}, {"_rliger_safe_scale", (DL_FUNC) &_rliger_safe_scale, 3}, {"_rliger_scaleNotCenter_byCol_dense_rcpp", (DL_FUNC) &_rliger_scaleNotCenter_byCol_dense_rcpp, 1}, @@ -442,6 +412,7 @@ static const R_CallMethodDef CallEntries[] = { {"_rliger_rowVarsDense", (DL_FUNC) &_rliger_rowVarsDense, 2}, {"_rliger_SparseRowVarStd", (DL_FUNC) &_rliger_SparseRowVarStd, 4}, {"_rliger_colAggregateSums_sparse", (DL_FUNC) &_rliger_colAggregateSums_sparse, 3}, + {"_rliger_colAggregateMedian_dense_cpp", (DL_FUNC) &_rliger_colAggregateMedian_dense_cpp, 3}, {"_rliger_sample_cpp", (DL_FUNC) &_rliger_sample_cpp, 2}, {"_rliger_updatePseudoBulkRcpp", (DL_FUNC) 
&_rliger_updatePseudoBulkRcpp, 4}, {"_rliger_updateNCellExprRcpp", (DL_FUNC) &_rliger_updateNCellExprRcpp, 4}, diff --git a/src/centoidAlign.cpp b/src/centoidAlign.cpp new file mode 100644 index 0000000..43cabc0 --- /dev/null +++ b/src/centoidAlign.cpp @@ -0,0 +1,85 @@ +#include +// [[Rcpp::depends(RcppArmadillo)]] + +using namespace Rcpp; +using namespace arma; + + +// Credit to https://github.com/immunogenomics/harmony/blob/master/src/harmony.cpp +// Z_orig - D (PC) x N (cell) matrix, input embedding +// R - K (cluster) x N matrix, cluster assignment probability matrix +// lambda - B element vector, ridge regression penalty +// Phi - B x N sparse matrix, generated with Matrix::fac2sparse(lig$dataset) +// B - number of batches +// N - number of cells +// [[Rcpp::export()]] +arma::mat moe_correct_ridge_cpp( + const arma::mat& Z_orig, + const arma::mat& R, + const arma::colvec& lambda, + const arma::sp_mat& Phi, + const unsigned int B, + const unsigned int N +) { + arma::sp_mat intcpt = zeros(1, N); + intcpt = intcpt + 1; + + arma::sp_mat Phi_moe = join_cols(intcpt, Phi); + arma::sp_mat Phi_moe_t = Phi_moe.t(); + + arma::sp_mat _Rk(N, N); + arma::sp_mat lambda_mat(B + 1, B + 1); + arma::colvec lambda_moe = arma::zeros(B + 1); + lambda_moe.subvec(1, B) = lambda; + lambda_mat.diag() = lambda_moe; + arma::mat Z_corr = Z_orig; + unsigned int K = R.n_rows; + arma::mat W(B + 1, K); + + std::vectorindex; + // Create index + std::vectorcounters; + arma::vec sizes(sum(Phi, 1)); + // std::cout << sizes << std::endl; + for (unsigned i = 0; i < sizes.n_elem; i++) { + arma::uvec a(int(sizes(i))); + index.push_back(a); + counters.push_back(0); + } + + arma::sp_mat::const_iterator it = Phi.begin(); + arma::sp_mat::const_iterator it_end = Phi.end(); + for(; it != it_end; ++it) + { + unsigned int row_idx = it.row(); + unsigned int col_idx = it.col(); + index[row_idx](counters[row_idx]++) = col_idx; + } + + // Progress p(K, verbose); + for (unsigned k = 0; k < K; k++) { + // 
p.increment(); + // if (Progress::check_abort()) + // return; + // if (lambda_estimation) { + // lambda_mat.diag() = find_lambda_cpp(alpha, E.row(k).t()); + // } + _Rk.diag() = R.row(k); + arma::sp_mat Phi_Rk = Phi_moe * _Rk; + arma::mat inv_cov(arma::inv(arma::mat(Phi_Rk * Phi_moe_t + lambda_mat))); + + // Calculate R-scaled PCs once + arma::mat Z_tmp = Z_orig.each_row() % R.row(k); + // Generate the betas contribution of the intercept using the data + // This erases whatever was written before in W + W = inv_cov.unsafe_col(0) * sum(Z_tmp, 1).t(); + // Calculate betas by calculating each batch contribution + for(unsigned b=0; b < B; b++) { + // inv_conv is B+1xB+1 whereas index is B long + W += inv_cov.unsafe_col(b + 1) * sum(Z_tmp.cols(index[b]), 1).t(); + } + W.row(0).zeros(); // do not remove the intercept + Z_corr -= W.t() * Phi_Rk; + } + return Z_corr; +} diff --git a/src/cinmf_util.cpp b/src/cinmf_util.cpp deleted file mode 100644 index b04eeab..0000000 --- a/src/cinmf_util.cpp +++ /dev/null @@ -1,187 +0,0 @@ -#include -// [[Rcpp::depends(RcppArmadillo)]] - -using namespace Rcpp; -using namespace arma; - -// [[Rcpp::export()]] -arma::mat normalize_dense_cpp( - const arma::mat& x, - const arma::uword MARGIN = 2, - const arma::uword L = 1 -) { - arma::mat result(x); - if (MARGIN == 1) { - for (int i = 0; i < x.n_rows; ++i) { - double norm = arma::norm(x.row(i), L); - if (norm == 0) { - continue; - } - for (int j = 0; j < x.n_cols; ++j) { - result(i, j) /= norm; - } - } - } else if (MARGIN == 2) { - for (int j = 0; j < x.n_cols; ++j) { - double norm = arma::norm(x.col(j), L); - if (norm == 0) { - continue; - } - for (int i = 0; i < x.n_rows; ++i) { - result(i, j) /= norm; - } - } - } else { - Rcpp::stop("`MARGIN` must be either 1 for each row or 2 for each column."); - } - return result; -} - -// [[Rcpp::export()]] -arma::mat scale_dense_cpp( - const arma::mat& x, - const arma::uword MARGIN = 2, - const bool center = true, - const bool scale = true -) { - if 
(!center && !scale) { - return x; - } - arma::mat result(x); - if (MARGIN == 1) { - for (int i = 0; i < x.n_rows; ++i) { - if (center) { - double mean = arma::mean(x.row(i)); - result.row(i) -= mean; - } - if (scale) { - double sd = arma::stddev(x.row(i)); - if (sd != 0) result.row(i) /= sd; - } - } - } else if (MARGIN == 2) { - for (int j = 0; j < x.n_cols; ++j) { - if (center) { - double mean = arma::mean(x.col(j)); - result.col(j) -= mean; - } - if (scale) { - double sd = arma::stddev(x.col(j)); - if (sd != 0) result.col(j) /= sd; - } - } - } else { - Rcpp::stop("`MARGIN` must be either 1 for each row or 2 for each column."); - } - return result; -} - -// [[Rcpp::export()]] -arma::mat colNormalize_dense_cpp(arma::mat& x, const arma::uword L) { - arma::mat result(x); - for (int j = 0; j < x.n_cols; ++j) { - double norm = arma::norm(x.col(j), L); - if (norm == 0) { - continue; - } - for (int i = 0; i < x.n_rows; ++i) { - result(i, j) /= norm; - } - } - return result; -} - -// x: n features by n selected factors -// group: n-selected-factor integer vector, pre-transformed to 0-base, -// from upstream kmeans clustering. 
-// [[Rcpp::export()]] -arma::mat colAggregateMedian_dense_cpp(const arma::mat& x, const arma::uvec& group, const arma::uword n) { - arma::mat result(x.n_rows, n); - for (int i = 0; i < n; ++i) { - arma::uvec idx = arma::find(group == i); - arma::mat sub_x = x.cols(idx); - arma::vec median = arma::median(sub_x, 1); - result.col(i) = median; - } - return result; -} - -// Credit to https://github.com/immunogenomics/harmony/blob/master/src/harmony.cpp -// Z_orig - D (PC) x N (cell) matrix, input embedding -// R - K (cluster) x N matrix, cluster assignment probability matrix -// lambda - B element vector, ridge regression penalty -// Phi - B x N sparse matrix, generated with Matrix::fac2sparse(lig$dataset) -// B - number of batches -// N - number of cells -// [[Rcpp::export()]] -arma::mat harmony_moe_correct_ridge_cpp( - const arma::mat& Z_orig, - const arma::mat& R, - const arma::colvec& lambda, - const arma::sp_mat& Phi, - const unsigned int B, - const unsigned int N -) { - arma::sp_mat intcpt = zeros(1, N); - intcpt = intcpt + 1; - - arma::sp_mat Phi_moe = join_cols(intcpt, Phi); - arma::sp_mat Phi_moe_t = Phi_moe.t(); - - arma::sp_mat _Rk(N, N); - arma::sp_mat lambda_mat(B + 1, B + 1); - arma::colvec lambda_moe = arma::zeros(B + 1); - lambda_moe.subvec(1, B) = lambda; - lambda_mat.diag() = lambda_moe; - arma::mat Z_corr = Z_orig; - unsigned int K = R.n_rows; - arma::mat W(B + 1, K); - - std::vectorindex; - // Create index - std::vectorcounters; - arma::vec sizes(sum(Phi, 1)); - // std::cout << sizes << std::endl; - for (unsigned i = 0; i < sizes.n_elem; i++) { - arma::uvec a(int(sizes(i))); - index.push_back(a); - counters.push_back(0); - } - - arma::sp_mat::const_iterator it = Phi.begin(); - arma::sp_mat::const_iterator it_end = Phi.end(); - for(; it != it_end; ++it) - { - unsigned int row_idx = it.row(); - unsigned int col_idx = it.col(); - index[row_idx](counters[row_idx]++) = col_idx; - } - - // Progress p(K, verbose); - for (unsigned k = 0; k < K; k++) { - // 
p.increment(); - // if (Progress::check_abort()) - // return; - // if (lambda_estimation) { - // lambda_mat.diag() = find_lambda_cpp(alpha, E.row(k).t()); - // } - _Rk.diag() = R.row(k); - arma::sp_mat Phi_Rk = Phi_moe * _Rk; - arma::mat inv_cov(arma::inv(arma::mat(Phi_Rk * Phi_moe_t + lambda_mat))); - - // Calculate R-scaled PCs once - arma::mat Z_tmp = Z_orig.each_row() % R.row(k); - // Generate the betas contribution of the intercept using the data - // This erases whatever was written before in W - W = inv_cov.unsafe_col(0) * sum(Z_tmp, 1).t(); - // Calculate betas by calculating each batch contribution - for(unsigned b=0; b < B; b++) { - // inv_conv is B+1xB+1 whereas index is B long - W += inv_cov.unsafe_col(b + 1) * sum(Z_tmp.cols(index[b]), 1).t(); - } - W.row(0).zeros(); // do not remove the intercept - Z_corr -= W.t() * Phi_Rk; - } - return Z_corr; -} - diff --git a/src/data_processing.cpp b/src/data_processing.cpp index d8d35b6..02ee5c5 100644 --- a/src/data_processing.cpp +++ b/src/data_processing.cpp @@ -19,6 +19,22 @@ arma::mat normalize_byCol_dense_rcpp(arma::mat x) { return x; } +// Different from the one above, this function supports L-n normalization +// while the one above only does sum normalization, not even necessarily L-1 +// [[Rcpp::export()]] +arma::mat colNormalize_dense_cpp(arma::mat& x, const arma::uword L) { + arma::mat result(x); + for (int j = 0; j < x.n_cols; ++j) { + double norm = arma::norm(x.col(j), L); + if (norm == 0) { + continue; + } + for (int i = 0; i < x.n_rows; ++i) { + result(i, j) /= norm; + } + } + return result; +} // ========================= Used for scaleNotCenter =========================== @@ -265,6 +281,21 @@ arma::sp_mat colAggregateSums_sparse(const arma::sp_mat& x, // return out; // } +// x: n features by n selected factors +// group: n-selected-factor integer vector, pre-transformed to 0-base, +// from upstream kmeans clustering. 
+// [[Rcpp::export()]] +arma::mat colAggregateMedian_dense_cpp(const arma::mat& x, const arma::uvec& group, const arma::uword n) { + arma::mat result(x.n_rows, n); + for (int i = 0; i < n; ++i) { + arma::uvec idx = arma::find(group == i); + arma::mat sub_x = x.cols(idx); + arma::vec median = arma::median(sub_x, 1); + result.col(i) = median; + } + return result; +} + // [[Rcpp::export()]] Rcpp::NumericVector sample_cpp(const int x, const int size) { arma::uvec rand = arma::randperm(x); diff --git a/tests/testthat/test_downstream.R b/tests/testthat/test_downstream.R index 3155ce0..cde252e 100644 --- a/tests/testthat/test_downstream.R +++ b/tests/testthat/test_downstream.R @@ -72,18 +72,18 @@ test_that("clustering", { pbmc <- runOnlineINMF(pbmc, k = 20, minibatchSize = 100) expect_message(runCluster(pbmc, nRandomStarts = 1), - "leiden clustering on unnormalized") + "leiden clustering on unaligned") expect_message(runCluster(pbmc, nRandomStarts = 1, method = "louvain"), - "louvain clustering on unnormalized") + "louvain clustering on unaligned") pbmc <- quantileNorm(pbmc) expect_message(pbmc <- runCluster(pbmc, nRandomStarts = 1, saveSNN = TRUE), - "leiden clustering on quantile normalized") + "leiden clustering on aligned") expect_is(defaultCluster(pbmc, droplevels = TRUE), "factor") expect_is(pbmc@uns$snn, "dgCMatrix") expect_message(pbmc <- runCluster(pbmc, nRandomStarts = 1, method = "louvain"), - "louvain clustering on quantile normalized") + "louvain clustering on aligned") expect_message(defaultCluster(pbmc, name = "louvain_cluster") <- "louvain_cluster", "Cannot have") expect_error(defaultCluster(pbmc) <- "notexist", "Selected variable does not exist") @@ -148,10 +148,10 @@ test_that("dimensionality reduction", { skip_if_not_installed("RcppPlanc") pbmc <- process(pbmc) expect_message(runUMAP(pbmc, useRaw = TRUE), - "Generating UMAP on unnormalized") + "Generating UMAP on unaligned") expect_error(dimRed(pbmc), "available in this") expect_message(pbmc <- 
runUMAP(pbmc, useRaw = FALSE), - "Generating UMAP on quantile normalized") + "Generating UMAP on aligned") pbmc@uns$defaultDimRed <- NULL expect_message(dimRed(pbmc), "No default") defaultDimRed(pbmc) <- "UMAP" @@ -166,9 +166,9 @@ test_that("dimensionality reduction", { expect_no_error(dimRed(pbmc, 2) <- NULL) expect_message(runTSNE(pbmc, useRaw = TRUE), - "Generating TSNE \\(Rtsne\\) on unnormalized") + "Generating TSNE \\(Rtsne\\) on unaligned") expect_message(pbmc <- runTSNE(pbmc, useRaw = FALSE), - "Generating TSNE \\(Rtsne\\) on quantile normalized") + "Generating TSNE \\(Rtsne\\) on aligned") expect_equal(dim(dimRed(pbmc, "TSNE")), c(ncol(pbmc), 2)) expect_error(runTSNE(pbmc, method = "fft"), diff --git a/tests/testthat/test_factorization.R b/tests/testthat/test_factorization.R index de4842a..e19e944 100644 --- a/tests/testthat/test_factorization.R +++ b/tests/testthat/test_factorization.R @@ -205,7 +205,8 @@ test_that("quantileNorm", { pbmc2 <- quantileNorm(pbmc) expect_equal(dim(getMatrix(pbmc2, "H.norm")), c(ncol(pbmc), 20)) - pbmc2 <- quantileNorm(pbmc, reference = "ctrl") + pbmc2 <- alignFactors(pbmc, reference = "ctrl") + # pbmc2 <- quantileNorm(pbmc, reference = "ctrl") expect_equal(dim(getMatrix(pbmc2, "H.norm")), c(ncol(pbmc), 20)) # For quantileNorm,list method @@ -228,6 +229,19 @@ test_that("quantileNorm", { # "Unable to understand `reference`.") }) +test_that("centroidAlign", { + skip_if_not_installed("RcppPlanc") + pbmc <- process(pbmc) + pbmc <- runOnlineINMF(pbmc, k = 20, minibatchSize = 100) + + expect_error(pbmc <- centroidAlign(pbmc, centerCluster = TRUE, shift = FALSE), + "Negative values found prior to normalizing") + expect_no_error(pbmc <- alignFactors(pbmc, method = "centroid", diagnosis = TRUE)) + expect_equal(dim(getMatrix(pbmc, "H.norm")), c(ncol(pbmc), 20)) + expect_is(pbmc$raw_which.max, "factor") + expect_is(pbmc$Z_which.max, "factor") + expect_is(pbmc$R_which.max, "factor") +}) test_that("consensus iNMF", { 
skip_if_not_installed("RcppPlanc") @@ -271,10 +285,15 @@ test_that("Seurat wrapper", { expect_in("inmf", SeuratObject::Reductions(seu)) expect_in("onlineINMF", SeuratObject::Reductions(seu)) - expect_error(quantileNorm(seu, reduction = "orig.ident"), + expect_error(alignFactors(seu, reduction = "orig.ident"), "Specified `reduction` does not points to a") - seu <- quantileNorm(seu, reduction = "inmf") - expect_in("inmfNorm", SeuratObject::Reductions(seu)) + # expect_error(quantileNorm(seu, reduction = "orig.ident"), + # "Specified `reduction` does not points to a") + seu1 <- quantileNorm(seu, reduction = "inmf") + expect_in("inmfNorm", SeuratObject::Reductions(seu1)) + + seu2 <- alignFactors(seu, "centroid", reduction = "inmf") + expect_in("inmfNorm", SeuratObject::Reductions(seu2)) expect_error(quantileNorm(seu, reference = "hello"), "Should specify one existing dataset")