diff --git a/.Rbuildignore b/.Rbuildignore index 00e8d99b0..bcad4fe52 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,4 @@ appveyor.yml cran-comments.md travis_setup.sh +CODE_OF_CONDUCT.md diff --git a/.travis.yml b/.travis.yml index 2ace4192e..38fa4c0e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,14 @@ os: - linux - osx -r: release +r: + - release + - devel + +matrix: + exclude: + - r: devel + os: osx env: global: @@ -18,7 +25,7 @@ env: - HDF5_VERSION=1.8.17 - HDF5_RELEASE_URL="https://support.hdfgroup.org/ftp/HDF5/releases" -before_install: +before_install: - chmod +x travis_setup.sh - ./travis_setup.sh diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..a920408bc --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at rsatija@nygenome.org. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. 
The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/DESCRIPTION b/DESCRIPTION index 9c8cf2c82..c29468720 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.0.2 -Date: 2019-06-14 +Version: 3.1.0 +Date: 2019-08-20 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Authors@R: c( @@ -11,7 +11,8 @@ Authors@R: c( person(given = 'Jeff', family = 'Farrell', email = 'jfarrell@g.harvard.edu', role = 'ctb'), person(given = 'Shiwei', family = 'Zheng', email = 'szheng@nygenome.org', role = 'ctb', comment = c(ORCID = '0000-0001-6682-6743')), person(given = 'Christoph', family = 'Hafemeister', email = 'chafemeister@nygenome.org', role = 'ctb', comment = c(ORCID = '0000-0001-6365-8254')), - person(given = 'Patrick', family = 'Roelli', email = 'proelli@nygenome.org', role = 'ctb') + person(given = 'Patrick', family = 'Roelli', email = 'proelli@nygenome.org', role = 'ctb'), + person(given = "Yuhan", family = "Hao", email = 'yhao@nygenome.org', role = 'ctb', comment = c(ORCID = '0000-0002-1810-0822')) ) URL: http://www.satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues @@ -36,9 +37,10 @@ Imports: igraph, irlba, KernSmooth, + leiden (>= 0.3.1), lmtest, MASS, - Matrix (>= 1.2.14), + Matrix (>= 1.2-14), metap, pbapply, plotly, @@ -46,6 +48,7 @@ Imports: RANN, RColorBrewer, Rcpp, + RcppAnnoy, reticulate, rlang, ROCR, @@ -57,7 +60,8 @@ Imports: stats, tools, tsne, - utils + utils, + uwot LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress License: GPL-3 | file LICENSE LazyData: true diff --git a/NAMESPACE b/NAMESPACE index f2a6756c6..2d3db5c65 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -161,6 +161,7 @@ export("VariableFeatures<-") export(ALRAChooseKPlot) export(AddMetaData) export(AddModuleScore) +export(Assays) export(AugmentPlot) export(AverageExpression) export(BarcodeInflectionsPlot) @@ -174,7 +175,9 @@ export(CellCycleScoring) export(CellScatter) export(CellSelector) export(Cells) +export(CollapseEmbeddingOutliers) export(CollapseSpeciesExpressionMatrix) +export(ColorDimSplit) export(CombinePlots) export(Command) export(CreateAssayObject) @@ -249,6 +252,7 @@ export(PercentageFeatureSet) export(PlotClusterTree) export(PolyDimPlot) export(PolyFeaturePlot) +export(PrepSCTIntegration) export(Project) export(ProjectDim) export(PurpleAndYellow) @@ -257,6 +261,7 @@ export(Read10X_h5) export(ReadAlevin) export(ReadAlevinCsv) 
export(ReadH5AD) +export(Reductions) export(RelativeCounts) export(RenameCells) export(RenameIdents) @@ -320,7 +325,6 @@ import(Matrix) importClassesFrom(Matrix,dgCMatrix) importFrom(KernSmooth,bkde) importFrom(MASS,glm.nb) -importFrom(MASS,kde2d) importFrom(Matrix,Matrix) importFrom(Matrix,as.matrix) importFrom(Matrix,colMeans) @@ -334,6 +338,10 @@ importFrom(RColorBrewer,brewer.pal.info) importFrom(ROCR,performance) importFrom(ROCR,prediction) importFrom(Rcpp,evalCpp) +importFrom(RcppAnnoy,AnnoyAngular) +importFrom(RcppAnnoy,AnnoyEuclidean) +importFrom(RcppAnnoy,AnnoyHamming) +importFrom(RcppAnnoy,AnnoyManhattan) importFrom(Rtsne,Rtsne) importFrom(SDMTools,pnt.in.poly) importFrom(ape,as.phylo) @@ -435,8 +443,11 @@ importFrom(ica,icaimax) importFrom(ica,icajade) importFrom(igraph,E) importFrom(igraph,graph.adjacency) +importFrom(igraph,graph_from_adj_list) +importFrom(igraph,graph_from_adjacency_matrix) importFrom(igraph,plot.igraph) importFrom(irlba,irlba) +importFrom(leiden,leiden) importFrom(lmtest,lrtest) importFrom(metap,minimump) importFrom(methods,"slot<-") @@ -492,7 +503,6 @@ importFrom(stats,na.omit) importFrom(stats,p.adjust) importFrom(stats,pchisq) importFrom(stats,pnbinom) -importFrom(stats,pnorm) importFrom(stats,poisson) importFrom(stats,prcomp) importFrom(stats,prop.test) @@ -513,9 +523,12 @@ importFrom(tsne,tsne) importFrom(utils,.DollarNames) importFrom(utils,argsAnywhere) importFrom(utils,browseURL) +importFrom(utils,capture.output) importFrom(utils,file_test) -importFrom(utils,getAnywhere) importFrom(utils,globalVariables) +importFrom(utils,isS3method) +importFrom(utils,isS3stdGeneric) +importFrom(utils,methods) importFrom(utils,packageVersion) importFrom(utils,read.csv) importFrom(utils,read.delim) @@ -523,4 +536,5 @@ importFrom(utils,read.table) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) importFrom(utils,write.table) +importFrom(uwot,umap) useDynLib(Seurat) diff --git a/NEWS.md b/NEWS.md index 8581b2047..2b9365e4f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,36 @@ All notable changes to Seurat will be documented in this file. 
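As a quick orientation for the 3.1.0 entries below: the new SCT-aware integration functions are designed to be chained. A minimal sketch (not part of this diff), assuming `obj.list` is a hypothetical list of Seurat objects that have each already been run through `SCTransform()`:

```r
library(Seurat)
# Select shared features, prepare the SCT assays, then find anchors and integrate
features <- SelectIntegrationFeatures(object.list = obj.list, nfeatures = 3000)
obj.list <- PrepSCTIntegration(object.list = obj.list, anchor.features = features)
anchors <- FindIntegrationAnchors(
  object.list = obj.list,
  normalization.method = "SCT",
  anchor.features = features
  # reference = 1 would additionally use object 1 as the integration reference
)
integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
```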
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +## [3.1.0] - 2019-08-20 +### Added +- New `PrepSCTIntegration` function to facilitate integration after `SCTransform` +- Reference-based integration with the `reference` parameter in `FindIntegrationAnchors` +- Reciprocal PCA as a `reduction` option in `FindIntegrationAnchors` +- New `CollapseEmbeddingOutliers` function +- Enable `FindTransferAnchors` after `SCTransform` +- Added back `ColorDimSplit` functionality +- Include a code of conduct +- Added uwot support as new default UMAP method +- Added `CheckDots` to catch unused parameters and suggest updated names +- `Reductions` and `Assays` functions to list stored DimReducs and Assays + +### Changed +- Fix regex in `LogSeuratCommand` +- Check for NAs in feature names in `Read10X` +- Prevent dimnames for counts/data/scale.data matrices from being arrays +- Updates `ReadH5AD` to distinguish FVF methods +- Fixes to UpdateSeuratObject for v2 objects +- Sink all output from stdout to stderr +- Fix to scale.data cell ordering after subsetting +- Enable `Assay` specification in `BuildClusterTree` +- Fix `FeaturePlot` when using both `blend` and `split.by` +- Fix to `WhichCells` when passing `cells` and `invert` +- Fix to `HoverLocator` labels and title +- Ensure feature names don't contain pipes (`|`) +- Deprecation of `RunLSI` and `RunALRA` +- Fix legend bug when sorting in `ExIPlot` + ## [3.0.2] - 2019-06-07 ### Added - Flag to skip singleton grouping in `FindClusters` diff --git a/R/RcppExports.R b/R/RcppExports.R index a14eaaac7..2fbd3b2d0 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -21,10 +21,6 @@ LogNorm <- function(data, scale_factor, display_progress = TRUE) { .Call('_Seurat_LogNorm', PACKAGE = 'Seurat', data, scale_factor, display_progress) } -FastMatMult <- function(m1, m2) { - .Call('_Seurat_FastMatMult', PACKAGE = 'Seurat', m1, m2) -} - FastRowScale <- function(mat, scale = TRUE, center = TRUE, scale_max = 10, display_progress = TRUE) { .Call('_Seurat_FastRowScale', PACKAGE = 'Seurat', mat, scale, center, scale_max, display_progress) } diff --git a/R/clustering.R b/R/clustering.R index fc9216e41..40560795e 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -21,6 +21,8 @@ NULL #' @param algorithm Algorithm for modularity optimization (1 = original Louvain #' algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM #' algorithm; 4 = Leiden algorithm). Leiden requires the leidenalg python module. +#' @param method Method for running leiden (defaults to matrix which is fast for small datasets). +#' Enable method = "igraph" to avoid casting large data to a dense matrix. #' @param n.start Number of random starts. #' @param n.iter Maximal number of iterations per random start. #' @param random.seed Seed of the random number generator. @@ -41,6 +43,7 @@ FindClusters.default <- function( weights = NULL, node.sizes = NULL, resolution = 0.8, + method = "matrix", algorithm = 1, n.start = 10, n.iter = 10, @@ -51,6 +54,7 @@ FindClusters.default <- function( verbose = TRUE, ... ) { + CheckDots(...)
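# CheckDots() (new in this release) warns about arguments left unmatched in `...`
# and suggests updated parameter names rather than silently ignoring them.
# With algorithm = 4, the `method` argument introduced above is forwarded to
# RunLeiden(); method = "igraph" keeps a large SNN graph sparse instead of
# casting it to a dense matrix, e.g. FindClusters(obj, algorithm = 4, method = "igraph").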
if (is.null(x = object)) { stop("Please provide an SNN graph") } @@ -79,12 +83,15 @@ FindClusters.default <- function( ) } else if (algorithm == 4) { ids <- RunLeiden( - adj_mat = object, + object = object, + method = method, partition.type = "RBConfigurationVertexPartition", initial.membership = initial.membership, weights = weights, node.sizes = node.sizes, - resolution.parameter = resolution + resolution.parameter = r, + random.seed = random.seed, + n.iter = n.iter ) } else { stop("algorithm not recognised, please specify as an integer or string") } @@ -114,12 +121,15 @@ FindClusters.default <- function( edge.file.name = edge.file.name) } else if (algorithm == 4) { ids <- RunLeiden( - adj_mat = object, + object = object, + method = method, partition.type = "RBConfigurationVertexPartition", initial.membership = initial.membership, weights = weights, node.sizes = node.sizes, - resolution.parameter = r + resolution.parameter = r, + random.seed = random.seed, + n.iter = n.iter ) } else { stop("algorithm not recognised, please specify as an integer or string") } @@ -158,6 +168,7 @@ FindClusters.Seurat <- function( verbose = TRUE, ... ) { + CheckDots(...) graph.name <- graph.name %||% paste0(DefaultAssay(object = object), "_snn") if (!graph.name %in% names(x = object)) { stop("Provided graph.name not present in Seurat object") } @@ -179,7 +190,8 @@ FindClusters.Seurat <- function( group.singletons = group.singletons, temp.file.location = temp.file.location, edge.file.name = edge.file.name, - verbose = verbose + verbose = verbose, + ... ) colnames(x = clustering.results) <- paste0(graph.name, "_", colnames(x = clustering.results)) object <- AddMetaData(object = object, metadata = clustering.results) @@ -210,6 +222,10 @@ FindClusters.Seurat <- function( #' values less than or equal to this will be set to 0 and removed from the SNN #' graph. Essentially sets the stringency of pruning (0 --- no pruning, 1 --- #' prune everything). +#' @param nn.method Method for nearest neighbor finding. Options include: rann, +#' annoy +#' @param annoy.metric Distance metric for annoy. Options include: euclidean, +#' cosine, manhattan, and hamming #' @param nn.eps Error bound when performing nearest neighbor search using RANN; #' default of 0.0 implies exact nearest neighbor search #' @param verbose Whether or not to print output to the console @@ -228,11 +244,14 @@ FindNeighbors.default <- function( k.param = 20, compute.SNN = TRUE, prune.SNN = 1/15, + nn.method = 'rann', + annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, force.recalc = FALSE, ... ) { + CheckDots(...) if (is.null(x = dim(x = object))) { warning( "Object should have two dimensions, attempting to coerce to matrix", @@ -256,12 +275,14 @@ FindNeighbors.default <- function( if (verbose) { message("Computing nearest neighbor graph") } - my.knn <- nn2( + nn.ranked <- NNHelper( data = object, k = k.param, - searchtype = 'standard', - eps = nn.eps) - nn.ranked <- my.knn$nn.idx + method = nn.method, + searchtype = "standard", + eps = nn.eps, + metric = annoy.metric) + nn.ranked <- nn.ranked$nn.idx } else { if (verbose) { message("Building SNN based on a provided distance matrix") } @@ -307,11 +328,14 @@ FindNeighbors.Assay <- function( k.param = 20, compute.SNN = TRUE, prune.SNN = 1/15, + nn.method = 'rann', + annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, force.recalc = FALSE, ... ) { + CheckDots(...)
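# When no features are supplied, the assay's variable features are used; the
# data slot is then transposed (cells as rows) before neighbor finding. The new
# nn.method and annoy.metric arguments choose between exact search via
# RANN::nn2 ("rann") and approximate search via RcppAnnoy ("annoy"), dispatched
# by NNHelper() in the default method.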
features <- features %||% VariableFeatures(object = object) data.use <- t(x = GetAssayData(object = object, slot = "data")[features, ]) neighbor.graphs <- FindNeighbors( @@ -319,9 +343,12 @@ FindNeighbors.Assay <- function( k.param = k.param, compute.SNN = compute.SNN, prune.SNN = prune.SNN, + nn.method = nn.method, + annoy.metric = annoy.metric, nn.eps = nn.eps, verbose = verbose, - force.recalc = force.recalc + force.recalc = force.recalc, + ... ) return(neighbor.graphs) } @@ -335,11 +362,14 @@ FindNeighbors.dist <- function( k.param = 20, compute.SNN = TRUE, prune.SNN = 1/15, + nn.method = "rann", + annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, force.recalc = FALSE, ... ) { + CheckDots(...) return(FindNeighbors( object = as.matrix(x = object), distance.matrix = TRUE, @@ -347,6 +377,8 @@ FindNeighbors.dist <- function( compute.SNN = compute.SNN, prune.SNN = prune.SNN, nn.eps = nn.eps, + nn.method = nn.method, + annoy.metric = annoy.metric, verbose = verbose, force.recalc = force.recalc, ... @@ -376,6 +408,8 @@ FindNeighbors.Seurat <- function( k.param = 20, compute.SNN = TRUE, prune.SNN = 1/15, + nn.method = "rann", + annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, force.recalc = FALSE, @@ -383,6 +417,7 @@ FindNeighbors.Seurat <- function( graph.name = NULL, ... ) { + CheckDots(...) if (!is.null(x = dims)) { assay <- assay %||% DefaultAssay(object = object) data.use <- Embeddings(object = object[[reduction]]) @@ -395,9 +430,12 @@ FindNeighbors.Seurat <- function( k.param = k.param, compute.SNN = compute.SNN, prune.SNN = prune.SNN, + nn.method = nn.method, + annoy.metric = annoy.metric, nn.eps = nn.eps, verbose = verbose, - force.recalc = force.recalc + force.recalc = force.recalc, + ... ) } else { assay <- assay %||% DefaultAssay(object = object) @@ -408,9 +446,12 @@ FindNeighbors.Seurat <- function( k.param = k.param, compute.SNN = compute.SNN, prune.SNN = prune.SNN, + nn.method = nn.method, + annoy.metric = annoy.metric, nn.eps = nn.eps, verbose = verbose, - force.recalc = force.recalc + force.recalc = force.recalc, + ... ) } graph.name <- graph.name %||% paste0(assay, "_", names(x = neighbor.graphs)) @@ -448,6 +489,89 @@ FindNeighbors.Seurat <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Run annoy +# +# @param data Data to build the index with +# @param query A set of data to be queried against data +# @param metric Distance metric; can be one of "euclidean", "cosine", "manhattan", +# "hamming" +# @param n.trees More trees gives higher precision when querying +# @param k Number of neighbors +# @param search.k During the query it will inspect up to search_k nodes which +# gives you a run-time tradeoff between better accuracy and speed. 
+# @param include.distance Include the corresponding distances +# +AnnoyNN <- function(data, query = data, metric = "euclidean", n.trees = 50, k, + search.k = -1, include.distance = TRUE) { + idx <- AnnoyBuildIndex( + data = data, + metric = metric, + n.trees = n.trees) + nn <- AnnoySearch( + index = idx, + query = query, + k = k, + search.k = search.k, + include.distance = include.distance) + return(nn) +} + +# Build the annoy index +# +# @param data Data to build the index with +# @param metric Distance metric; can be one of "euclidean", "cosine", "manhattan", +# "hamming" +# @param n.trees More trees gives higher precision when querying +#' @importFrom RcppAnnoy AnnoyEuclidean AnnoyAngular AnnoyManhattan AnnoyHamming +# +AnnoyBuildIndex <- function(data, metric = "euclidean", n.trees = 50) { + f <- ncol(x = data) + a <- switch( + EXPR = metric, + "euclidean" = new(Class = RcppAnnoy::AnnoyEuclidean, f), + "cosine" = new(Class = RcppAnnoy::AnnoyAngular, f), + "manhattan" = new(Class = RcppAnnoy::AnnoyManhattan, f), + "hamming" = new(Class = RcppAnnoy::AnnoyHamming, f), + stop("Invalid metric") + ) + for (ii in seq(nrow(x = data))) { + a$addItem(ii - 1, data[ii, ]) + } + a$build(n.trees) + return(a) +} + +# Search the annoy index +# +# @param index Annoy index, built with AnnoyBuildIndex +# @param query A set of data to be queried against the index +# @param k Number of neighbors +# @param search.k During the query it will inspect up to search_k nodes which +# gives you a run-time tradeoff between better accuracy and speed. +# @param include.distance Include the corresponding distances +# +AnnoySearch <- function(index, query, k, search.k = -1, include.distance = TRUE) { + n <- nrow(x = query) + idx <- matrix(nrow = n, ncol = k) + dist <- matrix(nrow = n, ncol = k) + convert <- methods::is(index, "Rcpp_AnnoyAngular") + res <- future_lapply(X = 1:n, FUN = function(x) { + res <- index$getNNsByVectorList(query[x, ], k, search.k, include.distance) + # Convert from Angular to Cosine distance + if (convert) { + res$distance <- 0.5 * (res$distance * res$distance) + } + list(res$item + 1, res$distance) + }) + for (i in 1:n) { + idx[i, ] <- res[[i]][[1]] + if (include.distance) { + dist[i, ] <- res[[i]][[2]] + } + } + return(list(nn.idx = idx, nn.dists = dist)) +} + # Group single cells that make up their own cluster in with the cluster they are # most connected to. # @@ -502,6 +626,33 @@ GroupSingletons <- function(ids, SNN, group.singletons = TRUE, verbose = TRUE) { return(ids) } +# Internal helper function to dispatch to various neighbor finding methods +# +# @param data Input data +# @param query Data to query against data +# @param k Number of nearest neighbors to compute +# @param method Nearest neighbor method to use: "rann", "annoy" +# @param ... additional parameters to specific neighbor finding method +# +NNHelper <- function(data, query = data, k, method, ...) { + args <- as.list(x = sys.frame(which = sys.nframe())) + args <- c(args, list(...)) + return( + switch( + EXPR = method, + "rann" = { + args <- args[intersect(x = names(x = args), y = names(x = formals(fun = nn2)))] + do.call(what = 'nn2', args = args) + }, + "annoy" = { + args <- args[intersect(x = names(x = args), y = names(x = formals(fun = AnnoyNN)))] + do.call(what = 'AnnoyNN', args = args) + }, + stop("Invalid method. 
Please choose one of 'rann', 'annoy'") + ) + ) +} + # Run Leiden clustering algorithm # # Implements the Leiden clustering algorithm in R using reticulate @@ -518,17 +669,23 @@ GroupSingletons <- function(ids, SNN, group.singletons = TRUE, verbose = TRUE) { # @param resolution.parameter A parameter controlling the coarseness of the clusters # for Leiden algorithm. Higher values lead to more clusters. (defaults to 1.0 for # partition types that accept a resolution parameter) +# @param random.seed Seed of the random number generator +# @param n.iter Maximal number of iterations per random start # # @keywords graph network igraph mvtnorm simulation # +#' @importFrom leiden leiden +#' @importFrom methods as is #' @importFrom reticulate py_module_available import r_to_py +#' @importFrom igraph graph_from_adjacency_matrix graph_from_adj_list # # @author Tom Kelly # # @export # RunLeiden <- function( - adj_mat, + object, + method = c("matrix", "igraph"), partition.type = c( 'RBConfigurationVertexPartition', 'ModularityVertexPartition', @@ -541,75 +698,42 @@ RunLeiden <- function( initial.membership = NULL, weights = NULL, node.sizes = NULL, - resolution.parameter = 1 + resolution.parameter = 1, + random.seed = 0, + n.iter = 10 ) { if (!py_module_available(module = 'leidenalg')) { stop("Cannot find Leiden algorithm, please install through pip (e.g. pip install leidenalg).") } - # import python modules with reticulate - leidenalg <- import(module = "leidenalg") - ig <- import(module = "igraph") - # convert matrix input - adj_mat <- as.matrix(x = ceiling(x = adj_mat)) - # convert to python numpy.ndarray, then a list - adj_mat_py <- r_to_py(x = adj_mat) - adj_mat_py <- adj_mat_py$tolist() - # convert graph structure to a Python compatible object - snn_graph <- ig$Graph$Adjacency(adj_mat_py) - # compute partitions - part <- switch( - EXPR = partition.type, - 'RBConfigurationVertexPartition' = leidenalg$find_partition( - snn_graph, - leidenalg$RBConfigurationVertexPartition, - initial_membership = initial.membership, - weights = weights, - resolution_parameter = resolution.parameter - ), - 'ModularityVertexPartition' = leidenalg$find_partition( - snn_graph, - leidenalg$ModularityVertexPartition, - initial_membership = initial.membership, - weights = weights - ), - 'RBERVertexPartition' = leidenalg$find_partition( - snn_graph, - leidenalg$RBERVertexPartition, - initial_membership = initial.membership, - weights = weights, - node_sizes = node.sizes, - resolution_parameter = resolution.parameter - ), - 'CPMVertexPartition' = leidenalg$find_partition( - snn_graph, - leidenalg$CPMVertexPartition, - initial_membership = initial.membership, - weights = weights, - node_sizes = node.sizes, - resolution_parameter = resolution.parameter + switch( + EXPR = method, + #cast to dense (supported by reticulate for numpy.array) + "matrix" = input <- as(object, "matrix"), + #run as igraph object (passes to reticulate) + "igraph" = switch( + EXPR = is(object), + #generate igraph if needed (will handle updated snn class) + "Graph" = input <- graph_from_adjacency_matrix(adjmatrix = object), + "dgCMatrix" = input <- graph_from_adjacency_matrix(adjmatrix = object), + "igraph" = input <- object, + "matrix" = input <- graph_from_adjacency_matrix(adjmatrix = object), + "list" = input <- graph_from_adj_list(adjlist = object), + stop("SNN object must be a compatible input for igraph") ), - 'MutableVertexPartition' = leidenalg$find_partition( - snn_graph, - leidenalg$MutableVertexPartition, - initial_membership = initial.membership 
- ), - 'SignificanceVertexPartition' = leidenalg$find_partition( - snn_graph, - leidenalg$SignificanceVertexPartition, - initial_membership = initial.membership, - node_sizes = node.sizes, - resolution_parameter = resolution.parameter - ), - 'SurpriseVertexPartition' = leidenalg$find_partition( - snn_graph, - leidenalg$SurpriseVertexPartition, - initial_membership = initial.membership, - weights = weights, - node_sizes = node.sizes - ), - stop("please specify a partition type as a string out of those documented") + stop("method for leiden must be 'matrix' or 'igraph'") + ) + #run leiden from CRAN package (calls python with reticulate) + partition <- leiden( + object = input, + partition_type = partition.type, + initial_membership = initial.membership, + weights = weights, + node_sizes = node.sizes, + resolution_parameter = resolution.parameter, + seed = random.seed, + n_iterations = n.iter ) - return(part$membership + 1) + return(partition) } # Runs the modularity optimizer (C++ port of java program ModularityOptimizer.jar) diff --git a/R/convenience.R b/R/convenience.R index 2312b45a8..5739cd813 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -55,7 +55,8 @@ UMAPPlot <- function(object, ...) { # @rdname DimPlot # SpecificDimPlot <- function(object, ...) { - name <- as.character(x = sys.calls()[[1]])[1] + funs <- sys.calls() + name <- as.character(x = funs[[length(x = funs) - 1]])[1] name <- tolower(x = gsub(pattern = 'Plot', replacement = '', x = name)) args <- list('object' = object) args <- c(args, list(...)) diff --git a/R/differential_expression.R b/R/differential_expression.R index fdc969c35..e58da661a 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -252,6 +252,7 @@ FindConservedMarkers <- function( } marker.test <- list() # do marker tests + ident.2.save <- ident.2 for (i in 1:num.groups) { level.use <- levels.split[i] ident.use.1 <- paste(ident.1, level.use, sep = "_") @@ -270,6 +271,7 @@ FindConservedMarkers <- function( ) next } + ident.2 <- ident.2.save cells.1 <- WhichCells(object = object, idents = ident.use.1) if (is.null(x = ident.2)) { cells.2 <- setdiff(x = cells[[i]], y = cells.1) @@ -316,8 +318,8 @@ FindConservedMarkers <- function( verbose = verbose, ... ) + names(x = marker.test)[i] <- levels.split[i] } - names(x = marker.test) <- levels.split marker.test <- Filter(f = Negate(f = is.null), x = marker.test) genes.conserved <- Reduce( f = intersect, @@ -584,7 +586,14 @@ FindMarkers.default <- function( } # perform DE if (!(test.use %in% c('negbinom', 'poisson', 'MAST', "LR")) && !is.null(x = latent.vars)) { - warning("'latent.vars' is only used for 'negbinom', 'poisson', 'LR', and 'MAST' tests") + warning( + "'latent.vars' is only used for 'negbinom', 'poisson', 'LR', and 'MAST' tests", + call. = FALSE, + immediate. = TRUE + ) + } + if (!test.use %in% c('wilcox', 'MAST', 'DESeq2')) { + CheckDots(...) } de.results <- switch( EXPR = test.use, @@ -636,13 +645,15 @@ FindMarkers.default <- function( cells.1 = cells.1, cells.2 = cells.2, latent.vars = latent.vars, - verbose = verbose + verbose = verbose, + ... ), "DESeq2" = DESeq2DETest( data.use = object[features, c(cells.1, cells.2), drop = FALSE], cells.1 = cells.1, cells.2 = cells.2, - verbose = verbose + verbose = verbose, + ... 
), "LR" = LRDETest( data.use = object[features, c(cells.1, cells.2), drop = FALSE], @@ -926,6 +937,7 @@ DESeq2DETest <- function( if (!PackageCheck('DESeq2', error = FALSE)) { stop("Please install DESeq2 - learn more at https://bioconductor.org/packages/release/bioc/html/DESeq2.html") } + CheckDots(..., fxns = 'DESeq2::results') group.info <- data.frame(row.names = c(cells.1, cells.2)) group.info[cells.1, "group"] <- "Group1" group.info[cells.2, "group"] <- "Group2" @@ -1087,7 +1099,7 @@ DiffTTest <- function( #' @importFrom stats var as.formula #' @importFrom future.apply future_sapply #' @importFrom future nbrOfWorkers -#' +#' # @export # # @examples @@ -1204,8 +1216,7 @@ LRDETest <- function( cells.1, cells.2, latent.vars = NULL, - verbose = TRUE, - ... + verbose = TRUE ) { group.info <- data.frame(row.names = c(cells.1, cells.2)) group.info[cells.1, "group"] <- "Group1" diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 48358889c..27325036d 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -150,7 +150,7 @@ JackStraw <- function( #' #' @export #' -L2Dim <- function(object, reduction, new.dr = NULL, new.key = NULL){ +L2Dim <- function(object, reduction, new.dr = NULL, new.key = NULL) { l2.norm <- L2Norm(mat = Embeddings(object[[reduction]])) if(is.null(new.dr)){ new.dr <- paste0(reduction, ".l2") @@ -183,6 +183,7 @@ L2Dim <- function(object, reduction, new.dr = NULL, new.key = NULL){ #' @export #' L2CCA <- function(object, ...){ + CheckDots(..., fxns = 'L2Dim') return(L2Dim(object = object, reduction = "cca", ...)) } @@ -286,7 +287,7 @@ ProjectDim <- function( data.use <- scale(x = as.matrix(x = data.use), center = TRUE, scale = FALSE) } cell.embeddings <- Embeddings(object = redeuc) - new.feature.loadings.full <- FastMatMult(m1 = data.use, m2 = cell.embeddings) + new.feature.loadings.full <- data.use %*% cell.embeddings rownames(x = new.feature.loadings.full) <- rownames(x = data.use) colnames(x = new.feature.loadings.full) <- colnames(x = cell.embeddings) Loadings(object = redeuc, projected = TRUE) <- new.feature.loadings.full @@ -313,8 +314,7 @@ ProjectDim <- function( #' @param standardize Standardize matrices - scales columns to have unit variance #' and mean 0 #' @param num.cc Number of canonical vectors to calculate -#' @param verbose ... -#' @param use.cpp ... +#' @param verbose Show progress messages #' #' @importFrom irlba irlba #' @@ -327,7 +327,6 @@ RunCCA.default <- function( standardize = TRUE, num.cc = 20, verbose = FALSE, - use.cpp = TRUE, ... ) { set.seed(seed = 42) @@ -337,17 +336,7 @@ RunCCA.default <- function( object1 <- Standardize(mat = object1, display_progress = FALSE) object2 <- Standardize(mat = object2, display_progress = FALSE) } - if (as.numeric(x = max(dim(x = object1))) * as.numeric(x = max(dim(x = object2))) > .Machine$integer.max) { - # if the returned matrix from FastMatMult has more than 2^31-1 entries, throws an error due to - # storage of certain attributes as ints, force usage of R version - use.cpp <- FALSE - } - if (use.cpp == TRUE) { - mat3 <- FastMatMult(m1 = t(x = object1), m2 = object2) - } - else { - mat3 <- crossprod(x = object1, y = object2) - } + mat3 <- crossprod(x = object1, y = object2) cca.svd <- irlba(A = mat3, nv = num.cc) cca.data <- rbind(cca.svd$u, cca.svd$v) colnames(x = cca.data) <- paste0("CC", 1:num.cc) @@ -395,7 +384,6 @@ RunCCA.Seurat <- function( add.cell.id1 = NULL, add.cell.id2 = NULL, verbose = TRUE, - use.cpp = TRUE, ... 
) { assay1 <- assay1 %||% DefaultAssay(object = object1) @@ -452,7 +440,6 @@ RunCCA.Seurat <- function( standardize = TRUE, num.cc = num.cc, verbose = verbose, - use.cpp = use.cpp ) if (verbose) { message("Merging objects") @@ -528,6 +515,7 @@ RunICA.default <- function( seed.use = 42, ... ) { + CheckDots(..., fxns = ica.function) if (!is.null(x = seed.use)) { set.seed(seed = seed.use) } @@ -598,7 +586,6 @@ RunICA.Assay <- function( return(reduction.data) } - #' @param reduction.name dimensional reduction name #' #' @rdname RunICA @@ -663,6 +650,7 @@ RunLSI.default <- function( verbose = TRUE, ... ) { + CheckDots(...) if (!is.null(seed.use)) { set.seed(seed = seed.use) } @@ -725,7 +713,6 @@ RunLSI.Assay <- function( reduction.data <- RunLSI( object = data.use, assay = assay, - features = features, n = n, reduction.key = reduction.key, scale.max = scale.max, @@ -789,6 +776,7 @@ RunLSI.Seurat <- function( #' #' @importFrom irlba irlba #' @importFrom stats prcomp +#' @importFrom utils capture.output #' #' @rdname RunPCA #' @export @@ -860,7 +848,12 @@ RunPCA.default <- function( misc = list(total.variance = total.variance) ) if (verbose) { - print(x = reduction.data, dims = ndims.print, nfeatures = nfeatures.print) + msg <- capture.output(print( + x = reduction.data, + dims = ndims.print, + nfeatures = nfeatures.print + )) + message(paste(msg, collapse = '\n')) } return(reduction.data) } @@ -1135,6 +1128,8 @@ RunTSNE.Seurat <- function( } #' @importFrom reticulate py_module_available py_set_seed import +#' @importFrom uwot umap +#' @importFrom future nbrOfWorkers #' #' @rdname RunUMAP #' @method RunUMAP default @@ -1143,9 +1138,10 @@ RunTSNE.Seurat <- function( RunUMAP.default <- function( object, assay = NULL, + umap.method = 'uwot', n.neighbors = 30L, n.components = 2L, - metric = "correlation", + metric = 'cosine', n.epochs = NULL, learning.rate = 1.0, min.dist = 0.3, @@ -1163,40 +1159,85 @@ RunUMAP.default <- function( verbose = TRUE, ... ) { - if (!py_module_available(module = 'umap')) { - stop("Cannot find UMAP, please install through pip (e.g. pip install umap-learn).") - } + CheckDots(...) if (!is.null(x = seed.use)) { set.seed(seed = seed.use) py_set_seed(seed = seed.use) } - if (typeof(x = n.epochs) == "double") { - n.epochs <- as.integer(x = n.epochs) + if (umap.method != 'umap-learn' && getOption('Seurat.warn.umap.uwot', TRUE)) { + warning( + "The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric", + "\nTo use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'", + "\nThis message will be shown once per session", + call. = FALSE, + immediate. = TRUE + ) + options(Seurat.warn.umap.uwot = FALSE) } - umap_import <- import(module = "umap", delay_load = TRUE) - umap <- umap_import$UMAP( - n_neighbors = as.integer(x = n.neighbors), - n_components = as.integer(x = n.components), - metric = metric, - n_epochs = n.epochs, - learning_rate = learning.rate, - min_dist = min.dist, - spread = spread, - set_op_mix_ratio = set.op.mix.ratio, - local_connectivity = local.connectivity, - repulsion_strength = repulsion.strength, - negative_sample_rate = negative.sample.rate, - a = a, - b = b, - metric_kwds = metric.kwds, - angular_rp_forest = angular.rp.forest, - verbose = verbose + umap.output <- switch( + EXPR = umap.method, + 'umap-learn' = { + if (!py_module_available(module = 'umap')) { + stop("Cannot find UMAP, please install through pip (e.g. 
pip install umap-learn).") + } + if (typeof(x = n.epochs) == "double") { + n.epochs <- as.integer(x = n.epochs) + } + umap_import <- import(module = "umap", delay_load = TRUE) + umap <- umap_import$UMAP( + n_neighbors = as.integer(x = n.neighbors), + n_components = as.integer(x = n.components), + metric = metric, + n_epochs = n.epochs, + learning_rate = learning.rate, + min_dist = min.dist, + spread = spread, + set_op_mix_ratio = set.op.mix.ratio, + local_connectivity = local.connectivity, + repulsion_strength = repulsion.strength, + negative_sample_rate = negative.sample.rate, + a = a, + b = b, + metric_kwds = metric.kwds, + angular_rp_forest = angular.rp.forest, + verbose = verbose + ) + umap$fit_transform(as.matrix(x = object)) + }, + 'uwot' = { + if (metric == 'correlation') { + warning( + "UWOT does not implement the correlation metric, using cosine instead", + call. = FALSE, + immediate. = TRUE + ) + metric <- 'cosine' + } + umap( + X = object, + n_threads = nbrOfWorkers(), + n_neighbors = as.integer(x = n.neighbors), + n_components = as.integer(x = n.components), + metric = metric, + n_epochs = n.epochs, + learning_rate = learning.rate, + min_dist = min.dist, + spread = spread, + set_op_mix_ratio = set.op.mix.ratio, + local_connectivity = local.connectivity, + repulsion_strength = repulsion.strength, + negative_sample_rate = negative.sample.rate, + a = a, + b = b, + verbose = verbose + ) + }, + stop("Unknown umap method: ", umap.method, call. = FALSE) ) - umap_output <- umap$fit_transform(as.matrix(x = object)) - colnames(x = umap_output) <- paste0(reduction.key, 1:ncol(x = umap_output)) - rownames(x = umap_output) <- rownames(object) + colnames(x = umap.output) <- paste0(reduction.key, 1:ncol(x = umap.output)) + rownames(x = umap.output) <- rownames(x = object) umap.reduction <- CreateDimReducObject( - embeddings = umap_output, + embeddings = umap.output, key = reduction.key, assay = assay ) @@ -1212,8 +1253,9 @@ RunUMAP.default <- function( RunUMAP.Graph <- function( object, assay = NULL, + umap.method = 'umap-learn', n.components = 2L, - metric = "correlation", + metric = 'correlation', n.epochs = 0L, learning.rate = 1, min.dist = 0.3, @@ -1228,6 +1270,14 @@ RunUMAP.Graph <- function( reduction.key = 'UMAP_', ... ) { + CheckDots(...) + if (umap.method != 'umap-learn') { + warning( + "Running UMAP on Graph objects is only supported using the umap-learn method", + call. = FALSE, + immediate. = TRUE + ) + } if (!py_module_available(module = 'umap')) { stop("Cannot find UMAP, please install through pip (e.g. pip install umap-learn).") } @@ -1286,6 +1336,11 @@ RunUMAP.Graph <- function( #' @param graph Name of graph on which to run UMAP #' @param assay Assay to pull data for when using \code{features}, or assay used to construct Graph #' if running UMAP on a Graph +#' @param umap.method UMAP implementation to run. Can be +#' \describe{ +#' \item{\code{uwot}:}{Runs umap via the uwot R package} +#' \item{\code{umap-learn}:}{Run the Seurat wrapper of the python umap-learn package} +#' } #' @param n.neighbors This determines the number of neighboring points used in #' local approximations of manifold structure. Larger values will result in more #' global structure being preserved at the loss of detailed local structure. 
In @@ -1348,9 +1403,10 @@ RunUMAP.Seurat <- function( features = NULL, graph = NULL, assay = 'RNA', + umap.method = 'uwot', n.neighbors = 30L, n.components = 2L, - metric = "correlation", + metric = 'cosine', n.epochs = NULL, learning.rate = 1, min.dist = 0.3, @@ -1365,10 +1421,11 @@ RunUMAP.Seurat <- function( metric.kwds = NULL, angular.rp.forest = FALSE, verbose = TRUE, - reduction.name = "umap", - reduction.key = "UMAP_", + reduction.name = 'umap', + reduction.key = 'UMAP_', ... ) { + CheckDots(...) if (sum(c(is.null(x = dims), is.null(x = features), is.null(x = graph))) < 2) { stop("Please specify only one of the following arguments: dims, features, or graph") } @@ -1385,6 +1442,7 @@ RunUMAP.Seurat <- function( object[[reduction.name]] <- RunUMAP( object = data.use, assay = assay, + umap.method = umap.method, n.neighbors = n.neighbors, n.components = n.components, metric = metric, @@ -1424,6 +1482,7 @@ ScoreJackStraw.JackStrawData <- function( score.thresh = 1e-5, ... ) { + CheckDots(...) pAll <- JS(object = object, slot = "empirical.p.values") pAll <- pAll[, dims, drop = FALSE] pAll <- as.data.frame(pAll) @@ -1490,6 +1549,7 @@ ScoreJackStraw.Seurat <- function( ... ) if (do.plot) { + CheckDots(..., fxns = 'JackStrawPlot') suppressWarnings(expr = print(JackStrawPlot( object = object, reduction = reduction, @@ -1608,6 +1668,7 @@ fftRtsne <- function(X, df = 1.0, ... ) { + CheckDots(...) if (is.null(x = data_path)) { data_path <- tempfile(pattern = 'fftRtsne_data_', fileext = '.dat') } @@ -1628,18 +1689,17 @@ fftRtsne <- function(X, ft.out <- suppressWarnings(expr = system2(command = fast_tsne_path, stdout = TRUE)) if (grepl(pattern = '= t-SNE v1.1', x = ft.out[1])) { version_number <- '1.1.0' - }else if(grepl(pattern = '= t-SNE v1.0', x = ft.out[1])){ + } else if (grepl(pattern = '= t-SNE v1.0', x = ft.out[1])) { version_number <- '1.0' - }else{ + } else { message("First line of fast_tsne output is") message(ft.out[1]) stop("Our FIt-SNE wrapper requires FIt-SNE v1.X.X, please install the appropriate version from github.com/KlugerLab/FIt-SNE and have fast_tsne_path point to it if it's not in your path") } - is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5) { return(abs(x = x - round(x = x)) < tol) } - if (version_number == '1.0' && df !=1.0) { + if (version_number == '1.0' && df != 1.0) { stop("This version of FIt-SNE does not support df!=1. Please install the appropriate version from github.com/KlugerLab/FIt-SNE") } if (!is.numeric(x = theta) || (theta < 0.0) || (theta > 1.0) ) { @@ -1805,7 +1865,7 @@ JackRandom <- function( # # @return returns the l2-norm. # -L2Norm <- function(vec){ +L2Norm <- function(vec) { a <- sqrt(x = sum(vec ^ 2)) if (a == 0) { a <- .05 diff --git a/R/generics.R b/R/generics.R index 23ce29841..58f18c98f 100644 --- a/R/generics.R +++ b/R/generics.R @@ -691,6 +691,9 @@ ReorderIdent <- function(object, var, ...) { #' Linderman, G. C., Zhao, J., Kluger, Y. (2018). "Zero-preserving imputation #' of scRNA-seq data using low rank approximation." (bioRxiv:138677) #' +#' @note RunALRA and associated functions are being moved to SeuratWrappers; +#' for more information on SeuratWrappers, please see \url{https://github.com/satijalab/seurat-wrappers} +#' #' @param object An object #' @param ... Arguments passed to other methods #' @@ -718,6 +721,13 @@ ReorderIdent <- function(object, var, ...) { #' } #' RunALRA <- function(object, ...) 
{ + .Deprecated( + new = 'SeuratWrappers::RunALRA', + msg = paste( + 'RunALRA and associated functions are being moved to SeuratWrappers;', + 'for more information on SeuratWrappers, please see https://github.com/satijalab/seurat-wrappers' + ) + ) UseMethod(generic = 'RunALRA', object = object) } @@ -774,6 +784,11 @@ RunICA <- function(object, ...) { #' #' For details about stored LSI calculation parameters, see #' \code{PrintLSIParams}. +#' +#' @note RunLSI is being moved to Signac. Equivalent functionality can be +#' achieved via the Signac::RunTFIDF and Signac::RunSVD functions; +#' for more information on Signac, please see +#' \url{https://github.com/timoast/Signac} #' #' @param object Seurat object #' @param ... Arguments passed to other methods @@ -782,6 +797,14 @@ RunICA <- function(object, ...) { #' @export RunLSI #' RunLSI <- function(object, ...) { + .Deprecated( + new = 'Signac::RunTFIDF', + msg = paste( + "RunLSI is being moved to Signac. Equivalent functionality can be", + "achieved via the Signac::RunTFIDF and Signac::RunSVD functions; for", + "more information on Signac, please see https://github.com/timoast/Signac" + ) + ) UseMethod(generic = "RunLSI", object = object) } diff --git a/R/integration.R b/R/integration.R index 2513040e7..60d92686d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -10,24 +10,48 @@ NULL #' #' Finds the integration anchors #' -#' @param object.list A list of objects between which to find anchors for downstream integration. -#' @param assay A vector of assay names specifying which assay to use when constructing anchors. If -#' NULL, the current default assay for each object is used. +#' @param object.list A list of objects between which to find anchors for +#' downstream integration. +#' @param assay A vector of assay names specifying which assay to use when +#' constructing anchors. If NULL, the current default assay for each object is +#' used. +#' @param reference A vector specifying the object(s) to be used as a reference +#' during integration. If NULL (default), all pairwise anchors are found (no +#' references). If not NULL, the corresponding objects in \code{object.list} +#' will be used as references. When using a set of specified references, anchors +#' are first found between each query and each reference. The references are +#' then integrated through pairwise integration. Each query is then mapped to +#' the integrated reference. #' @param anchor.features Can be either: #' \itemize{ -#' \item{A numeric value. This will call \code{\link{SelectIntegrationFeatures}} to select the -#' provided number of features to be used in anchor finding} +#' \item{A numeric value. This will call \code{\link{SelectIntegrationFeatures}} +#' to select the provided number of features to be used in anchor finding} #' \item{A vector of features to be used as input to the anchor finding process} #' } -#' @param scale Whether or not to scale the features provided. Only set to FALSE if you have -#' previously scaled the features you want to use for each object in the object.list -#' @param l2.norm Perform L2 normalization on the CCA cell embeddings after dimensional reduction -#' @param dims Which dimensions to use from the CCA to specify the neighbor search space +#' @param scale Whether or not to scale the features provided. 
Only set to FALSE +#' if you have previously scaled the features you want to use for each object in +#' the object.list +#' @param normalization.method Name of normalization method used: LogNormalize +#' or SCT +#' @param sct.clip.range Numeric of length two specifying the min and max values +#' the Pearson residual will be clipped to +#' @param reduction Dimensional reduction to perform when finding anchors. Can +#' be one of: +#' \itemize{ +#' \item{cca: Canonical correlation analysis} +#' \item{rpca: Reciprocal PCA} +#' } +#' @param l2.norm Perform L2 normalization on the CCA cell embeddings after +#' dimensional reduction +#' @param dims Which dimensions to use from the CCA to specify the neighbor +#' search space #' @param k.anchor How many neighbors (k) to use when picking anchors #' @param k.filter How many neighbors (k) to use when filtering anchors #' @param k.score How many neighbors (k) to use when scoring anchors -#' @param max.features The maximum number of features to use when specifying the neighborhood search -#' space in the anchor filtering +#' @param max.features The maximum number of features to use when specifying the +#' neighborhood search space in the anchor filtering +#' @param nn.method Method for nearest neighbor finding. Options include: rann, +#' annoy #' @param eps Error bound on the neighbor finding algorithm (from RANN) #' @param verbose Print progress bars and output #' @@ -42,27 +66,38 @@ FindIntegrationAnchors <- function( object.list = NULL, assay = NULL, + reference = NULL, anchor.features = 2000, scale = TRUE, + normalization.method = c("LogNormalize", "SCT"), + sct.clip.range = NULL, + reduction = c("cca", "rpca"), l2.norm = TRUE, dims = 1:30, k.anchor = 5, k.filter = 200, k.score = 30, max.features = 200, + nn.method = "rann", eps = 0, verbose = TRUE ) { + normalization.method <- match.arg(arg = normalization.method) + reduction <- match.arg(arg = reduction) + if (reduction == "rpca") { + reduction <- "pca" + } my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, yes = pblapply, no = future_lapply ) - object.ncells <- sapply(X = object.list, FUN = function(x) dim(x)[2]) + object.ncells <- sapply(X = object.list, FUN = function(x) dim(x = x)[2]) if (any(object.ncells <= max(dims))) { bad.obs <- which(x = object.ncells <= max(dims)) - stop("Max dimension too large: objects ", paste(bad.obs, collapse = ", "), " contain fewer than ", max(dims), " cells. \n", - "Please specify a maximum dimensions that is less than the number of cells in any ", + stop("Max dimension too large: objects ", paste(bad.obs, collapse = ", "), + " contain fewer than ", max(dims), " cells. \n Please specify a", + " maximum dimension that is less than the number of cells in any ", "object (", min(object.ncells), ").") } if (!is.null(x = assay)) { @@ -80,9 +115,45 @@ FindIntegrationAnchors <- function( assay <- sapply(X = object.list, FUN = DefaultAssay) } object.list <- CheckDuplicateCellNames(object.list = object.list) - if (is.numeric(x = anchor.features)) { + + slot <- "data" + if (normalization.method == "SCT") { + slot <- "scale.data" + scale <- FALSE + if (is.numeric(x = anchor.features)) { + stop("Please specify the anchor.features to be used. The expected ", + "workflow for integrating assays produced by SCTransform is ", + "SelectIntegrationFeatures -> PrepSCTIntegration -> ", + "FindIntegrationAnchors.") + } + sct.check <- sapply( + X = 1:length(x = object.list), + FUN = function(x) { + sct.cmd <- grep( + pattern = 'PrepSCTIntegration', + x = Command(object = object.list[[x]]), + value = TRUE + ) + # check assay has gone through PrepSCTIntegration + if (!any(grepl(pattern = "PrepSCTIntegration", x = Command(object = object.list[[x]]))) || + Command(object = object.list[[x]], command = sct.cmd, value = "assay") != assay[x]) { + stop("Object ", x, " assay - ", assay[x], " has not been processed ", + "by PrepSCTIntegration. Please run PrepSCTIntegration prior to ", + "FindIntegrationAnchors if using assays generated by SCTransform.", call. = FALSE) + } + # check that the correct features are being used + if (all(Command(object = object.list[[x]], command = sct.cmd, value = "anchor.features") != anchor.features)) { + stop("Object ", x, " assay - ", assay[x], " was processed using a ", + "different feature set than in PrepSCTIntegration. Please rerun ", + "PrepSCTIntegration with the same anchor.features for all objects in ", + "the object.list.", call. = FALSE) + } + } + ) + } + if (is.numeric(x = anchor.features) && normalization.method != "SCT") { if (verbose) { - message(paste("Computing", anchor.features, "integration features")) + message("Computing ", anchor.features, " integration features") } anchor.features <- SelectIntegrationFeatures( object.list = object.list, @@ -101,50 +172,177 @@ FindIntegrationAnchors <- function( } ) } + nn.reduction <- reduction + # if using pca, only need to compute the internal neighborhood structure once + # for each dataset + internal.neighbors <- list() + if (nn.reduction == "pca") { + k.filter <- NA + if (verbose) { + message("Computing within dataset neighborhoods") + } + k.neighbor <- max(k.anchor, k.score) + internal.neighbors <- my.lapply( + X = 1:length(x = object.list), + FUN = function(x) { + NNHelper( + data = Embeddings(object = object.list[[x]][[nn.reduction]])[, dims], + k = k.neighbor + 1, + method = nn.method, + eps = eps + ) + } + ) + } # determine pairwise combinations combinations <- expand.grid(1:length(x = object.list), 1:length(x = object.list)) combinations <- combinations[combinations$Var1 < combinations$Var2, , drop = FALSE] # determine the proper offsets for indexing anchors objects.ncell <- sapply(X = object.list, FUN = ncol) offsets <- as.vector(x = cumsum(x = c(0, objects.ncell)))[1:length(x = object.list)] - if (verbose) { - message("Finding all pairwise anchors") + if (is.null(x = reference)) { + # case for all pairwise, leave the combinations matrix the same + if (verbose) { + message("Finding all pairwise anchors") + } + } else { + reference <- unique(x = sort(x = reference)) + if (max(reference) > length(x = object.list)) { + stop('Requested reference object ', max(reference), " but only ", + length(x = object.list), " objects provided") + } + # modify the combinations matrix to retain only R-R and R-Q comparisons + if (verbose) { + message("Finding anchors between all query and reference datasets") + } + ok.rows <- (combinations$Var1 %in% reference) | (combinations$Var2 %in% reference) + combinations <- combinations[ok.rows, ] } - reduction <- "cca" - # determine all pairwise anchors + # determine all anchors all.anchors <- my.lapply( X = 1:nrow(x = combinations), FUN = function(row) { i <- combinations[row, 1] j <- combinations[row, 2] object.1 <- 
object.list[[i]] - object.2 <- object.list[[j]] - object.pair <- RunCCA( - object1 = object.1, - object2 = object.2, - assay1 = assay[i], - assay2 = assay[j], + object.1 <- DietSeurat( + object = object.list[[i]], + assays = assay[i], features = anchor.features, - num.cc = max(dims), - renormalize = FALSE, - rescale = FALSE, - verbose = verbose + counts = FALSE, + scale.data = TRUE, + dimreducs = reduction ) - if (l2.norm){ - object.pair <- L2Dim(object = object.pair, reduction = reduction) - reduction <- paste0(reduction, ".l2") + object.2 <- DietSeurat( + object = object.list[[j]], + assays = assay[j], + features = anchor.features, + counts = FALSE, + scale.data = TRUE, + dimreducs = reduction + ) + # suppress key duplication warning + suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[assay[i]]]) + DefaultAssay(object = object.1) <- "ToIntegrate" + if (reduction %in% Reductions(object = object.1)) { + slot(object = object.1[[reduction]], name = "assay.used") <- "ToIntegrate" } + object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction) + suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[assay[j]]]) + DefaultAssay(object = object.2) <- "ToIntegrate" + if (reduction %in% Reductions(object = object.2)) { + slot(object = object.2[[reduction]], name = "assay.used") <- "ToIntegrate" + } + object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction) + object.pair <- switch( + EXPR = reduction, + 'cca' = { + object.pair <- RunCCA( + object1 = object.1, + object2 = object.2, + assay1 = "ToIntegrate", + assay2 = "ToIntegrate", + features = anchor.features, + num.cc = max(dims), + renormalize = FALSE, + rescale = FALSE, + verbose = verbose + ) + if (l2.norm){ + object.pair <- L2Dim(object = object.pair, reduction = reduction) + reduction <- paste0(reduction, ".l2") + nn.reduction <- reduction + } + reduction.2 <- character() + object.pair + }, + 'pca' = { + common.features <- intersect( + x = rownames(x = Loadings(object = object.1[["pca"]])), + y = rownames(x = Loadings(object = object.2[["pca"]])) + ) + object.pair <- merge(x = object.1, y = object.2, merge.data = TRUE) + projected.embeddings.1<- t(x = GetAssayData(object = object.1, slot = "scale.data")[common.features, ]) %*% + Loadings(object = object.2[["pca"]])[common.features, ] + object.pair[['projectedpca.1']] <- CreateDimReducObject( + embeddings = rbind(projected.embeddings.1, Embeddings(object = object.2[["pca"]])), + assay = DefaultAssay(object = object.1), + key = "projectedpca1_" + ) + projected.embeddings.2 <- t(x = GetAssayData(object = object.2, slot = "scale.data")[common.features, ]) %*% + Loadings(object = object.1[["pca"]])[common.features, ] + object.pair[['projectedpca.2']] <- CreateDimReducObject( + embeddings = rbind(projected.embeddings.2, Embeddings(object = object.1[["pca"]])), + assay = DefaultAssay(object = object.2), + key = "projectedpca2_" + ) + object.pair[["pca"]] <- CreateDimReducObject( + embeddings = rbind( + Embeddings(object = object.1[["pca"]]), + Embeddings(object = object.2[["pca"]])), + assay = DefaultAssay(object = object.1), + key = "pca_" + ) + reduction <- "projectedpca.1" + reduction.2 <- "projectedpca.2" + if (l2.norm){ + slot(object = object.pair[["projectedpca.1"]], name = "cell.embeddings") <- sweep( + x = Embeddings(object = object.pair[["projectedpca.1"]]), + MARGIN = 2, + STATS = apply(X = Embeddings(object = object.pair[["projectedpca.1"]]), MARGIN = 2, FUN = sd), + FUN = "/" + ) + 
slot(object = object.pair[["projectedpca.2"]], name = "cell.embeddings") <- sweep( + x = Embeddings(object = object.pair[["projectedpca.2"]]), + MARGIN = 2, + STATS = apply(X = Embeddings(object = object.pair[["projectedpca.2"]]), MARGIN = 2, FUN = sd), + FUN = "/" + ) + object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.1") + object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.2") + reduction <- paste0(reduction, ".l2") + reduction.2 <- paste0(reduction.2, ".l2") + } + object.pair + }, + stop("Invalid reduction parameter. Please choose either cca or rpca") + ) + internal.neighbors <- internal.neighbors[c(i, j)] anchors <- FindAnchors( object.pair = object.pair, - assay = c(assay[i], assay[j]), + assay = c("ToIntegrate", "ToIntegrate"), + slot = slot, cells1 = colnames(x = object.1), cells2 = colnames(x = object.2), + internal.neighbors = internal.neighbors, reduction = reduction, + reduction.2 = reduction.2, + nn.reduction = nn.reduction, dims = dims, k.anchor = k.anchor, k.filter = k.filter, k.score = k.score, max.features = max.features, + nn.method = nn.method, eps = eps, verbose = verbose ) @@ -159,6 +357,7 @@ FindIntegrationAnchors <- function( command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE) anchor.set <- new(Class = "AnchorSet", object.list = object.list, + reference.objects = reference %||% seq_along(object.list), anchors = all.anchors, offsets = offsets, anchor.features = anchor.features, @@ -185,6 +384,7 @@ FindIntegrationAnchors <- function( #' cases where the query dataset has a much larger cell number, but the reference dataset has a #' unique assay for transfer. #' @param features Features to use for dimensional reduction +#' @param normalization.method Name of normalization method used: LogNormalize or SCT #' @param npcs Number of PCs to compute on reference. If null, then use an existing PCA structure in #' the reference object #' @param l2.norm Perform L2 normalization on the cell embeddings after dimensional reduction @@ -194,6 +394,8 @@ FindIntegrationAnchors <- function( #' @param k.score How many neighbors (k) to use when scoring anchors #' @param max.features The maximum number of features to use when specifying the neighborhood search #' space in the anchor filtering +#'@param nn.method Method for nearest neighbor finding. 
Options include: rann, +#' annoy #' @param eps Error bound on the neighbor finding algorithm (from RANN) #' @param approx.pca Use truncated singular value decomposition to approximate PCA #' @param verbose Print progress bars and output @@ -206,6 +408,7 @@ FindIntegrationAnchors <- function( FindTransferAnchors <- function( reference, query, + normalization.method = c("LogNormalize", "SCT"), reference.assay = NULL, query.assay = NULL, reduction = "pcaproject", @@ -218,6 +421,7 @@ FindTransferAnchors <- function( k.filter = 200, k.score = 30, max.features = 200, + nn.method = "rann", eps = 0, approx.pca = TRUE, verbose = TRUE @@ -233,6 +437,7 @@ FindTransferAnchors <- function( } else { projected = FALSE } + normalization.method <- match.arg(arg = normalization.method) query <- RenameCells( object = query, new.names = paste0(Cells(x = query), "_", "query") @@ -246,9 +451,11 @@ FindTransferAnchors <- function( query.assay <- query.assay %||% DefaultAssay(object = query) DefaultAssay(object = reference) <- reference.assay DefaultAssay(object = query) <- query.assay + feature.mean <- NULL + slot <- "data" ## find anchors using PCA projection if (reduction == 'pcaproject') { - if (project.query){ + if (project.query) { if (!is.null(x = npcs)) { if (verbose) { message("Performing PCA on the provided query using ", length(x = features), " features as input.") @@ -278,13 +485,60 @@ FindTransferAnchors <- function( if (verbose) { message("Performing PCA on the provided reference using ", length(x = features), " features as input.") } + if (normalization.method == "LogNormalize") { reference <- ScaleData(object = reference, features = features, verbose = FALSE) - reference <- RunPCA(object = reference, npcs = npcs, verbose = FALSE,features = features, approx = approx.pca) + } else if (normalization.method == "SCT") { + features <- intersect(x = features, y = rownames(x = query)) + query <- GetResidual(object = query, features = features, verbose = FALSE) + query[[query.assay]] <- CreateAssayObject( + counts = as.sparse(x = GetAssayData(object = query[[query.assay]], slot = "scale.data")[features, ]) + ) + query <- SetAssayData( + object = query, + slot = "data", + assay = query.assay, + new.data = GetAssayData(object = query[[query.assay]], slot = "counts") + ) + query <- SetAssayData( + object = query, + slot = "scale.data", + assay = query.assay, + new.data = as.matrix(x = GetAssayData(object = query[[query.assay]], slot = "counts")) + ) + if (IsSCT(assay = reference[[reference.assay]])) { + reference <- GetResidual(object = reference, features = features, verbose = FALSE) + } + reference[[reference.assay]] <- CreateAssayObject( + counts = as.sparse(x = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")[features, ]) + ) + reference <- SetAssayData( + object = reference, + slot = "data", + assay = reference.assay, + new.data = GetAssayData(object = reference[[reference.assay]], slot = "counts") + ) + reference <- SetAssayData( + object = reference, + slot = "scale.data", + assay = reference.assay, + new.data = as.matrix(x = GetAssayData(object = reference[[reference.assay]], slot = "counts")) + ) + feature.mean <- "SCT" + slot <- "scale.data" + } + reference <- RunPCA( + object = reference, + npcs = npcs, + verbose = FALSE, + features = features, + approx = approx.pca + ) } projected.pca <- ProjectCellEmbeddings( reference = reference, query = query, dims = dims, + feature.mean = feature.mean, verbose = verbose ) ref.pca <- Embeddings(object = reference[["pca"]])[, dims] @@ 
-300,7 +554,6 @@ FindTransferAnchors <- function( Loadings(object = combined.ob[["pcaproject"]]) <- old.loadings[, dims] } } - ## find anchors using CCA if (reduction == 'cca') { reference <- ScaleData(object = reference, features = features, verbose = FALSE) @@ -315,22 +568,25 @@ FindTransferAnchors <- function( verbose = verbose ) } - - if (l2.norm){ + if (l2.norm) { combined.ob <- L2Dim(object = combined.ob, reduction = reduction) reduction <- paste0(reduction, ".l2") } + slot <- "data" anchors <- FindAnchors( object.pair = combined.ob, assay = c(reference.assay, query.assay), + slot = slot, cells1 = colnames(x = reference), cells2 = colnames(x = query), reduction = reduction, + internal.neighbors = list(NULL, NULL), dims = dims, k.anchor = k.anchor, k.filter = k.filter, k.score = k.score, max.features = max.features, + nn.method = nn.method, eps = eps, projected = projected, verbose = verbose @@ -348,13 +604,14 @@ FindTransferAnchors <- function( return(anchor.set) } - #' Integrate data #' -#' Integrates the data +#' Perform dataset integration using a pre-computed anchorset #' #' @param anchorset Results from FindIntegrationAnchors #' @param new.assay.name Name for the new assay containing the integrated data +#' @param normalization.method Name of normalization method used: LogNormalize +#' or SCT #' @param features Vector of features to use when computing the PCA to determine the weights. Only set #' if you want a different set from those used in the anchor finding process #' @param features.to.integrate Vector of features to integrate. By default, will use the features @@ -366,6 +623,7 @@ FindTransferAnchors <- function( #' \itemize{ #' \item{A string, specifying the name of a dimension reduction present in all objects to be integrated} #' \item{A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated} +#' \item{A vector of Dimreduc objects, specifying the object to use for each object in the integration} #' \item{NULL, in which case a new PCA will be calculated and used to calculate anchor weights} #' } #' Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the @@ -385,6 +643,7 @@ FindTransferAnchors <- function( IntegrateData <- function( anchorset, new.assay.name = "integrated", + normalization.method = c("LogNormalize", "SCT"), features = NULL, features.to.integrate = NULL, dims = 1:30, @@ -397,212 +656,124 @@ IntegrateData <- function( eps = 0, verbose = TRUE ) { - object.list <- slot(object = anchorset, name = "object.list") - anchors <- slot(object = anchorset, name = "anchors") - offsets <- slot(object = anchorset, name = "offsets") + normalization.method <- match.arg(arg = normalization.method) + reference.datasets <- slot(object = anchorset, name = 'reference.objects') + object.list <- slot(object = anchorset, name = 'object.list') + anchors <- slot(object = anchorset, name = 'anchors') + ref <- object.list[reference.datasets] features <- features %||% slot(object = anchorset, name = "anchor.features") - features.to.integrate <- features.to.integrate %||% features - objects.ncell <- sapply(X = object.list, FUN = ncol) - if (!is.null(x = weight.reduction)) { - if (length(x = weight.reduction) == 1 | inherits(x = weight.reduction, what = "DimReduc")) { - if (length(x = object.list) == 2) { - weight.reduction <- list(NULL, weight.reduction) - } else if (inherits(x = weight.reduction, what = "character")) { - weight.reduction <- rep(x = weight.reduction, times = 
length(x = object.list)) - } else { - stop("Invalid input for weight.reduction. Please specify either the names of the dimension", - "reduction for each object in the list or provide DimReduc objects.") - } - } - if (length(x = weight.reduction) != length(x = object.list)) { - stop("Please specify a dimension reduction for each object, or one dimension reduction to be used for all objects") - } - available.reductions <- lapply(X = object.list, FUN = FilterObjects, classes.keep = 'DimReduc') - for (ii in 1:length(x = weight.reduction)) { - if (ii == 1 & is.null(x = weight.reduction[[ii]])) next - if (!inherits(x = weight.reduction[[ii]], what = "DimReduc")) { - if (!weight.reduction[[ii]] %in% available.reductions[[ii]]) { - stop("Requested dimension reduction (", weight.reduction[[ii]], ") is not present in object ", ii) - } - } - } - } - if (is.null(x = sample.tree)) { - similarity.matrix <- CountAnchors( - anchor.df = anchors, - offsets = offsets, - obj.lengths = objects.ncell - ) - sample.tree <- BuildSampleTree(similarity.matrix = similarity.matrix) - } - cellnames.list <- list() - for (ii in 1:length(x = object.list)) { - cellnames.list[[ii]] <- colnames(x = object.list[[ii]]) - } unintegrated <- merge( x = object.list[[1]], y = object.list[2:length(x = object.list)] ) - names(x = object.list) <- as.character(-(1:length(x = object.list))) - for (ii in 1:nrow(x = sample.tree)) { - merge.pair <- as.character(x = sample.tree[ii, ]) - length1 <- ncol(x = object.list[[merge.pair[1]]]) - length2 <- ncol(x = object.list[[merge.pair[2]]]) - if (!(preserve.order) & (length2 > length1)) { - merge.pair <- rev(merge.pair) - sample.tree[ii, ] <- as.numeric(merge.pair) - } - object.1 <- object.list[[merge.pair[1]]] - object.2 <- object.list[[merge.pair[2]]] - datasets <- ParseMergePair(sample.tree, ii) - if (verbose) { - message( - "Merging dataset ", - paste(datasets$object2, collapse = " "), - " into ", - paste(datasets$object1, collapse = " ") + if (normalization.method == "SCT") { + vst.set <- list() + for (i in 1:length(x = object.list)) { + assay <- DefaultAssay(object = object.list[[i]]) + object.list[[i]][[assay]] <- CreateAssayObject( + data = GetAssayData(object = object.list[[i]], assay = assay, slot = "scale.data") ) } - cells1 <- colnames(x = object.1) - cells2 <- colnames(x = object.2) - merged.obj <- merge(x = object.1, y = object.2, merge.data = TRUE) - if (verbose) { - message("Extracting anchors for merged samples") - } - filtered.anchors <- anchors[anchors$dataset1 %in% datasets$object1 & anchors$dataset2 %in% datasets$object2, ] - cell1.name <- sapply(X = 1:nrow(x = filtered.anchors), FUN = function(x) { - cellnames.list[[filtered.anchors$dataset1[x]]][filtered.anchors$cell1[x]] - }) - cell2.name <- sapply(X = 1:nrow(x = filtered.anchors), FUN = function(x) { - cellnames.list[[filtered.anchors$dataset2[x]]][filtered.anchors$cell2[x]] - }) - cell1.offset <- sapply( - X = 1:length(x = cell1.name), - FUN = function(x) { - return(which(cells1 == cell1.name[x])) - } - ) - cell2.offset <- sapply( - X = 1:length(x = cell2.name), - FUN = function(x) { - return(which(cells2 == cell2.name[x])) - } - ) - filtered.anchors.new <- filtered.anchors - filtered.anchors.new[, 1] <- cell1.offset - filtered.anchors.new[, 2] <- cell2.offset - integration.name <- "integrated" - merged.obj <- SetIntegrationData( - object = merged.obj, - integration.name = integration.name, - slot = 'anchors', - new.data = filtered.anchors.new - ) - merged.obj <- SetIntegrationData( - object = merged.obj, - 
integration.name = integration.name, - slot = 'neighbors', - new.data = list('cells1' = cells1, 'cells2' = cells2) - ) - merged.obj <- FindIntegrationMatrix( - object = merged.obj, - integration.name = integration.name, - features.integrate = features.to.integrate, - verbose = verbose - ) - assay <- DefaultAssay(object = merged.obj) - if (is.null(weight.reduction) | (as.numeric(merge.pair[[2]]) > 0)) { - merged.obj <- ScaleData( - object = merged.obj, - features = features, - verbose = FALSE - ) - merged.obj <- RunPCA( - object = merged.obj, - npcs = max(dims), - verbose = FALSE, - features = features + } + # perform pairwise integration of reference objects + reference.integrated <- PairwiseIntegrateReference( + anchorset = anchorset, + new.assay.name = new.assay.name, + normalization.method = normalization.method, + features = features, + features.to.integrate = features.to.integrate, + dims = dims, + k.weight = k.weight, + weight.reduction = weight.reduction, + sd.weight = sd.weight, + sample.tree = sample.tree, + preserve.order = preserve.order, + do.cpp = do.cpp, + eps = eps, + verbose = verbose + ) + + if (length(x = reference.datasets) == length(x = object.list)) { + if (normalization.method == "SCT") { + reference.integrated <- SetAssayData( + object = reference.integrated, + assay = new.assay.name, + slot = "scale.data", + new.data = ScaleData( + object = GetAssayData(object = reference.integrated, assay = new.assay.name, slot = "scale.data"), + do.scale = FALSE, + do.center = TRUE, + verbose = FALSE + ) ) - dr.weights <- merged.obj[['pca']] - } else { - dataset.id <- -(as.numeric(merge.pair[[2]])) - dr <- weight.reduction[[dataset.id]] - if (inherits(x = dr, what = "DimReduc")) { - dr.weights <- dr - } else { - dr.weights <- object.2[[dr]] - } + reference.integrated[[assay]] <- unintegrated[[assay]] } - merged.obj <- FindWeights( - object = merged.obj, - integration.name = integration.name, - reduction = dr.weights, - cpp = do.cpp, - dims = dims, - k = k.weight, - sd.weight = sd.weight, - eps = eps, - verbose = verbose + return(reference.integrated) + } else { + active.assay <- DefaultAssay(object = ref[[1]]) + reference.integrated[[active.assay]] <- NULL + reference.integrated[[active.assay]] <- CreateAssayObject( + data = GetAssayData( + object = reference.integrated[[new.assay.name]], + slot = 'data' + ) ) - merged.obj <- TransformDataMatrix( - object = merged.obj, + DefaultAssay(object = reference.integrated) <- active.assay + reference.integrated[[new.assay.name]] <- NULL + VariableFeatures(object = reference.integrated) <- features + # Extract the query objects (if any) and map to reference + integrated.data <- MapQuery( + anchorset = anchorset, + reference = reference.integrated, new.assay.name = new.assay.name, + normalization.method = normalization.method, + features = features, features.to.integrate = features.to.integrate, - integration.name = integration.name, + dims = dims, + k.weight = k.weight, + weight.reduction = weight.reduction, + sd.weight = sd.weight, + sample.tree = sample.tree, + preserve.order = preserve.order, do.cpp = do.cpp, + eps = eps, verbose = verbose ) - integrated.matrix <- GetAssayData( - object = merged.obj, - assay = new.assay.name, - slot = 'data' + + # Construct final assay object + integrated.assay <- CreateAssayObject( + data = integrated.data ) - # merged.obj <- SetAssayData( - # object = merged.obj, - # assay = assay, - # slot = 'data', - # new.data = integrated.matrix - # ) - merged.obj[[assay]] <- CreateAssayObject(data = 
integrated.matrix) - object.list[[as.character(x = ii)]] <- merged.obj - object.list[[merge.pair[[1]]]] <- NULL - object.list[[merge.pair[[2]]]] <- NULL - invisible(x = CheckGC()) + if (normalization.method == "SCT") { + integrated.assay <- SetAssayData( + object = integrated.assay, + slot = "scale.data", + new.data = ScaleData( + object = GetAssayData(object = integrated.assay, slot = "data"), + do.scale = FALSE, + do.center = TRUE, + verbose = FALSE + ) + ) + integrated.assay <- SetAssayData( + object = integrated.assay, + slot = "data", + new.data = GetAssayData(object = integrated.assay, slot = "scale.data") + ) + } + unintegrated[[new.assay.name]] <- integrated.assay + unintegrated <- SetIntegrationData( + object = unintegrated, + integration.name = "Integration", + slot = "anchors", + new.data = anchors + ) + DefaultAssay(object = unintegrated) <- new.assay.name + VariableFeatures(object = unintegrated) <- features + unintegrated[["FindIntegrationAnchors"]] <- slot(object = anchorset, name = "command") + unintegrated <- LogSeuratCommand(object = unintegrated) + return(unintegrated) } - integrated.data <- GetAssayData( - object = object.list[[as.character(x = ii)]], - assay = assay, - slot = 'data' - ) - integrated.data <- integrated.data[, colnames(x = unintegrated)] - new.assay <- new( - Class = 'Assay', - counts = new(Class = "dgCMatrix"), - data = integrated.data, - scale.data = matrix(), - var.features = vector(), - meta.features = data.frame(row.names = rownames(x = integrated.data)), - misc = NULL - ) - unintegrated[[new.assay.name]] <- new.assay - # "unintegrated" now contains the integrated assay - DefaultAssay(object = unintegrated) <- new.assay.name - VariableFeatures(object = unintegrated) <- features - unintegrated <- SetIntegrationData( - object = unintegrated, - integration.name = "Integration", - slot = "anchors", - new.data = anchors - ) - unintegrated <- SetIntegrationData( - object = unintegrated, - integration.name = "Integration", - slot = "sample.tree", - new.data = sample.tree - ) - unintegrated[["FindIntegrationAnchors"]] <- slot(object = anchorset, name = "command") - unintegrated <- LogSeuratCommand(object = unintegrated) - return(unintegrated) } #' Calculate the local structure preservation metric @@ -693,91 +864,567 @@ LocalStruct <- function( length(x = intersect(x = nn.orig[x, ], y = nn.corrected[x, ])) / neighbors }) if (verbose) { - setTxtProgressBar(pb = pb, value = i) + setTxtProgressBar(pb = pb, value = i) + } + } + names(x = local.struct) <- names(x = ob.list) + return(local.struct) +} + +#' Map queries to reference +#' +#' Map query objects onto assembled reference dataset +#' +#' @param anchorset Anchorset found by FindIntegrationAnchors +#' @param reference Pre-integrated reference dataset to map query datasets to +#' @param new.assay.name Name for the new assay containing the integrated data +#' @param normalization.method Name of normalization method used: LogNormalize +#' or SCT +#' @param features Vector of features to use when computing the PCA to determine the weights. Only set +#' if you want a different set from those used in the anchor finding process +#' @param features.to.integrate Vector of features to integrate. By default, will use the features +#' used in anchor finding. +#' @param dims Number of PCs to use in the weighting procedure +#' @param k.weight Number of neighbors to consider when weighting +#' @param weight.reduction Dimension reduction to use when calculating anchor weights. 
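[Editor's note] To make the IntegrateData refactor above concrete: when FindIntegrationAnchors was given a `reference` argument, only those datasets are pairwise integrated (PairwiseIntegrateReference) and the remaining datasets are mapped onto the result (MapQuery). A hedged sketch, with `obj.list` and `features` as placeholders:

anchors <- FindIntegrationAnchors(
  object.list = obj.list,
  anchor.features = features,
  reference = c(1, 2)  # assumed indices of the reference datasets in obj.list
)
integrated <- IntegrateData(anchorset = anchors, dims = 1:30)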
+#' This can be either: +#' \itemize{ +#' \item{A string, specifying the name of a dimension reduction present in all objects to be integrated} +#' \item{A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated} +#' \item{NULL, in which case a new PCA will be calculated and used to calculate anchor weights} +#' } +#' Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the +#' first merge between reference and query, as the merged object will subsequently contain more cells than was in +#' query, and weights will need to be calculated for all cells in the object. +#' @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting +#' @param sample.tree Specify the order of integration. If NULL, will compute automatically. +#' @param preserve.order Do not reorder objects based on size for each pairwise integration. +#' @param do.cpp Run cpp code where applicable +#' @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}}) +#' @param verbose Print progress bars and output +#' +#' @return Returns an integrated matrix +#' +MapQuery <- function( + anchorset, + reference, + new.assay.name = "integrated", + normalization.method = c("LogNormalize", "SCT"), + features = NULL, + features.to.integrate = NULL, + dims = 1:30, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, + do.cpp = TRUE, + eps = 0, + verbose = TRUE +) { + normalization.method <- match.arg(arg = normalization.method) + reference.datasets <- slot(object = anchorset, name = 'reference.objects') + object.list <- slot(object = anchorset, name = 'object.list') + anchors <- slot(object = anchorset, name = 'anchors') + features <- features %||% slot(object = anchorset, name = "anchor.features") + features.to.integrate <- features.to.integrate %||% features + cellnames.list <- list() + for (ii in 1:length(x = object.list)) { + cellnames.list[[ii]] <- colnames(x = object.list[[ii]]) + } + if (length(x = reference.datasets) == length(x = object.list)) { + query.datasets <- NULL + } else { + query.datasets <- setdiff(x = seq_along(along.with = object.list), y = reference.datasets) + } + my.lapply <- ifelse( + test = verbose && nbrOfWorkers() == 1, + yes = pblapply, + no = future_lapply + ) + query.corrected <- my.lapply( + X = query.datasets, + FUN = function(dataset1) { + if (verbose) { + message("Integrating dataset ", dataset1, " with reference dataset") + } + filtered.anchors <- anchors[anchors$dataset1 %in% reference.datasets & anchors$dataset2 == dataset1, ] + integrated <- RunIntegration( + filtered.anchors = filtered.anchors, + reference = reference, + query = object.list[[dataset1]], + new.assay.name = new.assay.name, + normalization.method = normalization.method, + cellnames.list = cellnames.list, + features.to.integrate = features.to.integrate, + weight.reduction = weight.reduction, + features = features, + dims = dims, + do.cpp = do.cpp, + k.weight = k.weight, + sd.weight = sd.weight, + eps = eps, + verbose = verbose + ) + return(integrated) + } + ) + reference.integrated <- GetAssayData( + object = reference, + slot = 'data' + )[features.to.integrate, ] + all.integrated <- do.call(cbind, c(reference.integrated, query.corrected)) + return(all.integrated) +} + +#' Calculates a mixing metric +#' +#' Here we compute a measure of how well mixed a composite dataset is. 
To +#' compute, we first examine the local neighborhood for each cell (looking at +#' max.k neighbors) and determine for each group (could be the dataset after +#' integration) the k nearest neighbor and what rank that neighbor was in the +#' overall neighborhood. We then take the median across all groups as the mixing +#' metric per cell. +#' +#' @param object Seurat object +#' @param grouping.var Grouping variable for dataset +#' @param reduction Which dimensionally reduced space to use +#' @param dims Dimensions to use +#' @param k Neighbor number to examine per group +#' @param max.k Maximum size of local neighborhood to compute +#' @param eps Error bound on the neighbor finding algorithm (from RANN) +#' @param verbose Displays progress bar +#' +#' @return Returns a vector of values representing the entropy metric from each +#' bootstrapped iteration. +#' +#' @importFrom RANN nn2 +#' @importFrom pbapply pbsapply +#' @importFrom future.apply future_sapply +#' @importFrom future nbrOfWorkers +#' @export +#' +MixingMetric <- function( + object, + grouping.var, + reduction = "pca", + dims = 1:2, + k = 5, + max.k = 300, + eps = 0, + verbose = TRUE +) { + my.sapply <- ifelse( + test = verbose && nbrOfWorkers() == 1, + yes = pbsapply, + no = future_sapply + ) + embeddings <- Embeddings(object = object[[reduction]])[, dims] + nn <- nn2( + data = embeddings, + k = max.k, + eps = eps + ) + group.info <- object[[grouping.var, drop = TRUE]] + groups <- unique(x = group.info) + mixing <- my.sapply( + X = 1:ncol(x = object), + FUN = function(x) { + sapply(X = groups, FUN = function(y) { + which(x = group.info[nn$nn.idx[x, ]] == y)[k] + }) + } + ) + mixing[is.na(x = mixing)] <- max.k + mixing <- apply( + X = mixing, + MARGIN = 2, + FUN = median + ) + return(mixing) +} + +#' Pairwise dataset integration +#' +#' Used for reference construction +#' +#' @param anchorset Results from FindIntegrationAnchors +#' @param new.assay.name Name for the new assay containing the integrated data +#' @param normalization.method Name of normalization method used: LogNormalize +#' or SCT +#' @param features Vector of features to use when computing the PCA to determine +#' the weights. Only set if you want a different set from those used in the +#' anchor finding process +#' @param features.to.integrate Vector of features to integrate. By default, +#' will use the features used in anchor finding. +#' @param dims Number of PCs to use in the weighting procedure +#' @param k.weight Number of neighbors to consider when weighting +#' @param weight.reduction Dimension reduction to use when calculating anchor +#' weights. This can be either: +#' \itemize{ +#' \item{A string, specifying the name of a dimension reduction present in +#' all objects to be integrated} +#' \item{A vector of strings, specifying the name of a dimension reduction to +#' use for each object to be integrated} +#' \item{NULL, in which case a new PCA will be calculated and used to +#' calculate anchor weights} +#' } +#' Note that, if specified, the requested dimension reduction will only be used +#' for calculating anchor weights in the first merge between reference and +#' query, as the merged object will subsequently contain more cells than was in +#' query, and weights will need to be calculated for all cells in the object. +#' @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting +#' @param sample.tree Specify the order of integration. If NULL, will compute +#' automatically. 
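[Editor's note] A usage sketch for MixingMetric as defined above, assuming `integrated` carries a "pca" reduction and a metadata column "dataset" recording each cell's source object (both assumptions, not part of this diff):

mixing <- MixingMetric(
  object = integrated,
  grouping.var = "dataset",
  reduction = "pca",
  dims = 1:30,
  k = 5,
  max.k = 300
)
summary(mixing)  # lower per-cell values indicate better mixing across datasets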
+#' @param preserve.order Do not reorder objects based on size for each pairwise +#' integration. +#' @param do.cpp Run cpp code where applicable +#' @param eps Error bound on the neighbor finding algorithm (from +#' \code{\link{RANN}}) +#' @param verbose Print progress bars and output +#' +#' @return Returns a Seurat object with a new integrated Assay +#' +PairwiseIntegrateReference <- function( + anchorset, + new.assay.name = "integrated", + normalization.method = c("LogNormalize", "SCT"), + features = NULL, + features.to.integrate = NULL, + dims = 1:30, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, + do.cpp = TRUE, + eps = 0, + verbose = TRUE +) { + object.list <- slot(object = anchorset, name = "object.list") + reference.objects <- slot(object = anchorset, name = "reference.objects") + features <- features %||% slot(object = anchorset, name = "anchor.features") + features.to.integrate <- features.to.integrate %||% features + if (length(x = reference.objects) == 1) { + ref.obj <- object.list[[reference.objects]] + ref.obj[[new.assay.name]] <- CreateAssayObject( + data = GetAssayData(ref.obj, slot = 'data')[features.to.integrate, ] + ) + DefaultAssay(object = ref.obj) <- new.assay.name + return(ref.obj) + } + anchors <- slot(object = anchorset, name = "anchors") + offsets <- slot(object = anchorset, name = "offsets") + objects.ncell <- sapply(X = object.list, FUN = ncol) + if (!is.null(x = weight.reduction)) { + if (length(x = weight.reduction) == 1 | inherits(x = weight.reduction, what = "DimReduc")) { + if (length(x = object.list) == 2) { + weight.reduction <- list(NULL, weight.reduction) + } else if (inherits(x = weight.reduction, what = "character")) { + weight.reduction <- rep(x = weight.reduction, times = length(x = object.list)) + } else { + stop("Invalid input for weight.reduction. 
Please specify either the names of the dimension", + "reduction for each object in the list or provide DimReduc objects.") + } + } + if (length(x = weight.reduction) != length(x = object.list)) { + stop("Please specify a dimension reduction for each object, or one dimension reduction to be used for all objects") + } + available.reductions <- lapply(X = object.list, FUN = FilterObjects, classes.keep = 'DimReduc') + for (ii in 1:length(x = weight.reduction)) { + if (ii == 1 & is.null(x = weight.reduction[[ii]])) next + if (!inherits(x = weight.reduction[[ii]], what = "DimReduc")) { + if (!weight.reduction[[ii]] %in% available.reductions[[ii]]) { + stop("Requested dimension reduction (", weight.reduction[[ii]], ") is not present in object ", ii) + } + weight.reduction[[ii]] <- object.list[[ii]][[weight.reduction[[ii]]]] + } + } + } + if (is.null(x = sample.tree)) { + similarity.matrix <- CountAnchors( + anchor.df = anchors, + offsets = offsets, + obj.lengths = objects.ncell + ) + similarity.matrix <- similarity.matrix[reference.objects, reference.objects] + sample.tree <- BuildSampleTree(similarity.matrix = similarity.matrix) + sample.tree <- AdjustSampleTree(x = sample.tree, reference.objects = reference.objects) + } + cellnames.list <- list() + for (ii in 1:length(x = object.list)) { + cellnames.list[[ii]] <- colnames(x = object.list[[ii]]) + } + unintegrated <- merge( + x = object.list[[reference.objects[[1]]]], + y = object.list[reference.objects[2:length(x = reference.objects)]] + ) + names(x = object.list) <- as.character(-(1:length(x = object.list))) + if (verbose & (length(x = reference.objects) != length(x = object.list))) { + message("Building integrated reference") + } + for (ii in 1:nrow(x = sample.tree)) { + merge.pair <- as.character(x = sample.tree[ii, ]) + length1 <- ncol(x = object.list[[merge.pair[1]]]) + length2 <- ncol(x = object.list[[merge.pair[2]]]) + if (!(preserve.order) & (length2 > length1)) { + merge.pair <- rev(x = merge.pair) + sample.tree[ii, ] <- as.numeric(merge.pair) + } + object.1 <- DietSeurat( + object = object.list[[merge.pair[1]]], + assays = DefaultAssay(object = object.list[[merge.pair[1]]]), + counts = FALSE + ) + object.2 <- DietSeurat( + object = object.list[[merge.pair[2]]], + assays = DefaultAssay(object = object.list[[merge.pair[2]]]), + counts = FALSE + ) + # suppress key duplication warning + suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[DefaultAssay(object = object.1)]]) + DefaultAssay(object = object.1) <- "ToIntegrate" + object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate") + suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[DefaultAssay(object = object.2)]]) + DefaultAssay(object = object.2) <- "ToIntegrate" + object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate") + + datasets <- ParseMergePair(sample.tree, ii) + if (verbose) { + message( + "Merging dataset ", + paste(datasets$object2, collapse = " "), + " into ", + paste(datasets$object1, collapse = " ") + ) + } + merged.obj <- merge(x = object.1, y = object.2, merge.data = TRUE) + if (verbose) { + message("Extracting anchors for merged samples") } + filtered.anchors <- anchors[anchors$dataset1 %in% datasets$object1 & anchors$dataset2 %in% datasets$object2, ] + integrated.matrix <- RunIntegration( + filtered.anchors = filtered.anchors, + normalization.method = normalization.method, + reference = object.1, + query = object.2, + cellnames.list = cellnames.list, + new.assay.name = new.assay.name, + features.to.integrate = features.to.integrate, + 
features = features, + dims = dims, + weight.reduction = weight.reduction, + do.cpp = do.cpp, + k.weight = k.weight, + sd.weight = sd.weight, + eps = eps, + verbose = verbose + ) + integrated.matrix <- cbind(integrated.matrix, GetAssayData(object = object.1, slot = 'data')[features.to.integrate, ]) + merged.obj[[new.assay.name]] <- CreateAssayObject(data = integrated.matrix) + DefaultAssay(object = merged.obj) <- new.assay.name + object.list[[as.character(x = ii)]] <- merged.obj + object.list[[merge.pair[[1]]]] <- NULL + object.list[[merge.pair[[2]]]] <- NULL + invisible(x = CheckGC()) } - names(x = local.struct) <- names(x = ob.list) - return(local.struct) + integrated.data <- GetAssayData( + object = object.list[[as.character(x = ii)]], + assay = new.assay.name, + slot = 'data' + ) + integrated.data <- integrated.data[, colnames(x = unintegrated)] + new.assay <- new( + Class = 'Assay', + counts = new(Class = "dgCMatrix"), + data = integrated.data, + scale.data = matrix(), + var.features = vector(), + meta.features = data.frame(row.names = rownames(x = integrated.data)), + misc = NULL + ) + unintegrated[[new.assay.name]] <- new.assay + # "unintegrated" now contains the integrated assay + DefaultAssay(object = unintegrated) <- new.assay.name + VariableFeatures(object = unintegrated) <- features + if (normalization.method == "SCT"){ + unintegrated[[new.assay.name]] <- SetAssayData( + object = unintegrated[[new.assay.name]], + slot = "scale.data", + new.data = as.matrix(x = GetAssayData(object = unintegrated[[new.assay.name]], slot = "data")) + ) + } + unintegrated <- SetIntegrationData( + object = unintegrated, + integration.name = "Integration", + slot = "anchors", + new.data = anchors + ) + unintegrated <- SetIntegrationData( + object = unintegrated, + integration.name = "Integration", + slot = "sample.tree", + new.data = sample.tree + ) + unintegrated[["FindIntegrationAnchors"]] <- slot(object = anchorset, name = "command") + unintegrated <- LogSeuratCommand(object = unintegrated) + return(unintegrated) } -#' Calculates a mixing metric +#' Prepare an object list that has been run through SCTransform for integration #' -#' Here we compute a measure of how well mixed a composite dataset is. To compute, we first examine -#' the local neighborhood for each cell (looking at max.k neighbors) and determine for each group -#' (could be the dataset after integration) the k nearest neighbor and what rank that neighbor was -#' in the overall neighborhood. We then take the median across all groups as the mixing metric per -#' cell. -#' -#' @param object Seurat object -#' @param grouping.var Grouping variable for dataset -#' @param reduction Which dimensionally reduced space to use -#' @param dims Dimensions to use -#' @param k Neighbor number to examine per group -#' @param max.k Maximum size of local neighborhood to compute -#' @param eps Error bound on the neighbor finding algorithm (from RANN) -#' @param verbose Displays progress bar +#' @param object.list A list of objects to prep for integration +#' @param assay Name or vector of assay names (one for each object) that correspond +#' to the assay that SCTransform has been run on. If NULL, the current default +#' assay for each object is used. +#' @param anchor.features Can be either: +#' \itemize{ +#' \item{A numeric value. 
This will call \code{\link{SelectIntegrationFeatures}} +#' to select the provided number of features to be used in anchor finding} +#' \item{A vector of features to be used as input to the anchor finding +#' process} +#' } +#' @param sct.clip.range Numeric of length two specifying the min and max values +#' the Pearson residual will be clipped to +#' @param verbose Display output/messages #' -#' @return Returns a vector of values representing the entropy metric from each -#' bootstrapped iteration. +#' @return An object list with the \code{scale.data} slots set to the anchor +#' features #' -#' @importFrom RANN nn2 -#' @importFrom pbapply pbsapply -#' @importFrom future.apply future_sapply +#' @importFrom pbapply pblapply +#' @importFrom methods slot slot<- #' @importFrom future nbrOfWorkers +#' @importFrom future.apply future_lapply +#' #' @export #' -MixingMetric <- function( - object, - grouping.var, - reduction = "pca", - dims = 1:2, - k = 5, - max.k = 300, - eps = 0, +PrepSCTIntegration <- function( + object.list, + assay = NULL, + anchor.features = 2000, + sct.clip.range = NULL, verbose = TRUE ) { - my.sapply <- ifelse( + my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, - yes = pbsapply, - no = future_sapply + yes = pblapply, + no = future_lapply ) - embeddings <- Embeddings(object = object[[reduction]])[, dims] - nn <- nn2( - data = embeddings, - k = max.k, - eps = eps + assay <- assay %||% sapply(X = object.list, FUN = DefaultAssay) + assay <- rep_len(x = assay, length.out = length(x = object.list)) + objects.names <- names(x = object.list) + object.list <- lapply( + X = 1:length(x = object.list), + FUN = function(i) { + DefaultAssay(object = object.list[[i]]) <- assay[i] + return(object.list[[i]]) + } ) - group.info <- object[[grouping.var, drop = TRUE]] - groups <- unique(x = group.info) - mixing <- my.sapply( - X = 1:ncol(x = object), - FUN = function(x) { - sapply(X = groups, FUN = function(y) { - which(x = group.info[nn$nn.idx[x, ]] == y)[k] - }) + sct.check <- vapply( + X = 1:length(x = object.list), + FUN = function(i) { + sct.check <- IsSCT(assay = object.list[[i]][[assay[i]]]) + if (!sct.check) { + if ("FindIntegrationAnchors" %in% Command(object = object.list[[i]]) && + Command(object = object.list[[i]], command = "FindIntegrationAnchors", value = "normalization.method") == "SCT") { + sct.check <- TRUE + } + } + return(sct.check) + }, + FUN.VALUE = logical(length = 1L), + USE.NAMES = FALSE + ) + if (!all(sct.check)) { + stop( + "The following assays have not been processed with SCTransform:\n", + paste( + ' object:', + which(x = !sct.check, useNames = FALSE), + '- assay:', + assay[!sct.check], + collapse = '\n' + ), + call. 
= FALSE + ) + } + + object.list <- lapply( + X = 1:length(x = object.list), + FUN = function(i) { + vst_out <- Misc(object = object.list[[i]][[assay[i]]], slot = "vst.out") + vst_out$cell_attr <- vst_out$cell_attr[Cells(x = object.list[[i]]), ] + vst_out$cells_step1 <- intersect(x = vst_out$cells_step1, y = Cells(x = object.list[[i]])) + suppressWarnings(expr = Misc(object = object.list[[i]][[assay[i]]], slot = "vst.out") <- vst_out) + return(object.list[[i]]) } ) - mixing[is.na(x = mixing)] <- max.k - mixing <- apply( - X = mixing, - MARGIN = 2, - FUN = median + + if (is.numeric(x = anchor.features)) { + anchor.features <- SelectIntegrationFeatures( + object.list = object.list, + nfeatures = anchor.features, + verbose = verbose + ) + } + object.list <- my.lapply( + X = 1:length(x = object.list), + FUN = function(i) { + if (!IsSCT(assay = object.list[[i]][[assay[i]]])) { + return(object.list[[i]]) + } + obj <- if (is.null(x = sct.clip.range)) { + GetResidual( + object = object.list[[i]], + features = anchor.features, + assay = assay[i], + verbose = FALSE + ) + } else { + GetResidual( + object = object.list[[i]], + assay = assay[i], + features = anchor.features, + replace.value = TRUE, + clip.range = sct.clip.range, + verbose = FALSE + ) + } + scale.data <- GetAssayData( + object = obj, + assay = assay[i], + slot = 'scale.data' + ) + obj <- SetAssayData( + object = obj, + slot = 'scale.data', + new.data = scale.data[anchor.features, ], + assay = assay[i] + ) + return(obj) + } ) - return(mixing) + assays.used <- assay + for (i in 1:length(x = object.list)) { + assay <- as.character(x = assays.used[i]) + object.list[[i]] <- LogSeuratCommand(object = object.list[[i]]) + } + names(x = object.list) <- objects.names + return(object.list) } #' Select integration features #' -#' Choose the features to use when integrating multiple datasets. This function ranks features by -#' the number of datasets they appear in, breaking ties by the median rank across datasets. It -#' returns the highest features by this ranking. +#' Choose the features to use when integrating multiple datasets. This function +#' ranks features by the number of datasets they appear in, breaking ties by the +#' median rank across datasets. It returns the highest features by this ranking. #' #' @param object.list List of seurat objects #' @param nfeatures Number of features to return #' @param assay Name of assay from which to pull the variable features. #' @param verbose Print messages -#' @param fvf.nfeatures nfeatures for FindVariableFeatures. Used if VariableFeatures have not been -#' set for any object in object.list. +#' @param fvf.nfeatures nfeatures for FindVariableFeatures. Used if +#' VariableFeatures have not been set for any object in object.list. #' @param ... Additional parameters to \code{\link{FindVariableFeatures}} #' #' @return A vector of selected features @@ -856,16 +1503,20 @@ SelectIntegrationFeatures <- function( #' Transfers the labels #' #' @param anchorset Results from FindTransferAnchors -#' @param refdata Data to transfer. Should be either a vector where the names correspond to -#' reference cells, or a matrix, where the column names correspond to the reference cells. -#' @param weight.reduction Dimensional reduction to use for the weighting. Options are: +#' @param refdata Data to transfer. Should be either a vector where the names +#' correspond to reference cells, or a matrix, where the column names correspond +#' to the reference cells. 
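[Editor's note] PrepSCTIntegration above slots into the SCT workflow as follows; a sketch starting from a list of un-normalized objects, with `obj.list` as a placeholder:

obj.list <- lapply(X = obj.list, FUN = SCTransform)
features <- SelectIntegrationFeatures(object.list = obj.list, nfeatures = 3000)
obj.list <- PrepSCTIntegration(object.list = obj.list, anchor.features = features)
anchors <- FindIntegrationAnchors(
  object.list = obj.list,
  normalization.method = "SCT",
  anchor.features = features
)
integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")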
+#' @param weight.reduction Dimensional reduction to use for the weighting. +#' Options are: #' \itemize{ #' \item{pcaproject: Use the projected PCA used for anchor building} #' \item{pca: Use an internal PCA on the query only} #' \item{cca: Use the CCA used for anchor building} -#' \item{custom DimReduc: User provided DimReduc object computed on the query cells} +#' \item{custom DimReduc: User provided DimReduc object computed on the query +#' cells} #' } -#' @param l2.norm Perform L2 normalization on the cell embeddings after dimensional reduction +#' @param l2.norm Perform L2 normalization on the cell embeddings after +#' dimensional reduction #' @param dims Number of PCs to use in the weighting procedure #' @param k.weight Number of neighbors to consider when weighting #' @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting @@ -874,8 +1525,9 @@ SelectIntegrationFeatures <- function( #' @param verbose Print progress bars and output #' @param slot Slot to store the imputed data #' -#' @return If refdata is a vector, returns a dataframe with label predictions. If refdata is a -#' matrix, returns an Assay object where the imputed data has been stored in the provided slot. +#' @return If refdata is a vector, returns a dataframe with label predictions. +#' If refdata is a matrix, returns an Assay object where the imputed data has +#' been stored in the provided slot. #' #' @export #' @@ -1065,8 +1717,9 @@ TransferData <- function( # @param offsets size of each dataset in anchor dataframe # @param obj.length Vector of object lengths # -# @return Anchor dataframe with additional columns corresponding to the dataset of each cell -# +# @return Anchor dataframe with additional columns corresponding to the dataset +# of each cell + AddDatasetID <- function( anchor.df, offsets, @@ -1087,6 +1740,24 @@ AddDatasetID <- function( return(anchor.df) } +# Adjust sample tree to only include given reference objects +# +# @param x A sample tree +# @param reference.objects a sorted list of reference object IDs +# +AdjustSampleTree <- function(x, reference.objects) { + for (i in 1:nrow(x = x)) { + obj.id <- -(x[i, ]) + if (obj.id[[1]] > 0) { + x[i, 1] <- -(reference.objects[[obj.id[[1]]]]) + } + if (obj.id[[2]] > 0) { + x[i, 2] <- -(reference.objects[[obj.id[[2]]]]) + } + } + return(x) +} + # Add info to anchor matrix # # @param object Seurat object @@ -1094,8 +1765,9 @@ AddDatasetID <- function( # @param annotation Name in metadata to annotate anchors with # @param object.list List of objects using in FindIntegrationAnchors call # -# @return Returns the anchor dataframe with additional columns for annotation metadata -# +# @return Returns the anchor dataframe with additional columns for annotation +# metadata + AnnotateAnchors <- function( object, toolname = "integrated", @@ -1170,11 +1842,13 @@ ConstructNNMat <- function(nn.idx, offset1, offset2, dims) { # Count anchors between all datasets # -# Counts anchors between each dataset and scales based on total number of cells in the datasets +# Counts anchors between each dataset and scales based on total number of cells +# in the datasets # # @param anchor.df Matrix of anchors -# @param offsets Dataset sizes in anchor matrix. Used to identify boundaries of each dataset in -# matrix, so that total pairwise anchors between all datasets can be counted +# @param offsets Dataset sizes in anchor matrix. 
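[Editor's note] A sketch of the label-transfer path documented above, with `reference`, `query`, and the metadata column "celltype" as assumed placeholders:

t.anchors <- FindTransferAnchors(reference = reference, query = query, dims = 1:30)
predictions <- TransferData(
  anchorset = t.anchors,
  refdata = reference$celltype,  # named vector: names are reference cell names
  dims = 1:30
)
query <- AddMetaData(object = query, metadata = predictions)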
Used to identify boundaries of +# each dataset in matrix, so that total pairwise anchors between all datasets +# can be counted # # @return Returns a similarity matrix # @@ -1203,9 +1877,11 @@ CountAnchors <- function( FilterAnchors <- function( object, assay = NULL, + slot = "data", integration.name = 'integrated', features = NULL, k.filter = 200, + nn.method = "rann", eps = 0, verbose = TRUE ) { @@ -1224,17 +1900,18 @@ FilterAnchors <- function( cn.data1 <- L2Norm( mat = as.matrix(x = t(x = GetAssayData( object = object[[assay[1]]], - slot = "data")[features, nn.cells1])), + slot = slot)[features, nn.cells1])), MARGIN = 1) cn.data2 <- L2Norm( mat = as.matrix(x = t(x = GetAssayData( object = object[[assay[2]]], - slot = "data")[features, nn.cells2])), + slot = slot)[features, nn.cells2])), MARGIN = 1) - nn <- nn2( + nn <- NNHelper( data = cn.data2[nn.cells2, ], query = cn.data1[nn.cells1, ], k = k.filter, + method = nn.method, eps = eps ) @@ -1258,14 +1935,19 @@ FilterAnchors <- function( FindAnchors <- function( object.pair, assay, + slot, cells1, cells2, + internal.neighbors, reduction, + reduction.2 = character(), + nn.reduction = reduction, dims = 1:10, k.anchor = 5, k.filter = 200, k.score = 30, max.features = 200, + nn.method = "rann", eps = 0, projected = FALSE, verbose = TRUE @@ -1280,9 +1962,13 @@ FindAnchors <- function( object = object.pair, cells1 = cells1, cells2 = cells2, + internal.neighbors = internal.neighbors, dims = dims, reduction = reduction, + reduction.2 = reduction.2, + nn.reduction = nn.reduction, k = k.neighbor, + nn.method = nn.method, eps = eps, verbose = verbose ) @@ -1304,9 +1990,11 @@ FindAnchors <- function( object.pair <- FilterAnchors( object = object.pair, assay = assay, + slot = slot, integration.name = 'integrated', features = top.features, k.filter = k.filter, + nn.method = nn.method, eps = eps, verbose = verbose ) @@ -1454,12 +2142,15 @@ FindNN <- function( object, cells1 = NULL, cells2 = NULL, + internal.neighbors, grouping.var = NULL, dims = 1:10, reduction = "cca.l2", + reduction.2 = character(), nn.dims = dims, nn.reduction = reduction, k = 300, + nn.method = "rann", eps = 0, integration.name = 'integrated', verbose = TRUE @@ -1477,42 +2168,68 @@ FindNN <- function( if (nrow(x = unique(x = object[[grouping.var]])) != 2) { stop("Number of groups in grouping.var not equal to 2.") } - groups <- names(x = sort(x = table(... 
= object[[grouping.var]]), decreasing = TRUE)) + groups <- names(x = sort(x = table(object[[grouping.var]]), decreasing = TRUE)) cells1 <- colnames(x = object)[object[[grouping.var]] == groups[[1]]] cells2 <- colnames(x = object)[object[[grouping.var]] == groups[[2]]] } if (verbose) { message("Finding neighborhoods") } - dim.data.self <- Embeddings(object = object[[nn.reduction]])[ ,nn.dims] - dim.data.opposite <- Embeddings(object = object[[reduction]])[ ,dims] - dims.cells1.self <- dim.data.self[cells1, ] - dims.cells1.opposite <- dim.data.opposite[cells1, ] - dims.cells2.self <- dim.data.self[cells2, ] - dims.cells2.opposite <- dim.data.opposite[cells2, ] + if (!is.null(x = internal.neighbors[[1]])) { + nnaa <- internal.neighbors[[1]] + nnbb <- internal.neighbors[[2]] + } else { + dim.data.self <- Embeddings(object = object[[nn.reduction]])[ ,nn.dims] + dims.cells1.self <- dim.data.self[cells1, ] + dims.cells2.self <- dim.data.self[cells2, ] + nnaa <- NNHelper( + data = dims.cells1.self, + k = k + 1, + method = nn.method, + eps = eps + ) + nnbb <- NNHelper( + data = dims.cells2.self, + k = k + 1, + method = nn.method, + eps = eps + ) + } + if (length(x = reduction.2) > 0) { + nnab <- NNHelper( + data = Embeddings(object = object[[reduction.2]])[cells2, ], + query = Embeddings(object = object[[reduction.2]])[cells1, ], + k = k, + method = nn.method, + eps = eps + ) + nnba <- NNHelper( + data = Embeddings(object = object[[reduction]])[cells1, ], + query = Embeddings(object = object[[reduction]])[cells2, ], + k = k, + method = nn.method, + eps = eps + ) + } else { + dim.data.opposite <- Embeddings(object = object[[reduction]])[ ,dims] + dims.cells1.opposite <- dim.data.opposite[cells1, ] + dims.cells2.opposite <- dim.data.opposite[cells2, ] + nnab <- NNHelper( + data = dims.cells2.opposite, + query = dims.cells1.opposite, + k = k, + method = nn.method, + eps = eps + ) + nnba <- NNHelper( + data = dims.cells1.opposite, + query = dims.cells2.opposite, + k = k, + method = nn.method, + eps = eps + ) + } - nnaa <- nn2( - data = dims.cells1.self, - k = k + 1, - eps = eps - ) - nnab <- nn2( - data = dims.cells2.opposite, - query = dims.cells1.opposite, - k = k, - eps = eps - ) - nnbb <- nn2( - data = dims.cells2.self, - k = k + 1, - eps = eps - ) - nnba <- nn2( - data = dims.cells1.opposite, - query = dims.cells2.opposite, - k = k, - eps = eps - ) object <- SetIntegrationData( object = object, integration.name = integration.name, @@ -1532,6 +2249,7 @@ FindWeights <- function( features = NULL, k = 300, sd.weight = 1, + nn.method = "rann", eps = 0, verbose = TRUE, cpp = FALSE @@ -1539,7 +2257,7 @@ FindWeights <- function( if (verbose) { message("Finding integration vector weights") } - if (is.null(reduction) & is.null(features)) { + if (is.null(x = reduction) & is.null(x = features)) { stop("Need to specify either dimension reduction object or a set of features") } assay <- assay %||% DefaultAssay(object = object) @@ -1557,10 +2275,11 @@ FindWeights <- function( } else { data.use <- t(x = GetAssayData(object = object, slot = 'data', assay = assay)[features, nn.cells2]) } - knn_2_2 <- nn2( + knn_2_2 <- NNHelper( data = data.use[anchors.cells2, ], query = data.use, k = k + 1, + method = nn.method, eps = eps ) distances <- knn_2_2$nn.dists[, -1] @@ -1617,6 +2336,30 @@ FindWeights <- function( return(object) } + +# Work out the anchor cell offsets for given set of cells in anchor list +# +# @param anchors A dataframe of anchors, from AnchorSet object +# @param dataset Dataset number (1 or 2) +# @param 
cell Cell number (1 or 2) +# @param cellnames.list List of cell names in all objects +# @param cellnames list of cell names for only the object in question +# +# @return Returns a list of offsets +# +GetCellOffsets <- function(anchors, dataset, cell, cellnames.list, cellnames) { + cell.id <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { + cellnames.list[[anchors[, dataset+3][x]]][anchors[, cell][x]] + }) + cell.offset <- sapply( + X = 1:length(x = cell.id), + FUN = function(x) { + return(which(x = cellnames == cell.id[x])) + } + ) + return(cell.offset) +} + # Convert nearest neighbor information to a sparse matrix # # @param idx Nearest neighbor index @@ -1707,14 +2450,14 @@ ProjectCellEmbeddings <- function( reference.data <- GetAssayData( object = reference, - assay.use = reference.assay, + assay = reference.assay, slot = "data")[features, ] query.data <- GetAssayData( object = query, - assay.use = query.assay, + assay = query.assay, slot = "data")[features, ] - if (is.null(x = feature.mean)){ + if (is.null(x = feature.mean)) { feature.mean <- rowMeans(x = reference.data) feature.sd <- sqrt(SparseRowVar2(mat = reference.data, mu = feature.mean, display_progress = FALSE)) feature.sd[is.na(x = feature.sd)] <- 1 @@ -1726,12 +2469,14 @@ ProjectCellEmbeddings <- function( slot = "data" )[features, ] store.names <- dimnames(x = proj.data) + if (is.numeric(x = feature.mean) && feature.mean != "SCT") { proj.data <- FastSparseRowScaleWithKnownStats( mat = proj.data, mu = feature.mean, sigma = feature.sd, display_progress = FALSE ) + } dimnames(x = proj.data) <- store.names ref.feature.loadings <- Loadings(object = reference[[reduction]])[features, dims] proj.pca <- t(crossprod(x = ref.feature.loadings, y = proj.data)) @@ -1755,6 +2500,158 @@ ReferenceRange <- function(x, lower = 0.025, upper = 0.975) { (quantile(x = x, probs = upper) - quantile(x = x, probs = lower))) } +# Run integration between a reference and query object +# +# Should only be called from within another function +# +# @param filtered.anchors A dataframe containing only anchors between reference and query +# @param reference A reference object +# @param query A query object +# @param cellnames.list List of all cell names in all objects to be integrated +# @param new.assay.name Name for the new assay containing the integrated data +# @param features Vector of features to use when computing the PCA to determine the weights. Only set +# if you want a different set from those used in the anchor finding process +# @param features.to.integrate Vector of features to integrate. By default, will use the features +# used in anchor finding. +# @param dims Number of PCs to use in the weighting procedure +# @param k.weight Number of neighbors to consider when weighting +# @param weight.reduction Dimension reduction to use when calculating anchor weights. +# This can be either: +# \itemize{ +# \item{A string, specifying the name of a dimension reduction present in all objects to be integrated} +# \item{A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated} +# \item{NULL, in which case a new PCA will be calculated and used to calculate anchor weights} +# } +# Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the +# first merge between reference and query, as the merged object will subsequently contain more cells than was in +# query, and weights will need to be calculated for all cells in the object. 
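[Editor's note] The offset bookkeeping GetCellOffsets performs reduces to a name lookup: anchors store per-dataset cell indices, which must be re-expressed as positions in the merged object's column order. A toy illustration with hypothetical cell names:

cells2 <- c("cellA", "cellB", "cellC")   # column order in the merged object
anchor.cells <- c("cellC", "cellA")      # anchor cells recovered by name
sapply(X = anchor.cells, FUN = function(x) which(x = cells2 == x))  # returns 3 1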
+# @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting +# @param sample.tree Specify the order of integration. If NULL, will compute automatically. +# @param do.cpp Run cpp code where applicable +# @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}}) +# @param verbose Print progress bars and output +# +RunIntegration <- function( + filtered.anchors, + normalization.method, + reference, + query, + cellnames.list, + new.assay.name, + features.to.integrate, + weight.reduction, + features, + dims, + do.cpp, + k.weight, + sd.weight, + eps, + verbose +) { + cells1 <- colnames(x = reference) + cells2 <- colnames(x = query) + merged.obj <- merge(x = reference, y = query, merge.data = TRUE) + cell1.offset <- GetCellOffsets( + anchors = filtered.anchors, + dataset = 1, + cell = 1, + cellnames.list = cellnames.list, + cellnames = cells1 + ) + cell2.offset <- GetCellOffsets( + anchors = filtered.anchors, + dataset = 2, + cell = 2, + cellnames.list = cellnames.list, + cellnames = cells2 + ) + filtered.anchors[, 1] <- cell1.offset + filtered.anchors[, 2] <- cell2.offset + integration.name <- "integrated" + merged.obj <- SetIntegrationData( + object = merged.obj, + integration.name = integration.name, + slot = 'anchors', + new.data = filtered.anchors + ) + merged.obj <- SetIntegrationData( + object = merged.obj, + integration.name = integration.name, + slot = 'neighbors', + new.data = list('cells1' = cells1, 'cells2' = cells2) + ) + merged.obj <- FindIntegrationMatrix( + object = merged.obj, + integration.name = integration.name, + features.integrate = features.to.integrate, + verbose = verbose + ) + assay <- DefaultAssay(object = merged.obj) + if (is.null(x = weight.reduction)) { + if (normalization.method == "SCT"){ + # recenter residuals + centered.resids <- ScaleData( + object = GetAssayData(object = merged.obj, assay = assay, slot = "data"), + do.scale = FALSE, + do.center = TRUE, + verbose = FALSE + ) + merged.obj[["pca"]] <- RunPCA( + object = centered.resids[features, ], + assay = assay, + npcs = max(dims), + verbose = FALSE, + features = features + ) + } else { + merged.obj <- ScaleData( + object = merged.obj, + features = features, + verbose = FALSE + ) + merged.obj <- RunPCA( + object = merged.obj, + npcs = max(dims), + verbose = FALSE, + features = features + ) + } + dr.weights <- merged.obj[['pca']] + } else { + dr <- weight.reduction[[2]] + if (inherits(x = dr, what = "DimReduc")) { + dr.weights <- dr + } else { + dr.weights <- query[[dr]] + } + } + merged.obj <- FindWeights( + object = merged.obj, + integration.name = integration.name, + reduction = dr.weights, + cpp = do.cpp, + dims = dims, + k = k.weight, + sd.weight = sd.weight, + eps = eps, + verbose = verbose + ) + merged.obj <- TransformDataMatrix( + object = merged.obj, + new.assay.name = new.assay.name, + features.to.integrate = features.to.integrate, + integration.name = integration.name, + do.cpp = do.cpp, + verbose = verbose + ) + integrated.matrix <- GetAssayData( + object = merged.obj, + assay = new.assay.name, + slot = 'data' + ) + return(integrated.matrix[, cells2]) +} + ScoreAnchors <- function( object, assay = NULL, diff --git a/R/objects.R b/R/objects.R index 13e63c135..becf4f342 100644 --- a/R/objects.R +++ b/R/objects.R @@ -24,6 +24,7 @@ setClassUnion(name = 'AnyMatrix', c("matrix", "dgCMatrix")) #' @slot object.list List of objects used to create anchors #' @slot reference.cells List of cell names in the reference dataset - needed when performing data #' transfer. 
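[Editor's note] Given the AnchorSet slots documented here (including the new reference.objects slot this diff adds), an anchorset can be inspected directly; `anchors` is a placeholder AnchorSet:

slot(object = anchors, name = "reference.objects")  # indices of the reference datasets
head(x = slot(object = anchors, name = "anchors"))  # anchor pairs with their scores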
+#' @slot reference.objects Position of reference object/s in object.list #' @slot query.cells List of cell names in the query dataset - needed when performing data transfer #' @slot anchors The anchor matrix. This contains the cell indices of both anchor pair cells, the #' anchor score, and the index of the original dataset in the object.list for cell1 and cell2 of @@ -41,6 +42,7 @@ AnchorSet <- setClass( slots = list( object.list = "list", reference.cells = "vector", + reference.objects = "vector", query.cells = "vector", anchors = "ANY", offsets = "ANY", @@ -318,6 +320,39 @@ seurat <- setClass( # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' Pull Assays or assay names +#' +#' Lists the names of \code{\link{Assay}} objects present in +#' a Seurat object. If slot is provided, pulls specified Assay object. +#' +#' @param object A Seurat object +#' @param slot Name of Assay to return +#' +#' @return If \code{slot} is \code{NULL}, the names of all \code{Assay} objects +#' in this Seurat object. Otherwise, the \code{Assay} object specified +#' +#' @export +#' +#' @examples +#' Assays(object = pbmc_small) +#' +Assays <- function(object, slot = NULL) { + assays <- FilterObjects(object = object, classes.keep = 'Assay') + if (is.null(x = slot)) { + return(assays) + } + if (!slot %in% assays) { + warning( + "Cannot find an assay of name ", + slot, + " in this Seurat object", + call. = FALSE, + immediate. = TRUE + ) + } + return(slot(object = object, name = 'assays')[[slot]]) +} + #' Create an Assay object #' #' Create an Assay object from a feature (e.g. gene) expression matrix. The @@ -434,6 +469,19 @@ CreateAssayObject <- function( } counts <- new(Class = 'matrix') } + # Ensure row- and column-names are vectors, not arrays + if (!is.vector(x = rownames(x = counts))) { + rownames(x = counts) <- as.vector(x = rownames(x = counts)) + } + if (!is.vector(x = colnames(x = counts))) { + colnames(x = counts) <- as.vector(x = colnames(x = counts)) + } + if (!is.vector(x = rownames(x = data))) { + rownames(x = data) <- as.vector(x = rownames(x = data)) + } + if (!is.vector(x = colnames(x = data))) { + colnames(x = data) <- as.vector(x = colnames(x = data)) + } if (any(grepl(pattern = '_', x = rownames(x = counts))) || any(grepl(pattern = '_', x = rownames(x = data)))) { warning( "Feature names cannot have underscores ('_'), replacing with dashes ('-')", @@ -451,6 +499,25 @@ CreateAssayObject <- function( x = rownames(x = data) ) } + if (any(grepl(pattern = '|', x = rownames(x = counts), fixed = TRUE)) || any(grepl(pattern = '|', x = rownames(x = data), fixed = TRUE))) { + warning( + "Feature names cannot have pipe characters ('|'), replacing with dashes ('-')", + call. = FALSE, + immediate. = TRUE + ) + rownames(x = counts) <- gsub( + pattern = '|', + replacement = '-', + x = rownames(x = counts), + fixed = TRUE + ) + rownames(x = data) <- gsub( + pattern = '|', + replacement = '-', + x = rownames(x = data), + fixed = TRUE + ) + } # Initialize meta.features init.meta.features <- data.frame(row.names = rownames(x = data)) assay <- new( @@ -629,6 +696,22 @@ CreateSeuratObject <- function( names.delim = "_", meta.data = NULL ) { + if (!is.null(x = meta.data)) { + if (is.null(x = rownames(x = meta.data))) { + stop("Row names not set in metadata. 
Please ensure that rownames of metadata match column names of data matrix") + } + if (length(x = setdiff(x = rownames(x = meta.data), y = colnames(x = counts)))) { + warning("Some cells in meta.data not present in provided counts matrix.") + meta.data <- meta.data[intersect(x = rownames(x = meta.data), y = colnames(x = counts)), ] + } + if (class(x = meta.data) == "data.frame") { + new.meta.data <- data.frame(row.names = colnames(x = counts)) + for (ii in 1:ncol(x = meta.data)) { + new.meta.data[rownames(x = meta.data), colnames(x = meta.data)[ii]] <- meta.data[, ii, drop = FALSE] + } + meta.data <- new.meta.data + } + } assay.data <- CreateAssayObject( counts = counts, min.cells = min.cells, @@ -687,6 +770,10 @@ CreateSeuratObject <- function( #' @param scale.data Preserve the scale.data slot for the assays specified #' @param features Only keep a subset of features, defaults to all features #' @param assays Only keep a subset of assays specified here +#' @param dimreducs Only keep a subset of DimReducs specified here (if NULL, +#' remove all DimReducs) +#' @param graphs Only keep a subset of Graphs specified here (if NULL, remove +#' all Graphs) #' #' @export #' @@ -696,7 +783,9 @@ DietSeurat <- function( data = TRUE, scale.data = FALSE, features = NULL, - assays = NULL + assays = NULL, + dimreducs = NULL, + graphs = NULL ) { assays <- assays %||% FilterObjects(object = object, classes.keep = "Assay") assays <- assays[assays %in% FilterObjects(object = object, classes.keep = 'Assay')] @@ -724,25 +813,37 @@ DietSeurat <- function( } } else { if (counts) { - slot(object = object[[assay]], name = 'counts') <- slot(object = object[[assay]], name = 'counts')[features.assay, ] + if (!is.null(x = features)) { + slot(object = object[[assay]], name = 'counts') <- slot(object = object[[assay]], name = 'counts')[features.assay, ] + } } else { slot(object = object[[assay]], name = 'counts') <- new(Class = 'matrix') } if (data) { - slot(object = object[[assay]], name = 'data') <- slot(object = object[[assay]], name = 'data')[features.assay, ] + if (!is.null(x = features)) { + slot(object = object[[assay]], name = 'data') <- slot(object = object[[assay]], name = 'data')[features.assay, ] + } } else { stop('data = FALSE currently not supported') slot(object = object[[assay]], name = 'data') <- new(Class = 'matrix') } features.scaled <- features.assay[features.assay %in% rownames(x = slot(object = object[[assay]], name = 'scale.data'))] if (scale.data && length(x = features.scaled) > 0) { - slot(object = object[[assay]], name = 'scale.data') <- slot(object = object[[assay]], name = 'scale.data')[features.scaled, ] + if (! 
all(rownames(x = slot(object = object[[assay]], name = 'scale.data')) %in% features.scaled)) {
+        slot(object = object[[assay]], name = 'scale.data') <- slot(object = object[[assay]], name = 'scale.data')[features.scaled, ]
+      }
       } else {
         slot(object = object[[assay]], name = 'scale.data') <- new(Class = 'matrix')
       }
      }
    }
  }
+  # remove unspecified DimReducs and Graphs
+  all.objects <- FilterObjects(object = object, classes.keep = c('DimReduc', 'Graph'))
+  objects.to.remove <- all.objects[!all.objects %in% c(dimreducs, graphs)]
+  for (ob in objects.to.remove) {
+    object[[ob]] <- NULL
+  }
   return(object)
 }
 
@@ -833,14 +934,19 @@ FetchData <- function(object, vars, cells = NULL, slot = 'data') {
     default.vars <- vars[vars %in% rownames(x = GetAssayData(object = object, slot = slot))]
     data.fetched <- c(
       data.fetched,
-      as.data.frame(x = t(x = as.matrix(x = GetAssayData(
-        object = object,
-        slot = slot
-      )[default.vars, cells, drop = FALSE])))
+      tryCatch(
+        expr = as.data.frame(x = t(x = as.matrix(x = GetAssayData(
+          object = object,
+          slot = slot
+        )[default.vars, cells, drop = FALSE]))),
+        error = function(...) {
+          return(NULL)
+        }
+      )
     )
   # Pull identities
   if ('ident' %in% vars && !'ident' %in% colnames(x = object[[]])) {
-    data.fetched[['ident']] <- Idents(object = object)
+    data.fetched[['ident']] <- Idents(object = object)[cells]
   }
   # Try to find ambiguous vars
   fetched <- names(x = data.fetched)
@@ -997,7 +1103,7 @@ LogSeuratCommand <- function(object, return.command = FALSE) {
     stop("'LogSeuratCommand' cannot be called at the top level", call. = FALSE)
   }
   command.name <- as.character(x = deparse(expr = sys.calls()[[which.frame]]))
-  command.name <- gsub(pattern = ".Seurat", replacement = "", x = command.name)
+  command.name <- gsub(pattern = "\\.Seurat", replacement = "", x = command.name)
   call.string <- command.name
   command.name <- ExtractField(string = command.name, field = 1, delim = "\\(")
   # capture function arguments
@@ -1041,6 +1147,39 @@ LogSeuratCommand <- function(object, return.command = FALSE) {
   return(object)
 }
 
+#' Pull DimReducs or DimReduc names
+#'
+#' Lists the names of \code{\link{DimReduc}} objects present in
+#' a Seurat object. If slot is provided, pulls specified DimReduc object.
+#'
+#' @param object A Seurat object
+#' @param slot Name of DimReduc
+#'
+#' @return If \code{slot} is \code{NULL}, the names of all \code{DimReduc} objects
+#' in this Seurat object. Otherwise, the \code{DimReduc} object requested
+#'
+#' @export
+#'
+#' @examples
+#' Reductions(object = pbmc_small)
+#'
+Reductions <- function(object, slot = NULL) {
+  reductions <- FilterObjects(object = object, classes.keep = 'DimReduc')
+  if (is.null(x = slot)) {
+    return(reductions)
+  }
+  if (!slot %in% reductions) {
+    warning(
+      "Cannot find a DimReduc of name ",
+      slot,
+      " in this Seurat object",
+      call. = FALSE,
+      immediate. = TRUE
+    )
+  }
+  return(slot(object = object, name = 'reductions')[[slot]])
+}
+
 #' Set integration data
 #'
 #' @param object Seurat object
@@ -1078,7 +1217,6 @@ SetIntegrationData <- function(object, integration.name, slot, new.data) {
 #' @param object Seurat object
 #' @param split.by Attribute for splitting. Default is "ident". Currently
 #' only supported for class-level (i.e. non-quantitative) attributes.
-#' @param ... Ignored
 #'
 #' @return A named list of Seurat objects, each containing a subset of cells
 #' from the original object.
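# A brief usage sketch of the accessors and slimming options documented above,
# using the package's bundled `pbmc_small` object; the reduction name kept
# below is illustrative:
#   Assays(object = pbmc_small)          # names of all Assay objects
#   Reductions(object = pbmc_small)      # names of all DimReduc objects
#   slim <- DietSeurat(
#     object = pbmc_small,
#     counts = FALSE,                    # drop the counts slot
#     dimreducs = 'pca',                 # keep only the PCA reduction
#     graphs = NULL                      # remove all Graphs
#   )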
@@ -1092,7 +1230,7 @@ SetIntegrationData <- function(object, integration.name, slot, new.data) { #' pbmc_small <- AddMetaData(object = pbmc_small, metadata = groups, col.name = "group") #' obj.list <- SplitObject(pbmc_small, split.by = "group") #' -SplitObject <- function(object, split.by = "ident", ...) { +SplitObject <- function(object, split.by = "ident") { if (split.by == 'ident') { groupings <- Idents(object = object) } else { @@ -1201,9 +1339,9 @@ TopCells <- function(object, dim = 1, ncells = 20, balanced = FALSE, ...) { #' UpdateSeuratObject <- function(object) { if (.hasSlot(object, "version")) { - object.version <- package_version(x = object@version) - if (object.version >= package_version(x = "2.0.0") && object.version < package_version(x = '3.0.0')) { + if (slot(object = object, name = 'version') >= package_version(x = "2.0.0") && slot(object = object, name = 'version') < package_version(x = '3.0.0')) { # Run update + message("Updating from v2.X to v3.X") seurat.version <- packageVersion(pkg = "Seurat") new.assay <- UpdateAssay(old.assay = object, assay = "RNA") assay.list <- list(new.assay) @@ -1225,7 +1363,7 @@ UpdateSeuratObject <- function(object) { tools = list() ) } - if (package_version(x = object@version) >= package_version(x = "3.0.0")) { + if (package_version(x = slot(object = object, name = 'version')) >= package_version(x = "3.0.0")) { # Run validation message("Validating object structure") # Validate object keys @@ -1234,7 +1372,7 @@ UpdateSeuratObject <- function(object) { Key(object = object[[ko]]) <- UpdateKey(key = Key(object = object[[ko]])) } # Check feature names - message("Ensuring feature names don't have underscores") + message("Ensuring feature names don't have underscores or pipes") for (assay.name in FilterObjects(object = object, classes.keep = 'Assay')) { assay <- object[[assay.name]] for (slot in c('counts', 'data', 'scale.data')) { @@ -1244,8 +1382,36 @@ UpdateSeuratObject <- function(object) { replacement = '-', x = rownames(x = slot(object = assay, name = slot)) ) + rownames(x = slot(object = assay, name = slot)) <- gsub( + pattern = '|', + replacement = '-', + x = rownames(x = slot(object = assay, name = slot)), + fixed = TRUE + ) } } + VariableFeatures(object = assay) <- gsub( + pattern = '_', + replacement = '-', + x = VariableFeatures(object = assay) + ) + VariableFeatures(object = assay) <- gsub( + pattern = '|', + replacement = '-', + x = VariableFeatures(object = assay), + fixed = TRUE + ) + rownames(x = slot(object = assay, name = "meta.features")) <- gsub( + pattern = '_', + replacement = '-', + x = rownames(x = assay[[]]) + ) + rownames(x = slot(object = assay, name = "meta.features")) <- gsub( + pattern = '|', + replacement = '-', + x = rownames(x = assay[[]]), + fixed = TRUE + ) object[[assay.name]] <- assay } for (reduc.name in FilterObjects(object = object, classes.keep = 'DimReduc')) { @@ -1257,6 +1423,12 @@ UpdateSeuratObject <- function(object) { replacement = '-', x = rownames(x = slot(object = reduc, name = slot)) ) + rownames(x = slot(object = reduc, name = slot)) <- gsub( + pattern = '_', + replacement = '-', + x = rownames(x = slot(object = reduc, name = slot)), + fixed = TRUE + ) } } object[[reduc.name]] <- reduc @@ -1300,6 +1472,7 @@ AddMetaData.Seurat <- function(object, metadata, col.name = NULL) { #' @method as.CellDataSet Seurat #' as.CellDataSet.Seurat <- function(x, assay = NULL, reduction = NULL, ...) { + CheckDots(...) 
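  # A minimal sketch of the v2 -> v3 update path shown above; the file name is
  # illustrative:
  #   old.obj <- readRDS(file = 'seurat_v2_object.rds')
  #   new.obj <- UpdateSeuratObject(object = old.obj)
  # Note that validation now also rewrites feature names, replacing both
  # underscores ('_') and pipes ('|') with dashes ('-').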
if (!PackageCheck('monocle', error = FALSE)) {
    stop("Please install monocle from Bioconductor before converting to a CellDataSet object")
  } else if (packageVersion(pkg = 'monocle') >= package_version(x = '2.99.0')) {
@@ -1380,6 +1553,7 @@ as.CellDataSet.Seurat <- function(x, assay = NULL, reduction = NULL, ...) {
 #' g <- as.Graph(x = mat)
 #'
 as.Graph.Matrix <- function(x, ...) {
+  CheckDots(...)
   x <- as.sparse(x = x)
   if (is.null(x = rownames(x = x))) {
     stop("Please provide rownames to the matrix before converting to a Graph.")
@@ -1402,6 +1576,7 @@ as.Graph.Matrix <- function(x, ...) {
 #' g <- as.Graph(x = mat)
 #'
 as.Graph.matrix <- function(x, ...) {
+  CheckDots(...)
   return(as.Graph.Matrix(x = as(object = x, Class = 'Matrix')))
 }
@@ -1454,6 +1629,7 @@ as.loom.Seurat <- function(
   if (!PackageCheck('loomR', error = FALSE)) {
     stop("Please install loomR from GitHub before converting to a loom object")
   }
+  CheckDots(..., fxns = 'loomR::create')
   # Set the default assay to make life easy
   assay <- assay %||% DefaultAssay(object = x)
   DefaultAssay(object = x) <- assay
@@ -1576,7 +1752,6 @@ as.loom.Seurat <- function(
   return(lfile)
 }
-
 #' @param slot Slot to store expression data as
 #'
 #' @importFrom utils packageVersion
@@ -1592,6 +1767,7 @@ as.Seurat.CellDataSet <- function(
   verbose = TRUE,
   ...
 ) {
+  CheckDots(...)
   if (!PackageCheck('monocle', error = FALSE)) {
     stop("Please install monocle from Bioconductor before converting from a CellDataSet object")
   } else if (packageVersion(pkg = 'monocle') >= package_version(x = '2.99.0')) {
@@ -1779,6 +1955,7 @@ as.Seurat.loom <- function(
   verbose = TRUE,
   ...
 ) {
+  CheckDots(...)
   # Shouldn't be necessary
   if (!PackageCheck('loomR', error = FALSE)) {
     stop("Please install loomR")
   }
@@ -2128,6 +2305,7 @@ as.Seurat.SingleCellExperiment <- function(
   project = 'SingleCellExperiment',
   ...
 ) {
+  CheckDots(...)
   if (!PackageCheck('SingleCellExperiment', error = FALSE)) {
     stop(
       "Please install SingleCellExperiment from Bioconductor before converting from a SingleCellExperiment object",
@@ -2217,6 +2395,7 @@ as.Seurat.SingleCellExperiment <- function(
 #' @method as.SingleCellExperiment Seurat
 #'
 as.SingleCellExperiment.Seurat <- function(x, assay = NULL, ...) {
+  CheckDots(...)
   if (!PackageCheck('SingleCellExperiment', error = FALSE)) {
     stop("Please install SingleCellExperiment from Bioconductor before converting to a SingleCellExperiment object")
   }
@@ -2245,6 +2424,7 @@ as.SingleCellExperiment.Seurat <- function(x, assay = NULL, ...) {
 #' @method as.sparse data.frame
 #'
 as.sparse.data.frame <- function(x, ...) {
+  CheckDots(...)
   return(as(object = as.matrix(x = x), Class = 'dgCMatrix'))
 }
@@ -2256,6 +2436,7 @@ as.sparse.data.frame <- function(x, ...) {
 #' @method as.sparse H5Group
 #'
 as.sparse.H5Group <- function(x, ...) {
+  CheckDots(...)
   for (i in c('data', 'indices', 'indptr')) {
     if (!x$exists(name = i) || !is(object = x[[i]], class2 = 'H5D')) {
       stop("Invalid H5Group specification for a sparse matrix, missing dataset ", i)
     }
@@ -2284,6 +2465,7 @@ as.sparse.H5Group <- function(x, ...) {
 #' @method as.sparse Matrix
 #'
 as.sparse.Matrix <- function(x, ...) {
+  CheckDots(...)
   return(as(object = x, Class = 'dgCMatrix'))
 }
@@ -2318,6 +2500,7 @@ Cells.DimReduc <- function(x) {
 #' @method Command Seurat
 #'
 Command.Seurat <- function(object, command = NULL, value = NULL, ...) {
+  CheckDots(...)
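  # Usage sketch for this accessor; `pbmc_small` ships with logged commands,
  # and the command name below is illustrative:
  #   Command(object = pbmc_small)                        # names of all logged commands
  #   Command(object = pbmc_small, command = 'NormalizeData.RNA')
  #   Command(object = pbmc_small, command = 'NormalizeData.RNA', value = 'normalization.method')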
commands <- slot(object = object, name = "commands") if (is.null(x = command)) { return(names(x = commands)) @@ -2341,6 +2524,7 @@ Command.Seurat <- function(object, command = NULL, value = NULL, ...) { #' @method DefaultAssay DimReduc #' DefaultAssay.DimReduc <- function(object, ...) { + CheckDots(...) return(slot(object = object, name = 'assay.used')) } @@ -2353,6 +2537,7 @@ DefaultAssay.DimReduc <- function(object, ...) { #' DefaultAssay(object = pbmc_small) #' DefaultAssay.Seurat <- function(object, ...) { + CheckDots(...) return(slot(object = object, name = 'active.assay')) } @@ -2360,6 +2545,7 @@ DefaultAssay.Seurat <- function(object, ...) { #' @method DefaultAssay<- DimReduc #' "DefaultAssay<-.DimReduc" <- function(object, ..., value) { + CheckDots(...) slot(object = object, name = 'assay.used') <- value return(object) } @@ -2378,6 +2564,7 @@ DefaultAssay.Seurat <- function(object, ...) { #' DefaultAssay(object = pbmc_small) #' "DefaultAssay<-.Seurat" <- function(object, ..., value) { + CheckDots(...) if (!value %in% names(x = slot(object = object, name = 'assays'))) { stop("Cannot find assay ", value) } @@ -2394,6 +2581,7 @@ DefaultAssay.Seurat <- function(object, ...) { #' Embeddings(object = pbmc_small[["pca"]])[1:5, 1:5] #' Embeddings.DimReduc <- function(object, ...) { + CheckDots(...) return(slot(object = object, name = 'cell.embeddings')) } @@ -2421,6 +2609,7 @@ Embeddings.Seurat <- function(object, reduction = 'pca', ...) { #' GetAssay(object = pbmc_small, assay = "RNA") #' GetAssay.Seurat <- function(object, assay = NULL, ...) { + CheckDots(...) assay <- assay %||% DefaultAssay(object = object) object.assays <- FilterObjects(object = object, classes.keep = 'Assay') if (!assay %in% object.assays) { @@ -2444,6 +2633,7 @@ GetAssay.Seurat <- function(object, assay = NULL, ...) { #' GetAssayData(object = pbmc_small[["RNA"]], slot = "data")[1:5,1:5] #' GetAssayData.Assay <- function(object, slot = 'data', ...) { + CheckDots(...) return(slot(object = object, name = slot)) } @@ -2458,6 +2648,7 @@ GetAssayData.Assay <- function(object, slot = 'data', ...) { #' GetAssayData(object = pbmc_small, assay = "RNA", slot = "data")[1:5,1:5] #' GetAssayData.Seurat <- function(object, slot = 'data', assay = NULL, ...) { + CheckDots(...) assay <- assay %||% DefaultAssay(object = object) return(GetAssayData( object = GetAssay(object = object, assay = assay), @@ -2478,6 +2669,7 @@ GetAssayData.Seurat <- function(object, slot = 'data', assay = NULL, ...) { #' HVFInfo(object = pbmc_small[["RNA"]], selection.method = 'vst')[1:5, ] #' HVFInfo.Assay <- function(object, selection.method, status = FALSE, ...) { + CheckDots(...) disp.methods <- c('mean.var.plot', 'dispersion', 'disp') if (tolower(x = selection.method) %in% disp.methods) { selection.method <- 'mvp' @@ -2531,6 +2723,7 @@ HVFInfo.Seurat <- function( status = FALSE, ... ) { + CheckDots(...) assay <- assay %||% DefaultAssay(object = object) if (is.null(x = selection.method)) { cmds <- apply( @@ -2579,6 +2772,7 @@ HVFInfo.Seurat <- function( #' @method Idents Seurat #' Idents.Seurat <- function(object, ...) { + CheckDots(...) return(slot(object = object, name = 'active.ident')) } @@ -2590,6 +2784,7 @@ Idents.Seurat <- function(object, ...) { #' @method Idents<- Seurat #' "Idents<-.Seurat" <- function(object, cells = NULL, drop = FALSE, ..., value) { + CheckDots(...) cells <- cells %||% colnames(x = object) if (is.numeric(x = cells)) { cells <- colnames(x = object)[cells] @@ -2641,6 +2836,7 @@ Idents.Seurat <- function(object, ...) 
{ #' @method JS DimReduc #' JS.DimReduc <- function(object, slot = NULL, ...) { + CheckDots(...) jackstraw <- slot(object = object, name = 'jackstraw') if (!is.null(x = slot)) { jackstraw <- JS(object = jackstraw, slot = slot) @@ -2653,6 +2849,7 @@ JS.DimReduc <- function(object, slot = NULL, ...) { #' @method JS JackStrawData #' JS.JackStrawData <- function(object, slot, ...) { + CheckDots(...) slot <- switch( EXPR = slot, 'empirical' = 'empirical.p.values', @@ -2669,6 +2866,7 @@ JS.JackStrawData <- function(object, slot, ...) { #' @method JS<- DimReduc #' "JS<-.DimReduc" <- function(object, slot = NULL, ..., value) { + CheckDots(...) if (inherits(x = value, what = 'JackStrawData')) { slot(object = object, name = 'jackstraw') <- value } else if (is.null(x = NULL)) { @@ -2684,6 +2882,7 @@ JS.JackStrawData <- function(object, slot, ...) { #' @method JS<- JackStrawData #' "JS<-.JackStrawData" <- function(object, slot, ..., value) { + CheckDots(...) slot <- switch( EXPR = slot, 'empirical' = 'empirical.p.values', @@ -2705,6 +2904,7 @@ JS.JackStrawData <- function(object, slot, ...) { #' Key(object = pbmc_small[["RNA"]]) #' Key.Assay <- function(object, ...) { + CheckDots(...) return(slot(object = object, name = 'key')) } @@ -2717,6 +2917,7 @@ Key.Assay <- function(object, ...) { #' Key(object = pbmc_small[["pca"]]) #' Key.DimReduc <- function(object, ...) { + CheckDots(...) return(slot(object = object, name = 'key')) } @@ -2729,6 +2930,7 @@ Key.DimReduc <- function(object, ...) { #' Key(object = pbmc_small) #' Key.Seurat <- function(object, ...) { + CheckDots(...) keyed.objects <- FilterObjects(object = object) return(sapply( X = keyed.objects, @@ -2748,6 +2950,7 @@ Key.Seurat <- function(object, ...) { #' Key(object = pbmc_small[["RNA"]]) #' "Key<-.Assay" <- function(object, ..., value) { + CheckDots(...) slot(object = object, name = 'key') <- value return(object) } @@ -2762,6 +2965,7 @@ Key.Seurat <- function(object, ...) { #' Key(object = pbmc_small[["pca"]]) #' "Key<-.DimReduc" <- function(object, ..., value) { + CheckDots(...) old.key <- Key(object = object) slots <- Filter( f = function(x) { @@ -2795,6 +2999,7 @@ Key.Seurat <- function(object, ...) { #' Loadings(object = pbmc_small[["pca"]])[1:5,1:5] #' Loadings.DimReduc <- function(object, projected = FALSE, ...) { + CheckDots(...) projected <- projected %||% Projected(object = object) slot <- ifelse( test = projected, @@ -2829,6 +3034,7 @@ Loadings.Seurat <- function(object, reduction = 'pca', projected = FALSE, ...) { #' Loadings(object = pbmc_small[["pca"]]) <- new.loadings #' "Loadings<-.DimReduc" <- function(object, projected = TRUE, ..., value) { + CheckDots(...) slot.use <- ifelse( test = projected, yes = 'feature.loadings.projected', @@ -2848,6 +3054,7 @@ Loadings.Seurat <- function(object, reduction = 'pca', projected = FALSE, ...) { #' @method Misc Assay #' Misc.Assay <- function(object, slot = NULL, ...) { + CheckDots(...) if (is.null(x = slot)) { return(slot(object = object, name = 'misc')) } @@ -2863,6 +3070,7 @@ Misc.Assay <- function(object, slot = NULL, ...) { #' Misc(object = pbmc_small, slot = "example") #' Misc.Seurat <- function(object, slot = NULL, ...) { + CheckDots(...) if (is.null(x = slot)) { return(slot(object = object, name = 'misc')) } @@ -2874,6 +3082,7 @@ Misc.Seurat <- function(object, slot = NULL, ...) { #' @method Misc<- Assay #' "Misc<-.Assay" <- function(object, slot, ..., value) { + CheckDots(...) 
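  # Usage sketch for the JackStraw accessors above (assumes JackStraw results
  # are present on the 'pca' reduction of `pbmc_small`):
  #   JS(object = pbmc_small[['pca']], slot = 'empirical')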
if (slot %in% names(x = Misc(object = object))) {
    warning("Overwriting miscellaneous data for ", slot)
  }
@@ -2894,6 +3103,7 @@ Misc.Seurat <- function(object, slot = NULL, ...) {
 #' Misc(object = pbmc_small, slot = "example") <- "testing_misc"
 #'
 "Misc<-.Seurat" <- function(object, slot, ..., value) {
+  CheckDots(...)
   if (slot %in% names(x = Misc(object = object))) {
     warning("Overwriting miscellaneous data for ", slot)
   }
@@ -3094,6 +3304,7 @@ OldWhichCells.Seurat <- function(
 #' @method Project Seurat
 #'
 Project.Seurat <- function(object, ...) {
+  CheckDots(...)
   return(slot(object = object, name = 'project.name'))
 }
@@ -3102,6 +3313,7 @@ Project.Seurat <- function(object, ...) {
 #' @method Project<- Seurat
 #'
 "Project<-.Seurat" <- function(object, ..., value) {
+  CheckDots(...)
   slot(object = object, name = 'project.name') <- as.character(x = value)
   return(object)
 }
@@ -3114,6 +3326,7 @@ Project.Seurat <- function(object, ...) {
 #' @method ReadH5AD character
 #'
 ReadH5AD.character <- function(file, assay = 'RNA', verbose = TRUE, ...) {
+  CheckDots(...)
   if (!PackageCheck('hdf5r', error = FALSE)) {
     stop("Please install hdf5r for h5ad capabilities")
   }
@@ -3135,6 +3348,7 @@ ReadH5AD.character <- function(file, assay = 'RNA', verbose = TRUE, ...) {
 #' @method ReadH5AD H5File
 #'
 ReadH5AD.H5File <- function(file, assay = 'RNA', verbose = TRUE, ...) {
+  CheckDots(...)
   # Pull assay data
   # If X is an H5D, assume scaled
   # Otherwise, if file$exists(name = 'raw'), assume X is normalized
@@ -3193,17 +3407,17 @@ ReadH5AD.H5File <- function(file, assay = 'RNA', verbose = TRUE, ...) {
   # Fix meta feature colnames
   colnames(x = meta.features) <- gsub(
     pattern = 'dispersions_norm',
-    replacement = 'dispersion.scaled',
+    replacement = 'mvp.dispersion.scaled',
     x = colnames(x = meta.features)
   )
   colnames(x = meta.features) <- gsub(
     pattern = 'dispersions',
-    replacement = 'dispersion',
+    replacement = 'mvp.dispersion',
     x = colnames(x = meta.features)
   )
   colnames(x = meta.features) <- gsub(
     pattern = 'means',
-    replacement = 'mean',
+    replacement = 'mvp.mean',
     x = colnames(x = meta.features)
   )
   colnames(x = meta.features) <- gsub(
@@ -3272,7 +3486,7 @@ ReadH5AD.H5File <- function(file, assay = 'RNA', verbose = TRUE, ...) {
   if (verbose) {
     message("Setting highly variable features")
   }
-  hvf.info <- HVFInfo(object = assays[[assay]])
+  hvf.info <- HVFInfo(object = assays[[assay]], selection.method = 'mvp')
   hvf.info <- hvf.info[order(hvf.info$dispersion, decreasing = TRUE), , drop = FALSE]
   means.use <- (hvf.info$mean > 0.1) & (hvf.info$mean < 8)
   dispersions.use <- (hvf.info$dispersion.scaled > 1) & (hvf.info$dispersion.scaled < Inf)
@@ -3524,6 +3738,7 @@ ReorderIdent.Seurat <- function(
 #' head(x = colnames(x = renamed.assay))
 #'
 RenameCells.Assay <- function(object, new.names = NULL, ...) {
+  CheckDots(...)
   for (data.slot in c("counts", "data", "scale.data")) {
     old.data <- GetAssayData(object = object, slot = data.slot)
     if (ncol(x = old.data) <= 1) {
@@ -3548,6 +3763,7 @@ RenameCells.Assay <- function(object, new.names = NULL, ...) {
 #' head(x = Cells(x = renamed.dimreduc))
 #'
 RenameCells.DimReduc <- function(object, new.names = NULL, ...) {
+  CheckDots(...)
   old.data <- Embeddings(object = object)
   rownames(x = old.data) <- new.names
   slot(object = object, name = "cell.embeddings") <- old.data
@@ -3579,6 +3795,7 @@ RenameCells.Seurat <- function(
   for.merge = FALSE,
   ...
 ) {
+  CheckDots(...)
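  # Usage sketch for the Seurat-level method, mirroring the Assay and DimReduc
  # examples above; the prefix is illustrative:
  #   renamed <- RenameCells(object = pbmc_small, add.cell.id = 'A')
  #   head(x = colnames(x = renamed))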
if (missing(x = add.cell.id) && missing(x = new.names)) { stop("One of 'add.cell.id' and 'new.names' must be set") } @@ -3680,6 +3897,7 @@ RenameIdents.Seurat <- function(object, ...) { #' new.assay <- SetAssayData(object = pbmc_small[["RNA"]], slot = "counts", new.data = count.data) #' SetAssayData.Assay <- function(object, slot, new.data, ...) { + CheckDots(...) slots.use <- c('counts', 'data', 'scale.data') if (!slot %in% slots.use) { stop( @@ -3740,7 +3958,7 @@ SetAssayData.Assay <- function(object, slot, new.data, ...) { call. = FALSE ) } - new.data <- new.data[new.features, colnames(x = object)] + new.data <- new.data[new.features, colnames(x = object), drop = FALSE] if (slot %in% c('counts', 'data') && !all(dim(x = new.data) == dim(x = object))) { stop( "Attempting to add a different number of cells and/or features", @@ -3748,6 +3966,12 @@ SetAssayData.Assay <- function(object, slot, new.data, ...) { ) } } + if (!is.vector(x = rownames(x = new.data))) { + rownames(x = new.data) <- as.vector(x = rownames(x = new.data)) + } + if (!is.vector(x = colnames(x = new.data))) { + colnames(x = new.data) <- as.vector(x = colnames(x = new.data)) + } slot(object = object, name = slot) <- new.data return(object) } @@ -3776,6 +4000,7 @@ SetAssayData.Seurat <- function( assay = NULL, ... ) { + CheckDots(...) assay <- assay %||% DefaultAssay(object = object) object[[assay]] <- SetAssayData(object = object[[assay]], slot = slot, new.data = new.data) return(object) @@ -3798,6 +4023,7 @@ SetIdent.Seurat <- function(object, cells = NULL, value, ...) { # ') <- ', # deparse(expr = substitute(expr = value)) #) + CheckDots(...) Idents(object = object, cells = cells) <- value return(object) } @@ -3819,6 +4045,7 @@ StashIdent.Seurat <- function(object, save.name = 'orig.ident', ...) { deparse(expr = substitute(expr = object)), ')' ) + CheckDots(...) object[[save.name]] <- Idents(object = object) return(object) } @@ -3832,6 +4059,7 @@ StashIdent.Seurat <- function(object, save.name = 'orig.ident', ...) { #' Stdev(object = pbmc_small[["pca"]]) #' Stdev.DimReduc <- function(object, ...) { + CheckDots(...) return(slot(object = object, name = 'stdev')) } @@ -3846,6 +4074,7 @@ Stdev.DimReduc <- function(object, ...) { #' Stdev(object = pbmc_small, reduction = "pca") #' Stdev.Seurat <- function(object, reduction = 'pca', ...) { + CheckDots(...) return(Stdev(object = object[[reduction]])) } @@ -4014,6 +4243,7 @@ SubsetData.Seurat <- function( #' Tool(object = pbmc_small) #' Tool.Seurat <- function(object, slot = NULL, ...) { + CheckDots(...) if (is.null(x = slot)) { return(names(x = slot(object = object, name = 'tools'))) } @@ -4031,6 +4261,7 @@ Tool.Seurat <- function(object, slot = NULL, ...) { #' Tool(object = pbmc_small) <- sample.tool.output #' } "Tool<-.Seurat" <- function(object, ..., value) { + CheckDots(...) calls <- as.character(x = sys.calls()) calls <- lapply( X = strsplit(x = calls, split = '(', fixed = TRUE), @@ -4063,6 +4294,7 @@ Tool.Seurat <- function(object, slot = NULL, ...) { #' @method VariableFeatures Assay #' VariableFeatures.Assay <- function(object, selection.method = NULL, ...) { + CheckDots(...) if (!is.null(x = selection.method)) { vf <- HVFInfo(object = object, selection.method = selection.method, status = TRUE) return(rownames(x = vf)[which(x = vf[, "variable"][, 1])]) @@ -4077,6 +4309,7 @@ VariableFeatures.Assay <- function(object, selection.method = NULL, ...) { #' @method VariableFeatures Seurat #' VariableFeatures.Seurat <- function(object, assay = NULL, selection.method = NULL, ...) 
{ + CheckDots(...) assay <- assay %||% DefaultAssay(object = object) return(VariableFeatures(object = object[[assay]], selection.method = selection.method)) } @@ -4086,6 +4319,7 @@ VariableFeatures.Seurat <- function(object, assay = NULL, selection.method = NUL #' @method VariableFeatures<- Assay #' "VariableFeatures<-.Assay" <- function(object, ..., value) { + CheckDots(...) if (length(x = value) == 0) { slot(object = object, name = 'var.features') <- character(length = 0) return(object) @@ -4122,6 +4356,7 @@ VariableFeatures.Seurat <- function(object, assay = NULL, selection.method = NUL #' @method VariableFeatures<- Seurat #' "VariableFeatures<-.Seurat" <- function(object, assay = NULL, ..., value) { + CheckDots(...) assay <- assay %||% DefaultAssay(object = object) VariableFeatures(object = object[[assay]]) <- value return(object) @@ -4147,6 +4382,7 @@ WhichCells.Assay <- function( invert = FALSE, ... ) { + CheckDots(...) cells <- cells %||% colnames(x = object) if (!missing(x = expression) && !is.null(x = substitute(expr = expression))) { key.pattern <- paste0('^', Key(object = object)) @@ -4212,10 +4448,12 @@ WhichCells.Seurat <- function( seed = 1, ... ) { + CheckDots(...) cells <- cells %||% colnames(x = object) if (is.numeric(x = cells)) { cells <- colnames(x = object)[cells] } + cell.order <- cells if (!is.null(x = idents)) { set.seed(seed = seed) if (any(!idents %in% levels(x = Idents(object = object)))) { @@ -4279,6 +4517,7 @@ WhichCells.Seurat <- function( cells <- rownames(x = data.subset) } if (invert) { + cell.order <- colnames(x = object) cells <- colnames(x = object)[!colnames(x = object) %in% cells] } cells <- CellsByIdentities(object = object, cells = cells) @@ -4291,8 +4530,9 @@ WhichCells.Seurat <- function( return(x) } ) - cells <- na.omit(object = unlist(x = cells, use.names = FALSE)) - return(as.character(x = cells)) + cells <- as.character(x = na.omit(object = unlist(x = cells, use.names = FALSE))) + cells <- cells[na.omit(object = match(x = cell.order, table = cells))] + return(cells) } #' @note @@ -4324,6 +4564,7 @@ WriteH5AD.Seurat <- function( if (!PackageCheck('hdf5r', error = FALSE)) { stop("Please install hdf5r to enable h5ad functionality") } + CheckDots(...) if (file.exists(file) && !overwrite) { stop("Output file exists, not overwriting") } @@ -4856,6 +5097,7 @@ WriteH5AD.Seurat <- function( #' @method as.list SeuratCommand #' as.list.SeuratCommand <- function(x, complete = FALSE, ...) { + CheckDots(...) cmd <- slot(object = x, name = 'params') if (complete) { cmd <- append( @@ -4887,6 +5129,7 @@ as.list.SeuratCommand <- function(x, complete = FALSE, ...) { #' @method as.logical JackStrawData #' as.logical.JackStrawData <- function(x, ...) { + CheckDots(...) empP <- JS(object = x, slot = 'empirical') return(!(all(dim(x = empP) == 0) || all(is.na(x = empP)))) } @@ -4991,6 +5234,7 @@ merge.Assay <- function( merge.data = TRUE, ... ) { + CheckDots(...) 
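  # Usage sketch for WhichCells above; with the new cell.order bookkeeping, the
  # returned cells follow the order requested (or the order of the object's
  # colnames when invert = TRUE):
  #   WhichCells(object = pbmc_small, idents = 1)
  #   WhichCells(object = pbmc_small, idents = 1, invert = TRUE)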
assays <- c(x, y) if (!is.null(x = add.cell.ids)) { for (i in 1:length(assays)) { @@ -5005,7 +5249,7 @@ merge.Assay <- function( mat1 = merged.counts, mat2 = ValidateDataForMerge(assay = assays[[i]], slot = "counts") ) - if (length(Key(object = assays[[i]]) > 0)){ + if (length(Key(object = assays[[i]]) > 0)) { keys[i] <- Key(object = assays[[i]]) } } @@ -5026,13 +5270,40 @@ merge.Assay <- function( ) } # only keep cells that made it through counts filtering params - merged.data <- merged.data[, colnames(combined.assay)] + merged.data <- merged.data[, colnames(x = combined.assay)] combined.assay <- SetAssayData( object = combined.assay, slot = "data", new.data = merged.data ) } + # merge SCT assay misc vst info and scale.data + if (all(IsSCT(assay = assays))) { + vst.set.new <- list() + idx <- 1 + for (i in 1:length(x = assays)) { + vst.set.old <- Misc(object = assays[[i]], slot = "vst.set") + if (!is.null(x = vst.set.old)) { + for (j in 1:length(x = vst.set.old)) { + vst.set.new[[idx]] <- vst.set.old[[j]] + idx <- idx + 1 + } + } else if (!is.null(x = Misc(object = assays[[i]], slot = "vst.out"))) { + vst.set.new[[idx]] <- Misc(object = assays[[i]], slot = "vst.out") + idx <- idx + 1 + } + } + Misc(object = combined.assay, slot = "vst.set") <- vst.set.new + scale.data <- do.call( + what = cbind, + args = lapply(X = assays, FUN = function(x) GetAssayData(object = x, slot = "scale.data")) + ) + combined.assay <- SetAssayData( + object = combined.assay, + slot = "scale.data", + new.data = scale.data + ) + } return(combined.assay) } @@ -5081,6 +5352,7 @@ merge.Seurat <- function( project = "SeuratProject", ... ) { + CheckDots(...) objects <- c(x, y) if (!is.null(x = add.cell.ids)) { if (length(x = add.cell.ids) != length(x = objects)) { @@ -5118,6 +5390,30 @@ merge.Seurat <- function( )) } ) + if (all(IsSCT(assay = assays.merge))) { + scaled.features <- unique(x = unlist(x = lapply( + X = assays.merge, + FUN = function(x) rownames(x = GetAssayData(object = x, slot = "scale.data"))) + )) + for (ob in 1:length(x = objects)) { + if (assay %in% FilterObjects(object = objects[[ob]], classes.keep = "Assay")) { + objects[[ob]] <- suppressWarnings(GetResidual(object = objects[[ob]], features = scaled.features, assay = assay, verbose = FALSE)) + assays.merge[[ob]] <- objects[[ob]][[assay]] + } + } + # handle case where some features aren't in counts and can't be retrieved with + # GetResidual - take intersection + scaled.features <- names(x = which(x = table(x = unlist(x = lapply( + X = assays.merge, + FUN = function(x) rownames(x = GetAssayData(object = x, slot = "scale.data"))) + )) == length(x = assays.merge))) + for (a in 1:length(x = assays.merge)) { + assays.merge[[a]] <- SetAssayData( + object = assays.merge[[a]], + slot = "scale.data", + new.data = GetAssayData(object = assays.merge[[a]], slot = "scale.data")[scaled.features, ]) + } + } merged.assay <- merge( x = assays.merge[[1]], y = assays.merge[2:length(x = assays.merge)], @@ -5201,6 +5497,7 @@ names.Seurat <- function(x) { #' @method print DimReduc #' print.DimReduc <- function(x, dims = 1:5, nfeatures = 20, projected = FALSE, ...) { + CheckDots(...) loadings <- Loadings(object = x, projected = projected) nfeatures <- min(nfeatures, nrow(x = loadings)) if (ncol(x = loadings) == 0) { @@ -5249,6 +5546,7 @@ print.DimReduc <- function(x, dims = 1:5, nfeatures = 20, projected = FALSE, ... #' @method subset Assay #' subset.Assay <- function(x, cells = NULL, features = NULL, ...) { + CheckDots(...) 
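  # A sketch of the SCT-aware merge above; `obj1` and `obj2` are illustrative
  # objects, each previously run through SCTransform:
  #   merged <- merge(x = obj1, y = obj2)
  #   Misc(object = merged[['SCT']], slot = 'vst.set')   # per-dataset vst models
  # scale.data in the merged SCT assay is restricted to features scaled in
  # every input object.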
cells <- cells %||% colnames(x = x) if (all(is.na(x = cells))) { cells <- colnames(x = x) @@ -5284,6 +5582,7 @@ subset.Assay <- function(x, cells = NULL, features = NULL, ...) { slot(object = x, name = "data") <- GetAssayData(object = x, slot = "data")[features, cells, drop = FALSE] cells.scaled <- colnames(x = GetAssayData(object = x, slot = "scale.data")) cells.scaled <- cells.scaled[cells.scaled %in% cells] + cells.scaled <- cells.scaled[na.omit(object = match(x = colnames(x = x), table = cells.scaled))] features.scaled <- rownames(x = GetAssayData(object = x, slot = 'scale.data')) features.scaled <- features.scaled[features.scaled %in% features] slot(object = x, name = "scale.data") <- if (length(x = cells.scaled) > 0 && length(x = features.scaled) > 0) { @@ -5300,6 +5599,7 @@ subset.Assay <- function(x, cells = NULL, features = NULL, ...) { #' @method subset DimReduc #' subset.DimReduc <- function(x, cells = NULL, features = NULL, ...) { + CheckDots(...) cells <- Cells(x = x) %iff% cells %||% Cells(x = x) if (all(is.na(x = cells))) { cells <- Cells(x = x) @@ -5845,7 +6145,7 @@ setMethod( definition = function(object) { cat('An AnchorSet object containing', nrow(x = slot(object = object, name = "anchors")), "anchors between", length(x = slot(object = object, name = "object.list")), "Seurat objects \n", - "This can be used as input to IntegrateData, TransferLabels, or TransferFeatures.") + "This can be used as input to IntegrateData or TransferData.") } ) diff --git a/R/preprocessing.R b/R/preprocessing.R index 8dffc9bbe..7258ea9ae 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -415,6 +415,12 @@ HTODemux <- function( #' @param object A seurat object #' @param features Name of features to add into the scale.data #' @param assay Name of the assay of the seurat object generated by SCTransform +#' @param umi.assay Name of the assay of the seurat object containing UMI matrix and the default is +#' RNA +#' @param clip.range Numeric of length two specifying the min and max values the Pearson residual +#' will be clipped to +#' @param replace.value Recalculate residuals for all features, even if they are already present. +#' Useful if you want to change the clip.range. 
#' @param verbose Whether to print messages and progress bars
#'
#' @return Returns a Seurat object containing Pearson residuals of added features in its scale.data
@@ -429,55 +435,115 @@ HTODemux <- function(
 #' pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20)
 #' pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A'))
 #'
-GetResidual <- function(object, features, assay = "SCT", verbose = TRUE) {
-  if (is.null(x = Misc(object[[assay]], slot = 'vst.out'))) {
+GetResidual <- function(
+  object,
+  features,
+  assay = "SCT",
+  umi.assay = "RNA",
+  clip.range = NULL,
+  replace.value = FALSE,
+  verbose = TRUE
+) {
+  if (is.null(x = Misc(object = object[[assay]], slot = 'vst.out')) & is.null(x = Misc(object = object[[assay]], slot = 'vst.set'))) {
     stop(assay, " assay was not generated by SCTransform")
   }
-  new_features <- setdiff(
-    x = features,
-    y = rownames(x = GetAssayData(object = object, assay = assay, slot = "scale.data"))
-  )
+  if (replace.value) {
+    new_features <- features
+  } else {
+    new_features <- setdiff(
+      x = features,
+      y = rownames(x = GetAssayData(object = object, assay = assay, slot = "scale.data"))
+    )
+  }
   if (length(x = new_features) == 0) {
     if (verbose) {
       message("Pearson residuals of input features exist already")
     }
   } else {
-    vst_out <- Misc(object = object[[assay]], slot = 'vst.out')
+    if (is.null(x = Misc(object = object[[assay]], slot = 'vst.set'))) {
+      vst_out <- Misc(object = object[[assay]], slot = 'vst.out')
+      # filter cells not in the object but in the SCT model
+      vst_out$cell_attr <- vst_out$cell_attr[Cells(x = object), ]
+      vst_out$cells_step1 <- intersect(x = vst_out$cells_step1, y = Cells(x = object))
+      object <- GetResidualVstOut(
+        object = object,
+        assay = assay,
+        umi.assay = umi.assay,
+        new_features = new_features,
+        vst_out = vst_out,
+        clip.range = clip.range,
+        verbose = verbose
+      )
+    } else {
+      # Calculate Pearson residuals from an integrated object's SCT assay
+      vst.set <- Misc(object = object[[assay]], slot = 'vst.set')
+      scale.data <- GetAssayData(
+        object = object,
+        assay = assay,
+        slot = "scale.data"
+      )
+      vst_set_genes <- sapply(
+        X = 1:length(x = vst.set),
+        FUN = function(x) rownames(x = vst.set[[x]]$model_pars_fit)
+      )
+      vst_set_genes <- Reduce(f = intersect, x = vst_set_genes)
       diff_features <- setdiff(
         x = new_features,
-        y = rownames(x = GetAssayData(object = object, assay = assay, slot = "counts"))
+        y = vst_set_genes
       )
-      intersect_feature <- intersect(
+      if (length(x = diff_features) != 0) {
+        warning(
+          "The following ", length(x = diff_features),
+          " features do not exist in all SCT models: ",
+          paste(diff_features, collapse = " ")
+        )
+      }
+      new_features <- intersect(
        x = new_features,
-        y = rownames(x = GetAssayData(object = object, assay = assay, slot = "counts"))
+        y = vst_set_genes
      )
-      if (length(x = diff_features) == 0) {
-        umi <- GetAssayData(object = object, assay = assay, slot = "counts")[new_features, , drop = FALSE]
-      } else {
-        warning("The following ", length(x = diff_features), " features do not exist in the counts slot: ", paste(diff_features, collapse = " "))
-        umi <- GetAssayData(object = object, assay = assay, slot = "counts")[intersect_feature, , drop = FALSE]
-      }
-      new_residual <- get_residuals(
-        vst_out = vst_out,
-        umi = umi,
-        residual_type = "pearson",
-        show_progress = verbose
+      if (length(x = new_features) != 0) {
+        object <- SetAssayData(
+          object = object,
+          assay = assay,
+          slot = "scale.data",
+          new.data = scale.data[!rownames(x = scale.data) %in% new_features, , drop = FALSE]
        )
-      new_residual <- as.matrix(x = new_residual)
+        new.scale.data <- matrix(nrow = length(x = new_features), ncol = 0)
+        rownames(x = new.scale.data) <- new_features
+        for (v in 1:length(x = vst.set)) {
+          vst_out <- vst.set[[v]]
+          # confirm that cells from the SCT model also exist in the integrated object
+          cells.v <- intersect(x = rownames(x = vst_out$cell_attr), y = Cells(x = object))
+          vst_out$cell_attr <- vst_out$cell_attr[cells.v, ]
+          vst_out$cells_step1 <- intersect(x = vst_out$cells_step1, y = cells.v)
+          object.v <- subset(x = object, cells = cells.v)
+          object.v <- GetResidualVstOut(
+            object = object.v,
+            assay = assay,
+            umi.assay = umi.assay,
+            new_features = new_features,
+            vst_out = vst_out,
+            clip.range = clip.range,
+            verbose = verbose
+          )
+          new.scale.data <- cbind(
+            new.scale.data,
+            GetAssayData(object = object.v, assay = assay, slot = "scale.data")[new_features, , drop = FALSE]
+          )
+        }
         object <- SetAssayData(
           object = object,
-          slot = 'scale.data',
-          new.data = rbind(
-            GetAssayData(object = object, slot = 'scale.data', assay = assay),
-            new_residual
-          ),
-          assay = assay
+          assay = assay,
+          slot = "scale.data",
+          new.data = new.scale.data
        )
+      }
+    }
   }
   return(object)
 }
-
 #' Normalize raw data
 #'
 #' Normalize count data per cell and transform to log scale
 #'
 #' @param data Matrix with the raw count data
 #' @param scale.factor Scale the data. Default is 1e4
 #' @param verbose Print progress
-#' @param ... Ignored
 #'
 #' @return Returns a matrix with the normalized and log-transformed data
 #'
@@ -500,7 +565,7 @@ GetResidual <- function(object, features, assay = "SCT", verbose = TRUE) {
 #' mat_norm <- LogNormalize(data = mat)
 #' mat_norm
 #'
-LogNormalize <- function(data, scale.factor = 1e4, verbose = TRUE, ...) {
+LogNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) {
   if (class(x = data) == "data.frame") {
     data <- as.matrix(x = data)
   }
@@ -817,6 +882,16 @@ Read10X <- function(data.dir = NULL, gene.column = 2, unique.features = TRUE) {
       header = FALSE,
       stringsAsFactors = FALSE
     )
+    if (any(is.na(x = feature.names[, gene.column]))) {
+      warning(
+        'Some feature names are NA. Replacing NA names with the ID from the opposite column requested',
+        call. = FALSE,
+        immediate. = TRUE
+      )
+      na.features <- which(x = is.na(x = feature.names[, gene.column]))
+      replacement.column <- ifelse(test = gene.column == 2, yes = 1, no = 2)
+      feature.names[na.features, gene.column] <- feature.names[na.features, replacement.column]
+    }
     if (unique.features) {
      fcols = ncol(x = feature.names)
      if (fcols < gene.column) {
@@ -961,7 +1036,6 @@ Read10X_h5 <- function(filename, use.names = TRUE, unique.features = TRUE) {
 #' @param data Matrix with the raw count data
 #' @param scale.factor Scale the result. Default is 1
 #' @param verbose Print progress
-#' @param ... Ignored
 #' @return Returns a matrix with the relative counts
 #'
 #' @import Matrix
@@ -975,7 +1049,7 @@ Read10X_h5 <- function(filename, use.names = TRUE, unique.features = TRUE) {
 #' mat_norm <- RelativeCounts(data = mat)
 #' mat_norm
 #'
-RelativeCounts <- function(data, scale.factor = 1, verbose = TRUE, ...)
{ +RelativeCounts <- function(data, scale.factor = 1, verbose = TRUE) { if (class(x = data) == "data.frame") { data <- as.matrix(x = data) } @@ -1258,6 +1332,8 @@ SCTransform <- function( ) # save vst output (except y) in @misc slot vst.out$y <- NULL + # save clip.range into vst model + vst.out$arguments$sct.clip.range <- clip.range Misc(object = assay.out, slot = 'vst.out') <- vst.out # also put gene attributes in meta.features assay.out[[paste0('sct.', names(x = vst.out$gene_attr))]] <- vst.out$gene_attr @@ -1399,6 +1475,7 @@ FindVariableFeatures.default <- function( verbose = TRUE, ... ) { + CheckDots(...) if (!inherits(x = object, 'Matrix')) { object <- as(object = as.matrix(x = object), Class = 'Matrix') } @@ -1520,7 +1597,8 @@ FindVariableFeatures.Assay <- function( dispersion.function = dispersion.function, num.bin = num.bin, binning.method = binning.method, - verbose = verbose + verbose = verbose, + ... ) object[[names(x = hvf.info)]] <- hvf.info hvf.info <- hvf.info[which(x = hvf.info[, 1, drop = TRUE] != 0), ] @@ -1594,7 +1672,8 @@ FindVariableFeatures.Seurat <- function( nfeatures = nfeatures, mean.cutoff = mean.cutoff, dispersion.cutoff = dispersion.cutoff, - verbose = verbose + verbose = verbose, + ... ) object[[assay]] <- assay.data object <- LogSeuratCommand(object = object) @@ -1632,6 +1711,7 @@ NormalizeData.default <- function( verbose = TRUE, ... ) { + CheckDots(...) if (is.null(x = normalization.method)) { return(object) } @@ -1641,7 +1721,7 @@ NormalizeData.default <- function( 'LogNormalize' = LogNormalize, 'CLR' = CustomNormalize, 'RC' = RelativeCounts, - stop("Unkown normalization method: ", normalization.method) + stop("Unknown normalization method: ", normalization.method) ) if (normalization.method != 'CLR') { margin <- 2 @@ -1666,19 +1746,24 @@ NormalizeData.default <- function( X = 1:ncol(x = chunk.points), FUN = function(i) { block <- chunk.points[, i] - return(norm.function( - data = if (margin == 1) { - object[block[1]:block[2], , drop = FALSE] - } else { - object[, block[1]:block[2], drop = FALSE] - }, + data <- if (margin == 1) { + object[block[1]:block[2], , drop = FALSE] + } else { + object[, block[1]:block[2], drop = FALSE] + } + clr_function <- function(x) { + return(log1p(x = x / (exp(x = sum(log1p(x = x[x > 0]), na.rm = TRUE) / length(x = x))))) + } + args <- list( + data = data, scale.factor = scale.factor, verbose = FALSE, - custom_function = function(x) { - return(log1p(x = x / (exp(x = sum(log1p(x = x[x > 0]), na.rm = TRUE) / length(x = x))))) - }, - margin = margin - # across = across + custom_function = clr_function, margin = margin + ) + args <- args[names(x = formals(fun = norm.function))] + return(do.call( + what = norm.function, + args = args )) } ) @@ -1785,21 +1870,55 @@ NormalizeData.Seurat <- function( #' @param k The rank of the rank-k approximation. Set to NULL for automated choice of k. #' @param q The number of additional power iterations in randomized SVD when #' computing rank k approximation. By default, q=10. +#' @param quantile.prob The quantile probability to use when calculating threshold. +#' By default, quantile.prob = 0.001. +#' @param use.mkl Use the Intel MKL based implementation of SVD. Needs to be +#' installed from https://github.com/KlugerLab/rpca-mkl. \strong{Note}: this requires +#' the \href{https://github.com/satijalab/seurat-wrappers}{SeuratWrappers} implementation +#' of \code{RunALRA} +#' @param mkl.seed Only relevant if \code{use.mkl = TRUE}. 
Set the seed for the random
+#' generator for the Intel MKL implementation of SVD. Any number <0 will
+#' use the current timestamp. If \code{use.mkl = FALSE}, set the seed using
+#' \code{\link{set.seed}()} function as usual.
 #'
 #' @rdname RunALRA
 #' @export
 #'
-RunALRA.default <- function(object, k = NULL, q = 10, ...) {
+RunALRA.default <- function(
+  object,
+  k = NULL,
+  q = 10,
+  quantile.prob = 0.001,
+  use.mkl = FALSE,
+  mkl.seed = -1,
+  ...
+) {
+  CheckDots(...)
   A.norm <- t(x = as.matrix(x = object))
   message("Identifying non-zero values")
   originally.nonzero <- A.norm > 0
   message("Computing Randomized SVD")
+  if (use.mkl) {
+    warning(
+      "Using the Intel MKL-based implementation of SVD requires RunALRA from SeuratWrappers\n",
+      "For more details, see https://github.com/satijalab/seurat-wrappers\n",
+      "Continuing with standard SVD implementation",
+      call. = FALSE,
+      immediate. = TRUE
+    )
+  }
   fastDecomp.noc <- rsvd(A = A.norm, k = k, q = q)
   A.norm.rank.k <- fastDecomp.noc$u[, 1:k] %*% diag(x = fastDecomp.noc$d[1:k]) %*% t(x = fastDecomp.noc$v[,1:k])
-  message("Find most negative values of each gene")
-  A.norm.rank.k.mins <- abs(x = apply(X = A.norm.rank.k, MARGIN = 2, FUN = min))
+  message(sprintf("Find the %f quantile of each gene", quantile.prob))
+  A.norm.rank.k.mins <- abs(x = apply(
+    X = A.norm.rank.k,
+    MARGIN = 2,
+    FUN = function(x) {
+      return(quantile(x = x, probs = quantile.prob))
+    }
+  ))
   message("Thresholding by the most negative value of each gene")
   A.norm.rank.k.cor <- replace(
     x = A.norm.rank.k,
@@ -1813,7 +1932,7 @@ RunALRA.default <- function(object, k = NULL, q = 10, ...) {
   sigma.2 <- apply(X = A.norm, MARGIN = 2, FUN = sd.nonzero)
   mu.1 <- colSums(x = A.norm.rank.k.cor) / colSums(x = !!A.norm.rank.k.cor)
   mu.2 <- colSums(x = A.norm) / colSums(x = !!A.norm)
-  toscale <- !is.na(x = sigma.1) & !is.na(x = sigma.2)
+  toscale <- !is.na(sigma.1) & !is.na(sigma.2) & !(sigma.1 == 0 & sigma.2 == 0) & !(sigma.1 == 0)
   message(sprintf(fmt = "Scaling all except for %d columns", sum(!toscale)))
   sigma.1.2 <- sigma.2 / sigma.1
   toadd <- -1 * mu.1 * sigma.2 / sigma.1 + mu.2
@@ -1859,15 +1978,15 @@ RunALRA.default <- function(object, k = NULL, q = 10, ...) {
 #' @param genes.use genes to impute
 #' @param K Number of singular values to compute when choosing k. Must be less
 #' than the smallest dimension of the matrix. Default 100 or smallest dimension.
-#' @param p.val.th The threshold for ''significance'' when choosing k. Default 1e-10.
+#' @param thresh The threshold for ''significance'' when choosing k. Default 6.
-#' @param noise.start Index for which all smaller singular values are considered noise.
+#' @param noise.start Index for which all smaller singular values are considered noise.
 #' Default K - 20.
-#' @param q.k Number of additional power iterations when choosing k. Default 2.
+#' @param q.k Number of additional power iterations when choosing k. Default 2.
#' @param k.only If TRUE, only computes optimal k WITHOUT performing ALRA #' #' @importFrom rsvd rsvd #' @importFrom Matrix Matrix -#' @importFrom stats pnorm sd +#' @importFrom stats sd setNames quantile #' #' @rdname RunALRA #' @export @@ -1877,12 +1996,15 @@ RunALRA.Seurat <- function( object, k = NULL, q = 10, + quantile.prob = 0.001, + use.mkl = FALSE, + mkl.seed=-1, assay = NULL, slot = "data", setDefaultAssay = TRUE, genes.use = NULL, K = NULL, - p.val.th = 1e-10, + thresh = 6, noise.start = NULL, q.k = 2, k.only = FALSE, @@ -1925,18 +2047,24 @@ RunALRA.Seurat <- function( stop("There need to be at least 5 singular values considered noise") } noise.svals <- noise.start:K + if (use.mkl) { + warning( + "Using the Intel MKL-based implementation of SVD requires RunALRA from SeuratWrappers\n", + "For more details, see https://github.com/satijalab/seurat-wrappers\n", + "Continuing with standard SVD implementation", + call. = FALSE, + immediate. = TRUE + ) + } rsvd.out <- rsvd(A = t(x = as.matrix(x = data.used)), k = K, q = q.k) - diffs <- diff(x = rsvd.out$d) - pvals <- pnorm( - q = diffs, - mean = mean(x = diffs[noise.svals - 1]), - sd = sd(x = diffs[noise.svals - 1]) - ) - k <- max(which(x = pvals < p.val.th)) + diffs <- rsvd.out$d[1:(length(x = rsvd.out$d)-1)] - rsvd.out$d[2:length(x = rsvd.out$d)] + mu <- mean(x = diffs[noise.svals - 1]) + sigma <- sd(x = diffs[noise.svals - 1]) + num_of_sds <- (diffs - mu) / sigma + k <- max(which(x = num_of_sds > thresh)) alra.info[["d"]] <- rsvd.out$d alra.info[["k"]] <- k alra.info[["diffs"]] <- diffs - alra.info[["pvals"]] <- pvals Tool(object = object) <- alra.info } if (k.only) { @@ -1945,7 +2073,14 @@ RunALRA.Seurat <- function( } message("Rank k = ", k) # Perform ALRA on data.used - output.alra <- RunALRA(object = data.used, k = k, q = q) + output.alra <- RunALRA( + object = data.used, + k = k, + q = q, + quantile.prob = quantile.prob, + use.mkl = use.mkl, + mkl.seed = mkl.seed + ) # Save ALRA data in object@assay data.alra <- Matrix(data = t(x = output.alra), sparse = TRUE) rownames(x = data.alra) <- genes.use @@ -2006,6 +2141,7 @@ ScaleData.default <- function( verbose = TRUE, ... ) { + CheckDots(...) features <- features %||% rownames(x = object) features <- as.vector(x = intersect(x = features, y = rownames(x = object))) object <- object[features, , drop = FALSE] @@ -2408,7 +2544,6 @@ ClassifyCells <- function(data, q) { # @param margin Which way to we normalize. Set 1 for rows (features) or 2 for columns (genes) # @parm across Which way to we normalize? Choose form 'cells' or 'features' # @param verbose Show progress bar -# @param ... Ignored # # @return Returns a matrix with the custom normalization # @@ -2416,7 +2551,7 @@ ClassifyCells <- function(data, q) { #' @importFrom pbapply pbapply # @import Matrix # -CustomNormalize <- function(data, custom_function, margin, verbose = TRUE, ...) 
{ +CustomNormalize <- function(data, custom_function, margin, verbose = TRUE) { if (class(x = data) == "data.frame") { data <- as.matrix(x = data) } @@ -2494,6 +2629,105 @@ FindThresh <- function(call.list) { return(list(res = res, extrema = extrema)) } +# Calculate pearson residuals of features not in the scale.data +# This function is the secondary function under GetResidual +# +# @param object A seurat object +# @param features Name of features to add into the scale.data +# @param assay Name of the assay of the seurat object generated by SCTransform +# @param vst_out The SCT parameter list +# @param clip.range Numeric of length two specifying the min and max values the Pearson residual +# will be clipped to +# Useful if you want to change the clip.range. +# @param verbose Whether to print messages and progress bars +# +# @return Returns a Seurat object containing pearson residuals of added features in its scale.data +# +#' @importFrom sctransform get_residuals +# +GetResidualVstOut <- function( + object, + assay, + umi.assay, + new_features, + vst_out, + clip.range, + verbose +) { + diff_features <- setdiff( + x = new_features, + y = rownames(x = vst_out$model_pars_fit) + ) + intersect_feature <- intersect( + x = new_features, + y = rownames(x = vst_out$model_pars_fit) + ) + if (length(x = diff_features) == 0) { + umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts" )[new_features, , drop = FALSE] + } else { + + warning( + "The following ", length(x = diff_features), + " features do not exist in the counts slot: ", + paste(diff_features, collapse = " ") + ) + + if (length(x = intersect_feature) == 0) { + return(object) + } + umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts" )[intersect_feature, , drop = FALSE] + } + if (is.null(x = clip.range)) { + if(length(vst_out$arguments$sct.clip.range)!=0 ){ + clip.max <- max(vst_out$arguments$sct.clip.range) + clip.min <- min(vst_out$arguments$sct.clip.range) + } else{ + clip.max <- max(vst_out$arguments$res_clip_range) + clip.min <- min(vst_out$arguments$res_clip_range) + } + } else { + clip.max <- max(clip.range) + clip.min <- min(clip.range) + } + new_residual <- get_residuals( + vst_out = vst_out, + umi = umi, + residual_type = "pearson", + res_clip_range = c(clip.min, clip.max), + show_progress = verbose + ) + new_residual <- as.matrix(x = new_residual) + # centered data + new_residual <- new_residual - rowMeans(new_residual) + # remove genes from the scale.data if genes are part of new_features + scale.data <- GetAssayData(object = object, assay = assay, slot = "scale.data") + object <- SetAssayData( + object = object, + assay = assay, + slot = "scale.data", + new.data = scale.data[!rownames(x = scale.data) %in% new_features, , drop = FALSE] + ) + if (nrow(x = GetAssayData(object = object, slot = 'scale.data', assay = assay)) == 0 ) { + object <- SetAssayData( + object = object, + slot = 'scale.data', + new.data = new_residual, + assay = assay + ) + } else { + object <- SetAssayData( + object = object, + slot = 'scale.data', + new.data = rbind( + GetAssayData(object = object, slot = 'scale.data', assay = assay), + new_residual + ), + assay = assay + ) + } + return(object) +} + # Local maxima estimator # # Finding local maxima given a numeric vector @@ -2571,8 +2805,7 @@ RegressOutMatrix <- function( features.regress = NULL, model.use = NULL, use.umi = FALSE, - verbose = TRUE, - ... + verbose = TRUE ) { # Do we bypass regression and simply return data.expr? 
bypass <- vapply(
diff --git a/R/tree.R b/R/tree.R
index dfaa117c4..a20a3b92e 100644
--- a/R/tree.R
+++ b/R/tree.R
@@ -17,6 +17,7 @@ NULL
 #' tree is constructed.
 #'
 #' @param object Seurat object
+#' @param assay Assay to use for the analysis.
 #' @param features Genes to use for the analysis. Default is the set of
 #' variable genes (\code{VariableFeatures(object = object)})
 #' @param dims If set, tree is calculated in PCA space; overrides \code{features}
@@ -46,15 +47,16 @@ NULL
 #'
 BuildClusterTree <- function(
   object,
+  assay = NULL,
   features = NULL,
   dims = NULL,
   graph = NULL,
-  # do.plot = TRUE,
   slot = 'data',
   reorder = FALSE,
   reorder.numeric = FALSE,
   verbose = TRUE
 ) {
+  assay <- assay %||% DefaultAssay(object = object)
   if (!is.null(x = graph)) {
     idents <- levels(x = object)
     nclusters <- length(x = idents)
@@ -118,6 +120,7 @@ BuildClusterTree <- function(
     features <- intersect(x = features, y = rownames(x = object))
     data.avg <- AverageExpression(
       object = object,
+      assays = assay,
       features = features,
       slot = slot,
       verbose = verbose
@@ -142,6 +145,7 @@ BuildClusterTree <- function(
   }
   object <- BuildClusterTree(
     object = object,
+    assay = assay,
     features = features,
     dims = dims,
     graph = graph,
@@ -309,6 +313,7 @@ GetRightDescendants <- function(tree, node) {
 # PlotClusterTree(object = pbmc_small)
 #
 MergeNode <- function(object, node.use, rebuild.tree = FALSE, ...) {
+  CheckDots(..., fxns = 'BuildClusterTree')
   object.tree <- object@cluster.tree[[1]]
   node.children <- DFT(
     tree = object.tree,
diff --git a/R/utilities.R b/R/utilities.R
index 57e9a9528..451b18e12 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -204,6 +204,7 @@ AverageExpression <- function(
   verbose = TRUE,
   ...
 ) {
+  CheckDots(..., fxns = 'CreateSeuratObject')
   if (use.scale) {
     .Deprecated(msg = "'use.scale' is a deprecated argument, please use the 'slot' argument instead")
     slot <- 'scale.data'
@@ -382,6 +383,7 @@ CellCycleScoring <- function(
   set.ident = FALSE,
   ...
 ) {
+  CheckDots(..., fxns = 'AddModuleScore')
   name <- 'Cell Cycle'
   features <- list('S.Score' = s.features, 'G2M.Score' = g2m.features)
   object.cc <- AddModuleScore(
@@ -495,6 +497,7 @@ CollapseSpeciesExpressionMatrix <- function(
 #' cell.manhattan.dist <- CustomDistance(input.data, manhattan.distance)
 #'
 CustomDistance <- function(my.mat, my.function, ...) {
+  CheckDots(..., fxns = my.function)
   n <- ncol(x = my.mat)
   mat <- matrix(data = 0, ncol = n, nrow = n)
   colnames(x = mat) <- rownames(x = mat) <- colnames(x = my.mat)
@@ -1082,52 +1085,102 @@ L2Norm <- function(mat, MARGIN = 1){
 #
 # @return ...
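# Usage sketch, matching the call sites added earlier:
#   CheckDots(...)                            # no target: warn on any named dots
#   CheckDots(..., fxns = 'AddModuleScore')   # allow args that AddModuleScore accepts
# How unused arguments are reported is governed by the "Seurat.checkdots"
# option ('warn', 'stop', or 'silent').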
# -#' @importFrom utils argsAnywhere getAnywhere +# @importFrom utils argsAnywhere getAnywhere +#' @importFrom utils isS3stdGeneric methods argsAnywhere isS3method # # @examples # CheckDots <- function(..., fxns = NULL) { - args.names <- names(x = c(...)) - if (length(x = c(...)) == 0) { + args.names <- names(x = list(...)) + if (length(x = list(...)) == 0) { return(invisible(x = NULL)) } if (is.null(x = args.names)) { stop("No named arguments passed") } - fxn.args <- lapply( + if (length(x = fxns) == 1) { + fxns <- list(fxns) + } + for (f in fxns) { + if (!(is.character(x = f) || is.function(x = f))) { + stop("CheckDots only works on characters or functions, not ", class(x = f)) + } + } + fxn.args <- suppressWarnings(expr = sapply( X = fxns, FUN = function(x) { - if (is.character(x = x)) { - if (any(grepl(pattern = 'UseMethod', x = as.character(x = getAnywhere(x = x))))) { - x <- paste0(x, '.default') + x <- tryCatch( + expr = if (isS3stdGeneric(f = x)) { + as.character(x = methods(generic.function = x)) + } else { + x + }, + error = function(...) { + return(x) } - x <- argsAnywhere(x = x) - } - if (is.function(x = x)) { - return(names(x = formals(fun = x))) - } else { - stop("CheckDots only works on characters or functions, not ", class(x = x)) + ) + x <- if (is.character(x = x)) { + sapply(X = x, FUN = argsAnywhere, simplify = FALSE, USE.NAMES = TRUE) + } else if (length(x = x) <= 1) { + list(x) } - } - ) - if (any(grepl(pattern = '...', x = fxn.args, fixed = TRUE))) { - dotted <- grepl(pattern = '...', x = fxn.args, fixed = TRUE) - dfxn <- fxns[dotted] - if (any(sapply(X = dfxn, FUN = is.character))) { + return(sapply( + X = x, + FUN = function(f) { + return(names(x = formals(fun = f))) + }, + simplify = FALSE, + USE.NAMES = TRUE + )) + }, + simplify = FALSE, + USE.NAMES = TRUE + )) + fxn.args <- unlist(x = fxn.args, recursive = FALSE) + fxn.null <- vapply(X = fxn.args, FUN = is.null, FUN.VALUE = logical(length = 1L)) + if (all(fxn.null) && !is.null(x = fxns)) { + stop("None of the functions passed could be found") + } else if (any(fxn.null)) { + warning( + "The following functions passed could not be found: ", + paste(names(x = which(x = fxn.null)), collapse = ', '), + call. = FALSE, + immediate. 
= TRUE
+    )
+    fxn.args <- Filter(f = Negate(f = is.null), x = fxn.args)
+  }
+  dfxns <- vector(mode = 'logical', length = length(x = fxn.args))
+  names(x = dfxns) <- names(x = fxn.args)
+  for (i in 1:length(x = fxn.args)) {
+    dfxns[i] <- any(grepl(pattern = '...', x = fxn.args[[i]], fixed = TRUE))
+  }
+  if (any(dfxns)) {
+    dfxns <- names(x = which(x = dfxns))
+    if (any(nchar(x = dfxns) > 0)) {
+      fx <- vapply(
+        X = Filter(f = nchar, x = dfxns),
+        FUN = function(x) {
+          if (isS3method(method = x)) {
+            x <- unlist(x = strsplit(x = x, split = '\\.'))
+            x <- x[length(x = x) - 1L]
+          }
+          return(x)
+        },
+        FUN.VALUE = character(length = 1L)
+      )
       message(
-        "The following functions accept the dots: ",
-        paste(Filter(f = is.character, x = dfxn), collapse = ', ')
+        "The following functions and any applicable methods accept the dots: ",
+        paste(unique(x = fx), collapse = ', ')
       )
-      dfxn <- Filter(f = Negate(f = is.character), x = dfxn)
-      if (length(x = dfxn) > 0) {
+      if (any(nchar(x = dfxns) < 1)) {
         message(
-          "In addition, there are ",
-          length(x = dfxn),
-          " unnamed function(s) that accept the dots"
+          "In addition, there is/are ",
+          length(x = Filter(f = Negate(f = nchar), x = dfxns)),
+          " other function(s) that accept(s) the dots"
         )
       }
     } else {
-      message("There are ", length(x = dfxn), ' functions provided that accept the dots')
+      message("There is/are ", length(x = dfxns), ' function(s) that accept(s) the dots')
     }
   } else {
     unused <- Filter(
@@ -1137,12 +1190,27 @@ CheckDots <- function(..., fxns = NULL) {
       x = args.names
     )
     if (length(x = unused) > 0) {
-      warning(
-        "The following arguments are not used: ",
-        paste(unused, collapse = ', '),
-        call. = FALSE,
-        immediate. = TRUE
+      msg <- paste0(
+        "The following arguments are not used: ",
+        paste(unused, collapse = ', ')
       )
+      switch(
+        EXPR = getOption(x = "Seurat.checkdots"),
+        "warn" = warning(msg, call. = FALSE, immediate. = TRUE),
+        "stop" = stop(msg),
+        "silent" = NULL,
+        stop("Invalid Seurat.checkdots option. Please choose one of warn, stop, silent")
+      )
+      unused.hints <- sapply(X = unused, FUN = OldParamHints)
+      names(x = unused.hints) <- unused
+      unused.hints <- na.omit(object = unused.hints)
+      if (length(x = unused.hints) > 0) {
+        message(
+          "Suggested parameter: ",
+          paste(unused.hints, "instead of", names(x = unused.hints), collapse = '; '),
+          "\n"
+        )
+      }
     }
   }
 }
@@ -1247,6 +1315,22 @@ IsMatrixEmpty <- function(x) {
   return(all(matrix.dims == 0) || matrix.na)
 }
 
+# Check whether an assay has been processed by sctransform
+#
+# @param assay assay to check
+#
+# @return Boolean
+#
+IsSCT <- function(assay) {
+  if (is.list(x = assay)) {
+    sct.check <- lapply(X = assay, FUN = function(x) {
+      return(!is.null(x = Misc(object = x, slot = 'vst.out')) | !is.null(x = Misc(object = x, slot = 'vst.set')))
+    })
+    return(unlist(x = sct.check))
+  }
+  return(!is.null(x = Misc(object = assay, slot = 'vst.out')))
+}
+
 # Check the length of components of a list
 #
 # @param values A list whose components should be checked
@@ -1264,6 +1348,25 @@ LengthCheck <- function(values, cutoff = 0) {
   ))
 }
 
+# Function to map values in a vector `v` as defined in `from` to the values
+# defined in `to`.
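+# Values in `v` without a match in `from` are left unchanged, e.g.
+#   MapVals(v = c('a', 'b', 'x'), from = c('a', 'b'), to = c('1', '2'))
+# returns c('1', '2', 'x').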
+#
+# @param v vector of values to map
+# @param from vector of original values
+# @param to vector of values to map original values to (should be of equal
+# length as from)
+# @return returns vector of mapped values
+#
+MapVals <- function(v, from, to) {
+  if (length(x = from) != length(x = to)) {
+    stop("from and to vectors are not of equal length")
+  }
+  vals.to.match <- match(x = v, table = from)
+  vals.to.match.idx <- !is.na(x = vals.to.match)
+  v[vals.to.match.idx] <- to[vals.to.match[vals.to.match.idx]]
+  return(v)
+}
+
 # Independently shuffle values within each row of a matrix
 #
 # Creates a matrix where correlation structure has been removed, but overall values are the same
@@ -1328,6 +1431,39 @@ Melt <- function(x) {
   ))
 }
 
+# Give hints for old parameters and their newer counterparts
+#
+# This is a non-exhaustive list. If your function isn't working properly based
+# on the parameters you give it, please read the documentation for your function
+#
+# @param param A vector of parameters to get hints for
+#
+# @return Parameter hints for the specified parameters
+#
+OldParamHints <- function(param) {
+  param.conversion <- c(
+    'raw.data' = 'counts',
+    'min.genes' = 'min.features',
+    'features.plot' = 'features',
+    'pc.genes' = 'features',
+    'do.print' = 'verbose',
+    'genes.print' = 'nfeatures.print',
+    'pcs.print' = 'ndims.print',
+    'pcs.use' = 'dims',
+    'reduction.use' = 'reduction',
+    'cells.use' = 'cells',
+    'do.balanced' = 'balanced',
+    'display.progress' = 'verbose',
+    'print.output' = 'verbose',
+    'dims.use' = 'dims',
+    'reduction.type' = 'reduction',
+    'y.log' = 'log',
+    'cols.use' = 'cols',
+    'assay.use' = 'assay'
+  )
+  return(param.conversion[param])
+}
+
 # Check the existence of a package
 #
 # @param ... Package names
@@ -1408,7 +1544,7 @@ Parenting <- function(parent.find = 'Seurat', ...) {
 #
 # @return Returns the percentage of `x` values above the given threshold
 #
-PercentAbove <- function(x, threshold){
+PercentAbove <- function(x, threshold) {
   return(length(x = x[x > threshold]) / length(x = x))
 }
 
@@ -1425,6 +1561,7 @@ PercentAbove <- function(x, threshold){
 # @seealso \code{\link{sample}}
 #
 RandomName <- function(length = 5L, ...)
{ + CheckDots(..., fxns = 'sample') return(paste(sample(x = letters, size = length, ...), collapse = '')) } @@ -1444,19 +1581,23 @@ RowMergeSparseMatrices <- function(mat1, mat2){ if (inherits(x = mat2, what = "data.frame")) { mat2 <- as.matrix(x = mat2) } - mat1 <- as(object = mat1, Class = "RsparseMatrix") - mat2 <- as(object = mat2, Class = "RsparseMatrix") mat1.names <- rownames(x = mat1) mat2.names <- rownames(x = mat2) - all.names <- union(x = mat1.names, y = mat2.names) - new.mat <- RowMergeMatrices( - mat1 = mat1, - mat2 = mat2, - mat1_rownames = mat1.names, - mat2_rownames = mat2.names, - all_rownames = all.names - ) - rownames(x = new.mat) <- make.unique(names = all.names) + if (length(x = mat1.names) == length(x = mat2.names) && all(mat1.names == mat2.names)) { + new.mat <- cbind(mat1, mat2) + } else { + mat1 <- as(object = mat1, Class = "RsparseMatrix") + mat2 <- as(object = mat2, Class = "RsparseMatrix") + all.names <- union(x = mat1.names, y = mat2.names) + new.mat <- RowMergeMatrices( + mat1 = mat1, + mat2 = mat2, + mat1_rownames = mat1.names, + mat2_rownames = mat2.names, + all_rownames = all.names + ) + rownames(x = new.mat) <- make.unique(names = all.names) + } colnames(x = new.mat) <- make.unique(names = c( colnames(x = mat1), colnames(x = mat2) diff --git a/R/visualization.R b/R/visualization.R index ff92c385e..ef84ea18c 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -178,6 +178,7 @@ DimHeatmap <- function( #' if \code{slot} is 'scale.data', 6 otherwise #' @param group.by A vector of variables to group cells by; pass 'ident' to group by cell identity classes #' @param group.bar Add a color bar showing group status for cells +#' @param group.colors Colors to use for the color bar #' @param slot Data slot to use, choose from 'raw.data', 'data', or 'scale.data' #' @param assay Assay to pull from # @param check.plot Check that plotting will finish in a reasonable amount of time @@ -199,7 +200,8 @@ DimHeatmap <- function( #' #' @importFrom stats median #' @importFrom scales hue_pal -#' @importFrom ggplot2 annotation_raster coord_cartesian ggplot_build aes_string +#' @importFrom ggplot2 annotation_raster coord_cartesian scale_color_manual +#' ggplot_build aes_string #' @export #' #' @examples @@ -211,6 +213,7 @@ DoHeatmap <- function( cells = NULL, group.by = 'ident', group.bar = TRUE, + group.colors = NULL, disp.min = -2.5, disp.max = NULL, slot = 'scale.data', @@ -293,6 +296,7 @@ DoHeatmap <- function( ) data.group <- rbind(data.group, na.data.group) } + lgroup <- length(levels(group.use)) plot <- SingleRasterMap( data = data.group, raster = raster, @@ -304,14 +308,34 @@ DoHeatmap <- function( ) if (group.bar) { # TODO: Change group.bar to annotation.bar + default.colors <- c(hue_pal()(length(x = levels(x = group.use)))) + cols <- group.colors[1:length(x = levels(x = group.use))] %||% default.colors + if (any(is.na(x = cols))) { + cols[is.na(x = cols)] <- default.colors[is.na(x = cols)] + cols <- Col2Hex(cols) + col.dups <- sort(x = unique(x = which(x = duplicated(x = substr( + x = cols, + start = 1, + stop = 7 + ))))) + through <- length(x = default.colors) + while (length(x = col.dups) > 0) { + pal.max <- length(x = col.dups) + through + cols.extra <- hue_pal()(pal.max)[(through + 1):pal.max] + cols[col.dups] <- cols.extra + col.dups <- sort(x = unique(x = which(x = duplicated(x = substr( + x = cols, + start = 1, + stop = 7 + ))))) + } + } group.use2 <- sort(x = group.use) if (draw.lines) { na.group <- RandomName(length = 20) levels(x = group.use2) <- 
c(levels(x = group.use2), na.group) group.use2[placeholder.cells] <- na.group - cols <- c(hue_pal()(length(x = levels(x = group.use))), "#FFFFFF") - } else { - cols <- c(hue_pal()(length(x = levels(x = group.use)))) + cols <- c(cols, "#FFFFFF") } pbuild <- ggplot_build(plot = plot) names(x = cols) <- levels(x = group.use2) @@ -319,15 +343,16 @@ DoHeatmap <- function( y.range <- diff(x = pbuild$layout$panel_params[[1]]$y.range) y.pos <- max(pbuild$layout$panel_params[[1]]$y.range) + y.range * 0.015 y.max <- y.pos + group.bar.height * y.range - - plot <- plot + annotation_raster( - raster = t(x = cols[group.use2]),, - xmin = -Inf, - xmax = Inf, - ymin = y.pos, - ymax = y.max - ) + - coord_cartesian(ylim = c(0, y.max), clip = 'off') + plot <- plot + + annotation_raster( + raster = t(x = cols[group.use2]), + xmin = -Inf, + xmax = Inf, + ymin = y.pos, + ymax = y.max + ) + + coord_cartesian(ylim = c(0, y.max), clip = 'off') + + scale_color_manual(values = cols) if (label) { x.max <- max(pbuild$layout$panel_params[[1]]$x.range) x.divs <- pbuild$layout$panel_params[[1]]$x.major @@ -409,18 +434,9 @@ HTOHeatmap <- function( doublets <- which(object[[global.classification]] == 'Doublet') doublet.ids <- sort(x = unique(x = as.character(x = classification[doublets, ]))) heatmap.levels <- c(singlet.ids, doublet.ids, 'Negative') - if (length(x = doublets) > 0) { - Idents(object = object, cells = doublets) <- 'Multiplet' - } - Idents(object = object) <- factor( - x = Idents(object = object), - levels = c(singlet.ids, 'Multiplet', 'Negative') - ) object <- ScaleData(object = object, assay = assay, verbose = FALSE) - if (!is.null(x = singlet.names)) { - levels(x = object) <- c(singlet.names, 'Multiplet', 'Negative') - } data <- FetchData(object = object, vars = singlet.ids) + Idents(object = object) <- factor(x = classification[, 1], levels = heatmap.levels) plot <- SingleRasterMap( data = data, raster = raster, @@ -565,6 +581,74 @@ VlnPlot <- function( # Dimensional reduction plots #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' Color dimensional reduction plot by tree split +#' +#' Returns a DimPlot colored based on whether the cells fall in clusters +#' to the left or to the right of a node split in the cluster tree. +#' +#' @param object Seurat object +#' @param node Node in cluster tree on which to base the split +#' @param left.color Color for the left side of the split +#' @param right.color Color for the right side of the split +#' @param other.color Color for all other cells +#' @inheritDotParams DimPlot -object +#' +#' @return Returns a DimPlot +#' +#' @export +#' +#' @examples +#' pbmc_small +#' pbmc_small <- BuildClusterTree(object = pbmc_small, verbose = FALSE) +#' PlotClusterTree(pbmc_small) +#' ColorDimSplit(pbmc_small, node = 5) +#' +ColorDimSplit <- function( + object, + node, + left.color = 'red', + right.color = 'blue', + other.color = 'grey50', + ... +) { + CheckDots(..., fxns = 'DimPlot') + tree <- Tool(object = object, slot = "BuildClusterTree") + split <- tree$edge[which(x = tree$edge[, 1] == node), ][, 2] + all.children <- sort(x = tree$edge[, 2][! 
tree$edge[, 2] %in% tree$edge[, 1]]) + left.group <- DFT(tree = tree, node = split[1], only.children = TRUE) + right.group <- DFT(tree = tree, node = split[2], only.children = TRUE) + if (any(is.na(x = left.group))) { + left.group <- split[1] + } + if (any(is.na(x = right.group))) { + right.group <- split[2] + } + left.group <- MapVals(v = left.group, from = all.children, to = tree$tip.label) + right.group <- MapVals(v = right.group, from = all.children, to = tree$tip.label) + remaining.group <- setdiff(x = tree$tip.label, y = c(left.group, right.group)) + left.cells <- WhichCells(object = object, ident = left.group) + right.cells <- WhichCells(object = object, ident = right.group) + remaining.cells <- WhichCells(object = object, ident = remaining.group) + object <- SetIdent( + object = object, + cells = left.cells, + value = "Left Split" + ) + object <- SetIdent( + object = object, + cells = right.cells, + value = "Right Split" + ) + object <- SetIdent( + object = object, + cells = remaining.cells, + value = "Not in Split" + ) + levels(x = object) <- c("Left Split", "Right Split", "Not in Split") + colors.use = c(left.color, right.color, other.color) + return(DimPlot(object = object, cols = colors.use, ...)) +} + #' Dimensional reduction plot #' #' Graphs the output of a dimensional reduction technique on a 2D scatter plot where each point is a @@ -646,6 +730,7 @@ DimPlot <- function( ncol = NULL, ... ) { + CheckDots(..., fxns = 'CombinePlots') if (length(x = dims) != 2) { stop("'dims' must be a two-length vector") } @@ -740,6 +825,12 @@ DimPlot <- function( #' @param cols The two colors to form the gradient over. Provide as string vector with #' the first color corresponding to low values, the second to high. Also accepts a Brewer #' color scale or vector of colors. Note: this will bin the data into number of colors provided. +#' When blend is \code{TRUE}, takes anywhere from 1-3 colors: +#' \describe{ +#' \item{1 color:}{Treated as color for double-negatives, will use default colors 2 and 3 for per-feature expression} +#' \item{2 colors:}{Treated as colors for per-feature expression, will use default color 1 for double-negatives} +#' \item{3+ colors:}{First color used for double-negatives, colors 2 and 3 used for per-feature expression, all others ignored} +#' } #' @param min.cutoff,max.cutoff Vector of minimum and maximum cutoff values for each feature, #' may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10') #' @param split.by A factor in object metadata to split the feature plot by, pass 'ident' @@ -748,10 +839,10 @@ DimPlot <- function( #' @param blend Scale and blend expression values to visualize coexpression of two features #' @param blend.threshold The color cutoff from weak signal to strong signal; ranges from 0 to 1. 
 #' @param ncol Number of columns to combine multiple feature plots to, ignored if \code{split.by} is not \code{NULL}
-#' @param combine Combine plots into a single gg object; note that if TRUE; themeing will not work when plotting multiple features
+#' @param combine Combine plots into a single gg object; note that if \code{TRUE}, theming will not work when plotting multiple features
 #' @param coord.fixed Plot cartesian coordinates with fixed aspect ratio
 #' @param by.col If splitting by a factor, plot the splits per column with the features as rows; ignored if \code{blend = TRUE}
-
+#' @param sort.cell If \code{TRUE}, positive cells are plotted on top of the negative cells
 #'
 #' @return Returns a ggplot object if only 1 feature is plotted.
 #' If >1 features are plotted and \code{combine=TRUE}, returns a combined ggplot object using \code{cowplot::plot_grid}.
@@ -760,9 +851,9 @@ DimPlot <- function(
 #' @importFrom grDevices rgb
 #' @importFrom cowplot theme_cowplot
 #' @importFrom RColorBrewer brewer.pal.info
-#' @importFrom ggplot2 labs scale_x_continuous scale_y_continuous theme element_rect dup_axis guides
-#' element_blank element_text margin scale_color_brewer scale_color_gradientn scale_color_manual coord_fixed
-#' ggtitle
+#' @importFrom ggplot2 labs scale_x_continuous scale_y_continuous theme element_rect
+#' dup_axis guides element_blank element_text margin scale_color_brewer scale_color_gradientn
+#' scale_color_manual coord_fixed ggtitle
 #'
 #' @export
 #'
@@ -781,7 +872,11 @@ FeaturePlot <- function(
   features,
   dims = c(1, 2),
   cells = NULL,
-  cols = ifelse(test = c(blend, blend), yes = c("#ff0000", "#00ff00"), no = c('lightgrey', 'blue')),
+  cols = if (blend) {
+    c('lightgrey', '#ff0000', '#00ff00')
+  } else {
+    c('lightgrey', 'blue')
+  },
   pt.size = NULL,
   order = FALSE,
   min.cutoff = NA,
@@ -798,8 +893,11 @@ FeaturePlot <- function(
   ncol = NULL,
   combine = TRUE,
   coord.fixed = FALSE,
-  by.col = TRUE
+  by.col = TRUE,
+  sort.cell = FALSE
 ) {
+  # Set a theme to remove right-hand Y axis lines
+  # Also sets right-hand Y axis text label formatting
   no.right <- theme(
     axis.line.y.right = element_blank(),
     axis.ticks.y.right = element_blank(),
@@ -810,44 +908,71 @@ FeaturePlot <- function(
       margin = margin(r = 7)
     )
   )
-  if (is.null(reduction)) {
-    default_order <- c('umap', 'tsne', 'pca')
-    reducs <- which(default_order %in% names(object@reductions))
-    reduction <- default_order[reducs[1]]
-  }
+  # Get the DimReduc to use
+  reduction <- reduction %||% DefaultDimReduc(object = object)
   if (length(x = dims) != 2 || !is.numeric(x = dims)) {
     stop("'dims' must be a two-length integer vector")
   }
+  # Figure out blending stuff
   if (blend && length(x = features) != 2) {
     stop("Blending feature plots only works with two features")
   }
+  # Set color scheme for blended FeaturePlots
   if (blend) {
-    if (length(x = cols) > 2) {
-      warning(
-        "Blending feature plots only works with two colors; using first two colors",
-        call. = FALSE,
-        immediate. = TRUE
-      )
-    } else if (length(x = cols) < 2) {
-      warning(
-        "Blended feature plots require two colors, using default colors",
-        call. = FALSE,
-        immediate. = TRUE
-      )
-      cols <- c("#ff0000", "#00ff00")
-    }
+    default.colors <- eval(expr = formals(fun = FeaturePlot)$cols)
+    cols <- switch(
+      EXPR = as.character(x = length(x = cols)),
+      '0' = {
+        warning(
+          "No colors provided, using default colors",
+          call. = FALSE,
+          immediate. = TRUE
+        )
+        default.colors
+      },
+      '1' = {
+        warning(
+          "Only one color provided, assuming specified is double-negative and augmenting with default colors",
+          call. = FALSE,
+          immediate. = TRUE
+        )
+        c(cols, default.colors[2:3])
+      },
+      '2' = {
+        warning(
+          "Only two colors provided, assuming specified are for features and augmenting with '",
+          default.colors[1],
+          "' for double-negatives",
+          call. = FALSE,
+          immediate. = TRUE
+        )
+        c(default.colors[1], cols)
+      },
+      '3' = cols,
+      {
+        warning(
+          "More than three colors provided, using only first three",
+          call. = FALSE,
+          immediate. = TRUE
+        )
+        cols[1:3]
+      }
+    )
   }
-  if (blend && length(x = cols) != 2) {
-    stop("Blending feature plots only works with two colors")
+  if (blend && length(x = cols) != 3) {
+    stop("Blending feature plots only works with three colors; first one for negative cells")
   }
+  # Name the reductions
   dims <- paste0(Key(object = object[[reduction]]), dims)
   cells <- cells %||% colnames(x = object)
+  # Get plotting data
   data <- FetchData(
     object = object,
     vars = c(dims, 'ident', features),
     cells = cells,
     slot = slot
   )
+  # Check presence of features/dimensions
   if (ncol(x = data) < 4) {
     stop(
       "None of the requested features were found: ",
@@ -860,6 +985,7 @@ FeaturePlot <- function(
     stop("The dimensions requested were not found", call. = FALSE)
   }
   features <- colnames(x = data)[4:ncol(x = data)]
+  # Determine cutoffs
   min.cutoff <- mapply(
     FUN = function(cutoff, feature) {
       return(ifelse(
@@ -895,6 +1021,7 @@ FeaturePlot <- function(
     yes = brewer.pal.info[cols, ]$maxcolors,
     no = length(x = cols)
   )
+  # Apply cutoffs
   data[, 4:ncol(x = data)] <- sapply(
     X = 4:ncol(x = data),
     FUN = function(index) {
@@ -920,6 +1047,7 @@ FeaturePlot <- function(
   )
   colnames(x = data)[4:ncol(x = data)] <- features
   rownames(x = data) <- cells
+  # Figure out splits (FeatureHeatmap)
   data$split <- if (is.null(x = split.by)) {
     RandomName()
   } else {
@@ -932,9 +1060,11 @@ FeaturePlot <- function(
   if (!is.factor(x = data$split)) {
     data$split <- factor(x = data$split)
   }
+  # Set shaping variable
   if (!is.null(x = shape.by)) {
     data[, shape.by] <- object[[shape.by, drop = TRUE]]
   }
+  # Make list of plots
   plots <- vector(
     mode = "list",
     length = ifelse(
@@ -943,24 +1073,32 @@ FeaturePlot <- function(
       no = length(x = features) * length(x = levels(x = data$split))
     )
   )
+  # Apply common limits
   xlims <- c(floor(x = min(data[, dims[1]])), ceiling(x = max(data[, dims[1]])))
   ylims <- c(floor(min(data[, dims[2]])), ceiling(x = max(data[, dims[2]])))
+  # Set blended colors
   if (blend) {
     ncol <- 4
     color.matrix <- BlendMatrix(
-      two.colors = cols,
-      col.threshold = blend.threshold
+      two.colors = cols[2:3],
+      col.threshold = blend.threshold,
+      negative.color = cols[1]
     )
+    cols <- cols[2:3]
     colors <- list(
      color.matrix[, 1],
      color.matrix[1, ],
      as.vector(x = color.matrix)
    )
  }
+  # Make the plots
  for (i in 1:length(x = levels(x = data$split))) {
+    # Figure out which split we're working with
    ident <- levels(x = data$split)[i]
    data.plot <- data[as.character(x = data$split) == ident, , drop = FALSE]
+    # Blend expression values
    if (blend) {
+      features <- features[1:2]
      no.expression <- features[colMeans(x = data.plot[, features]) == 0]
      if (length(x = no.expression) != 0) {
        stop(
@@ -972,16 +1110,23 @@ FeaturePlot <- function(
      data.plot <- cbind(data.plot[, c(dims, 'ident')], BlendExpression(data = data.plot[, features[1:2]]))
      features <- colnames(x = data.plot)[4:ncol(x = data.plot)]
    }
+    # Make per-feature plots
    for (j in 1:length(x = features)) {
      feature <- features[j]
+      # Get blended colors
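+      # (BlendExpression returns expression bins as factor codes starting at 0;
+      # adding 1 turns them into 1-based indices into the precomputed blend
+      # palettes held in `colors`.)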
      if (blend) {
        cols.use <- as.numeric(x = as.character(x = data.plot[, feature])) + 1
        cols.use <- colors[[j]][sort(x = unique(x = cols.use))]
      } else {
        cols.use <- NULL
      }
+      data.single <- data.plot[, c(dims, 'ident', feature, shape.by)]
+      if (sort.cell) {
+        data.single <- data.single[order(data.single[, feature]), ]
+      }
+      # Make the plot
       plot <- SingleDimPlot(
-        data = data.plot[, c(dims, 'ident', feature, shape.by)],
+        data = data.single,
         dims = dims,
         col.by = feature,
         order = order,
@@ -992,7 +1137,9 @@ FeaturePlot <- function(
       ) +
         scale_x_continuous(limits = xlims) +
         scale_y_continuous(limits = ylims) +
-        theme_cowplot()
+        theme_cowplot() +
+        theme(plot.title = element_text(hjust = 0.5))
+      # Add labels
       if (label) {
         plot <- LabelClusters(
           plot = plot,
@@ -1001,13 +1148,16 @@ FeaturePlot <- function(
           size = label.size
         )
       }
+      # Make FeatureHeatmaps look nice(ish)
       if (length(x = levels(x = data$split)) > 1) {
         plot <- plot + theme(panel.border = element_rect(fill = NA, colour = 'black'))
+        # Add title
         plot <- plot + if (i == 1) {
           labs(title = feature)
         } else {
           labs(title = NULL)
         }
+        # Add second axis
         if (j == length(x = features) && !blend) {
           suppressMessages(
             expr = plot <- plot +
@@ -1015,6 +1165,7 @@ FeaturePlot <- function(
               no.right
           )
         }
+        # Remove left Y axis
         if (j != 1) {
           plot <- plot + theme(
             axis.line.y = element_blank(),
@@ -1023,6 +1174,7 @@ FeaturePlot <- function(
             axis.title.y.left = element_blank()
           )
         }
+        # Remove bottom X axis
        if (i != length(x = levels(x = data$split))) {
          plot <- plot + theme(
            axis.line.x = element_blank(),
@@ -1034,15 +1186,17 @@ FeaturePlot <- function(
      } else {
        plot <- plot + labs(title = feature)
      }
+      # Add color scale for normal FeaturePlots
      if (!blend) {
        plot <- plot + guides(color = NULL)
        cols.grad <- cols
        if (length(x = cols) == 1) {
          plot <- plot + scale_color_brewer(palette = cols)
        } else if (length(x = cols) > 1) {
-          if (all(data.plot[, feature] == data.plot[, feature][1])) {
-            warning("All cells have the same value (", data.plot[1, feature], ") of ", feature, ".")
-            if (data.plot[1, feature][1] == 0) {
+          unique.feature.exp <- unique(x = data.plot[, feature])
+          if (length(x = unique.feature.exp) == 1) {
+            warning("All cells have the same value (", unique.feature.exp, ") of ", feature, ".")
+            if (unique.feature.exp == 0) {
              cols.grad <- cols[1]
            } else{
              cols.grad <- cols
@@ -1056,16 +1210,21 @@ FeaturePlot <- function(
          )
        }
      }
+      # Add coord_fixed
      if (coord.fixed) {
        plot <- plot + coord_fixed()
      }
+      # I'm not sure why, but sometimes the damn thing fails without this
+      # Thanks, ggplot2
      plot <- plot
+      # Place the plot
      plots[[(length(x = features) * (i - 1)) + j]] <- plot
    }
  }
+  # Add blended color key
  if (blend) {
    blend.legend <- BlendMap(color.matrix = color.matrix)
-    for (i in 1:length(x = levels(x = data$split))) {
+    for (ii in 1:length(x = levels(x = data$split))) {
      suppressMessages(expr = plots <- append(
        x = plots,
        values = list(
@@ -1073,7 +1232,7 @@ FeaturePlot <- function(
            scale_y_continuous(
              sec.axis = dup_axis(name = ifelse(
                test = length(x = levels(x = data$split)) > 1,
-                yes = levels(x = data$split)[i],
+                yes = levels(x = data$split)[ii],
                no = ''
              )),
              expand = c(0, 0)
@@ -1081,7 +1240,7 @@ FeaturePlot <- function(
            labs(
              x = features[1],
              y = features[2],
-              title = if (i == 1) {
+              title = if (ii == 1) {
                paste('Color threshold:', blend.threshold)
              } else {
                NULL
@@ -1089,11 +1248,13 @@ FeaturePlot <- function(
            ) +
            no.right
        ),
-        after = 4 * i - 1
+        after = 4 * ii - 1
      ))
    }
  }
+  # Remove NULL plots
  plots <- Filter(f = Negate(f = is.null), x = plots)
+  # Combine the plots
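+  # (With blending, each split ends up with four panels: feature 1, feature 2,
+  # the blended plot, and the color-threshold key, which is why ncol was forced
+  # to 4 above; otherwise the feature plots are laid out over `ncol` columns.)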
if (combine) { if (is.null(x = ncol)) { ncol <- 2 @@ -1117,6 +1278,7 @@ FeaturePlot <- function( } else { split.by %iff% 'none' } + # Transpose the FeatureHeatmap matrix (not applicable for blended FeaturePlots) if (by.col && !is.null(x = split.by) && !blend) { plots <- lapply( X = plots, @@ -1138,13 +1300,13 @@ FeaturePlot <- function( } idx <- 1 for (i in which(x = 1:length(x = plots) %% length(x = features) == 1)) { - plots[[i]] <- plots[[i]] + ggtitle(levels(x = data$split)[[idx]]) + plots[[i]] <- plots[[i]] + ggtitle(levels(x = data$split)[[idx]]) + theme(plot.title = element_text(hjust = 0.5)) idx <- idx + 1 } idx <- 1 if (length(x = features) == 1) { for (i in 1:length(x = plots)) { - plots[[i]] <- plots[[i]] + ggtitle(levels(x = data$split)[[idx]]) + plots[[i]] <- plots[[i]] + ggtitle(levels(x = data$split)[[idx]]) + theme(plot.title = element_text(hjust = 0.5)) idx <- idx + 1 } } @@ -1183,7 +1345,6 @@ FeaturePlot <- function( #' @param cell2 Cell 2 name #' @param features Features to plot (default, all features) #' @param highlight Features to highlight -#' #' @return A ggplot object #' #' @export @@ -1201,8 +1362,7 @@ CellScatter <- function( highlight = NULL, cols = NULL, pt.size = 1, - smooth = FALSE, - ... + smooth = FALSE ) { features <- features %||% rownames(x = object) data <- FetchData( @@ -1216,8 +1376,7 @@ CellScatter <- function( cols = cols, pt.size = pt.size, rows.highlight = highlight, - smooth = smooth, - ... + smooth = smooth ) return(plot) } @@ -1241,7 +1400,6 @@ CellScatter <- function( #' @param span Spline span in loess function call, if \code{NULL}, no spline added #' @param smooth Smooth the graph (similar to smoothScatter) #' @param slot Slot to pull data from, should be one of 'counts', 'data', or 'scale.data' -#' @param ... Ignored for now #' #' @return A ggplot object #' @@ -1264,8 +1422,7 @@ FeatureScatter <- function( shape.by = NULL, span = NULL, smooth = FALSE, - slot = 'data', - ... + slot = 'data' ) { cells <- cells %||% colnames(x = object) group.by <- group.by %||% Idents(object = object)[cells] @@ -1524,6 +1681,8 @@ PolyFeaturePlot <- function( #' #' Plots the results of the approximate rank selection process for ALRA. #' +#' @note ALRAChooseKPlot and associated functions are being moved to SeuratWrappers; +#' for more information on SeuratWrappers, please see \url{https://github.com/satijalab/seurat-wrappers} #' #' @param object Seurat object #' @param start Index to start plotting singular value spacings from. 
@@ -1545,13 +1704,19 @@ PolyFeaturePlot <- function(
 #' @export
 #'
 ALRAChooseKPlot <- function(object, start = 0, combine = TRUE) {
+  .Deprecated(
+    new = 'SeuratWrappers::ALRAChooseKPlot',
+    msg = paste(
+      'ALRAChooseKPlot and associated functions are being moved to SeuratWrappers;',
+      'for more information on SeuratWrappers, please see https://github.com/satijalab/seurat-wrappers'
+    )
+  )
   alra.data <- Tool(object = object, slot = 'RunALRA')
   if (is.null(x = alra.data)) {
     stop('RunALRA should be run prior to using this function.')
   }
   d <- alra.data[["d"]]
   diffs <- alra.data[["diffs"]]
-  pvals <- alra.data[["pvals"]]
   k <- alra.data[["k"]]
   if (start == 0) {
     start <- floor(x = k / 2)
@@ -1568,7 +1733,7 @@ ALRAChooseKPlot <- function(object, start = 0, combine = TRUE) {
     theme_cowplot() +
     scale_x_continuous(breaks = breaks) +
     labs(x = NULL, y = 's_i', title = 'Singular values')
-  ggdata <- data.frame(x = 2:length(x = d), y = diffs)[-(1:(start - 1)), ]
+  ggdata <- data.frame(x = 1:(length(x = d) - 1), y = diffs)[-(1:(start - 1)), ]
   gg2 <- ggplot(data = ggdata, mapping = aes_string(x = 'x', y = 'y')) +
     geom_point(size = 1) +
     geom_line(size = 0.5) +
@@ -1576,14 +1741,7 @@ ALRAChooseKPlot <- function(object, start = 0, combine = TRUE) {
     theme_cowplot() +
     scale_x_continuous(breaks = breaks) +
     labs(x = NULL, y = 's_{i} - s_{i-1}', title = 'Singular value spacings')
-  ggdata <- data.frame(x = 2:length(x = d), y = pvals)
-  gg3 <- ggplot(data = ggdata, mapping = aes_string(x = 'x', y = 'y')) +
-    geom_point(size = 1) +
-    geom_vline(xintercept = k + 1) +
-    theme_cowplot() +
-    scale_x_continuous(breaks = breaks) +
-    labs(x = NULL, y = 'p.val', title = 'Singular value spacing p-values')
-  plots <- list(spectrum = gg1, spacings = gg2, pvals = gg3)
+  plots <- list(spectrum = gg1, spacings = gg2)
   if (combine) {
     plots <- CombinePlots(plots = plots)
   }
@@ -1690,7 +1848,6 @@ BarcodeInflectionsPlot <- function(object) {
 #' @param scale.by Scale the size of the points by 'size' or by 'radius'
 #' @param scale.min Set lower limit for scaling, use NA for default
 #' @param scale.max Set upper limit for scaling, use NA for default
-#' @param ... Ignored
 #'
 #' @return A ggplot object
 #'
@@ -1722,8 +1879,7 @@ DotPlot <- function(
   split.by = NULL,
   scale.by = 'radius',
   scale.min = NA,
-  scale.max = NA,
-  ...
+  scale.max = NA
 ) {
   assay <- assay %||% DefaultAssay(object = object)
   DefaultAssay(object = object) <- assay
@@ -1989,7 +2145,6 @@ JackStrawPlot <- function(
 #' PlotClusterTree(object = pbmc_small)
 #'
 PlotClusterTree <- function(object, ...) {
-
   if (is.null(x = Tool(object = object, slot = "BuildClusterTree"))) {
     stop("Phylogenetic tree does not exist, build using BuildClusterTree")
   }
@@ -1998,7 +2153,6 @@ PlotClusterTree <- function(object, ...) {
   nodelabels()
 }
 
-
 #' Visualize Dimensional Reduction genes
 #'
 #' Visualize top genes associated with reduction components
@@ -2008,11 +2162,13 @@ PlotClusterTree <- function(object, ...) {
 #' @param dims Number of dimensions to display
 #' @param nfeatures Number of genes to display
 #' @param col Color of points to use
-#' @param projected Use reduction values for full dataset (i.e. projected dimensional reduction values)
-#' @param balanced Return an equal number of genes with + and - scores. If FALSE (default), returns the top genes ranked by the scores absolute values
+#' @param projected Use reduction values for full dataset (i.e. projected
+#' dimensional reduction values)
+#' @param balanced Return an equal number of genes with + and - scores. If
+#' FALSE (default), returns the top genes ranked by the scores' absolute values
 #' @param ncol Number of columns to display
-#' @param combine Combine plots into a single gg object; note that if TRUE; themeing will not work when plotting multiple features
-#' @param ... Ignored
+#' @param combine Combine plots into a single gg object; note that if TRUE,
+#' theming will not work when plotting multiple features
 #'
 #' @return A ggplot object
 #'
@@ -2032,8 +2188,7 @@ VizDimLoadings <- function(
   projected = FALSE,
   balanced = FALSE,
   ncol = NULL,
-  combine = TRUE,
-  ...
+  combine = TRUE
 ) {
   if (is.null(x = ncol)) {
     ncol <- 2
@@ -2178,6 +2333,72 @@ BlueAndRed <- function(k = 50) {
   return(CustomPalette(low = "#313695" , high = "#A50026", mid = "#FFFFBF", k = k))
 }
 
+#' Move outliers towards center on dimension reduction plot
+#'
+#' @param object Seurat object
+#' @param reduction Name of DimReduc to adjust
+#' @param dims Dimensions to visualize
+#' @param group.by Group (color) cells in different ways (for example, orig.ident)
+#' @param outlier.sd Controls the outlier distance
+#' @param reduction.key Key for DimReduc that is returned
+#'
+#' @return Returns a DimReduc object with the modified embeddings
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' pbmc_small <- FindClusters(pbmc_small, resolution = 1.1)
+#' pbmc_small <- RunUMAP(pbmc_small, dims = 1:5)
+#' DimPlot(pbmc_small, reduction = "umap")
+#' pbmc_small[["umap_new"]] <- CollapseEmbeddingOutliers(pbmc_small,
+#'     reduction = "umap", reduction.key = 'umap_', outlier.sd = 0.5)
+#' DimPlot(pbmc_small, reduction = "umap_new")
+#' }
+#'
+CollapseEmbeddingOutliers <- function(
+  object,
+  reduction = 'umap',
+  dims = 1:2,
+  group.by = 'ident',
+  outlier.sd = 2,
+  reduction.key = 'UMAP_'
+) {
+  embeddings <- Embeddings(object = object[[reduction]])[, dims]
+  idents <- FetchData(object = object, vars = group.by)
+  data.medians <- sapply(X = dims, FUN = function(x) {
+    tapply(X = embeddings[, x], INDEX = idents, FUN = median)
+  })
+  data.sd <- apply(X = data.medians, MARGIN = 2, FUN = sd)
+  data.medians.scale <- as.matrix(x = scale(x = data.medians, center = TRUE, scale = TRUE))
+  data.medians.scale[abs(x = data.medians.scale) < outlier.sd] <- 0
+  data.medians.scale <- sign(x = data.medians.scale) * (abs(x = data.medians.scale) - outlier.sd)
+  data.correct <- sweep(
+    x = data.medians.scale,
+    MARGIN = 2,
+    STATS = data.sd,
+    FUN = "*"
+  )
+  data.correct <- data.correct[abs(x = apply(X = data.correct, MARGIN = 1, FUN = min)) > 0, ]
+  new.embeddings <- embeddings
+  for (i in rownames(x = data.correct)) {
+    cells.correct <- rownames(x = idents)[idents[, "ident"] == i]
+    new.embeddings[cells.correct, ] <- sweep(
+      x = new.embeddings[cells.correct, ],
+      MARGIN = 2,
+      STATS = data.correct[i, ],
+      FUN = "-"
+    )
+  }
+  reduc <- CreateDimReducObject(
+    embeddings = new.embeddings,
+    loadings = Loadings(object = object[[reduction]]),
+    assay = slot(object = object[[reduction]], name = "assay.used"),
+    key = reduction.key
+  )
+  return(reduc)
+}
+
 #' Combine ggplot2-based plots into a single plot
 #'
 #' @param plots A list of gg objects
@@ -2419,7 +2640,7 @@ HoverLocator <- function(
   # Set up axis labels here
   # Also, a bunch of stuff to get axis lines done properly
   xaxis <- list(
-    title = names(x = data.frame())[1],
+    title = names(x = data)[1],
     showgrid = FALSE,
     zeroline = FALSE,
    showline = TRUE
@@ -2444,7 +2665,7 @@ HoverLocator <- function(
  # Use I() to get plotly to accept the colors from the data as is
  # Set hoverinfo to 'text' to override the
default hover information # rather than append to it - plotly::layout( + p <- plotly::layout( p = plot_ly( data = plot.build, x = ~x, @@ -2457,11 +2678,31 @@ HoverLocator <- function( ), xaxis = xaxis, yaxis = yaxis, + title = plot$labels$title, titlefont = title, paper_bgcolor = plotbg, plot_bgcolor = plotbg, ... ) + # add labels + label.layer <- which(x = sapply( + X = plot$layers, + FUN = function(x) class(x$geom)[1] == "GeomText") + ) + if (length(x = label.layer) == 1) { + p <- plotly::add_annotations( + p = p, + x = plot$layers[[label.layer]]$data[, 1], + y = plot$layers[[label.layer]]$data[, 2], + xref = "x", + yref = "y", + text = plot$layers[[label.layer]]$data[, 3], + xanchor = 'right', + showarrow = FALSE, + font = list(size = plot$layers[[label.layer]]$aes_params$size * 4) + ) + } + return(p) } #' Label clusters on a ggplot2-based scatter plot @@ -3112,38 +3353,68 @@ BlendMap <- function(color.matrix) { BlendMatrix <- function( n = 10, col.threshold = 0.5, - two.colors=c("#ff0000", "#00ff00") + two.colors = c("#ff0000", "#00ff00"), + negative.color = "black" ) { if (0 > col.threshold || col.threshold > 1) { stop("col.threshold must be between 0 and 1") } + C0 <- colorRamp(colors = negative.color)(1) ramp <- colorRamp(colors = two.colors) C1 <- ramp(x = 0) C2 <- ramp(x = 1) - merge.weight <- min(255 / (C1 + C2 + 0.01)) - weight_color <- function(w1, c1) { - c1_weight <- 1 / (1 + exp(x = -(w1 ^ 1.2 - 3 - col.threshold * 10))) - return(c1_weight * c1) - } - blend_color <- function(i, j) { - C1_weight <- weight_color(w1 = i, c1 = C1) - C2_weight <- weight_color(w1 = j, c1 = C2) - C_blend <- (merge.weight * C1_weight + merge.weight * C2_weight) - alpha <- lapply(X = list(i, j), FUN = '^', 0.5) - alpha <- Reduce(f = '+', x = alpha) - alpha <- (1 - 0.4 /alpha ) * 255 + merge.weight <- min(255 / (C1 + C2 + C0 + 0.01)) + sigmoid <- function(x) { + return(1 / (1 + exp(-x))) + } + blend_color <- function( + i, + j, + col.threshold, + n, + C0, + C1, + C2, + merge.weight + ) { + c.min <- sigmoid(5 * (1 / n - col.threshold)) + c.max <- sigmoid(5 * (1 - col.threshold)) + c1_weight <- sigmoid(5 * (i / n - col.threshold)) + c2_weight <- sigmoid(5 * (j / n - col.threshold)) + c0_weight <- sigmoid(5 * ((i + j) / (2 * n) - col.threshold)) + c1_weight <- (c1_weight - c.min) / (c.max - c.min) + c2_weight <- (c2_weight - c.min) / (c.max - c.min) + c0_weight <- (c0_weight - c.min) / (c.max - c.min) + C1_length <- sqrt(sum((C1 - C0) ** 2)) + C2_length <- sqrt(sum((C2 - C0) ** 2)) + C1_unit <- (C1 - C0) / C1_length + C2_unit <- (C2 - C0) / C2_length + C1_weight <- C1_unit * c1_weight + C2_weight <- C2_unit * c2_weight + C_blend <- C1_weight * (i - 1) * C1_length / (n - 1) + C2_weight * (j - 1) * C2_length / (n - 1) + (i - 1) * (j - 1) * c0_weight * C0 / (n - 1) ** 2 + C0 + C_blend[C_blend > 255] <- 255 + C_blend[C_blend < 0] <- 0 return(rgb( red = C_blend[, 1], green = C_blend[, 2], blue = C_blend[, 3], - alpha = alpha, + alpha = 255, maxColorValue = 255 )) } blend_matrix <- matrix(nrow = n, ncol = n) for (i in 1:n) { for (j in 1:n) { - blend_matrix[i, j] <- blend_color(i = i, j = j) + blend_matrix[i, j] <- blend_color( + i = i, + j = j, + col.threshold = col.threshold, + n = n, + C0 = C0, + C1 = C1, + C2 = C2, + merge.weight = merge.weight + ) } } return(blend_matrix) @@ -3651,6 +3922,7 @@ PlotBuild <- function(data, dark.theme = FALSE, smooth = FALSE, ...) 
{
   # Take advantage of functions as first class objects
   # to dynamically choose normal vs smooth scatterplot
   myplot <- ifelse(test = smooth, yes = smoothScatter, no = plot)
+  CheckDots(..., fxns = myplot)
   if (dark.theme) {
     par(bg = 'black')
     axes = FALSE
@@ -3896,7 +4168,7 @@ globalVariables(names = '..density..', package = 'Seurat')
 # @param ... Extra parameters to MASS::kde2d
 #
 #' @importFrom stats cor
-#' @importFrom MASS kde2d
+# #' @importFrom MASS kde2d
 #' @importFrom cowplot theme_cowplot
 #' @importFrom RColorBrewer brewer.pal.info
 #' @importFrom ggplot2 ggplot geom_point aes_string labs scale_color_brewer
@@ -3910,8 +4182,7 @@ SingleCorPlot <- function(
   smooth = FALSE,
   rows.highlight = NULL,
   legend.title = NULL,
-  na.value = 'grey50',
-  ...
+  na.value = 'grey50'
 ) {
   pt.size <- pt.size <- pt.size %||% AutoPointSize(data = data)
   orig.names <- colnames(x = data)
@@ -3921,6 +4192,13 @@ SingleCorPlot <- function(
     x = colnames(x = data),
     fixed = TRUE
   )
+  if (ncol(x = data) < 2) {
+    msg <- "Too few variables passed"
+    if (ncol(x = data) == 1) {
+      msg <- paste0(msg, ', only have ', colnames(x = data)[1])
+    }
+    stop(msg, call. = FALSE)
+  }
   plot.cor <- round(x = cor(x = data[, 1], y = data[, 2]), digits = 2)
   if (!is.null(x = rows.highlight)) {
     highlight.info <- SetHighlight(
@@ -4000,7 +4278,7 @@ SingleCorPlot <- function(
     plot <- plot + guides(color = FALSE)
    }
  }
-  plot <- plot + theme_cowplot()
+  plot <- plot + theme_cowplot() + theme(plot.title = element_text(hjust = 0.5))
   return(plot)
 }
@@ -4261,7 +4539,8 @@ SingleExIPlot <- function(
     mapping = aes_string(x = x, y = y, fill = fill)[c(2, 3, 1)]
   ) +
     labs(x = xlab, y = ylab, title = feature, fill = NULL) +
-    theme_cowplot()
+    theme_cowplot() +
+    theme(plot.title = element_text(hjust = 0.5))
   plot <- do.call(what = '+', args = list(plot, geom))
   plot <- plot + if (log) {
     log.scale
@@ -4299,7 +4578,7 @@ SingleExIPlot <- function(
       names(x = labels) <- labels
     }
   } else {
-    labels <- unique(x = as.vector(x = data$ident))
+    labels <- levels(x = droplevels(x = data$ident))
   }
   plot <- plot + scale_fill_manual(values = cols, labels = labels)
 }
diff --git a/R/zzz.R b/R/zzz.R
index a4e3769c5..d89d7c2a6 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -4,13 +4,19 @@
 #'
 #' Seurat uses the following [options()] to configure behaviour:
 #'
-#' \itemize{
-#'   \item `Seurat.memsafe`: global option to call gc() after many operations.
+#' \describe{
+#'   \item{\code{Seurat.memsafe}}{global option to call gc() after many operations.
 #' This can be helpful in cleaning up the memory status of the R session and
 #' prevent use of swap space. However, it does add to the computational overhead
 #' and setting to FALSE can speed things up if you're working in an environment
-#' where RAM availabiliy is not a concern.
+#' where RAM availability is not a concern.}
+#'   \item{\code{Seurat.warn.umap.uwot}}{Show warning about the default backend
+#' for \code{\link{RunUMAP}} changing from Python UMAP via reticulate to UWOT}
+#'   \item{\code{Seurat.checkdots}}{For functions that have ... as a parameter,
+#' this controls the behavior when an item isn't used. Can be one of warn,
+#' stop, or silent.}
 #' }
+#'
 #' @docType package
 #' @rdname Seurat-package
 #' @name Seurat-package
@@ -18,7 +24,9 @@ NULL
 
 seurat_default_options <- list(
-  Seurat.memsafe = TRUE
+  Seurat.memsafe = FALSE,
+  Seurat.warn.umap.uwot = TRUE,
+  Seurat.checkdots = "warn"
 )
 
 .onLoad <- function(libname, pkgname) {
diff --git a/README.md b/README.md
index d4ebaa3b1..04f12918d 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,8 @@
 [![CRAN Version](https://www.r-pkg.org/badges/version/Seurat)](https://cran.r-project.org/package=Seurat)
 [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/Seurat)](https://cran.r-project.org/package=Seurat)
 
-# Seurat v3.0.2
+# Seurat v3.1.0
+
 Seurat is an R toolkit for single cell genomics, developed and maintained by the Satija Lab at NYGC.
 
@@ -19,6 +20,12 @@ Improvements and new features will be added on a regular basis, please contact s
 
 Version History
 
+August 20, 2019
+* Version 3.1
+* Changes:
+  * Support for SCTransform integration workflows
+  * Integration speed-ups: reference-based integration + reciprocal PCA
+
 April 12, 2019
 * Version 3.0
 * Changes:
diff --git a/cran-comments.md b/cran-comments.md
index b70ca4979..dbfa0c6b2 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,10 +1,10 @@
-# Seurat v3.0.2
+# Seurat v3.1.0
 
 ## Test environments
 * local Ubuntu 16.04.6 and 18.04.2 installs, R 3.5.3
-* Ubuntu 14.04.5 (on travis-ci), R 3.6.0
-* macOS 10.13.3 (on travis-ci), R 3.6.0
-* Windows Server 2012 (on AppVeyor), R 3.6.0 Patched
+* Ubuntu 16.04.6 (on travis-ci), R 3.6.1
+* macOS 10.13.3 (on travis-ci), R 3.6.1
+* Windows Server 2012 R2 (on AppVeyor), R 3.6.1 Patched
 * win-builder (oldrelease, release, devel)
 
 ## R CMD check results
@@ -33,4 +33,4 @@ There were 3 NOTEs:
 
 ## Downstream dependencies
 
-There is a package that suggests Seurat (clustree), but this update does not impact their functionality.
+There are three packages that suggest Seurat: BisqueRNA, clustree, and iCellR; this update does not impact their functionality.
diff --git a/man/ALRAChooseKPlot.Rd b/man/ALRAChooseKPlot.Rd
index c88d642f7..88d1fd048 100644
--- a/man/ALRAChooseKPlot.Rd
+++ b/man/ALRAChooseKPlot.Rd
@@ -24,6 +24,10 @@ spacings of the singular values, and the p-values of the singular values.
 \description{
 Plots the results of the approximate rank selection process for ALRA.
 }
+\note{
+ALRAChooseKPlot and associated functions are being moved to SeuratWrappers;
+for more information on SeuratWrappers, please see \url{https://github.com/satijalab/seurat-wrappers}
+}
 \seealso{
 \code{\link{RunALRA}}
 }
diff --git a/man/AnchorSet-class.Rd b/man/AnchorSet-class.Rd
index 4a84afb7d..274387d28 100644
--- a/man/AnchorSet-class.Rd
+++ b/man/AnchorSet-class.Rd
@@ -18,6 +18,8 @@ related information needed for performing downstream analyses - namely data inte
 \item{\code{reference.cells}}{List of cell names in the reference dataset - needed when performing data transfer.}
 
+\item{\code{reference.objects}}{Position of reference object/s in object.list}
+
 \item{\code{query.cells}}{List of cell names in the query dataset - needed when performing data transfer}
 
 \item{\code{anchors}}{The anchor matrix.
This contains the cell indices of both anchor pair cells, the diff --git a/man/Assays.Rd b/man/Assays.Rd new file mode 100644 index 000000000..22cffc6a5 --- /dev/null +++ b/man/Assays.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/objects.R +\name{Assays} +\alias{Assays} +\title{Pull Assays or assay names} +\usage{ +Assays(object, slot = NULL) +} +\arguments{ +\item{object}{A Seurat object} + +\item{slot}{Name of Assay to return} +} +\value{ +If \code{slot} is \code{NULL}, the names of all \code{Assay} objects +in this Seurat object. Otherwise, the \code{Assay} object specified +} +\description{ +Lists the names of \code{\link{Assay}} objects present in +a Seurat object. If slot is provided, pulls specified Assay object. +} +\examples{ +Assays(object = pbmc_small) + +} diff --git a/man/BuildClusterTree.Rd b/man/BuildClusterTree.Rd index 7721d0b28..0ebde224b 100644 --- a/man/BuildClusterTree.Rd +++ b/man/BuildClusterTree.Rd @@ -4,13 +4,15 @@ \alias{BuildClusterTree} \title{Phylogenetic Analysis of Identity Classes} \usage{ -BuildClusterTree(object, features = NULL, dims = NULL, graph = NULL, - slot = "data", reorder = FALSE, reorder.numeric = FALSE, - verbose = TRUE) +BuildClusterTree(object, assay = NULL, features = NULL, dims = NULL, + graph = NULL, slot = "data", reorder = FALSE, + reorder.numeric = FALSE, verbose = TRUE) } \arguments{ \item{object}{Seurat object} +\item{assay}{Assay to use for the analysis.} + \item{features}{Genes to use for the analysis. Default is the set of variable genes (\code{VariableFeatures(object = object)})} diff --git a/man/CellScatter.Rd b/man/CellScatter.Rd index bdda7f8b1..dfa6ff14d 100644 --- a/man/CellScatter.Rd +++ b/man/CellScatter.Rd @@ -6,7 +6,7 @@ \title{Cell-cell scatter plot} \usage{ CellScatter(object, cell1, cell2, features = NULL, highlight = NULL, - cols = NULL, pt.size = 1, smooth = FALSE, ...) 
+ cols = NULL, pt.size = 1, smooth = FALSE) } \arguments{ \item{object}{Seurat object} @@ -24,8 +24,6 @@ CellScatter(object, cell1, cell2, features = NULL, highlight = NULL, \item{pt.size}{Size of the points on the plot} \item{smooth}{Smooth the graph (similar to smoothScatter)} - -\item{...}{Ignored for now} } \value{ A ggplot object diff --git a/man/CollapseEmbeddingOutliers.Rd b/man/CollapseEmbeddingOutliers.Rd new file mode 100644 index 000000000..e80e74355 --- /dev/null +++ b/man/CollapseEmbeddingOutliers.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization.R +\name{CollapseEmbeddingOutliers} +\alias{CollapseEmbeddingOutliers} +\title{Move outliers towards center on dimension reduction plot} +\usage{ +CollapseEmbeddingOutliers(object, reduction = "umap", dims = 1:2, + group.by = "ident", outlier.sd = 2, reduction.key = "UMAP_") +} +\arguments{ +\item{object}{Seurat object} + +\item{reduction}{Name of DimReduc to adjust} + +\item{dims}{Dimensions to visualize} + +\item{group.by}{Group (color) cells in different ways (for example, orig.ident)} + +\item{outlier.sd}{Controls the outlier distance} + +\item{reduction.key}{Key for DimReduc that is returned} +} +\value{ +Returns a DimReduc object with the modified embeddings +} +\description{ +Move outliers towards center on dimension reduction plot +} +\examples{ +\dontrun{ +pbmc_small <- FindClusters(pbmc_small, resolution = 1.1) +pbmc_small <- RunUMAP(pbmc_small, dims = 1:5) +DimPlot(pbmc_small, reduction = "umap") +pbmc_small[["umap_new"]] <- CollapseEmbeddingOutliers(pbmc_small, + reduction = "umap", reduction.key = 'umap_', outlier.sd = 0.5) +DimPlot(pbmc_small, reduction = "umap_new") +} + +} diff --git a/man/ColorDimSplit.Rd b/man/ColorDimSplit.Rd new file mode 100644 index 000000000..c69611570 --- /dev/null +++ b/man/ColorDimSplit.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization.R +\name{ColorDimSplit} +\alias{ColorDimSplit} +\title{Color dimensional reduction plot by tree split} +\usage{ +ColorDimSplit(object, node, left.color = "red", right.color = "blue", + other.color = "grey50", ...) +} +\arguments{ +\item{object}{Seurat object} + +\item{node}{Node in cluster tree on which to base the split} + +\item{left.color}{Color for the left side of the split} + +\item{right.color}{Color for the right side of the split} + +\item{other.color}{Color for all other cells} + +\item{...}{Arguments passed on to \code{DimPlot} +\describe{ + \item{dims}{Dimensions to plot, must be a two-length numeric vector specifying x- and y-dimensions} + \item{cells}{Vector of cells to plot (default is all cells)} + \item{cols}{Vector of colors, each color corresponds to an identity class. This may also be a single character +or numeric value corresponding to a palette as specified by \code{\link[RColorBrewer]{brewer.pal.info}}. +By default, ggplot2 assigns colors} + \item{pt.size}{Adjust point size for plotting} + \item{reduction}{Which dimensionality reduction to use. If not specified, first searches for umap, then tsne, then pca} + \item{group.by}{Name of one or more metadata columns to group (color) cells by +(for example, orig.ident); pass 'ident' to group by identity class} + \item{split.by}{Name of a metadata column to split plot by; +see \code{\link{FetchData}} for more details} + \item{shape.by}{If NULL, all points are circles (default). 
You can specify any
+cell attribute (that can be pulled with FetchData) allowing for both
+different colors and different shapes on cells}
+  \item{order}{Specify the order of plotting for the idents. This can be
+useful for crowded plots if points of interest are being buried. Provide
+either a full list of valid idents or a subset to be plotted last (on top)}
+  \item{label}{Whether to label the clusters}
+  \item{label.size}{Sets size of labels}
+  \item{repel}{Repel labels}
+  \item{cells.highlight}{A list of character or numeric vectors of cells to
+highlight. If only one group of cells desired, can simply
+pass a vector instead of a list. If set, colors selected cells to the color(s)
+in \code{cols.highlight} and other cells black (white if dark.theme = TRUE);
+will also resize to the size(s) passed to \code{sizes.highlight}}
+  \item{cols.highlight}{A vector of colors to highlight the cells as; will
+repeat to the length groups in cells.highlight}
+  \item{sizes.highlight}{Size of highlighted cells; will repeat to the length
+groups in cells.highlight}
+  \item{na.value}{Color value for NA points when using custom scale}
+  \item{combine}{Combine plots into a single gg object; note that if TRUE, theming will not work when plotting multiple features}
+  \item{ncol}{Number of columns for display when combining plots}
+}}
+\value{
+Returns a DimPlot
+}
+\description{
+Returns a DimPlot colored based on whether the cells fall in clusters
+to the left or to the right of a node split in the cluster tree.
+}
+\examples{
+pbmc_small
+pbmc_small <- BuildClusterTree(object = pbmc_small, verbose = FALSE)
+PlotClusterTree(pbmc_small)
+ColorDimSplit(pbmc_small, node = 5)
+
+}
diff --git a/man/DietSeurat.Rd b/man/DietSeurat.Rd
index 75a5ead30..558e05d7c 100644
--- a/man/DietSeurat.Rd
+++ b/man/DietSeurat.Rd
@@ -5,7 +5,7 @@
 \title{Slim down a Seurat object}
 \usage{
 DietSeurat(object, counts = TRUE, data = TRUE, scale.data = FALSE,
-  features = NULL, assays = NULL)
+  features = NULL, assays = NULL, dimreducs = NULL, graphs = NULL)
 }
 \arguments{
 \item{object}{Seurat object}
@@ -19,6 +19,12 @@ DietSeurat(object, counts = TRUE, data = TRUE, scale.data = FALSE,
 \item{features}{Only keep a subset of features, defaults to all features}
 
 \item{assays}{Only keep a subset of assays specified here}
+
+\item{dimreducs}{Only keep a subset of DimReducs specified here (if NULL,
+remove all DimReducs)}
+
+\item{graphs}{Only keep a subset of Graphs specified here (if NULL, remove
+all Graphs)}
 }
 \description{
 Keep only certain aspects of the Seurat object.
Can be useful in functions that utilize merge as diff --git a/man/DoHeatmap.Rd b/man/DoHeatmap.Rd index 98904729b..4041f7582 100644 --- a/man/DoHeatmap.Rd +++ b/man/DoHeatmap.Rd @@ -5,10 +5,11 @@ \title{Feature expression heatmap} \usage{ DoHeatmap(object, features = NULL, cells = NULL, group.by = "ident", - group.bar = TRUE, disp.min = -2.5, disp.max = NULL, - slot = "scale.data", assay = NULL, label = TRUE, size = 5.5, - hjust = 0, angle = 45, raster = TRUE, draw.lines = TRUE, - lines.width = NULL, group.bar.height = 0.02, combine = TRUE) + group.bar = TRUE, group.colors = NULL, disp.min = -2.5, + disp.max = NULL, slot = "scale.data", assay = NULL, label = TRUE, + size = 5.5, hjust = 0, angle = 45, raster = TRUE, + draw.lines = TRUE, lines.width = NULL, group.bar.height = 0.02, + combine = TRUE) } \arguments{ \item{object}{Seurat object} @@ -21,6 +22,8 @@ DoHeatmap(object, features = NULL, cells = NULL, group.by = "ident", \item{group.bar}{Add a color bar showing group status for cells} +\item{group.colors}{Colors to use for the color bar} + \item{disp.min}{Minimum display value (all values below are clipped)} \item{disp.max}{Maximum display value (all values above are clipped); defaults to 2.5 diff --git a/man/DotPlot.Rd b/man/DotPlot.Rd index 09266266a..3a5f10747 100644 --- a/man/DotPlot.Rd +++ b/man/DotPlot.Rd @@ -8,7 +8,7 @@ DotPlot(object, assay = NULL, features, cols = c("lightgrey", "blue"), col.min = -2.5, col.max = 2.5, dot.min = 0, dot.scale = 6, group.by = NULL, split.by = NULL, scale.by = "radius", - scale.min = NA, scale.max = NA, ...) + scale.min = NA, scale.max = NA) } \arguments{ \item{object}{Seurat object} @@ -42,8 +42,6 @@ see \code{\link{FetchData}} for more details} \item{scale.min}{Set lower limit for scaling, use NA for default} \item{scale.max}{Set upper limit for scaling, use NA for default} - -\item{...}{Ignored} } \value{ A ggplot object diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 5f3e84f39..ca1eecd55 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -5,14 +5,14 @@ \alias{FeatureHeatmap} \title{Visualize 'features' on a dimensional reduction plot} \usage{ -FeaturePlot(object, features, dims = c(1, 2), cells = NULL, - cols = ifelse(test = c(blend, blend), yes = c("#ff0000", "#00ff00"), no - = c("lightgrey", "blue")), pt.size = NULL, order = FALSE, +FeaturePlot(object, features, dims = c(1, 2), cells = NULL, cols = if + (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { + c("lightgrey", "blue") }, pt.size = NULL, order = FALSE, min.cutoff = NA, max.cutoff = NA, reduction = NULL, split.by = NULL, shape.by = NULL, slot = "data", blend = FALSE, blend.threshold = 0.5, label = FALSE, label.size = 4, repel = FALSE, ncol = NULL, combine = TRUE, coord.fixed = FALSE, - by.col = TRUE) + by.col = TRUE, sort.cell = FALSE) } \arguments{ \item{object}{Seurat object} @@ -31,7 +31,13 @@ FeaturePlot(object, features, dims = c(1, 2), cells = NULL, \item{cols}{The two colors to form the gradient over. Provide as string vector with the first color corresponding to low values, the second to high. Also accepts a Brewer -color scale or vector of colors. Note: this will bin the data into number of colors provided.} +color scale or vector of colors. Note: this will bin the data into number of colors provided. 
@@ -64,11 +70,13 @@ different colors and different shapes on cells}
 \item{ncol}{Number of columns to combine multiple feature plots to, ignored if \code{split.by} is not \code{NULL}}
 
-\item{combine}{Combine plots into a single gg object; note that if TRUE; themeing will not work when plotting multiple features}
+\item{combine}{Combine plots into a single gg object; note that if \code{TRUE}, theming will not work when plotting multiple features}
 
 \item{coord.fixed}{Plot Cartesian coordinates with fixed aspect ratio}
 
 \item{by.col}{If splitting by a factor, plot the splits per column with the features as rows; ignored if \code{blend = TRUE}}
+
+\item{sort.cell}{If \code{TRUE}, the positive cells will overlap the negative cells}
 }
 \value{
 Returns a ggplot object if only 1 feature is plotted.
diff --git a/man/FeatureScatter.Rd b/man/FeatureScatter.Rd
index 566ea4ea2..300551ec2 100644
--- a/man/FeatureScatter.Rd
+++ b/man/FeatureScatter.Rd
@@ -7,7 +7,7 @@
 \usage{
 FeatureScatter(object, feature1, feature2, cells = NULL,
   group.by = NULL, cols = NULL, pt.size = 1, shape.by = NULL,
-  span = NULL, smooth = FALSE, slot = "data", ...)
+  span = NULL, smooth = FALSE, slot = "data")
 }
 \arguments{
 \item{object}{Seurat object}
@@ -33,8 +33,6 @@ be metrics, PC scores, etc. - anything that can be retrieved with FetchData}
 \item{smooth}{Smooth the graph (similar to smoothScatter)}
 
 \item{slot}{Slot to pull data from, should be one of 'counts', 'data', or 'scale.data'}
-
-\item{...}{Ignored for now}
 }
 \value{
 A ggplot object
diff --git a/man/FindClusters.Rd b/man/FindClusters.Rd
index 83847d59a..02edfdb1f 100644
--- a/man/FindClusters.Rd
+++ b/man/FindClusters.Rd
@@ -10,8 +10,8 @@ FindClusters(object, ...)
 
 \method{FindClusters}{default}(object, modularity.fxn = 1,
   initial.membership = NULL, weights = NULL, node.sizes = NULL,
-  resolution = 0.8, algorithm = 1, n.start = 10, n.iter = 10,
-  random.seed = 0, group.singletons = TRUE,
+  resolution = 0.8, method = "matrix", algorithm = 1, n.start = 10,
+  n.iter = 10, random.seed = 0, group.singletons = TRUE,
   temp.file.location = NULL, edge.file.name = NULL, verbose = TRUE,
   ...)
@@ -34,6 +34,9 @@ FindClusters(object, ...)
 \item{resolution}{Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities.}
+
+\item{method}{Method for running the Leiden algorithm (defaults to "matrix", which is fast for small datasets).
+Set method = "igraph" to avoid casting large data to a dense matrix.}
+
 \item{algorithm}{Algorithm for modularity optimization (1 = original Louvain algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM algorithm; 4 = Leiden algorithm).
 Leiden requires the leidenalg python module.}
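A minimal sketch of the new method argument (Leiden clustering via algorithm = 4 needs the leidenalg Python module available through reticulate; "igraph" keeps large graphs sparse):

```r
pbmc_small <- FindNeighbors(pbmc_small, dims = 1:10)
pbmc_small <- FindClusters(pbmc_small, algorithm = 4, method = "igraph",
                           resolution = 0.8)
```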
diff --git a/man/FindIntegrationAnchors.Rd b/man/FindIntegrationAnchors.Rd
index b3b8642b0..e6cb48ca1 100644
--- a/man/FindIntegrationAnchors.Rd
+++ b/man/FindIntegrationAnchors.Rd
@@ -5,29 +5,58 @@
 \title{Find integration anchors}
 \usage{
 FindIntegrationAnchors(object.list = NULL, assay = NULL,
-  anchor.features = 2000, scale = TRUE, l2.norm = TRUE,
-  dims = 1:30, k.anchor = 5, k.filter = 200, k.score = 30,
-  max.features = 200, eps = 0, verbose = TRUE)
+  reference = NULL, anchor.features = 2000, scale = TRUE,
+  normalization.method = c("LogNormalize", "SCT"),
+  sct.clip.range = NULL, reduction = c("cca", "rpca"),
+  l2.norm = TRUE, dims = 1:30, k.anchor = 5, k.filter = 200,
+  k.score = 30, max.features = 200, nn.method = "rann", eps = 0,
+  verbose = TRUE)
 }
 \arguments{
-\item{object.list}{A list of objects between which to find anchors for downstream integration.}
+\item{object.list}{A list of objects between which to find anchors for
+downstream integration.}
 
-\item{assay}{A vector of assay names specifying which assay to use when constructing anchors. If
-NULL, the current default assay for each object is used.}
+\item{assay}{A vector of assay names specifying which assay to use when
+constructing anchors. If NULL, the current default assay for each object is
+used.}
+
+\item{reference}{A vector specifying the object(s) to be used as a reference
+during integration. If NULL (default), all pairwise anchors are found (no
+references). If not NULL, the corresponding objects in \code{object.list}
+will be used as references. When using a set of specified references, anchors
+are first found between each query and each reference. The references are
+then integrated through pairwise integration. Each query is then mapped to
+the integrated reference.}
 
 \item{anchor.features}{Can be either:
 \itemize{
-  \item{A numeric value. This will call \code{\link{SelectIntegrationFeatures}} to select the
-  provided number of features to be used in anchor finding}
+  \item{A numeric value. This will call \code{\link{SelectIntegrationFeatures}}
+  to select the provided number of features to be used in anchor finding}
   \item{A vector of features to be used as input to the anchor finding process}
 }}
 
-\item{scale}{Whether or not to scale the features provided. Only set to FALSE if you have
-previously scaled the features you want to use for each object in the object.list}
+\item{scale}{Whether or not to scale the features provided. Only set to FALSE
+if you have previously scaled the features you want to use for each object in
+the object.list}
+
+\item{normalization.method}{Name of normalization method used: LogNormalize
+or SCT}
 
-\item{l2.norm}{Perform L2 normalization on the CCA cell embeddings after dimensional reduction}
+\item{sct.clip.range}{Numeric of length two specifying the min and max values
+the Pearson residual will be clipped to}
 
-\item{dims}{Which dimensions to use from the CCA to specify the neighbor search space}
+\item{reduction}{Dimensional reduction to perform when finding anchors. Can
+be one of:
+\itemize{
+  \item{cca: Canonical correlation analysis}
+  \item{rpca: Reciprocal PCA}
+}}
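A hedged sketch of the reference-based path described above, combined with reduction = "rpca"; obj.list is a hypothetical list of normalized Seurat objects (reciprocal PCA assumes RunPCA has already been run on each object):

```r
# Use the first object as the reference; anchors are then found between it
# and each query rather than between all pairs of datasets.
anchors <- FindIntegrationAnchors(object.list = obj.list, reference = 1,
                                  reduction = "rpca", dims = 1:30)
integrated <- IntegrateData(anchorset = anchors, dims = 1:30)
```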
+
+\item{l2.norm}{Perform L2 normalization on the CCA cell embeddings after
+dimensional reduction}
+
+\item{dims}{Which dimensions to use from the CCA to specify the neighbor
+search space}
 
 \item{k.anchor}{How many neighbors (k) to use when picking anchors}
@@ -35,8 +64,11 @@ previously scaled the features you want to use for each object in the object.list
 \item{k.score}{How many neighbors (k) to use when scoring anchors}
 
-\item{max.features}{The maximum number of features to use when specifying the neighborhood search
-space in the anchor filtering}
+\item{max.features}{The maximum number of features to use when specifying the
+neighborhood search space in the anchor filtering}
+
+\item{nn.method}{Method for nearest neighbor finding. Options include: rann,
+annoy}
 
 \item{eps}{Error bound on the neighbor finding algorithm (from RANN)}
diff --git a/man/FindNeighbors.Rd b/man/FindNeighbors.Rd
index 968f018b3..5527f870c 100644
--- a/man/FindNeighbors.Rd
+++ b/man/FindNeighbors.Rd
@@ -11,21 +11,24 @@ FindNeighbors(object, ...)
 
 \method{FindNeighbors}{default}(object, distance.matrix = FALSE,
-  k.param = 20, compute.SNN = TRUE, prune.SNN = 1/15, nn.eps = 0,
+  k.param = 20, compute.SNN = TRUE, prune.SNN = 1/15,
+  nn.method = "rann", annoy.metric = "euclidean", nn.eps = 0,
   verbose = TRUE, force.recalc = FALSE, ...)
 
 \method{FindNeighbors}{Assay}(object, features = NULL, k.param = 20,
-  compute.SNN = TRUE, prune.SNN = 1/15, nn.eps = 0, verbose = TRUE,
+  compute.SNN = TRUE, prune.SNN = 1/15, nn.method = "rann",
+  annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE,
   force.recalc = FALSE, ...)
 
 \method{FindNeighbors}{dist}(object, k.param = 20, compute.SNN = TRUE,
-  prune.SNN = 1/15, nn.eps = 0, verbose = TRUE,
-  force.recalc = FALSE, ...)
+  prune.SNN = 1/15, nn.method = "rann", annoy.metric = "euclidean",
+  nn.eps = 0, verbose = TRUE, force.recalc = FALSE, ...)
 
 \method{FindNeighbors}{Seurat}(object, reduction = "pca", dims = 1:10,
   assay = NULL, features = NULL, k.param = 20, compute.SNN = TRUE,
-  prune.SNN = 1/15, nn.eps = 0, verbose = TRUE,
-  force.recalc = FALSE, do.plot = FALSE, graph.name = NULL, ...)
+  prune.SNN = 1/15, nn.method = "rann", annoy.metric = "euclidean",
+  nn.eps = 0, verbose = TRUE, force.recalc = FALSE,
+  do.plot = FALSE, graph.name = NULL, ...)
 }
 \arguments{
 \item{object}{An object}
@@ -46,6 +49,12 @@ values less than or equal to this will be set to 0 and removed from the SNN
 graph. Essentially sets the stringency of pruning (0 --- no pruning, 1 ---
 prune everything).}
+
+\item{nn.method}{Method for nearest neighbor finding. Options include: rann,
+annoy}
+
+\item{annoy.metric}{Distance metric for annoy. Options include: euclidean,
+cosine, manhattan, and hamming}
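A one-call sketch of the approximate neighbor search the new nn.method and annoy.metric arguments enable:

```r
pbmc_small <- FindNeighbors(pbmc_small, reduction = "pca", dims = 1:10,
                            nn.method = "annoy", annoy.metric = "cosine")
```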
+
 \item{nn.eps}{Error bound when performing nearest neighbor search using RANN;
 default of 0.0 implies exact nearest neighbor search}
diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd
index f9295110c..fa9ce247a 100644
--- a/man/FindTransferAnchors.Rd
+++ b/man/FindTransferAnchors.Rd
@@ -4,18 +4,21 @@
 \alias{FindTransferAnchors}
 \title{Find transfer anchors}
 \usage{
-FindTransferAnchors(reference, query, reference.assay = NULL,
-  query.assay = NULL, reduction = "pcaproject",
-  project.query = FALSE, features = NULL, npcs = 30,
-  l2.norm = TRUE, dims = 1:30, k.anchor = 5, k.filter = 200,
-  k.score = 30, max.features = 200, eps = 0, approx.pca = TRUE,
-  verbose = TRUE)
+FindTransferAnchors(reference, query,
+  normalization.method = c("LogNormalize", "SCT"),
+  reference.assay = NULL, query.assay = NULL,
+  reduction = "pcaproject", project.query = FALSE, features = NULL,
+  npcs = 30, l2.norm = TRUE, dims = 1:30, k.anchor = 5,
+  k.filter = 200, k.score = 30, max.features = 200,
+  nn.method = "rann", eps = 0, approx.pca = TRUE, verbose = TRUE)
 }
 \arguments{
 \item{reference}{Seurat object to use as the reference}
 
 \item{query}{Seurat object to use as the query}
 
+\item{normalization.method}{Name of normalization method used: LogNormalize or SCT}
+
 \item{reference.assay}{Assay to use from reference}
 
 \item{query.assay}{Assay to use from query}
@@ -49,6 +52,9 @@ the reference object}
 \item{max.features}{The maximum number of features to use when specifying the
 neighborhood search space in the anchor filtering}
+
+\item{nn.method}{Method for nearest neighbor finding. Options include: rann,
+annoy}
+
 \item{eps}{Error bound on the neighbor finding algorithm (from RANN)}
 
 \item{approx.pca}{Use truncated singular value decomposition to approximate PCA}
diff --git a/man/GetResidual.Rd b/man/GetResidual.Rd
index 1adadef42..e8f8a3970 100644
--- a/man/GetResidual.Rd
+++ b/man/GetResidual.Rd
@@ -4,7 +4,8 @@
 \alias{GetResidual}
 \title{Calculate Pearson residuals of features not in the scale.data}
 \usage{
-GetResidual(object, features, assay = "SCT", verbose = TRUE)
+GetResidual(object, features, assay = "SCT", umi.assay = "RNA",
+  clip.range = NULL, replace.value = FALSE, verbose = TRUE)
 }
 \arguments{
 \item{object}{A Seurat object}
 
@@ -13,6 +14,15 @@ GetResidual(object, features, assay = "SCT", verbose = TRUE)
 \item{assay}{Name of the assay of the Seurat object generated by SCTransform}
 
+\item{umi.assay}{Name of the assay of the Seurat object containing the UMI
+matrix; the default is RNA}
+
+\item{clip.range}{Numeric of length two specifying the min and max values the Pearson residual
+will be clipped to}
+
+\item{replace.value}{Recalculate residuals for all features, even if they are already present.
+Useful if you want to change the clip.range.}
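A short sketch of the new GetResidual arguments; `object` stands in for any Seurat object that has already been run through SCTransform:

```r
# Recompute clipped Pearson residuals for two genes, overwriting any cached
# values via replace.value (useful after changing clip.range).
object <- GetResidual(object, features = c("MS4A1", "CD79B"),
                      assay = "SCT", umi.assay = "RNA",
                      clip.range = c(-10, 10), replace.value = TRUE)
```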
+
 \item{verbose}{Whether to print messages and progress bars}
 }
 \value{
diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd
index 5c48a8fd1..b90c42575 100644
--- a/man/IntegrateData.Rd
+++ b/man/IntegrateData.Rd
@@ -5,16 +5,19 @@
 \title{Integrate data}
 \usage{
 IntegrateData(anchorset, new.assay.name = "integrated",
-  features = NULL, features.to.integrate = NULL, dims = 1:30,
-  k.weight = 100, weight.reduction = NULL, sd.weight = 1,
-  sample.tree = NULL, preserve.order = FALSE, do.cpp = TRUE,
-  eps = 0, verbose = TRUE)
+  normalization.method = c("LogNormalize", "SCT"), features = NULL,
+  features.to.integrate = NULL, dims = 1:30, k.weight = 100,
+  weight.reduction = NULL, sd.weight = 1, sample.tree = NULL,
+  preserve.order = FALSE, do.cpp = TRUE, eps = 0, verbose = TRUE)
 }
 \arguments{
 \item{anchorset}{Results from FindIntegrationAnchors}
 
 \item{new.assay.name}{Name for the new assay containing the integrated data}
 
+\item{normalization.method}{Name of normalization method used: LogNormalize
+or SCT}
+
 \item{features}{Vector of features to use when computing the PCA to determine the weights. Only set
 if you want a different set from those used in the anchor finding process}
@@ -30,6 +33,7 @@ This can be either:
 \itemize{
   \item{A string, specifying the name of a dimension reduction present in all objects to be integrated}
   \item{A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated}
+  \item{A vector of DimReduc objects, specifying the object to use for each object in the integration}
   \item{NULL, in which case a new PCA will be calculated and used to calculate anchor weights}
 }
 Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the
@@ -52,5 +56,5 @@ query, and weights will need to be calculated for all cells in the object.}
 Returns a Seurat object with a new integrated Assay
 }
 \description{
-Integrates the data
+Perform dataset integration using a pre-computed anchorset
 }
diff --git a/man/LogNormalize.Rd b/man/LogNormalize.Rd
index a63f32c2b..654c310e8 100644
--- a/man/LogNormalize.Rd
+++ b/man/LogNormalize.Rd
@@ -4,7 +4,7 @@
 \alias{LogNormalize}
 \title{Normalize raw data}
 \usage{
-LogNormalize(data, scale.factor = 10000, verbose = TRUE, ...)
+LogNormalize(data, scale.factor = 10000, verbose = TRUE)
 }
 \arguments{
 \item{data}{Matrix with the raw count data}
 
@@ -12,8 +12,6 @@ LogNormalize(data, scale.factor = 10000, verbose = TRUE, ...)
 \item{scale.factor}{Scale the data. Default is 1e4}
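LogNormalize() can also be called directly on a matrix; a minimal sketch with the default scale factor, pulling raw counts from the bundled pbmc_small object:

```r
norm.data <- LogNormalize(data = GetAssayData(pbmc_small, slot = "counts"),
                          scale.factor = 1e4)
```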
 
 \item{verbose}{Print progress}
-
-\item{...}{Ignored}
 }
 \value{
 Returns a matrix with the normalized and log-transformed data
diff --git a/man/MapQuery.Rd b/man/MapQuery.Rd
new file mode 100644
index 000000000..b16de4e06
--- /dev/null
+++ b/man/MapQuery.Rd
@@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/integration.R
+\name{MapQuery}
+\alias{MapQuery}
+\title{Map queries to reference}
+\usage{
+MapQuery(anchorset, reference, new.assay.name = "integrated",
+  normalization.method = c("LogNormalize", "SCT"), features = NULL,
+  features.to.integrate = NULL, dims = 1:30, k.weight = 100,
+  weight.reduction = NULL, sd.weight = 1, sample.tree = NULL,
+  preserve.order = FALSE, do.cpp = TRUE, eps = 0, verbose = TRUE)
+}
+\arguments{
+\item{anchorset}{Anchorset found by FindIntegrationAnchors}
+
+\item{reference}{Pre-integrated reference dataset to map query datasets to}
+
+\item{new.assay.name}{Name for the new assay containing the integrated data}
+
+\item{normalization.method}{Name of normalization method used: LogNormalize
+or SCT}
+
+\item{features}{Vector of features to use when computing the PCA to determine the weights. Only set
+if you want a different set from those used in the anchor finding process}
+
+\item{features.to.integrate}{Vector of features to integrate. By default, will use the features
+used in anchor finding.}
+
+\item{dims}{Number of PCs to use in the weighting procedure}
+
+\item{k.weight}{Number of neighbors to consider when weighting}
+
+\item{weight.reduction}{Dimension reduction to use when calculating anchor weights.
+This can be either:
+\itemize{
+  \item{A string, specifying the name of a dimension reduction present in all objects to be integrated}
+  \item{A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated}
+  \item{NULL, in which case a new PCA will be calculated and used to calculate anchor weights}
+}
+Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the
+first merge between reference and query, as the merged object will subsequently contain more cells than were in the
+query, and weights will need to be calculated for all cells in the object.}
+
+\item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting}
+
+\item{sample.tree}{Specify the order of integration. If NULL, will compute automatically.}
+
+\item{preserve.order}{Do not reorder objects based on size for each pairwise integration.}
+
+\item{do.cpp}{Run cpp code where applicable}
+
+\item{eps}{Error bound on the neighbor finding algorithm (from \code{\link{RANN}})}
+
+\item{verbose}{Print progress bars and output}
+}
+\value{
+Returns an integrated matrix
+}
+\description{
+Map query objects onto assembled reference dataset
+}
diff --git a/man/MixingMetric.Rd b/man/MixingMetric.Rd
index d7738338b..f9eec617c 100644
--- a/man/MixingMetric.Rd
+++ b/man/MixingMetric.Rd
@@ -29,9 +29,10 @@ Returns a vector of values representing the entropy metric from each
 bootstrapped iteration.
 }
 \description{
-Here we compute a measure of how well mixed a composite dataset is. To compute, we first examine
-the local neighborhood for each cell (looking at max.k neighbors) and determine for each group
-(could be the dataset after integration) the k nearest neighbor and what rank that neighbor was
-in the overall neighborhood. We then take the median across all groups as the mixing metric per
-cell.
+Here we compute a measure of how well mixed a composite dataset is. To
+compute, we first examine the local neighborhood for each cell (looking at
+max.k neighbors) and determine for each group (could be the dataset after
+integration) the k nearest neighbor and what rank that neighbor was in the
+overall neighborhood. We then take the median across all groups as the mixing
+metric per cell.
 }
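A hedged sketch of the metric described above; `integrated` and the grouping column "orig.ident" are illustrative assumptions, and the grouping.var/max.k parameters come from the function's argument list, which this hunk does not show:

```r
mixing <- MixingMetric(object = integrated, grouping.var = "orig.ident",
                       reduction = "pca", dims = 1:2, max.k = 300)
head(mixing)  # one value per cell; roughly, lower values suggest better mixing
```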
diff --git a/man/PairwiseIntegrateReference.Rd b/man/PairwiseIntegrateReference.Rd
new file mode 100644
index 000000000..2308be8c1
--- /dev/null
+++ b/man/PairwiseIntegrateReference.Rd
@@ -0,0 +1,67 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/integration.R
+\name{PairwiseIntegrateReference}
+\alias{PairwiseIntegrateReference}
+\title{Pairwise dataset integration}
+\usage{
+PairwiseIntegrateReference(anchorset, new.assay.name = "integrated",
+  normalization.method = c("LogNormalize", "SCT"), features = NULL,
+  features.to.integrate = NULL, dims = 1:30, k.weight = 100,
+  weight.reduction = NULL, sd.weight = 1, sample.tree = NULL,
+  preserve.order = FALSE, do.cpp = TRUE, eps = 0, verbose = TRUE)
+}
+\arguments{
+\item{anchorset}{Results from FindIntegrationAnchors}
+
+\item{new.assay.name}{Name for the new assay containing the integrated data}
+
+\item{normalization.method}{Name of normalization method used: LogNormalize
+or SCT}
+
+\item{features}{Vector of features to use when computing the PCA to determine
+the weights. Only set if you want a different set from those used in the
+anchor finding process}
+
+\item{features.to.integrate}{Vector of features to integrate. By default,
+will use the features used in anchor finding.}
+
+\item{dims}{Number of PCs to use in the weighting procedure}
+
+\item{k.weight}{Number of neighbors to consider when weighting}
+
+\item{weight.reduction}{Dimension reduction to use when calculating anchor
+weights. This can be either:
+\itemize{
+  \item{A string, specifying the name of a dimension reduction present in
+  all objects to be integrated}
+  \item{A vector of strings, specifying the name of a dimension reduction to
+  use for each object to be integrated}
+  \item{NULL, in which case a new PCA will be calculated and used to
+  calculate anchor weights}
+}
+Note that, if specified, the requested dimension reduction will only be used
+for calculating anchor weights in the first merge between reference and
+query, as the merged object will subsequently contain more cells than were in
+the query, and weights will need to be calculated for all cells in the object.}
+
+\item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting}
+
+\item{sample.tree}{Specify the order of integration. If NULL, will compute
+automatically.}
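Since normalization.method = "SCT" threads through the whole anchor workflow, here is a hedged end-to-end sketch combining it with PrepSCTIntegration (documented below); obj.list is a hypothetical list of Seurat objects already run through SCTransform():

```r
features <- SelectIntegrationFeatures(object.list = obj.list, nfeatures = 3000)
obj.list <- PrepSCTIntegration(object.list = obj.list, anchor.features = features)
anchors  <- FindIntegrationAnchors(object.list = obj.list,
                                   normalization.method = "SCT",
                                   anchor.features = features)
integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
```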
+
+\item{preserve.order}{Do not reorder objects based on size for each pairwise
+integration.}
+
+\item{do.cpp}{Run cpp code where applicable}
+
+\item{eps}{Error bound on the neighbor finding algorithm (from
+\code{\link{RANN}})}
+
+\item{verbose}{Print progress bars and output}
+}
+\value{
+Returns a Seurat object with a new integrated Assay
+}
+\description{
+Used for reference construction
+}
diff --git a/man/PrepSCTIntegration.Rd b/man/PrepSCTIntegration.Rd
new file mode 100644
index 000000000..081d6e766
--- /dev/null
+++ b/man/PrepSCTIntegration.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/integration.R
+\name{PrepSCTIntegration}
+\alias{PrepSCTIntegration}
+\title{Prepare an object list that has been run through SCTransform for integration}
+\usage{
+PrepSCTIntegration(object.list, assay = NULL, anchor.features = 2000,
+  sct.clip.range = NULL, verbose = TRUE)
+}
+\arguments{
+\item{object.list}{A list of objects to prep for integration}
+
+\item{assay}{Name or vector of assay names (one for each object) that correspond
+to the assay that SCTransform has been run on. If NULL, the current default
+assay for each object is used.}
+
+\item{anchor.features}{Can be either:
+\itemize{
+  \item{A numeric value. This will call \code{\link{SelectIntegrationFeatures}}
+  to select the provided number of features to be used in anchor finding}
+  \item{A vector of features to be used as input to the anchor finding
+  process}
+}}
+
+\item{sct.clip.range}{Numeric of length two specifying the min and max values
+the Pearson residual will be clipped to}
+
+\item{verbose}{Display output/messages}
+}
+\value{
+An object list with the \code{scale.data} slots set to the anchor
+features
+}
+\description{
+Prepare an object list that has been run through SCTransform for integration
+}
diff --git a/man/Reductions.Rd b/man/Reductions.Rd
new file mode 100644
index 000000000..dd264a6fd
--- /dev/null
+++ b/man/Reductions.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/objects.R
+\name{Reductions}
+\alias{Reductions}
+\title{Pull DimReducs or DimReduc names}
+\usage{
+Reductions(object, slot = NULL)
+}
+\arguments{
+\item{object}{A Seurat object}
+
+\item{slot}{Name of DimReduc}
+}
+\value{
+If \code{slot} is \code{NULL}, the names of all \code{DimReduc} objects
+in this Seurat object. Otherwise, the \code{DimReduc} object requested
+}
+\description{
+Lists the names of \code{\link{DimReduc}} objects present in
+a Seurat object. If slot is provided, pulls specified DimReduc object.
+}
+\examples{
+Reductions(object = pbmc_small)
+
+}
diff --git a/man/RelativeCounts.Rd b/man/RelativeCounts.Rd
index 0d94ccee2..9893848d4 100644
--- a/man/RelativeCounts.Rd
+++ b/man/RelativeCounts.Rd
@@ -4,7 +4,7 @@
 \alias{RelativeCounts}
 \title{Normalize raw data to fractions}
 \usage{
-RelativeCounts(data, scale.factor = 1, verbose = TRUE, ...)
+RelativeCounts(data, scale.factor = 1, verbose = TRUE)
 }
 \arguments{
 \item{data}{Matrix with the raw count data}
 
@@ -12,8 +12,6 @@ RelativeCounts(data, scale.factor = 1, verbose = TRUE, ...)
 \item{scale.factor}{Scale the result. Default is 1}
 
 \item{verbose}{Print progress}
-
-\item{...}{Ignored}
 }
 \value{
 Returns a matrix with the relative counts
diff --git a/man/RunALRA.Rd b/man/RunALRA.Rd
index 75b886530..8ad711ad0 100644
--- a/man/RunALRA.Rd
+++ b/man/RunALRA.Rd
@@ -8,12 +8,14 @@
 \usage{
 RunALRA(object, ...)
 
-\method{RunALRA}{default}(object, k = NULL, q = 10, ...)
+\method{RunALRA}{default}(object, k = NULL, q = 10,
+  quantile.prob = 0.001, use.mkl = FALSE, mkl.seed = -1, ...)
 
-\method{RunALRA}{Seurat}(object, k = NULL, q = 10, assay = NULL,
-  slot = "data", setDefaultAssay = TRUE, genes.use = NULL,
-  K = NULL, p.val.th = 1e-10, noise.start = NULL, q.k = 2,
-  k.only = FALSE, ...)
+\method{RunALRA}{Seurat}(object, k = NULL, q = 10,
+  quantile.prob = 0.001, use.mkl = FALSE, mkl.seed = -1,
+  assay = NULL, slot = "data", setDefaultAssay = TRUE,
+  genes.use = NULL, K = NULL, thresh = 6, noise.start = NULL,
+  q.k = 2, k.only = FALSE, ...)
 }
 \arguments{
 \item{object}{An object}
@@ -25,6 +27,19 @@ RunALRA(object, ...)
 \item{q}{The number of additional power iterations in randomized SVD when
 computing rank k approximation. By default, q=10.}
 
+\item{quantile.prob}{The quantile probability to use when calculating threshold.
+By default, quantile.prob = 0.001.}
+
+\item{use.mkl}{Use the Intel MKL based implementation of SVD. Needs to be
+installed from https://github.com/KlugerLab/rpca-mkl. \strong{Note}: this requires
+the \href{https://github.com/satijalab/seurat-wrappers}{SeuratWrappers} implementation
+of \code{RunALRA}}
+
+\item{mkl.seed}{Only relevant if \code{use.mkl = TRUE}. Set the seed for the random
+generator for the Intel MKL implementation of SVD. Any number <0 will
+use the current timestamp. If \code{use.mkl = FALSE}, set the seed using
+\code{\link{set.seed}()} function as usual.}
+
 \item{assay}{Assay to use}
 
 \item{slot}{slot to use}
@@ -36,7 +51,7 @@ computing rank k approximation. By default, q=10.}
 \item{K}{Number of singular values to compute when choosing k. Must be less
 than the smallest dimension of the matrix. Default 100 or smallest dimension.}
 
-\item{p.val.th}{The threshold for ''significance'' when choosing k. Default 1e-10.}
+\item{thresh}{The threshold for ''significance'' when choosing k. Default 6.}
 
 \item{noise.start}{Index for which all smaller singular values are considered noise.
 Default K - 20.}
@@ -52,6 +67,10 @@ error distribution learned from the negative values. Described in
 Linderman, G. C., Zhao, J., Kluger, Y. (2018). "Zero-preserving imputation
 of scRNA-seq data using low rank approximation." (bioRxiv:138677)
 }
+\note{
+RunALRA and associated functions are being moved to SeuratWrappers;
+for more information on SeuratWrappers, please see \url{https://github.com/satijalab/seurat-wrappers}
+}
 \examples{
 pbmc_small
 # Example 1: Simple usage, with automatic choice of k.
diff --git a/man/RunCCA.Rd b/man/RunCCA.Rd
index bb377877e..9825ceffa 100644
--- a/man/RunCCA.Rd
+++ b/man/RunCCA.Rd
@@ -9,12 +9,12 @@
 RunCCA(object1, object2, ...)
 
 \method{RunCCA}{default}(object1, object2, standardize = TRUE,
-  num.cc = 20, verbose = FALSE, use.cpp = TRUE, ...)
+  num.cc = 20, verbose = FALSE, ...)
 
 \method{RunCCA}{Seurat}(object1, object2, assay1 = NULL,
   assay2 = NULL, num.cc = 20, features = NULL, renormalize = FALSE,
   rescale = FALSE, compute.gene.loadings = TRUE, add.cell.id1 = NULL,
-  add.cell.id2 = NULL, verbose = TRUE, use.cpp = TRUE, ...)
+  add.cell.id2 = NULL, verbose = TRUE, ...)
 }
 \arguments{
 \item{object1}{First Seurat object}
@@ -30,9 +30,7 @@ and mean 0}
 
 \item{num.cc}{Number of canonical vectors to calculate}
 
-\item{verbose}{...}
-
-\item{use.cpp}{...}
+\item{verbose}{Show progress messages}
 
 \item{assay1, assay2}{Assays to pull from in the first and second objects, respectively}
diff --git a/man/RunLSI.Rd b/man/RunLSI.Rd
index 398962837..8e0d69630 100644
--- a/man/RunLSI.Rd
+++ b/man/RunLSI.Rd
@@ -46,6 +46,12 @@ NULL will not set a seed.}
 For details about stored LSI calculation parameters, see
 \code{PrintLSIParams}.
 }
+\note{
+RunLSI is being moved to Signac. Equivalent functionality can be
+achieved via the Signac::RunTFIDF and Signac::RunSVD functions;
+for more information on Signac, please see
+\url{https://github.com/timoast/Signac}
+}
 \examples{
 lsi <- RunLSI(object = pbmc_small, n = 5)
 
diff --git a/man/RunUMAP.Rd b/man/RunUMAP.Rd
index a36366b5d..128190b5d 100644
--- a/man/RunUMAP.Rd
+++ b/man/RunUMAP.Rd
@@ -9,30 +9,31 @@
 \usage{
 RunUMAP(object, ...)
 
-\method{RunUMAP}{default}(object, assay = NULL, n.neighbors = 30L,
-  n.components = 2L, metric = "correlation", n.epochs = NULL,
-  learning.rate = 1, min.dist = 0.3, spread = 1,
+\method{RunUMAP}{default}(object, assay = NULL, umap.method = "uwot",
+  n.neighbors = 30L, n.components = 2L, metric = "cosine",
+  n.epochs = NULL, learning.rate = 1, min.dist = 0.3, spread = 1,
   set.op.mix.ratio = 1, local.connectivity = 1L,
   repulsion.strength = 1, negative.sample.rate = 5, a = NULL,
   b = NULL, seed.use = 42, metric.kwds = NULL,
   angular.rp.forest = FALSE, reduction.key = "UMAP_", verbose = TRUE,
   ...)
 
-\method{RunUMAP}{Graph}(object, assay = NULL, n.components = 2L,
+\method{RunUMAP}{Graph}(object, assay = NULL,
+  umap.method = "umap-learn", n.components = 2L,
   metric = "correlation", n.epochs = 0L, learning.rate = 1,
   min.dist = 0.3, spread = 1, repulsion.strength = 1,
   negative.sample.rate = 5L, a = NULL, b = NULL, seed.use = 42L,
   metric.kwds = NULL, verbose = TRUE, reduction.key = "UMAP_", ...)
 
 \method{RunUMAP}{Seurat}(object, dims = NULL, reduction = "pca",
-  features = NULL, graph = NULL, assay = "RNA", n.neighbors = 30L,
-  n.components = 2L, metric = "correlation", n.epochs = NULL,
-  learning.rate = 1, min.dist = 0.3, spread = 1,
-  set.op.mix.ratio = 1, local.connectivity = 1L,
-  repulsion.strength = 1, negative.sample.rate = 5L, a = NULL,
-  b = NULL, seed.use = 42L, metric.kwds = NULL,
-  angular.rp.forest = FALSE, verbose = TRUE, reduction.name = "umap",
-  reduction.key = "UMAP_", ...)
+  features = NULL, graph = NULL, assay = "RNA",
+  umap.method = "uwot", n.neighbors = 30L, n.components = 2L,
+  metric = "cosine", n.epochs = NULL, learning.rate = 1,
+  min.dist = 0.3, spread = 1, set.op.mix.ratio = 1,
+  local.connectivity = 1L, repulsion.strength = 1,
+  negative.sample.rate = 5L, a = NULL, b = NULL, seed.use = 42L,
+  metric.kwds = NULL, angular.rp.forest = FALSE, verbose = TRUE,
+  reduction.name = "umap", reduction.key = "UMAP_", ...)
 }
 \arguments{
 \item{object}{An object}
@@ -42,6 +43,12 @@ RunUMAP(object, ...)
 \item{assay}{Assay to pull data for when using \code{features}, or assay used to construct Graph
 if running UMAP on a Graph}
 
+\item{umap.method}{UMAP implementation to run. Can be
+\describe{
+  \item{\code{uwot}:}{Runs umap via the uwot R package}
+  \item{\code{umap-learn}:}{Run the Seurat wrapper of the python umap-learn package}
+}}
+
 \item{n.neighbors}{This determines the number of neighboring points used in
 local approximations of manifold structure. Larger values will result in more
 global structure being preserved at the loss of detailed local structure. In
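The backend switch documented above in a single call; a sketch on pbmc_small (uwot is now the default, so umap.method is spelled out only for explicitness):

```r
pbmc_small <- RunUMAP(pbmc_small, dims = 1:10, umap.method = "uwot",
                      metric = "cosine", n.neighbors = 30L, min.dist = 0.3)
```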
diff --git a/man/SelectIntegrationFeatures.Rd b/man/SelectIntegrationFeatures.Rd
index 808ff6ac2..6dac1227e 100644
--- a/man/SelectIntegrationFeatures.Rd
+++ b/man/SelectIntegrationFeatures.Rd
@@ -16,8 +16,8 @@ SelectIntegrationFeatures(object.list, nfeatures = 2000, assay = NULL,
 
 \item{verbose}{Print messages}
 
-\item{fvf.nfeatures}{nfeatures for FindVariableFeatures. Used if VariableFeatures have not been
-set for any object in object.list.}
+\item{fvf.nfeatures}{nfeatures for FindVariableFeatures. Used if
+VariableFeatures have not been set for any object in object.list.}
 
 \item{...}{Additional parameters to \code{\link{FindVariableFeatures}}}
 }
@@ -25,7 +25,7 @@ set for any object in object.list.}
 A vector of selected features
 }
 \description{
-Choose the features to use when integrating multiple datasets. This function ranks features by
-the number of datasets they appear in, breaking ties by the median rank across datasets. It
-returns the highest features by this ranking.
+Choose the features to use when integrating multiple datasets. This function
+ranks features by the number of datasets they appear in, breaking ties by the
+median rank across datasets. It returns the highest features by this ranking.
 }
diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd
index b902ffeb0..f268daee9 100644
--- a/man/Seurat-package.Rd
+++ b/man/Seurat-package.Rd
@@ -12,12 +12,17 @@ Tools for single-cell genomics
 
 Seurat uses the following [options()] to configure behaviour:
 
-\itemize{
-  \item `Seurat.memsafe`: global option to call gc() after many operations.
+\describe{
+  \item{\code{Seurat.memsafe}}{global option to call gc() after many operations.
   This can be helpful in cleaning up the memory status of the R session and
   prevent use of swap space. However, it does add to the computational
   overhead and setting to FALSE can speed things up if you're working in an environment
-  where RAM availabiliy is not a concern.
+  where RAM availability is not a concern.}
+  \item{\code{Seurat.warn.umap.uwot}}{Show warning about the default backend
+  for \code{\link{RunUMAP}} changing from Python UMAP via reticulate to UWOT}
+  \item{\code{Seurat.checkdots}}{For functions that have ... as a parameter,
+  this controls the behavior when an item isn't used. Can be one of warn,
+  stop, or silent.}
 }
 }
diff --git a/man/SplitObject.Rd b/man/SplitObject.Rd
index 6b4071f4f..72b43d0ca 100644
--- a/man/SplitObject.Rd
+++ b/man/SplitObject.Rd
@@ -4,15 +4,13 @@
 \alias{SplitObject}
 \title{Splits object into a list of subsetted objects.}
 \usage{
-SplitObject(object, split.by = "ident", ...)
+SplitObject(object, split.by = "ident")
 }
 \arguments{
 \item{object}{Seurat object}
 
 \item{split.by}{Attribute for splitting. Default is "ident". Currently
 only supported for class-level (i.e. non-quantitative) attributes.}
-
-\item{...}{Ignored}
 }
 \value{
 A named list of Seurat objects, each containing a subset of cells
diff --git a/man/TransferData.Rd b/man/TransferData.Rd
index 988e457a0..d6b5af28e 100644
--- a/man/TransferData.Rd
+++ b/man/TransferData.Rd
@@ -11,18 +11,22 @@ TransferData(anchorset, refdata, weight.reduction = "pcaproject",
 \arguments{
 \item{anchorset}{Results from FindTransferAnchors}
 
-\item{refdata}{Data to transfer. Should be either a vector where the names correspond to
-reference cells, or a matrix, where the column names correspond to the reference cells.}
+\item{refdata}{Data to transfer. Should be either a vector where the names
+correspond to reference cells, or a matrix, where the column names correspond
+to the reference cells.}
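A hedged sketch of label transfer using the arguments documented here; reference and query are hypothetical Seurat objects, and the anchors come from FindTransferAnchors (documented earlier in this patch):

```r
anchors <- FindTransferAnchors(reference = reference, query = query, dims = 1:30)
predictions <- TransferData(anchorset = anchors, refdata = Idents(reference),
                            weight.reduction = "pcaproject", dims = 1:30)
query <- AddMetaData(query, metadata = predictions)
```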
 
-\item{weight.reduction}{Dimensional reduction to use for the weighting. Options are:
+\item{weight.reduction}{Dimensional reduction to use for the weighting.
+Options are:
 \itemize{
   \item{pcaproject: Use the projected PCA used for anchor building}
   \item{pca: Use an internal PCA on the query only}
   \item{cca: Use the CCA used for anchor building}
-  \item{custom DimReduc: User provided DimReduc object computed on the query cells}
+  \item{custom DimReduc: User provided DimReduc object computed on the query
+  cells}
 }}
 
-\item{l2.norm}{Perform L2 normalization on the cell embeddings after dimensional reduction}
+\item{l2.norm}{Perform L2 normalization on the cell embeddings after
+dimensional reduction}
 
 \item{dims}{Number of PCs to use in the weighting procedure}
@@ -39,8 +43,9 @@ reference cells, or a matrix, where the column names correspond to the reference
 \item{slot}{Slot to store the imputed data}
 }
 \value{
-If refdata is a vector, returns a dataframe with label predictions. If refdata is a
-matrix, returns an Assay object where the imputed data has been stored in the provided slot.
+If refdata is a vector, returns a dataframe with label predictions.
+If refdata is a matrix, returns an Assay object where the imputed data has
+been stored in the provided slot.
 }
 \description{
 Transfers the labels
diff --git a/man/VizDimLoadings.Rd b/man/VizDimLoadings.Rd
index 56326e718..b71d4a4ea 100644
--- a/man/VizDimLoadings.Rd
+++ b/man/VizDimLoadings.Rd
@@ -6,7 +6,7 @@
 \usage{
 VizDimLoadings(object, dims = 1:5, nfeatures = 30, col = "blue",
   reduction = "pca", projected = FALSE, balanced = FALSE,
-  ncol = NULL, combine = TRUE, ...)
+  ncol = NULL, combine = TRUE)
 }
 \arguments{
 \item{object}{Seurat object}
@@ -19,15 +19,16 @@ VizDimLoadings(object, dims = 1:5, nfeatures = 30, col = "blue",
 \item{reduction}{Reduction technique to visualize results for}
 
-\item{projected}{Use reduction values for full dataset (i.e. projected dimensional reduction values)}
+\item{projected}{Use reduction values for full dataset (i.e. projected
+dimensional reduction values)}
 
-\item{balanced}{Return an equal number of genes with + and - scores. If FALSE (default), returns the top genes ranked by the scores absolute values}
+\item{balanced}{Return an equal number of genes with + and - scores. If
+FALSE (default), returns the top genes ranked by the scores' absolute values}
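A one-call sketch of the balanced option on the bundled dataset:

```r
# Equal numbers of positively and negatively loaded genes for PCs 1-2
VizDimLoadings(pbmc_small, dims = 1:2, nfeatures = 20,
               reduction = "pca", balanced = TRUE, ncol = 2)
```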
 
 \item{ncol}{Number of columns to display}
 
-\item{combine}{Combine plots into a single gg object; note that if TRUE; themeing will not work when plotting multiple features}
-
-\item{...}{Ignored}
+\item{combine}{Combine plots into a single gg object; note that if TRUE,
+theming will not work when plotting multiple features}
 }
 \value{
 A ggplot object
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 25f4ee0ed..f46e94455 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -81,18 +81,6 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
-// FastMatMult
-Eigen::MatrixXd FastMatMult(Eigen::MatrixXd m1, Eigen::MatrixXd m2);
-RcppExport SEXP _Seurat_FastMatMult(SEXP m1SEXP, SEXP m2SEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< Eigen::MatrixXd >::type m1(m1SEXP);
-    Rcpp::traits::input_parameter< Eigen::MatrixXd >::type m2(m2SEXP);
-    rcpp_result_gen = Rcpp::wrap(FastMatMult(m1, m2));
-    return rcpp_result_gen;
-END_RCPP
-}
 // FastRowScale
 Eigen::MatrixXd FastRowScale(Eigen::MatrixXd mat, bool scale, bool center, double scale_max, bool display_progress);
 RcppExport SEXP _Seurat_FastRowScale(SEXP matSEXP, SEXP scaleSEXP, SEXP centerSEXP, SEXP scale_maxSEXP, SEXP display_progressSEXP) {
@@ -367,7 +355,6 @@ static const R_CallMethodDef CallEntries[] = {
     {"_Seurat_RunUMISamplingPerCell", (DL_FUNC) &_Seurat_RunUMISamplingPerCell, 4},
     {"_Seurat_RowMergeMatrices", (DL_FUNC) &_Seurat_RowMergeMatrices, 5},
    {"_Seurat_LogNorm", (DL_FUNC) &_Seurat_LogNorm, 3},
-    {"_Seurat_FastMatMult", (DL_FUNC) &_Seurat_FastMatMult, 2},
     {"_Seurat_FastRowScale", (DL_FUNC) &_Seurat_FastRowScale, 5},
     {"_Seurat_Standardize", (DL_FUNC) &_Seurat_Standardize, 2},
     {"_Seurat_FastSparseRowScale", (DL_FUNC) &_Seurat_FastSparseRowScale, 5},
diff --git a/src/data_manipulation.cpp b/src/data_manipulation.cpp
index 4afc482b2..547dd67dc 100644
--- a/src/data_manipulation.cpp
+++ b/src/data_manipulation.cpp
@@ -123,13 +123,6 @@ Eigen::SparseMatrix<double> LogNorm(Eigen::SparseMatrix<double> data, int scale_
   return data;
 }
 
-// [[Rcpp::export]]
-Eigen::MatrixXd FastMatMult(Eigen::MatrixXd m1, Eigen::MatrixXd m2){
-  Eigen::MatrixXd m3 = m1 * m2;
-  return(m3);
-}
-
-
 /* Performs row scaling and/or centering. Equivalent to using t(scale(t(mat)))
  in R. Note: Doesn't handle NA/NaNs in the same way the R implementation does, */
diff --git a/src/data_manipulation.h b/src/data_manipulation.h
index 0f3a6ef7a..0cf525f3f 100644
--- a/src/data_manipulation.h
+++ b/src/data_manipulation.h
@@ -23,7 +23,6 @@
 Eigen::SparseMatrix<double> RowMergeMatrices(Eigen::SparseMatrix<double, Eigen::RowMajor> mat1, Eigen::SparseMatrix<double, Eigen::RowMajor> mat2, std::vector<std::string> mat1_rownames, std::vector<std::string> mat2_rownames, std::vector<std::string> all_rownames);
 Eigen::SparseMatrix<double> LogNorm(Eigen::SparseMatrix<double> data, int scale_factor, bool display_progress );
-Eigen::MatrixXd FastMatMult(Eigen::MatrixXd m1, Eigen::MatrixXd m2);
 Eigen::MatrixXd FastRowScale(Eigen::MatrixXd mat, bool scale, bool center, double scale_max, bool display_progress);
 NumericMatrix Standardize(const Eigen::Map<Eigen::MatrixXd> mat, bool display_progress);
diff --git a/tests/testthat/test_data_manipulation.R b/tests/testthat/test_data_manipulation.R
index 1a0276699..1d97e8f4b 100644
--- a/tests/testthat/test_data_manipulation.R
+++ b/tests/testthat/test_data_manipulation.R
@@ -40,20 +40,6 @@ test_that("Log Normalization returns expected values", {
   expect_equal(mat.norm[4, 4], mat.norm.r[4, 4])
 })
 
-# Tests for matrix multiply
-# --------------------------------------------------------------------------------
-context("Matrix Multiply")
-
-mat <- as.matrix(mat)
-
-test_that("Fast implementation of matrix multiply returns as expected", {
-  expect_equal(mat %*% mat, FastMatMult(mat, mat))
-  mat[1, 1] <- NA
-  expect_equal(mat %*% mat, FastMatMult(mat, mat))
-  mat[1, 1] <- NaN
-  expect_equal(mat %*% mat, FastMatMult(mat, mat))
-})
-
 # Tests for scaling data
 # --------------------------------------------------------------------------------
 context("Fast Scale Data Functions")
diff --git a/tests/testthat/test_objects.R b/tests/testthat/test_objects.R
index 2b76fb659..8c0e0fc46 100644
--- a/tests/testthat/test_objects.R
+++ b/tests/testthat/test_objects.R
@@ -235,8 +235,34 @@ bad.gene <- GetAssayData(object = pbmc_small[["RNA"]], slot = "data")
 rownames(x = bad.gene)[1] <- paste0("rna_", rownames(x = bad.gene)[1])
 pbmc_small[["RNA"]]@data <- bad.gene
 
-# errors - when key conflicts with feature name
-#
+# Tests for WhichCells
+# ------------------------------------------------------------------------------
+
+test_that("Specifying cells works", {
+  test.cells <- Cells(x = pbmc_small)[1:10]
+  expect_equal(WhichCells(object = pbmc_small, cells = test.cells), test.cells)
+  expect_equal(WhichCells(object = pbmc_small, cells = test.cells, invert = TRUE), setdiff(Cells(x = pbmc_small), test.cells))
+})
+
+test_that("Specifying idents works", {
+  c12 <- WhichCells(object = pbmc_small, idents = c(1, 2))
+  expect_equal(length(x = c12), 44)
+  expect_equal(c12[44], "CTTGATTGATCTTC")
+  expect_equal(c12, WhichCells(object = pbmc_small, idents = 0, invert = TRUE))
+})
+
+test_that("downsample works", {
+  expect_equal(length(x = WhichCells(object = pbmc_small, downsample = 5)), 15)
+  expect_equal(length(x = WhichCells(object = pbmc_small, downsample = 100)), 80)
+})
+
+test_that("passing an expression works", {
+  lyz.pos <- WhichCells(object = pbmc_small, expression = LYZ > 1)
+  expect_true(all(GetAssayData(object = pbmc_small, slot = "data")["LYZ", lyz.pos] > 1))
+  # multiple values in expression
+  lyz.pos <- WhichCells(object = pbmc_small, expression = LYZ > 1 & groups == "g1")
+  expect_equal(length(x = lyz.pos), 30)
+  expect_equal(lyz.pos[30], "CTTGATTGATCTTC")
+})
diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R
index 5f9315f19..25ec9ad86 100644
--- a/tests/testthat/test_preprocessing.R
+++ b/tests/testthat/test_preprocessing.R
@@ -36,6 +36,13 @@
test_that("Filtering handled properly", { expect_equal(ncol(x = GetAssayData(object = object.filtered, slot = "counts")), 77) }) +test_that("Metadata check errors correctly", { + pbmc.md <- pbmc_small[[]] + pbmc.md.norownames <- as.matrix(pbmc.md) + rownames(pbmc.md.norownames) <- NULL + expect_error(CreateSeuratObject(counts = pbmc.test, meta.data = pbmc.md.norownames), + "Row names not set in metadata. Please ensure that rownames of metadata match column names of data matrix") +}) # Tests for NormalizeData # -------------------------------------------------------------------------------- @@ -316,3 +323,15 @@ test_that("SCTransform ncells param works", { expect_equal(object[["SCT"]][[]]["MS4A1", "sct.residual_variance"], 3.551259, tolerance = 1e6) }) +suppressWarnings(object[["SCT_SAVE"]] <- object[["SCT"]]) +object[["SCT"]] <- SetAssayData(object = object[["SCT"]], slot = "scale.data", new.data = GetAssayData(object = object[["SCT"]], slot = "scale.data")[1:100, ]) +object <- GetResidual(object = object, features = rownames(x = object), verbose = FALSE) +test_that("GetResidual works", { + expect_equal(dim(GetAssayData(object = object[["SCT"]], slot = "scale.data")), c(220, 80)) + expect_equal( + GetAssayData(object = object[["SCT"]], slot = "scale.data"), + GetAssayData(object = object[["SCT_SAVE"]], slot = "scale.data") + ) + expect_warning(GetResidual(object, features = "asd")) +}) + diff --git a/tests/testthat/test_visualization.R b/tests/testthat/test_visualization.R new file mode 100644 index 000000000..23c2b217a --- /dev/null +++ b/tests/testthat/test_visualization.R @@ -0,0 +1,13 @@ +# Tests for functions in visualization.R + +set.seed(42) + +# Tests for visualization utilities +# ------------------------------------------------------------------------------ +pbmc_small[["tsne_new"]] <- CollapseEmbeddingOutliers(pbmc_small, + reduction = "tsne", reduction.key = 'tsne_', outlier.sd = 0.5) + +test_that("CollapseEmbeddingOutliers works", { + expect_equal(Embeddings(pbmc_small[["tsne_new"]])[1, 1], -12.59713, tolerance = 1e-6) + expect_equal(colSums(x = Embeddings(object = pbmc_small[["tsne_new"]])), c(-219.9218, 182.9215), check.attributes = FALSE, tolerance = 1e-5) +}) \ No newline at end of file