From 7f8d55352ad3bb6c52eb3cf5986aadf834b2d483 Mon Sep 17 00:00:00 2001 From: Elmar Pruesse Date: Fri, 21 Feb 2020 15:07:58 -0700 Subject: [PATCH 001/111] Minor: Remove unused line --- R/integration.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index a821adc01..478795baf 100644 --- a/R/integration.R +++ b/R/integration.R @@ -672,7 +672,6 @@ IntegrateData <- function( y = object.list[2:length(x = object.list)] ) if (normalization.method == "SCT") { - vst.set <- list() for (i in 1:length(x = object.list)) { assay <- DefaultAssay(object = object.list[[i]]) object.list[[i]][[assay]] <- CreateAssayObject( From a910f734669cdd845acd6f282c35d19d5fc246e5 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 21 Feb 2020 17:33:11 -0500 Subject: [PATCH 002/111] Fix y limit when infinite values present --- R/visualization.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/visualization.R b/R/visualization.R index f15b96503..1248e3ed4 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -4615,6 +4615,10 @@ SingleExIPlot <- function( } axis.label <- 'Expression Level' y.max <- y.max %||% max(data[, feature]) + if (is.infinite(x = y.max)) { + tmp <- data[, feature] + y.max <- max(tmp[!is.infinite(x = tmp)]) + } if (type == 'violin' && !is.null(x = split)) { data$split <- split vln.geom <- geom_split_violin From cfe5b7ec792fa2517456aeb06dc1bf84e05e2d1b Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 22 Feb 2020 18:44:44 -0800 Subject: [PATCH 003/111] Bugfix Multiseq's classifyCells --- R/preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index cf44dbca9..61376e3a5 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2501,7 +2501,7 @@ ClassifyCells <- function(data, q) { message("No threshold found for ", colnames(x = data)[i], "...") } ) - if (is.character(x = model)) { + if (is.null(x = model)) { next } x <- seq.int( From 506e52bb9ba92d525d5c0b73a5a0ef50d95323c6 Mon Sep 17 00:00:00 2001 From: Avi Srivastava Date: Tue, 25 Feb 2020 15:54:44 -0500 Subject: [PATCH 004/111] removing unused variable and optimizing the search --- R/integration.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index a821adc01..d12834e99 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1721,8 +1721,7 @@ FindAnchorPairs <- function( # convert cell name to neighbor index nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 - cell1.index <- sapply(X = cells1, FUN = function(x) return(which(x == nn.cells1))) - cell2.index <- sapply(X = cells2, FUN = function(x) return(which(x == nn.cells2))) + cell1.index <- which(cells1 == nn.cells1, arr.ind = TRUE) ncell <- 1:nrow(x = neighbors$nnab$nn.idx) ncell <- ncell[ncell %in% cell1.index] From 94f82008feb2709b2bbee25f33eec957d432da73 Mon Sep 17 00:00:00 2001 From: Avi Srivastava Date: Tue, 25 Feb 2020 16:48:18 -0500 Subject: [PATCH 005/111] remove unused parameters --- R/integration.R | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/R/integration.R b/R/integration.R index d12834e99..cc57f90b5 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1693,8 +1693,6 @@ FindAnchors <- function( FindAnchorPairs <- function( object, integration.name = 'integrated', - cells1 = NULL, - cells2 = NULL, k.anchor = 5, verbose = TRUE ) { @@ -1707,22 +1705,10 @@ FindAnchorPairs <- function( if (verbose) { message("Finding anchors") } - if 
(is.null(x = cells1)) { - cells1 <- colnames(x = object) - } - if (is.null(x = cells2)) { - cells2 <- colnames(x = object) - } - if (!(cells1 %in% colnames(object)) || !(cells2 %in% colnames(object))) { - warning("Requested cells not contained in Seurat object. Subsetting list of cells.") - cells1 <- intersect(x = cells1, y = colnames(x = object)) - cells2 <- intersect(x = cells2, y = colnames(x = object)) - } # convert cell name to neighbor index nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 - cell1.index <- which(cells1 == nn.cells1, arr.ind = TRUE) - + cell1.index <- suppressWarnings(which(colnames(x = object) == nn.cells1, arr.ind = TRUE)) ncell <- 1:nrow(x = neighbors$nnab$nn.idx) ncell <- ncell[ncell %in% cell1.index] anchors <- list() From 9b9792aa92d220a933d049fe17c311b9b13a3dd5 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Thu, 27 Feb 2020 19:04:31 -0800 Subject: [PATCH 006/111] replace wilcox.test with limma implementation --- DESCRIPTION | 5 +++-- NAMESPACE | 2 +- NEWS.md | 4 ++++ R/differential_expression.R | 15 +++++++-------- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e8e999b54..b9ed23553 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4 -Date: 2020-02-21 +Version: 3.1.4.9000 +Date: 2020-02-28 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Authors@R: c( @@ -39,6 +39,7 @@ Imports: irlba, KernSmooth, leiden (>= 0.3.1), + limma, lmtest, MASS, Matrix (>= 1.2-14), diff --git a/NAMESPACE b/NAMESPACE index 52dbc5bac..7341c783f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -468,6 +468,7 @@ importFrom(igraph,graph_from_adjacency_matrix) importFrom(igraph,plot.igraph) importFrom(irlba,irlba) importFrom(leiden,leiden) +importFrom(limma,rankSumTestWithCorrelation) importFrom(lmtest,lrtest) importFrom(metap,minimump) importFrom(methods,"slot<-") @@ -536,7 +537,6 @@ importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,t.test) importFrom(stats,var) -importFrom(stats,wilcox.test) importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) importFrom(tsne,tsne) diff --git a/NEWS.md b/NEWS.md index fe5af7825..0a67860f7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,10 @@ All notable changes to Seurat will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +## Develop +### Changes +- Replace wilcox.test with limma implementation for a faster FindMarkers default method + ## [3.1.4] - 2020-02-20 ### Changes - Fixes to `DoHeatmap` to remain compatible with ggplot2 v3.3 diff --git a/R/differential_expression.R b/R/differential_expression.R index d8f728b68..d3c2f3ce6 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -1505,7 +1505,9 @@ RegularizedTheta <- function(cm, latent.data, min.theta = 0.01, bin.size = 128) # Differential expression using Wilcoxon Rank Sum # # Identifies differentially expressed genes between two groups of cells using -# a Wilcoxon Rank Sum test +# a Wilcoxon Rank Sum test. 
Makes use of limma::rankSumTestWithCorrelation for a +# more efficient implementation of the wilcoxon test. Thanks to Yunshun Chen and +# Gordon Smyth for suggesting the limma implementation. # # @param data.use Data matrix to test # @param cells.1 Group 1 cells @@ -1517,7 +1519,7 @@ RegularizedTheta <- function(cm, latent.data, min.theta = 0.01, bin.size = 128) # features # #' @importFrom pbapply pbsapply -#' @importFrom stats wilcox.test +#' @importFrom limma rankSumTestWithCorrelation #' @importFrom future.apply future_sapply #' @importFrom future nbrOfWorkers # @@ -1535,11 +1537,8 @@ WilcoxDETest <- function( verbose = TRUE, ... ) { - group.info <- data.frame(row.names = c(cells.1, cells.2)) - group.info[cells.1, "group"] <- "Group1" - group.info[cells.2, "group"] <- "Group2" - group.info[, "group"] <- factor(x = group.info[, "group"]) - data.use <- data.use[, rownames(x = group.info), drop = FALSE] + data.use <- data.use[, c(cells.1, cells.2), drop = FALSE] + j <- seq_len(length.out = length(x = cells.1)) my.sapply <- ifelse( test = verbose && nbrOfWorkers() == 1, yes = pbsapply, @@ -1548,7 +1547,7 @@ WilcoxDETest <- function( p_val <- my.sapply( X = 1:nrow(x = data.use), FUN = function(x) { - return(wilcox.test(data.use[x, ] ~ group.info[, "group"], ...)$p.value) + return(2 * min(rankSumTestWithCorrelation(index = j, statistics = data.use[x, ]))) } ) return(data.frame(p_val, row.names = rownames(x = data.use))) From 64a24c9fc00c3f1cc258283805a5fd2ff112a1a0 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 28 Feb 2020 14:50:10 -0500 Subject: [PATCH 007/111] more succinct implementation for Inf y.max --- R/visualization.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index f2b99ecc6..3aec8e3a3 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -4612,11 +4612,7 @@ SingleExIPlot <- function( data[, feature] <- data[, feature] + noise } axis.label <- 'Expression Level' - y.max <- y.max %||% max(data[, feature]) - if (is.infinite(x = y.max)) { - tmp <- data[, feature] - y.max <- max(tmp[!is.infinite(x = tmp)]) - } + y.max <- y.max %||% max(data[, feature][is.finite(x = data[, feature])]) if (type == 'violin' && !is.null(x = split)) { data$split <- split vln.geom <- geom_split_violin From b3d3d8eb4fdaa313b724bfbd51a4a406518e239d Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 28 Feb 2020 14:51:35 -0500 Subject: [PATCH 008/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e8e999b54..67d27d010 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4 -Date: 2020-02-21 +Version: 3.1.4.9000 +Date: 2020-02-28 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. 
Authors@R: c( From cba66f54d6ef854532fea404c428fd0af6103417 Mon Sep 17 00:00:00 2001 From: andrewwbutler Date: Mon, 2 Mar 2020 22:23:03 -0500 Subject: [PATCH 009/111] fix readme markdown formatting, CI badges reference master --- README.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a1f9c8a25..8133d96b9 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -[![Build Status](https://travis-ci.com/satijalab/seurat.svg)](https://travis-ci.com/satijalab/seurat) -[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/satijalab/seurat?svg=true)](https://ci.appveyor.com/project/satijalab/seurat) +[![Build Status](https://travis-ci.com/satijalab/seurat.svg?branch=master)](https://travis-ci.com/satijalab/seurat) +[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/satijalab/seurat?branch=master&svg=true)](https://ci.appveyor.com/project/satijalab/seurat) [![CRAN Version](https://www.r-pkg.org/badges/version/Seurat)](https://cran.r-project.org/package=Seurat) [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/Seurat)](https://cran.r-project.org/package=Seurat) @@ -9,9 +9,11 @@ Seurat is an R toolkit for single cell genomics, developed and maintained by the Satija Lab at NYGC. Instructions, documentation, and tutorials can be found at: + * https://satijalab.org/seurat Seurat is also hosted on GitHub, you can view and clone the repository at + * https://github.com/satijalab/seurat Seurat has been successfully installed on Mac OS X, Linux, and Windows, using the devtools package to install directly from GitHub @@ -21,12 +23,14 @@ Improvements and new features will be added on a regular basis, please contact s Version History August 20, 2019 + * Version 3.1 * Changes: * Support for SCTransform integration workflows * Integration speed ups: reference-based integration + reciprocal PCA April 12, 2019 + * Version 3.0 * Changes: * Preprint published describing new methods for identifying anchors across single-cell datasets @@ -34,29 +38,34 @@ April 12, 2019 * Parallelization support via future July 20, 2018 + * Version 2.4 * Changes: * Java dependency removed and functionality rewritten in Rcpp March 22, 2018 + * Version 2.3 * Changes: * New utility functions * Speed and efficiency improvments January 10, 2018 + * Version 2.2 * Changes: * Support for multiple-dataset alignment with RunMultiCCA and AlignSubspace * New methods for evaluating alignment performance October 12, 2017 + * Version 2.1 * Changes: * Support for using MAST and DESeq2 packages for differential expression testing in FindMarkers * Support for multi-modal single-cell data via \@assay slot July 26, 2017 + * Version 2.0 * Changes: * Preprint released for integrated analysis of scRNA-seq across conditions, technologies and species @@ -64,12 +73,14 @@ July 26, 2017 * Methods for scoring gene expression and cell-cycle phase October 4, 2016 + * Version 1.4 released * Changes: * Improved tools for cluster evaluation/visualizations * Methods for combining and adding to datasets August 22, 2016: + * Version 1.3 released * Changes : * Improved clustering approach - see FAQ for details @@ -79,6 +90,7 @@ August 22, 2016: * Updated visualizations May 21, 2015: + * Drop-Seq manuscript published. Version 1.2 released * Changes : * Added support for spectral t-SNE and density clustering @@ -88,4 +100,5 @@ May 21, 2015: * Small bug fixes April 13, 2015: + * Spatial mapping manuscript published. 
Version 1.1 released (initial release) From 0e60cee781a89f2dfd4f4b481fa2843ea36e1342 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 3 Mar 2020 10:03:39 -0500 Subject: [PATCH 010/111] improve some integration fxn man pages --- R/integration.R | 38 +++++++++++++++++++++++++------- man/PrepSCTIntegration.Rd | 21 ++++++++++++++---- man/SelectIntegrationFeatures.Rd | 18 ++++++++++----- 3 files changed, 60 insertions(+), 17 deletions(-) diff --git a/R/integration.R b/R/integration.R index a821adc01..6e84058cc 100644 --- a/R/integration.R +++ b/R/integration.R @@ -943,7 +943,22 @@ MixingMetric <- function( return(mixing) } -#' Prepare an object list that has been run through SCTransform for integration +#' Prepare an object list normalized with sctransform for integration. +#' +#' This function takes in a list of objects that have been normalized with the +#' \code{\link{SCTransform}} method and performs the following steps: +#' \itemize{ +#' \item{If anchor.features is a numeric value, calls \code{\link{SelectIntegrationFeatures}} +#' to determine the features to use in the downstream integration procedure.} +#' \item{Ensures that the sctransform residuals for the features specified +#' to anchor.features are present in each object in the list. This is +#' necessary because the default behavior of \code{\link{SCTransform}} is to +#' only store the residuals for the features determined to be variable. +#' Residuals are recomputed for missing features using the stored model +#' parameters via the \code{\link{GetResidual}} function.} +#' \item{Subsets the scale.data slot to only contain the residuals for +#' anchor.features for efficiency in downstream processing. } +#' } #' #' @param object.list A list of objects to prep for integration #' @param assay Name or vector of assay names (one for each object) that correspond @@ -960,8 +975,8 @@ MixingMetric <- function( #' the Pearson residual will be clipped to #' @param verbose Display output/messages #' -#' @return An object list with the \code{scale.data} slots set to the anchor -#' features +#' @return A list of Seurat objects with the appropriate \code{scale.data} slots +#' containing only the required \code{anchor.features}. #' #' @importFrom pbapply pblapply #' @importFrom methods slot slot<- @@ -1088,15 +1103,22 @@ PrepSCTIntegration <- function( #' Select integration features #' #' Choose the features to use when integrating multiple datasets. This function -#' ranks features by the number of datasets they appear in, breaking ties by the -#' median rank across datasets. It returns the highest features by this ranking. +#' ranks features by the number of datasets they are deemed variable in, +#' breaking ties by the median variable feature rank across datasets. It returns +#' the top scoring features by this ranking. +#' +#' If for any assay in the list, \code{\link{FindVariableFeatures}} hasn't been +#' run, this method will try to run it using the \code{fvf.nfeatures} parameter +#' and any additional ones specified through the \dots. #' #' @param object.list List of seurat objects #' @param nfeatures Number of features to return -#' @param assay Name of assay from which to pull the variable features. +#' @param assay Name or vector of assay names (one for each object) from which +#' to pull the variable features. #' @param verbose Print messages -#' @param fvf.nfeatures nfeatures for FindVariableFeatures. Used if -#' VariableFeatures have not been set for any object in object.list. 
+#' @param fvf.nfeatures nfeatures for \code{\link{FindVariableFeatures}}. Used +#' if \code{VariableFeatures} have not been set for any object in +#' \code{object.list}. #' @param ... Additional parameters to \code{\link{FindVariableFeatures}} #' #' @return A vector of selected features diff --git a/man/PrepSCTIntegration.Rd b/man/PrepSCTIntegration.Rd index a0545766a..9ab89153d 100644 --- a/man/PrepSCTIntegration.Rd +++ b/man/PrepSCTIntegration.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/integration.R \name{PrepSCTIntegration} \alias{PrepSCTIntegration} -\title{Prepare an object list that has been run through SCTransform for integration} +\title{Prepare an object list normalized with sctransform for integration.} \usage{ PrepSCTIntegration( object.list, @@ -33,9 +33,22 @@ the Pearson residual will be clipped to} \item{verbose}{Display output/messages} } \value{ -An object list with the \code{scale.data} slots set to the anchor -features +A list of Seurat objects with the appropriate \code{scale.data} slots +containing only the required \code{anchor.features}. } \description{ -Prepare an object list that has been run through SCTransform for integration +This function takes in a list of objects that have been normalized with the +\code{\link{SCTransform}} method and performs the following steps: +\itemize{ + \item{If anchor.features is a numeric value, calls \code{\link{SelectIntegrationFeatures}} + to determine the features to use in the downstream integration procedure.} + \item{Ensures that the sctransform residuals for the features specified + to anchor.features are present in each object in the list. This is + necessary because the default behavior of \code{\link{SCTransform}} is to + only store the residuals for the features determined to be variable. + Residuals are recomputed for missing features using the stored model + parameters via the \code{\link{GetResidual}} function.} + \item{Subsets the scale.data slot to only contain the residuals for + anchor.features for efficiency in downstream processing. } +} } diff --git a/man/SelectIntegrationFeatures.Rd b/man/SelectIntegrationFeatures.Rd index 1ecaf2f2b..6140b0084 100644 --- a/man/SelectIntegrationFeatures.Rd +++ b/man/SelectIntegrationFeatures.Rd @@ -18,12 +18,14 @@ SelectIntegrationFeatures( \item{nfeatures}{Number of features to return} -\item{assay}{Name of assay from which to pull the variable features.} +\item{assay}{Name or vector of assay names (one for each object) from which +to pull the variable features.} \item{verbose}{Print messages} -\item{fvf.nfeatures}{nfeatures for FindVariableFeatures. Used if -VariableFeatures have not been set for any object in object.list.} +\item{fvf.nfeatures}{nfeatures for \code{\link{FindVariableFeatures}}. Used +if \code{VariableFeatures} have not been set for any object in +\code{object.list}.} \item{...}{Additional parameters to \code{\link{FindVariableFeatures}}} } @@ -32,6 +34,12 @@ A vector of selected features } \description{ Choose the features to use when integrating multiple datasets. This function -ranks features by the number of datasets they appear in, breaking ties by the -median rank across datasets. It returns the highest features by this ranking. +ranks features by the number of datasets they are deemed variable in, +breaking ties by the median variable feature rank across datasets. It returns +the top scoring features by this ranking. 
+} +\details{ +If for any assay in the list, \code{\link{FindVariableFeatures}} hasn't been +run, this method will try to run it using the \code{fvf.nfeatures} parameter +and any additional ones specified through the \dots. } From 3f8d5faa01e00e9578b6193b49127d008582ff77 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 3 Mar 2020 10:09:05 -0500 Subject: [PATCH 011/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fdc1ae163..1e40dc25b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9000 -Date: 2020-02-28 +Version: 3.1.4.9001 +Date: 2020-03-03 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( From 0e3d0cc01cb39a753fa2d1568412f6e29e59235d Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Tue, 3 Mar 2020 10:55:04 -0500 Subject: [PATCH 012/111] Improvements to integration documentation --- R/integration.R | 71 +++++++++++++++++++---------------- man/FindIntegrationAnchors.Rd | 6 ++- man/FindTransferAnchors.Rd | 18 +++++---- man/IntegrateData.Rd | 10 ++--- man/PrepSCTIntegration.Rd | 14 ++++--- man/TransferData.Rd | 22 +++++------ 6 files changed, 76 insertions(+), 65 deletions(-) diff --git a/R/integration.R b/R/integration.R index 6e84058cc..24b17d343 100644 --- a/R/integration.R +++ b/R/integration.R @@ -8,9 +8,11 @@ NULL #' Find integration anchors #' -#' Finds the integration anchors +#' Find a set of anchors between a group of \code{\link{Seurat}} objects. +#' These anchors can later be used to integrate the objects using the +#' \code{\link{IntegrateData}} function. #' -#' @param object.list A list of objects between which to find anchors for +#' @param object.list A list of \code{\link{Seurat}} objects between which to find anchors for #' downstream integration. #' @param assay A vector of assay names specifying which assay to use when #' constructing anchors. If NULL, the current default assay for each object is @@ -368,12 +370,14 @@ FindIntegrationAnchors <- function( #' Find transfer anchors #' -#' Finds the transfer anchors +#' Find a set of anchors between a reference and query object. These +#' anchors can later be used to transfer data from the reference to +#' query object using the \code{\link{TransferData}} object. 
#' -#' @param reference Seurat object to use as the reference -#' @param query Seurat object to use as the query -#' @param reference.assay Assay to use from reference -#' @param query.assay Assay to use from query +#' @param reference \code{\link{Seurat}} object to use as the reference +#' @param query \code{\link{Seurat}} object to use as the query +#' @param reference.assay Name of the Assay to use from reference +#' @param query.assay Name of the Assay to use from query #' @param reduction Dimensional reduction to perform when finding anchors. Options are: #' \itemize{ #' \item{pcaproject: Project the PCA from the reference onto the query. We recommend using PCA @@ -389,19 +393,18 @@ FindIntegrationAnchors <- function( #' the reference object #' @param l2.norm Perform L2 normalization on the cell embeddings after dimensional reduction #' @param dims Which dimensions to use from the reduction to specify the neighbor search space -#' @param k.anchor How many neighbors (k) to use when picking anchors +#' @param k.anchor How many neighbors (k) to use when finding anchors #' @param k.filter How many neighbors (k) to use when filtering anchors #' @param k.score How many neighbors (k) to use when scoring anchors #' @param max.features The maximum number of features to use when specifying the neighborhood search #' space in the anchor filtering #'@param nn.method Method for nearest neighbor finding. Options include: rann, #' annoy -#' @param eps Error bound on the neighbor finding algorithm (from RANN) +#' @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}}) #' @param approx.pca Use truncated singular value decomposition to approximate PCA #' @param verbose Print progress bars and output #' -#' @return Returns an AnchorSet object -#' +#' @return Returns an \code{\link{AnchorSet}} object #' #' @export #' @@ -613,7 +616,7 @@ FindTransferAnchors <- function( #' #' Perform dataset integration using a pre-computed anchorset #' -#' @param anchorset Results from FindIntegrationAnchors +#' @param anchorset An \code{\link{AnchorSet}} object generated by \code{\link{FindIntegrationAnchors}} #' @param new.assay.name Name for the new assay containing the integrated data #' @param normalization.method Name of normalization method used: LogNormalize #' or SCT @@ -621,14 +624,14 @@ FindTransferAnchors <- function( #' if you want a different set from those used in the anchor finding process #' @param features.to.integrate Vector of features to integrate. By default, will use the features #' used in anchor finding. -#' @param dims Number of PCs to use in the weighting procedure -#' @param k.weight Number of neighbors to consider when weighting +#' @param dims Number of dimensions to use in the anchor weighting procedure +#' @param k.weight Number of neighbors to consider when weighting anchors #' @param weight.reduction Dimension reduction to use when calculating anchor weights. 
#' This can be either: #' \itemize{ #' \item{A string, specifying the name of a dimension reduction present in all objects to be integrated} #' \item{A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated} -#' \item{A vector of Dimreduc objects, specifying the object to use for each object in the integration} +#' \item{A vector of \code{\link{DimReduc}} objects, specifying the object to use for each object in the integration} #' \item{NULL, in which case a new PCA will be calculated and used to calculate anchor weights} #' } #' Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the @@ -641,7 +644,7 @@ FindTransferAnchors <- function( #' @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}}) #' @param verbose Print progress bars and output #' -#' @return Returns a Seurat object with a new integrated Assay +#' @return Returns a \code{\link{Seurat}} object with a new integrated \code{\link{Assay}} #' #' @export #' @@ -956,14 +959,16 @@ MixingMetric <- function( #' only store the residuals for the features determined to be variable. #' Residuals are recomputed for missing features using the stored model #' parameters via the \code{\link{GetResidual}} function.} -#' \item{Subsets the scale.data slot to only contain the residuals for +#' \item{Subsets the \code{scale.data} slot to only contain the residuals for #' anchor.features for efficiency in downstream processing. } #' } #' -#' @param object.list A list of objects to prep for integration -#' @param assay Name or vector of assay names (one for each object) that correspond -#' to the assay that SCTransform has been run on. If NULL, the current default -#' assay for each object is used. +#' @param object.list A list of \code{\link{Seurat}} objects to prepare for integration +#' @param assay The name of the \code{\link{Assay}} to use for integration. This can be a +#' single name if all the assays to be integrated have the same name, or a character vector +#' containing the name of each \code{\link{Assay}} in each object to be integrated. The +#' specified assays must have been normalized using \code{\link{SCTransform}}. +#' If NULL (default), the current default assay for each object is used. #' @param anchor.features Can be either: #' \itemize{ #' \item{A numeric value. This will call \code{\link{SelectIntegrationFeatures}} @@ -975,7 +980,7 @@ MixingMetric <- function( #' the Pearson residual will be clipped to #' @param verbose Display output/messages #' -#' @return A list of Seurat objects with the appropriate \code{scale.data} slots +#' @return A list of \code{\link{Seurat}} objects with the appropriate \code{scale.data} slots #' containing only the required \code{anchor.features}. #' #' @importFrom pbapply pblapply @@ -1192,35 +1197,35 @@ SelectIntegrationFeatures <- function( return(features) } -#' Transfer Labels +#' Transfer data #' -#' Transfers the labels +#' Transfer categorical or continuous data across single-cell datasets. #' -#' @param anchorset Results from FindTransferAnchors +#' @param anchorset An \code{\link{AnchorSet}} object generated by \code{\link{FindTransferAnchors}} #' @param refdata Data to transfer. Should be either a vector where the names #' correspond to reference cells, or a matrix, where the column names correspond #' to the reference cells. -#' @param weight.reduction Dimensional reduction to use for the weighting. 
+#' @param weight.reduction Dimensional reduction to use for the weighting anchors. #' Options are: #' \itemize{ #' \item{pcaproject: Use the projected PCA used for anchor building} #' \item{pca: Use an internal PCA on the query only} #' \item{cca: Use the CCA used for anchor building} -#' \item{custom DimReduc: User provided DimReduc object computed on the query +#' \item{custom DimReduc: User provided \code{\link{DimReduc}} object computed on the query #' cells} #' } #' @param l2.norm Perform L2 normalization on the cell embeddings after #' dimensional reduction -#' @param dims Number of PCs to use in the weighting procedure -#' @param k.weight Number of neighbors to consider when weighting +#' @param dims Number of dimensions to use in the anchor weighting procedure +#' @param k.weight Number of neighbors to consider when weighting anchors #' @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting -#' @param eps Error bound on the neighbor finding algorithm (from RANN) +#' @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}}) #' @param do.cpp Run cpp code where applicable #' @param verbose Print progress bars and output -#' @param slot Slot to store the imputed data +#' @param slot Slot to store the imputed data. Must be either "data" (default) or "counts" #' -#' @return If refdata is a vector, returns a dataframe with label predictions. -#' If refdata is a matrix, returns an Assay object where the imputed data has +#' @return If \code{refdata} is a vector, returns a dataframe with label predictions. +#' If \code{refdata} is a matrix, returns an Assay object where the imputed data has #' been stored in the provided slot. #' #' @export diff --git a/man/FindIntegrationAnchors.Rd b/man/FindIntegrationAnchors.Rd index 1d3187985..ab5ac9b15 100644 --- a/man/FindIntegrationAnchors.Rd +++ b/man/FindIntegrationAnchors.Rd @@ -25,7 +25,7 @@ FindIntegrationAnchors( ) } \arguments{ -\item{object.list}{A list of objects between which to find anchors for +\item{object.list}{A list of \code{\link{Seurat}} objects between which to find anchors for downstream integration.} \item{assay}{A vector of assay names specifying which assay to use when @@ -90,5 +90,7 @@ annoy} Returns an AnchorSet object } \description{ -Finds the integration anchors +Find a set of anchors between a group of \code{\link{Seurat}} objects. +These anchors can later be used to integrate the objects using the +\code{\link{IntegrateData}} function. } diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index 7fd5c04af..be2099660 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -27,15 +27,15 @@ FindTransferAnchors( ) } \arguments{ -\item{reference}{Seurat object to use as the reference} +\item{reference}{\code{\link{Seurat}} object to use as the reference} -\item{query}{Seurat object to use as the query} +\item{query}{\code{\link{Seurat}} object to use as the query} \item{normalization.method}{Name of normalization method used: LogNormalize or SCT} -\item{reference.assay}{Assay to use from reference} +\item{reference.assay}{Name of the Assay to use from reference} -\item{query.assay}{Assay to use from query} +\item{query.assay}{Name of the Assay to use from query} \item{reduction}{Dimensional reduction to perform when finding anchors. 
Options are: \itemize{ @@ -57,7 +57,7 @@ the reference object} \item{dims}{Which dimensions to use from the reduction to specify the neighbor search space} -\item{k.anchor}{How many neighbors (k) to use when picking anchors} +\item{k.anchor}{How many neighbors (k) to use when finding anchors} \item{k.filter}{How many neighbors (k) to use when filtering anchors} @@ -69,15 +69,17 @@ space in the anchor filtering} \item{nn.method}{Method for nearest neighbor finding. Options include: rann, annoy} -\item{eps}{Error bound on the neighbor finding algorithm (from RANN)} +\item{eps}{Error bound on the neighbor finding algorithm (from \code{\link{RANN}})} \item{approx.pca}{Use truncated singular value decomposition to approximate PCA} \item{verbose}{Print progress bars and output} } \value{ -Returns an AnchorSet object +Returns an \code{\link{AnchorSet}} object } \description{ -Finds the transfer anchors +Find a set of anchors between a reference and query object. These +anchors can later be used to transfer data from the reference to +query object using the \code{\link{TransferData}} object. } diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index f05fd4885..9b1e3594e 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -22,7 +22,7 @@ IntegrateData( ) } \arguments{ -\item{anchorset}{Results from FindIntegrationAnchors} +\item{anchorset}{An \code{\link{AnchorSet}} object generated by \code{\link{FindIntegrationAnchors}}} \item{new.assay.name}{Name for the new assay containing the integrated data} @@ -35,16 +35,16 @@ if you want a different set from those used in the anchor finding process} \item{features.to.integrate}{Vector of features to integrate. By default, will use the features used in anchor finding.} -\item{dims}{Number of PCs to use in the weighting procedure} +\item{dims}{Number of dimensions to use in the anchor weighting procedure} -\item{k.weight}{Number of neighbors to consider when weighting} +\item{k.weight}{Number of neighbors to consider when weighting anchors} \item{weight.reduction}{Dimension reduction to use when calculating anchor weights. 
This can be either: \itemize{ \item{A string, specifying the name of a dimension reduction present in all objects to be integrated} \item{A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated} - \item{A vector of Dimreduc objects, specifying the object to use for each object in the integration} + \item{A vector of \code{\link{DimReduc}} objects, specifying the object to use for each object in the integration} \item{NULL, in which case a new PCA will be calculated and used to calculate anchor weights} } Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the @@ -64,7 +64,7 @@ query, and weights will need to be calculated for all cells in the object.} \item{verbose}{Print progress bars and output} } \value{ -Returns a Seurat object with a new integrated Assay +Returns a \code{\link{Seurat}} object with a new integrated \code{\link{Assay}} } \description{ Perform dataset integration using a pre-computed anchorset diff --git a/man/PrepSCTIntegration.Rd b/man/PrepSCTIntegration.Rd index 9ab89153d..c00dd4d2e 100644 --- a/man/PrepSCTIntegration.Rd +++ b/man/PrepSCTIntegration.Rd @@ -13,11 +13,13 @@ PrepSCTIntegration( ) } \arguments{ -\item{object.list}{A list of objects to prep for integration} +\item{object.list}{A list of \code{\link{Seurat}} objects to prepare for integration} -\item{assay}{Name or vector of assay names (one for each object) that correspond -to the assay that SCTransform has been run on. If NULL, the current default -assay for each object is used.} +\item{assay}{The name of the \code{\link{Assay}} to use for integration. This can be a +single name if all the assays to be integrated have the same name, or a character vector +containing the name of each \code{\link{Assay}} in each object to be integrated. The +specified assays must have been normalized using \code{\link{SCTransform}}. +If NULL (default), the current default assay for each object is used.} \item{anchor.features}{Can be either: \itemize{ @@ -33,7 +35,7 @@ the Pearson residual will be clipped to} \item{verbose}{Display output/messages} } \value{ -A list of Seurat objects with the appropriate \code{scale.data} slots +A list of \code{\link{Seurat}} objects with the appropriate \code{scale.data} slots containing only the required \code{anchor.features}. } \description{ @@ -48,7 +50,7 @@ This function takes in a list of objects that have been normalized with the only store the residuals for the features determined to be variable. Residuals are recomputed for missing features using the stored model parameters via the \code{\link{GetResidual}} function.} - \item{Subsets the scale.data slot to only contain the residuals for + \item{Subsets the \code{scale.data} slot to only contain the residuals for anchor.features for efficiency in downstream processing. } } } diff --git a/man/TransferData.Rd b/man/TransferData.Rd index 1388696f9..712a0c683 100644 --- a/man/TransferData.Rd +++ b/man/TransferData.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/integration.R \name{TransferData} \alias{TransferData} -\title{Transfer Labels} +\title{Transfer data} \usage{ TransferData( anchorset, @@ -19,44 +19,44 @@ TransferData( ) } \arguments{ -\item{anchorset}{Results from FindTransferAnchors} +\item{anchorset}{An \code{\link{AnchorSet}} object generated by \code{\link{FindTransferAnchors}}} \item{refdata}{Data to transfer. 
Should be either a vector where the names correspond to reference cells, or a matrix, where the column names correspond to the reference cells.} -\item{weight.reduction}{Dimensional reduction to use for the weighting. +\item{weight.reduction}{Dimensional reduction to use for the weighting anchors. Options are: \itemize{ \item{pcaproject: Use the projected PCA used for anchor building} \item{pca: Use an internal PCA on the query only} \item{cca: Use the CCA used for anchor building} - \item{custom DimReduc: User provided DimReduc object computed on the query + \item{custom DimReduc: User provided \code{\link{DimReduc}} object computed on the query cells} }} \item{l2.norm}{Perform L2 normalization on the cell embeddings after dimensional reduction} -\item{dims}{Number of PCs to use in the weighting procedure} +\item{dims}{Number of dimensions to use in the anchor weighting procedure} -\item{k.weight}{Number of neighbors to consider when weighting} +\item{k.weight}{Number of neighbors to consider when weighting anchors} \item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting} -\item{eps}{Error bound on the neighbor finding algorithm (from RANN)} +\item{eps}{Error bound on the neighbor finding algorithm (from \code{\link{RANN}})} \item{do.cpp}{Run cpp code where applicable} \item{verbose}{Print progress bars and output} -\item{slot}{Slot to store the imputed data} +\item{slot}{Slot to store the imputed data. Must be either "data" (default) or "counts"} } \value{ -If refdata is a vector, returns a dataframe with label predictions. -If refdata is a matrix, returns an Assay object where the imputed data has +If \code{refdata} is a vector, returns a dataframe with label predictions. +If \code{refdata} is a matrix, returns an Assay object where the imputed data has been stored in the provided slot. } \description{ -Transfers the labels +Transfer categorical or continuous data across single-cell datasets. 
} From 3583b627a2c84594e337c0cbd1d1e0dad0271c3f Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Tue, 3 Mar 2020 15:36:35 -0500 Subject: [PATCH 013/111] Add number of variable features to seurat object print --- R/objects.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/objects.R b/R/objects.R index 0470f935f..2aeb8a848 100644 --- a/R/objects.R +++ b/R/objects.R @@ -6653,6 +6653,11 @@ setMethod( strwrap(x = paste(other.assays, collapse = ', ')) ) } + cat( + '\n', + 'Number of variable features: ', + length(x = VariableFeatures(object = object)) + ) reductions <- FilterObjects(object = object, classes.keep = 'DimReduc') if (length(x = reductions) > 0) { cat( From e8512e00a245d7ccd70c6c35298b07462797cba8 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Tue, 3 Mar 2020 15:40:48 -0500 Subject: [PATCH 014/111] Remove extra space --- R/objects.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/objects.R b/R/objects.R index 2aeb8a848..e1f2cde12 100644 --- a/R/objects.R +++ b/R/objects.R @@ -6655,7 +6655,7 @@ setMethod( } cat( '\n', - 'Number of variable features: ', + 'Number of variable features:', length(x = VariableFeatures(object = object)) ) reductions <- FilterObjects(object = object, classes.keep = 'DimReduc') From 4c1837cccd77bf4bd9300a19ae83f9b4209bee25 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Tue, 3 Mar 2020 15:49:33 -0500 Subject: [PATCH 015/111] Move var feature count to one line --- R/objects.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R/objects.R b/R/objects.R index e1f2cde12..914e46d54 100644 --- a/R/objects.R +++ b/R/objects.R @@ -6640,7 +6640,7 @@ setMethod( cat( "Active assay:", DefaultAssay(object = object), - paste0('(', nrow(x = object), ' features)') + paste0('(', nrow(x = object), ' features, ', length(x = VariableFeatures(object = object)), ' variable features)') ) other.assays <- assays[assays != DefaultAssay(object = object)] if (length(x = other.assays) > 0) { @@ -6653,11 +6653,6 @@ setMethod( strwrap(x = paste(other.assays, collapse = ', ')) ) } - cat( - '\n', - 'Number of variable features:', - length(x = VariableFeatures(object = object)) - ) reductions <- FilterObjects(object = object, classes.keep = 'DimReduc') if (length(x = reductions) > 0) { cat( From 156576fbf558cb0afa388ee7c5beeacdd40eb43b Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 3 Mar 2020 15:51:20 -0500 Subject: [PATCH 016/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1e40dc25b..c394432fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9001 +Version: 3.1.4.9002 Date: 2020-03-03 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
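
The documentation patches above (PATCH 010 through PATCH 012) describe the reference-to-query transfer workflow that FindTransferAnchors and TransferData implement. A minimal sketch of how the two calls chain together is given below; it assumes 'reference' and 'query' are already-normalized Seurat objects, that 'celltype' is a hypothetical metadata column on the reference, and that dims = 1:30 is an illustrative choice rather than anything prescribed by these patches.

    # Find anchors between reference and query, then transfer labels across them
    anchors <- FindTransferAnchors(reference = reference, query = query, dims = 1:30)
    # refdata is a named vector (names are reference cell names), so TransferData
    # returns a data.frame of label predictions, one row per query cell
    predictions <- TransferData(anchorset = anchors, refdata = reference$celltype, dims = 1:30)
    # keep the predicted labels and prediction scores as query metadata
    query <- AddMetaData(object = query, metadata = predictions)
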
From c2fedb5a833c2f685299c1510cec1926d8d00a5a Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 3 Mar 2020 16:12:10 -0500 Subject: [PATCH 017/111] Move metap to suggests, add install message The metap package is required for FindConservedMarkers. However, it depends on the mutoss package which depends on the multtest package. multtest is now hosted on Bioconductor and is causing issues for automatic package installation via install.packages. Therefore, since metap is only used in this one function, move it to Suggests and prompt the user with install instructions if it is missing. --- DESCRIPTION | 4 ++-- NAMESPACE | 1 - R/differential_expression.R | 26 ++++++++++++++++++-------- R/utilities.R | 6 +++--- man/FindConservedMarkers.Rd | 2 +- 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1e40dc25b..89fb2cfb9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,7 +42,6 @@ Imports: lmtest, MASS, Matrix (>= 1.2-14), - metap, patchwork, pbapply, plotly, @@ -101,4 +100,5 @@ Suggests: rtracklayer, monocle, Biobase, - VGAM + VGAM, + metap diff --git a/NAMESPACE b/NAMESPACE index 52dbc5bac..13ad0ef60 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -469,7 +469,6 @@ importFrom(igraph,plot.igraph) importFrom(irlba,irlba) importFrom(leiden,leiden) importFrom(lmtest,lrtest) -importFrom(metap,minimump) importFrom(methods,"slot<-") importFrom(methods,.hasSlot) importFrom(methods,as) diff --git a/R/differential_expression.R b/R/differential_expression.R index d8f728b68..c13bb276d 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -210,8 +210,6 @@ FindAllMarkers <- function( #' associated output column (e.g. CTRL_p_val). If only one group is tested in the grouping.var, max #' and combined p-values are not returned. #' -#' @importFrom metap minimump -#' #' @export #' #' @examples @@ -229,10 +227,21 @@ FindConservedMarkers <- function( grouping.var, assay = 'RNA', slot = 'data', - meta.method = minimump, + meta.method = metap::minimump, verbose = TRUE, ... ) { + metap.installed <- PackageCheck("metap", error = FALSE) + if (!metap.installed[1]) { + message("Please install the metap package to use FindConservedMarkers.") + message("The can be accomplished with the following commands: ") + message("----------------------------------------") + message("install.packages('BiocManager')") + message("BiocManager::install('multtest')") + message("install.packages('metap')") + message("----------------------------------------") + stop("metap not found", call. = FALSE) + } if (!is.function(x = meta.method)) { stop("meta.method should be a function from the metap package. Please see https://cran.r-project.org/web/packages/metap/metap.pdf for a detailed description of the available functions.") } @@ -354,12 +363,13 @@ FindConservedMarkers <- function( return(meta.method(x)$p) } )) - colnames(x = combined.pval) <- paste0( - as.character(x = formals()$meta.method), - "_p_val" - ) + meta.method.name <- as.character(x = formals()$meta.method) + if (length(x = meta.method.name) == 3) { + meta.method.name <- meta.method.name[3] + } + colnames(x = combined.pval) <- paste0(meta.method.name, "_p_val") markers.combined <- cbind(markers.combined, combined.pval) - markers.combined <- markers.combined[order(markers.combined[, paste0(as.character(x = formals()$meta.method), "_p_val")]), ] + markers.combined <- markers.combined[order(markers.combined[, paste0(meta.method.name, "_p_val")]), ] } else { warning("Only a single group was tested", call. = FALSE, immediate. 
= TRUE) } diff --git a/R/utilities.R b/R/utilities.R index 0e0320a8b..90b133ba2 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1783,9 +1783,9 @@ PackageCheck <- function(..., error = TRUE) { ) if (error && any(!package.installed)) { stop( - "Cannot find ", - paste(pkgs[!package.installed], collapse = ', '), - "; please install" + "Cannot find the following packages: ", + paste(pkgs[!package.installed], collapse = ', '), + ". Please install" ) } invisible(x = package.installed) diff --git a/man/FindConservedMarkers.Rd b/man/FindConservedMarkers.Rd index 2500454e6..0e29098c5 100644 --- a/man/FindConservedMarkers.Rd +++ b/man/FindConservedMarkers.Rd @@ -11,7 +11,7 @@ FindConservedMarkers( grouping.var, assay = "RNA", slot = "data", - meta.method = minimump, + meta.method = metap::minimump, verbose = TRUE, ... ) From 10f1d9bc054b1b208eb7d3a0937deb6e75fd1051 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 3 Mar 2020 17:38:45 -0500 Subject: [PATCH 018/111] move limma to suggests but use if available --- DESCRIPTION | 5 +++-- NAMESPACE | 2 +- R/differential_expression.R | 41 +++++++++++++++++++++++++++++++------ R/zzz.R | 5 ++++- man/Seurat-package.Rd | 2 ++ 5 files changed, 45 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b9ed23553..61e8daaf6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,6 @@ Imports: irlba, KernSmooth, leiden (>= 0.3.1), - limma, lmtest, MASS, Matrix (>= 1.2-14), @@ -102,4 +101,6 @@ Suggests: rtracklayer, monocle, Biobase, - VGAM + VGAM, + limma, + diff --git a/NAMESPACE b/NAMESPACE index 7341c783f..52dbc5bac 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -468,7 +468,6 @@ importFrom(igraph,graph_from_adjacency_matrix) importFrom(igraph,plot.igraph) importFrom(irlba,irlba) importFrom(leiden,leiden) -importFrom(limma,rankSumTestWithCorrelation) importFrom(lmtest,lrtest) importFrom(metap,minimump) importFrom(methods,"slot<-") @@ -537,6 +536,7 @@ importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,t.test) importFrom(stats,var) +importFrom(stats,wilcox.test) importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) importFrom(tsne,tsne) diff --git a/R/differential_expression.R b/R/differential_expression.R index d3c2f3ce6..ebb8588d1 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -1519,7 +1519,7 @@ RegularizedTheta <- function(cm, latent.data, min.theta = 0.01, bin.size = 128) # features # #' @importFrom pbapply pbsapply -#' @importFrom limma rankSumTestWithCorrelation +#' @importFrom stats wilcox.test #' @importFrom future.apply future_sapply #' @importFrom future nbrOfWorkers # @@ -1544,11 +1544,40 @@ WilcoxDETest <- function( yes = pbsapply, no = future_sapply ) - p_val <- my.sapply( - X = 1:nrow(x = data.use), - FUN = function(x) { - return(2 * min(rankSumTestWithCorrelation(index = j, statistics = data.use[x, ]))) + limma.check <- PackageCheck("limma", error = FALSE) + if (limma.check[1]) { + p_val <- my.sapply( + X = 1:nrow(x = data.use), + FUN = function(x) { + return(min(2 * min(limma::rankSumTestWithCorrelation(index = j, statistics = data.use[x, ])), 1)) + } + ) + } else { + if (getOption('Seurat.limma.wilcox.msg', TRUE)) { + message( + "For a more efficient implementation of the Wilcoxon Rank Sum Test,", + "\n(default method for FindMarkers) please install the limma package", + "\n--------------------------------------------", + "\ninstall.packages('BiocManager')", + "\nBiocManager::install('limma')", + "\n--------------------------------------------", + "\nAfter installation 
of limma, Seurat will automatically use the more ", + "\nefficient implementation (no further action necessary).", + "\nThis message will be shown once per session" + ) + options(Seurat.limma.wilcox.msg = FALSE) } - ) + group.info <- data.frame(row.names = c(cells.1, cells.2)) + group.info[cells.1, "group"] <- "Group1" + group.info[cells.2, "group"] <- "Group2" + group.info[, "group"] <- factor(x = group.info[, "group"]) + data.use <- data.use[, rownames(x = group.info), drop = FALSE] + p_val <- my.sapply( + X = 1:nrow(x = data.use), + FUN = function(x) { + return(wilcox.test(data.use[x, ] ~ group.info[, "group"], ...)$p.value) + } + ) + } return(data.frame(p_val, row.names = rownames(x = data.use))) } diff --git a/R/zzz.R b/R/zzz.R index d89d7c2a6..3befe3eba 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -15,6 +15,8 @@ #' \item{\code{Seurat.checkdots}}{For functions that have ... as a parameter, #' this controls the behavior when an item isn't used. Can be one of warn, #' stop, or silent.} +#' \item{\code{Seurat.limma.wilcox.msg}}{{Show message about more efficient +#' Wilcoxon Rank Sum test available via the limma package}} #' } #' #' @docType package @@ -26,7 +28,8 @@ NULL seurat_default_options <- list( Seurat.memsafe = FALSE, Seurat.warn.umap.uwot = TRUE, - Seurat.checkdots = "warn" + Seurat.checkdots = "warn", + Seurat.limma.wilcox.msg = TRUE ) .onLoad <- function(libname, pkgname) { diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index f268daee9..70fb5278d 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -23,6 +23,8 @@ Seurat uses the following [options()] to configure behaviour: \item{\code{Seurat.checkdots}}{For functions that have ... as a parameter, this controls the behavior when an item isn't used. Can be one of warn, stop, or silent.} + \item{\code{Seurat.limma.wilcox.msg}}{{Show message about more efficient + Wilcoxon Rank Sum test available via the limma package}} } } From 9f6529316ce751d8c7a13df4f213cf999b0fd390 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 3 Mar 2020 17:45:13 -0500 Subject: [PATCH 019/111] use stop with newlines instead of message --- R/differential_expression.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index c13bb276d..60bfbb6d4 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -233,14 +233,16 @@ FindConservedMarkers <- function( ) { metap.installed <- PackageCheck("metap", error = FALSE) if (!metap.installed[1]) { - message("Please install the metap package to use FindConservedMarkers.") - message("The can be accomplished with the following commands: ") - message("----------------------------------------") - message("install.packages('BiocManager')") - message("BiocManager::install('multtest')") - message("install.packages('metap')") - message("----------------------------------------") - stop("metap not found", call. = FALSE) + stop( + "Please install the metap package to use FindConservedMarkers.", + "\nThis can be accomplished with the following commands: ", + "\n----------------------------------------", + "\ninstall.packages('BiocManager')", + "\nBiocManager::install('multtest')", + "\ninstall.packages('metap')", + "\n----------------------------------------", + call. = FALSE + ) } if (!is.function(x = meta.method)) { stop("meta.method should be a function from the metap package. 
Please see https://cran.r-project.org/web/packages/metap/metap.pdf for a detailed description of the available functions.") From c4a4addf9ae73200bfa820721c12b3cf06fc72a2 Mon Sep 17 00:00:00 2001 From: Avi Srivastava Date: Tue, 3 Mar 2020 18:16:34 -0500 Subject: [PATCH 020/111] removing SNN anchor bottleneck --- src/integration.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/integration.cpp b/src/integration.cpp index bb61b1303..afb16c289 100644 --- a/src/integration.cpp +++ b/src/integration.cpp @@ -96,17 +96,30 @@ Eigen::SparseMatrix IntegrateDataC( } +int getCoeff (Eigen::SparseMatrix& mat, size_t i, size_t j){ + int score{0}; + if (i == j) { + for (Eigen::SparseMatrix::InnerIterator it(mat, i); it; ++it){ + score++; + } + } else { + for(int k=0; k < mat.outerSize(); ++k) { + if (mat.coeff(i, k) and mat.coeff(j, k)) { score++; } + } + } + + return score; +} + + //[[Rcpp::export]] Eigen::SparseMatrix SNNAnchor( Eigen::SparseMatrix k_matrix, Eigen::SparseMatrix anchor_only ) { - typedef Eigen::SparseMatrix SpMat; - SpMat mat2 = k_matrix; - SpMat mat3 = mat2 * mat2.transpose(); for (int k=0; k::InnerIterator it(anchor_only,k); it; ++it){ - it.valueRef() = mat3.coeff(it.row(), it.col()); + it.valueRef() = getCoeff(k_matrix, it.row(), it.col()); } } return(anchor_only); From a76a396b6f6e8b53e9038cd3d8312aaa4a8d36f1 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 3 Mar 2020 18:18:27 -0500 Subject: [PATCH 021/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c394432fa..e90a095d8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9002 +Version: 3.1.4.9003 Date: 2020-03-03 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. From e6db217f7d423f0a3f5b65584f50294b990f5a27 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Wed, 4 Mar 2020 10:30:08 -0500 Subject: [PATCH 022/111] update FindIntegrationAnchors docs, update CITATION --- R/integration.R | 41 ++++++++++++++++++++++++++++++----- inst/CITATION | 15 ++++++++----- man/FindIntegrationAnchors.Rd | 41 +++++++++++++++++++++++++++++++---- 3 files changed, 82 insertions(+), 15 deletions(-) diff --git a/R/integration.R b/R/integration.R index 24b17d343..83b14febe 100644 --- a/R/integration.R +++ b/R/integration.R @@ -8,12 +8,39 @@ NULL #' Find integration anchors #' -#' Find a set of anchors between a group of \code{\link{Seurat}} objects. +#' Find a set of anchors between a list of \code{\link{Seurat}} objects. #' These anchors can later be used to integrate the objects using the #' \code{\link{IntegrateData}} function. +#' +#' The main steps of this procedure are outlined below. For a more detailed +#' description of the methodology, please see Stuart, Butler, et al Cell 2019. 
+#' First, determine anchor.features if not explicitly specified using +#' \code{\link{SelectIntegrationFeatures}}. Then for all pairwise combinations +#' of reference and query datasets: +#' +#' \itemize{ +#' \item{Perform dimensional reduction on the dataset pair as specified via +#' the \code{reduction} parameter. If \code{l2.norm} is set to \code{TRUE}, +#' perform L2 normalization of the embedding vectors.} +#' \item{Identify anchors - pairs of cells from each dataset +#' that are contained within each other's neighborhoods (also known as mutual +#' nearest neighbors).} +#' \item{Filter low confidence anchors to ensure anchors in the low dimension +#' space are in broad agreement with the high dimensional measurements. This +#' is done by looking at the neighbors of each query cell in the reference +#' dataset using \code{max.features} to define this space. If the reference +#' cell isn't found within the first \code{k.filter} neighbors, remove the +#' anchor.} +#' \item{Assign each remaining anchor a score. For each anchor cell, determine +#' the nearest \code{k.score} anchors within its own dataset and within its +#' pair's dataset. Based on these neighborhoods, construct an overall neighbor +#' graph and then compute the shared neighbor overlap between anchor and query +#' cells (analagous to an SNN graph). We use the 0.01 and 0.90 quantiles on +#' these scores to dampen outlier effects and rescale to range between 0-1.} +#' } #' -#' @param object.list A list of \code{\link{Seurat}} objects between which to find anchors for -#' downstream integration. +#' @param object.list A list of \code{\link{Seurat}} objects between which to +#' find anchors for downstream integration. #' @param assay A vector of assay names specifying which assay to use when #' constructing anchors. If NULL, the current default assay for each object is #' used. @@ -57,8 +84,12 @@ NULL #' @param eps Error bound on the neighbor finding algorithm (from RANN) #' @param verbose Print progress bars and output #' -#' @return Returns an AnchorSet object -#' +#' @return Returns an \code{AnchorSet} object that can be used as input to +#' \code{\link{IntegrateData}}. +#' +#' @references Stuart T, Butler A, et al. Comprehensive Integration of +#' Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +#' #' @importFrom pbapply pblapply #' @importFrom future.apply future_lapply #' @importFrom future nbrOfWorkers diff --git a/inst/CITATION b/inst/CITATION index 0f8c616f4..1a56568f9 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -7,13 +7,16 @@ citEntry(entry = "article", as.person("Christoph Hafemeister"), as.person("Efthymia Papalexi"), as.person("William M Mauck III"), + as.person("Yuhan Hao"), as.person("Marlon Stoeckius"), as.person("Peter Smibert"), as.person("Rahul Satija")), - title = "Comprehensive integration of single cell data", - journal = "bioRxiv", - year = "2018", - doi = "10.1101/460147", - url = "https://www.biorxiv.org/content/10.1101/460147v1", - textVersion = "Stuart and Butler et al. Comprehensive integration of single cell data. bioRxiv (2018)." + title = "Comprehensive Integration of Single-Cell Data", + journal = "Cell", + year = "2019", + volume = "177", + pages = "1888-1902", + doi = "10.1016/j.cell.2019.05.031", + url = "https://doi.org/10.1016/j.cell.2019.05.031", + textVersion = "Stuart and Butler et al. Comprehensive Integration of Single-Cell Data. Cell (2019)." 
) diff --git a/man/FindIntegrationAnchors.Rd b/man/FindIntegrationAnchors.Rd index ab5ac9b15..8b5e6a57b 100644 --- a/man/FindIntegrationAnchors.Rd +++ b/man/FindIntegrationAnchors.Rd @@ -25,8 +25,8 @@ FindIntegrationAnchors( ) } \arguments{ -\item{object.list}{A list of \code{\link{Seurat}} objects between which to find anchors for -downstream integration.} +\item{object.list}{A list of \code{\link{Seurat}} objects between which to +find anchors for downstream integration.} \item{assay}{A vector of assay names specifying which assay to use when constructing anchors. If NULL, the current default assay for each object is @@ -87,10 +87,43 @@ annoy} \item{verbose}{Print progress bars and output} } \value{ -Returns an AnchorSet object +Returns an \code{AnchorSet} object that can be used as input to +\code{\link{IntegrateData}}. } \description{ -Find a set of anchors between a group of \code{\link{Seurat}} objects. +Find a set of anchors between a list of \code{\link{Seurat}} objects. These anchors can later be used to integrate the objects using the \code{\link{IntegrateData}} function. } +\details{ +The main steps of this procedure are outlined below. For a more detailed +description of the methodology, please see Stuart, Butler, et al Cell 2019. +First, determine anchor.features if not explicitly specified using +\code{\link{SelectIntegrationFeatures}}. Then for all pairwise combinations +of reference and query datasets: + +\itemize{ + \item{Perform dimensional reduction on the dataset pair as specified via + the \code{reduction} parameter. If \code{l2.norm} is set to \code{TRUE}, + perform L2 normalization of the embedding vectors.} + \item{Identify anchors - pairs of cells from each dataset + that are contained within each other's neighborhoods (also known as mutual + nearest neighbors).} + \item{Filter low confidence anchors to ensure anchors in the low dimension + space are in broad agreement with the high dimensional measurements. This + is done by looking at the neighbors of each query cell in the reference + dataset using \code{max.features} to define this space. If the reference + cell isn't found within the first \code{k.filter} neighbors, remove the + anchor.} + \item{Assign each remaining anchor a score. For each anchor cell, determine + the nearest \code{k.score} anchors within its own dataset and within its + pair's dataset. Based on these neighborhoods, construct an overall neighbor + graph and then compute the shared neighbor overlap between anchor and query + cells (analagous to an SNN graph). We use the 0.01 and 0.90 quantiles on + these scores to dampen outlier effects and rescale to range between 0-1.} +} +} +\references{ +Stuart T, Butler A, et al. Comprehensive Integration of +Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +} From c4d7bc03066c5cd3ea786606ee64e6ba3b5753fd Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Wed, 4 Mar 2020 10:37:15 -0500 Subject: [PATCH 023/111] update integration reference in DESCRIPTION file --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index fdc1ae163..f80a9ad84 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: Seurat Version: 3.1.4.9000 Date: 2020-02-28 Title: Tools for Single Cell Genomics -Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 
'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. +Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( person(given = 'Rahul', family = 'Satija', email = 'rsatija@nygenome.org', role = 'aut', comment = c(ORCID = '0000-0001-9448-8833')), person(given = 'Andrew', family = 'Butler', email = 'abutler@nygenome.org', role = 'aut', comment = c(ORCID = '0000-0003-3608-0463')), From 4583b777cb9867178babd41152fc0f74a92613a7 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 4 Mar 2020 12:39:02 -0500 Subject: [PATCH 024/111] Bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9e802b1cc..aa094a1b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9002 -Date: 2020-03-03 +Version: 3.1.4.9003 +Date: 2020-03-04 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
Authors@R: c( From 60dd2ed74d2201359c33460e4b4bbee0a7e03d2d Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 4 Mar 2020 15:20:07 -0500 Subject: [PATCH 025/111] Only run FindConservedMarkers test when metap is installed --- tests/testthat/test_differential_expression.R | 103 +++++++++--------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index eb0d4cba9..8dce1582a 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -222,53 +222,56 @@ test_that("LR test works", { }) # Tests for FindConservedMarkers -# -------------------------------------------------------------------------------- -context("FindConservedMarkers") -pbmc_small$groups - -markers <- suppressWarnings(FindConservedMarkers(object = pbmc_small, ident.1 = 0, grouping.var = "groups", verbose = FALSE)) - -standard.names <- c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj") - -test_that("FindConservedMarkers works", { - expect_equal(colnames(x = markers), c(paste0("g2_", standard.names), paste0("g1_", standard.names), "max_pval", "minimump_p_val")) - expect_equal(markers[1, "g2_p_val"], 4.983576e-05) - expect_equal(markers[1, "g2_avg_logFC"], -4.125279, tolerance = 1e-6) - # expect_equal(markers[1, "g2_pct.1"], 0.062) - expect_equal(markers[1, "g2_pct.2"], 0.75) - expect_equal(markers[1, "g2_p_val_adj"], 0.0114622238) - expect_equal(markers[1, "g1_p_val"], 3.946643e-08) - expect_equal(markers[1, "g1_avg_logFC"], -3.589384, tolerance = 1e-6) - expect_equal(markers[1, "g1_pct.1"], 0.10) - expect_equal(markers[1, "g1_pct.2"], 0.958) - expect_equal(markers[1, "g1_p_val_adj"], 9.077279e-06) - expect_equal(markers[1, "max_pval"], 4.983576e-05) - expect_equal(markers[1, "minimump_p_val"], 7.893286e-08) - expect_equal(nrow(markers), 162) - expect_equal(rownames(markers)[1], "HLA-DRB1") - expect_equal(markers[, "max_pval"], unname(obj = apply(X = markers, MARGIN = 1, FUN = function(x) max(x[c("g1_p_val", "g2_p_val")])))) -}) - -test_that("FindConservedMarkers errors when expected", { - expect_error(FindConservedMarkers(pbmc_small)) - expect_error(FindConservedMarkers(pbmc_small, ident.1 = 0)) - expect_error(FindConservedMarkers(pbmc_small, ident.1 = 0, grouping.var = "groups", meta.method = "minimump")) -}) - -pbmc.test <- pbmc_small -Idents(object = pbmc.test) <- "RNA_snn_res.1" -pbmc.test$id.group <- paste0(pbmc.test$RNA_snn_res.1, "_", pbmc.test$groups) -pbmc.test <- subset(x = pbmc.test, id.group == "0_g1", invert = TRUE) -markers.missing <- suppressWarnings(FindConservedMarkers(object = pbmc.test, ident.1 = 0, grouping.var = "groups", test.use = "t", verbose = FALSE)) - -test_that("FindConservedMarkers handles missing idents in certain groups", { - expect_warning(FindConservedMarkers(object = pbmc.test, ident.1 = 0, grouping.var = "groups", test.use = "t")) - expect_equal(colnames(x = markers.missing), paste0("g2_", standard.names)) - expect_equal(markers.missing[1, "g2_p_val"], 1.672911e-13) - expect_equal(markers.missing[1, "g2_avg_logFC"], -4.527888, tolerance = 1e-6) - # expect_equal(markers.missing[1, "g2_pct.1"], 0.062) - expect_equal(markers.missing[1, "g2_pct.2"], 0.95) - expect_equal(markers.missing[1, "g2_p_val_adj"], 3.847695e-11) - expect_equal(nrow(markers.missing), 190) - expect_equal(rownames(markers.missing)[1], "HLA-DPB1") -}) +# ------------------------------------------------------------------------------- + +if (requireNamespace('metap', 
quietly = TRUE)) { + context("FindConservedMarkers") + pbmc_small$groups + + markers <- suppressWarnings(FindConservedMarkers(object = pbmc_small, ident.1 = 0, grouping.var = "groups", verbose = FALSE)) + + standard.names <- c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj") + + test_that("FindConservedMarkers works", { + expect_equal(colnames(x = markers), c(paste0("g2_", standard.names), paste0("g1_", standard.names), "max_pval", "minimump_p_val")) + expect_equal(markers[1, "g2_p_val"], 4.983576e-05) + expect_equal(markers[1, "g2_avg_logFC"], -4.125279, tolerance = 1e-6) + # expect_equal(markers[1, "g2_pct.1"], 0.062) + expect_equal(markers[1, "g2_pct.2"], 0.75) + expect_equal(markers[1, "g2_p_val_adj"], 0.0114622238) + expect_equal(markers[1, "g1_p_val"], 3.946643e-08) + expect_equal(markers[1, "g1_avg_logFC"], -3.589384, tolerance = 1e-6) + expect_equal(markers[1, "g1_pct.1"], 0.10) + expect_equal(markers[1, "g1_pct.2"], 0.958) + expect_equal(markers[1, "g1_p_val_adj"], 9.077279e-06) + expect_equal(markers[1, "max_pval"], 4.983576e-05) + expect_equal(markers[1, "minimump_p_val"], 7.893286e-08) + expect_equal(nrow(markers), 162) + expect_equal(rownames(markers)[1], "HLA-DRB1") + expect_equal(markers[, "max_pval"], unname(obj = apply(X = markers, MARGIN = 1, FUN = function(x) max(x[c("g1_p_val", "g2_p_val")])))) + }) + + test_that("FindConservedMarkers errors when expected", { + expect_error(FindConservedMarkers(pbmc_small)) + expect_error(FindConservedMarkers(pbmc_small, ident.1 = 0)) + expect_error(FindConservedMarkers(pbmc_small, ident.1 = 0, grouping.var = "groups", meta.method = "minimump")) + }) + + pbmc.test <- pbmc_small + Idents(object = pbmc.test) <- "RNA_snn_res.1" + pbmc.test$id.group <- paste0(pbmc.test$RNA_snn_res.1, "_", pbmc.test$groups) + pbmc.test <- subset(x = pbmc.test, id.group == "0_g1", invert = TRUE) + markers.missing <- suppressWarnings(FindConservedMarkers(object = pbmc.test, ident.1 = 0, grouping.var = "groups", test.use = "t", verbose = FALSE)) + + test_that("FindConservedMarkers handles missing idents in certain groups", { + expect_warning(FindConservedMarkers(object = pbmc.test, ident.1 = 0, grouping.var = "groups", test.use = "t")) + expect_equal(colnames(x = markers.missing), paste0("g2_", standard.names)) + expect_equal(markers.missing[1, "g2_p_val"], 1.672911e-13) + expect_equal(markers.missing[1, "g2_avg_logFC"], -4.527888, tolerance = 1e-6) + # expect_equal(markers.missing[1, "g2_pct.1"], 0.062) + expect_equal(markers.missing[1, "g2_pct.2"], 0.95) + expect_equal(markers.missing[1, "g2_p_val_adj"], 3.847695e-11) + expect_equal(nrow(markers.missing), 190) + expect_equal(rownames(markers.missing)[1], "HLA-DPB1") + }) +} From da30d11be40681d014f110ffb8c39f94f8263565 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 4 Mar 2020 15:33:06 -0500 Subject: [PATCH 026/111] Fix for FeatureHeatmaps Set nrow and ncol in wrap_plots correctly Ensure functionality is preserved for blended plots and split blended plots --- R/visualization.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 3aec8e3a3..07839178a 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1336,7 +1336,8 @@ FeaturePlot <- function( what = rbind, args = split(x = 1:length(x = plots), f = ceiling(x = seq_along(along.with = 1:length(x = plots)) / length(x = features))) ))] - plots <- wrap_plots(plots, ncol = ncol, nrow = nrow) + # Set ncol to number of splits (nrow) and nrow to number of features (ncol) + plots <- 
wrap_plots(plots, ncol = nrow, nrow = ncol) if (!is.null(x = legend) && legend == 'none') { plots <- plots & NoLegend() } From b15e76ba1f610725e3c040c20f5ba29a6e8f6391 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Wed, 4 Mar 2020 16:50:28 -0500 Subject: [PATCH 027/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bee13cc3e..269547136 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9003 +Version: 3.1.4.9004 Date: 2020-03-04 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. From 5e211a89f366b47903efbc4087c67046ff387b3a Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Wed, 4 Mar 2020 17:44:54 -0500 Subject: [PATCH 028/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 269547136..a017781d6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9004 +Version: 3.1.4.9005 Date: 2020-03-04 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. From e5160b82a4920bd55272a4d64bae75cd98bb0120 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Thu, 5 Mar 2020 10:29:40 -0500 Subject: [PATCH 029/111] update FindTransferAnchors man page --- R/integration.R | 79 +++++++++++++++++++++++++++++--------- man/FindTransferAnchors.Rd | 79 ++++++++++++++++++++++++++++++-------- 2 files changed, 123 insertions(+), 35 deletions(-) diff --git a/R/integration.R b/R/integration.R index 83b14febe..1f87f9a81 100644 --- a/R/integration.R +++ b/R/integration.R @@ -405,38 +405,81 @@ FindIntegrationAnchors <- function( #' anchors can later be used to transfer data from the reference to #' query object using the \code{\link{TransferData}} object. #' +#' The main steps of this procedure are outlined below. For a more detailed +#' description of the methodology, please see Stuart, Butler, et al Cell 2019. +#' +#' \itemize{ +#' +#' \item{Perform dimensional reduction. Exactly what is done here depends on +#' the values set for the \code{reduction} and \code{project.query} +#' parameters. 
If \code{reduction = "pcaproject"}, a PCA is performed on +#' either the reference (if \code{project.query = FALSE}) or the query (if +#' \code{project.query = TRUE}), using the \code{features} specified. The data +#' from the other dataset is then projected onto this learned PCA structure. +#' If \code{reduction = "cca"}, then CCA is performed on the reference and +#' query for this dimensional reduction step. If \code{l2.norm} is set to +#' \code{TRUE}, perform L2 normalization of the embedding vectors.} +#' \item{Identify anchors between the reference and query - pairs of cells +#' from each dataset that are contained within each other's neighborhoods +#' (also known as mutual nearest neighbors).} +#' \item{Filter low confidence anchors to ensure anchors in the low dimension +#' space are in broad agreement with the high dimensional measurements. This +#' is done by looking at the neighbors of each query cell in the reference +#' dataset using \code{max.features} to define this space. If the reference +#' cell isn't found within the first \code{k.filter} neighbors, remove the +#' anchor.} +#' \item{Assign each remaining anchor a score. For each anchor cell, determine +#' the nearest \code{k.score} anchors within its own dataset and within its +#' pair's dataset. Based on these neighborhoods, construct an overall neighbor +#' graph and then compute the shared neighbor overlap between anchor and query +#' cells (analagous to an SNN graph). We use the 0.01 and 0.90 quantiles on +#' these scores to dampen outlier effects and rescale to range between 0-1.} +#' } +#' #' @param reference \code{\link{Seurat}} object to use as the reference #' @param query \code{\link{Seurat}} object to use as the query #' @param reference.assay Name of the Assay to use from reference #' @param query.assay Name of the Assay to use from query -#' @param reduction Dimensional reduction to perform when finding anchors. Options are: +#' @param reduction Dimensional reduction to perform when finding anchors. +#' Options are: #' \itemize{ -#' \item{pcaproject: Project the PCA from the reference onto the query. We recommend using PCA -#' when reference and query datasets are from scRNA-seq} +#' \item{pcaproject: Project the PCA from the reference onto the query. We +#' recommend using PCA when reference and query datasets are from scRNA-seq} #' \item{cca: Run a CCA on the reference and query } #' } -#' @param project.query Project the PCA from the query dataset onto the reference. Use only in rare -#' cases where the query dataset has a much larger cell number, but the reference dataset has a -#' unique assay for transfer. -#' @param features Features to use for dimensional reduction -#' @param normalization.method Name of normalization method used: LogNormalize or SCT -#' @param npcs Number of PCs to compute on reference. If null, then use an existing PCA structure in -#' the reference object -#' @param l2.norm Perform L2 normalization on the cell embeddings after dimensional reduction -#' @param dims Which dimensions to use from the reduction to specify the neighbor search space +#' @param project.query Project the PCA from the query dataset onto the +#' reference. Use only in rare cases where the query dataset has a much larger +#' cell number, but the reference dataset has a unique assay for transfer. +#' @param features Features to use for dimensional reduction. If not specified, +#' set as variable features of the reference object which are also present in +#' the query. 
+#' @param normalization.method Name of normalization method used: LogNormalize +#' or SCT +#' @param npcs Number of PCs to compute on reference. If null, then use an +#' existing PCA structure in the reference object +#' @param l2.norm Perform L2 normalization on the cell embeddings after +#' dimensional reduction +#' @param dims Which dimensions to use from the reduction to specify the +#' neighbor search space #' @param k.anchor How many neighbors (k) to use when finding anchors #' @param k.filter How many neighbors (k) to use when filtering anchors #' @param k.score How many neighbors (k) to use when scoring anchors -#' @param max.features The maximum number of features to use when specifying the neighborhood search -#' space in the anchor filtering +#' @param max.features The maximum number of features to use when specifying the +#' neighborhood search space in the anchor filtering #'@param nn.method Method for nearest neighbor finding. Options include: rann, #' annoy -#' @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}}) -#' @param approx.pca Use truncated singular value decomposition to approximate PCA +#' @param eps Error bound on the neighbor finding algorithm (from +#' \code{\link{RANN}}) +#' @param approx.pca Use truncated singular value decomposition to approximate +#' PCA #' @param verbose Print progress bars and output #' -#' @return Returns an \code{\link{AnchorSet}} object -#' +#' @return Returns an \code{AnchorSet} object that can be used as input to +#' \code{\link{TransferData}} +#' +#' @references Stuart T, Butler A, et al. Comprehensive Integration of +#' Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +#' #' @export #' FindTransferAnchors <- function( diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index be2099660..a11efe84a 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -31,31 +31,37 @@ FindTransferAnchors( \item{query}{\code{\link{Seurat}} object to use as the query} -\item{normalization.method}{Name of normalization method used: LogNormalize or SCT} +\item{normalization.method}{Name of normalization method used: LogNormalize +or SCT} \item{reference.assay}{Name of the Assay to use from reference} \item{query.assay}{Name of the Assay to use from query} -\item{reduction}{Dimensional reduction to perform when finding anchors. Options are: +\item{reduction}{Dimensional reduction to perform when finding anchors. +Options are: \itemize{ - \item{pcaproject: Project the PCA from the reference onto the query. We recommend using PCA - when reference and query datasets are from scRNA-seq} + \item{pcaproject: Project the PCA from the reference onto the query. We + recommend using PCA when reference and query datasets are from scRNA-seq} \item{cca: Run a CCA on the reference and query } }} -\item{project.query}{Project the PCA from the query dataset onto the reference. Use only in rare -cases where the query dataset has a much larger cell number, but the reference dataset has a -unique assay for transfer.} +\item{project.query}{Project the PCA from the query dataset onto the +reference. Use only in rare cases where the query dataset has a much larger +cell number, but the reference dataset has a unique assay for transfer.} -\item{features}{Features to use for dimensional reduction} +\item{features}{Features to use for dimensional reduction. 
If not specified, +set as variable features of the reference object which are also present in +the query.} -\item{npcs}{Number of PCs to compute on reference. If null, then use an existing PCA structure in -the reference object} +\item{npcs}{Number of PCs to compute on reference. If null, then use an +existing PCA structure in the reference object} -\item{l2.norm}{Perform L2 normalization on the cell embeddings after dimensional reduction} +\item{l2.norm}{Perform L2 normalization on the cell embeddings after +dimensional reduction} -\item{dims}{Which dimensions to use from the reduction to specify the neighbor search space} +\item{dims}{Which dimensions to use from the reduction to specify the +neighbor search space} \item{k.anchor}{How many neighbors (k) to use when finding anchors} @@ -63,23 +69,62 @@ the reference object} \item{k.score}{How many neighbors (k) to use when scoring anchors} -\item{max.features}{The maximum number of features to use when specifying the neighborhood search -space in the anchor filtering} +\item{max.features}{The maximum number of features to use when specifying the +neighborhood search space in the anchor filtering} \item{nn.method}{Method for nearest neighbor finding. Options include: rann, annoy} -\item{eps}{Error bound on the neighbor finding algorithm (from \code{\link{RANN}})} +\item{eps}{Error bound on the neighbor finding algorithm (from +\code{\link{RANN}})} -\item{approx.pca}{Use truncated singular value decomposition to approximate PCA} +\item{approx.pca}{Use truncated singular value decomposition to approximate +PCA} \item{verbose}{Print progress bars and output} } \value{ -Returns an \code{\link{AnchorSet}} object +Returns an \code{AnchorSet} object that can be used as input to +\code{\link{TransferData}} } \description{ Find a set of anchors between a reference and query object. These anchors can later be used to transfer data from the reference to query object using the \code{\link{TransferData}} object. } +\details{ +The main steps of this procedure are outlined below. For a more detailed +description of the methodology, please see Stuart, Butler, et al Cell 2019. + +\itemize{ + + \item{Perform dimensional reduction. Exactly what is done here depends on + the values set for the \code{reduction} and \code{project.query} + parameters. If \code{reduction = "pcaproject"}, a PCA is performed on + either the reference (if \code{project.query = FALSE}) or the query (if + \code{project.query = TRUE}), using the \code{features} specified. The data + from the other dataset is then projected onto this learned PCA structure. + If \code{reduction = "cca"}, then CCA is performed on the reference and + query for this dimensional reduction step. If \code{l2.norm} is set to + \code{TRUE}, perform L2 normalization of the embedding vectors.} + \item{Identify anchors between the reference and query - pairs of cells + from each dataset that are contained within each other's neighborhoods + (also known as mutual nearest neighbors).} + \item{Filter low confidence anchors to ensure anchors in the low dimension + space are in broad agreement with the high dimensional measurements. This + is done by looking at the neighbors of each query cell in the reference + dataset using \code{max.features} to define this space. If the reference + cell isn't found within the first \code{k.filter} neighbors, remove the + anchor.} + \item{Assign each remaining anchor a score. 
For each anchor cell, determine + the nearest \code{k.score} anchors within its own dataset and within its + pair's dataset. Based on these neighborhoods, construct an overall neighbor + graph and then compute the shared neighbor overlap between anchor and query + cells (analagous to an SNN graph). We use the 0.01 and 0.90 quantiles on + these scores to dampen outlier effects and rescale to range between 0-1.} +} +} +\references{ +Stuart T, Butler A, et al. Comprehensive Integration of +Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +} From 8f6cd2505b56fa45a452c6945c4a4b87daabb802 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 5 Mar 2020 15:42:33 -0500 Subject: [PATCH 030/111] Fix issue with 1 feature and by.col = TRUE --- R/visualization.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 07839178a..c3152a310 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1327,8 +1327,8 @@ FeaturePlot <- function( theme(plot.title = element_text(hjust = 0.5)) idx <- idx + 1 } - ncol <- nsplits - nrow <- 1 + ncol <- 1 + nrow <- nsplits } else { nrow <- split.by %iff% length(x = levels(x = data$split)) } From 4af7e593b2b2bc8b1e5ebf2019434a1459c9e833 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Thu, 5 Mar 2020 15:58:35 -0500 Subject: [PATCH 031/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a017781d6..e8559082d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9005 -Date: 2020-03-04 +Version: 3.1.4.9006 +Date: 2020-03-05 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
Authors@R: c( From 4ebbbcfe706ed99e5b2f0ceb27cca0cb21a04523 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 6 Mar 2020 15:59:32 -0500 Subject: [PATCH 032/111] fix split dots --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index c3152a310..0e3200579 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -4638,7 +4638,7 @@ SingleExIPlot <- function( vln.geom(scale = 'width', adjust = adjust, trim = TRUE), theme(axis.text.x = element_text(angle = 45, hjust = 1)) ) - jitter <- geom_jitter(height = 0, size = pt.size) + jitter <- geom_jitter(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9), size = pt.size) log.scale <- scale_y_log10() axis.scale <- ylim }, From d20140344b34d8b52e821c031b27ff698f4d342a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 6 Mar 2020 18:20:17 -0500 Subject: [PATCH 033/111] update docu --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 0e3200579..78ed464b9 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -4563,7 +4563,7 @@ SingleDimPlot <- function( #' @importFrom stats rnorm #' @importFrom utils globalVariables #' @importFrom ggridges geom_density_ridges theme_ridges -#' @importFrom ggplot2 ggplot aes_string theme labs geom_violin geom_jitter ylim +#' @importFrom ggplot2 ggplot aes_string theme labs geom_violin geom_jitter ylim position_jitterdodge #' scale_fill_manual scale_y_log10 scale_x_log10 scale_y_discrete scale_x_continuous waiver #' @importFrom cowplot theme_cowplot #' From 9356fe68dfaa810a05846d16b0659941887038e4 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 6 Mar 2020 18:33:58 -0500 Subject: [PATCH 034/111] update docu --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 13ad0ef60..263b1df25 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -409,6 +409,7 @@ importFrom(ggplot2,guides) importFrom(ggplot2,labs) importFrom(ggplot2,layer) importFrom(ggplot2,margin) +importFrom(ggplot2,position_jitterdodge) importFrom(ggplot2,scale_color_brewer) importFrom(ggplot2,scale_color_distiller) importFrom(ggplot2,scale_color_gradient) From 9a1176cf61609fad8e1a8fa76ad1729b7d773929 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 6 Mar 2020 18:44:33 -0500 Subject: [PATCH 035/111] bump develop version --- DESCRIPTION | 4 ++-- NEWS.md | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e8559082d..ad258683e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9006 -Date: 2020-03-05 +Version: 3.1.4.9007 +Date: 2020-03-06 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 0a67860f7..8c41752c4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) ## Develop ### Changes - Replace wilcox.test with limma implementation for a faster FindMarkers default method +- Better point separation for `VlnPlot`s when using the `split.by` option ## [3.1.4] - 2020-02-20 ### Changes From 24b3226e3802ba4bbf16d5d6883dd94186b93588 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 6 Mar 2020 21:21:17 -0500 Subject: [PATCH 036/111] set SCT features.to.integrate --- R/integration.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/R/integration.R b/R/integration.R index a2f2dea98..04b97703e 100644 --- a/R/integration.R +++ b/R/integration.R @@ -671,9 +671,21 @@ IntegrateData <- function( x = object.list[[1]], y = object.list[2:length(x = object.list)] ) + if ( !is.null(features.to.integrate) ) { + features.to.integrate <- intersect(features.to.integrate, + Reduce( intersect, + lapply( X = object.list, + FUN = function(obj) rownames(obj) + ) + ) + ) + } if (normalization.method == "SCT") { for (i in 1:length(x = object.list)) { assay <- DefaultAssay(object = object.list[[i]]) + if ( length(setdiff(features.to.integrate, features))!=0 ) { + object.list[[i]] <- GetResidual( object.list[[i]], features = setdiff(features.to.integrate, features), verbose = verbose ) + } object.list[[i]][[assay]] <- CreateAssayObject( data = GetAssayData(object = object.list[[i]], assay = assay, slot = "scale.data") ) From a63b84431ad80b6d224c70728dcb50425e84fe95 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 8 Mar 2020 16:35:01 -0400 Subject: [PATCH 037/111] fix one idenity no color --- R/visualization.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/visualization.R b/R/visualization.R index 78ed464b9..7acc1b4af 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1965,6 +1965,7 @@ DotPlot <- function( return(data.use) } ) + avg.exp.scaled[is.nan(avg.exp.scaled)] <- 0 avg.exp.scaled <- as.vector(x = t(x = avg.exp.scaled)) if (!is.null(x = split.by)) { avg.exp.scaled <- as.numeric(x = cut(x = avg.exp.scaled, breaks = 20)) From 8493da1dd858e9df847e40101728b8f4f0b8e979 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 9 Mar 2020 09:38:43 -0400 Subject: [PATCH 038/111] minor style fixes, bump develop version --- DESCRIPTION | 4 ++-- R/integration.R | 29 ++++++++++++++++++----------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ad258683e..7a7812070 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9007 -Date: 2020-03-06 +Version: 3.1.4.9008 +Date: 2020-03-09 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
Authors@R: c( diff --git a/R/integration.R b/R/integration.R index 04b97703e..0993729e6 100644 --- a/R/integration.R +++ b/R/integration.R @@ -671,20 +671,27 @@ IntegrateData <- function( x = object.list[[1]], y = object.list[2:length(x = object.list)] ) - if ( !is.null(features.to.integrate) ) { - features.to.integrate <- intersect(features.to.integrate, - Reduce( intersect, - lapply( X = object.list, - FUN = function(obj) rownames(obj) - ) - ) - ) - } + if (!is.null(x = features.to.integrate)) { + features.to.integrate <- intersect( + x = features.to.integrate, + y = Reduce( + f = intersect, + x = lapply( + X = object.list, + FUN = rownames + ) + ) + ) + } if (normalization.method == "SCT") { for (i in 1:length(x = object.list)) { assay <- DefaultAssay(object = object.list[[i]]) - if ( length(setdiff(features.to.integrate, features))!=0 ) { - object.list[[i]] <- GetResidual( object.list[[i]], features = setdiff(features.to.integrate, features), verbose = verbose ) + if (length(x = setdiff(x = features.to.integrate, y = features)) != 0) { + object.list[[i]] <- GetResidual( + object = object.list[[i]], + features = setdiff(x = features.to.integrate, y = features), + verbose = verbose + ) } object.list[[i]][[assay]] <- CreateAssayObject( data = GetAssayData(object = object.list[[i]], assay = assay, slot = "scale.data") From 33005ebdeee908a1316fe6243f5e9a274f4b9b70 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 9 Mar 2020 10:10:14 -0400 Subject: [PATCH 039/111] improve docs for IntegrateData --- R/integration.R | 83 ++++++++++++++++++++++++++++++++++---------- man/IntegrateData.Rd | 83 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 130 insertions(+), 36 deletions(-) diff --git a/R/integration.R b/R/integration.R index 1f87f9a81..ce0dec7c4 100644 --- a/R/integration.R +++ b/R/integration.R @@ -688,37 +688,84 @@ FindTransferAnchors <- function( #' Integrate data #' -#' Perform dataset integration using a pre-computed anchorset +#' Perform dataset integration using a pre-computed \code{\link{Anchorset}}. #' -#' @param anchorset An \code{\link{AnchorSet}} object generated by \code{\link{FindIntegrationAnchors}} +#' The main steps of this procedure are outlined below. For a more detailed +#' description of the methodology, please see Stuart, Butler, et al Cell 2019. +#' For pairwise integration: +#' +#' \itemize{ +#' \item{Construct a weights matrix that defines the association between each +#' query cell and each anchor. These weights are computed as 1 - the distance +#' between the query cell and the anchor divided by the distance of the query +#' cell to the \code{k.weight}th anchor multiplied by the anchor score +#' computed in \code{\link{FindIntegrationAchors}}. We then apply a Gaussian +#' kernel width a bandwidth defined by \code{sd.weight} and normalize across +#' all \code{k.weight} anchors.} +#' \item{Compute the anchor integration matrix as the difference between the +#' two expression matrices for every pair of anchor cells} +#' \item{Compute the transformation matrix as the product of the integration +#' matrix and the weights matrix.} +#' \item{Subtract the transformation matrix from the original expression +#' matrix.} +#' } +#' +#' For multiple dataset integration, we perform iterative pairwise integration. 
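# A schematic R illustration of the pairwise correction arithmetic described
# above (not the package's implementation). `ref`, `query`, `anchors`, and
# `weights` are hypothetical toy objects: features x cells matrices, anchor
# index pairs, and a uniform anchors x query-cells weights matrix.
set.seed(42)
ref <- matrix(rnorm(20), nrow = 4)    # 4 features x 5 reference cells
query <- matrix(rnorm(24), nrow = 4)  # 4 features x 6 query cells
anchors <- cbind(ref.cell = c(1, 3), query.cell = c(2, 5))
# integration matrix: expression difference across each anchor pair
integration.matrix <- query[, anchors[, "query.cell"]] - ref[, anchors[, "ref.cell"]]
# uniform weights for simplicity; in practice they are derived from anchor
# distances and scores, smoothed with a Gaussian kernel of bandwidth sd.weight
weights <- matrix(1 / nrow(anchors), nrow = nrow(anchors), ncol = ncol(query))
# transformation matrix subtracted from the query expression values
query.corrected <- query - integration.matrix %*% weights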
+#' To determine the order of integration (if not specified via +#' \code{sample.tree}), we +#' \itemize{ +#' \item{Define a distance between datasets as the total number of cells in +#' the samller dataset divided by the total number of anchors between the two +#' datasets.} +#' \item{Compute all pairwise distances between datasets} +#' \item{Cluster this distance matrix to determine a guide tree} +#' } +#' +#' +#' @param anchorset An \code{\link{AnchorSet}} object generated by +#' \code{\link{FindIntegrationAnchors}} #' @param new.assay.name Name for the new assay containing the integrated data #' @param normalization.method Name of normalization method used: LogNormalize #' or SCT -#' @param features Vector of features to use when computing the PCA to determine the weights. Only set -#' if you want a different set from those used in the anchor finding process -#' @param features.to.integrate Vector of features to integrate. By default, will use the features -#' used in anchor finding. +#' @param features Vector of features to use when computing the PCA to determine +#' the weights. Only set if you want a different set from those used in the +#' anchor finding process +#' @param features.to.integrate Vector of features to integrate. By default, +#' will use the features used in anchor finding. #' @param dims Number of dimensions to use in the anchor weighting procedure #' @param k.weight Number of neighbors to consider when weighting anchors -#' @param weight.reduction Dimension reduction to use when calculating anchor weights. -#' This can be either: +#' @param weight.reduction Dimension reduction to use when calculating anchor +#' weights. This can be one of: #' \itemize{ -#' \item{A string, specifying the name of a dimension reduction present in all objects to be integrated} -#' \item{A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated} -#' \item{A vector of \code{\link{DimReduc}} objects, specifying the object to use for each object in the integration} -#' \item{NULL, in which case a new PCA will be calculated and used to calculate anchor weights} +#' \item{A string, specifying the name of a dimension reduction present in +#' all objects to be integrated} +#' \item{A vector of strings, specifying the name of a dimension reduction to +#' use for each object to be integrated} +#' \item{A vector of \code{\link{DimReduc}} objects, specifying the object to +#' use for each object in the integration} +#' \item{NULL, in which case a new PCA will be calculated and used to +#' calculate anchor weights} #' } -#' Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the -#' first merge between reference and query, as the merged object will subsequently contain more cells than was in +#' Note that, if specified, the requested dimension reduction will only be used +#' for calculating anchor weights in the first merge between reference and +#' query, as the merged object will subsequently contain more cells than was in #' query, and weights will need to be calculated for all cells in the object. #' @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting -#' @param sample.tree Specify the order of integration. If NULL, will compute automatically. -#' @param preserve.order Do not reorder objects based on size for each pairwise integration. +#' @param sample.tree Specify the order of integration. If NULL, will compute +#' automatically. 
+#' @param preserve.order Do not reorder objects based on size for each pairwise +#' integration. #' @param do.cpp Run cpp code where applicable -#' @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}}) +#' @param eps Error bound on the neighbor finding algorithm (from +#' \code{\link{RANN}}) #' @param verbose Print progress bars and output #' -#' @return Returns a \code{\link{Seurat}} object with a new integrated \code{\link{Assay}} +#' @return Returns a \code{\link{Seurat}} object with a new integrated +#' \code{\link{Assay}}. If \code{normalization.method = "LogNormalize"}, the +#' integrated data is returned to the \code{data} slot and can be treated as +#' log-normalized, corrected data. If \code{normalization.method = "SCT"}, the +#' integrated data is returned to the \code{scale.data} slot and can be treated +#' as centered, corrected Pearson residuals. #' #' @export #' diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index 9b1e3594e..e8128e123 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -22,50 +22,97 @@ IntegrateData( ) } \arguments{ -\item{anchorset}{An \code{\link{AnchorSet}} object generated by \code{\link{FindIntegrationAnchors}}} +\item{anchorset}{An \code{\link{AnchorSet}} object generated by +\code{\link{FindIntegrationAnchors}}} \item{new.assay.name}{Name for the new assay containing the integrated data} \item{normalization.method}{Name of normalization method used: LogNormalize or SCT} -\item{features}{Vector of features to use when computing the PCA to determine the weights. Only set -if you want a different set from those used in the anchor finding process} +\item{features}{Vector of features to use when computing the PCA to determine +the weights. Only set if you want a different set from those used in the +anchor finding process} -\item{features.to.integrate}{Vector of features to integrate. By default, will use the features -used in anchor finding.} +\item{features.to.integrate}{Vector of features to integrate. By default, +will use the features used in anchor finding.} \item{dims}{Number of dimensions to use in the anchor weighting procedure} \item{k.weight}{Number of neighbors to consider when weighting anchors} -\item{weight.reduction}{Dimension reduction to use when calculating anchor weights. -This can be either: +\item{weight.reduction}{Dimension reduction to use when calculating anchor +weights. 
This can be one of: \itemize{ - \item{A string, specifying the name of a dimension reduction present in all objects to be integrated} - \item{A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated} - \item{A vector of \code{\link{DimReduc}} objects, specifying the object to use for each object in the integration} - \item{NULL, in which case a new PCA will be calculated and used to calculate anchor weights} + \item{A string, specifying the name of a dimension reduction present in + all objects to be integrated} + \item{A vector of strings, specifying the name of a dimension reduction to + use for each object to be integrated} + \item{A vector of \code{\link{DimReduc}} objects, specifying the object to + use for each object in the integration} + \item{NULL, in which case a new PCA will be calculated and used to + calculate anchor weights} } -Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the -first merge between reference and query, as the merged object will subsequently contain more cells than was in +Note that, if specified, the requested dimension reduction will only be used +for calculating anchor weights in the first merge between reference and +query, as the merged object will subsequently contain more cells than was in query, and weights will need to be calculated for all cells in the object.} \item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting} -\item{sample.tree}{Specify the order of integration. If NULL, will compute automatically.} +\item{sample.tree}{Specify the order of integration. If NULL, will compute +automatically.} -\item{preserve.order}{Do not reorder objects based on size for each pairwise integration.} +\item{preserve.order}{Do not reorder objects based on size for each pairwise +integration.} \item{do.cpp}{Run cpp code where applicable} -\item{eps}{Error bound on the neighbor finding algorithm (from \code{\link{RANN}})} +\item{eps}{Error bound on the neighbor finding algorithm (from +\code{\link{RANN}})} \item{verbose}{Print progress bars and output} } \value{ -Returns a \code{\link{Seurat}} object with a new integrated \code{\link{Assay}} +Returns a \code{\link{Seurat}} object with a new integrated +\code{\link{Assay}}. If \code{normalization.method = "LogNormalize"}, the +integrated data is returned to the \code{data} slot and can be treated as +log-normalized, corrected data. If \code{normalization.method = "SCT"}, the +integrated data is returned to the \code{scale.data} slot and can be treated +as centered, corrected Pearson residuals. } \description{ -Perform dataset integration using a pre-computed anchorset +Perform dataset integration using a pre-computed \code{\link{Anchorset}}. +} +\details{ +The main steps of this procedure are outlined below. For a more detailed +description of the methodology, please see Stuart, Butler, et al Cell 2019. +For pairwise integration: + +\itemize{ + \item{Construct a weights matrix that defines the association between each + query cell and each anchor. These weights are computed as 1 - the distance + between the query cell and the anchor divided by the distance of the query + cell to the \code{k.weight}th anchor multiplied by the anchor score + computed in \code{\link{FindIntegrationAchors}}. 
We then apply a Gaussian + kernel width a bandwidth defined by \code{sd.weight} and normalize across + all \code{k.weight} anchors.} + \item{Compute the anchor integration matrix as the difference between the + two expression matrices for every pair of anchor cells} + \item{Compute the transformation matrix as the product of the integration + matrix and the weights matrix.} + \item{Subtract the transformation matrix from the original expression + matrix.} +} + +For multiple dataset integration, we perform iterative pairwise integration. +To determine the order of integration (if not specified via +\code{sample.tree}), we +\itemize{ + \item{Define a distance between datasets as the total number of cells in + the samller dataset divided by the total number of anchors between the two + datasets.} + \item{Compute all pairwise distances between datasets} + \item{Cluster this distance matrix to determine a guide tree} +} } From 6ba868c0a48f9fc4ca9159321c5f3acc6f9c4954 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 9 Mar 2020 10:20:09 -0400 Subject: [PATCH 040/111] deprecate redundant sort.cell parameter --- R/visualization.R | 17 ++++++++++++----- man/FeaturePlot.Rd | 5 +++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 78ed464b9..f5d070574 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -844,7 +844,8 @@ DimPlot <- function( #' @param ncol Number of columns to combine multiple feature plots to, ignored if \code{split.by} is not \code{NULL} #' @param coord.fixed Plot cartesian coordinates with fixed aspect ratio #' @param by.col If splitting by a factor, plot the splits per column with the features as rows; ignored if \code{blend = TRUE} -#' @param sort.cell If \code{TRUE}, the positive cells will overlap the negative cells +#' @param sort.cell Redundant with \code{order}. This argument is being +#' deprecated. Please use \code{order} instead. #' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} #' ggplot object. If \code{FALSE}, return a list of ggplot objects #' @@ -897,9 +898,18 @@ FeaturePlot <- function( ncol = NULL, coord.fixed = FALSE, by.col = TRUE, - sort.cell = FALSE, + sort.cell = NULL, combine = TRUE ) { + # sort.cell to be deprecated + if (!is.null(x = sort.cell)) { + warning( + "The sort.cell parameter is being deprecated. Please use the order ", + "parameter instead for equivalent functionality.") + if (sort.cell) { + order <- sort.cell + } + } # Set a theme to remove right-hand Y axis lines # Also sets right-hand Y axis text label formatting no.right <- theme( @@ -1125,9 +1135,6 @@ FeaturePlot <- function( cols.use <- NULL } data.single <- data.plot[, c(dims, 'ident', feature, shape.by)] - if (sort.cell) { - data.single <- data.single[order(data.single[, feature]),] - } # Make the plot plot <- SingleDimPlot( data = data.single, diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index c67e85e8b..1c9d75547 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -28,7 +28,7 @@ FeaturePlot( ncol = NULL, coord.fixed = FALSE, by.col = TRUE, - sort.cell = FALSE, + sort.cell = NULL, combine = TRUE ) } @@ -92,7 +92,8 @@ different colors and different shapes on cells} \item{by.col}{If splitting by a factor, plot the splits per column with the features as rows; ignored if \code{blend = TRUE}} -\item{sort.cell}{If \code{TRUE}, the positive cells will overlap the negative cells} +\item{sort.cell}{Redundant with \code{order}. This argument is being +deprecated. 
Please use \code{order} instead.} \item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} ggplot object. If \code{FALSE}, return a list of ggplot objects} From 00d5cebb0ffbf6c9212dffa8aa43675fd02adb9e Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Mon, 9 Mar 2020 12:08:04 -0400 Subject: [PATCH 041/111] Add warning for missing vars.to.regress. #2588 --- R/preprocessing.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index 95ce72207..31e37cfcf 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2188,6 +2188,13 @@ ScaleData.default <- function( ) } # Currently, RegressOutMatrix will do nothing if latent.data = NULL + notfound <- setdiff(x = vars.to.regress, y = colnames(x = latent.data)) + if (length(x = notfound) == length(x = vars.to.regress)) { + stop("None of the requested variables to regress are present in the object.") + } else if (length(x = notfound) > 0) { + warning("Requested variables to regress not in object: ", paste(notfound, collapse = ", ")) + vars.to.regress <- colnames(x = latent.data) + } if (verbose) { message("Regressing out ", paste(vars.to.regress, collapse = ', ')) } From 80ac0127d6375bb7e7bd88f387e66e99ba0b57a0 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 9 Mar 2020 12:33:04 -0400 Subject: [PATCH 042/111] Update Style --- R/visualization.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index f5d070574..b38e4bdb9 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -844,8 +844,8 @@ DimPlot <- function( #' @param ncol Number of columns to combine multiple feature plots to, ignored if \code{split.by} is not \code{NULL} #' @param coord.fixed Plot cartesian coordinates with fixed aspect ratio #' @param by.col If splitting by a factor, plot the splits per column with the features as rows; ignored if \code{blend = TRUE} -#' @param sort.cell Redundant with \code{order}. This argument is being -#' deprecated. Please use \code{order} instead. +#' @param sort.cell Redundant with \code{order}. This argument is being +#' deprecated. Please use \code{order} instead. #' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} #' ggplot object. If \code{FALSE}, return a list of ggplot objects #' @@ -905,8 +905,11 @@ FeaturePlot <- function( if (!is.null(x = sort.cell)) { warning( "The sort.cell parameter is being deprecated. Please use the order ", - "parameter instead for equivalent functionality.") - if (sort.cell) { + "parameter instead for equivalent functionality.", + call. = FALSE, + immediate. 
= TRUE + ) + if (isTRUE(x = sort.cell)) { order <- sort.cell } } From a150380da72c589af4cc5f40399d1ea4fce31829 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 9 Mar 2020 13:17:35 -0400 Subject: [PATCH 043/111] Have warning displayed immediately --- R/preprocessing.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 31e37cfcf..bd30664c1 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2190,9 +2190,17 @@ ScaleData.default <- function( # Currently, RegressOutMatrix will do nothing if latent.data = NULL notfound <- setdiff(x = vars.to.regress, y = colnames(x = latent.data)) if (length(x = notfound) == length(x = vars.to.regress)) { - stop("None of the requested variables to regress are present in the object.") + stop( + "None of the requested variables to regress are present in the object.", + call. = FALSE + ) } else if (length(x = notfound) > 0) { - warning("Requested variables to regress not in object: ", paste(notfound, collapse = ", ")) + warning( + "Requested variables to regress not in object: ", + paste(notfound, collapse = ", "), + call. = FALSE, + immediate. = TRUE + ) vars.to.regress <- colnames(x = latent.data) } if (verbose) { From 9b417db5ee2a86d78f45d9734125a6deb98999fb Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 9 Mar 2020 16:47:07 -0400 Subject: [PATCH 044/111] bump develop version --- DESCRIPTION | 2 +- R/visualization.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7a7812070..1eea7442d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9008 +Version: 3.1.4.9009 Date: 2020-03-09 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. diff --git a/R/visualization.R b/R/visualization.R index b38e4bdb9..79526d9a1 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -901,7 +901,7 @@ FeaturePlot <- function( sort.cell = NULL, combine = TRUE ) { - # sort.cell to be deprecated + # TODO: deprecate fully on 3.2.0 if (!is.null(x = sort.cell)) { warning( "The sort.cell parameter is being deprecated. Please use the order ", From c8da5effa5b6dfde31ed61286554047492faee07 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 9 Mar 2020 17:03:27 -0400 Subject: [PATCH 045/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1eea7442d..51e2bbcda 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9009 +Version: 3.1.4.9010 Date: 2020-03-09 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 
'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. From 262ee54d2336c2fe7b5cf2d67606c521b4a9750c Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 10 Mar 2020 10:11:55 -0400 Subject: [PATCH 046/111] update TransferData man page --- R/integration.R | 66 +++++++++++++++++++++++++++++++++++++-------- man/TransferData.Rd | 66 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 110 insertions(+), 22 deletions(-) diff --git a/R/integration.R b/R/integration.R index ce0dec7c4..350b8e283 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1320,34 +1320,78 @@ SelectIntegrationFeatures <- function( #' Transfer data #' -#' Transfer categorical or continuous data across single-cell datasets. +#' Transfer categorical or continuous data across single-cell datasets. For +#' transferring categorical information, pass a vector from the reference +#' dataset (e.g. \code{refdata = reference$celltype}). For transferring +#' continuous information, pass a matrix from the reference dataset (e.g. +#' \code{refdata = GetAssayData(reference[['RNA']])}). #' -#' @param anchorset An \code{\link{AnchorSet}} object generated by \code{\link{FindTransferAnchors}} +#' The main steps of this procedure are outlined below. For a more detailed +#' description of the methodology, please see Stuart, Butler, et al Cell 2019. +#' For both transferring discrete labels and also feature imputation, we first +#' compute the weights matrix. +#' +#' \itemize{ +#' \item{Construct a weights matrix that defines the association between each +#' query cell and each anchor. These weights are computed as 1 - the distance +#' between the query cell and the anchor divided by the distance of the query +#' cell to the \code{k.weight}th anchor multiplied by the anchor score +#' computed in \code{\link{FindIntegrationAchors}}. We then apply a Gaussian +#' kernel width a bandwidth defined by \code{sd.weight} and normalize across +#' all \code{k.weight} anchors.} +#' } +#' +#' The main difference between label transfer (classification) and feature +#' imputation is what gets multiplied by the weights matrix. For label transfer, +#' we perform the following steps: +#' +#' \itemize{ +#' \item{Create a binary classification matrix, the rows corresponding to each +#' possible class and the columns corresponding to the anchors. If the +#' reference cell in the anchor pair is a member of a certain class, that +#' matrix entry is filled with a 1, otherwise 0.} +#' \item{Multiply this classification matrix by the transpose of weights +#' matrix to compute a prediction score for each class for each cell in the +#' query dataset.} +#' } +#' +#' For feature imputation, we perform the following step: +#' \itemize{ +#' \item{Multiply the expression matrix for the reference anchor cells by the +#' weights matrix. 
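The two multiplications described in this help text can be pictured with made-up numbers. The sketch below is purely illustrative (toy dimensions, weights-matrix orientation chosen for readability) and is not the package's internal code:

    # 2 query cells x 3 anchors; each row of weights sums to 1
    w <- matrix(c(0.7, 0.2, 0.1,
                  0.1, 0.3, 0.6), nrow = 2, byrow = TRUE)
    # binary class membership of the 3 reference anchor cells (2 classes)
    cls <- matrix(c(1, 1, 0,
                    0, 0, 1), nrow = 2, byrow = TRUE)
    cls %*% t(w)     # classes x query cells: per-class prediction scores, each column sums to 1
    # toy expression of 2 features in the 3 reference anchor cells
    expr <- matrix(c(5, 0, 2,
                     1, 4, 0), nrow = 2, byrow = TRUE)
    expr %*% t(w)    # features x query cells: imputed expression values
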
This returns a predicted expression matrix for the +#' specified features for each cell in the query dataset.} +#' } +#' +#' +#' @param anchorset An \code{\link{AnchorSet}} object generated by +#' \code{\link{FindTransferAnchors}} #' @param refdata Data to transfer. Should be either a vector where the names #' correspond to reference cells, or a matrix, where the column names correspond #' to the reference cells. -#' @param weight.reduction Dimensional reduction to use for the weighting anchors. -#' Options are: +#' @param weight.reduction Dimensional reduction to use for the weighting +#' anchors. Options are: #' \itemize{ #' \item{pcaproject: Use the projected PCA used for anchor building} #' \item{pca: Use an internal PCA on the query only} #' \item{cca: Use the CCA used for anchor building} -#' \item{custom DimReduc: User provided \code{\link{DimReduc}} object computed on the query -#' cells} +#' \item{custom DimReduc: User provided \code{\link{DimReduc}} object +#' computed on the query cells} #' } #' @param l2.norm Perform L2 normalization on the cell embeddings after #' dimensional reduction #' @param dims Number of dimensions to use in the anchor weighting procedure #' @param k.weight Number of neighbors to consider when weighting anchors #' @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting -#' @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}}) +#' @param eps Error bound on the neighbor finding algorithm (from +#' \code{\link{RANN}}) #' @param do.cpp Run cpp code where applicable #' @param verbose Print progress bars and output -#' @param slot Slot to store the imputed data. Must be either "data" (default) or "counts" +#' @param slot Slot to store the imputed data. Must be either "data" (default) +#' or "counts" #' -#' @return If \code{refdata} is a vector, returns a dataframe with label predictions. -#' If \code{refdata} is a matrix, returns an Assay object where the imputed data has -#' been stored in the provided slot. +#' @return If \code{refdata} is a vector, returns a dataframe with label +#' predictions. If \code{refdata} is a matrix, returns an Assay object where the +#' imputed data has been stored in the provided slot. #' #' @export #' diff --git a/man/TransferData.Rd b/man/TransferData.Rd index 712a0c683..1b2741e70 100644 --- a/man/TransferData.Rd +++ b/man/TransferData.Rd @@ -19,20 +19,21 @@ TransferData( ) } \arguments{ -\item{anchorset}{An \code{\link{AnchorSet}} object generated by \code{\link{FindTransferAnchors}}} +\item{anchorset}{An \code{\link{AnchorSet}} object generated by +\code{\link{FindTransferAnchors}}} \item{refdata}{Data to transfer. Should be either a vector where the names correspond to reference cells, or a matrix, where the column names correspond to the reference cells.} -\item{weight.reduction}{Dimensional reduction to use for the weighting anchors. -Options are: +\item{weight.reduction}{Dimensional reduction to use for the weighting +anchors. 
Options are: \itemize{ \item{pcaproject: Use the projected PCA used for anchor building} \item{pca: Use an internal PCA on the query only} \item{cca: Use the CCA used for anchor building} - \item{custom DimReduc: User provided \code{\link{DimReduc}} object computed on the query - cells} + \item{custom DimReduc: User provided \code{\link{DimReduc}} object + computed on the query cells} }} \item{l2.norm}{Perform L2 normalization on the cell embeddings after @@ -44,19 +45,62 @@ dimensional reduction} \item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting} -\item{eps}{Error bound on the neighbor finding algorithm (from \code{\link{RANN}})} +\item{eps}{Error bound on the neighbor finding algorithm (from +\code{\link{RANN}})} \item{do.cpp}{Run cpp code where applicable} \item{verbose}{Print progress bars and output} -\item{slot}{Slot to store the imputed data. Must be either "data" (default) or "counts"} +\item{slot}{Slot to store the imputed data. Must be either "data" (default) +or "counts"} } \value{ -If \code{refdata} is a vector, returns a dataframe with label predictions. -If \code{refdata} is a matrix, returns an Assay object where the imputed data has -been stored in the provided slot. +If \code{refdata} is a vector, returns a dataframe with label +predictions. If \code{refdata} is a matrix, returns an Assay object where the +imputed data has been stored in the provided slot. } \description{ -Transfer categorical or continuous data across single-cell datasets. +Transfer categorical or continuous data across single-cell datasets. For +transferring categorical information, pass a vector from the reference +dataset (e.g. \code{refdata = reference$celltype}). For transferring +continuous information, pass a matrix from the reference dataset (e.g. +\code{refdata = GetAssayData(reference[['RNA']])}). +} +\details{ +The main steps of this procedure are outlined below. For a more detailed +description of the methodology, please see Stuart, Butler, et al Cell 2019. +For both transferring discrete labels and also feature imputation, we first +compute the weights matrix. + +\itemize{ + \item{Construct a weights matrix that defines the association between each + query cell and each anchor. These weights are computed as 1 - the distance + between the query cell and the anchor divided by the distance of the query + cell to the \code{k.weight}th anchor multiplied by the anchor score + computed in \code{\link{FindIntegrationAchors}}. We then apply a Gaussian + kernel width a bandwidth defined by \code{sd.weight} and normalize across + all \code{k.weight} anchors.} +} + +The main difference between label transfer (classification) and feature +imputation is what gets multiplied by the weights matrix. For label transfer, +we perform the following steps: + +\itemize{ + \item{Create a binary classification matrix, the rows corresponding to each + possible class and the columns corresponding to the anchors. If the + reference cell in the anchor pair is a member of a certain class, that + matrix entry is filled with a 1, otherwise 0.} + \item{Multiply this classification matrix by the transpose of weights + matrix to compute a prediction score for each class for each cell in the + query dataset.} +} + +For feature imputation, we perform the following step: +\itemize{ + \item{Multiply the expression matrix for the reference anchor cells by the + weights matrix. 
This returns a predicted expression matrix for the + specified features for each cell in the query dataset.} +} } From 8f5026d2c7101c6ae355388d756169bb8e3a442d Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 10 Mar 2020 10:15:27 -0400 Subject: [PATCH 047/111] add paper reference --- R/integration.R | 6 ++++++ man/IntegrateData.Rd | 4 ++++ man/TransferData.Rd | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/R/integration.R b/R/integration.R index 350b8e283..c47ac2338 100644 --- a/R/integration.R +++ b/R/integration.R @@ -767,6 +767,9 @@ FindTransferAnchors <- function( #' integrated data is returned to the \code{scale.data} slot and can be treated #' as centered, corrected Pearson residuals. #' +#' @references Stuart T, Butler A, et al. Comprehensive Integration of +#' Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +#' #' @export #' IntegrateData <- function( @@ -1393,6 +1396,9 @@ SelectIntegrationFeatures <- function( #' predictions. If \code{refdata} is a matrix, returns an Assay object where the #' imputed data has been stored in the provided slot. #' +#' @references Stuart T, Butler A, et al. Comprehensive Integration of +#' Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +#' #' @export #' TransferData <- function( diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index e8128e123..dc7f7cf25 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -116,3 +116,7 @@ To determine the order of integration (if not specified via \item{Cluster this distance matrix to determine a guide tree} } } +\references{ +Stuart T, Butler A, et al. Comprehensive Integration of +Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +} diff --git a/man/TransferData.Rd b/man/TransferData.Rd index 1b2741e70..bfde5152c 100644 --- a/man/TransferData.Rd +++ b/man/TransferData.Rd @@ -104,3 +104,7 @@ For feature imputation, we perform the following step: specified features for each cell in the query dataset.} } } +\references{ +Stuart T, Butler A, et al. Comprehensive Integration of +Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +} From f58fcdca29273983e9d000c9d557bef4d31b390a Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Wed, 11 Mar 2020 10:37:41 -0400 Subject: [PATCH 048/111] Add simple examples and hyperlinks to paper --- R/integration.R | 54 ++++++++++++++++++++++++++++------- man/FindIntegrationAnchors.Rd | 14 +++++++-- man/FindTransferAnchors.Rd | 14 +++++++-- man/IntegrateData.Rd | 12 +++++++- man/TransferData.Rd | 13 ++++++++- 5 files changed, 91 insertions(+), 16 deletions(-) diff --git a/R/integration.R b/R/integration.R index c47ac2338..86188f440 100644 --- a/R/integration.R +++ b/R/integration.R @@ -13,7 +13,10 @@ NULL #' \code{\link{IntegrateData}} function. #' #' The main steps of this procedure are outlined below. For a more detailed -#' description of the methodology, please see Stuart, Butler, et al Cell 2019. +#' description of the methodology, please see Stuart, Butler, et al Cell 2019: +#' \url{https://doi.org/10.1016/j.cell.2019.05.031}; +#' \url{https://doi.org/10.1101/460147} +#' #' First, determine anchor.features if not explicitly specified using #' \code{\link{SelectIntegrationFeatures}}. Then for all pairwise combinations #' of reference and query datasets: @@ -88,14 +91,20 @@ NULL #' \code{\link{IntegrateData}}. #' #' @references Stuart T, Butler A, et al. 
Comprehensive Integration of -#' Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +#' Single-Cell Data. Cell. 2019;177:1888-1902 \url{https://doi.org/10.1016/ +#' j.cell.2019.05.031} #' #' @importFrom pbapply pblapply #' @importFrom future.apply future_lapply #' @importFrom future nbrOfWorkers #' #' @export -#' +#' +#' @examples +#' \dontrun{ +#' anchors <- FindIntegrationAnchors(object.list = list(object1, object2, object3)) +#' integrated <- IntegrateData(anchorset = anchors) +#' } FindIntegrationAnchors <- function( object.list = NULL, assay = NULL, @@ -406,7 +415,9 @@ FindIntegrationAnchors <- function( #' query object using the \code{\link{TransferData}} object. #' #' The main steps of this procedure are outlined below. For a more detailed -#' description of the methodology, please see Stuart, Butler, et al Cell 2019. +#' description of the methodology, please see Stuart, Butler, et al Cell 2019. +#' \url{https://doi.org/10.1016/j.cell.2019.05.031}; +#' \url{https://doi.org/10.1101/460147} #' #' \itemize{ #' @@ -478,10 +489,16 @@ FindIntegrationAnchors <- function( #' \code{\link{TransferData}} #' #' @references Stuart T, Butler A, et al. Comprehensive Integration of -#' Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +#' Single-Cell Data. Cell. 2019;177:1888-1902 \url{https://doi.org/10.1016/ +#' j.cell.2019.05.031}; #' #' @export -#' +#' @examples +#' \dontrun{ +#' anchors <- FindTransferAnchors(reference = ref.obj, query = query.obj) +#' predictions <- TransferData(anchorset = anchors, refdata = Idents(ref.obj)) +#' query.obj <- AddMetaData(object = query.obj, metadata = predictions) +#' } FindTransferAnchors <- function( reference, query, @@ -692,6 +709,9 @@ FindTransferAnchors <- function( #' #' The main steps of this procedure are outlined below. For a more detailed #' description of the methodology, please see Stuart, Butler, et al Cell 2019. +#' \url{https://doi.org/10.1016/j.cell.2019.05.031}; +#' \url{https://doi.org/10.1101/460147} +#' #' For pairwise integration: #' #' \itemize{ @@ -768,10 +788,15 @@ FindTransferAnchors <- function( #' as centered, corrected Pearson residuals. #' #' @references Stuart T, Butler A, et al. Comprehensive Integration of -#' Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +#' Single-Cell Data. Cell. 2019;177:1888-1902 \url{https://doi.org/10.1016/ +#' j.cell.2019.05.031} #' #' @export -#' +#' @examples +#' \dontrun{ +#' anchors <- FindIntegrationAnchors(object.list = list(object1, object2, object3)) +#' integrated <- IntegrateData(anchorset = anchors) +#' } IntegrateData <- function( anchorset, new.assay.name = "integrated", @@ -1331,6 +1356,9 @@ SelectIntegrationFeatures <- function( #' #' The main steps of this procedure are outlined below. For a more detailed #' description of the methodology, please see Stuart, Butler, et al Cell 2019. +#' \url{https://doi.org/10.1016/j.cell.2019.05.031}; +#' \url{https://doi.org/10.1101/460147} +#' #' For both transferring discrete labels and also feature imputation, we first #' compute the weights matrix. #' @@ -1397,10 +1425,16 @@ SelectIntegrationFeatures <- function( #' imputed data has been stored in the provided slot. #' #' @references Stuart T, Butler A, et al. Comprehensive Integration of -#' Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +#' Single-Cell Data. Cell. 
2019;177:1888-1902 \url{https://doi.org/10.1016/ +#' j.cell.2019.05.031} #' #' @export -#' +#' @examples +#' \dontrun{ +#' anchors <- FindTransferAnchors(reference = ref.obj, query = query.obj) +#' predictions <- TransferData(anchorset = anchors, refdata = Idents(ref.obj)) +#' query.obj <- AddMetaData(object = query.obj, metadata = predictions) +#' } TransferData <- function( anchorset, refdata, diff --git a/man/FindIntegrationAnchors.Rd b/man/FindIntegrationAnchors.Rd index 8b5e6a57b..baf6d1ed6 100644 --- a/man/FindIntegrationAnchors.Rd +++ b/man/FindIntegrationAnchors.Rd @@ -97,7 +97,10 @@ These anchors can later be used to integrate the objects using the } \details{ The main steps of this procedure are outlined below. For a more detailed -description of the methodology, please see Stuart, Butler, et al Cell 2019. +description of the methodology, please see Stuart, Butler, et al Cell 2019: +\url{https://doi.org/10.1016/j.cell.2019.05.031}; +\url{https://doi.org/10.1101/460147} + First, determine anchor.features if not explicitly specified using \code{\link{SelectIntegrationFeatures}}. Then for all pairwise combinations of reference and query datasets: @@ -123,7 +126,14 @@ of reference and query datasets: these scores to dampen outlier effects and rescale to range between 0-1.} } } +\examples{ +\dontrun{ +anchors <- FindIntegrationAnchors(object.list = list(object1, object2, object3)) +integrated <- IntegrateData(anchorset = anchors) +} +} \references{ Stuart T, Butler A, et al. Comprehensive Integration of -Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +Single-Cell Data. Cell. 2019;177:1888-1902 \url{https://doi.org/10.1016/ +j.cell.2019.05.031} } diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index a11efe84a..01d9d2d25 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -94,7 +94,9 @@ query object using the \code{\link{TransferData}} object. } \details{ The main steps of this procedure are outlined below. For a more detailed -description of the methodology, please see Stuart, Butler, et al Cell 2019. +description of the methodology, please see Stuart, Butler, et al Cell 2019. +\url{https://doi.org/10.1016/j.cell.2019.05.031}; +\url{https://doi.org/10.1101/460147} \itemize{ @@ -124,7 +126,15 @@ description of the methodology, please see Stuart, Butler, et al Cell 2019. these scores to dampen outlier effects and rescale to range between 0-1.} } } +\examples{ +\dontrun{ +anchors <- FindTransferAnchors(reference = ref.obj, query = query.obj) +predictions <- TransferData(anchorset = anchors, refdata = Idents(ref.obj)) +query.obj <- AddMetaData(object = query.obj, metadata = predictions) +} +} \references{ Stuart T, Butler A, et al. Comprehensive Integration of -Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +Single-Cell Data. Cell. 2019;177:1888-1902 \url{https://doi.org/10.1016/ +j.cell.2019.05.031}; } diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index dc7f7cf25..50ac6adbe 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -87,6 +87,9 @@ Perform dataset integration using a pre-computed \code{\link{Anchorset}}. \details{ The main steps of this procedure are outlined below. For a more detailed description of the methodology, please see Stuart, Butler, et al Cell 2019. 
+\url{https://doi.org/10.1016/j.cell.2019.05.031}; +\url{https://doi.org/10.1101/460147} + For pairwise integration: \itemize{ @@ -116,7 +119,14 @@ To determine the order of integration (if not specified via \item{Cluster this distance matrix to determine a guide tree} } } +\examples{ +\dontrun{ +anchors <- FindIntegrationAnchors(object.list = list(object1, object2, object3)) +integrated <- IntegrateData(anchorset = anchors) +} +} \references{ Stuart T, Butler A, et al. Comprehensive Integration of -Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +Single-Cell Data. Cell. 2019;177:1888-1902 \url{https://doi.org/10.1016/ +j.cell.2019.05.031} } diff --git a/man/TransferData.Rd b/man/TransferData.Rd index bfde5152c..78a221503 100644 --- a/man/TransferData.Rd +++ b/man/TransferData.Rd @@ -70,6 +70,9 @@ continuous information, pass a matrix from the reference dataset (e.g. \details{ The main steps of this procedure are outlined below. For a more detailed description of the methodology, please see Stuart, Butler, et al Cell 2019. +\url{https://doi.org/10.1016/j.cell.2019.05.031}; +\url{https://doi.org/10.1101/460147} + For both transferring discrete labels and also feature imputation, we first compute the weights matrix. @@ -104,7 +107,15 @@ For feature imputation, we perform the following step: specified features for each cell in the query dataset.} } } +\examples{ +\dontrun{ +anchors <- FindTransferAnchors(reference = ref.obj, query = query.obj) +predictions <- TransferData(anchorset = anchors, refdata = Idents(ref.obj)) +query.obj <- AddMetaData(object = query.obj, metadata = predictions) +} +} \references{ Stuart T, Butler A, et al. Comprehensive Integration of -Single-Cell Data. Cell. 2019;177:1888-1902 doi.org/10.1016/j.cell.2019.05.031 +Single-Cell Data. Cell. 
2019;177:1888-1902 \url{https://doi.org/10.1016/ +j.cell.2019.05.031} } From d8d5dbdb4f4942b203e2ab2705b7e2e0dcbeef34 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Wed, 11 Mar 2020 11:07:56 -0400 Subject: [PATCH 049/111] Add runable examples using SeuratData package --- R/integration.R | 94 +++++++++++++++++++++++++++++++---- man/FindIntegrationAnchors.Rd | 20 +++++++- man/FindTransferAnchors.Rd | 26 ++++++++-- man/IntegrateData.Rd | 20 +++++++- man/TransferData.Rd | 26 ++++++++-- 5 files changed, 169 insertions(+), 17 deletions(-) diff --git a/R/integration.R b/R/integration.R index 86188f440..5196bc340 100644 --- a/R/integration.R +++ b/R/integration.R @@ -102,7 +102,25 @@ NULL #' #' @examples #' \dontrun{ -#' anchors <- FindIntegrationAnchors(object.list = list(object1, object2, object3)) +#' # to install the SeuratData package see https://github.com/satijalab/seurat-data +#' library(SeuratData) +#' data("panc8") +#' +#' # panc8 is a merged Seurat object containing 8 separate pancreas datasets +#' # split the object by dataset +#' pancreas.list <- SplitObject(panc8, split.by = "tech") +#' +#' # perform standard preprocessing on each object +#' for (i in 1:length(pancreas.list)) { +#' pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) +#' pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", +#' nfeatures = 2000, verbose = FALSE) +#' } +#' +#' # find anchors +#' anchors <- FindIntegrationAnchors(object.list = pancreas.list) +#' +#' # integrate data #' integrated <- IntegrateData(anchorset = anchors) #' } FindIntegrationAnchors <- function( @@ -493,11 +511,31 @@ FindIntegrationAnchors <- function( #' j.cell.2019.05.031}; #' #' @export -#' @examples +#' @examples #' \dontrun{ -#' anchors <- FindTransferAnchors(reference = ref.obj, query = query.obj) -#' predictions <- TransferData(anchorset = anchors, refdata = Idents(ref.obj)) -#' query.obj <- AddMetaData(object = query.obj, metadata = predictions) +#' # to install the SeuratData package see https://github.com/satijalab/seurat-data +#' library(SeuratData) +#' data("pbmc3k") +#' +#' # for demonstration, split the object into reference and query +#' pbmc.reference <- pbmc3k[, 1:1350] +#' pbmc.query <- pbmc3k[, 1351:2700] +#' +#' # perform standard preprocessing on each object +#' pbmc.reference <- NormalizeData(pbmc.reference) +#' pbmc.reference <- FindVariableFeatures(pbmc.reference) +#' pbmc.reference <- ScaleData(pbmc.reference) +#' +#' pbmc.query <- NormalizeData(pbmc.query) +#' pbmc.query <- FindVariableFeatures(pbmc.query) +#' pbmc.query <- ScaleData(pbmc.query) +#' +#' # find anchors +#' anchors <- FindTransferAnchors(reference = pbmc.reference, query = pbmc.query) +#' +#' # transfer labels +#' predictions <- TransferData(anchorset = anchors, refdata = pbmc.reference$seurat_annotations) +#' pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions) #' } FindTransferAnchors <- function( reference, @@ -794,7 +832,25 @@ FindTransferAnchors <- function( #' @export #' @examples #' \dontrun{ -#' anchors <- FindIntegrationAnchors(object.list = list(object1, object2, object3)) +#' # to install the SeuratData package see https://github.com/satijalab/seurat-data +#' library(SeuratData) +#' data("panc8") +#' +#' # panc8 is a merged Seurat object containing 8 separate pancreas datasets +#' # split the object by dataset +#' pancreas.list <- SplitObject(panc8, split.by = "tech") +#' +#' # perform standard preprocessing on each object +#' 
for (i in 1:length(pancreas.list)) { +#' pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) +#' pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", +#' nfeatures = 2000, verbose = FALSE) +#' } +#' +#' # find anchors +#' anchors <- FindIntegrationAnchors(object.list = pancreas.list) +#' +#' # integrate data #' integrated <- IntegrateData(anchorset = anchors) #' } IntegrateData <- function( @@ -1431,9 +1487,29 @@ SelectIntegrationFeatures <- function( #' @export #' @examples #' \dontrun{ -#' anchors <- FindTransferAnchors(reference = ref.obj, query = query.obj) -#' predictions <- TransferData(anchorset = anchors, refdata = Idents(ref.obj)) -#' query.obj <- AddMetaData(object = query.obj, metadata = predictions) +#' # to install the SeuratData package see https://github.com/satijalab/seurat-data +#' library(SeuratData) +#' data("pbmc3k") +#' +#' # for demonstration, split the object into reference and query +#' pbmc.reference <- pbmc3k[, 1:1350] +#' pbmc.query <- pbmc3k[, 1351:2700] +#' +#' # perform standard preprocessing on each object +#' pbmc.reference <- NormalizeData(pbmc.reference) +#' pbmc.reference <- FindVariableFeatures(pbmc.reference) +#' pbmc.reference <- ScaleData(pbmc.reference) +#' +#' pbmc.query <- NormalizeData(pbmc.query) +#' pbmc.query <- FindVariableFeatures(pbmc.query) +#' pbmc.query <- ScaleData(pbmc.query) +#' +#' # find anchors +#' anchors <- FindTransferAnchors(reference = pbmc.reference, query = pbmc.query) +#' +#' # transfer labels +#' predictions <- TransferData(anchorset = anchors, refdata = pbmc.reference$seurat_annotations) +#' pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions) #' } TransferData <- function( anchorset, diff --git a/man/FindIntegrationAnchors.Rd b/man/FindIntegrationAnchors.Rd index baf6d1ed6..be2800c01 100644 --- a/man/FindIntegrationAnchors.Rd +++ b/man/FindIntegrationAnchors.Rd @@ -128,7 +128,25 @@ of reference and query datasets: } \examples{ \dontrun{ -anchors <- FindIntegrationAnchors(object.list = list(object1, object2, object3)) +# to install the SeuratData package see https://github.com/satijalab/seurat-data +library(SeuratData) +data("panc8") + +# panc8 is a merged Seurat object containing 8 separate pancreas datasets +# split the object by dataset +pancreas.list <- SplitObject(panc8, split.by = "tech") + +# perform standard preprocessing on each object +for (i in 1:length(pancreas.list)) { + pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) + pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", + nfeatures = 2000, verbose = FALSE) + } + +# find anchors +anchors <- FindIntegrationAnchors(object.list = pancreas.list) + +# integrate data integrated <- IntegrateData(anchorset = anchors) } } diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index 01d9d2d25..beb676632 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -128,9 +128,29 @@ description of the methodology, please see Stuart, Butler, et al Cell 2019. 
} \examples{ \dontrun{ -anchors <- FindTransferAnchors(reference = ref.obj, query = query.obj) -predictions <- TransferData(anchorset = anchors, refdata = Idents(ref.obj)) -query.obj <- AddMetaData(object = query.obj, metadata = predictions) +# to install the SeuratData package see https://github.com/satijalab/seurat-data +library(SeuratData) +data("pbmc3k") + +# for demonstration, split the object into reference and query +pbmc.reference <- pbmc3k[, 1:1350] +pbmc.query <- pbmc3k[, 1351:2700] + +# perform standard preprocessing on each object +pbmc.reference <- NormalizeData(pbmc.reference) +pbmc.reference <- FindVariableFeatures(pbmc.reference) +pbmc.reference <- ScaleData(pbmc.reference) + +pbmc.query <- NormalizeData(pbmc.query) +pbmc.query <- FindVariableFeatures(pbmc.query) +pbmc.query <- ScaleData(pbmc.query) + +# find anchors +anchors <- FindTransferAnchors(reference = pbmc.reference, query = pbmc.query) + +# transfer labels +predictions <- TransferData(anchorset = anchors, refdata = pbmc.reference$seurat_annotations) +pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions) } } \references{ diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index 50ac6adbe..63db4759a 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -121,7 +121,25 @@ To determine the order of integration (if not specified via } \examples{ \dontrun{ -anchors <- FindIntegrationAnchors(object.list = list(object1, object2, object3)) +# to install the SeuratData package see https://github.com/satijalab/seurat-data +library(SeuratData) +data("panc8") + +# panc8 is a merged Seurat object containing 8 separate pancreas datasets +# split the object by dataset +pancreas.list <- SplitObject(panc8, split.by = "tech") + +# perform standard preprocessing on each object +for (i in 1:length(pancreas.list)) { + pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) + pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", + nfeatures = 2000, verbose = FALSE) + } + +# find anchors +anchors <- FindIntegrationAnchors(object.list = pancreas.list) + +# integrate data integrated <- IntegrateData(anchorset = anchors) } } diff --git a/man/TransferData.Rd b/man/TransferData.Rd index 78a221503..3951de6fb 100644 --- a/man/TransferData.Rd +++ b/man/TransferData.Rd @@ -109,9 +109,29 @@ For feature imputation, we perform the following step: } \examples{ \dontrun{ -anchors <- FindTransferAnchors(reference = ref.obj, query = query.obj) -predictions <- TransferData(anchorset = anchors, refdata = Idents(ref.obj)) -query.obj <- AddMetaData(object = query.obj, metadata = predictions) +# to install the SeuratData package see https://github.com/satijalab/seurat-data +library(SeuratData) +data("pbmc3k") + +# for demonstration, split the object into reference and query +pbmc.reference <- pbmc3k[, 1:1350] +pbmc.query <- pbmc3k[, 1351:2700] + +# perform standard preprocessing on each object +pbmc.reference <- NormalizeData(pbmc.reference) +pbmc.reference <- FindVariableFeatures(pbmc.reference) +pbmc.reference <- ScaleData(pbmc.reference) + +pbmc.query <- NormalizeData(pbmc.query) +pbmc.query <- FindVariableFeatures(pbmc.query) +pbmc.query <- ScaleData(pbmc.query) + +# find anchors +anchors <- FindTransferAnchors(reference = pbmc.reference, query = pbmc.query) + +# transfer labels +predictions <- TransferData(anchorset = anchors, refdata = pbmc.reference$seurat_annotations) +pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions) } } \references{ From 
fb5388be7784dbcb293b9200a82ec3e7468b7d1b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 11 Mar 2020 21:24:31 -0400 Subject: [PATCH 050/111] fix bug with only one row in lvls --- R/preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index bd30664c1..43c3a684a 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -934,7 +934,7 @@ Read10X <- function(data.dir = NULL, gene.column = 2, unique.features = TRUE) { data <- lapply( X = lvls, FUN = function(l) { - return(data[data_types == l, ]) + return(data[data_types == l, , drop = FALSE]) } ) names(x = data) <- lvls From b8b1e194b9b1560be501d905c33f3ef0fdb3ef03 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 13 Mar 2020 11:53:53 -0400 Subject: [PATCH 051/111] make sure class is correct when calling out to cpp functions --- R/integration.R | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/R/integration.R b/R/integration.R index 0993729e6..30e309961 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2467,7 +2467,13 @@ ProjectCellEmbeddings <- function( if (is.null(x = feature.mean)) { feature.mean <- rowMeans(x = reference.data) - feature.sd <- sqrt(SparseRowVar2(mat = reference.data, mu = feature.mean, display_progress = FALSE)) + feature.sd <- sqrt( + x = SparseRowVar2( + mat = as(object = reference.data, Class = "dgCMatrix"), + mu = feature.mean, + display_progress = FALSE + ) + ) feature.sd[is.na(x = feature.sd)] <- 1 feature.mean[is.na(x = feature.mean)] <- 1 } @@ -2478,12 +2484,12 @@ ProjectCellEmbeddings <- function( )[features, ] store.names <- dimnames(x = proj.data) if (is.numeric(x = feature.mean) && feature.mean != "SCT") { - proj.data <- FastSparseRowScaleWithKnownStats( - mat = proj.data, - mu = feature.mean, - sigma = feature.sd, - display_progress = FALSE - ) + proj.data <- FastSparseRowScaleWithKnownStats( + mat = as(object = proj.data, Class = "dgCMatrix"), + mu = feature.mean, + sigma = feature.sd, + display_progress = FALSE + ) } dimnames(x = proj.data) <- store.names ref.feature.loadings <- Loadings(object = reference[[reduction]])[features, dims] From 8bcfc0759f352f3de3701057cb69694f01448ca3 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 13 Mar 2020 12:31:36 -0400 Subject: [PATCH 052/111] Fix issue with underscores in cluster labels in DimPlot Addresses satijalab/seurat#2342 --- R/visualization.R | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 79526d9a1..7e01d98dc 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1987,11 +1987,18 @@ DotPlot <- function( data.plot$pct.exp[data.plot$pct.exp < dot.min] <- NA data.plot$pct.exp <- data.plot$pct.exp * 100 if (!is.null(x = split.by)) { + # splits.use <- vapply( + # X = strsplit(x = as.character(x = data.plot$id), split = '_'), + # FUN = '[[', + # FUN.VALUE = character(length = 1L), + # 2 + # ) splits.use <- vapply( X = strsplit(x = as.character(x = data.plot$id), split = '_'), - FUN = '[[', - FUN.VALUE = character(length = 1L), - 2 + FUN = function(x) { + return(paste(x[2:length(x = x)], collapse = '_')) + }, + FUN.VALUE = character(length = 1L) ) data.plot$colors <- mapply( FUN = function(color, value) { From 7166cd56e12c10130d21fa2692f71301eb321cc1 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 13 Mar 2020 14:11:23 -0400 Subject: [PATCH 053/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/DESCRIPTION b/DESCRIPTION index 51e2bbcda..def9f5762 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9010 -Date: 2020-03-09 +Version: 3.1.4.9011 +Date: 2020-03-13 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( From 1a65c00ac147530b62cc5c858fa3981dcf73a5a1 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 13 Mar 2020 14:55:16 -0400 Subject: [PATCH 054/111] examples + formatting --- R/integration.R | 72 ++++++++++++++++++++++++++++---- man/FindIntegrationAnchors.Rd | 9 ++-- man/FindTransferAnchors.Rd | 6 ++- man/IntegrateData.Rd | 9 ++-- man/PrepSCTIntegration.Rd | 30 +++++++++++++ man/SelectIntegrationFeatures.Rd | 18 ++++++++ man/TransferData.Rd | 1 + 7 files changed, 130 insertions(+), 15 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5196bc340..e17e9dc3b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -113,9 +113,11 @@ NULL #' # perform standard preprocessing on each object #' for (i in 1:length(pancreas.list)) { #' pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) -#' pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", -#' nfeatures = 2000, verbose = FALSE) -#' } +#' pancreas.list[[i]] <- FindVariableFeatures( +#' pancreas.list[[i]], selection.method = "vst", +#' nfeatures = 2000, verbose = FALSE +#' ) +#' } #' #' # find anchors #' anchors <- FindIntegrationAnchors(object.list = pancreas.list) @@ -123,6 +125,7 @@ NULL #' # integrate data #' integrated <- IntegrateData(anchorset = anchors) #' } +#' FindIntegrationAnchors <- function( object.list = NULL, assay = NULL, @@ -534,9 +537,13 @@ FindIntegrationAnchors <- function( #' anchors <- FindTransferAnchors(reference = pbmc.reference, query = pbmc.query) #' #' # transfer labels -#' predictions <- TransferData(anchorset = anchors, refdata = pbmc.reference$seurat_annotations) +#' predictions <- TransferData( +#' anchorset = anchors, +#' refdata = pbmc.reference$seurat_annotations +#' ) #' pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions) #' } +#' FindTransferAnchors <- function( reference, query, @@ -843,9 +850,11 @@ FindTransferAnchors <- function( #' # perform standard preprocessing on each object #' for (i in 1:length(pancreas.list)) { #' pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) -#' pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", -#' nfeatures = 2000, verbose = FALSE) -#' } +#' pancreas.list[[i]] <- FindVariableFeatures( +#' pancreas.list[[i]], selection.method = "vst", +#' nfeatures = 2000, verbose = FALSE +#' ) +#' } #' #' # find anchors #' anchors <- FindIntegrationAnchors(object.list = pancreas.list) @@ -853,6 +862,7 @@ FindTransferAnchors <- function( #' # integrate data #' integrated <- IntegrateData(anchorset = anchors) #' } +#' IntegrateData <- 
function( anchorset, new.assay.name = "integrated", @@ -1194,7 +1204,35 @@ MixingMetric <- function( #' @importFrom future.apply future_lapply #' #' @export -#' +#' @examples +#' \dontrun{ +#' # to install the SeuratData package see https://github.com/satijalab/seurat-data +#' library(SeuratData) +#' data("panc8") +#' +#' # panc8 is a merged Seurat object containing 8 separate pancreas datasets +#' # split the object by dataset and take the first 2 to integrate +#' pancreas.list <- SplitObject(panc8, split.by = "tech")[1:2] +#' +#' # perform SCTransform normalization +#' pancreas.list <- lapply(X = pancreas.list, FUN = SCTransform) +#' +#' # select integration features and prep step +#' features <- SelectIntegrationFeatures(pancreas.list) +#' pancreas.list <- PrepSCTIntegration( +#' pancreas.list, +#' anchor.features = features +#' ) +#' +#' # downstream integration steps +#' anchors <- FindIntegrationAnchors( +#' pancreas.list, +#' normalization.method = "SCT", +#' anchor.features = features +#' ) +#' pancreas.integrated <- IntegrateData(anchors) +#' } +#' PrepSCTIntegration <- function( object.list, assay = NULL, @@ -1334,6 +1372,23 @@ PrepSCTIntegration <- function( #' @return A vector of selected features #' #' @export +#' +#' @examples +#' \dontrun{ +#' # to install the SeuratData package see https://github.com/satijalab/seurat-data +#' library(SeuratData) +#' data("panc8") +#' +#' # panc8 is a merged Seurat object containing 8 separate pancreas datasets +#' # split the object by dataset and take the first 2 +#' pancreas.list <- SplitObject(panc8, split.by = "tech")[1:2] +#' +#' # perform SCTransform normalization +#' pancreas.list <- lapply(X = pancreas.list, FUN = SCTransform) +#' +#' # select integration features +#' features <- SelectIntegrationFeatures(pancreas.list) +#' } #' SelectIntegrationFeatures <- function( object.list, @@ -1511,6 +1566,7 @@ SelectIntegrationFeatures <- function( #' predictions <- TransferData(anchorset = anchors, refdata = pbmc.reference$seurat_annotations) #' pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions) #' } +#' TransferData <- function( anchorset, refdata, diff --git a/man/FindIntegrationAnchors.Rd b/man/FindIntegrationAnchors.Rd index be2800c01..2870c1f3e 100644 --- a/man/FindIntegrationAnchors.Rd +++ b/man/FindIntegrationAnchors.Rd @@ -139,9 +139,11 @@ pancreas.list <- SplitObject(panc8, split.by = "tech") # perform standard preprocessing on each object for (i in 1:length(pancreas.list)) { pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) - pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", - nfeatures = 2000, verbose = FALSE) - } + pancreas.list[[i]] <- FindVariableFeatures( + pancreas.list[[i]], selection.method = "vst", + nfeatures = 2000, verbose = FALSE + ) +} # find anchors anchors <- FindIntegrationAnchors(object.list = pancreas.list) @@ -149,6 +151,7 @@ anchors <- FindIntegrationAnchors(object.list = pancreas.list) # integrate data integrated <- IntegrateData(anchorset = anchors) } + } \references{ Stuart T, Butler A, et al. 
Comprehensive Integration of diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index beb676632..3260a2bb1 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -149,9 +149,13 @@ pbmc.query <- ScaleData(pbmc.query) anchors <- FindTransferAnchors(reference = pbmc.reference, query = pbmc.query) # transfer labels -predictions <- TransferData(anchorset = anchors, refdata = pbmc.reference$seurat_annotations) +predictions <- TransferData( + anchorset = anchors, + refdata = pbmc.reference$seurat_annotations +) pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions) } + } \references{ Stuart T, Butler A, et al. Comprehensive Integration of diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index 63db4759a..2c5bb6e51 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -132,9 +132,11 @@ pancreas.list <- SplitObject(panc8, split.by = "tech") # perform standard preprocessing on each object for (i in 1:length(pancreas.list)) { pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) - pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", - nfeatures = 2000, verbose = FALSE) - } + pancreas.list[[i]] <- FindVariableFeatures( + pancreas.list[[i]], selection.method = "vst", + nfeatures = 2000, verbose = FALSE + ) +} # find anchors anchors <- FindIntegrationAnchors(object.list = pancreas.list) @@ -142,6 +144,7 @@ anchors <- FindIntegrationAnchors(object.list = pancreas.list) # integrate data integrated <- IntegrateData(anchorset = anchors) } + } \references{ Stuart T, Butler A, et al. Comprehensive Integration of diff --git a/man/PrepSCTIntegration.Rd b/man/PrepSCTIntegration.Rd index c00dd4d2e..c4d5653c5 100644 --- a/man/PrepSCTIntegration.Rd +++ b/man/PrepSCTIntegration.Rd @@ -54,3 +54,33 @@ This function takes in a list of objects that have been normalized with the anchor.features for efficiency in downstream processing. } } } +\examples{ +\dontrun{ +# to install the SeuratData package see https://github.com/satijalab/seurat-data +library(SeuratData) +data("panc8") + +# panc8 is a merged Seurat object containing 8 separate pancreas datasets +# split the object by dataset and take the first 2 to integrate +pancreas.list <- SplitObject(panc8, split.by = "tech")[1:2] + +# perform SCTransform normalization +pancreas.list <- lapply(X = pancreas.list, FUN = SCTransform) + +# select integration features and prep step +features <- SelectIntegrationFeatures(pancreas.list) +pancreas.list <- PrepSCTIntegration( + pancreas.list, + anchor.features = features +) + +# downstream integration steps +anchors <- FindIntegrationAnchors( + pancreas.list, + normalization.method = "SCT", + anchor.features = features +) +pancreas.integrated <- IntegrateData(anchors) +} + +} diff --git a/man/SelectIntegrationFeatures.Rd b/man/SelectIntegrationFeatures.Rd index 6140b0084..b9c639ba7 100644 --- a/man/SelectIntegrationFeatures.Rd +++ b/man/SelectIntegrationFeatures.Rd @@ -43,3 +43,21 @@ If for any assay in the list, \code{\link{FindVariableFeatures}} hasn't been run, this method will try to run it using the \code{fvf.nfeatures} parameter and any additional ones specified through the \dots. 
} +\examples{ +\dontrun{ +# to install the SeuratData package see https://github.com/satijalab/seurat-data +library(SeuratData) +data("panc8") + +# panc8 is a merged Seurat object containing 8 separate pancreas datasets +# split the object by dataset and take the first 2 +pancreas.list <- SplitObject(panc8, split.by = "tech")[1:2] + +# perform SCTransform normalization +pancreas.list <- lapply(X = pancreas.list, FUN = SCTransform) + +# select integration features +features <- SelectIntegrationFeatures(pancreas.list) +} + +} diff --git a/man/TransferData.Rd b/man/TransferData.Rd index 3951de6fb..d58daa4e9 100644 --- a/man/TransferData.Rd +++ b/man/TransferData.Rd @@ -133,6 +133,7 @@ anchors <- FindTransferAnchors(reference = pbmc.reference, query = pbmc.query) predictions <- TransferData(anchorset = anchors, refdata = pbmc.reference$seurat_annotations) pbmc.query <- AddMetaData(object = pbmc.query, metadata = predictions) } + } \references{ Stuart T, Butler A, et al. Comprehensive Integration of From 023afb1b7975e22580ac5fdf74e872933ac72e18 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 13 Mar 2020 15:06:48 -0400 Subject: [PATCH 055/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e97dd60f5..6a912b6b4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9011 +Version: 3.1.4.9012 Date: 2020-03-13 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. From 4376d3516b39fcab0593e0478d33fd7cf16c55bc Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 13 Mar 2020 15:16:56 -0400 Subject: [PATCH 056/111] fix some doc typos --- R/integration.R | 16 ++++++++-------- man/FindIntegrationAnchors.Rd | 4 ++-- man/FindTransferAnchors.Rd | 2 +- man/IntegrateData.Rd | 6 +++--- man/TransferData.Rd | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R/integration.R b/R/integration.R index 565e0ede4..b29456933 100644 --- a/R/integration.R +++ b/R/integration.R @@ -38,7 +38,7 @@ NULL #' the nearest \code{k.score} anchors within its own dataset and within its #' pair's dataset. Based on these neighborhoods, construct an overall neighbor #' graph and then compute the shared neighbor overlap between anchor and query -#' cells (analagous to an SNN graph). We use the 0.01 and 0.90 quantiles on +#' cells (analogous to an SNN graph). We use the 0.01 and 0.90 quantiles on #' these scores to dampen outlier effects and rescale to range between 0-1.} #' } #' @@ -87,7 +87,7 @@ NULL #' @param eps Error bound on the neighbor finding algorithm (from RANN) #' @param verbose Print progress bars and output #' -#' @return Returns an \code{AnchorSet} object that can be used as input to +#' @return Returns an \code{\link{AnchorSet}} object that can be used as input to #' \code{\link{IntegrateData}}. 
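The 0.01/0.90 quantile step mentioned in the context lines above amounts to clipping the raw anchor scores and rescaling them onto 0-1. A schematic sketch, with a stand-in vector of raw scores (this is not the internal implementation):

    scores <- runif(200)                            # stand-in for raw anchor scores
    q <- quantile(scores, probs = c(0.01, 0.90))
    clipped <- pmin(pmax(scores, q[1]), q[2])       # dampen outliers at the 1st and 90th percentiles
    rescaled <- (clipped - q[1]) / (q[2] - q[1])    # map the clipped scores onto the 0-1 range
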
#' #' @references Stuart T, Butler A, et al. Comprehensive Integration of @@ -464,7 +464,7 @@ FindIntegrationAnchors <- function( #' the nearest \code{k.score} anchors within its own dataset and within its #' pair's dataset. Based on these neighborhoods, construct an overall neighbor #' graph and then compute the shared neighbor overlap between anchor and query -#' cells (analagous to an SNN graph). We use the 0.01 and 0.90 quantiles on +#' cells (analogous to an SNN graph). We use the 0.01 and 0.90 quantiles on #' these scores to dampen outlier effects and rescale to range between 0-1.} #' } #' @@ -750,7 +750,7 @@ FindTransferAnchors <- function( #' Integrate data #' -#' Perform dataset integration using a pre-computed \code{\link{Anchorset}}. +#' Perform dataset integration using a pre-computed \code{\link{AnchorSet}}. #' #' The main steps of this procedure are outlined below. For a more detailed #' description of the methodology, please see Stuart, Butler, et al Cell 2019. @@ -764,7 +764,7 @@ FindTransferAnchors <- function( #' query cell and each anchor. These weights are computed as 1 - the distance #' between the query cell and the anchor divided by the distance of the query #' cell to the \code{k.weight}th anchor multiplied by the anchor score -#' computed in \code{\link{FindIntegrationAchors}}. We then apply a Gaussian +#' computed in \code{\link{FindIntegrationAnchors}}. We then apply a Gaussian #' kernel width a bandwidth defined by \code{sd.weight} and normalize across #' all \code{k.weight} anchors.} #' \item{Compute the anchor integration matrix as the difference between the @@ -780,7 +780,7 @@ FindTransferAnchors <- function( #' \code{sample.tree}), we #' \itemize{ #' \item{Define a distance between datasets as the total number of cells in -#' the samller dataset divided by the total number of anchors between the two +#' the smaller dataset divided by the total number of anchors between the two #' datasets.} #' \item{Compute all pairwise distances between datasets} #' \item{Cluster this distance matrix to determine a guide tree} @@ -1496,7 +1496,7 @@ SelectIntegrationFeatures <- function( #' query cell and each anchor. These weights are computed as 1 - the distance #' between the query cell and the anchor divided by the distance of the query #' cell to the \code{k.weight}th anchor multiplied by the anchor score -#' computed in \code{\link{FindIntegrationAchors}}. We then apply a Gaussian +#' computed in \code{\link{FindIntegrationAnchors}}. We then apply a Gaussian #' kernel width a bandwidth defined by \code{sd.weight} and normalize across #' all \code{k.weight} anchors.} #' } @@ -1549,7 +1549,7 @@ SelectIntegrationFeatures <- function( #' @param slot Slot to store the imputed data. Must be either "data" (default) #' or "counts" #' -#' @return If \code{refdata} is a vector, returns a dataframe with label +#' @return If \code{refdata} is a vector, returns a data.frame with label #' predictions. If \code{refdata} is a matrix, returns an Assay object where the #' imputed data has been stored in the provided slot. #' diff --git a/man/FindIntegrationAnchors.Rd b/man/FindIntegrationAnchors.Rd index 2870c1f3e..c2d0e3c94 100644 --- a/man/FindIntegrationAnchors.Rd +++ b/man/FindIntegrationAnchors.Rd @@ -87,7 +87,7 @@ annoy} \item{verbose}{Print progress bars and output} } \value{ -Returns an \code{AnchorSet} object that can be used as input to +Returns an \code{\link{AnchorSet}} object that can be used as input to \code{\link{IntegrateData}}. 
} \description{ @@ -122,7 +122,7 @@ of reference and query datasets: the nearest \code{k.score} anchors within its own dataset and within its pair's dataset. Based on these neighborhoods, construct an overall neighbor graph and then compute the shared neighbor overlap between anchor and query - cells (analagous to an SNN graph). We use the 0.01 and 0.90 quantiles on + cells (analogous to an SNN graph). We use the 0.01 and 0.90 quantiles on these scores to dampen outlier effects and rescale to range between 0-1.} } } diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index 3260a2bb1..940143b81 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -122,7 +122,7 @@ description of the methodology, please see Stuart, Butler, et al Cell 2019. the nearest \code{k.score} anchors within its own dataset and within its pair's dataset. Based on these neighborhoods, construct an overall neighbor graph and then compute the shared neighbor overlap between anchor and query - cells (analagous to an SNN graph). We use the 0.01 and 0.90 quantiles on + cells (analogous to an SNN graph). We use the 0.01 and 0.90 quantiles on these scores to dampen outlier effects and rescale to range between 0-1.} } } diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index 2c5bb6e51..d01b8a1ff 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -82,7 +82,7 @@ integrated data is returned to the \code{scale.data} slot and can be treated as centered, corrected Pearson residuals. } \description{ -Perform dataset integration using a pre-computed \code{\link{Anchorset}}. +Perform dataset integration using a pre-computed \code{\link{AnchorSet}}. } \details{ The main steps of this procedure are outlined below. For a more detailed @@ -97,7 +97,7 @@ For pairwise integration: query cell and each anchor. These weights are computed as 1 - the distance between the query cell and the anchor divided by the distance of the query cell to the \code{k.weight}th anchor multiplied by the anchor score - computed in \code{\link{FindIntegrationAchors}}. We then apply a Gaussian + computed in \code{\link{FindIntegrationAnchors}}. We then apply a Gaussian kernel width a bandwidth defined by \code{sd.weight} and normalize across all \code{k.weight} anchors.} \item{Compute the anchor integration matrix as the difference between the @@ -113,7 +113,7 @@ To determine the order of integration (if not specified via \code{sample.tree}), we \itemize{ \item{Define a distance between datasets as the total number of cells in - the samller dataset divided by the total number of anchors between the two + the smaller dataset divided by the total number of anchors between the two datasets.} \item{Compute all pairwise distances between datasets} \item{Cluster this distance matrix to determine a guide tree} diff --git a/man/TransferData.Rd b/man/TransferData.Rd index d58daa4e9..705ac8638 100644 --- a/man/TransferData.Rd +++ b/man/TransferData.Rd @@ -56,7 +56,7 @@ dimensional reduction} or "counts"} } \value{ -If \code{refdata} is a vector, returns a dataframe with label +If \code{refdata} is a vector, returns a data.frame with label predictions. If \code{refdata} is a matrix, returns an Assay object where the imputed data has been stored in the provided slot. } @@ -81,7 +81,7 @@ compute the weights matrix. query cell and each anchor. 
These weights are computed as 1 - the distance between the query cell and the anchor divided by the distance of the query cell to the \code{k.weight}th anchor multiplied by the anchor score - computed in \code{\link{FindIntegrationAchors}}. We then apply a Gaussian + computed in \code{\link{FindIntegrationAnchors}}. We then apply a Gaussian kernel width a bandwidth defined by \code{sd.weight} and normalize across all \code{k.weight} anchors.} } From c25ee5feb9bec7d66653741a93fd1f539ec94e78 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 13 Mar 2020 15:56:15 -0400 Subject: [PATCH 057/111] Add keep.sparse parameter; #2359 --- R/preprocessing.R | 22 ++++++++++++++++------ man/CreateGeneActivityMatrix.Rd | 6 ++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index bd30664c1..f11301c88 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -165,6 +165,10 @@ CalculateBarcodeInflections <- function( #' @param include.body Include the gene body? #' @param upstream Number of bases upstream to consider #' @param downstream Number of bases downstream to consider +#' @param keep.sparse Leave the matrix as a sparse matrix. Setting this option to +#' TRUE will take much longer but will use less memory. This can be useful if +#' you have a very large matrix that cannot fit into memory when converted to +#' a dense form. #' @param verbose Print progress/messages #' #' @importFrom future nbrOfWorkers @@ -177,6 +181,7 @@ CreateGeneActivityMatrix <- function( include.body = TRUE, upstream = 2000, downstream = 0, + keep.sparse = FALSE, verbose = TRUE ) { if (!PackageCheck('GenomicRanges', error = FALSE)) { @@ -228,7 +233,9 @@ CreateGeneActivityMatrix <- function( colnames(x = annotations) <- c('feature', 'new_feature') # collapse into expression matrix - peak.matrix <- as(object = peak.matrix, Class = 'matrix') + if (!keep.sparse) { + peak.matrix <- as(object = peak.matrix, Class = 'matrix') + } all.features <- unique(x = annotations$new_feature) if (nbrOfWorkers() > 1) { @@ -236,18 +243,21 @@ CreateGeneActivityMatrix <- function( } else { mysapply <- ifelse(test = verbose, yes = pbsapply, no = sapply) } - newmat <- mysapply(X = 1:length(x = all.features), FUN = function(x){ + newmat.list <- mysapply(X = 1:length(x = all.features), FUN = function(x){ features.use <- annotations[annotations$new_feature == all.features[[x]], ]$feature submat <- peak.matrix[features.use, ] if (length(x = features.use) > 1) { - return(Matrix::colSums(x = submat)) + submat <- Matrix::colSums(submat) + } + if (keep.sparse) { + return(as(object = as.matrix(submat), Class = 'dgCMatrix')) } else { - return(submat) + return(as.matrix(submat)) } - }) + }, simplify = FALSE) + newmat = do.call(what = cbind, args = newmat.list) newmat <- t(x = newmat) rownames(x = newmat) <- all.features - colnames(x = newmat) <- colnames(x = peak.matrix) return(as(object = newmat, Class = 'dgCMatrix')) } diff --git a/man/CreateGeneActivityMatrix.Rd b/man/CreateGeneActivityMatrix.Rd index d69bc808f..825452336 100644 --- a/man/CreateGeneActivityMatrix.Rd +++ b/man/CreateGeneActivityMatrix.Rd @@ -11,6 +11,7 @@ CreateGeneActivityMatrix( include.body = TRUE, upstream = 2000, downstream = 0, + keep.sparse = FALSE, verbose = TRUE ) } @@ -27,6 +28,11 @@ CreateGeneActivityMatrix( \item{downstream}{Number of bases downstream to consider} +\item{keep.sparse}{Leave the matrix as a sparse matrix. 
Setting this option to +TRUE will take much longer but will use less memory. This can be useful if +you have a very large matrix that cannot fit into memory when converted to +a dense form.} + \item{verbose}{Print progress/messages} } \description{ From d86a337b3f7849775d7045d66f664754573f7ee6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 13 Mar 2020 19:43:00 -0400 Subject: [PATCH 058/111] add scale option --- R/visualization.R | 27 +++++++++++++++++---------- man/AddMetaData.Rd | 8 ++++---- man/DotPlot.Rd | 3 +++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 7acc1b4af..b29891761 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1861,6 +1861,7 @@ BarcodeInflectionsPlot <- function(object) { #' @param group.by Factor to group the cells by #' @param split.by Factor to split the groups by (replicates the functionality of the old SplitDotPlotGG); #' see \code{\link{FetchData}} for more details +#' @param scale Determine whether the data is scaled, TRUE for default #' @param scale.by Scale the size of the points by 'size' or by 'radius' #' @param scale.min Set lower limit for scaling, use NA for default #' @param scale.max Set upper limit for scaling, use NA for default @@ -1893,6 +1894,7 @@ DotPlot <- function( dot.scale = 6, group.by = NULL, split.by = NULL, + scale = TRUE, scale.by = 'radius', scale.min = NA, scale.max = NA @@ -1956,16 +1958,21 @@ DotPlot <- function( if (!is.null(x = id.levels)) { data.plot$id <- factor(x = data.plot$id, levels = id.levels) } - avg.exp.scaled <- sapply( - X = unique(x = data.plot$features.plot), - FUN = function(x) { - data.use <- data.plot[data.plot$features.plot == x, 'avg.exp'] - data.use <- scale(x = data.use) - data.use <- MinMax(data = data.use, min = col.min, max = col.max) - return(data.use) - } - ) - avg.exp.scaled[is.nan(avg.exp.scaled)] <- 0 + + avg.exp.scaled <- sapply( + X = unique(x = data.plot$features.plot), + FUN = function(x) { + data.use <- data.plot[data.plot$features.plot == x, 'avg.exp'] + if (scale) { + data.use <- scale(x = data.use) + data.use <- MinMax(data = data.use, min = col.min, max = col.max) + } + return(data.use) + } + ) + + + avg.exp.scaled <- as.vector(x = t(x = avg.exp.scaled)) if (!is.null(x = split.by)) { avg.exp.scaled <- as.numeric(x = cut(x = avg.exp.scaled, breaks = 20)) diff --git a/man/AddMetaData.Rd b/man/AddMetaData.Rd index 4cd1823b8..9eff0e8a1 100644 --- a/man/AddMetaData.Rd +++ b/man/AddMetaData.Rd @@ -5,8 +5,8 @@ \alias{SeuratAccess} \alias{AddMetaData.Assay} \alias{AddMetaData.Seurat} -\alias{[[<-,Assay-method} -\alias{[[<-,Seurat-method} +\alias{[[<-,Assay,ANY,ANY-method} +\alias{[[<-,Seurat,ANY,ANY-method} \title{Add in metadata associated with either cells or features.} \usage{ AddMetaData(object, metadata, col.name = NULL) @@ -15,9 +15,9 @@ AddMetaData(object, metadata, col.name = NULL) \method{AddMetaData}{Seurat}(object, metadata, col.name = NULL) -\S4method{[[}{Assay}(x, i, j, ...) <- value +\S4method{[[}{Assay,ANY,ANY}(x, i, j, ...) <- value -\S4method{[[}{Seurat}(x, i, j, ...) <- value +\S4method{[[}{Seurat,ANY,ANY}(x, i, j, ...) 
<- value } \arguments{ \item{x, object}{An object} diff --git a/man/DotPlot.Rd b/man/DotPlot.Rd index eec9138ad..a7cd18516 100644 --- a/man/DotPlot.Rd +++ b/man/DotPlot.Rd @@ -16,6 +16,7 @@ DotPlot( dot.scale = 6, group.by = NULL, split.by = NULL, + scale = TRUE, scale.by = "radius", scale.min = NA, scale.max = NA @@ -48,6 +49,8 @@ gene will have no dot drawn.} \item{split.by}{Factor to split the groups by (replicates the functionality of the old SplitDotPlotGG); see \code{\link{FetchData}} for more details} +\item{scale}{Determine whether the data is scaled, TRUE for default} + \item{scale.by}{Scale the size of the points by 'size' or by 'radius'} \item{scale.min}{Set lower limit for scaling, use NA for default} From a34e9af157957c33e9cd3a1db592b096fa8f5147 Mon Sep 17 00:00:00 2001 From: Avi Srivastava Date: Thu, 19 Mar 2020 20:05:56 -0400 Subject: [PATCH 059/111] adding uwot learn and predict --- R/dimensional_reduction.R | 85 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 6 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 6009ec248..73903a3ee 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1146,6 +1146,7 @@ RunTSNE.Seurat <- function( #' RunUMAP.default <- function( object, + model.object = NULL, assay = NULL, umap.method = 'uwot', n.neighbors = 30L, @@ -1245,20 +1246,90 @@ RunUMAP.default <- function( verbose = verbose ) }, + 'uwot-learn' = { + if (metric == 'correlation') { + warning( + "UWOT does not implement the correlation metric, using cosine instead", + call. = FALSE, + immediate. = TRUE + ) + metric <- 'cosine' + } + umap( + X = object, + n_threads = nbrOfWorkers(), + n_neighbors = as.integer(x = n.neighbors), + n_components = as.integer(x = n.components), + metric = metric, + n_epochs = n.epochs, + learning_rate = learning.rate, + min_dist = min.dist, + spread = spread, + set_op_mix_ratio = set.op.mix.ratio, + local_connectivity = local.connectivity, + repulsion_strength = repulsion.strength, + negative_sample_rate = negative.sample.rate, + a = a, + b = b, + fast_sgd = uwot.sgd, + verbose = verbose, + ret_model = TRUE + ) + }, + 'uwot-predict' = { + if (metric == 'correlation') { + warning( + "UWOT does not implement the correlation metric, using cosine instead", + call. = FALSE, + immediate. = TRUE + ) + metric <- 'cosine' + } + #if (model.object == NULL) { + # stop("UWOT predict needs a model, try umot-learn on the object first") + #} + uwot::umap_transform( + X = object, + model = model.object@reductions$umap@misc, + n_threads = nbrOfWorkers(), + n_epochs = n.epochs, + verbose = verbose + ) + }, stop("Unknown umap method: ", umap.method, call. 
= FALSE) ) + + if (umap.method == 'uwot-learn') { + umap.model <- umap.output + umap.output <- umap.output$embedding + } + colnames(x = umap.output) <- paste0(reduction.key, 1:ncol(x = umap.output)) if (inherits(x = object, what = 'dist')) { rownames(x = umap.output) <- attr(x = object, "Labels") } else { rownames(x = umap.output) <- rownames(x = object) } - umap.reduction <- CreateDimReducObject( - embeddings = umap.output, - key = reduction.key, - assay = assay, - global = TRUE - ) + + if (umap.method == 'umap-learn' || umap.method == 'uwot' || umap.method == 'uwot-predict') { + umap.reduction <- CreateDimReducObject( + embeddings = umap.output, + key = reduction.key, + assay = assay, + global = TRUE + ) + } else if (umap.method == 'uwot-learn') { + umap.reduction <- CreateDimReducObject( + embeddings = umap.output, + key = reduction.key, + assay = assay, + global = TRUE, + misc = umap.model + ) + } else { + stop("Unknown umap method: ", umap.method, call. = FALSE) + } + return(umap.reduction) } @@ -1423,6 +1494,7 @@ RunUMAP.Graph <- function( #' RunUMAP.Seurat <- function( object, + model.object = NULL, dims = NULL, reduction = 'pca', features = NULL, @@ -1487,6 +1559,7 @@ RunUMAP.Seurat <- function( } object[[reduction.name]] <- RunUMAP( object = data.use, + model.object = model.object, assay = assay, umap.method = umap.method, n.neighbors = n.neighbors, From 144b608de7a1f74713594a56921b4651de7ee501 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 20 Mar 2020 15:05:42 -0400 Subject: [PATCH 060/111] minor style tweaks, bump develop version --- DESCRIPTION | 4 ++-- R/preprocessing.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6a912b6b4..95721e800 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9012 -Date: 2020-03-13 +Version: 3.1.4.9013 +Date: 2020-03-20 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
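A usage sketch may help make the new uwot-learn / uwot-predict code paths above concrete. It is not taken from the patch: the reference and query objects are assumed to share a comparable PCA space, and the model is passed via the model.object argument as it is named at this point in the series (later patches rename it to reduction.model):

# Hedged sketch of the new uwot-learn / uwot-predict methods added above.
# `reference` and `query` are assumed Seurat objects with PCA already run.
reference <- RunUMAP(reference, dims = 1:30, umap.method = "uwot-learn")  # stores the uwot model in the umap reduction
query <- RunUMAP(query, dims = 1:30, umap.method = "uwot-predict",
                 model.object = reference)                                # projects the query onto the stored model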
Authors@R: c( diff --git a/R/preprocessing.R b/R/preprocessing.R index d5fa4b8b2..9d9a1ab3d 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -250,9 +250,9 @@ CreateGeneActivityMatrix <- function( submat <- Matrix::colSums(submat) } if (keep.sparse) { - return(as(object = as.matrix(submat), Class = 'dgCMatrix')) + return(as(object = as.matrix(x = submat), Class = 'dgCMatrix')) } else { - return(as.matrix(submat)) + return(as.matrix(x = submat)) } }, simplify = FALSE) newmat = do.call(what = cbind, args = newmat.list) From 5c12ccbaac09ece39c216ad2940313a1793895b5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 20 Mar 2020 15:16:09 -0400 Subject: [PATCH 061/111] one identity scale = F --- R/visualization.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index b29891761..cbd884b16 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1958,7 +1958,10 @@ DotPlot <- function( if (!is.null(x = id.levels)) { data.plot$id <- factor(x = data.plot$id, levels = id.levels) } - + if (length(levels(data.plot$id )) == 1) { + scale <- FALSE + warning("Only one identity, the expression will be not scaled.") + } avg.exp.scaled <- sapply( X = unique(x = data.plot$features.plot), FUN = function(x) { From aaafdcb39d9f973889ebb1eecc660f99490ebeba Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 20 Mar 2020 15:38:43 -0400 Subject: [PATCH 062/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 95721e800..8f15e5c57 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9013 +Version: 3.1.4.9014 Date: 2020-03-20 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
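The keep.sparse option added earlier in this series (and tidied in the style pass just shown) is easiest to read as a call sketch; the peak matrix, GTF file name, and object names below are placeholders rather than values taken from the patches:

# Hedged sketch: gene activity scores from a large scATAC-seq peak matrix,
# kept sparse to limit memory use. `peaks` and the GTF path are assumed inputs.
activity <- CreateGeneActivityMatrix(
  peak.matrix = peaks,
  annotation.file = "Homo_sapiens.GRCh38.86.gtf",
  seq.levels = c(1:22, "X", "Y"),
  upstream = 2000,
  keep.sparse = TRUE,  # slower, but avoids converting the full peak matrix to dense form
  verbose = TRUE
)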
From dc871c9d3687daa6ec757bfe207dcf71b5546045 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 20 Mar 2020 23:30:26 -0400 Subject: [PATCH 063/111] back to original width when no split --- R/visualization.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 79526d9a1..f2bf73a52 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -4648,7 +4648,11 @@ SingleExIPlot <- function( vln.geom(scale = 'width', adjust = adjust, trim = TRUE), theme(axis.text.x = element_text(angle = 45, hjust = 1)) ) - jitter <- geom_jitter(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9), size = pt.size) + if(is.null(split)){ + jitter <- geom_jitter(height = 0, size = pt.size) + } else{ + jitter <- geom_jitter(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9), size = pt.size) + } log.scale <- scale_y_log10() axis.scale <- ylim }, From 3bbec1804e76903d20db98be175303189488345c Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 23 Mar 2020 10:18:05 -0400 Subject: [PATCH 064/111] minor style tweaks, bump develop version --- DESCRIPTION | 4 ++-- R/visualization.R | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8f15e5c57..0cf361540 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9014 -Date: 2020-03-20 +Version: 3.1.4.9015 +Date: 2020-03-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
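The jitter change in the patch above is purely visual; a hedged sketch of the two code paths it distinguishes, where pbmc and its "groups" metadata column are assumed example inputs rather than anything defined in the patches:

# Hedged sketch: without split.by the points use plain vertical jitter; with
# split.by they are dodged within each identity class.
VlnPlot(pbmc, features = "MS4A1", pt.size = 0.5)
VlnPlot(pbmc, features = "MS4A1", split.by = "groups", pt.size = 0.5)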
Authors@R: c( diff --git a/R/visualization.R b/R/visualization.R index f2bf73a52..51d3e4c2f 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -4648,10 +4648,13 @@ SingleExIPlot <- function( vln.geom(scale = 'width', adjust = adjust, trim = TRUE), theme(axis.text.x = element_text(angle = 45, hjust = 1)) ) - if(is.null(split)){ + if (is.null(x = split)) { jitter <- geom_jitter(height = 0, size = pt.size) - } else{ - jitter <- geom_jitter(position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9), size = pt.size) + } else { + jitter <- geom_jitter( + position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9), + size = pt.size + ) } log.scale <- scale_y_log10() axis.scale <- ylim From e335aa84dffa42908fbfa0460c9c7898c2609225 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 23 Mar 2020 10:37:29 -0400 Subject: [PATCH 065/111] take log if not scaling in DotPlot --- R/visualization.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index f68baa549..85233b6c9 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1968,9 +1968,9 @@ DotPlot <- function( if (!is.null(x = id.levels)) { data.plot$id <- factor(x = data.plot$id, levels = id.levels) } - if (length(levels(data.plot$id )) == 1) { + if (length(x = levels(x = data.plot$id)) == 1) { scale <- FALSE - warning("Only one identity, the expression will be not scaled.") + warning("Only one identity present, the expression values will be not scaled.") } avg.exp.scaled <- sapply( X = unique(x = data.plot$features.plot), @@ -1979,6 +1979,8 @@ DotPlot <- function( if (scale) { data.use <- scale(x = data.use) data.use <- MinMax(data = data.use, min = col.min, max = col.max) + } else { + data.use <- log(x = data.use) } return(data.use) } From ac95f7124699de029ac2121d9fa5ab5e023b74f1 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 23 Mar 2020 10:42:21 -0400 Subject: [PATCH 066/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0cf361540..173460f34 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9015 +Version: 3.1.4.9016 Date: 2020-03-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
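Taken together, the new scale argument, the single-identity warning, and the log fallback in the DotPlot patches above amount to the following choice for users; pbmc and the feature names are illustrative assumptions:

# Hedged sketch of the new `scale` argument in DotPlot.
DotPlot(pbmc, features = c("MS4A1", "CD3E", "LYZ"))                 # default: average expression z-scored per feature
DotPlot(pbmc, features = c("MS4A1", "CD3E", "LYZ"), scale = FALSE)  # log of average expression instead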
From 683fc08035129b08c3b784a8b02d87170c787196 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 23 Mar 2020 21:10:21 -0400 Subject: [PATCH 067/111] minor fixes to new UMAP projection code, roxygen2 update --- DESCRIPTION | 2 +- NAMESPACE | 3 ++ R/dimensional_reduction.R | 57 ++++++++++++++++++------------------ R/objects.R | 29 ++++++++++++++++++ R/zzz.R | 2 ++ man/AddMetaData.Rd | 8 ++--- man/FeaturePlot.Rd | 2 +- man/Misc.Rd | 6 ++++ man/RunUMAP.Rd | 4 +++ man/Seurat-package.Rd | 2 +- man/cc.genes.Rd | 6 ++-- man/cc.genes.updated.2019.Rd | 6 ++-- man/pbmc_small.Rd | 6 ++-- 13 files changed, 91 insertions(+), 42 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 173460f34..7c3eeb9b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -80,7 +80,7 @@ Collate: 'tree.R' 'utilities.R' 'zzz.R' -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 Encoding: UTF-8 biocViews: Suggests: diff --git a/NAMESPACE b/NAMESPACE index 263b1df25..e74b07e07 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ S3method("Key<-",Assay) S3method("Key<-",DimReduc) S3method("Loadings<-",DimReduc) S3method("Misc<-",Assay) +S3method("Misc<-",DimReduc) S3method("Misc<-",Seurat) S3method("Project<-",Seurat) S3method("Tool<-",Seurat) @@ -70,6 +71,7 @@ S3method(Key,Seurat) S3method(Loadings,DimReduc) S3method(Loadings,Seurat) S3method(Misc,Assay) +S3method(Misc,DimReduc) S3method(Misc,Seurat) S3method(NormalizeData,Assay) S3method(NormalizeData,Seurat) @@ -557,4 +559,5 @@ importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) importFrom(utils,write.table) importFrom(uwot,umap) +importFrom(uwot,umap_transform) useDynLib(Seurat) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 73903a3ee..88a654000 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1137,7 +1137,7 @@ RunTSNE.Seurat <- function( } #' @importFrom reticulate py_module_available py_set_seed import -#' @importFrom uwot umap +#' @importFrom uwot umap umap_transform #' @importFrom future nbrOfWorkers #' #' @rdname RunUMAP @@ -1146,7 +1146,7 @@ RunTSNE.Seurat <- function( #' RunUMAP.default <- function( object, - model.object = NULL, + reduction.model = NULL, assay = NULL, umap.method = 'uwot', n.neighbors = 30L, @@ -1285,12 +1285,19 @@ RunUMAP.default <- function( ) metric <- 'cosine' } - #if (model.object == NULL) { - # stop("UWOT predict needs a model, try umot-learn on the object first") - #} - uwot::umap_transform( + if (reduction.model == NULL) { + stop("If using uwot-predict, please pass a DimReduc object with the model stored to reduction.model.") + } + model <- reduction.model %||% Misc( + object = reduction.model, + slot = "model" + ) + if (length(x = model) == 0) { + stop("The provided reduction.model does not have a model stored. Please try running umot-learn on the object first") + } + umap_transform( X = object, - model = model.object@reductions$umap@misc, + model = model, n_threads = nbrOfWorkers(), n_epochs = n.epochs, verbose = verbose @@ -1298,38 +1305,29 @@ RunUMAP.default <- function( }, stop("Unknown umap method: ", umap.method, call. 
= FALSE) ) - if (umap.method == 'uwot-learn') { umap.model <- umap.output umap.output <- umap.output$embedding } - colnames(x = umap.output) <- paste0(reduction.key, 1:ncol(x = umap.output)) if (inherits(x = object, what = 'dist')) { rownames(x = umap.output) <- attr(x = object, "Labels") } else { rownames(x = umap.output) <- rownames(x = object) } - - if (umap.method == 'umap-learn' || umap.method == 'uwot' || umap.method == 'uwot-predict') { - umap.reduction <- CreateDimReducObject( - embeddings = umap.output, - key = reduction.key, - assay = assay, - global = TRUE - ) - } else if (umap.method == 'uwot-learn') { - umap.reduction <- CreateDimReducObject( - embeddings = umap.output, - key = reduction.key, - assay = assay, - global = TRUE, - misc = umap.model + umap.reduction <- CreateDimReducObject( + embeddings = umap.output, + key = reduction.key, + assay = assay, + global = TRUE + ) + if (umap.method == 'uwot-learn') { + umap.reduction <- Misc( + object = umap.reduction, + value = umap.model, + slot = "model" ) - } else { - stop("Unknown umap method: ", umap.method, call. = FALSE) } - return(umap.reduction) } @@ -1421,6 +1419,7 @@ RunUMAP.Graph <- function( return(umap) } +#' @param reduction.model \code{DimReduc} object that contains the umap model #' @param dims Which dimensions to use as input features, used only if #' \code{features} is NULL #' @param reduction Which dimensional reduction (PCA or ICA) to use for the @@ -1494,7 +1493,7 @@ RunUMAP.Graph <- function( #' RunUMAP.Seurat <- function( object, - model.object = NULL, + reduction.model = NULL, dims = NULL, reduction = 'pca', features = NULL, @@ -1559,7 +1558,7 @@ RunUMAP.Seurat <- function( } object[[reduction.name]] <- RunUMAP( object = data.use, - model.object = model.object, + reduction.model = reduction.model, assay = assay, umap.method = umap.method, n.neighbors = n.neighbors, diff --git a/R/objects.R b/R/objects.R index 914e46d54..f2956fa75 100644 --- a/R/objects.R +++ b/R/objects.R @@ -3333,6 +3333,18 @@ Misc.Assay <- function(object, slot = NULL, ...) { return(slot(object = object, name = 'misc')[[slot]]) } +#' @rdname Misc +#' @export +#' @method Misc DimReduc +#' +Misc.DimReduc <- function(object, slot = NULL, ...) { + CheckDots(...) + if (is.null(x = slot)) { + return(slot(object = object, name = 'misc')) + } + return(slot(object = object, name = 'misc')[[slot]]) +} + #' @rdname Misc #' @export #' @method Misc Seurat @@ -3366,6 +3378,23 @@ Misc.Seurat <- function(object, slot = NULL, ...) { return(object) } +#' @rdname Misc +#' @export +#' @method Misc<- DimReduc +#' +"Misc<-.DimReduc" <- function(object, slot, ..., value) { + CheckDots(...) 
+ if (slot %in% names(x = Misc(object = object))) { + warning("Overwriting miscellanous data for ", slot) + } + if (is.list(x = value)) { + slot(object = object, name = 'misc')[[slot]] <- c(value) + } else { + slot(object = object, name = 'misc')[[slot]] <- value + } + return(object) +} + #' @rdname Misc #' @export #' @method Misc<- Seurat diff --git a/R/zzz.R b/R/zzz.R index 3befe3eba..23d052f38 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,5 @@ +#' Seurat package +#' #' Tools for single-cell genomics #' #' @section Package options: diff --git a/man/AddMetaData.Rd b/man/AddMetaData.Rd index 9eff0e8a1..4cd1823b8 100644 --- a/man/AddMetaData.Rd +++ b/man/AddMetaData.Rd @@ -5,8 +5,8 @@ \alias{SeuratAccess} \alias{AddMetaData.Assay} \alias{AddMetaData.Seurat} -\alias{[[<-,Assay,ANY,ANY-method} -\alias{[[<-,Seurat,ANY,ANY-method} +\alias{[[<-,Assay-method} +\alias{[[<-,Seurat-method} \title{Add in metadata associated with either cells or features.} \usage{ AddMetaData(object, metadata, col.name = NULL) @@ -15,9 +15,9 @@ AddMetaData(object, metadata, col.name = NULL) \method{AddMetaData}{Seurat}(object, metadata, col.name = NULL) -\S4method{[[}{Assay,ANY,ANY}(x, i, j, ...) <- value +\S4method{[[}{Assay}(x, i, j, ...) <- value -\S4method{[[}{Seurat,ANY,ANY}(x, i, j, ...) <- value +\S4method{[[}{Seurat}(x, i, j, ...) <- value } \arguments{ \item{x, object}{An object} diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 1c9d75547..b81f43986 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -92,7 +92,7 @@ different colors and different shapes on cells} \item{by.col}{If splitting by a factor, plot the splits per column with the features as rows; ignored if \code{blend = TRUE}} -\item{sort.cell}{Redundant with \code{order}. This argument is being +\item{sort.cell}{Redundant with \code{order}. This argument is being deprecated. Please use \code{order} instead.} \item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} diff --git a/man/Misc.Rd b/man/Misc.Rd index 153f5944d..ccf6d3076 100644 --- a/man/Misc.Rd +++ b/man/Misc.Rd @@ -4,8 +4,10 @@ \alias{Misc} \alias{Misc<-} \alias{Misc.Assay} +\alias{Misc.DimReduc} \alias{Misc.Seurat} \alias{Misc<-.Assay} +\alias{Misc<-.DimReduc} \alias{Misc<-.Seurat} \title{Access miscellaneous data} \usage{ @@ -15,10 +17,14 @@ Misc(object, ...) <- value \method{Misc}{Assay}(object, slot = NULL, ...) +\method{Misc}{DimReduc}(object, slot = NULL, ...) + \method{Misc}{Seurat}(object, slot = NULL, ...) \method{Misc}{Assay}(object, slot, ...) <- value +\method{Misc}{DimReduc}(object, slot, ...) <- value + \method{Misc}{Seurat}(object, slot, ...) <- value } \arguments{ diff --git a/man/RunUMAP.Rd b/man/RunUMAP.Rd index bab8bdc64..3bfddb124 100644 --- a/man/RunUMAP.Rd +++ b/man/RunUMAP.Rd @@ -11,6 +11,7 @@ RunUMAP(object, ...) \method{RunUMAP}{default}( object, + reduction.model = NULL, assay = NULL, umap.method = "uwot", n.neighbors = 30L, @@ -59,6 +60,7 @@ RunUMAP(object, ...) \method{RunUMAP}{Seurat}( object, + reduction.model = NULL, dims = NULL, reduction = "pca", features = NULL, @@ -93,6 +95,8 @@ RunUMAP(object, ...) 
\item{...}{Arguments passed to other methods and UMAP} +\item{reduction.model}{\code{DimReduc} object that contains the umap model} + \item{assay}{Assay to pull data for when using \code{features}, or assay used to construct Graph if running UMAP on a Graph} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 70fb5278d..1a950da1a 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -3,7 +3,7 @@ \docType{package} \name{Seurat-package} \alias{Seurat-package} -\title{Tools for single-cell genomics} +\title{Seurat package} \description{ Tools for single-cell genomics } diff --git a/man/cc.genes.Rd b/man/cc.genes.Rd index 1b07701e8..995b97dbc 100644 --- a/man/cc.genes.Rd +++ b/man/cc.genes.Rd @@ -4,11 +4,13 @@ \name{cc.genes} \alias{cc.genes} \title{Cell cycle genes} -\format{A list of two vectors +\format{ +A list of two vectors \describe{ \item{s.genes}{Genes associated with S-phase} \item{g2m.genes}{Genes associated with G2M-phase} -}} +} +} \source{ \url{http://science.sciencemag.org/content/352/6282/189} } diff --git a/man/cc.genes.updated.2019.Rd b/man/cc.genes.updated.2019.Rd index def9478ad..377a91e6c 100644 --- a/man/cc.genes.updated.2019.Rd +++ b/man/cc.genes.updated.2019.Rd @@ -4,11 +4,13 @@ \name{cc.genes.updated.2019} \alias{cc.genes.updated.2019} \title{Cell cycle genes: 2019 update} -\format{A list of two vectors +\format{ +A list of two vectors \describe{ \item{s.genes}{Genes associated with S-phase} \item{g2m.genes}{Genes associated with G2M-phase} -}} +} +} \source{ \url{http://science.sciencemag.org/content/352/6282/189} } diff --git a/man/pbmc_small.Rd b/man/pbmc_small.Rd index 6721bdfed..9b375dd2f 100644 --- a/man/pbmc_small.Rd +++ b/man/pbmc_small.Rd @@ -4,7 +4,8 @@ \name{pbmc_small} \alias{pbmc_small} \title{A small example version of the PBMC dataset} -\format{A Seurat object with the following slots filled +\format{ +A Seurat object with the following slots filled \describe{ \item{assays}{ \itemize{Currently only contains one assay ("RNA" - scRNA-seq expression data) @@ -21,7 +22,8 @@ \item{reductions}{Dimensional reductions: currently PCA and tSNE} \item{version}{Seurat version used to create the object} \item{commands}{Command history} -}} +} +} \source{ \url{https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k} } From 2350300a928b5cb87867514bc869345e19756f28 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 24 Mar 2020 18:33:28 -0400 Subject: [PATCH 068/111] transfer dimnames for single max.umi in SampleUMI --- R/preprocessing.R | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 9d9a1ab3d..018c9bf83 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1122,24 +1122,23 @@ SampleUMI <- function( ) { data <- as(object = data, Class = "dgCMatrix") if (length(x = max.umi) == 1) { - return( - RunUMISampling( - data = data, - sample_val = max.umi, - upsample = upsample, - display_progress = verbose - ) + new_data <- RunUMISampling( + data = data, + sample_val = max.umi, + upsample = upsample, + display_progress = verbose ) } else if (length(x = max.umi) != ncol(x = data)) { stop("max.umi vector not equal to number of cells") + } else { + new_data <- RunUMISamplingPerCell( + data = data, + sample_val = max.umi, + upsample = upsample, + display_progress = verbose + ) } - new_data = RunUMISamplingPerCell( - data = data, - sample_val = max.umi, - upsample = upsample, - display_progress = verbose - ) - dimnames(new_data) <- 
dimnames(data) + dimnames(x = new_data) <- dimnames(x = data) return(new_data) } From 1c21cbbf5a878345e84abba8cad1b3f0bfb65eb1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 24 Mar 2020 23:43:00 -0400 Subject: [PATCH 069/111] fix two bugs in uwot-learn --- R/dimensional_reduction.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 88a654000..8637ed066 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1285,7 +1285,7 @@ RunUMAP.default <- function( ) metric <- 'cosine' } - if (reduction.model == NULL) { + if ( is.null(reduction.model) ) { stop("If using uwot-predict, please pass a DimReduc object with the model stored to reduction.model.") } model <- reduction.model %||% Misc( @@ -1322,11 +1322,7 @@ RunUMAP.default <- function( global = TRUE ) if (umap.method == 'uwot-learn') { - umap.reduction <- Misc( - object = umap.reduction, - value = umap.model, - slot = "model" - ) + Misc(umap.reduction, slot = "model") <- umap.model } return(umap.reduction) } From f354ab1e109862285a75e3ab591f60c1ffdc0e77 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Wed, 25 Mar 2020 12:11:19 -0400 Subject: [PATCH 070/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 173460f34..a686e1760 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9016 -Date: 2020-03-23 +Version: 3.1.4.9017 +Date: 2020-03-25 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( From 0dc50831a3e6b0351b258802b2804e8420b82c77 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 27 Mar 2020 14:37:39 -0400 Subject: [PATCH 071/111] fix return value description for MixingMetric --- R/integration.R | 3 +-- man/MixingMetric.Rd | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index 6b33ed490..2a73bab3f 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1130,8 +1130,7 @@ LocalStruct <- function( #' @param eps Error bound on the neighbor finding algorithm (from RANN) #' @param verbose Displays progress bar #' -#' @return Returns a vector of values representing the entropy metric from each -#' bootstrapped iteration. +#' @return Returns a vector of values of the mixing metric for each cell #' #' @importFrom RANN nn2 #' @importFrom pbapply pbsapply diff --git a/man/MixingMetric.Rd b/man/MixingMetric.Rd index a4b3a90f4..4eea94961 100644 --- a/man/MixingMetric.Rd +++ b/man/MixingMetric.Rd @@ -33,8 +33,7 @@ MixingMetric( \item{verbose}{Displays progress bar} } \value{ -Returns a vector of values representing the entropy metric from each -bootstrapped iteration. 
+Returns a vector of values of the mixing metric for each cell } \description{ Here we compute a measure of how well mixed a composite dataset is. To From b02d8779dd260f822f2cae1ce0c93c655215ab85 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 27 Mar 2020 17:42:45 -0400 Subject: [PATCH 072/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a686e1760..1ad439c48 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9017 -Date: 2020-03-25 +Version: 3.1.4.9018 +Date: 2020-03-27 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( From 4883bcbffd351d81264f905b5198d5d4ad0dfb5a Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 30 Mar 2020 14:16:01 -0400 Subject: [PATCH 073/111] Fix issue with underscores in identity class and split.by --- R/visualization.R | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 6761e2220..725e6a5cb 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1985,9 +1985,9 @@ DotPlot <- function( return(data.use) } ) - - + + avg.exp.scaled <- as.vector(x = t(x = avg.exp.scaled)) if (!is.null(x = split.by)) { avg.exp.scaled <- as.numeric(x = cut(x = avg.exp.scaled, breaks = 20)) @@ -2000,18 +2000,17 @@ DotPlot <- function( data.plot$pct.exp[data.plot$pct.exp < dot.min] <- NA data.plot$pct.exp <- data.plot$pct.exp * 100 if (!is.null(x = split.by)) { - # splits.use <- vapply( - # X = strsplit(x = as.character(x = data.plot$id), split = '_'), - # FUN = '[[', - # FUN.VALUE = character(length = 1L), - # 2 - # ) splits.use <- vapply( - X = strsplit(x = as.character(x = data.plot$id), split = '_'), - FUN = function(x) { - return(paste(x[2:length(x = x)], collapse = '_')) - }, - FUN.VALUE = character(length = 1L) + X = as.character(x = data.plot$id), + FUN = gsub, + FUN.VALUE = character(length = 1L), + pattern = paste0( + '^((', + paste(sort(x = levels(x = object), decreasing = TRUE), collapse = '|'), + ')_)' + ), + replacement = '', + USE.NAMES = FALSE ) data.plot$colors <- mapply( FUN = function(color, value) { @@ -4672,7 +4671,7 @@ SingleExIPlot <- function( jitter <- geom_jitter(height = 0, size = pt.size) } else { jitter <- geom_jitter( - position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9), + position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9), size = pt.size ) } From 7ccc43d454b5b8ea30f9b6d00d67c0680f6d93d5 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 30 Mar 2020 17:05:38 -0400 Subject: [PATCH 074/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1ad439c48..b5c66586d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 
3.1.4.9018 -Date: 2020-03-27 +Version: 3.1.4.9019 +Date: 2020-03-30 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( From fea8b504ec4d70b9973da180fa19f81d32731350 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 2 Apr 2020 13:22:41 -0400 Subject: [PATCH 075/111] Fix NSE errors in subset/WhichCells Replace substitute/eval with rlang::enquo/rlang::eval_tidy Ensures environments are correct for NSE Addresses satijalab/seurat#2799 and rstudio/shiny#2801 --- NAMESPACE | 3 +++ R/objects.R | 28 +++++++++++++++++----------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 263b1df25..0e0545193 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -496,6 +496,9 @@ importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) importFrom(reticulate,tuple) importFrom(rlang,"!!") +importFrom(rlang,enquo) +importFrom(rlang,eval_tidy) +importFrom(rlang,is_quosure) importFrom(rsvd,rsvd) importFrom(scales,hue_pal) importFrom(scales,zero_range) diff --git a/R/objects.R b/R/objects.R index 914e46d54..051400da6 100644 --- a/R/objects.R +++ b/R/objects.R @@ -4754,6 +4754,7 @@ VariableFeatures.Seurat <- function(object, assay = NULL, selection.method = NUL #' @param invert Invert the selection of cells #' #' @importFrom stats na.omit +#' @importFrom rlang is_quosure enquo eval_tidy #' #' @rdname WhichCells #' @export @@ -4770,12 +4771,14 @@ WhichCells.Assay <- function( cells <- cells %||% colnames(x = object) if (!missing(x = expression) && !is.null(x = substitute(expr = expression))) { key.pattern <- paste0('^', Key(object = object)) - expr <- if (is.call(x = substitute(expr = expression))) { - substitute(expr = expression) + expr <- if (tryCatch(expr = is_quosure(x = expression), error = function(...) FALSE)) { + expression + } else if (is.call(x = enquo(arg = exression))) { + enquo(arg = expression) } else { parse(text = expression) } - expr.char <- as.character(x = expr) + expr.char <- suppressWarnings(expr = as.character(x = expr)) expr.char <- unlist(x = lapply(X = expr.char, FUN = strsplit, split = ' ')) expr.char <- gsub( pattern = key.pattern, @@ -4798,8 +4801,7 @@ WhichCells.Assay <- function( expr.char <- expr.char[vars.use] data.subset <- as.data.frame(x = t(x = as.matrix(x = object[expr.char, ]))) colnames(x = data.subset) <- expr.char - data.subset <- subset.data.frame(x = data.subset, subset = eval(expr = expr)) - cells <- rownames(x = data.subset) + cells <- rownames(x = data.subset)[eval_tidy(expr = expr, data = data.subset)] } if (invert) { cells <- colnames(x = object)[!colnames(x = object) %in% cells] @@ -4816,6 +4818,7 @@ WhichCells.Assay <- function( #' @param seed Random seed for downsampling. 
If NULL, does not set a seed #' #' @importFrom stats na.omit +#' @importFrom rlang is_quosure enquo eval_tidy #' #' @rdname WhichCells #' @export @@ -4870,12 +4873,14 @@ WhichCells.Seurat <- function( } ) key.pattern <- paste0('^', object.keys, collapse = '|') - expr <- if (is.call(x = substitute(expr = expression))) { - substitute(expr = expression) + expr <- if (tryCatch(expr = is_quosure(x = expression), error = function(...) FALSE)) { + expression + } else if (is.call(x = enquo(arg = expression))) { + enquo(arg = expression) } else { parse(text = expression) } - expr.char <- as.character(x = expr) + expr.char <- suppressWarnings(expr = as.character(x = expr)) expr.char <- unlist(x = lapply(X = expr.char, FUN = strsplit, split = ' ')) expr.char <- gsub( pattern = '(', @@ -4899,8 +4904,7 @@ WhichCells.Seurat <- function( cells = cells, slot = slot ) - data.subset <- subset.data.frame(x = data.subset, subset = eval(expr = expr)) - cells <- rownames(x = data.subset) + cells <- rownames(x = data.subset)[eval_tidy(expr = expr, data = data.subset)] } if (invert) { cell.order <- colnames(x = object) @@ -6058,6 +6062,8 @@ subset.DimReduc <- function(x, cells = NULL, features = NULL, ...) { #' #' @return A subsetted Seurat object #' +#' @importFrom rlang enquo +#' #' @rdname subset.Seurat #' @aliases subset #' @seealso \code{\link[base]{subset}} \code{\link{WhichCells}} @@ -6074,7 +6080,7 @@ subset.DimReduc <- function(x, cells = NULL, features = NULL, ...) { #' subset.Seurat <- function(x, subset, cells = NULL, features = NULL, idents = NULL, ...) { if (!missing(x = subset)) { - subset <- deparse(expr = substitute(expr = subset)) + subset <- enquo(arg = subset) } cells <- WhichCells( object = x, From dc7fb3f99a6cc08f764262d40cba2bd4f6cb5232 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 3 Apr 2020 16:51:12 -0400 Subject: [PATCH 076/111] Disallow jackstraw on SCT --- R/dimensional_reduction.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 73903a3ee..c99cb3aca 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -65,6 +65,10 @@ JackStraw <- function( my.sapply <- future_sapply } assay <- assay %||% DefaultAssay(object = object) + if (IsSCT(assay = assay)) { + stop("JackStraw cannot be run on SCTransform-normalized data. + Please supply a non-SCT assay.") + } if (dims > length(x = object[[reduction]])) { dims <- length(x = object[[reduction]]) warning("Number of dimensions specified is greater than those available. Setting dims to ", dims, " and continuing", immediate. = TRUE) From 9a4295d234bbba1cfe03061bf41f0c4c74821116 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 3 Apr 2020 17:00:30 -0400 Subject: [PATCH 077/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b5c66586d..71943b8f7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9019 -Date: 2020-03-30 +Version: 3.1.4.9020 +Date: 2020-04-03 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. 
See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( From f3026da499772add5c3ede758f91a240a9716fb3 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 3 Apr 2020 17:04:32 -0400 Subject: [PATCH 078/111] Change default for split violin to multi --- R/visualization.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 725e6a5cb..cb1c14b3a 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -562,7 +562,7 @@ VlnPlot <- function( ) { return(ExIPlot( object = object, - type = ifelse(test = multi.group, yes = 'multiViolin', no = 'violin'), + type = ifelse(test = multi.group, yes = 'violin', no = 'multiViolin'), features = features, idents = idents, ncol = ncol, @@ -3728,6 +3728,10 @@ ExIPlot <- function( } cols <- rep_len(x = cols, length.out = length(x = levels(x = split))) names(x = cols) <- sort(x = levels(x = split)) + if ((length(x = cols) > 2) & (type == "violin")) { + warning("Split violin is only supported for <3 groups, using multi-violin.") + type <- "multiViolin" + } } if (same.y.lims && is.null(x = y.max)) { y.max <- max(data) From ee827e8473e1d7c2f105aa4528bfdee14a207a6d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 6 Apr 2020 11:02:29 -0400 Subject: [PATCH 079/111] assay name to assay object --- R/dimensional_reduction.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index c99cb3aca..56f549a93 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -65,7 +65,7 @@ JackStraw <- function( my.sapply <- future_sapply } assay <- assay %||% DefaultAssay(object = object) - if (IsSCT(assay = assay)) { + if (IsSCT(assay = object[[assay]])) { stop("JackStraw cannot be run on SCTransform-normalized data. Please supply a non-SCT assay.") } From aacc3252a7e4c5f9fe510cebbcfeaacc6cb07fab Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 6 Apr 2020 11:11:02 -0400 Subject: [PATCH 080/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 71943b8f7..0a5359a3b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9020 -Date: 2020-04-03 +Version: 3.1.4.9021 +Date: 2020-04-06 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
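The rlang-based rework of WhichCells()/subset() a few patches above targets expressions that reference variables from the calling environment (the satijalab/seurat#2799 report); a hedged sketch of the kind of call it is meant to keep working, with pbmc as an assumed example object:

# Hedged sketch: the cutoff lives in the caller's environment rather than in
# the expression itself, which is what the enquo()/eval_tidy() change handles.
min.features <- 500
filtered <- subset(pbmc, subset = nFeature_RNA > min.features)
b.cells <- WhichCells(pbmc, expression = MS4A1 > 1)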
Authors@R: c( From 1a94e652e3eb8c292f5a64280ea83e37f40d83ca Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Mon, 6 Apr 2020 12:47:28 -0400 Subject: [PATCH 081/111] Fix case when no split; replace multi.group to split.plot --- R/visualization.R | 22 +++++++++++----------- man/VlnPlot.Rd | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index cb1c14b3a..3dd319449 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -526,8 +526,8 @@ RidgePlot <- function( #' @inheritParams RidgePlot #' @param pt.size Point size for geom_violin #' @param split.by A variable to split the violin plots by, -#' @param multi.group plot each group of the split violin plots by multiple or single violin shapes -#' see \code{\link{FetchData}} for more details +#' @param split.plot plot each group of the split violin plots by multiple or +#' single violin shapes. #' @param adjust Adjust parameter for geom_violin #' #' @return A \code{\link[patchwork]{patchwork}ed} ggplot object if @@ -557,12 +557,12 @@ VlnPlot <- function( log = FALSE, ncol = NULL, slot = 'data', - multi.group = FALSE, + split.plot = FALSE, combine = TRUE ) { return(ExIPlot( object = object, - type = ifelse(test = multi.group, yes = 'violin', no = 'multiViolin'), + type = ifelse(test = split.plot, yes = 'splitViolin', no = 'violin'), features = features, idents = idents, ncol = ncol, @@ -3640,7 +3640,7 @@ DefaultDimReduc <- function(object, assay = NULL) { # Basically combines the codebase for VlnPlot and RidgePlot # # @param object Seurat object -# @param type Plot type, choose from 'ridge', 'violin', or 'multiViolin' +# @param type Plot type, choose from 'ridge', 'violin', or 'splitViolin' # @param features Features to plot (gene expression, metrics, PC scores, # anything that can be retreived by FetchData) # @param idents Which classes to include in the plot (default is all) @@ -3728,9 +3728,9 @@ ExIPlot <- function( } cols <- rep_len(x = cols, length.out = length(x = levels(x = split))) names(x = cols) <- sort(x = levels(x = split)) - if ((length(x = cols) > 2) & (type == "violin")) { + if ((length(x = cols) > 2) & (type == "splitViolin")) { warning("Split violin is only supported for <3 groups, using multi-violin.") - type <- "multiViolin" + type <- "violin" } } if (same.y.lims && is.null(x = y.max)) { @@ -3756,7 +3756,7 @@ ExIPlot <- function( label.fxn <- switch( EXPR = type, 'violin' = ylab, - "multiViolin" = ylab, + "splitViolin" = ylab, 'ridge' = xlab, stop("Unknown ExIPlot type ", type, call. 
= FALSE) ) @@ -4649,11 +4649,11 @@ SingleExIPlot <- function( y.max <- y.max %||% max(data[, feature][is.finite(x = data[, feature])]) if (type == 'violin' && !is.null(x = split)) { data$split <- split - vln.geom <- geom_split_violin + vln.geom <- geom_violin fill <- 'split' - } else if (type == 'multiViolin' && !is.null(x = split )) { + } else if (type == 'splitViolin' && !is.null(x = split )) { data$split <- split - vln.geom <- geom_violin + vln.geom <- geom_split_violin fill <- 'split' type <- 'violin' } else { diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd index 56f9848ab..c10b4af01 100644 --- a/man/VlnPlot.Rd +++ b/man/VlnPlot.Rd @@ -20,7 +20,7 @@ VlnPlot( log = FALSE, ncol = NULL, slot = "data", - multi.group = FALSE, + split.plot = FALSE, combine = TRUE ) } @@ -57,8 +57,8 @@ expression of the attribute being potted, can also pass 'increasing' or 'decreas \item{slot}{Use non-normalized counts data for plotting} -\item{multi.group}{plot each group of the split violin plots by multiple or single violin shapes -see \code{\link{FetchData}} for more details} +\item{split.plot}{plot each group of the split violin plots by multiple or +single violin shapes see \code{\link{FetchData}} for more details} \item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} ggplot object. If \code{FALSE}, return a list of ggplot objects} From 065e42d4378fe3581d5016ad388eedbd986ded2a Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 7 Apr 2020 13:54:57 -0400 Subject: [PATCH 082/111] Update documentation --- DESCRIPTION | 2 +- R/objects.R | 2 +- R/zzz.R | 2 ++ man/AddMetaData.Rd | 8 ++++---- man/FeaturePlot.Rd | 2 +- man/RunUMAP.Rd | 2 ++ man/cc.genes.Rd | 6 ++++-- man/cc.genes.updated.2019.Rd | 6 ++++-- man/pbmc_small.Rd | 6 ++++-- 9 files changed, 23 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0a5359a3b..aefbc439d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -80,7 +80,7 @@ Collate: 'tree.R' 'utilities.R' 'zzz.R' -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 Encoding: UTF-8 biocViews: Suggests: diff --git a/R/objects.R b/R/objects.R index 051400da6..66d51e8a7 100644 --- a/R/objects.R +++ b/R/objects.R @@ -4773,7 +4773,7 @@ WhichCells.Assay <- function( key.pattern <- paste0('^', Key(object = object)) expr <- if (tryCatch(expr = is_quosure(x = expression), error = function(...) FALSE)) { expression - } else if (is.call(x = enquo(arg = exression))) { + } else if (is.call(x = enquo(arg = expression))) { enquo(arg = expression) } else { parse(text = expression) diff --git a/R/zzz.R b/R/zzz.R index 3befe3eba..aed6df2a8 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,5 +1,7 @@ #' Tools for single-cell genomics #' +#' Tools for single-cell genomics +#' #' @section Package options: #' #' Seurat uses the following [options()] to configure behaviour: diff --git a/man/AddMetaData.Rd b/man/AddMetaData.Rd index 9eff0e8a1..4cd1823b8 100644 --- a/man/AddMetaData.Rd +++ b/man/AddMetaData.Rd @@ -5,8 +5,8 @@ \alias{SeuratAccess} \alias{AddMetaData.Assay} \alias{AddMetaData.Seurat} -\alias{[[<-,Assay,ANY,ANY-method} -\alias{[[<-,Seurat,ANY,ANY-method} +\alias{[[<-,Assay-method} +\alias{[[<-,Seurat-method} \title{Add in metadata associated with either cells or features.} \usage{ AddMetaData(object, metadata, col.name = NULL) @@ -15,9 +15,9 @@ AddMetaData(object, metadata, col.name = NULL) \method{AddMetaData}{Seurat}(object, metadata, col.name = NULL) -\S4method{[[}{Assay,ANY,ANY}(x, i, j, ...) <- value +\S4method{[[}{Assay}(x, i, j, ...) 
<- value -\S4method{[[}{Seurat,ANY,ANY}(x, i, j, ...) <- value +\S4method{[[}{Seurat}(x, i, j, ...) <- value } \arguments{ \item{x, object}{An object} diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 1c9d75547..b81f43986 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -92,7 +92,7 @@ different colors and different shapes on cells} \item{by.col}{If splitting by a factor, plot the splits per column with the features as rows; ignored if \code{blend = TRUE}} -\item{sort.cell}{Redundant with \code{order}. This argument is being +\item{sort.cell}{Redundant with \code{order}. This argument is being deprecated. Please use \code{order} instead.} \item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} diff --git a/man/RunUMAP.Rd b/man/RunUMAP.Rd index bab8bdc64..da920968d 100644 --- a/man/RunUMAP.Rd +++ b/man/RunUMAP.Rd @@ -11,6 +11,7 @@ RunUMAP(object, ...) \method{RunUMAP}{default}( object, + model.object = NULL, assay = NULL, umap.method = "uwot", n.neighbors = 30L, @@ -59,6 +60,7 @@ RunUMAP(object, ...) \method{RunUMAP}{Seurat}( object, + model.object = NULL, dims = NULL, reduction = "pca", features = NULL, diff --git a/man/cc.genes.Rd b/man/cc.genes.Rd index 1b07701e8..995b97dbc 100644 --- a/man/cc.genes.Rd +++ b/man/cc.genes.Rd @@ -4,11 +4,13 @@ \name{cc.genes} \alias{cc.genes} \title{Cell cycle genes} -\format{A list of two vectors +\format{ +A list of two vectors \describe{ \item{s.genes}{Genes associated with S-phase} \item{g2m.genes}{Genes associated with G2M-phase} -}} +} +} \source{ \url{http://science.sciencemag.org/content/352/6282/189} } diff --git a/man/cc.genes.updated.2019.Rd b/man/cc.genes.updated.2019.Rd index def9478ad..377a91e6c 100644 --- a/man/cc.genes.updated.2019.Rd +++ b/man/cc.genes.updated.2019.Rd @@ -4,11 +4,13 @@ \name{cc.genes.updated.2019} \alias{cc.genes.updated.2019} \title{Cell cycle genes: 2019 update} -\format{A list of two vectors +\format{ +A list of two vectors \describe{ \item{s.genes}{Genes associated with S-phase} \item{g2m.genes}{Genes associated with G2M-phase} -}} +} +} \source{ \url{http://science.sciencemag.org/content/352/6282/189} } diff --git a/man/pbmc_small.Rd b/man/pbmc_small.Rd index 6721bdfed..9b375dd2f 100644 --- a/man/pbmc_small.Rd +++ b/man/pbmc_small.Rd @@ -4,7 +4,8 @@ \name{pbmc_small} \alias{pbmc_small} \title{A small example version of the PBMC dataset} -\format{A Seurat object with the following slots filled +\format{ +A Seurat object with the following slots filled \describe{ \item{assays}{ \itemize{Currently only contains one assay ("RNA" - scRNA-seq expression data) @@ -21,7 +22,8 @@ \item{reductions}{Dimensional reductions: currently PCA and tSNE} \item{version}{Seurat version used to create the object} \item{commands}{Command history} -}} +} +} \source{ \url{https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k} } From b09122fda3b1c3445e5df27f3790c94b740b6ea8 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Tue, 7 Apr 2020 15:00:35 -0400 Subject: [PATCH 083/111] Add one-time warning about new split vln --- R/visualization.R | 14 ++++++++++++++ R/zzz.R | 1 + 2 files changed, 15 insertions(+) diff --git a/R/visualization.R b/R/visualization.R index 3dd319449..28e8be1a2 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -560,6 +560,20 @@ VlnPlot <- function( split.plot = FALSE, combine = TRUE ) { + if ( + !is.null(x = split.by) & + getOption(x = 'Seurat.warn.vlnplot.split', default = TRUE) + ) { + warning( + "The 
default behaviour of split.by has changed.\n", + "Separate violin plots are now plotted side-by-side.\n", + "To restore the old behaviour of a single split violin,\n", + "set split.plot = TRUE", + call. = FALSE, + immediate. = TRUE + ) + options(Seurat.warn.vlnplot.split = FALSE) + } return(ExIPlot( object = object, type = ifelse(test = split.plot, yes = 'splitViolin', no = 'violin'), diff --git a/R/zzz.R b/R/zzz.R index 3befe3eba..7cf834be7 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -28,6 +28,7 @@ NULL seurat_default_options <- list( Seurat.memsafe = FALSE, Seurat.warn.umap.uwot = TRUE, + Seurat.warn.vlnplot.split = TRUE, Seurat.checkdots = "warn", Seurat.limma.wilcox.msg = TRUE ) From 099128235061a1591c191739013c698f9fe5429a Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Tue, 7 Apr 2020 15:12:08 -0400 Subject: [PATCH 084/111] Change to message --- R/visualization.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 28e8be1a2..103aadc82 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -564,13 +564,12 @@ VlnPlot <- function( !is.null(x = split.by) & getOption(x = 'Seurat.warn.vlnplot.split', default = TRUE) ) { - warning( + message( "The default behaviour of split.by has changed.\n", "Separate violin plots are now plotted side-by-side.\n", "To restore the old behaviour of a single split violin,\n", - "set split.plot = TRUE", - call. = FALSE, - immediate. = TRUE + "set split.plot = TRUE. + \nThis message will be shown once per session." ) options(Seurat.warn.vlnplot.split = FALSE) } From c31192eb77bfba96a13ffbaa9a286a4d5484af22 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 7 Apr 2020 23:33:06 -0400 Subject: [PATCH 085/111] add uwot-learn docu --- R/dimensional_reduction.R | 1 + man/RunUMAP.Rd | 1 + man/as.loom.Rd | 15 --------------- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 8637ed066..17939376f 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1429,6 +1429,7 @@ RunUMAP.Graph <- function( #' @param umap.method UMAP implementation to run. Can be #' \describe{ #' \item{\code{uwot}:}{Runs umap via the uwot R package} +#' \item{\code{uwot-learn}:}{Runs umap via the uwot R package and return the learned umap model} #' \item{\code{umap-learn}:}{Run the Seurat wrapper of the python umap-learn package} #' } #' @param n.neighbors This determines the number of neighboring points used in diff --git a/man/RunUMAP.Rd b/man/RunUMAP.Rd index 3bfddb124..5d5d3c8bb 100644 --- a/man/RunUMAP.Rd +++ b/man/RunUMAP.Rd @@ -103,6 +103,7 @@ if running UMAP on a Graph} \item{umap.method}{UMAP implementation to run. Can be \describe{ \item{\code{uwot}:}{Runs umap via the uwot R package} + \item{\code{uwot-learn}:}{Runs umap via the uwot R package and return the learned umap model} \item{\code{umap-learn}:}{Run the Seurat wrapper of the python umap-learn package} }} diff --git a/man/as.loom.Rd b/man/as.loom.Rd index 6fe8c7b65..936b0b4c4 100644 --- a/man/as.loom.Rd +++ b/man/as.loom.Rd @@ -22,22 +22,7 @@ as.loom(x, ...) \arguments{ \item{x}{An object to convert to class \code{loom}} -\item{...}{Ignored for now} - \item{assay}{Assay to store in loom file} - -\item{filename}{The name of the new loom file} - -\item{max.size}{Set maximum chunk size in terms of memory usage, unused if \code{chunk.dims} is set; -may pass a character string (eg. 
\code{3gb}, \code{1200mb}) or exact value in bytes} - -\item{chunk.dims}{Matrix chunk dimensions; auto-determined by default} - -\item{chunk.size}{Maximum number of cells read/written to disk at once; auto-determined by default} - -\item{overwrite}{Overwrite an already existing loom file?} - -\item{verbose}{Display a progress bar} } \description{ Convert objects to loom objects From eadaeb9fc1db7acdf827ad64f79a859fada7ca27 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 7 Apr 2020 23:43:02 -0400 Subject: [PATCH 086/111] Update documentation --- man/as.loom.Rd | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/man/as.loom.Rd b/man/as.loom.Rd index 936b0b4c4..6fe8c7b65 100644 --- a/man/as.loom.Rd +++ b/man/as.loom.Rd @@ -22,7 +22,22 @@ as.loom(x, ...) \arguments{ \item{x}{An object to convert to class \code{loom}} +\item{...}{Ignored for now} + \item{assay}{Assay to store in loom file} + +\item{filename}{The name of the new loom file} + +\item{max.size}{Set maximum chunk size in terms of memory usage, unused if \code{chunk.dims} is set; +may pass a character string (eg. \code{3gb}, \code{1200mb}) or exact value in bytes} + +\item{chunk.dims}{Matrix chunk dimensions; auto-determined by default} + +\item{chunk.size}{Maximum number of cells read/written to disk at once; auto-determined by default} + +\item{overwrite}{Overwrite an already existing loom file?} + +\item{verbose}{Display a progress bar} } \description{ Convert objects to loom objects From 767520faed161af737223ed29fe3560b71d4579f Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 7 Apr 2020 23:54:03 -0400 Subject: [PATCH 087/111] Style fixes Check to make sure reduction.model is a DimReduc --- R/dimensional_reduction.R | 16 +++++++++++----- R/zzz.R | 4 ++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 17939376f..ed1bc689c 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1285,15 +1285,21 @@ RunUMAP.default <- function( ) metric <- 'cosine' } - if ( is.null(reduction.model) ) { - stop("If using uwot-predict, please pass a DimReduc object with the model stored to reduction.model.") + if (is.null(x = reduction.model) || !inherits(x = reduction.model, what = 'DimReduc')) { + stop( + "If using uwot-predict, please pass a DimReduc object with the model stored to reduction.model.", + call. = FALSE + ) } model <- reduction.model %||% Misc( - object = reduction.model, + object = reduction.model, slot = "model" ) if (length(x = model) == 0) { - stop("The provided reduction.model does not have a model stored. Please try running umot-learn on the object first") + stop( + "The provided reduction.model does not have a model stored. Please try running umot-learn on the object first", + call. = FALSE + ) } umap_transform( X = object, @@ -1415,7 +1421,7 @@ RunUMAP.Graph <- function( return(umap) } -#' @param reduction.model \code{DimReduc} object that contains the umap model +#' @param reduction.model \code{DimReduc} object that contains the umap model #' @param dims Which dimensions to use as input features, used only if #' \code{features} is NULL #' @param reduction Which dimensional reduction (PCA or ICA) to use for the diff --git a/R/zzz.R b/R/zzz.R index 23d052f38..895a84880 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,5 +1,5 @@ #' Seurat package -#' +#' #' Tools for single-cell genomics #' #' @section Package options: @@ -17,7 +17,7 @@ #' \item{\code{Seurat.checkdots}}{For functions that have ... 
as a parameter, #' this controls the behavior when an item isn't used. Can be one of warn, #' stop, or silent.} -#' \item{\code{Seurat.limma.wilcox.msg}}{{Show message about more efficient +#' \item{\code{Seurat.limma.wilcox.msg}}{{Show message about more efficient #' Wilcoxon Rank Sum test available via the limma package}} #' } #' From 916bd3c8ed949da25604b692e4f0db0e52484faf Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Wed, 8 Apr 2020 09:14:27 -0400 Subject: [PATCH 088/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index aefbc439d..1b0984792 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9021 -Date: 2020-04-06 +Version: 3.1.4.9022 +Date: 2020-04-08 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( From 994bcaef8ee61ded2421f484677cf9eb417a6286 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Wed, 8 Apr 2020 09:28:59 -0400 Subject: [PATCH 089/111] add docs for split violin message option --- R/zzz.R | 10 ++++++---- man/Seurat-package.Rd | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 7cf834be7..f003038f6 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -15,8 +15,10 @@ #' \item{\code{Seurat.checkdots}}{For functions that have ... as a parameter, #' this controls the behavior when an item isn't used. Can be one of warn, #' stop, or silent.} -#' \item{\code{Seurat.limma.wilcox.msg}}{{Show message about more efficient -#' Wilcoxon Rank Sum test available via the limma package}} +#' \item{\code{Seurat.limma.wilcox.msg}}{Show message about more efficient +#' Wilcoxon Rank Sum test available via the limma package} +#' \item{\code{Seurat.warn.vlnplot.split}}{Show message about changes to +#' default behavior of split/multi violin plots} #' } #' #' @docType package @@ -28,9 +30,9 @@ NULL seurat_default_options <- list( Seurat.memsafe = FALSE, Seurat.warn.umap.uwot = TRUE, - Seurat.warn.vlnplot.split = TRUE, Seurat.checkdots = "warn", - Seurat.limma.wilcox.msg = TRUE + Seurat.limma.wilcox.msg = TRUE, + Seurat.warn.vlnplot.split = TRUE ) .onLoad <- function(libname, pkgname) { diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 70fb5278d..b7a26d031 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -23,8 +23,10 @@ Seurat uses the following [options()] to configure behaviour: \item{\code{Seurat.checkdots}}{For functions that have ... as a parameter, this controls the behavior when an item isn't used. 
Can be one of warn, stop, or silent.} - \item{\code{Seurat.limma.wilcox.msg}}{{Show message about more efficient - Wilcoxon Rank Sum test available via the limma package}} + \item{\code{Seurat.limma.wilcox.msg}}{Show message about more efficient + Wilcoxon Rank Sum test available via the limma package} + \item{\code{Seurat.warn.vlnplot.split}}{Show message about changes to + default behavior of split/multi violin plots} } } From aa6e48df2f631a6a7a6f1c72cdd1f1c66f5dd237 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Wed, 8 Apr 2020 09:46:40 -0400 Subject: [PATCH 090/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1b0984792..97aa40b95 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9022 +Version: 3.1.4.9023 Date: 2020-04-08 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. From 18e5afb9f10b158514f549147790269443d97d84 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Wed, 8 Apr 2020 10:31:40 -0400 Subject: [PATCH 091/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 97aa40b95..4ea69b793 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9023 +Version: 3.1.4.9024 Date: 2020-04-08 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
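Patches 083-089 above change how split violin plots are requested: separate violins per split level are now the default, the old single split-violin geometry is opt-in via split.plot = TRUE, and a one-time message (governed by the documented Seurat.warn.vlnplot.split option) announces the change. A minimal usage sketch of that interface; the feature name "MS4A1" and the metadata column "groups" are illustrative assumptions about the bundled pbmc_small object, not values taken from these patches:

    library(Seurat)

    # New default: one violin per split level, drawn side by side.
    # The first such call in a session prints the one-time explanatory message.
    VlnPlot(pbmc_small, features = "MS4A1", split.by = "groups")

    # Opt back into the single split-violin geometry.
    VlnPlot(pbmc_small, features = "MS4A1", split.by = "groups", split.plot = TRUE)

    # Silence the message up front via the session option documented in zzz.R.
    options(Seurat.warn.vlnplot.split = FALSE)
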
From 3822859fd7b2b0e04d9175f8f56eecdd27c40d31 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 8 Apr 2020 18:31:39 -0400 Subject: [PATCH 092/111] Fix issue with NSE in subset Filtering multiple times on the same bit of data caused pulling the column multiple times subset/substitute/eval could handle this rlang apparently cannot --- R/objects.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/objects.R b/R/objects.R index a9c4a66e3..3dd02cf5f 100644 --- a/R/objects.R +++ b/R/objects.R @@ -4929,7 +4929,7 @@ WhichCells.Seurat <- function( ) data.subset <- FetchData( object = object, - vars = expr.char[vars.use], + vars = unique(x = expr.char[vars.use]), cells = cells, slot = slot ) From 733a17262d125c993e031e22f46fd80377ed83c5 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Thu, 9 Apr 2020 00:21:05 -0400 Subject: [PATCH 093/111] bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4ea69b793..da3cfa67c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9024 -Date: 2020-04-08 +Version: 3.1.4.9025 +Date: 2020-04-09 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( From 72dcc3f26853b1b639bf3b60bd05edcd3c3776b8 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Thu, 9 Apr 2020 17:06:11 -0400 Subject: [PATCH 094/111] Add stip.suffix option to Read10X --- R/preprocessing.R | 10 ++++++++-- man/Read10X.Rd | 9 ++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 018c9bf83..f2805779e 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -832,6 +832,7 @@ ReadAlevin <- function(base.path) { #' will be prefixed with the name. #' @param gene.column Specify which column of genes.tsv or features.tsv to use for gene names; default is 2 #' @param unique.features Make feature names unique (default TRUE) +#' @param strip.suffix Remove trailing "-1" if present in all cell barcodes. #' #' @return If features.csv indicates the data has multiple data types, a list #' containing a sparse matrix of the data from each type will be returned. 
@@ -857,7 +858,12 @@ ReadAlevin <- function(base.path) { #' seurat_object[['Protein']] = CreateAssayObject(counts = data$`Antibody Capture`) #' } #' -Read10X <- function(data.dir = NULL, gene.column = 2, unique.features = TRUE) { +Read10X <- function( + data.dir = NULL, + gene.column = 2, + unique.features = TRUE, + strip.suffix = FALSE +) { full.data <- list() for (i in seq_along(along.with = data.dir)) { run <- data.dir[i] @@ -888,7 +894,7 @@ Read10X <- function(data.dir = NULL, gene.column = 2, unique.features = TRUE) { } data <- readMM(file = matrix.loc) cell.names <- readLines(barcode.loc) - if (all(grepl(pattern = "\\-1$", x = cell.names))) { + if (all(grepl(pattern = "\\-1$", x = cell.names)) & strip.suffix) { cell.names <- as.vector(x = as.character(x = sapply( X = cell.names, FUN = ExtractField, diff --git a/man/Read10X.Rd b/man/Read10X.Rd index fdf87d001..23e935ad0 100644 --- a/man/Read10X.Rd +++ b/man/Read10X.Rd @@ -4,7 +4,12 @@ \alias{Read10X} \title{Load in data from 10X} \usage{ -Read10X(data.dir = NULL, gene.column = 2, unique.features = TRUE) +Read10X( + data.dir = NULL, + gene.column = 2, + unique.features = TRUE, + strip.suffix = FALSE +) } \arguments{ \item{data.dir}{Directory containing the matrix.mtx, genes.tsv (or features.tsv), and barcodes.tsv @@ -15,6 +20,8 @@ will be prefixed with the name.} \item{gene.column}{Specify which column of genes.tsv or features.tsv to use for gene names; default is 2} \item{unique.features}{Make feature names unique (default TRUE)} + +\item{strip.suffix}{Remove trailing "-1" if present in all cell barcodes.} } \value{ If features.csv indicates the data has multiple data types, a list From 9e2d98d752a8d1ac233a4ef0bffeecb1caa88a31 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 9 Apr 2020 17:42:11 -0400 Subject: [PATCH 095/111] Fix tests --- tests/testthat/test_load_10X.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_load_10X.R b/tests/testthat/test_load_10X.R index 25bd216f0..1ab15b134 100644 --- a/tests/testthat/test_load_10X.R +++ b/tests/testthat/test_load_10X.R @@ -9,7 +9,7 @@ test_that("Cell Ranger 3.0 Data Parsing", { expect_is(test.data, "list") expect_equal(ncol(test.data$`Gene Expression`), .5 * ncol(test.data2$`Gene Expression`)) expect_equal(ncol(test.data$`Antibody Capture`), .5 * ncol(test.data2$`Antibody Capture`)) - expect_equal(colnames(test.data2[[1]])[6], "2_AAAGTAGCACAGTCGC") + expect_equal(colnames(test.data2[[1]])[6], "2_AAAGTAGCACAGTCGC-1") expect_equal(test.data$`Gene Expression`[2,2], 1000) }) @@ -17,7 +17,7 @@ test_that("Cell Ranger 3.0 Data Parsing", { test.data3 <- Read10X("../testdata/") test_that("Read10X creates sparse matrix", { expect_is(test.data3, "dgCMatrix") - expect_equal(colnames(test.data3)[1], "ATGCCAGAACGACT") + expect_equal(colnames(test.data3)[1], "ATGCCAGAACGACT-1") expect_equal(rownames(test.data3)[1], "MS4A1") }) From 3a1a12308fd248f4b75dfa70bc34411e4e1d49fb Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Apr 2020 19:19:21 -0400 Subject: [PATCH 096/111] add cells for group.by value --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 103aadc82..3d17ba33d 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1470,7 +1470,7 @@ FeatureScatter <- function( cells <- cells %||% colnames(x = object) group.by <- group.by %||% Idents(object = object)[cells] if (length(x = group.by) == 1) { - group.by <- object[[]][, group.by] + group.by <- object[[]][cells, 
group.by] } plot <- SingleCorPlot( data = FetchData( From 4c62567de7f4a063e120f7dc053017992880ec19 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Apr 2020 20:08:49 -0400 Subject: [PATCH 097/111] change blend rgb to rgba --- NAMESPACE | 1 + R/visualization.R | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f4d68b414..eea0ebbea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -490,6 +490,7 @@ importFrom(pbapply,pblapply) importFrom(pbapply,pbsapply) importFrom(plotly,layout) importFrom(plotly,plot_ly) +importFrom(plotly,toRGB) importFrom(png,readPNG) importFrom(reticulate,dict) importFrom(reticulate,import) diff --git a/R/visualization.R b/R/visualization.R index 103aadc82..e1ab94ae3 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -3491,7 +3491,7 @@ BlendMap <- function(color.matrix) { # # @return An n x n matrix of blended colors # -#' @importFrom grDevices rgb colorRamp +#' @importFrom plotly toRGB # BlendMatrix <- function( n = 10, @@ -3502,10 +3502,13 @@ BlendMatrix <- function( if (0 > col.threshold || col.threshold > 1) { stop("col.threshold must be between 0 and 1") } - C0 <- colorRamp(colors = negative.color)(1) - ramp <- colorRamp(colors = two.colors) - C1 <- ramp(x = 0) - C2 <- ramp(x = 1) + C0 <- as.numeric(strsplit(gsub("[rgba(]|[)]", "" , plotly::toRGB(negative.color)), ",")[[1]]) + C1 <- as.numeric(strsplit(gsub("[rgba(]|[)]", "" , plotly::toRGB(two.colors[1])), ",")[[1]]) + C2 <- as.numeric(strsplit(gsub("[rgba(]|[)]", "" , plotly::toRGB(two.colors[2])), ",")[[1]]) + blend_alpha <- 255*(C1[4] + C2[4])/2 + C0 <- C0[-4] + C1 <- C1[-4] + C2 <- C2[-4] merge.weight <- min(255 / (C1 + C2 + C0 + 0.01)) sigmoid <- function(x) { return(1 / (1 + exp(-x))) @@ -3518,6 +3521,7 @@ BlendMatrix <- function( C0, C1, C2, + alpha, merge.weight ) { c.min <- sigmoid(5 * (1 / n - col.threshold)) @@ -3538,10 +3542,10 @@ BlendMatrix <- function( C_blend[C_blend > 255] <- 255 C_blend[C_blend < 0] <- 0 return(rgb( - red = C_blend[, 1], - green = C_blend[, 2], - blue = C_blend[, 3], - alpha = 255, + red = C_blend[1], + green = C_blend[2], + blue = C_blend[3], + alpha = alpha, maxColorValue = 255 )) } @@ -3556,6 +3560,7 @@ BlendMatrix <- function( C0 = C0, C1 = C1, C2 = C2, + alpha = blend_alpha, merge.weight = merge.weight ) } From 272b106fe5344e5a19ed591ccd46d18093b9e7e3 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Thu, 9 Apr 2020 21:19:16 -0400 Subject: [PATCH 098/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index da3cfa67c..d2cd532cd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9025 +Version: 3.1.4.9026 Date: 2020-04-09 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
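Patch 094 above turns the old barcode-suffix trimming in Read10X into an explicit argument: by default cell barcodes now keep a trailing "-1" (as the updated tests expect), and strip.suffix = TRUE restores the previous trimming behaviour. A short sketch of both calls; the directory path "filtered_feature_bc_matrix/" is a placeholder for a Cell Ranger output folder, not a path taken from the patch:

    library(Seurat)

    # Default (strip.suffix = FALSE): barcodes such as "ATGCCAGAACGACT-1" are kept as-is.
    counts <- Read10X(data.dir = "filtered_feature_bc_matrix/")

    # Opt back into trimming the "-1" suffix when every barcode carries it.
    counts.stripped <- Read10X(data.dir = "filtered_feature_bc_matrix/", strip.suffix = TRUE)

    # Either matrix can then seed an object as usual.
    seurat_object <- CreateSeuratObject(counts = counts)
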
From 0c7b51d2ac46cf5bea88787b106dacc77b49febf Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Apr 2020 23:13:14 -0400 Subject: [PATCH 099/111] DimPlot format --- R/visualization.R | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 3d17ba33d..c21ae32e6 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1443,7 +1443,8 @@ CellScatter <- function( #' @param span Spline span in loess function call, if \code{NULL}, no spline added #' @param smooth Smooth the graph (similar to smoothScatter) #' @param slot Slot to pull data from, should be one of 'counts', 'data', or 'scale.data' -#' +#' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} +#' #' @return A ggplot object #' #' @importFrom ggplot2 geom_smooth aes_string @@ -1465,21 +1466,31 @@ FeatureScatter <- function( shape.by = NULL, span = NULL, smooth = FALSE, + combine = TRUE, slot = 'data' ) { cells <- cells %||% colnames(x = object) + data <- FetchData( + object = object, + vars = c(feature1, feature2), + cells = cells, + slot = slot + ) + data <- as.data.frame(x = data) group.by <- group.by %||% Idents(object = object)[cells] - if (length(x = group.by) == 1) { - group.by <- object[[]][cells, group.by] + object[['ident']] <- Idents(object = object) + group.by <- group.by %||% 'ident' + data[, group.by] <- object[[group.by]][cells, , drop = FALSE] + for (group in group.by) { + if (!is.factor(x = data[, group])) { + data[, group] <- factor(x = data[, group]) + } } - plot <- SingleCorPlot( - data = FetchData( - object = object, - vars = c(feature1, feature2), - cells = cells, - slot = slot - ), - col.by = group.by, + plots <- lapply(X = group.by, + FUN = function(x) { + plot <- SingleCorPlot( + data = data[,c(feature1, feature2)], + col.by = data[, x], cols = cols, pt.size = pt.size, smooth = smooth, @@ -1487,6 +1498,12 @@ FeatureScatter <- function( span = span ) return(plot) + } + ) + if (combine) { + plots <- wrap_plots(plots, ncol = length(group.by)) + } + return(plots) } #' View variable features From 428b9f71ce95ee3fb9a38c9e78be18839f28268f Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 10 Apr 2020 12:41:25 -0400 Subject: [PATCH 100/111] Update assay.used for DimReduc objects; #2832 --- R/objects.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/objects.R b/R/objects.R index 3dd02cf5f..8c6a05443 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1309,6 +1309,12 @@ RenameAssays <- function(object, ...) 
{ DefaultAssay(object = object) <- new } Key(object = object[[new]]) <- old.key + # change assay used in any dimreduc object + for (i in Reductions(object = object)) { + if (DefaultAssay(object = object[[i]]) == old) { + DefaultAssay(object = object[[i]]) <- new + } + } object[[old]] <- NULL } return(object) From 937d1c8aaa0e7627670ae7167be6e2f4aa689470 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 10 Apr 2020 12:55:31 -0400 Subject: [PATCH 101/111] fix idents bug --- R/visualization.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index c21ae32e6..bc02f3fc3 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1477,7 +1477,6 @@ FeatureScatter <- function( slot = slot ) data <- as.data.frame(x = data) - group.by <- group.by %||% Idents(object = object)[cells] object[['ident']] <- Idents(object = object) group.by <- group.by %||% 'ident' data[, group.by] <- object[[group.by]][cells, , drop = FALSE] From 5ff01a98755cdeca0b2c23a8748d98476b279766 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 10 Apr 2020 13:21:20 -0400 Subject: [PATCH 102/111] Bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d2cd532cd..a35229cf8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9026 -Date: 2020-04-09 +Version: 3.1.4.9027 +Date: 2020-04-10 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
Authors@R: c( From 3cd92a3b68efec046e7a12035070fd99e2cafa65 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 10 Apr 2020 14:23:33 -0400 Subject: [PATCH 103/111] minor style tweaks --- R/visualization.R | 30 ++++++++++++++++-------------- man/FeatureScatter.Rd | 3 +++ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index bc02f3fc3..9f9adf7b1 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1448,6 +1448,8 @@ CellScatter <- function( #' @return A ggplot object #' #' @importFrom ggplot2 geom_smooth aes_string +#' @importFrom patchwork wrap_plots +#' #' @export #' #' @aliases GenePlot @@ -1485,22 +1487,22 @@ FeatureScatter <- function( data[, group] <- factor(x = data[, group]) } } - plots <- lapply(X = group.by, - FUN = function(x) { - plot <- SingleCorPlot( - data = data[,c(feature1, feature2)], - col.by = data[, x], - cols = cols, - pt.size = pt.size, - smooth = smooth, - legend.title = 'Identity', - span = span - ) - return(plot) + plots <- lapply( + X = group.by, + FUN = function(x) { + SingleCorPlot( + data = data[,c(feature1, feature2)], + col.by = data[, x], + cols = cols, + pt.size = pt.size, + smooth = smooth, + legend.title = 'Identity', + span = span + ) } ) - if (combine) { - plots <- wrap_plots(plots, ncol = length(group.by)) + if (isTRUE(x = combine)) { + plots <- wrap_plots(plots, ncol = length(x = group.by)) } return(plots) } diff --git a/man/FeatureScatter.Rd b/man/FeatureScatter.Rd index 599fcaffb..cbba72eed 100644 --- a/man/FeatureScatter.Rd +++ b/man/FeatureScatter.Rd @@ -16,6 +16,7 @@ FeatureScatter( shape.by = NULL, span = NULL, smooth = FALSE, + combine = TRUE, slot = "data" ) } @@ -42,6 +43,8 @@ be metrics, PC scores, etc. - anything that can be retreived with FetchData} \item{smooth}{Smooth the graph (similar to smoothScatter)} +\item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed}} + \item{slot}{Slot to pull data from, should be one of 'counts', 'data', or 'scale.data'} } \value{ From 211c8243caedcae7d7aec019605cf46ca8553fb0 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Fri, 10 Apr 2020 14:25:15 -0400 Subject: [PATCH 104/111] bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a35229cf8..7be571c32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9027 +Version: 3.1.4.9028 Date: 2020-04-10 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. 
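Patches 099-104 above extend FeatureScatter so that group.by accepts one or more grouping variables (fetched alongside the two features) and the resulting panels are combined with patchwork::wrap_plots() unless combine = FALSE. A minimal sketch against the bundled pbmc_small object; the metadata names used here ("groups" plus the standard nCount_RNA/nFeature_RNA columns) are assumptions for illustration, not taken from the patches:

    library(Seurat)

    # One scatter panel per grouping variable, combined side by side.
    FeatureScatter(
      object   = pbmc_small,
      feature1 = "nCount_RNA",
      feature2 = "nFeature_RNA",
      group.by = c("ident", "groups")
    )

    # combine = FALSE returns the underlying list of ggplot objects instead.
    plots <- FeatureScatter(
      pbmc_small, "nCount_RNA", "nFeature_RNA",
      group.by = c("ident", "groups"),
      combine = FALSE
    )
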
From 5c1764ccbb291b11371efee0905bcceb4128d763 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 10 Apr 2020 14:30:30 -0400 Subject: [PATCH 105/111] put group.by into FetchData --- R/visualization.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 9f9adf7b1..b5792e523 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1472,16 +1472,16 @@ FeatureScatter <- function( slot = 'data' ) { cells <- cells %||% colnames(x = object) + object[['ident']] <- Idents(object = object) + group.by <- group.by %||% 'ident' + data <- FetchData( object = object, - vars = c(feature1, feature2), + vars = c(feature1, feature2, group.by), cells = cells, slot = slot ) data <- as.data.frame(x = data) - object[['ident']] <- Idents(object = object) - group.by <- group.by %||% 'ident' - data[, group.by] <- object[[group.by]][cells, , drop = FALSE] for (group in group.by) { if (!is.factor(x = data[, group])) { data[, group] <- factor(x = data[, group]) From 9c97bd78c0ed30514f7a391c86c17a3b7d126638 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 10 Apr 2020 14:52:11 -0400 Subject: [PATCH 106/111] change to col2rgb --- NAMESPACE | 1 - R/visualization.R | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index eea0ebbea..f4d68b414 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -490,7 +490,6 @@ importFrom(pbapply,pblapply) importFrom(pbapply,pbsapply) importFrom(plotly,layout) importFrom(plotly,plot_ly) -importFrom(plotly,toRGB) importFrom(png,readPNG) importFrom(reticulate,dict) importFrom(reticulate,import) diff --git a/R/visualization.R b/R/visualization.R index e1ab94ae3..27f1cdf4c 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -3491,7 +3491,7 @@ BlendMap <- function(color.matrix) { # # @return An n x n matrix of blended colors # -#' @importFrom plotly toRGB +#' @importFrom grDevices col2rgb # BlendMatrix <- function( n = 10, @@ -3502,10 +3502,10 @@ BlendMatrix <- function( if (0 > col.threshold || col.threshold > 1) { stop("col.threshold must be between 0 and 1") } - C0 <- as.numeric(strsplit(gsub("[rgba(]|[)]", "" , plotly::toRGB(negative.color)), ",")[[1]]) - C1 <- as.numeric(strsplit(gsub("[rgba(]|[)]", "" , plotly::toRGB(two.colors[1])), ",")[[1]]) - C2 <- as.numeric(strsplit(gsub("[rgba(]|[)]", "" , plotly::toRGB(two.colors[2])), ",")[[1]]) - blend_alpha <- 255*(C1[4] + C2[4])/2 + C0 <- as.vector(col2rgb(negative.color, alpha = TRUE)) + C1 <- as.vector(col2rgb(two.colors[1], alpha = TRUE)) + C2 <- as.vector(col2rgb(two.colors[2], alpha = TRUE)) + blend_alpha <- (C1[4] + C2[4])/2 C0 <- C0[-4] C1 <- C1[-4] C2 <- C2[-4] From 1c596f8bd809e6eba330850a368f3677024d2bf3 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 10 Apr 2020 14:53:00 -0400 Subject: [PATCH 107/111] Fix whitespace issues --- R/visualization.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index b5792e523..5314c0928 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1444,12 +1444,12 @@ CellScatter <- function( #' @param smooth Smooth the graph (similar to smoothScatter) #' @param slot Slot to pull data from, should be one of 'counts', 'data', or 'scale.data' #' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} -#' +#' #' @return A ggplot object #' #' @importFrom ggplot2 geom_smooth aes_string #' @importFrom patchwork wrap_plots -#' +#' #' @export #' #' @aliases GenePlot @@ -1468,13 +1468,12 @@ FeatureScatter <- 
function( shape.by = NULL, span = NULL, smooth = FALSE, - combine = TRUE, + combine = TRUE, slot = 'data' ) { cells <- cells %||% colnames(x = object) object[['ident']] <- Idents(object = object) group.by <- group.by %||% 'ident' - data <- FetchData( object = object, vars = c(feature1, feature2, group.by), @@ -1488,7 +1487,7 @@ FeatureScatter <- function( } } plots <- lapply( - X = group.by, + X = group.by, FUN = function(x) { SingleCorPlot( data = data[,c(feature1, feature2)], From 12a5554a4e38752e0755afe5172dc71215744b44 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 10 Apr 2020 15:02:18 -0400 Subject: [PATCH 108/111] Bump develop version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7be571c32..2f4ece47c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 3.1.4.9028 +Version: 3.1.4.9029 Date: 2020-04-10 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. From e61561a46dcf2761b90e7460acd85ce3ed092e1d Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Sat, 11 Apr 2020 20:01:43 -0400 Subject: [PATCH 109/111] update DESCRIPTION, NEWS, README files --- DESCRIPTION | 4 ++-- NEWS.md | 18 +++++++++++++++++- README.md | 2 +- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2f4ece47c..065ed8ebd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 3.1.4.9029 -Date: 2020-04-10 +Version: 3.1.5 +Date: 2020-04-14 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 8c41752c4..29bae530a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,10 +2,26 @@ All notable changes to Seurat will be documented in this file. 
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) -## Develop +## [3.1.5] - 2020-04-14 +### Added +- New `scale` parameter in `DotPlot` +- New `keep.sparse parameter in `CreateGeneActivityMatrix` for a more memory efficient option +- Added ability to store model learned by UMAP and project new data +- New `stip.suffix` option in `Read10X` +- Added `group.by` parameter to `FeatureScatter` + ### Changes - Replace wilcox.test with limma implementation for a faster FindMarkers default method - Better point separation for `VlnPlot`s when using the `split.by` option +- Efficiency improvements for anchor pairing +- Deprecate redundant `sort.cell` parameter in `FeaturePlot` +- Fixes to ensure correct class of Matrix passed to c++ functions +- Fixes for underscores in ident labels for `DotPlot` +- Ensure preservation of matrix dimnames in `SampleUMI` +- Fix non-standard evaluation problems in `subset` and `WhichCells` +- Default split violin option is now a multi group option +- Preserve alpha in `FeaturePlot` when using `blend` +- Update `assay.used` slot for `DimReduc`s when Assay is renamed ## [3.1.4] - 2020-02-20 ### Changes diff --git a/README.md b/README.md index 8133d96b9..3be4fd4de 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![CRAN Version](https://www.r-pkg.org/badges/version/Seurat)](https://cran.r-project.org/package=Seurat) [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/Seurat)](https://cran.r-project.org/package=Seurat) -# Seurat v3.1.4 +# Seurat v3.1.5 Seurat is an R toolkit for single cell genomics, developed and maintained by the Satija Lab at NYGC. From 0bbb2797e0a717aa7dc09d95781be53987f1907f Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Mon, 13 Apr 2020 09:51:18 -0400 Subject: [PATCH 110/111] only wrap if more than 1 plot, better error msgs in FeatureScatter --- R/visualization.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/visualization.R b/R/visualization.R index 6694bc686..1e19939d9 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1480,6 +1480,12 @@ FeatureScatter <- function( cells = cells, slot = slot ) + if (isFALSE(x = feature1 %in% colnames(x = data))) { + stop("Feature 1 (", feature1, ") not found.", call. = FALSE) + } + if (isFALSE(x = feature2 %in% colnames(x = data))) { + stop("Feature 2 (", feature2, ") not found.", call. = FALSE) + } data <- as.data.frame(x = data) for (group in group.by) { if (!is.factor(x = data[, group])) { @@ -1500,6 +1506,9 @@ FeatureScatter <- function( ) } ) + if (isTRUE(x = length(x = plots) == 1)) { + return(plots[[1]]) + } if (isTRUE(x = combine)) { plots <- wrap_plots(plots, ncol = length(x = group.by)) } From 60b0f18aa6445c3d30e2142463b3fff6e0d87608 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 14 Apr 2020 15:02:42 -0700 Subject: [PATCH 111/111] Update cran-comments and DESCRIPTION --- DESCRIPTION | 2 +- cran-comments.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 065ed8ebd..95dbbc39e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: Seurat Version: 3.1.5 Date: 2020-04-14 Title: Tools for Single Cell Genomics -Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. 
See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages("https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz", repos = NULL); it is not in the standard repositories. +Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Stuart T, Butler A, et al (2019) for more details. Please note: SDMTools is available is available from the CRAN archives with install.packages(<"https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz">, repos = NULL); it is not in the standard repositories. Authors@R: c( person(given = 'Rahul', family = 'Satija', email = 'rsatija@nygenome.org', role = 'aut', comment = c(ORCID = '0000-0001-9448-8833')), person(given = 'Andrew', family = 'Butler', email = 'abutler@nygenome.org', role = 'aut', comment = c(ORCID = '0000-0003-3608-0463')), diff --git a/cran-comments.md b/cran-comments.md index 81a9a0ba4..1bbd87b9c 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,8 +1,8 @@ -# Seurat v3.1.4 +# Seurat v3.1.5 ## Test environments * local Ubuntu 16.04.6 and 18.04.2 installs, R 3.6.1 -* local Windows 10 install, R 3.5.3 +* local Windows 10 install, R 3.5.3, R-devel (4.1.0) * Ubuntu 16.04.6 (on travis-ci), R 3.6.1 * macOS 10.13.3 (on travis-ci), R 3.6.1 * Windows Server 2012 R2 (on AppVeyor), R 3.6.1 Patched @@ -40,6 +40,6 @@ There were 3 NOTEs: ## Downstream dependencies -There is one pacakge that imports Seurat: multicross; this update does not impact its functionality +There are three pacakges that imports Seurat: multicross, scMappR, and Signac; this update does not impact their functionality -There are three packages that suggest Seurat: BisqueRNA, clustree, and Rmagic; this update does not impact their functionality. +There are four packages that suggest Seurat: BisqueRNA, clustree, Rmagic, and treefit; this update does not impact their functionality.
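The Description field finalized above doubles as the install hint for the archived SDMTools dependency. A sketch of the sequence it describes, assuming an environment able to build source packages; the second call simply pulls the released Seurat from CRAN:

    # Install SDMTools from the CRAN archive URL given in DESCRIPTION (it is no longer
    # in the standard repositories), then install the Seurat 3.1.5 release.
    install.packages(
      "https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz",
      repos = NULL
    )
    install.packages("Seurat")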