Merge branch 'devel'

kharchenkolab · Mar 3, 2021 · c77a26e · c77a26e
2 parents 1fb2ebc + 0579dfe
commit c77a26e
Show file tree

Hide file tree

Showing 21 changed files with 440 additions and 662 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,4 @@
 .Rhistory
 .RData
 .Ruserdata
-pagoda2*.tar.gz
+pagoda2*.tar.gz
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 ## Upcoming
 
+## [1.0.2] - 2021-03-03
+
+### Changed
+
+- Revised vignettes figures for the HTML tutorial
+
 ## [1.0.1] - 2020-02-25
 
 ### Added

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: pagoda2
 Title: Single Cell Analysis and Differential Expression
-Version: 1.0.1
+Version: 1.0.2
 Authors@R: c(person("Nikolas","Barkas", email="nikolas_barkas@hms.harvard.edu", role="aut"), person("Viktor", "Petukhov", email="viktor.s.petuhov@ya.ru", role="aut"), person("Peter", "Kharchenko", email = "peter_kharchenko@hms.harvard.edu", role = "aut"), person("Simon", "Steiger", email = "simon.steiger@gmail.com", role = "ctb"), person("Evan", "Biederstedt", email="evan.biederstedt@gmail.com", role=c("cre", "aut")))
 Description: Analyzing and interactively exploring large-scale single-cell RNA-seq datasets. 'pagoda2' primarily performs normalization and differential gene expression analysis, with an interactive application for exploring single-cell RNA-seq datasets. It performs basic tasks such as cell size normalization, gene variance normalization, and can be used to identify subpopulations and run differential expression within individual samples. 'pagoda2' was written to rapidly process modern large-scale scRNAseq datasets of approximately 1e6 cells. The companion web application allows users to explore which gene expression patterns form the different subpopulations within your data. The package also serves as the primary method for preprocessing data for conos, <https://github.com/kharchenkolab/conos>. This package interacts with data available through the 'p2data' package, which is available in a 'drat' repository. To access this data package, see the instructions at <https://github.com/kharchenkolab/pagoda2>. The size of the 'p2data' package is approximately 6 MB.
 License: GPL-3 

diff --git a/README.md b/README.md
@@ -112,7 +112,7 @@ As of version 0.1.3, `pagoda2` should successfully install on Mac OS. Furthermore
 
 ### Pagoda2 via Docker 
 
-If you are having trouble setting up `pagoda2` on your system, an alternative approach to work with `pagoda2` is via a Docker container. To use the Docker container, first [install docker](https://docs.docker.com/install/) on your platform and then run the `pagoda2` image with the following command in the shell:
+If you are having trouble setting up `pagoda2` on your system, an alternative approach to work with `pagoda2` is via a Docker container. To use the Docker container, first [install docker](https://docs.docker.com/get-docker/) on your platform and then run the `pagoda2` image with the following command in the shell:
 
 ```
 docker run -p 8787:8787 -e PASSWORD=pass pkharchenkolab/pagoda2:latest

diff --git a/doc/pagoda2.walkthrough.R b/doc/pagoda2.walkthrough.R
@@ -42,16 +42,16 @@ cm[1:3, 1:3]
 ## -----------------------------------------------------------------------------
 str(cm)
 
-## ---- fig.height=8, fig.width=10----------------------------------------------
+## ---- fig.height=6, fig.width=6-----------------------------------------------
 old_par <- par(mfrow=c(1,2), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 1.0)
 on.exit(par(old_par))
 hist(log10(colSums(cm)+1), main='molecules per cell', col='cornsilk', xlab='molecules per cell (log10)')
 hist(log10(rowSums(cm)+1), main='molecules per gene', col='cornsilk', xlab='molecules per gene (log10)')
 
-## ---- fig.height=8, fig.width=10----------------------------------------------
+## ---- fig.height=6, fig.width=8-----------------------------------------------
 counts <- gene.vs.molecule.cell.filter(cm, min.cell.size=500)
 
-## -----------------------------------------------------------------------------
+## ---- fig.height=6, fig.width=6-----------------------------------------------
 hist(log10(rowSums(counts)+1), main='Molecules per gene', xlab='molecules (log10)', col='cornsilk')
 abline(v=1, lty=2, col=2)
 
@@ -63,7 +63,7 @@ dim(counts)
 rownames(counts) <- make.unique(rownames(counts))
 r <- Pagoda2$new(counts, log.scale=TRUE, n.cores=1)
 
-## ---- fig.height=8, fig.width=10----------------------------------------------
+## ---- fig.height=6, fig.width=8-----------------------------------------------
 r$adjustVariance(plot=TRUE, gam.k=10)
 
 ## -----------------------------------------------------------------------------
@@ -79,20 +79,22 @@ r$getKnnClusters(method=infomap.community, type='PCA')
 M <- 30
 r$getEmbedding(type='PCA', embeddingType = 'largeVis', M=M, perplexity=30, gamma=1/M)
 
-## -----------------------------------------------------------------------------
+## ---- fig.height=6, fig.width=6-----------------------------------------------
 r$plotEmbedding(type='PCA', show.legend=FALSE, mark.groups=TRUE, min.cluster.size=50, shuffle.colors=FALSE, font.size=3, alpha=0.3, title='clusters (largeVis)', plot.theme=theme_bw() + theme(plot.title = element_text(hjust = 0.5)))
 
-## -----------------------------------------------------------------------------
+## ---- fig.height=6, fig.width=6-----------------------------------------------
 r$getEmbedding(type='PCA', embeddingType='tSNE', perplexity=50,verbose=FALSE)
 r$plotEmbedding(type='PCA', embeddingType='tSNE', show.legend=FALSE, mark.groups=TRUE, min.cluster.size=1, shuffle.colors=FALSE, font.size=3, alpha=0.3, title='clusters (tSNE)', plot.theme=theme_bw() + theme(plot.title = element_text(hjust = 0.5)))
 
-## -----------------------------------------------------------------------------
+## ---- fig.height=6, fig.width=6-----------------------------------------------
 gene <-"HBB"
-r$plotEmbedding(type='PCA', embeddingType='tSNE', colors=r$counts[,gene], shuffle.colors=FALSE, font.size=3, alpha=0.3, title=gene, plot.theme=theme_bw() + theme(plot.title = element_text(hjust = 0.5)))
+r$plotEmbedding(type='PCA', embeddingType='tSNE', colors=r$counts[,gene], shuffle.colors=FALSE, 
+    font.size=3, alpha=0.3, title=gene, plot.theme=theme_bw() + theme(plot.title = element_text(hjust = 0.5)))
 
-## -----------------------------------------------------------------------------
+## ---- fig.height=6, fig.width=6-----------------------------------------------
 gene <-"LYZ"
-r$plotEmbedding(type='PCA', embeddingType='tSNE', colors=r$counts[,gene], shuffle.colors=FALSE, font.size=3, alpha=0.3, title=gene, plot.theme=theme_bw() + theme(plot.title = element_text(hjust = 0.5)))
+r$plotEmbedding(type='PCA', embeddingType='tSNE', colors=r$counts[,gene], shuffle.colors=FALSE, 
+    font.size=3, alpha=0.3, title=gene, plot.theme=theme_bw() + theme(plot.title = element_text(hjust = 0.5)))
 
 ## -----------------------------------------------------------------------------
 r$getKnnClusters(method=multilevel.community, type='PCA', name='multilevel')
@@ -101,7 +103,7 @@ r$getKnnClusters(method=walktrap.community, type='PCA', name='walktrap')
 ## -----------------------------------------------------------------------------
 str(r$clusters)
 
-## ---- fig.height=8, fig.width=12----------------------------------------------
+## ---- fig.height=6, fig.width=10----------------------------------------------
 plt1 = r$plotEmbedding(type='PCA', embeddingType='tSNE', groups=r$clusters$PCA$community, show.legend=FALSE, mark.groups=TRUE, min.cluster.size=1, shuffle.colors=FALSE, font.size=3, alpha=0.3, title='infomap clusters (tSNE)', plot.theme=theme_bw() + theme(plot.title = element_text(hjust = 0.5)))
 plt2 = r$plotEmbedding(type='PCA',embeddingType='tSNE', clusterType='multilevel', show.legend=FALSE, mark.groups=TRUE, min.cluster.size=1, shuffle.colors=FALSE, font.size=3, alpha=0.3, title='multlevel clusters (tSNE)', plot.theme=theme_bw() + theme(plot.title = element_text(hjust = 0.5)))
 plt3 = r$plotEmbedding(type='PCA',embeddingType='tSNE', clusterType='walktrap', show.legend=FALSE, mark.groups=TRUE, min.cluster.size=1, shuffle.colors=FALSE, font.size=3, alpha=0.3, title='walktrap clusters (tSNE)', plot.theme=theme_bw() + theme(plot.title = element_text(hjust = 0.5)))
@@ -110,13 +112,14 @@ gridExtra::grid.arrange(plt1, plt2, plt3, ncol=3)
 ## -----------------------------------------------------------------------------
 r$getDifferentialGenes(type='PCA', verbose=TRUE, clusterType='community')
 
-## -----------------------------------------------------------------------------
+## ---- fig.height=6, fig.width=6-----------------------------------------------
 de <- r$diffgenes$PCA[[1]][['2']]
 r$plotGeneHeatmap(genes=rownames(de)[1:15], groups=r$clusters$PCA[[1]])
 
-## -----------------------------------------------------------------------------
+## ---- fig.height=6, fig.width=6-----------------------------------------------
 gene <-"CD74"
-r$plotEmbedding(type='PCA', embeddingType='tSNE', colors=r$counts[,gene], shuffle.colors=FALSE, font.size=3, alpha=0.3, title=gene, legend.title=gene)
+r$plotEmbedding(type='PCA', embeddingType='tSNE', colors=r$counts[,gene], shuffle.colors=FALSE, 
+    font.size=3, alpha=0.3, title=gene, legend.title=gene)
 
 ## -----------------------------------------------------------------------------
 suppressMessages(library(org.Hs.eg.db))