-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
151 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
#Human DLPFC workflow | ||
#This workflow analyzes one sample of human brain from the dorsolateral prefrontal cortex (DLPFC) region, | ||
#measured using the 10x Genomics Visium platform | ||
#12 samples in total, from 3 individuals, with 2 pairs of spatially adjacent replicates (serial sections) per individual | ||
#(4 samples per individual). | ||
#Workflow https://lmweber.org/BestPracticesST/chapters/workflow-Visium-humanDLPFC.html | ||
|
||
library(SpatialExperiment) | ||
library(STexampleData) | ||
|
||
# load object | ||
spe <- Visium_humanDLPFC() | ||
spe | ||
|
||
library(ggspavis) | ||
# plot spatial coordinates (spots) | ||
plotSpots(spe) | ||
|
||
spe <- spe[, colData(spe)$in_tissue == 1] | ||
dim(spe) | ||
|
||
library(scater) | ||
|
||
# identify mitochondrial genes | ||
is_mito <- grepl("(^MT-)|(^mt-)", rowData(spe)$gene_name) | ||
table(is_mito) | ||
|
||
rowData(spe)$gene_name[is_mito] | ||
|
||
# calculate per-spot QC metrics and store in colData | ||
spe <- addPerCellQC(spe, subsets = list(mito = is_mito)) | ||
head(colData(spe), 3) | ||
|
||
# histograms of QC metrics | ||
par(mfrow = c(1, 4)) | ||
hist(colData(spe)$sum, xlab = "sum", main = "UMIs per spot") | ||
hist(colData(spe)$detected, xlab = "detected", main = "Genes per spot") | ||
hist(colData(spe)$subsets_mito_percent, xlab = "percent mitochondrial", main = "Percent mito UMIs") | ||
hist(colData(spe)$cell_count, xlab = "number of cells", main = "No. cells per spot") | ||
|
||
par(mfrow = c(1, 1)) | ||
|
||
# select QC thresholds | ||
qc_lib_size <- colData(spe)$sum < 600 | ||
qc_detected <- colData(spe)$detected < 400 | ||
qc_mito <- colData(spe)$subsets_mito_percent > 28 | ||
qc_cell_count <- colData(spe)$cell_count > 10 | ||
|
||
# number of discarded spots for each metric | ||
apply(cbind(qc_lib_size, qc_detected, qc_mito, qc_cell_count), 2, sum) | ||
|
||
# combined set of discarded spots | ||
discard <- qc_lib_size | qc_detected | qc_mito | qc_cell_count | ||
table(discard) | ||
|
||
# store in object | ||
colData(spe)$discard <- discard | ||
|
||
# check spatial pattern of discarded spots | ||
plotQC(spe, type = "spots", discard = "discard") | ||
|
||
# filter low-quality spots | ||
spe <- spe[, !colData(spe)$discard] | ||
dim(spe) | ||
|
||
library(scran) | ||
# calculate library size factors | ||
spe <- computeLibraryFactors(spe) | ||
|
||
summary(sizeFactors(spe)) | ||
|
||
hist(sizeFactors(spe), breaks = 20) | ||
|
||
hist(sizeFactors(spe), breaks = 20) | ||
|
||
# calculate logcounts and store in object | ||
spe <- logNormCounts(spe) | ||
|
||
assayNames(spe) | ||
|
||
# remove mitochondrial genes | ||
spe <- spe[!is_mito, ] | ||
dim(spe) | ||
|
||
# fit mean-variance relationship | ||
dec <- modelGeneVar(spe) | ||
|
||
# visualize mean-variance relationship | ||
fit <- metadata(dec) | ||
plot(fit$mean, fit$var, | ||
xlab = "mean of log-expression", ylab = "variance of log-expression") | ||
curve(fit$trend(x), col = "dodgerblue", add = TRUE, lwd = 2) | ||
|
||
# select top HVGs | ||
top_hvgs <- getTopHVGs(dec, prop = 0.1) | ||
length(top_hvgs) | ||
|
||
library(nnSVG) | ||
|
||
# subsample spots | ||
n <- 100 | ||
set.seed(123) | ||
ix <- sample(seq_len(n), n) | ||
|
||
spe_nnSVG <- spe[, ix] | ||
|
||
# filter low-expressed and mitochondrial genes | ||
# using very stringent filtering parameters for faster runtime in this example | ||
# note: for a full analysis, use alternative filtering parameters (e.g. defaults) | ||
spe_nnSVG <- filter_genes( | ||
spe_nnSVG, filter_genes_ncounts = 10, filter_genes_pcspots = 3 | ||
) | ||
|
||
# re-calculate logcounts after filtering | ||
# using library size factors | ||
spe_nnSVG <- logNormCounts(spe_nnSVG) | ||
|
||
# run nnSVG | ||
# using a single core for compatibility on build system | ||
# note: for a full analysis, use multiple cores | ||
set.seed(123) | ||
spe_nnSVG <- nnSVG(spe_nnSVG, n_threads = 1) | ||
|
||
# investigate results | ||
|
||
# show results | ||
#head(rowData(spe_nnSVG), 3) | ||
|
||
# number of significant SVGs | ||
table(rowData(spe_nnSVG)$padj <= 0.05) | ||
|
||
# show results for top n SVGs | ||
rowData(spe_nnSVG)[order(rowData(spe_nnSVG)$rank)[1:6], ] | ||
|
||
# identify top-ranked SVG | ||
rowData(spe_nnSVG)$gene_name[which(rowData(spe_nnSVG)$rank == 1)] | ||
|
||
# compute PCA | ||
set.seed(123) | ||
spe <- runPCA(spe, subset_row = top_hvgs) | ||
|
||
reducedDimNames(spe) | ||
|
||
dim(reducedDim(spe, "PCA")) | ||
|
||
# compute UMAP on top 50 PCs | ||
set.seed(123) | ||
spe <- runUMAP(spe, dimred = "PCA") | ||
|
||
reducedDimNames(spe) | ||
#Next step Clustering |