-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
100 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# script to perform standard workflow steps to analyze single cell RNA-Seq data | ||
# data: 20k Mixture of NSCLC DTCs from 7 donors, 3' v3.1 | ||
# data source: https://www.10xgenomics.com/resources/datasets/10-k-human-pbm-cs-multiome-v-1-0-chromium-controller-1-standard-2-0-0 | ||
|
||
|
||
# Load libraries | ||
library(Seurat) | ||
library(tidyverse) | ||
|
||
# Load the NSCLC dataset | ||
nsclc.sparse.m <- Read10X_h5(filename = '20k_NSCLC_DTC_3p_nextgem_Multiplex_count_raw_feature_bc_matrix.h5') | ||
str(nsclc.sparse.m) | ||
|
||
# Genome matrix has multiple modalities, returning a list of matrices for this genome, select gene expression | ||
# Getting gene expression from matrix for analysis | ||
cts <- nsclc.sparse.m$`Gene Expression` | ||
|
||
# Initialize the Seurat object with the raw (non-normalized data). | ||
nsclc.seurat.obj <- CreateSeuratObject(counts = cts, project = "NSCLC", min.cells = 3, min.features = 200) | ||
str(nsclc.seurat.obj) | ||
nsclc.seurat.obj | ||
# 29552 features across 42081 samples | ||
|
||
# 1. Quality control | ||
View(nsclc.seurat.obj@meta.data) | ||
|
||
# % Mitochondrial reads | ||
#calculating the percentage of mitochondrial genes | ||
#all genes starting with mt save it to another object within the metadata | ||
nsclc.seurat.obj[["percent.mt"]] <- PercentageFeatureSet(nsclc.seurat.obj, pattern = "^MT-") | ||
View(nsclc.seurat.obj@meta.data) | ||
|
||
#Visualzing the features | ||
VlnPlot(nsclc.seurat.obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) | ||
FeatureScatter(nsclc.seurat.obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + | ||
geom_smooth(method = 'lm') | ||
#`geom_smooth()` using formula = 'y ~ x' | ||
|
||
# 2. Filtering | ||
nsclc.seurat.obj <- subset(nsclc.seurat.obj, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & | ||
percent.mt < 5) | ||
# 3. Normalize data | ||
#nsclc.seurat.obj <- NormalizeData(nsclc.seurat.obj, normalization.method = "LogNormalize", scale.factor = 10000) | ||
# OR | ||
#nsclc.seurat.obj <- NormalizeData(nsclc.seurat.obj) | ||
str(nsclc.seurat.obj) | ||
|
||
# 4. Identify highly variable features | ||
nsclc.seurat.obj <- FindVariableFeatures(nsclc.seurat.obj, selection.method = "vst", nfeatures = 2000) | ||
|
||
# Identify the 10 most highly variable genes | ||
top10 <- head(VariableFeatures(nsclc.seurat.obj), 10) | ||
|
||
# plot variable features with and without labels | ||
plot1 <- VariableFeaturePlot(nsclc.seurat.obj) | ||
LabelPoints(plot = plot1, points = top10, repel = TRUE) | ||
|
||
# 5. Centering and scaling data matrix | ||
#Error: 5.4 Gb, el tamaño tiene como límite la capacidad de RAM de tu PC | ||
|
||
all.genes <- rownames(nsclc.seurat.obj) | ||
nsclc.seurat.obj <- ScaleData(nsclc.seurat.obj, features = all.genes) | ||
|
||
str(nsclc.seurat.obj) | ||
|
||
# 6. Perform Linear dimensionality reduction | ||
nsclc.seurat.obj <- RunPCA(nsclc.seurat.obj, features = VariableFeatures(object = nsclc.seurat.obj)) | ||
|
||
# visualize PCA results | ||
print(nsclc.seurat.obj[["pca"]], dims = 1:5, nfeatures = 5) | ||
DimHeatmap(nsclc.seurat.obj, dims = 1, cells = 500, balanced = TRUE) | ||
|
||
|
||
# determine dimensionality of the data | ||
ElbowPlot(nsclc.seurat.obj) | ||
|
||
|
||
# 7. Clustering | ||
nsclc.seurat.obj <- FindNeighbors(nsclc.seurat.obj, dims = 1:15) | ||
|
||
# understanding resolution | ||
nsclc.seurat.obj <- FindClusters(nsclc.seurat.obj, resolution = c(0.1,0.3, 0.5, 0.7, 1)) | ||
View(nsclc.seurat.obj@meta.data) | ||
|
||
DimPlot(nsclc.seurat.obj, group.by = "RNA_snn_res.0.5", label = TRUE) | ||
|
||
# setting identity of clusters | ||
Idents(nsclc.seurat.obj) | ||
Idents(nsclc.seurat.obj) <- "RNA_snn_res.0.1" | ||
Idents(nsclc.seurat.obj) | ||
|
||
# non-linear dimensionality reduction | ||
# If you haven't installed UMAP, you can do so via reticulate::py_install(packages = | ||
# 'umap-learn') | ||
nsclc.seurat.obj <- RunUMAP(nsclc.seurat.obj, dims = 1:15) | ||
|
||
# note that you can set `label = TRUE` or use the LabelClusters function to help label | ||
# individual clusters | ||
DimPlot(nsclc.seurat.obj, reduction = "umap") | ||
|