From cc359e2d89b2467a040b677185e0df1b60b7d61d Mon Sep 17 00:00:00 2001 From: IraisVal <153772091+IraisVal@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:01:41 -0500 Subject: [PATCH] Add files via upload --- SingleCell_seurat_workflow.R | 100 +++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 SingleCell_seurat_workflow.R diff --git a/SingleCell_seurat_workflow.R b/SingleCell_seurat_workflow.R new file mode 100644 index 0000000..e586531 --- /dev/null +++ b/SingleCell_seurat_workflow.R @@ -0,0 +1,100 @@ +# script to perform standard workflow steps to analyze single cell RNA-Seq data +# data: 20k Mixture of NSCLC DTCs from 7 donors, 3' v3.1 +# data source: https://www.10xgenomics.com/resources/datasets/10-k-human-pbm-cs-multiome-v-1-0-chromium-controller-1-standard-2-0-0 + + +# Load libraries +library(Seurat) +library(tidyverse) + +# Load the NSCLC dataset +nsclc.sparse.m <- Read10X_h5(filename = '20k_NSCLC_DTC_3p_nextgem_Multiplex_count_raw_feature_bc_matrix.h5') +str(nsclc.sparse.m) + +# Genome matrix has multiple modalities, returning a list of matrices for this genome, select gene expression +# Getting gene expression from matrix for analysis +cts <- nsclc.sparse.m$`Gene Expression` + +# Initialize the Seurat object with the raw (non-normalized data). +nsclc.seurat.obj <- CreateSeuratObject(counts = cts, project = "NSCLC", min.cells = 3, min.features = 200) +str(nsclc.seurat.obj) +nsclc.seurat.obj +# 29552 features across 42081 samples + +# 1. Quality control +View(nsclc.seurat.obj@meta.data) + +# % Mitochondrial reads +#calculating the percentage of mitochondrial genes +#all genes starting with mt save it to another object within the metadata +nsclc.seurat.obj[["percent.mt"]] <- PercentageFeatureSet(nsclc.seurat.obj, pattern = "^MT-") +View(nsclc.seurat.obj@meta.data) + +#Visualzing the features +VlnPlot(nsclc.seurat.obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) +FeatureScatter(nsclc.seurat.obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + + geom_smooth(method = 'lm') +#`geom_smooth()` using formula = 'y ~ x' + +# 2. Filtering +nsclc.seurat.obj <- subset(nsclc.seurat.obj, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & + percent.mt < 5) +# 3. Normalize data +#nsclc.seurat.obj <- NormalizeData(nsclc.seurat.obj, normalization.method = "LogNormalize", scale.factor = 10000) +# OR +#nsclc.seurat.obj <- NormalizeData(nsclc.seurat.obj) +str(nsclc.seurat.obj) + +# 4. Identify highly variable features +nsclc.seurat.obj <- FindVariableFeatures(nsclc.seurat.obj, selection.method = "vst", nfeatures = 2000) + +# Identify the 10 most highly variable genes +top10 <- head(VariableFeatures(nsclc.seurat.obj), 10) + +# plot variable features with and without labels +plot1 <- VariableFeaturePlot(nsclc.seurat.obj) +LabelPoints(plot = plot1, points = top10, repel = TRUE) + +# 5. Centering and scaling data matrix +#Error: 5.4 Gb, el tamaño tiene como límite la capacidad de RAM de tu PC + +all.genes <- rownames(nsclc.seurat.obj) +nsclc.seurat.obj <- ScaleData(nsclc.seurat.obj, features = all.genes) + +str(nsclc.seurat.obj) + +# 6. Perform Linear dimensionality reduction +nsclc.seurat.obj <- RunPCA(nsclc.seurat.obj, features = VariableFeatures(object = nsclc.seurat.obj)) + +# visualize PCA results +print(nsclc.seurat.obj[["pca"]], dims = 1:5, nfeatures = 5) +DimHeatmap(nsclc.seurat.obj, dims = 1, cells = 500, balanced = TRUE) + + +# determine dimensionality of the data +ElbowPlot(nsclc.seurat.obj) + + +# 7. Clustering +nsclc.seurat.obj <- FindNeighbors(nsclc.seurat.obj, dims = 1:15) + +# understanding resolution +nsclc.seurat.obj <- FindClusters(nsclc.seurat.obj, resolution = c(0.1,0.3, 0.5, 0.7, 1)) +View(nsclc.seurat.obj@meta.data) + +DimPlot(nsclc.seurat.obj, group.by = "RNA_snn_res.0.5", label = TRUE) + +# setting identity of clusters +Idents(nsclc.seurat.obj) +Idents(nsclc.seurat.obj) <- "RNA_snn_res.0.1" +Idents(nsclc.seurat.obj) + +# non-linear dimensionality reduction +# If you haven't installed UMAP, you can do so via reticulate::py_install(packages = +# 'umap-learn') +nsclc.seurat.obj <- RunUMAP(nsclc.seurat.obj, dims = 1:15) + +# note that you can set `label = TRUE` or use the LabelClusters function to help label +# individual clusters +DimPlot(nsclc.seurat.obj, reduction = "umap") +