From cc359e2d89b2467a040b677185e0df1b60b7d61d Mon Sep 17 00:00:00 2001
From: IraisVal <153772091+IraisVal@users.noreply.github.com>
Date: Wed, 20 Dec 2023 19:01:41 -0500
Subject: [PATCH] Add files via upload

---
 SingleCell_seurat_workflow.R | 100 +++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 SingleCell_seurat_workflow.R

diff --git a/SingleCell_seurat_workflow.R b/SingleCell_seurat_workflow.R
new file mode 100644
index 0000000..e586531
--- /dev/null
+++ b/SingleCell_seurat_workflow.R
@@ -0,0 +1,100 @@
+# script to perform standard workflow steps to analyze single cell RNA-Seq data
+# data: 20k Mixture of NSCLC DTCs from 7 donors, 3' v3.1
+# data source: https://www.10xgenomics.com/resources/datasets/10-k-human-pbm-cs-multiome-v-1-0-chromium-controller-1-standard-2-0-0
+
+
+# Load libraries
+library(Seurat)
+library(tidyverse)
+
+# Load the NSCLC dataset
+nsclc.sparse.m <- Read10X_h5(filename = '20k_NSCLC_DTC_3p_nextgem_Multiplex_count_raw_feature_bc_matrix.h5')
+str(nsclc.sparse.m)
+
+# Genome matrix has multiple modalities, returning a list of matrices for this genome, select gene expression
+# Getting gene expression from matrix for analysis
+cts <-  nsclc.sparse.m$`Gene Expression`
+
+# Initialize the Seurat object with the raw (non-normalized data).
+nsclc.seurat.obj <- CreateSeuratObject(counts = cts, project = "NSCLC", min.cells = 3, min.features = 200)
+str(nsclc.seurat.obj)
+nsclc.seurat.obj
+# 29552 features across 42081 samples
+
+# 1. Quality control 
+View(nsclc.seurat.obj@meta.data)
+
+# % Mitochondrial reads
+#calculating the percentage of mitochondrial genes
+#all genes starting with mt save it to another object within the metadata
+nsclc.seurat.obj[["percent.mt"]] <- PercentageFeatureSet(nsclc.seurat.obj, pattern = "^MT-")
+View(nsclc.seurat.obj@meta.data)
+
+#Visualzing the features
+VlnPlot(nsclc.seurat.obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
+FeatureScatter(nsclc.seurat.obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") +
+    geom_smooth(method = 'lm')
+#`geom_smooth()` using formula = 'y ~ x'
+
+# 2. Filtering 
+nsclc.seurat.obj <- subset(nsclc.seurat.obj, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 &
+                               percent.mt < 5)
+# 3. Normalize data 
+#nsclc.seurat.obj <- NormalizeData(nsclc.seurat.obj, normalization.method = "LogNormalize", scale.factor = 10000)
+# OR
+#nsclc.seurat.obj <- NormalizeData(nsclc.seurat.obj)
+str(nsclc.seurat.obj)
+
+# 4. Identify highly variable features 
+nsclc.seurat.obj <- FindVariableFeatures(nsclc.seurat.obj, selection.method = "vst", nfeatures = 2000)
+
+# Identify the 10 most highly variable genes
+top10 <- head(VariableFeatures(nsclc.seurat.obj), 10)
+
+# plot variable features with and without labels
+plot1 <- VariableFeaturePlot(nsclc.seurat.obj)
+LabelPoints(plot = plot1, points = top10, repel = TRUE)
+
+# 5. Centering and scaling data matrix 
+#Error: 5.4 Gb, el tamaño tiene como límite la capacidad de RAM de tu PC
+
+all.genes <- rownames(nsclc.seurat.obj)
+nsclc.seurat.obj <- ScaleData(nsclc.seurat.obj, features = all.genes)
+
+str(nsclc.seurat.obj)
+
+# 6. Perform Linear dimensionality reduction
+nsclc.seurat.obj <- RunPCA(nsclc.seurat.obj, features = VariableFeatures(object = nsclc.seurat.obj))
+
+# visualize PCA results
+print(nsclc.seurat.obj[["pca"]], dims = 1:5, nfeatures = 5)
+DimHeatmap(nsclc.seurat.obj, dims = 1, cells = 500, balanced = TRUE)
+
+
+# determine dimensionality of the data
+ElbowPlot(nsclc.seurat.obj)
+
+
+# 7. Clustering
+nsclc.seurat.obj <- FindNeighbors(nsclc.seurat.obj, dims = 1:15)
+
+# understanding resolution
+nsclc.seurat.obj <- FindClusters(nsclc.seurat.obj, resolution = c(0.1,0.3, 0.5, 0.7, 1))
+View(nsclc.seurat.obj@meta.data)
+
+DimPlot(nsclc.seurat.obj, group.by = "RNA_snn_res.0.5", label = TRUE)
+
+# setting identity of clusters
+Idents(nsclc.seurat.obj)
+Idents(nsclc.seurat.obj) <- "RNA_snn_res.0.1"
+Idents(nsclc.seurat.obj)
+
+# non-linear dimensionality reduction 
+# If you haven't installed UMAP, you can do so via reticulate::py_install(packages =
+# 'umap-learn')
+nsclc.seurat.obj <- RunUMAP(nsclc.seurat.obj, dims = 1:15)
+
+# note that you can set `label = TRUE` or use the LabelClusters function to help label
+# individual clusters
+DimPlot(nsclc.seurat.obj, reduction = "umap")
+