-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdriverR.Rmd
102 lines (83 loc) · 2.56 KB
/
driverR.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
---
title: "Elena 2024: Keratinocyte single-cell data
"
output: html_document
---
```{r}
library(Seurat)
library(tidyverse)
library(ggplot2)
library(FactoMineR)
library(factoextra)
library("AnnotationDbi")
library("org.Hs.eg.db")
library("biomaRt")
```
```{r}
# setwd('/mnt/data/users-home/thomas.sauter/Claudia_2023')
setwd('../Elena_2024')
# combined.all <- readRDS("combined.all_annotated.rds")
sdata <- readRDS("K82_K86_integrated_FINAL.rds")
```
```{r}
table(sdata$new_names)
prop.table(table(sdata$new_names))
table(sdata$orig.ident, sdata$new_names)
table(sdata$seurat_clusters)
table(sdata$orig.ident)
# Visualization
p <- DimPlot(sdata, reduction = "umap", label = TRUE)
print(p)
p1 <- DimPlot(sdata, reduction = "umap", label = TRUE, group.by = "orig.ident",pt.size = 1.5)
print(p1)
dim(sdata$RNA@counts)
```
```{r}
# Separating the clusters into different variables
maxNrCells <- 1000
counts_matrix <- sdata[["RNA"]]$counts
print(dim(counts_matrix))
listNames <- unique(sdata$new_names)
listNames
cell_types_to_extract <- c("H","M","P","TD1","TD2","F1","F2")
# cell_types_to_extract <- c("H")
cell_types_to_extract
table(sdata$new_names)
# for (ii in 1:length(cell_types_to_extract)) {
# ii
# subset_counts_matrix <- counts_matrix[ ,sdata$new_names %in% cell_types_to_extract[ii]]
# if (ncol(subset_counts_matrix) > maxNrCells) {
# subset_counts_matrix <- subset_counts_matrix[ , 1:maxNrCells]
# }
# print(dim(subset_counts_matrix))
# write.table(subset_counts_matrix, file = paste("./Cluster_",ii,".txt",sep = ""))
# }
```
```{r}
mart <- useMart("ensembl")
mart <- useDataset("hsapiens_gene_ensembl", mart = mart)
head(listAttributes(mart))
EnsemblGeneID <- rownames(counts_matrix)
geneTable <- getBM(attributes= c("ensembl_gene_id",
"external_gene_name", "entrezgene_id"),
values= EnsemblGeneID, mart= mart)
head(geneTable)
head(geneTable$entrezgene_id)
for (ii in 1:length(cell_types_to_extract)) {
ii
subset_counts_matrix <- counts_matrix[ ,sdata$new_names %in% cell_types_to_extract[ii]]
if (ncol(subset_counts_matrix) > maxNrCells) {
subset_counts_matrix <- subset_counts_matrix[ , 1:maxNrCells]
}
data <- subset_counts_matrix
m <- match(rownames(data),geneTable$external_gene_name)
GeneID <- geneTable$entrezgene_id[m]
Var1 <- GeneID
head(rownames(data))
head(GeneID)
data2 <- cbind(EnsemblGeneID, GeneID,Var1,as.matrix(data[, 1:min(ncol(data),maxNrCells)]))
head(data2)
print(dim(data2))
write.table(data2, file = paste("./Cluster_",ii,".txt",sep = ""), row.names = F)
}
```