-
Notifications
You must be signed in to change notification settings - Fork 0
/
ma_plot.r
127 lines (97 loc) · 5.89 KB
/
ma_plot.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
library(DESeq2)
library(ggplot2)
packageVersion("DESeq2")
# On récupère la table et on nettoie pour garder juste les colonnes intéressantes
count_data <- read.table("merged_feature_counts.txt", header = TRUE, skip = 1, row.names = 1)
clean_count_data <- count_data[,c(6:11)]
clean_count_data <- as.matrix(clean_count_data)
#Partie pour créer le metadata
metadata <- data.frame(
Sample = c("SRR10379726.fastq_trimmed.fastq.bam", "SRR10379723.fastq_trimmed.fastq.bam", "SRR10379722.fastq_trimmed.fastq.bam",
"SRR10379725.fastq_trimmed.fastq.bam", "SRR10379724.fastq_trimmed.fastq.bam", "SRR10379721.fastq_trimmed.fastq.bam"),
Condition = c("control", "persister", "persister", "control", "control", "persister")
)
#write.csv(metadata, "metadata.csv", row.names = FALSE)
#metadata <- read.csv("metadata.csv", row.names = 1)
rownames(metadata) <- colnames(clean_count_data)
metadata$Condition <- factor(metadata$Condition, levels = c("control", "persister"))
# Créer l'objet DESeqDataSet
dds <- DESeqDataSetFromMatrix(countData = clean_count_data,
colData = metadata,
design = ~ Condition)
# Exécuter DESeq
dds <- DESeq(dds)
# Obtenir les résultats
res <- results(dds)
summary(res)
# Vérification de la distribution des pvalues ajustées
hist_plot <- ggplot(as.data.frame(res$padj), aes(x = res$padj)) +
geom_histogram(bins = 30, fill = "blue", color = "black", alpha = 0.7) +
theme_minimal() +
labs(title = "Histogram of Adjusted p-values",
x = "Adjusted p-value",
y = "Frequency")
ggsave("Hist_pvalAdj.pdf", plot = hist_plot, width = 7, height = 8)
##### MA-PLOT complete dataset
res_df <- as.data.frame(res)
# Identification des gènes différentiellements exprimés
res_df$Significance <- ifelse(res_df$padj < 0.05 & !is.na(res_df$padj), "Significant", "Not Significant")
# Représentation sous forme de MA-plot
plot <- ggplot(res_df, aes(x = baseMean, y = log2FoldChange, color = Significance)) +
geom_point(size = 1.2, alpha = 0.5) + # Points (gènes)
scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black"),
guide = "none") + # Couleurs
scale_x_log10(breaks = c(10^0, 10^2, 10^4), # Axe des abscisses
labels = c(expression(10^0), expression(10^2), expression(10^4))) + # Labels
coord_cartesian(ylim = c(-4, 4)) + # Axe des ordonnées
geom_hline(yintercept = 0, linetype = "dashed", color = "black") + # Ligne horizontale
theme_gray(base_size = 14) + # Couleur du fond
labs(title = "MA-plot of complete RNA-seq dataset",
x = "Mean of normalized counts",
y = "Log2 Fold Change") +
theme(panel.grid.major = element_line(color = "white"), # Grille
panel.grid.minor = element_line(color = "white"), # Grille
legend.position = "top")
ggsave("MA_plot_complete_dataset.png", plot = plot)
##### Volcano-plot complete dataset
res_df <- as.data.frame(res)
# Identification des gènes significatifs différentiellement exprimés
res_df$significant <- ifelse(res_df$padj < 0.05 & abs(res_df$log2FoldChange) > 1, "Significant", "Not Significant")
#Identification des gènes up-regulated et down-regulated
res_df$regulation <- ifelse(res_df$log2FoldChange > 1 & res$padj < 0.05, "Up-regulated",
ifelse(res_df$log2FoldChange < -1 & res$padj < 0.05, "Down-regulated", "Not Significant"))
# Construction du graphique
volcano_plot <- ggplot(res_df, aes(x = log2FoldChange, y = -log10(padj), color = regulation)) +
geom_point(alpha = 0.7, size = 2) + # Modifier la taille et la transparence des points
scale_color_manual(values = c("Not Significant" = "gray", "Up-regulated" = "blue", "Down-regulated" = "red"), na.translate = FALSE) +
theme_minimal() +
labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10(p-value)") +
theme(legend.position = "right", plot.title = element_text(size = 16),
axis.title = element_text(size = 12), axis.text = element_text(size = 10)) +
xlim(c(-5, 5)) + # Axe des abscisses
ylim(c(0, 50)) + # Axe des ordonnées
# Délimitation verticale
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
# Délimitation horizontale
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black")
ggsave("Volcano_plot.pdf", plot = volcano_plot)
##### ACP
# Transformation des données
rld <- rlog(dds)
# Extraction des données de plotPCA
pca_data <- plotPCA(rld, intgroup = "Condition", returnData = TRUE)
# Calculer le pourcentage de variance expliquée
percent_variance <- round(100 * attr(pca_data, "percentVar"))
# Modifier les noms des échantillons (couper avant le ".")
pca_data$name <- sapply(pca_data$name, function(x) strsplit(x, "\\.")[[1]][1])
ACP <- ggplot(pca_data, aes(x = PC1, y = PC2, color = Condition, label = name)) +
geom_point(size = 4, alpha = 0.8) +
geom_text(vjust = -1, hjust = 0.5, size = 3) +
labs(title = "Principal Component Analysis of samples (PCA)", x = paste0("PC1: ", percent_variance[1], "% variance"),
y = paste0("PC2: ", percent_variance[2], "% variance"), color = "Condition") +
scale_x_continuous(limits = c(-25, 21)) +
scale_y_continuous(limits = c(-15, 10)) +
theme(legend.position = "bottom", plot.title = element_text(size = 20),
axis.title = element_text(size = 16), axis.text = element_text(size = 14),
legend.text = element_text(size = 14), legend.title = element_text(size = 16))
ggsave("ACP_samples.png", plot = ACP)