|
| 1 | +# Install necessary packages if not already installed |
| 2 | +if (!requireNamespace("BiocManager", quietly = TRUE)) |
| 3 | + install.packages("BiocManager") |
| 4 | +BiocManager::install(c("DESeq2", "ggplot2", "EnhancedVolcano")) |
| 5 | + |
| 6 | +# Load libraries |
| 7 | +library(DESeq2) |
| 8 | +library(ggplot2) |
| 9 | +library(EnhancedVolcano) |
| 10 | + |
| 11 | +# Set working directory (optional) |
| 12 | +# setwd("your_working_directory") |
| 13 | + |
| 14 | +# 1. Load read counts data from CSV |
| 15 | +counts_data <- read.csv("read_counts.csv", row.names = 1) # Replace "read_counts.csv" with your file name |
| 16 | + |
| 17 | +# 2. Create DESeqDataSet object |
| 18 | +# Define sample conditions (replace with your actual conditions) |
| 19 | +condition <- factor(c("Condition1", "Condition1", "Condition2", "Condition2")) # Example: 2 conditions, 2 samples each |
| 20 | +colData <- data.frame(condition) |
| 21 | +dds <- DESeqDataSetFromMatrix(countData = counts_data, |
| 22 | + colData = colData, |
| 23 | + design = ~ condition) |
| 24 | + |
| 25 | +# 3. Perform differential expression analysis |
| 26 | +dds <- DESeq(dds) |
| 27 | +res <- results(dds, contrast = c("condition", "Condition2", "Condition1")) # Comparing "Condition2" vs "Condition1" |
| 28 | + |
| 29 | + |
| 30 | +# 4 & 6. Generate and save volcano plot |
| 31 | +volcano_plot <- EnhancedVolcano(res, |
| 32 | + lab = rownames(res), |
| 33 | + x = "log2FoldChange", |
| 34 | + y = "padj", |
| 35 | + title = "Volcano Plot of Differential Expression", |
| 36 | + pCutoff = 0.05, |
| 37 | + FCcutoff = 1, |
| 38 | + pointSize = 3.0, |
| 39 | + labSize = 6.0, |
| 40 | + drawConnectors = TRUE, |
| 41 | + widthConnectors = 0.75 |
| 42 | +) |
| 43 | + |
| 44 | +ggsave("volcano_plot.png", volcano_plot, width = 10, height = 8) |
| 45 | + |
| 46 | + |
| 47 | + |
| 48 | +# 5. Prepare and save results table |
| 49 | +res_table <- as.data.frame(res) |
| 50 | +res_table$GeneID <- rownames(res_table) |
| 51 | +res_table <- res_table[, c("GeneID", "log2FoldChange", "pvalue", "padj")] |
| 52 | + |
| 53 | +# Determine differential expression status |
| 54 | +res_table$DifferentialExpressionStatus <- ifelse(res_table$padj < 0.05 & abs(res_table$log2FoldChange) >= 1, "Significant", "Not Significant") |
| 55 | + |
| 56 | + |
| 57 | +write.csv(res_table, file = "differential_expression_results.csv", row.names = FALSE) |
| 58 | + |
| 59 | +print("Differential expression analysis completed and results saved.") |
| 60 | + |
| 61 | +Use code with care. Learn more |
| 62 | +Explanation and Key Improvements: |
| 63 | + |
| 64 | +Clearer package installation: Uses BiocManager::install() for easier Bioconductor package management. |
| 65 | +Descriptive comments: Added more comments to explain each step. |
| 66 | +Customizable condition vector: The condition vector is now explicitly defined, making it easy to adapt to different experimental designs. |
| 67 | +Explicit contrast: The results() function now uses an explicit contrast to define the comparison, improving clarity and control. |
| 68 | +Enhanced volcano plot: Uses EnhancedVolcano for a more informative and visually appealing volcano plot. Customizable parameters like pCutoff and FCcutoff are included. |
| 69 | +Complete results table: Includes Gene ID, log2 fold change, p-value, adjusted p-value, and differential expression status. |
| 70 | +Informative output: Prints a message upon successful completion. |
| 71 | +Before running: |
| 72 | + |
| 73 | +Install required packages: Make sure you have DESeq2, ggplot2, and EnhancedVolcano installed. |
| 74 | +Input data: Replace "read_counts.csv" with the actual path to your read counts file. Ensure that the first column contains gene IDs and subsequent columns represent samples. |
| 75 | +Adjust parameters: Modify the condition vector, contrast in results(), pCutoff, and FCcutoff in EnhancedVolcano as needed to fit your experimental design and analysis criteria. Consider adjusting plot dimensions in ggsave too. |
| 76 | +This enhanced script is more robust, user-friendly, and provides a more comprehensive analysis output. It is designed for better customization and clarity, fitting a wider range of experimental designs and user preferences. |
0 commit comments