-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDESeq2.Rmd
89 lines (70 loc) · 2.41 KB
/
DESeq2.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
---
title: "DeSeq2"
output: html_notebook
---
```{r setup}
library(DESeq2)
library(tidyverse)
library(here)
library(readr)
source(here::here("simulateDeSeq2.R"))
source(here::here("DeSeq2_helpers.R"))
```
# Meddling with DESeq2
What are the basic statistics when 20% genes is either up or down regulated as a function of a) true LFC and b) the DESeq2 parameter lfcThreshold
```{r, warning="hide", message="hide"}
set.seed(6521475)
num_simulations = 100
results_base_list <- list()
j = 1
inputs <- data.frame(effect_size = c(0,3,3,6,6), lfcThreshold = c(0, 0, 1.5, 0, 1.5))
for(num_replicates in c(3:5,10)) {
for(i in 1:nrow(inputs)) {
results_base_list[[j]] <- deSeqTest(0.2, effect_size = inputs$effect_size[i], lfcThreshold = inputs$lfcThreshold[i], num_simulations = num_simulations, num_replicates = num_replicates)
j <- j + 1
}
}
results_base <- do.call(rbind, results_base_list)
```
```{r}
avg_func <- function(x) { mean(x, na.rm = TRUE)}
results_base %>% group_by(num_replicates, effect_size, lfcThreshold) %>%
summarise(
TP = avg_func(TP_),
TN = avg_func(TN_),
FP = avg_func(FP_),
FN = avg_func(FN_),
S_error = avg_func(S_error_)
)
results_base %>% group_by(num_replicates, effect_size, lfcThreshold) %>%
summarise(
FDR = avg_func(FP_ / (FP_ + TP_)),
true_eff = avg_func(mean_true_eff),
false_eff = avg_func(mean_false_eff),
p_positive_ = median(median_p_positive),
p_negative_ = median(median_p_negative),
p_fn_ = median(median_p_fn)
)
```
How frequently will the genes marked as DE be replicated in a subsequent experiment?
```{r, warning="hide", message="hide"}
set.seed(1354752)
num_simulations = 100
results_list_repl <- list()
j = 1
inputs <- data.frame(effect_size = c(1.5,3,3,6,6), lfcThreshold = c(1.5,0, 1.5, 0, 1.5))
for(i in 1:nrow(inputs)) {
for(sim in 1:num_simulations) {
results_list_repl[[j]] <- deseq_replication(0.2, effect_size = inputs$effect_size[i], lfcThreshold = inputs$lfcThreshold[i])
j <- j + 1
}
}
results_repl <- do.call(rbind, results_list_repl)
```
```{r}
results_repl %>% group_by(effect_size, lfcThreshold) %>%
summarise( significant = mean(significant_),
replicated = mean(replicated),
smaller_eff = mean(smaller_eff),
smaller_eff_significant = mean(smaller_eff_significant / significant, na.rm = TRUE))
```