@@ -26,6 +26,8 @@ params:
26
26
factor_of_interest : genotype
27
27
numerator : cKO
28
28
denominator : WT
29
+ # species = mouse or human
30
+ species : mouse
29
31
---
30
32
31
33
``` {r, cache = FALSE, message = FALSE, warning=FALSE}
@@ -73,6 +75,19 @@ library(DiffBind)
73
75
library(qs)
74
76
library(EnhancedVolcano)
75
77
library(ggprism)
78
+ library(ChIPseeker)
79
+
80
+ # Pick the TxDb object and the OrgDb package name that match params$species.
+ # txdb and anno_db are plain variables consumed by later chunks.
+ if (params$species == 'mouse'){
81
+ library(TxDb.Mmusculus.UCSC.mm10.knownGene)
82
+ txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
83
+ anno_db <- 'org.Mm.eg.db'
84
+ } else if (params$species == 'human'){
85
+ library(TxDb.Hsapiens.UCSC.hg38.knownGene)
86
+ txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
87
+ anno_db <- 'org.Hs.eg.db'
88
+ } else {
+ # Fail here rather than leaving txdb/anno_db undefined for later chunks.
+ stop("params$species must be 'mouse' or 'human', got: ", params$species, call. = FALSE)
+ }
89
+
90
+
76
91
colors=cb_friendly_cols(1:15)
77
92
ggplot2::theme_set(theme_prism(base_size = 14))
78
93
opts_chunk[["set"]](
@@ -89,6 +104,7 @@ opts_chunk[["set"]](
89
104
90
105
# set seed for reproducibility
91
106
set.seed(1234567890L)
107
+
92
108
```
93
109
94
110
@@ -118,7 +134,7 @@ samplesheet <- make_diffbind_samplesheet(coldata, bam_dir, peaks_dir, params$fac
118
134
```
119
135
120
136
``` {r show_metadata}
121
- samplesheet %>% select(SampleID, Replicate, Condition, Factor, ControlID) %>% sanitize_datatable()
137
+ # Display the key sample-sheet columns. select() is namespaced explicitly,
+ # presumably because a newly attached annotation package masks dplyr's select() - confirm.
+ samplesheet %>% dplyr::select(SampleID, Replicate, Condition, Factor, ControlID) %>% sanitize_datatable()
122
138
```
123
139
124
140
``` {r create diffbind counts object, eval = !file.exists(params$diffbind_counts_file)}
@@ -142,9 +158,9 @@ diffbind_norm <- dba.normalize(diffbind_counts)
142
158
143
159
+ # Build a peak-by-sample matrix of normalized counts: retrieve the peakset as a
+ # data frame, key each row by CHR_START_END, then drop the coordinate columns.
norm_counts <- dba.peakset(diffbind_norm, bRetrieve=TRUE, DataType=DBA_DATA_FRAME) %>%
144
160
mutate(peak = paste(CHR, START, END, sep = '_')) %>%
145
- select(-CHR, -START, -END)
161
+ dplyr::select(-CHR, -START, -END)
146
162
rownames(norm_counts) <- norm_counts$peak
147
- norm_counts <- norm_counts %>% select(-peak) %>% as.matrix()
163
+ norm_counts <- norm_counts %>% dplyr::select(-peak) %>% as.matrix()
148
164
norm_counts_log <- log2(norm_counts + 1)
149
165
150
166
coldata_for_pca <- coldata[colnames(norm_counts), ]
@@ -161,24 +177,27 @@ degPCA(norm_counts_log, coldata_for_pca, condition = params$factor_of_interest)
161
177
## Table
162
178
``` {r DE analysis}
163
179
+ # Contrast numerator vs denominator on the Factor column, then run the analysis.
diffbind_norm <- dba.contrast(diffbind_norm, contrast = c('Factor', params$numerator, params$denominator))
164
- results <- dba.analyze(diffbind_norm, bGreylist = F)
180
+ results_obj <- dba.analyze(diffbind_norm, bGreylist = FALSE)
181
+ # th = 1 keeps every site; the call without th uses DiffBind's default threshold
182
+ results_report <- dba.report(results_obj, th = 1)
183
+ results_report_sig <- dba.report(results_obj)
165
184
166
- results_report <- dba.report(results, th = 1) %>% as.data.frame()
167
- results_report_sig <- results_report %>% filter(FDR < 0.05 )
185
+ results <- results_report %>% as.data.frame()
186
+ results_sig <- results_report_sig %>% as.data.frame()
168
187
169
- results_report_sig %>% sanitize_datatable()
188
+ results_sig %>% sanitize_datatable()
170
189
171
190
```
172
191
173
192
## Volcano plot
174
193
``` {r volcano, fig.height = 8}
175
- results_report_mod <- results_report %>%
194
+ results_mod <- results %>%
176
195
mutate(Fold = replace(Fold, Fold < -5, -5)) %>%
177
196
mutate(Fold = replace(Fold, Fold > 5, 5)) %>%
178
197
mutate(peak = paste(seqnames, start, end, sep = '_'))
179
- show <- as.data.frame(results_report_mod [1:6, c("Fold", "FDR", "peak")])
180
- EnhancedVolcano(results_report_mod ,
181
- lab= results_report_mod $peak,
198
+ show <- as.data.frame(results_mod [1:6, c("Fold", "FDR", "peak")])
199
+ EnhancedVolcano(results_mod ,
200
+ lab= results_mod $peak,
182
201
pCutoff = 0.05,
183
202
selectLab = c(show$peak),
184
203
FCcutoff = 0.5,
@@ -203,3 +222,27 @@ norm_counts_log_long_top <- norm_counts_log_long %>% filter(peak %in% show$peak)
203
222
ggplot(norm_counts_log_long_top, aes(x = .data[[params$factor_of_interest]], y = norm_counts_log2)) +
204
223
facet_wrap(~peak, scale = 'free_y') + geom_boxplot()
205
224
```
225
+
226
+ ## Annotate DB peaks
227
+
228
+ ``` {r annotate, echo = F}
229
+
230
+ # Annotate the significant peaks relative to genes in txdb (TSS window +/- 2 kb).
+ # anno_db is the OrgDb package name chosen in the setup chunk from params$species;
+ # it is a plain variable, and no visible line defines a params$anno_db field.
+ results_sig_anno <- annotatePeak(results_report_sig,
231
+ tssRegion = c(-2000, 2000),
232
+ TxDb = txdb,
233
+ annoDb = anno_db,
234
+ verbose = FALSE)
235
+ results_sig_anno_df <- results_sig_anno %>% as.data.frame()
236
+ # Summary plots of the annotation: feature breakdown and distance to TSS.
237
+ plotAnnoPie(results_sig_anno)
238
+
239
+ plotDistToTSS(results_sig_anno)
240
+ # NOTE(review): toGRanges()/annotatePeakInBatch() look like ChIPpeakAnno functions; confirm that package is loaded in the setup chunk.
241
+ anno_data <- toGRanges(txdb, feature = 'gene')
242
+ results_sig_anno_batch <- annotatePeakInBatch(results_report_sig,
243
+ AnnotationData = anno_data,
244
+ output = 'overlapping',
245
+ maxgap = 1000)
246
+
247
+ results_sig_anno_batch_df <- results_sig_anno_batch %>% as.data.frame()
248
+ ```
0 commit comments