Skip to content

Commit 46da116

Browse files
committed
begin adding annotation and functional enrichment
1 parent b7201ba commit 46da116

File tree

2 files changed

+58
-15
lines changed

2 files changed

+58
-15
lines changed

inst/templates/chipseq/diffbind/diffbind.Rmd

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ params:
2626
factor_of_interest: genotype
2727
numerator: cKO
2828
denominator: WT
29+
# species: 'mouse' or 'human' (selects the TxDb and org.*.eg.db annotation packages)
30+
species: mouse
2931
---
3032

3133
```{r, cache = FALSE, message = FALSE, warning=FALSE}
@@ -73,6 +75,19 @@ library(DiffBind)
7375
library(qs)
7476
library(EnhancedVolcano)
7577
library(ggprism)
78+
library(ChIPseeker)
79+
80+
if (params$species == 'mouse'){
81+
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
82+
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
83+
anno_db <- 'org.Mm.eg.db'
84+
} else if (params$species == 'human'){  # compare against the string literal
85+
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
86+
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
87+
anno_db <- 'org.Hs.eg.db'
88+
}
89+
90+
7691
colors=cb_friendly_cols(1:15)
7792
ggplot2::theme_set(theme_prism(base_size = 14))
7893
opts_chunk[["set"]](
@@ -89,6 +104,7 @@ opts_chunk[["set"]](
89104
90105
# set seed for reproducibility
91106
set.seed(1234567890L)
107+
92108
```
93109

94110

@@ -118,7 +134,7 @@ samplesheet <- make_diffbind_samplesheet(coldata, bam_dir, peaks_dir, params$fac
118134
```
119135

120136
```{r show_metadata}
121-
samplesheet %>% select(SampleID, Replicate, Condition, Factor, ControlID) %>% sanitize_datatable()
137+
samplesheet %>% dplyr::select(SampleID, Replicate, Condition, Factor, ControlID) %>% sanitize_datatable()
122138
```
123139

124140
```{r create diffbind counts object, eval = !file.exists(params$diffbind_counts_file)}
@@ -142,9 +158,9 @@ diffbind_norm <- dba.normalize(diffbind_counts)
142158
143159
norm_counts <- dba.peakset(diffbind_norm, bRetrieve=TRUE, DataType=DBA_DATA_FRAME) %>%
144160
mutate(peak = paste(CHR, START, END, sep = '_')) %>%
145-
select(-CHR, -START, -END)
161+
dplyr::select(-CHR, -START, -END)
146162
rownames(norm_counts) <- norm_counts$peak
147-
norm_counts <- norm_counts %>% select(-peak) %>% as.matrix()
163+
norm_counts <- norm_counts %>% dplyr::select(-peak) %>% as.matrix()
148164
norm_counts_log <- log2(norm_counts + 1)
149165
150166
coldata_for_pca <- coldata[colnames(norm_counts), ]
@@ -161,24 +177,27 @@ degPCA(norm_counts_log, coldata_for_pca, condition = params$factor_of_interest)
161177
## Table
162178
```{r DE analysis}
163179
diffbind_norm <- dba.contrast(diffbind_norm, contrast = c('Factor', params$numerator, params$denominator))
164-
results <- dba.analyze(diffbind_norm, bGreylist = F)
180+
results_obj <- dba.analyze(diffbind_norm, bGreylist = FALSE)
181+
182+
results_report <- dba.report(results_obj, th = 1)
183+
results_report_sig <- dba.report(results_obj)
165184
166-
results_report <- dba.report(results, th = 1) %>% as.data.frame()
167-
results_report_sig <- results_report %>% filter(FDR < 0.05)
185+
results <- results_report %>% as.data.frame()
186+
results_sig <- results_report_sig %>% as.data.frame()
168187
169-
results_report_sig %>% sanitize_datatable()
188+
results_sig %>% sanitize_datatable()
170189
171190
```
172191

173192
## Volcano plot
174193
```{r volcano, fig.height = 8}
175-
results_report_mod <- results_report %>%
194+
results_mod <- results %>%
176195
mutate(Fold = replace(Fold, Fold < -5, -5)) %>%
177196
mutate(Fold = replace(Fold, Fold > 5, 5)) %>%
178197
mutate(peak = paste(seqnames, start, end, sep = '_'))
179-
show <- as.data.frame(results_report_mod[1:6, c("Fold", "FDR", "peak")])
180-
EnhancedVolcano(results_report_mod,
181-
lab= results_report_mod$peak,
198+
# top six rows to label; head() avoids NA-filled rows when fewer than six peaks exist
show <- head(as.data.frame(results_mod[, c("Fold", "FDR", "peak")]), 6)
199+
EnhancedVolcano(results_mod,
200+
lab= results_mod$peak,
182201
pCutoff = 0.05,
183202
selectLab = c(show$peak),
184203
FCcutoff = 0.5,
@@ -203,3 +222,27 @@ norm_counts_log_long_top <- norm_counts_log_long %>% filter(peak %in% show$peak)
203222
ggplot(norm_counts_log_long_top, aes(x = .data[[params$factor_of_interest]], y = norm_counts_log2)) +
204223
facet_wrap(~peak, scales = 'free_y') + geom_boxplot()
205224
```
225+
226+
## Annotate DB peaks
227+
228+
```{r annotate, echo = FALSE}
229+
230+
results_sig_anno <- annotatePeak(results_report_sig,
231+
tssRegion = c(-2000, 2000),
232+
TxDb = txdb,
233+
annoDb = anno_db,  # org.*.eg.db package name chosen from params$species in the setup chunk
234+
verbose = FALSE)
235+
results_sig_anno_df <- results_sig_anno %>% as.data.frame()
236+
237+
plotAnnoPie(results_sig_anno)
238+
239+
plotDistToTSS(results_sig_anno)
240+
241+
# toGRanges() comes from ChIPpeakAnno; qualified so it resolves without attaching the package
anno_data <- ChIPpeakAnno::toGRanges(txdb, feature = 'gene')
242+
results_sig_anno_batch <- ChIPpeakAnno::annotatePeakInBatch(results_report_sig,
243+
AnnotationData = anno_data,
244+
output = 'overlapping',
245+
maxgap = 1000)
246+
247+
results_sig_anno_batch_df <- results_sig_anno_batch %>% as.data.frame()
248+
```

inst/templates/chipseq/libs/load_data.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ load_metrics <- function(multiqc_data_dir){
1919

2020
phantom <- read_tsv(file.path(multiqc_data_dir, 'multiqc_phantompeakqualtools.txt')) %>% clean_names() %>%
2121
dplyr::select(sample, nsc, rsc)
22-
frip <- read_tsv(file.path(multiqc_data_dir, 'multiqc_frip_score-plot.txt')) %>% select(-Sample) %>%
22+
frip <- read_tsv(file.path(multiqc_data_dir, 'multiqc_frip_score-plot.txt')) %>% dplyr::select(-Sample) %>%
2323
pivot_longer(everything(), names_to = 'sample', values_to = 'frip') %>% filter(!is.na(frip))
24-
peak_count <- read_tsv(file.path(multiqc_data_dir, 'multiqc_peak_count-plot.txt')) %>% select(-Sample) %>%
24+
peak_count <- read_tsv(file.path(multiqc_data_dir, 'multiqc_peak_count-plot.txt')) %>% dplyr::select(-Sample) %>%
2525
pivot_longer(everything(), names_to = 'sample', values_to = 'peak_count') %>% filter(!is.na(peak_count))
2626
nrf <- read_tsv(file.path(multiqc_data_dir, 'mqc_picard_deduplication_1.txt')) %>% clean_names() %>%
2727
mutate(nrf = unique_unpaired / (unique_unpaired + duplicate_unpaired)) %>%
@@ -108,8 +108,8 @@ make_diffbind_samplesheet <- function(coldata, bam_dir, peaks_dir, column = NULL
108108
coldata_for_diffbind$Factor <- coldata_for_diffbind[[column]]
109109

110110
samplesheet <- coldata_for_diffbind %>%
111-
left_join(bam_files %>% select(SampleID = sample, bamReads = bam), by = 'SampleID') %>%
112-
left_join(bam_files %>% select(ControlID = sample, bamControl = bam), by = 'ControlID') %>%
111+
left_join(bam_files %>% dplyr::select(SampleID = sample, bamReads = bam), by = 'SampleID') %>%
112+
left_join(bam_files %>% dplyr::select(ControlID = sample, bamControl = bam), by = 'ControlID') %>%
113113
left_join(peak_files, by = 'SampleID')
114114

115115
return(samplesheet)

0 commit comments

Comments
 (0)