-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmake-rna-bigwig.Rmd
98 lines (77 loc) · 2.6 KB
/
make-rna-bigwig.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
---
title: "Make RNA-seq bigWig files from BAM files"
author: "Sam Buckberry"
date: "20/10/2021"
output: html_document
---
```{r}
library(rtracklayer)
library(Rsamtools)
library(stringr)
library(magrittr)
library(edgeR)
```
Sort the BAM files in the working directory. Each `<name>.bam` is written to `<name>_sorted.bam`.
```{r}
# Find the BAM files to sort in the working directory.
# The dot is escaped so only a literal ".bam" extension matches, and outputs of
# a previous run ("*_sorted.bam") are dropped so they are not sorted again.
# NOTE(review): the indexing chunk below looks in "aligned_data/" while this
# chunk reads from "." -- confirm the sorted files end up where it expects.
bams <- list.files(path = ".", pattern = "\\.bam$", full.names = TRUE)
bams <- bams[!str_detect(bams, "_sorted\\.bam$")]

# sortBam() takes a destination *prefix* and appends ".bam" itself, so only the
# trailing extension is swapped for "_sorted". Anchoring the pattern keeps
# other occurrences of "bam" in the path intact.
bam_prefix <- str_replace(string = bams,
                          pattern = "\\.bam$",
                          replacement = "_sorted")

# seq_along() yields an empty sequence when no BAM files were found, whereas
# 1:length(bams) would iterate over c(1, 0) and call sortBam() on NA.
lapply(X = seq_along(bams),
       FUN = function(i) sortBam(bams[i], bam_prefix[i]))
```
Index the sorted BAM files found in `aligned_data/`.
```{r}
# Collect the sorted BAM files. The dot is escaped so that only names ending
# in a literal "_sorted.bam" are picked up.
sorted_bams <- list.files(path = "aligned_data/",
                          pattern = "_sorted\\.bam$",
                          full.names = TRUE)

# Build an index for each sorted BAM file
lapply(sorted_bams, indexBam)
```
Make a bigWig coverage file from each sorted BAM file.
```{r}
#' Write a per-base coverage bigWig for one BAM file
#'
#' Piles up the reads in `sorted_bam` (strands and nucleotides pooled), turns
#' the per-position depths into a `GRanges` with one width-1 range per
#' position, and exports it next to the input with the `.bam` extension
#' replaced by `.bigwig`.
#'
#' @param sorted_bam Path to a single existing BAM file whose name ends in
#'   `.bam`. It is presumably expected to be coordinate-sorted and indexed,
#'   since `pileup()` and `idxstatsBam()` are run on it -- confirm against
#'   the Rsamtools documentation.
#' @param CPM_normalise Single non-missing logical. If `TRUE` the depths are
#'   passed through `edgeR::cpm()`; if `FALSE` raw depths are written.
#' @return The value of the final `message()` call (`NULL`, invisibly); the
#'   function is called for its side effect of writing the bigWig file.
bam_to_bigwig <- function(sorted_bam, CPM_normalise = TRUE) {
  # Check inputs. An NA flag is rejected here with a clear message instead of
  # failing later inside `if`.
  stopifnot(
    is.character(sorted_bam),
    length(sorted_bam) == 1,
    is.logical(CPM_normalise),
    length(CPM_normalise) == 1,
    !is.na(CPM_normalise),
    file.exists(sorted_bam)
  )

  message(Sys.time())
  message(str_c("Processing ", sorted_bam))

  # Set outfile name. The dot is escaped so only a literal ".bam" extension is
  # replaced.
  out_path <- str_replace(string = sorted_bam,
                          pattern = "\\.bam$",
                          replacement = ".bigwig")
  # If nothing was replaced the export would be aimed at the input file itself.
  if (identical(out_path, sorted_bam)) {
    stop("`sorted_bam` must end in '.bam': ", sorted_bam, call. = FALSE)
  }

  # Use samtools to pileup reads
  message("Running samtools pileup...")
  param <- Rsamtools::PileupParam(distinguish_strands = FALSE,
                                  distinguish_nucleotides = FALSE,
                                  max_depth = 10000,
                                  min_base_quality = 0)
  pileup_dat <- Rsamtools::pileup(file = sorted_bam,
                                  pileupParam = param)

  # Convert to GRanges: one width-1 range per piled-up position
  gr <- GRanges(seqnames = pileup_dat$seqnames,
                ranges = IRanges(start = pileup_dat$pos,
                                 end = pileup_dat$pos))

  # Get contig/chromosome lengths from BAM file.
  # The lengths are matched to the seqlevels of `gr` by name, because
  # idxstatsBam() may report rows (e.g. one for unplaced reads) or an order
  # that does not correspond one-to-one with the seqlevels.
  idx <- Rsamtools::idxstatsBam(sorted_bam)
  used_seqlengths <- stats::setNames(idx$seqlength, as.character(idx$seqnames))
  seqlengths(gr) <- used_seqlengths[seqlevels(gr)]

  # Normalise the data
  if (CPM_normalise) {
    message("Normalising data...")
    # NOTE(review): the depth vector is handed to cpm() as a single library,
    # so values are presumably scaled by the summed per-base depth rather than
    # by the number of reads -- confirm this is the intended normalisation.
    pileup_dat$cpm <- as.numeric(edgeR::cpm(y = pileup_dat$count))
    gr$score <- pileup_dat$cpm
  } else {
    gr$score <- as.numeric(pileup_dat$count)
  }

  # Add strand info
  strand(gr) <- "+"

  # Write the output file
  message(str_c("Writing ", out_path))
  export.bw(object = gr, con = out_path)
  message("Done!")
}
# Write a bigWig for every path in `sorted_bams`, using the default
# CPM normalisation
lapply(sorted_bams, function(bam) bam_to_bigwig(sorted_bam = bam))
```