-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathplot_coverage.R
127 lines (105 loc) · 3.79 KB
/
plot_coverage.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
library(dplyr)
# loc = dplyr::tibble(
# chrom = "chr1",
# start = 43824626 - 3000 - 1,
# end = 43824626 + 3000
# )
## Use mutaiton motif as center
loc = dplyr::tibble(
chrom = "chr1",
start = 43824527 - 500,
end = 43824527 + 500
)
# Signal from ENCODE------------------------------------------------------------------
HEK293 = rtracklayer::import.bw("~/biodata/CDC20/ENCFF000WXM.bigWig")
HEK293 = data.table::as.data.table(HEK293)
HEK293_CDC20 = HEK293[start < loc$end & end > loc$start & seqnames == loc$chrom]
summary(HEK293_CDC20$score)
HELA = rtracklayer::import.bw("~/biodata/CDC20/ENCFF000XDZ.bigWig")
HELA = data.table::as.data.table(HELA)
HELA_CDC20 = HELA[start < loc$end & end > loc$start & seqnames == loc$chrom]
summary(HELA_CDC20$score)
save(HEK293_CDC20, HELA_CDC20, file = "signal_plotting.RData")
# Plotting ----------------------------------------------------------------
load(file = "signal_plotting.RData")
library(ggplot2)
library(ggforce)
start_point = 43824626
motif_start = 43824524
motif_end = 43824529
dat = rbind(
HEK293_CDC20[, c("start", "score")],
HEK293_CDC20[, c("end", "score")],
use.names = FALSE
)
dat = unique(dat)
p1 <- ggplot(dat, aes(x=start, y=score)) +
geom_area() +
geom_vline(xintercept = start_point, color = "green") +
annotate("rect", xmin = motif_start, xmax = motif_end, ymin = 0, ymax = 15, alpha = .3, fill = "red") +
cowplot::theme_cowplot() +
labs(x = NULL, y = "Signal in HEK293 cell line")
## Add zoom in
# ggplot(dat, aes(x=start, y=score)) +
# geom_area() +
# # xlim(start_point - 100, start_point + 10) +
# geom_vline(xintercept = start_point, color = "green") +
# facet_zoom(xlim = c(start_point - 150, start_point + 100)) +
# cowplot::theme_cowplot() +
# labs(x = "Genome coordinates of chr1", y = "Chip-Seq signal score in HEK293 cell line")
# dat[order(start)][start > start_point + 120]
dat2 = rbind(
HELA_CDC20[, c("start", "score")],
HELA_CDC20[, c("end", "score")],
use.names = FALSE
)
dat2 = unique(dat2)
p2 <- ggplot(dat2, aes(x=start, y=score)) +
geom_area() +
geom_vline(xintercept = start_point, color = "green") +
annotate("rect", xmin = motif_start, xmax = motif_end, ymin = 0, ymax = 8.4, alpha = .3, fill = "red") +
cowplot::theme_cowplot() +
labs(x = "Genome coordinates of chr1", y = "Signal score in HELA cell line")
library(patchwork)
p = p1 / p2
ggsave(filename = "ELK4_coverage.pdf", plot = p, width = 6, height = 4)
## Add zoom in
# ggplot(dat2, aes(x=start, y=score)) +
# geom_area() +
# # xlim(start_point - 100, start_point + 10) +
# geom_vline(xintercept = start_point, color = "green") +
# facet_zoom(xlim = c(start_point - 150, start_point + 100)) +
# cowplot::theme_cowplot() +
# labs(x = "Genome coordinates of chr1", y = "Chip-Seq signal score in HELA cell line")
# Read coverage -----------------------------------------------------------
# readr:::write_tsv(loc, path = "cdc20_selected_region.bed", col_names = FALSE)
#
# getCoverage = function(bam_file, bed_file) {
# stopifnot(length(bam_file) == 1)
# out_file = basename(sub("bam", "coverage", bam_file))
# # system(paste(
# # "/Users/wsx/anaconda3/envs/biosoft/bin/samtools depth",
# # "-b",
# # bed_file,
# # bam_file,
# # ">",
# # out_file
# # ))
#
# # ref: https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html
# system(
# paste(
# "/Users/wsx/anaconda3/envs/biosoft/bin/bedtools coverage",
# "-a", bed_file,
# "-b", bam_file,
# "-d",
# ">", out_file
# )
# )
#
# }
#
# getCoverage(bam_file = "~/biodata/CDC20/ENCFF000WXK.bam", bed_file = "cdc20_selected_region.bed")
# getCoverage(bam_file = "~/biodata/CDC20/ENCFF000WXN.bam", bed_file = "cdc20_selected_region.bed")
#
# dt <- readr::read_tsv("ENCFF000WXN.coverage", col_names = F)