-
Notifications
You must be signed in to change notification settings - Fork 0
/
helper_io.R
110 lines (95 loc) · 3.52 KB
/
helper_io.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#------------------------------------------------------------------------------#
# I/O helper #
# functions #
#------------------------------------------------------------------------------#
#' Return elapsed time
#'
#' Given a start and end time in seconds, returns elapsed time in sec., min. or
#' hrs, as appropriate.
#' @param t0 Numeric start time representing seconds. No default.
#' @param t1 Numeric end time representing continuous second count from
#' \code{t0}. No default.
#' @return Elapsed time as \code{string}.
#' @examples
#' \dontrun{
#' start <- proc.time()[3]
#' end <- proc.time()[3]
#' elapsed_time(t0=start, t1=end)
#'
#' start <- proc.time()
#' elapsed_time(start[3], proc.time()[3])
#' }
#' @export
elapsed_time <- function(t0, t1) {
# Calculate elapsed time
e_time <- t1 - t0
# Convert to appropriate unit
if (e_time < 60) {
e_time <- paste0(round(e_time, 1), " sec.")
} else if (e_time >= 60 & e_time < 3600) {
e_time <- paste0(round(e_time / 60, 1), " min.")
} else {
e_time <- paste0(round(e_time / 60 / 60, 2), " hrs.")
}
return(e_time)
}
#' Import bedGraph file
#'
#' Imports data from bedGraph file.
#' \cr\strong{Note 1:} In the bedGraph format chromosome coordinates are
#' zero-based and half-open (the first chromosome position is 0, and the last
#' position in a chromosome of length N is N - 1). Imported data here are always
#' converted to one-based format, irrespective of the format of the imported
#' file.
#' \cr\strong{Note 2:} This function imports the \code{bedGraph} table using the
#' \code{readr} package and then converts it to a \code{GRanges} object. This is
#' faster than using \code{rtracklayer}'s \code{import.bedGraph()} function.
#' @param path Path to a bedGraph file. No default.
#' @return Data as \code{GRanges} object.
#' @examples
#' \dontrun{
#' x <- import_bedGraph('data.bdg')
#' }
#' @export
import_bedGraph <- function(path) {
t0 <- proc.time()[3]
# Import bedGraph data
message(' Importing bedGraph file...')
df <- readr::read_tsv(path, col_names=FALSE)
names(df) <- c('chr','start','end', 'score')
message(' Converting to "GRanges" object...')
gr <- with(df, GenomicRanges::GRanges(chr, IRanges::IRanges(start + 1, end),
score=score))
# Drop zeros (missing data)
message(' Dropping zero scores...')
gr <- gr[gr$score > 0]
message(' ---')
message(' Completed in ', elapsed_time(t0, proc.time()[3]))
return(gr)
}
#' Import MACS2 peaks file
#'
#' Imports data from a peaks file in \coe{BED6+4} format, generated by MACS2.
#' @param path Path to a MACS2 peaks file. No default.
#' @param type Type of peaks file: one of \code{narrow} (default value) and
#' \code{broad}.
#' @return Data as \code{GRanges} object.
#' @examples
#' \dontrun{
#' x <- import_bedGraph('data.bdg')
#' }
#' @export
import_MACS2_peaks <- function(path, type='narrow') {
# Import peaks
message(' Importing peaks file...')
if (type == 'narrow') {
extra_cols <- c(signalValue = "numeric", pValue = "numeric",
qValue = "numeric", summit = "integer")
} else if (type == 'broad') {
extra_cols <- c(signalValue = "numeric", pValue = "numeric",
qValue = "numeric")
} else stop('"type" argument must be one of "narrow" or "broad".')
peaks <- rtracklayer::import.bed(path, extraCols=extra_cols)
message(' Completed.')
return(peaks)
}