Skip to content

Commit

Permalink
Fixes, rename read_sub to tag and added expand_tag
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Piechotta committed Oct 21, 2020
1 parent 4940583 commit 0a7803c
Show file tree
Hide file tree
Showing 14 changed files with 111 additions and 30 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: JACUSA2helper
Type: Package
Title: Post-processing for JACUSA2 output
Version: 1.99-1
Version: 1.99-2
Depends: R (>= 3.10)
Date: 2020-10-11
Author: Michael Piechotta
Expand Down
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ export(base_call)
export(base_count)
export(base_ratio)
export(base_sub)
export(clean_read_sub)
export(clean_tag)
export(coord)
export(coverage)
export(expand_tag)
export(filter_artefact)
export(gather_repl)
export(lapply_cond)
Expand Down
4 changes: 2 additions & 2 deletions R/base_sub.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
#' @param bases vector of base calls or tibble of base call counts.
#' @return vector of formatted base call substitutions.
#' @examples
#' ref <- c("A", "A", "C", "A")
#' bases <- c("AG", "G", "C", "G")
#' ref <- c("A", "A", "C", "A")
#' base_sub(ref, bases)
#' @export
base_sub <- function(ref, bases) {
base_sub <- function(bases, ref) {
if (all(nchar(ref) != 1)) {
stop("All ref elements must be nchar() == 1")
}
Expand Down
2 changes: 0 additions & 2 deletions R/common.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@
#' \item{mapply_repl}{mapply wrapper - applies function to all replicates.}
#' }
#'
# # TODO
# read_sub
#'
#' @docType package
#' @name JACUSA2helper
Expand Down
2 changes: 1 addition & 1 deletion R/helper-filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ filter_artefact <- function(filter, artefacts = NULL) {
#' @return logical tibble with columns merged with AND.
#' @export
All <- function(d) {
Reduce("&", tidyr::as_tibble(d)) %>%tidyr::as_tibble()
Reduce("&", tidyr::as_tibble(d)) %>% tidyr::as_tibble()
}


Expand Down
6 changes: 3 additions & 3 deletions R/helper-sub.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,12 @@ mask_sub <- function(subs, keep) {
#' Transforms read substitution. JACUSA2 can stratify reads based on
#' base substitutions "-B A2G". This function will transform "A2G" to "A->G".
#'
#' @param subs string vector of read substitutions.
#' @param subs string vector of read tags.
#' @return string vector of base substitution.
#' @examples
#' subs <- c("*", "A2G", "*")
#' clean_read_sub(subs)
#' clean_tag(subs)
#' @export
clean_read_sub <- function(subs) {
clean_tag <- function(subs) {
gsub("2", .SUB_SEP, subs)
}
38 changes: 37 additions & 1 deletion R/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
.REF_BASE_COL <- "ref"

# JACUSA2 CLI option -B
.SUB_TAG_COL <- "sub_tag"
.SUB_TAG_COL <- "tag"

.ARREST_RATE_COL <- "arrest_rate"
.META_COND_COL <- "meta_cond"
Expand Down Expand Up @@ -79,3 +79,39 @@ gather_repl <- function(id, x, meta_cond = NULL) {
}
dplyr::bind_rows(r)
}


#' Expand tagged reads
#'
#' TODO
#'
#' @param result object created by \code{read_result()} or \code{read_results()}.
#' @param cores Integer defines how many cores to use.
#' @param result object tagged and not tagged structured base columns.
#' @export
expand_tag <- function(result, cores = 1) {
# extract data from tagged and not tagged reads
total <- result %>% dplyr::filter(tag == .EMPTY)
# set dummy column - not all sites have tagged reads
total$tagged_bases <- lapply_repl(total$bases, function(x) { x - x})

# extract data from tagged
tagged <- result %>% dplyr::filter(tag != .EMPTY)
matching <- match(tagged$coord, total$coord)

if (any(! is.na(matching))) {
tagged_bases <- tagged$bases[which(! is.na(matching)), ]
total$tagged_bases[matching[! is.na(matching)], ] <- tagged_bases
}
# untagged = total - tagged
total$not_tagged_bases <- mapply_repl(
function(total, tagged) {
total - tagged
},
total$bases,
total$tagged_bases,
cores = cores
)

total
}
22 changes: 17 additions & 5 deletions R/io.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ read_result <- function(file, cond_desc = c(), unpack = FALSE, progress = TRUE,
attr(result, .ATTR_COND_DESC) <- cond_desc
}
attr(result, .ATTR_HEADER) <- jacusa_header
result <- sticky::sticky(result)
#result <- sticky::sticky(result)
id <- info <- filter <- ref <- NULL
result <- result %>%
dplyr::select(id, dplyr::everything()) %>%
Expand All @@ -96,9 +96,15 @@ read_result <- function(file, cond_desc = c(), unpack = FALSE, progress = TRUE,
result
}


# create contig:start|start-end:strand
.coord <- function(data) {
#' Merged coordinate information
#'
#' Merge contig:start|start-end:strand from JACUSA2 result object
#'
#' @param result object created by \code{read_result()} or \code{read_results()}.
#' @return merged coorindates information
#'
#' @export
coord <- function(data) {
paste0(
data$contig,
":", ifelse(data$end - data$start == 1, data$start, paste0(data$start, "-", data$end)),
Expand All @@ -108,7 +114,7 @@ read_result <- function(file, cond_desc = c(), unpack = FALSE, progress = TRUE,

# FIXME remove |read_sub
.id <- function(data) {
coord <- .coord(data)
coord <- coord(data)
regex <- paste0("(", .SUB_TAG_COL, "|read_sub)=([^;]+)")
if (any(stringr::str_detect(data[[.INFO_COL]], regex))) {
sub_tag <- stringr::str_match(data[[.INFO_COL]], regex)[, 3]
Expand Down Expand Up @@ -203,6 +209,12 @@ base_call <-function(bases) {
x <- strsplit(info[[.INFO_COL]], "=") %>% do.call(rbind, .) %>% as.data.frame() %>% tidyr::as_tibble()
colnames(x) <- c("key", "value")

# FIXME remove
i <- x$key == "read_sub"
if (any(i)) {
x[i, "key"] <- .SUB_TAG_COL
}

info <- info[, "id"]
info <- dplyr::bind_cols(info, x) %>% tidyr::pivot_wider(names_from = key, values_from=value)

Expand Down
8 changes: 4 additions & 4 deletions man/base_sub.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/clean_read_sub.Rd → man/clean_tag.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/coord.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions man/expand_tag.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions tests/testthat/test-helper-sub.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ test_that("mask_sub work as expected", {

})

# clean_read_sub
test_that("clean_read_sub work as expected", {
# clean_tag
test_that("clean_tag work as expected", {
subs <- c("A2G", "*")
expected <- c("A->G", "*")

expect_equal(clean_read_sub(subs), expected)
expect_equal(clean_tag(subs), expected)
})
4 changes: 2 additions & 2 deletions vignettes/JACUSA2helper.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ rna_bases <- Reduce("+", filtered$bases$cond2)
# we don't need lapply_repl, because we don't operate on all replicate from all
# conditions - only condition 2 / RNA
ref2rna <- base_sub(filtered$ref, rna_bases)
ref2rna <- base_sub(rna_bases, filtered$ref)
table(ref2rna)
```
Expand All @@ -173,7 +173,7 @@ TODO Robust requires observations (here base calls) to be present in all replica

```{r}
cond1_ref <- base_call(filtered$bases$cond1$rep1)
dna2rna <- base_sub(filtered$ref, rna_bases)
dna2rna <- base_sub(rna_bases, filtered$ref)
table(dna2rna)
```

Expand Down

0 comments on commit 0a7803c

Please sign in to comment.