Skip to content

Commit

Permalink
Fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Piechotta committed Oct 13, 2020
1 parent bf7e574 commit 4940583
Show file tree
Hide file tree
Showing 47 changed files with 710 additions and 807 deletions.
13 changes: 7 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
export(All)
export(Any)
export(add_arrest_rate)
export(apply_cond)
export(apply_repl)
export(arrest_rate)
export(base_call)
export(base_count)
export(base_ratio)
export(base_sub)
export(bc_ratio)
export(clean_read_sub)
export(filter_all_artefacts)
export(coverage)
export(filter_artefact)
export(gather_repl)
export(lapply_cond)
export(lapply_repl)
export(mapply_repl)
export(mask_sub)
export(merge_sub)
export(non_ref_ratio)
export(plot_ecdf_allele_count)
export(plot_ecdf_column)
export(read_result)
export(read_results)
export(robust)
Expand Down
23 changes: 3 additions & 20 deletions R/arrest-rate.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,10 @@
#'
#' @export
add_arrest_rate <- function(result, cores = 1) {
arrest_cov <- .cov(result[[.ARREST_HELPER_COL]])
through_cov <- .cov(result[[.THROUGH_HELPER_COL]])


result[[.ARREST_RATE_COL]] <- lapply(result[[.ARREST_HELPER_COL]], function(x) {
tidyr::as_tibble(lapply(x, function(y) {
rowSums(y)
}))
}) %>% c() %>% tidyr::as_tibble()
arrest_cov <- coverage(result[[.ARREST_COL]])
through_cov <- coverage(result[[.THROUGH_COL]])

# FIXME make it more robust
cond_count <- names(result[[.THROUGH_HELPER_COL]]) %>% length()
for (cond in paste0("cond", 1:cond_count)) {
cond_through <- result[[.THROUGH_HELPER_COL]][[cond]]
for (repl in names(cond_through)) {
arrest_cov <- result[[.ARREST_RATE_COL]][[cond]][[repl]]
through_cov <- rowSums(cond_through[[repl]])

result[[.ARREST_RATE_COL]][[cond]][[repl]] <- arrest_rate(arrest_cov, through_cov)
}
}
result[[.ARREST_RATE_COL]] <- mapply_repl(arrest_rate, arrest_cov, through_cov)

result
}
Expand Down
11 changes: 7 additions & 4 deletions R/base-count.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' }
#'
#' @importFrom magrittr %>%
#' @param bc vector of strings with observed base calls.
#' @param bases vector of base calls or tibble of base call counts.
#' @param ref vector of strings with reference bases
#' @return numeric vector of total number of bases.
#' @examples
Expand All @@ -21,12 +21,15 @@
#' ref <- c("A", "A", "T")
#' str(base_count(bc, ref))
#' @export
base_count <- function(bc, ref = NULL) {
base_count <- function(bases, ref = NULL) {
if (! is.vector(bases)) {
bases <- base_call(bases)
}
if (! is.null(ref)) {
bc <- paste0(bc, ref)
bases <- paste0(bases, ref)
}

strsplit(bc, "") %>%
strsplit(bases, "") %>%
lapply(unique) %>% # remove duplicates
lapply(length) %>% # how many bases?
unlist()
Expand Down
4 changes: 2 additions & 2 deletions R/bc-ratio.R → R/base-ratio.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
#' @return data frame of base call rations \code{bases}.
#' @examples
#' bases <- matrix(c(5, 0, 5, 0, 1, 1, 1, 1), byrow = TRUE, ncol = 4)
#' ratio <- bc_ratio(bases)
#' ratio <- base_ratio(bases)
#' @export
bc_ratio <- function(bases) {
base_ratio <- function(bases) {
total <- rowSums(bases)

m <- matrix(
Expand Down
21 changes: 12 additions & 9 deletions R/base_sub.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,33 @@
#'
#' Calculates and formats base changes for \code{ref} and \code{bc}.
#' All elements in \code{ref} must be all length 1.
#'
#'
#' @param ref vector of reference bases.
#' @param bc vector of base calls.
#' @param bases vector of base calls or tibble of base call counts.
#' @return vector of formatted base call substitutions.
#' @examples
#' ref <- c("A", "A", "C", "A")
#' bc <- c("AG", "G", "C", "G")
#' base_sub(ref, bc)
#' bases <- c("AG", "G", "C", "G")
#' base_sub(ref, bases)
#' @export
base_sub <- function(ref, bc) {
base_sub <- function(ref, bases) {
if (all(nchar(ref) != 1)) {
stop("All ref elements must be nchar() == 1")
}
if (! is.vector(bases)) {
bases <- base_call(bases)
}

# remove ref in bc
# goal: A->G instead of A->AG
bc <- mapply(function(r, o) {
bases <- mapply(function(r, o) {
return(gsub(r, "", o))
}, ref, bc)
}, ref, bases)

# add nice separator
sub <- paste(ref, sep = .SUB_SEP, bc)
sub <- paste(ref, sep = .SUB_SEP, bases)
# add nice info when there is no change
sub[ref == bc | bc == ""] <- .SUB_NO_CHANGE
sub[ref == bases | bases == ""] <- .SUB_NO_CHANGE

sub
}
57 changes: 31 additions & 26 deletions R/common.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@
#' JACUSA2helper: A package for post-processing JACUSA2 result files.
#'
#' @section Description:
#' A package that provides the following categories of functions to post-process result files of JACUSA2:
#' \describe{
#' \item{read/write}{Read and write JACUSA2 result files, e.g.: \code{read_result()}.}
#' \item{add}{Adds some field to an existing JACUSA2 result object and return the modified object, e.g.: \code{base_sub()}.}
#' \item{filter}{Will remove sites from a result object with some filtering criteria, e.g.: \code{filter_by_coverage()}}
#' \item{plot}{Plots certain characteristics of a JACUSA2 result object.}
#' \item{other}{TODO}
#' }
#' A package that provides functions to post-process result files of JACUSA2.
#'
#' The following methods from JACUSA2 are supported:
#' \describe{
Expand All @@ -28,16 +21,21 @@
#' The central data structure in JACUSA2helper is the JACUSA2 result object that follows the
#' tidy data approach to feature easy interaction with dplyr and ggplot2.
#' A JACUSA2 result object can be created via \code{result <- read_result("jacusa2.out")} and is
#' currently represented as a tibble. Furthermore, JACUSA2helper supports the analysis of several related
#' JACUSA2 result files via \code{results <- read_results(files, meta)} where \code{meta_conditions} is a
#' vector of character strings that provides a descriptive name for each file in \code{files}.
#' currently represented as a tibble. Special structured columns exist that
#' hold condition andn replicate related data such as: coverage, bases. arrest rate.
#' Furthermore, JACUSA2helper supports the analysis of several related JACUSA2
#' result files via \code{results <- read_results(files, meta_cond)} where
#' \code{meta_cond} is a vector of character strings that provides a descriptive
#' name for each file in \code{files}.
#'
#' Check \code{vignette(TODO)} for a general introduction and \code{vignette(TODO meta conditions)} for details about meta conditions.
#' Check \code{vignette("JACUSA2helper", "JACUSA2helper")} for a general
#' introduction and \code{"JACUSA2helper", "JACUSA2helper"} for details about
#' meta conditions.
#'
#' @section read/write functions:
#' See:
#' \describe{
#' \item{read_result}{Reads and unpacks a JACUSA2 result file and creates a result object.}
#' \item{read_result}{Reads and unpacks a JACUSA2 result file.}
#' \item{read_results}{Allows to combine multiple result files and distinguish them with meta conditions.}
# \item{write_result}{This will pack result object and write its contents back to a file.}
#' \item{write_bedGraph}{Writes a vector of values as bedGraph file.}
Expand All @@ -46,28 +44,35 @@
#' @section Helper functions:
#' See:
#' \describe{
#' \item{arrest_rate}{Adds arrest rate to JACUSA2 result object.}
#' \item{base_count}{TODO}
#' \item{base_sub}{Adds base substitution column to JACUSA2 result object.}
#' \item{non_ref2bc_ratio}{Adds non reference base ratio to JACUSA2 result object.}
#' \item{sub_ratio}{Adds base substitution ratio for all bases to a JACUSA2 result object.}
#' \item{arrest_rate}{Calculates arrest rate from base call counts (arest, through).}
#' \item{base_count}{Calculates the bumber of observed base calls.}
#' \item{base_sub}{Calculates base substitution.}
#' \item{base_ratio}{Calculates base call ratios.}
#' \item{non_ref_ratio}{Calculates non reference base ratio to JACUSA2 result object.}
#' \item{sub_ratio}{Calculates base substitution ratio for all bases to a JACUSA2 result object.}
#'
# \item{merge_sub}
# \item{mask_sub}
#' }
#'
#' @section filter functions:
#' This function set enables filtering by read coverage or
#' enforcing a minimal number of variant base calls per sample.
#' This function set enables filtering by read coverage or enforcing a minimal
#' number of variant base calls per sample.
#'
#' See:
#' \describe{
#' \item{robust}{Retains sites that contain an arrest event in all replicates in at least one condition.}
#' \item{All}{TODO}
#' \item{Any}{TODO}
#' \item{robust}{Retains sites that are robust in one feature. The feature can be observed in all replicates of at least one condition.}
#' \item{filter_artefact}{Remove sites that have been marked as an artefact.}
#' \item{All}{Helper function}
#' \item{Any}{Helper function}
#' \item{lapply_cond}{lapply wrapper - applies function to all conditions.}
#' \item{lapply_repl}{lapply wrapper - applies function to all replicates.}
#' \item{mapply_repl}{mapply wrapper - applies function to all replicates.}
#' }
#'
#' # TODO
#' read_sub
# # TODO
# read_sub
#'
#' @docType package
#' @name JACUSA2helper
NULL

4 changes: 0 additions & 4 deletions R/cov.R

This file was deleted.

34 changes: 10 additions & 24 deletions R/helper-filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,46 +3,32 @@
#' Removes sites that sites that have been marked by feature/artefact filter.
#'
#' @param filter vector of strings that contains artefact filter information.
#' @param artefacts vector of characters that correspond to feature/artefact filters to be filtered.
#' @param artefacts vector of characters that correspond to feature/artefact filters to be filtered, default: NULL - Filter all.
#' @return vector of logical.
#' @examples
#' data(rdd)
#' # remove sites that are marked by artefact filter "D"
#' filtered <- filter_by(rdd, filter_artefact(filter, c("D")))
#' str(filtered)
#' dim(rdd)
#' dim(dplyr::filter(rdd, filter_artefact(filter, c("D"))))
#' # remove all sites that are marked by some artefact filter
#' dim(dplyr::filter(rdd, filter_artefact(filter)))
#' @export
filter_artefact <- function(filter, artefacts) {
if (nchar(artefacts) == 0) {
stop("artefacts cannot be 0")
filter_artefact <- function(filter, artefacts = NULL) {
if (is.null(artefacts)) {
artefacts <- c(paste0('\\', .EMPTY))
}

grepl(paste0(artefacts, collapse = "|"), filter)
}

#' Filters all sites with an artefact
#'
#' Removes sites have been marked by any feature/artefact filter.
#'
#' @param filter vector of strings that contains artefact filter information.
#' @return vector of logical.
#' @examples
#' data(rdd)
#' # remove sites that are marked by artefact filter "D"
#' filtered <- filter_by(rdd, filter_all_artefacts(filter))
#' str(filtered)
#' @export
filter_all_artefacts <- function(filter) {
filter_artefact(filter, c(paste0('\\', .EMPTY)))
}

#' Merge tibbles with columns holding logical values with AND to a vector.
#'
#' Each column holding values of logicals will be merged row-wise with AND.
#' @param d tibble with logical values
#' @return logical tibble with columns merged with AND.
#' @export
All <- function(d) {
Reduce("&", tidyr::as_tibble(d))
Reduce("&", tidyr::as_tibble(d)) %>%tidyr::as_tibble()
}


Expand All @@ -53,5 +39,5 @@ All <- function(d) {
#' @return logical tibble with columns merged with OR.
#' @export
Any <- function(d) {
Reduce("|", tidyr::as_tibble(d))
Reduce("|", tidyr::as_tibble(d)) %>% tidyr::as_tibble()
}
Loading

0 comments on commit 4940583

Please sign in to comment.