Skip to content

Commit

Permalink
Merged old, broken, and new
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Piechotta committed Oct 12, 2020
1 parent e047091 commit bf7e574
Show file tree
Hide file tree
Showing 127 changed files with 1,167 additions and 3,541 deletions.
11 changes: 5 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
Package: JACUSA2helper
Type: Package
Title: Post-processing for JACUSA2 output
Version: 1.98-3
Depends: R (>= 2.10)
Date: 2019-11-10
Version: 1.99-1
Depends: R (>= 3.10)
Date: 2020-10-11
Author: Michael Piechotta
Maintainer: <michael.piechotta@gmail.com>
Description: This package enables post-processing of JACUSA2 output: read, filter, plot, write.
Expand All @@ -22,10 +22,9 @@ Imports:
rlang,
magrittr,
ggplot2,
GGally,
parallel,
scales,
tibble,
sticky,
methods,
stringr
stringr
VignetteBuilder: knitr
44 changes: 5 additions & 39 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,59 +1,25 @@
# Generated by roxygen2: do not edit by hand

export(All)
export(Any)
export(add_arrest_rate)
export(add_bc)
export(add_bc_ratio)
export(add_cov)
export(add_non_ref_ratio)
export(add_sub)
export(add_sub_ratio)
export(arrest_col)
export(apply_cond)
export(apply_repl)
export(arrest_rate)
export(arrest_rate_col)
export(base_call)
export(base_call_non_ref)
export(base_count)
export(base_sub)
export(bc_col)
export(bc_ratio)
export(bc_ratio_col)
export(clean_read_sub)
export(copy_attr)
export(cov_col)
export(coverage)
export(extract_ref)
export(filter_all_artefacts)
export(filter_artefact)
export(filter_by)
export(filter_by_max_score)
export(filter_by_min_score)
export(filter_by_robust_arrest_events)
export(filter_by_robust_variants)
export(get_arrest_rate)
export(get_bc)
export(get_bc_ratio)
export(get_cov)
export(get_non_ref_ratio)
export(get_sub)
export(get_sub_ratio)
export(group_by_site)
export(mask_sub)
export(merge_sub)
export(non_ref_ratio)
export(non_ref_ratio_col)
export(peak)
export(plot_ecdf_allele_count)
export(plot_ecdf_column)
export(plot_pairs_matrix)
export(read_result)
export(read_results)
export(redefine_ref)
export(robust_arrest_events)
export(sub_col)
export(robust)
export(sub_ratio)
export(sub_ratio_col)
export(through_col)
export(write_bedGraph)
importFrom(magrittr,"%>%")
importFrom(rlang,syms)
importFrom(tibble,add_column)
89 changes: 25 additions & 64 deletions R/arrest-rate.R
Original file line number Diff line number Diff line change
@@ -1,36 +1,36 @@
#' Add arrest rate.
#'
#' Adds arrest rate calculated from \code{arrest[_suffix]} and \code{through[_suffix]} columns.
#' Calculates and adds arrest rate from \code{arrest} and \code{through} columns.
#'
#' @param result object created by \code{read_result()} or \code{read_results()}.
#' @param suffix string added to specifiy base columns to calculate arrest rate. Default: NULL
#' @return result object with arrest rate field \code{arrest_rate[_suffix]} added.
#' @param cores Integer defines how many cores to use.
#' @return result object with arrest rate field \code{arrest_rate} added.
#'
#' @export
add_arrest_rate <- function(result, suffix = NULL) {
check_column_exists(result, arrest_col(suffix))
check_column_exists(result, through_col(suffix))
add_arrest_rate <- function(result, cores = 1) {
arrest_cov <- .cov(result[[.ARREST_HELPER_COL]])
through_cov <- .cov(result[[.THROUGH_HELPER_COL]])

result[[arrest_rate_col(suffix)]] <- arrest_rate(
get_cov(result, arrest_col(suffix)),
get_cov(result, through_col(suffix))
)

result
}
result[[.ARREST_RATE_COL]] <- lapply(result[[.ARREST_HELPER_COL]], function(x) {
tidyr::as_tibble(lapply(x, function(y) {
rowSums(y)
}))
}) %>% c() %>% tidyr::as_tibble()

#' Retrieve arrest rate.
#'
#' Retrieves arrest rate from \code{arrest[_suffix]} and \code{through[_suffix]} columns.
#'
#' @param result object created by \code{read_result()} or \code{read_results()}.
#' @param suffix string added to specifiy base columns to retrieve arrest rate. Default: NULL
#' @return numeric vector with arrest rates for base counts with \code{suffix}.
#' @examples
#' data(HIVRT)
#' str(get_arrest_rate(HIVRT))
#' @export
get_arrest_rate <- function(result, suffix = NULL) {
result[[arrest_rate_col(suffix)]]
# FIXME make it more robust
cond_count <- names(result[[.THROUGH_HELPER_COL]]) %>% length()
for (cond in paste0("cond", 1:cond_count)) {
cond_through <- result[[.THROUGH_HELPER_COL]][[cond]]
for (repl in names(cond_through)) {
arrest_cov <- result[[.ARREST_RATE_COL]][[cond]][[repl]]
through_cov <- rowSums(cond_through[[repl]])
result[[.ARREST_RATE_COL]][[cond]][[repl]] <- arrest_rate(arrest_cov, through_cov)
}
}

result
}

#' Calculate arrest rate.
Expand All @@ -48,42 +48,3 @@ get_arrest_rate <- function(result, suffix = NULL) {
arrest_rate <- function(arrest_cov, through_cov) {
arrest_cov / (arrest_cov + through_cov)
}

#' Column name for arrest rate for \code{suffix}.
#'
#' Column name for arrest ratio for \code{suffix}.
#' \code{suffix} will be used to create the column name:
#' \code{arrest_rate[_suffix]}.
#'
#' @param suffix string to specifiy base columns to retrieve arrest rate. Default: NULL
#' @return string the represents the column name for arrest rate with marked \code{suffix}.
#' @export
arrest_rate_col <- function(suffix = NULL) {
process_col(ARREST_RATE, suffix)
}

#' Column name for arrest base counts for \code{suffix}.
#'
#' Column name for arrest base counts for \code{suffix}.
#' \code{suffix} will be used to create the column name:
#' \code{arrest[_suffix]}".
#'
#' @param suffix string to specifiy base columns to retrieve arrest bases Default: NULL
#' @return string the represents the column name for arrest bases marked with \code{suffix}.
#' @export
arrest_col <- function(suffix = NULL) {
process_col(ARREST_COLUMN, suffix)
}

#' Column name for through base counts for \code{suffix}.
#'
#' Column name for through base counts for \code{suffix}.
#' \code{suffix} will be used to create the column name:
#' \code{through[_suffix]}".
#'
#' @param suffix string to specifiy base columns to retrieve through bases Default: NULL
#' @return string the represents the column name for through bases marked with \code{suffix}.
#' @export
through_col <- function(suffix = NULL) {
process_col(THROUGH_COLUMN, suffix)
}
2 changes: 1 addition & 1 deletion R/base-count.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Retrieves the total number of observed unique bases.
#'
#' Retrieves the total number of observed unique bases for \code{base_type}.
#' Retrieves the total number of observed unique bases.
#' If \code{ref} is provided, reference base will be considered for counting, e.g.:
#' At some site only base 'A' was observed but the reference was 'T'.
#' \code{base_count} will be:
Expand Down
31 changes: 31 additions & 0 deletions R/base_sub.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#' Calculates reference base to base call (base substitution).
#'
#' Calculates and formats base changes for \code{ref} and \code{bc}.
#' All elements in \code{ref} must be all length 1.
#'
#' @param ref vector of reference bases.
#' @param bc vector of base calls.
#' @return vector of formatted base call substitutions.
#' @examples
#' ref <- c("A", "A", "C", "A")
#' bc <- c("AG", "G", "C", "G")
#' base_sub(ref, bc)
#' @export
base_sub <- function(ref, bc) {
if (all(nchar(ref) != 1)) {
stop("All ref elements must be nchar() == 1")
}

# remove ref in bc
# goal: A->G instead of A->AG
bc <- mapply(function(r, o) {
return(gsub(r, "", o))
}, ref, bc)

# add nice separator
sub <- paste(ref, sep = .SUB_SEP, bc)
# add nice info when there is no change
sub[ref == bc | bc == ""] <- .SUB_NO_CHANGE

sub
}
23 changes: 0 additions & 23 deletions R/bases.R

This file was deleted.

69 changes: 3 additions & 66 deletions R/bc-ratio.R
Original file line number Diff line number Diff line change
@@ -1,52 +1,3 @@
#' Adds base call ratio.
#'
#' Calculates and adds base call ratio for all possible bases.
#' For a given \code{base_type}, calculates a base ratio vector \code{bc_ratio[_base_type]}
#' for all possible bases (A, C, G, and T).
#'
#' @param result created by \code{read_result()} or \code{read_results()}.
#' @param base_type string specifying base column. Default: bases.
#' @return result object with base call ratio added.
#' @examples
#' data(rdd)
#' # use default bases to define ratio
#' result <- add_bc_ratio(rdd)
#' str(subset(result, select = grep("bc_ratio", names(result))))
#'
#' data(HIVRT)
#' # use arrest bases
#' result <- add_bc_ratio(HIVRT, "arrest")
#' subset(result, select = grep("bc_ratio_arrest", names(result)))
#' @export
add_bc_ratio <- function(result, base_type = "bases") {
check_column_exists(result, base_type)
result[[bc_ratio_col(base_type)]] <- bc_ratio(result[[base_type]])

result
}

#' Retrive base call ratio from a result object.
#'
#' Extracts base call ratio for some \code{base_type} from a JACUSA2 result object.
#'
#' @param result created by \code{read_result()} or \code{read_results()}.
#' @param base_type string specifying base column. Default: bases.
#' @return numeric tibble of base call ratios for \code{base_type}.
#' @examples
#' data(rdd)
#' # use default bases to define ratio
#' ratio <- get_bc_ratio(rdd)
#' str(ratio)
#'
#' data(HIVRT)
#' # use arrest bases to define ratio
#' ratio <- get_bc_ratio(rdd, "arrest")
#' str(ratio)
#' @export
get_bc_ratio <- function(result, base_type = "bases") {
result[[bc_ratio_col(base_type)]]
}

#' Calculate base call ratios for base counts.
#'
#' Calculates base call ratios for base counts.
Expand All @@ -64,28 +15,14 @@ bc_ratio <- function(bases) {
m <- matrix(
0.0,
nrow = nrow(bases),
ncol = length(BASES)
ncol = length(.BASES)
)
colnames(m) <- BASES
df <- tibble::as_tibble(m)
colnames(m) <- .BASES
df <- tidyr::as_tibble(m)

# calculate ratio only for sites with cov > 0 -> x / cov
non_zero_i <- total > 0
df[non_zero_i, ] <- bases[non_zero_i, ] / total[non_zero_i]

df
}

#' Column name for base call ratios for \code{base_type}.
#'
#' Column name for base call ratios for \code{base_type}.
#' \code{base_type} will be used to create the column name:
#' \code{bc_ratio[_base_type]}. Using \code{base_type = "bases"}
#' will result in the field "bc_ratio".
#'
#' @param base_type string specifiying base column. Default: bases.
#' @return string that represents the column name for base call ratios for \code{base_type}.
#' @export
bc_ratio_col <- function(base_type = "bases") {
process_col(BC_RATIO, base_type)
}
Loading

0 comments on commit bf7e574

Please sign in to comment.