From 494058396bbaecdb3f79b7e7deda71913f1d8adb Mon Sep 17 00:00:00 2001 From: Michael Piechotta Date: Wed, 14 Oct 2020 01:00:46 +0200 Subject: [PATCH] Fixes --- NAMESPACE | 13 +- R/arrest-rate.R | 23 +- R/base-count.R | 11 +- R/{bc-ratio.R => base-ratio.R} | 4 +- R/base_sub.R | 21 +- R/common.R | 57 +-- R/cov.R | 4 - R/helper-filter.R | 34 +- R/helper-robust.R | 119 ++++-- R/helper-sub.R | 2 +- R/helper.R | 74 ++-- R/io-guess.R | 4 +- R/io.R | 69 ++-- R/non-ref-ratio.R | 2 +- R/plot-ecdf-allele-count.R | 18 - R/plot-ecdf-column.R | 90 ----- R/sub-ratio.R | 2 +- R/write-result.R | 1 + man/JACUSA2helper.Rd | 53 +-- man/apply_cond.Rd | 20 - man/apply_repl.Rd | 20 - man/base_call.Rd | 17 + man/base_count.Rd | 4 +- man/{bc_ratio.Rd => base_ratio.Rd} | 10 +- man/base_sub.Rd | 8 +- man/coverage.Rd | 19 + man/filter_all_artefacts.Rd | 23 -- man/filter_artefact.Rd | 10 +- man/gather_repl.Rd | 21 ++ man/lapply_cond.Rd | 23 ++ man/lapply_repl.Rd | 23 ++ man/mapply_repl.Rd | 23 ++ man/non_ref_ratio.Rd | 2 +- man/plot_ecdf_allele_count.Rd | 31 -- man/plot_ecdf_column.Rd | 42 --- man/read_results.Rd | 4 +- man/robust.Rd | 4 +- man/sub_ratio.Rd | 2 +- .../{test-bc-ratio.R => test-base-ratio.R} | 6 +- .../testthat/{test-cov.R => test-coverage.R} | 16 +- tests/testthat/test-filter-by-filter-info.R | 7 - tests/testthat/test-guess-cond-count.R | 16 +- tests/testthat/test-guess-file-type.R | 8 +- tests/testthat/test-io.R | 4 +- vignettes/JACUSA2helper-meta-condition.bib | 37 -- vignettes/JACUSA2helper-meta-condtions.Rmd | 168 +++++---- vignettes/JACUSA2helper.Rmd | 348 ++++++++++-------- 47 files changed, 710 insertions(+), 807 deletions(-) rename R/{bc-ratio.R => base-ratio.R} (91%) delete mode 100644 R/cov.R delete mode 100644 R/plot-ecdf-allele-count.R delete mode 100644 R/plot-ecdf-column.R delete mode 100644 man/apply_cond.Rd delete mode 100644 man/apply_repl.Rd create mode 100644 man/base_call.Rd rename man/{bc_ratio.Rd => base_ratio.Rd} (77%) create mode 100644 man/coverage.Rd delete mode 100644 man/filter_all_artefacts.Rd create mode 100644 man/gather_repl.Rd create mode 100644 man/lapply_cond.Rd create mode 100644 man/lapply_repl.Rd create mode 100644 man/mapply_repl.Rd delete mode 100644 man/plot_ecdf_allele_count.Rd delete mode 100644 man/plot_ecdf_column.Rd rename tests/testthat/{test-bc-ratio.R => test-base-ratio.R} (79%) rename tests/testthat/{test-cov.R => test-coverage.R} (60%) delete mode 100644 vignettes/JACUSA2helper-meta-condition.bib diff --git a/NAMESPACE b/NAMESPACE index 5d2819e..8f56ce2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,20 +3,21 @@ export(All) export(Any) export(add_arrest_rate) -export(apply_cond) -export(apply_repl) export(arrest_rate) +export(base_call) export(base_count) +export(base_ratio) export(base_sub) -export(bc_ratio) export(clean_read_sub) -export(filter_all_artefacts) +export(coverage) export(filter_artefact) +export(gather_repl) +export(lapply_cond) +export(lapply_repl) +export(mapply_repl) export(mask_sub) export(merge_sub) export(non_ref_ratio) -export(plot_ecdf_allele_count) -export(plot_ecdf_column) export(read_result) export(read_results) export(robust) diff --git a/R/arrest-rate.R b/R/arrest-rate.R index 9e8d050..c60b656 100644 --- a/R/arrest-rate.R +++ b/R/arrest-rate.R @@ -8,27 +8,10 @@ #' #' @export add_arrest_rate <- function(result, cores = 1) { - arrest_cov <- .cov(result[[.ARREST_HELPER_COL]]) - through_cov <- .cov(result[[.THROUGH_HELPER_COL]]) - - - result[[.ARREST_RATE_COL]] <- lapply(result[[.ARREST_HELPER_COL]], function(x) { - 
tidyr::as_tibble(lapply(x, function(y) { - rowSums(y) - })) - }) %>% c() %>% tidyr::as_tibble() + arrest_cov <- coverage(result[[.ARREST_COL]]) + through_cov <- coverage(result[[.THROUGH_COL]]) - # FIXME make it more robust - cond_count <- names(result[[.THROUGH_HELPER_COL]]) %>% length() - for (cond in paste0("cond", 1:cond_count)) { - cond_through <- result[[.THROUGH_HELPER_COL]][[cond]] - for (repl in names(cond_through)) { - arrest_cov <- result[[.ARREST_RATE_COL]][[cond]][[repl]] - through_cov <- rowSums(cond_through[[repl]]) - - result[[.ARREST_RATE_COL]][[cond]][[repl]] <- arrest_rate(arrest_cov, through_cov) - } - } + result[[.ARREST_RATE_COL]] <- mapply_repl(arrest_rate, arrest_cov, through_cov) result } diff --git a/R/base-count.R b/R/base-count.R index 3ea745c..6f452fe 100644 --- a/R/base-count.R +++ b/R/base-count.R @@ -10,7 +10,7 @@ #' } #' #' @importFrom magrittr %>% -#' @param bc vector of strings with observed base calls. +#' @param bases vector of base calls or tibble of base call counts. #' @param ref vector of strings with reference bases #' @return numeric vector of total number of bases. #' @examples @@ -21,12 +21,15 @@ #' ref <- c("A", "A", "T") #' str(base_count(bc, ref)) #' @export -base_count <- function(bc, ref = NULL) { +base_count <- function(bases, ref = NULL) { + if (! is.vector(bases)) { + bases <- base_call(bases) + } if (! is.null(ref)) { - bc <- paste0(bc, ref) + bases <- paste0(bases, ref) } - strsplit(bc, "") %>% + strsplit(bases, "") %>% lapply(unique) %>% # remove duplicates lapply(length) %>% # how many bases? unlist() diff --git a/R/bc-ratio.R b/R/base-ratio.R similarity index 91% rename from R/bc-ratio.R rename to R/base-ratio.R index c259384..aa54258 100644 --- a/R/bc-ratio.R +++ b/R/base-ratio.R @@ -7,9 +7,9 @@ #' @return data frame of base call rations \code{bases}. #' @examples #' bases <- matrix(c(5, 0, 5, 0, 1, 1, 1, 1), byrow = TRUE, ncol = 4) -#' ratio <- bc_ratio(bases) +#' ratio <- base_ratio(bases) #' @export -bc_ratio <- function(bases) { +base_ratio <- function(bases) { total <- rowSums(bases) m <- matrix( diff --git a/R/base_sub.R b/R/base_sub.R index 897cebd..482f69e 100644 --- a/R/base_sub.R +++ b/R/base_sub.R @@ -2,30 +2,33 @@ #' #' Calculates and formats base changes for \code{ref} and \code{bc}. #' All elements in \code{ref} must be all length 1. -#' +#' #' @param ref vector of reference bases. -#' @param bc vector of base calls. +#' @param bases vector of base calls or tibble of base call counts. #' @return vector of formatted base call substitutions. #' @examples #' ref <- c("A", "A", "C", "A") -#' bc <- c("AG", "G", "C", "G") -#' base_sub(ref, bc) +#' bases <- c("AG", "G", "C", "G") +#' base_sub(ref, bases) #' @export -base_sub <- function(ref, bc) { +base_sub <- function(ref, bases) { if (all(nchar(ref) != 1)) { stop("All ref elements must be nchar() == 1") } + if (! is.vector(bases)) { + bases <- base_call(bases) + } # remove ref in bc # goal: A->G instead of A->AG - bc <- mapply(function(r, o) { + bases <- mapply(function(r, o) { return(gsub(r, "", o)) - }, ref, bc) + }, ref, bases) # add nice separator - sub <- paste(ref, sep = .SUB_SEP, bc) + sub <- paste(ref, sep = .SUB_SEP, bases) # add nice info when there is no change - sub[ref == bc | bc == ""] <- .SUB_NO_CHANGE + sub[ref == bases | bases == ""] <- .SUB_NO_CHANGE sub } diff --git a/R/common.R b/R/common.R index bf2359f..3433080 100644 --- a/R/common.R +++ b/R/common.R @@ -1,14 +1,7 @@ #' JACUSA2helper: A package for post-processing JACUSA2 result files. 
 #'
 #' @section Description:
-#' A package that provides the following categories of functions to post-process result files of JACUSA2:
-#' \describe{
-#'   \item{read/write}{Read and write JACUSA2 result files, e.g.: \code{read_result()}.}
-#'   \item{add}{Adds some field to an existing JACUSA2 result object and return the modified object, e.g.: \code{base_sub()}.}
-#'   \item{filter}{Will remove sites from a result object with some filtering criteria, e.g.: \code{filter_by_coverage()}}
-#'   \item{plot}{Plots certain characteristics of a JACUSA2 result object.}
-#'   \item{other}{TODO}
-#' }
+#' A package that provides functions to post-process result files of JACUSA2.
 #'
 #' The following methods from JACUSA2 are supported:
 #' \describe{
@@ -28,16 +21,21 @@
 #' The central data structure in JACUSA2helper is the JACUSA2 result object that follows the
 #' tidy data approach to feature easy interaction with dplyr and ggplot2.
 #' A JACUSA2 result object can be created via \code{result <- read_result("jacusa2.out")} and is
-#' currently represented as a tibble. Furthermore, JACUSA2helper supports the analysis of several related
-#' JACUSA2 result files via \code{results <- read_results(files, meta)} where \code{meta_conditions} is a
-#' vector of character strings that provides a descriptive name for each file in \code{files}.
+#' currently represented as a tibble. Special structured columns exist that
+#' hold condition and replicate related data such as coverage, bases, and arrest rate.
+#' Furthermore, JACUSA2helper supports the analysis of several related JACUSA2
+#' result files via \code{results <- read_results(files, meta_cond)} where
+#' \code{meta_cond} is a vector of character strings that provides a descriptive
+#' name for each file in \code{files}.
 #'
-#' Check \code{vignette(TODO)} for a general introduction and \code{vignette(TODO meta conditions)} for details about meta conditions.
+#' Check \code{vignette("JACUSA2helper", "JACUSA2helper")} for a general
+#' introduction and \code{vignette("JACUSA2helper-meta-condtions", "JACUSA2helper")} for details about
+#' meta conditions.
 #'
 #' @section read/write functions:
 #' See:
 #' \describe{
-#'   \item{read_result}{Reads and unpacks a JACUSA2 result file and creates a result object.}
+#'   \item{read_result}{Reads and unpacks a JACUSA2 result file.}
 #'   \item{read_results}{Allows to combine multiple result files and distinguish them with meta conditions.}
 #   \item{write_result}{This will pack result object and write its contents back to a file.}
 #'   \item{write_bedGraph}{Writes a vector of values as bedGraph file.}
@@ -46,28 +44,35 @@
 #' @section Helper functions:
 #' See:
 #' \describe{
-#'   \item{arrest_rate}{Adds arrest rate to JACUSA2 result object.}
-#'   \item{base_count}{TODO}
-#'   \item{base_sub}{Adds base substitution column to JACUSA2 result object.}
-#'   \item{non_ref2bc_ratio}{Adds non reference base ratio to JACUSA2 result object.}
-#'   \item{sub_ratio}{Adds base substitution ratio for all bases to a JACUSA2 result object.}
+#'   \item{arrest_rate}{Calculates the arrest rate from base call counts (arrest, through).}
+#'   \item{base_count}{Calculates the number of observed base calls.}
+#'   \item{base_sub}{Calculates base substitutions.}
+#'   \item{base_ratio}{Calculates base call ratios.}
+#'   \item{non_ref_ratio}{Calculates the non-reference base call ratio.}
+#'   \item{sub_ratio}{Calculates the base substitution ratio for all bases.}
+#'
+# \item{merge_sub}
+# \item{mask_sub}
 #' }
 #'
 #' @section filter functions:
-#' This function set enables filtering by read coverage or
-#' enforcing a minimal number of variant base calls per sample.
+#' This function set enables filtering by read coverage or enforcing a minimal
+#' number of variant base calls per sample.
 #'
 #' See:
 #' \describe{
-#'   \item{robust}{Retains sites that contain an arrest event in all replicates in at least one condition.}
-#'   \item{All}{TODO}
-#'   \item{Any}{TODO}
+#'   \item{robust}{Retains sites with a robust feature, i.e. the feature is observed in all replicates of at least one condition.}
+#'   \item{filter_artefact}{Removes sites that have been marked by a feature/artefact filter.}
+#'   \item{All}{Helper function that merges logical columns with AND.}
+#'   \item{Any}{Helper function that merges logical columns with OR.}
+#'   \item{lapply_cond}{lapply wrapper that applies a function to all conditions.}
+#'   \item{lapply_repl}{lapply wrapper that applies a function to all replicates.}
+#'   \item{mapply_repl}{mapply wrapper that applies a function to all replicates.}
 #' }
 #'
-#' # TODO
-#' read_sub
+# # TODO
+# read_sub
 #'
 #' @docType package
 #' @name JACUSA2helper
 NULL
-
diff --git a/R/cov.R b/R/cov.R
deleted file mode 100644
index f9febdc..0000000
--- a/R/cov.R
+++ /dev/null
@@ -1,4 +0,0 @@
-#
-.cov <- function(bases, cores = 1) {
-  apply_repl(bases, rowSums, cores)
-}
diff --git a/R/helper-filter.R b/R/helper-filter.R
index 21d76cd..9592a06 100644
--- a/R/helper-filter.R
+++ b/R/helper-filter.R
@@ -3,38 +3,24 @@
 #' Removes sites that sites that have been marked by feature/artefact filter.
 #'
 #' @param filter vector of strings that contains artefact filter information.
-#' @param artefacts vector of characters that correspond to feature/artefact filters to be filtered.
+#' @param artefacts vector of characters that correspond to feature/artefact filters to be filtered; default: NULL filters sites marked by any artefact filter.
 #' @return vector of logical.
#' @examples #' data(rdd) #' # remove sites that are marked by artefact filter "D" -#' filtered <- filter_by(rdd, filter_artefact(filter, c("D"))) -#' str(filtered) +#' dim(rdd) +#' dim(dplyr::filter(rdd, filter_artefact(filter, c("D")))) +#' # remove all sites that are marked by some artefact filter +#' dim(dplyr::filter(rdd, filter_artefact(filter))) #' @export -filter_artefact <- function(filter, artefacts) { - if (nchar(artefacts) == 0) { - stop("artefacts cannot be 0") +filter_artefact <- function(filter, artefacts = NULL) { + if (is.null(artefacts)) { + artefacts <- c(paste0('\\', .EMPTY)) } grepl(paste0(artefacts, collapse = "|"), filter) } -#' Filters all sites with an artefact -#' -#' Removes sites have been marked by any feature/artefact filter. -#' -#' @param filter vector of strings that contains artefact filter information. -#' @return vector of logical. -#' @examples -#' data(rdd) -#' # remove sites that are marked by artefact filter "D" -#' filtered <- filter_by(rdd, filter_all_artefacts(filter)) -#' str(filtered) -#' @export -filter_all_artefacts <- function(filter) { - filter_artefact(filter, c(paste0('\\', .EMPTY))) -} - #' Merge tibbles with columns holding logical values with AND to a vector. #' #' Each column holding values of logicals will be merged row-wise with AND. @@ -42,7 +28,7 @@ filter_all_artefacts <- function(filter) { #' @return logical tibble with columns merged with AND. #' @export All <- function(d) { - Reduce("&", tidyr::as_tibble(d)) + Reduce("&", tidyr::as_tibble(d)) %>%tidyr::as_tibble() } @@ -53,5 +39,5 @@ All <- function(d) { #' @return logical tibble with columns merged with OR. #' @export Any <- function(d) { - Reduce("|", tidyr::as_tibble(d)) + Reduce("|", tidyr::as_tibble(d)) %>% tidyr::as_tibble() } diff --git a/R/helper-robust.R b/R/helper-robust.R index bb8bc32..fe5786c 100644 --- a/R/helper-robust.R +++ b/R/helper-robust.R @@ -1,70 +1,115 @@ - - - #' Check if data is robust #' #' Tests if all values that have been observed (>0) in all conditions (op = |) # can observed in one of the conditions (op = &) #' Supports 1 or 2 conditions. #' -#' @param data tibble of numeric. +#' @param x tibble of numeric. #' @return logical vector indicating robust rows. #' @export -robust <- function(data) { - not_null <- apply_repl(df, function(x) {x > 0}) +robust <- function(x) { + not_null <- lapply_repl(x, function(x) {x > 0}) mask_any <- .mask_any(not_null) mask_all <- .mask_all(not_null) mask <- mask_any & mask_all | ! mask_any - n <- length(names(mask)) - + n <- dim(mask)[2] + rowSums(mask) == n } #' @noRd -.mask_any <- function(df) { - apply_cond(df, function(cond) { - Reduce("|", cond) - }) %>% Reduce("|", .) %>% tidyr::as_tibble() +.mask_any <- function(x) { + lapply_cond(x, function(cond) { + Any(cond) + }) %>% Any() %>% tidyr::as_tibble() } #' @noRd -.mask_all <- function(df) { - apply_cond(df, function(cond) { - Reduce("&", cond) - }) %>% Reduce("|", .) %>% tidyr::as_tibble() +.mask_all <- function(x) { + lapply_cond(x, function(cond) { + All(cond) + }) %>% Any() %>% tidyr::as_tibble() } -#' Apply FUN to all tibbles of conditions +#' Apply f to all conditions #' -#' TODO -#' @param X TODO -#' @param FUN TODO -#' @param cores TODO -#' @param ... TODO +#' Wrapper for lapply +#' @param x tibble of data per condition +#' @param f function to apply to each condition data +#' @param ... parameters for f +#' @param cores numer of cores to use +#' @return tibble #' @export -apply_cond <- function(X, FUN, cores = 1, ...) 
{ +lapply_cond <- function(x, f, ..., cores = 1) { parallel::mclapply( - X, FUN, ..., - mc.cores = min(names(X), cores), mc.preschedule = FALSE - ) %>% lapply(., tidyr::as_tibble) %>% tidyr::as_tibble() + x, f, ..., + mc.cores = min(names(x), cores), + mc.preschedule = FALSE, + mc.allow.recursive = FALSE + ) %>% + .as() %>% + tidyr::as_tibble() } -#' Apply FUN to all replicates +#' Apply f to all replicates #' -#' TODO -#' @param X TODO -#' @param FUN TODO -#' @param cores TODO -#' @param ... TODO +#' Wrapper for lapply +#' @param x tibble of data per condition +#' @param f function to apply to each replicate data +#' @param ... parameters for f +#' @param cores numer of cores to use +#' @return tibble #' @export -apply_repl <- function(X, FUN, cores = 1, ...) { - # lapply(., tidyr::as_tibble) %>% +lapply_repl <- function(x, f, ..., cores = 1) { + .helper <- function(y) { + lapply(y, f, ...) %>% + lapply(.as) %>% + tidyr::as_tibble() + } + + # TODO use lapply_cond parallel::mclapply( - X, function(cond) { - lapply(cond, FUN, ...) %>% tidyr::as_tibble() - }, mc.cores = min(names(X), cores), mc.preschedule = FALSE + x, .helper, + mc.cores = min(names(x), cores), + mc.preschedule = FALSE, + mc.allow.recursive = FALSE + ) %>% tidyr::as_tibble() +} + +# make everything a tibble if it has a 2nd dimension +#' @noRd +.as <- function(y) { + if (is.vector(y)) { + return(y) + } + return(tidyr::as_tibble(y)) +} + +#' Apply f to all replicates +#' +#' Wrapper for mapply. +#' @param f function to apply to each replicate data +#' @param ... see mapply +#' @param MoreArgs see mapply +#' @param cores numer of cores to use +#' @return tibble +#' @export +mapply_repl <- function(f, ..., MoreArgs = NULL, cores = 1) { + .helper <- function(...) { + dots <- list(...) + mapply(f, ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE) %>% + lapply(.as) %>% + tidyr::as_tibble() + } + + parallel::mcmapply( + .helper, + ..., + SIMPLIFY = FALSE, + mc.cores = cores, + mc.preschedule = FALSE ) %>% tidyr::as_tibble() } diff --git a/R/helper-sub.R b/R/helper-sub.R index 988dce3..3f008e6 100644 --- a/R/helper-sub.R +++ b/R/helper-sub.R @@ -96,5 +96,5 @@ mask_sub <- function(subs, keep) { #' clean_read_sub(subs) #' @export clean_read_sub <- function(subs) { - i <- gsub("2", .SUB_SEP, subs) + gsub("2", .SUB_SEP, subs) } diff --git a/R/helper.R b/R/helper.R index 53b3b38..f402425 100644 --- a/R/helper.R +++ b/R/helper.R @@ -23,10 +23,8 @@ # convenience: description data fields .CALL_PILEUP_COL <- "bases" -.ARREST_DATA_COL <- "arrest" -.ARREST_HELPER_COL <- "arrest" -.THROUGH_DATA_COL <- "through" -.THROUGH_HELPER_COL <- "through" +.ARREST_COL <- "arrest" +.THROUGH_COL <- "through" .LRT_ARREST_POS_COL <- "arrest_pos" @@ -42,42 +40,42 @@ .META_COND_COL <- "meta_cond" .COV <- "cov" -.BC <- "bc" -.SUB <- "sub" -.SUB_RATIO <- "sub_ratio" -.NON_REF_RATIO <- "non_ref_ratio" -# get unique base calls from a vector of base calls -.get_unique_base_calls <- function(bcs) { - base_calls <- bcs %>% - paste0() %>% - strsplit("") %>% - unlist() %>% - unique() - - base_calls +#' Calculate coverage for structured column. +#' +#' Calculate coverage for structured column. 
+#' +#' @param bases structured column of bases +#' @param cores number of cores to use +#' @return structure column with coverages +#' +#' @export +coverage <- function(bases, cores = 1) { + lapply_repl(bases, rowSums, cores) } -# apply boolean operator "&","|" on all columns -.get_mask <- function(mat, op) { - mat <- t(apply(mat, 1, function(x) { x > 0 })) - if (op == "all") { - mat <- t(apply(mat, 2, all)) - } else if (op == "any") { - mat <- t(apply(mat, 2, any)) - } else { - stop("Unknown op: ", op) +#' Extract values from a structured column. +#' +#' Extract values from a structured column. +#' +#' @param id vector that unique identifies each row +#' @param x structured column +#' @param meta_cond vector of meta conditions +#' @return extracted column +#' +#' @export +gather_repl <- function(id, x, meta_cond = NULL) { + r <- list() + for (cond in names(x)) { + for (repl in names(x[[cond]])) { + df <- tidyr::tibble(id = id, value = x[[cond]][[repl]]) + if (! is.null(meta_cond)) { + df[["meta_cond"]] <- meta_cond + } + df[["cond"]] <- cond + df[["repl"]] <- repl + r[[length(r) + 1]] <- df + } } - - mat + dplyr::bind_rows(r) } - -# get number of alleles from a vector of base calls -.get_alleles <- function(bcs) { - allele_count <- length(.get_unique_base_calls(bcs)) - - allele_count -} - - - diff --git a/R/io-guess.R b/R/io-guess.R index baf1345..1d6d40a 100644 --- a/R/io-guess.R +++ b/R/io-guess.R @@ -10,7 +10,7 @@ cond_repl <- .extract_cond_repl(header_names, prefix) cond_count <- .find_cond_count(cond_repl) } else if (type %in% c(.RT_ARREST, .LRT_ARREST)) { - cond_counts <- lapply(c(.ARREST_DATA_COL, .THROUGH_DATA_COL), function(x) { + cond_counts <- lapply(c(.ARREST_COL, .THROUGH_COL), function(x) { cond_repl <- .extract_cond_repl(header_names, x) return(.find_cond_count(cond_repl)) }) @@ -70,7 +70,7 @@ type <- .UNKNOWN_METHOD if (length(grep(.LRT_ARREST_POS_COL, line)) > 0) { # lrt-arrest type <- .LRT_ARREST - } else if(length(grep(paste0("\t", .ARREST_DATA_COL), line)) > 0) { # rt-arrest + } else if(length(grep(paste0("\t", .ARREST_COL), line)) > 0) { # rt-arrest type <- .RT_ARREST } else if (length(grep(paste0("\t", .CALL_PILEUP_COL), line)) > 0) { # call-pileup type <- .CALL_PILEUP diff --git a/R/io.R b/R/io.R index 47bb5d9..ce71276 100644 --- a/R/io.R +++ b/R/io.R @@ -87,10 +87,13 @@ read_result <- function(file, cond_desc = c(), unpack = FALSE, progress = TRUE, } attr(result, .ATTR_HEADER) <- jacusa_header result <- sticky::sticky(result) + id <- info <- filter <- ref <- NULL + result <- result %>% + dplyr::select(id, dplyr::everything()) %>% + dplyr::select(-c(info, filter, ref), c(info, filter, ref)) + - rownames(result) <- .id(result) - - result + result } @@ -109,6 +112,7 @@ read_result <- function(file, cond_desc = c(), unpack = FALSE, progress = TRUE, regex <- paste0("(", .SUB_TAG_COL, "|read_sub)=([^;]+)") if (any(stringr::str_detect(data[[.INFO_COL]], regex))) { sub_tag <- stringr::str_match(data[[.INFO_COL]], regex)[, 3] + sub_tag <- clean_read_sub(sub_tag) return(paste0(coord, ":", sub_tag)) } @@ -125,45 +129,24 @@ read_result <- function(file, cond_desc = c(), unpack = FALSE, progress = TRUE, if(type == .CALL_PILEUP) { cols <- c(.CALL_PILEUP_COL) } else if (type %in% c(.RT_ARREST, .LRT_ARREST)) { - cols <- c(.ARREST_DATA_COL, .THROUGH_DATA_COL) + cols <- c(.ARREST_COL, .THROUGH_COL) } else { stop("Unknown type: ", type) } result <- .unpack_cols(data, cols, cond_count, .BASES, cores) # process arrest and through columns if (type %in% c(.RT_ARREST, .LRT_ARREST)) { - 
if (.ARREST_HELPER_COL != .ARREST_DATA_COL) { - result[[.ARREST_HELPER_COL]] <- result[[.ARREST_DATA_COL]] - result[[.ARREST_DATA_COL]] <- NULL - } - if (.THROUGH_HELPER_COL != .THROUGH_DATA_COL) { - result[[.THROUGH_HELPER_COL]] <- result[[.THROUGH_DATA_COL]] - result[[.THROUGH_DATA_COL]] <- NULL - } - - result[[.CALL_PILEUP_COL]] <- result[[.ARREST_HELPER_COL]] - for (cond in paste0("cond", 1:cond_count)) { - cond_through <- result[[.THROUGH_HELPER_COL]][[cond]] - for (repl in names(cond_through)) { - result[[.CALL_PILEUP_COL]][[cond]][[repl]] <- - result[[.CALL_PILEUP_COL]][[cond]][[repl]] + cond_through[[repl]] - } - } + result[[.CALL_PILEUP_COL]] <- mapply_repl( + Reduce, + result[[.ARREST_COL]], result[[.THROUGH_COL]], + MoreArgs=list(f="+"), cores = cores + ) result <- add_arrest_rate(result) } - # FIXME make faster - cond_bases <- parallel::mclapply(result[[.CALL_PILEUP_COL]], function(x) { - return(Reduce('+', x)) - }, mc.cores = min(cond_count, cores), mc.preschedule = FALSE) - total_bases <- Reduce('+', cond_bases) - # add coverage - result[[.COV]] <- .cov(result[[.CALL_PILEUP_COL]]) - - # add base call - result[[.BC]] <- .base_call(total_bases) - + result[[.COV]] <- coverage(result[[.CALL_PILEUP_COL]]) + # unpack info field if (unpack) { result <- .unpack_info(result, cond_count, cores) @@ -172,8 +155,14 @@ read_result <- function(file, cond_desc = c(), unpack = FALSE, progress = TRUE, result } -# -.base_call <-function(bases) { +#' Observed base calls. +#' +#' Observed base calls. +#' +#' @param bases tibble of base call counts +#' @return vector of observed base calls +#' @export +base_call <-function(bases) { apply(bases > 0, 1, function(x) { paste0(names(which(x)), collapse= "") } @@ -302,17 +291,17 @@ read_result <- function(file, cond_desc = c(), unpack = FALSE, progress = TRUE, #' #' @param files Vector of files. #' @param meta_conds Vector of string vectors that explain files. -# FIXME @param cond_descs Vector of strings that represent names/descriptions for conditions. +#' @param cond_descs Vector of strings that represent names/descriptions for conditions. #' @param unpack Boolean indicates if info column should be processed. #' @param cores Integer defines how many cores to use. 
#' #' @export -read_results <- function(files, meta_conds, unpack = FALSE, cores = 1) { +read_results <- function(files, meta_conds, cond_descs, unpack = FALSE, cores = 1) { stopifnot(length(files) == length(meta_conds)) # read all files - l <- parallel::mcmapply(function(file, meta_cond) { - result <- read_result(file, unpack) + l <- parallel::mcmapply(function(file, meta_cond, cond_desc) { + result <- read_result(file, cond_desc, unpack) type <- attr(result, .ATTR_TYPE) attr(result, .ATTR_TYPE) <- NULL @@ -320,7 +309,7 @@ read_results <- function(files, meta_conds, unpack = FALSE, cores = 1) { result[[.META_COND_COL]] <- meta_cond return(list(result, type)) - }, files, meta_conds, mc.cores = min(length(files), cores), SIMPLIFY = FALSE) + }, files, meta_conds, cond_descs, mc.cores = min(length(files), cores), SIMPLIFY = FALSE) types <- lapply(l, "[[", 2) %>% unlist(use.names = FALSE) @@ -330,7 +319,7 @@ read_results <- function(files, meta_conds, unpack = FALSE, cores = 1) { results$meta_cond <- as.factor(results$meta_cond) attr(results, .ATTR_TYPE) <- types - # FIXME attr(results, .ATTR_COND_DESC) <- cond_descs + attr(results, .ATTR_COND_DESC) <- cond_descs results } diff --git a/R/non-ref-ratio.R b/R/non-ref-ratio.R index 9b0d821..bd9cb90 100644 --- a/R/non-ref-ratio.R +++ b/R/non-ref-ratio.R @@ -13,7 +13,7 @@ #' 0, 1, 1, 1, #' 1, 1, 1, 1 #' ) -#' str(non_ref_ratio(ref, bases)) +#' non_ref_ratio(ref, bases) #' @export non_ref_ratio <- function(ref, bases) { stopifnot(length(ref) == nrow(bases)) diff --git a/R/plot-ecdf-allele-count.R b/R/plot-ecdf-allele-count.R deleted file mode 100644 index 1d8f88f..0000000 --- a/R/plot-ecdf-allele-count.R +++ /dev/null @@ -1,18 +0,0 @@ -#' Plot cumulative allele count. -#' -#' Plots cumulative distribution of allele counts for single and multiple JACUSA2 result objects. -#' @seealso \code{plot_ecdf_column()} -#' -#' @param result object created by \code{read_result()} or \code{read_results()}. -#' @param data_desc vector of characters strings providing details about the sample. -#' @param allele_count_column character sting specifing the name of the column. -#' @param ... parameters forwared to plot_ecdf_column. -#' @return ggplot2 plot object. -#' -#' @export -plot_ecdf_allele_count <- function(result, data_desc, allele_count_column = "allele_count", ...) { - p <- plot_ecdf_column(result, allele_count_column, data_desc, ...) + - ggplot2::xlab("Allele count") - - p -} diff --git a/R/plot-ecdf-column.R b/R/plot-ecdf-column.R deleted file mode 100644 index 157c3fd..0000000 --- a/R/plot-ecdf-column.R +++ /dev/null @@ -1,90 +0,0 @@ -#' Generic plot for cumulative data (EXPERIMENTAL) -#' -#' Creates a generic plot of a \code{column} from a JACUSA2 \code{result} object and display -#' verbose data descripton \code{data_desc}. -#' -#' The user must make sure that dimensions (rows and length) of: -#' \code{result} and \code{data_desc} match and that \code{column} exists -#' in \code{result}. Furthermore, the user is responsible for correct filtering and grouping of the -#' \code{result} object. -#' \code{data_desc} is used to provide a descriptive name for each observation. -#' -#' @importFrom magrittr %>% -#' @param result object created by \code{read_result()} or \code{read_results()}. -#' @param column character string to be used for plotting. -#' @param data_desc vector of characters strings providing details about the sample. -#' @param main_group character string to be used to define colour. 
-#' @param sub_group character string to be used to define linetype. -#' @param name character string defines name of the legend. -#' @return ggplot2 plot object. -#' -#' @export -plot_ecdf_column <- function(result, column, data_desc, main_group, sub_group = NULL, name = "Data description") { - if (nrow(result) != length(data_desc)) { - stop("'data_desc' does not match number of entries of 'result'") - } - - result[["data_desc"]] <- NULL - result <- tidyr::add_column(result, data_desc) - - groups <- c(main_group) - if (! is.null(sub_group)) { - groups <- c(groups, sub_group) - } - result$group <- do.call( - interaction, - list(result[groups]) - ) - - columns <- c(groups, "group", "data_desc") - meta_desc <- dplyr::distinct( - result, !!!rlang::syms(columns) - ) - - limits <- as.vector(meta_desc[["group"]]) - labels <- meta_desc[["data_desc"]] - - column <- group <- NULL - p <- NULL - if (is.null(sub_group)) { - p <- ggplot2::ggplot( - result, - ggplot2::aes( - x = !!rlang::sym(column), - colour = group - ) - ) - } else { - p <- ggplot2::ggplot( - result, - ggplot2::aes( - x = !!rlang::sym(column), - colour = group, - linetype = group - ) - ) - } - - p <- p + ggplot2::scale_colour_manual( - name = name, - labels = labels, - limits = limits, - values = factor(meta_desc[[main_group]]) %>% as.integer() - ) - - if (! is.null(sub_group)) { - p <- p + ggplot2::scale_linetype_manual( - name = name, - labels = labels, - limits = limits, - values = factor(meta_desc[[sub_group]]) %>% as.integer() - ) - } - - p <- p + - ggplot2::xlab(column) + - ggplot2::ylab("Density") + - ggplot2::stat_ecdf(geom = "step") - - p -} diff --git a/R/sub-ratio.R b/R/sub-ratio.R index ff64550..49c44dc 100644 --- a/R/sub-ratio.R +++ b/R/sub-ratio.R @@ -7,7 +7,7 @@ #' #' @param ref vector of reference bases. #' @param bases matrix of observed base call counts. -#' @param bc vector strings that represents observed base calls. Default: "bases". +#' @param bc vector strings that represent observed base calls. Default: "bases". #' @return vector of base callsubstitution ratios. #' @examples #' ref <- c("A", "A", "T") diff --git a/R/write-result.R b/R/write-result.R index 30cfcdf..72e5dde 100644 --- a/R/write-result.R +++ b/R/write-result.R @@ -12,6 +12,7 @@ # @param file character string naming a file for writing # @param txt character string to be added to the JACUSA2 header. # @export +#' @noRd write_result <- function(result, file, txt = "") { if (.META_COND_COL %in% names(result)) { stop("result contains meta condition column: ", .META_COND_COL, diff --git a/man/JACUSA2helper.Rd b/man/JACUSA2helper.Rd index e4d98b7..d82683d 100644 --- a/man/JACUSA2helper.Rd +++ b/man/JACUSA2helper.Rd @@ -6,14 +6,7 @@ \title{JACUSA2helper: A package for post-processing JACUSA2 result files.} \section{Description}{ -A package that provides the following categories of functions to post-process result files of JACUSA2: -\describe{ - \item{read/write}{Read and write JACUSA2 result files, e.g.: \code{read_result()}.} - \item{add}{Adds some field to an existing JACUSA2 result object and return the modified object, e.g.: \code{base_sub()}.} - \item{filter}{Will remove sites from a result object with some filtering criteria, e.g.: \code{filter_by_coverage()}} - \item{plot}{Plots certain characteristics of a JACUSA2 result object.} - \item{other}{TODO} -} +A package that provides functions to post-process result files of JACUSA2. 
The following methods from JACUSA2 are supported: \describe{ @@ -33,18 +26,23 @@ provided library type option "-P" UNSTRANDED|FR_FIRSTSTRAND|RF_SECOND_STRAND". The central data structure in JACUSA2helper is the JACUSA2 result object that follows the tidy data approach to feature easy interaction with dplyr and ggplot2. A JACUSA2 result object can be created via \code{result <- read_result("jacusa2.out")} and is -currently represented as a tibble. Furthermore, JACUSA2helper supports the analysis of several related -JACUSA2 result files via \code{results <- read_results(files, meta)} where \code{meta_conditions} is a -vector of character strings that provides a descriptive name for each file in \code{files}. +currently represented as a tibble. Special structured columns exist that +hold condition andn replicate related data such as: coverage, bases. arrest rate. +Furthermore, JACUSA2helper supports the analysis of several related JACUSA2 +result files via \code{results <- read_results(files, meta_cond)} where +\code{meta_cond} is a vector of character strings that provides a descriptive +name for each file in \code{files}. -Check \code{vignette(TODO)} for a general introduction and \code{vignette(TODO meta conditions)} for details about meta conditions. +Check \code{vignette("JACUSA2helper", "JACUSA2helper")} for a general +introduction and \code{"JACUSA2helper", "JACUSA2helper"} for details about +meta conditions. } \section{read/write functions}{ See: \describe{ - \item{read_result}{Reads and unpacks a JACUSA2 result file and creates a result object.} + \item{read_result}{Reads and unpacks a JACUSA2 result file.} \item{read_results}{Allows to combine multiple result files and distinguish them with meta conditions.} \item{write_bedGraph}{Writes a vector of values as bedGraph file.} } @@ -54,27 +52,30 @@ See: See: \describe{ - \item{arrest_rate}{Adds arrest rate to JACUSA2 result object.} - \item{base_count}{TODO} - \item{base_sub}{Adds base substitution column to JACUSA2 result object.} - \item{non_ref2bc_ratio}{Adds non reference base ratio to JACUSA2 result object.} - \item{sub_ratio}{Adds base substitution ratio for all bases to a JACUSA2 result object.} + \item{arrest_rate}{Calculates arrest rate from base call counts (arest, through).} + \item{base_count}{Calculates the bumber of observed base calls.} + \item{base_sub}{Calculates base substitution.} + \item{base_ratio}{Calculates base call ratios.} + \item{non_ref_ratio}{Calculates non reference base ratio to JACUSA2 result object.} + \item{sub_ratio}{Calculates base substitution ratio for all bases to a JACUSA2 result object.} + } } \section{filter functions}{ -This function set enables filtering by read coverage or -enforcing a minimal number of variant base calls per sample. +This function set enables filtering by read coverage or enforcing a minimal +number of variant base calls per sample. See: \describe{ - \item{robust}{Retains sites that contain an arrest event in all replicates in at least one condition.} - \item{All}{TODO} - \item{Any}{TODO} + \item{robust}{Retains sites that are robust in one feature. 
The feature can be observed in all replicates of at least one condition.} + \item{filter_artefact}{Remove sites that have been marked as an artefact.} + \item{All}{Helper function} + \item{Any}{Helper function} + \item{lapply_cond}{lapply wrapper - applies function to all conditions.} + \item{lapply_repl}{lapply wrapper - applies function to all replicates.} + \item{mapply_repl}{mapply wrapper - applies function to all replicates.} } - -# TODO -read_sub } diff --git a/man/apply_cond.Rd b/man/apply_cond.Rd deleted file mode 100644 index 6a07fe4..0000000 --- a/man/apply_cond.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helper-robust.R -\name{apply_cond} -\alias{apply_cond} -\title{Apply FUN to all tibbles of conditions} -\usage{ -apply_cond(X, FUN, cores = 1, ...) -} -\arguments{ -\item{X}{TODO} - -\item{FUN}{TODO} - -\item{cores}{TODO} - -\item{...}{TODO} -} -\description{ -TODO -} diff --git a/man/apply_repl.Rd b/man/apply_repl.Rd deleted file mode 100644 index 1e55c88..0000000 --- a/man/apply_repl.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helper-robust.R -\name{apply_repl} -\alias{apply_repl} -\title{Apply FUN to all replicates} -\usage{ -apply_repl(X, FUN, cores = 1, ...) -} -\arguments{ -\item{X}{TODO} - -\item{FUN}{TODO} - -\item{cores}{TODO} - -\item{...}{TODO} -} -\description{ -TODO -} diff --git a/man/base_call.Rd b/man/base_call.Rd new file mode 100644 index 0000000..e419dc2 --- /dev/null +++ b/man/base_call.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{base_call} +\alias{base_call} +\title{Observed base calls.} +\usage{ +base_call(bases) +} +\arguments{ +\item{bases}{tibble of base call counts} +} +\value{ +vector of observed base calls +} +\description{ +Observed base calls. +} diff --git a/man/base_count.Rd b/man/base_count.Rd index 8716cfc..26d6806 100644 --- a/man/base_count.Rd +++ b/man/base_count.Rd @@ -4,10 +4,10 @@ \alias{base_count} \title{Retrieves the total number of observed unique bases.} \usage{ -base_count(bc, ref = NULL) +base_count(bases, ref = NULL) } \arguments{ -\item{bc}{vector of strings with observed base calls.} +\item{bases}{vector of base calls or tibble of base call counts.} \item{ref}{vector of strings with reference bases} } diff --git a/man/bc_ratio.Rd b/man/base_ratio.Rd similarity index 77% rename from man/bc_ratio.Rd rename to man/base_ratio.Rd index 7fa2725..a2c73ad 100644 --- a/man/bc_ratio.Rd +++ b/man/base_ratio.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/bc-ratio.R -\name{bc_ratio} -\alias{bc_ratio} +% Please edit documentation in R/base-ratio.R +\name{base_ratio} +\alias{base_ratio} \title{Calculate base call ratios for base counts.} \usage{ -bc_ratio(bases) +base_ratio(bases) } \arguments{ \item{bases}{nx4 matrix or data frame of base call counts.} @@ -18,5 +18,5 @@ Calculates base call ratios for base counts. 
} \examples{ bases <- matrix(c(5, 0, 5, 0, 1, 1, 1, 1), byrow = TRUE, ncol = 4) -ratio <- bc_ratio(bases) +ratio <- base_ratio(bases) } diff --git a/man/base_sub.Rd b/man/base_sub.Rd index 8299ffc..0458030 100644 --- a/man/base_sub.Rd +++ b/man/base_sub.Rd @@ -4,12 +4,12 @@ \alias{base_sub} \title{Calculates reference base to base call (base substitution).} \usage{ -base_sub(ref, bc) +base_sub(ref, bases) } \arguments{ \item{ref}{vector of reference bases.} -\item{bc}{vector of base calls.} +\item{bases}{vector of base calls or tibble of base call counts.} } \value{ vector of formatted base call substitutions. @@ -20,6 +20,6 @@ All elements in \code{ref} must be all length 1. } \examples{ ref <- c("A", "A", "C", "A") -bc <- c("AG", "G", "C", "G") -base_sub(ref, bc) +bases <- c("AG", "G", "C", "G") +base_sub(ref, bases) } diff --git a/man/coverage.Rd b/man/coverage.Rd new file mode 100644 index 0000000..c73cd37 --- /dev/null +++ b/man/coverage.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper.R +\name{coverage} +\alias{coverage} +\title{Calculate coverage for structured column.} +\usage{ +coverage(bases, cores = 1) +} +\arguments{ +\item{bases}{structured column of bases} + +\item{cores}{number of cores to use} +} +\value{ +structure column with coverages +} +\description{ +Calculate coverage for structured column. +} diff --git a/man/filter_all_artefacts.Rd b/man/filter_all_artefacts.Rd deleted file mode 100644 index 08d3759..0000000 --- a/man/filter_all_artefacts.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helper-filter.R -\name{filter_all_artefacts} -\alias{filter_all_artefacts} -\title{Filters all sites with an artefact} -\usage{ -filter_all_artefacts(filter) -} -\arguments{ -\item{filter}{vector of strings that contains artefact filter information.} -} -\value{ -vector of logical. -} -\description{ -Removes sites have been marked by any feature/artefact filter. -} -\examples{ -data(rdd) -# remove sites that are marked by artefact filter "D" -filtered <- filter_by(rdd, filter_all_artefacts(filter)) -str(filtered) -} diff --git a/man/filter_artefact.Rd b/man/filter_artefact.Rd index 89b0526..180a4d6 100644 --- a/man/filter_artefact.Rd +++ b/man/filter_artefact.Rd @@ -4,12 +4,12 @@ \alias{filter_artefact} \title{Filters sites by artefacts.} \usage{ -filter_artefact(filter, artefacts) +filter_artefact(filter, artefacts = NULL) } \arguments{ \item{filter}{vector of strings that contains artefact filter information.} -\item{artefacts}{vector of characters that correspond to feature/artefact filters to be filtered.} +\item{artefacts}{vector of characters that correspond to feature/artefact filters to be filtered, default: NULL - Filter all.} } \value{ vector of logical. @@ -20,6 +20,8 @@ Removes sites that sites that have been marked by feature/artefact filter. 
\examples{ data(rdd) # remove sites that are marked by artefact filter "D" -filtered <- filter_by(rdd, filter_artefact(filter, c("D"))) -str(filtered) +dim(rdd) +dim(dplyr::filter(rdd, filter_artefact(filter, c("D")))) +# remove all sites that are marked by some artefact filter +dim(dplyr::filter(rdd, filter_artefact(filter))) } diff --git a/man/gather_repl.Rd b/man/gather_repl.Rd new file mode 100644 index 0000000..f6c5d3f --- /dev/null +++ b/man/gather_repl.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper.R +\name{gather_repl} +\alias{gather_repl} +\title{Extract values from a structured column.} +\usage{ +gather_repl(id, x, meta_cond = NULL) +} +\arguments{ +\item{id}{vector that unique identifies each row} + +\item{x}{structured column} + +\item{meta_cond}{vector of meta conditions} +} +\value{ +extracted column +} +\description{ +Extract values from a structured column. +} diff --git a/man/lapply_cond.Rd b/man/lapply_cond.Rd new file mode 100644 index 0000000..6d4831b --- /dev/null +++ b/man/lapply_cond.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper-robust.R +\name{lapply_cond} +\alias{lapply_cond} +\title{Apply f to all conditions} +\usage{ +lapply_cond(x, f, ..., cores = 1) +} +\arguments{ +\item{x}{tibble of data per condition} + +\item{f}{function to apply to each condition data} + +\item{...}{parameters for f} + +\item{cores}{numer of cores to use} +} +\value{ +tibble +} +\description{ +Wrapper for lapply +} diff --git a/man/lapply_repl.Rd b/man/lapply_repl.Rd new file mode 100644 index 0000000..05996ed --- /dev/null +++ b/man/lapply_repl.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper-robust.R +\name{lapply_repl} +\alias{lapply_repl} +\title{Apply f to all replicates} +\usage{ +lapply_repl(x, f, ..., cores = 1) +} +\arguments{ +\item{x}{tibble of data per condition} + +\item{f}{function to apply to each replicate data} + +\item{...}{parameters for f} + +\item{cores}{numer of cores to use} +} +\value{ +tibble +} +\description{ +Wrapper for lapply +} diff --git a/man/mapply_repl.Rd b/man/mapply_repl.Rd new file mode 100644 index 0000000..c68a1b8 --- /dev/null +++ b/man/mapply_repl.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper-robust.R +\name{mapply_repl} +\alias{mapply_repl} +\title{Apply f to all replicates} +\usage{ +mapply_repl(f, ..., MoreArgs = NULL, cores = 1) +} +\arguments{ +\item{f}{function to apply to each replicate data} + +\item{...}{see mapply} + +\item{MoreArgs}{see mapply} + +\item{cores}{numer of cores to use} +} +\value{ +tibble +} +\description{ +Wrapper for mapply. +} diff --git a/man/non_ref_ratio.Rd b/man/non_ref_ratio.Rd index daedc46..5f584b4 100644 --- a/man/non_ref_ratio.Rd +++ b/man/non_ref_ratio.Rd @@ -25,5 +25,5 @@ bases <- tidyr::tribble( 0, 1, 1, 1, 1, 1, 1, 1 ) -str(non_ref_ratio(ref, bases)) +non_ref_ratio(ref, bases) } diff --git a/man/plot_ecdf_allele_count.Rd b/man/plot_ecdf_allele_count.Rd deleted file mode 100644 index 7e6bee2..0000000 --- a/man/plot_ecdf_allele_count.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot-ecdf-allele-count.R -\name{plot_ecdf_allele_count} -\alias{plot_ecdf_allele_count} -\title{Plot cumulative allele count.} -\usage{ -plot_ecdf_allele_count( - result, - data_desc, - allele_count_column = "allele_count", - ... 
-) -} -\arguments{ -\item{result}{object created by \code{read_result()} or \code{read_results()}.} - -\item{data_desc}{vector of characters strings providing details about the sample.} - -\item{allele_count_column}{character sting specifing the name of the column.} - -\item{...}{parameters forwared to plot_ecdf_column.} -} -\value{ -ggplot2 plot object. -} -\description{ -Plots cumulative distribution of allele counts for single and multiple JACUSA2 result objects. -} -\seealso{ -\code{plot_ecdf_column()} -} diff --git a/man/plot_ecdf_column.Rd b/man/plot_ecdf_column.Rd deleted file mode 100644 index 82625a8..0000000 --- a/man/plot_ecdf_column.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot-ecdf-column.R -\name{plot_ecdf_column} -\alias{plot_ecdf_column} -\title{Generic plot for cumulative data (EXPERIMENTAL)} -\usage{ -plot_ecdf_column( - result, - column, - data_desc, - main_group, - sub_group = NULL, - name = "Data description" -) -} -\arguments{ -\item{result}{object created by \code{read_result()} or \code{read_results()}.} - -\item{column}{character string to be used for plotting.} - -\item{data_desc}{vector of characters strings providing details about the sample.} - -\item{main_group}{character string to be used to define colour.} - -\item{sub_group}{character string to be used to define linetype.} - -\item{name}{character string defines name of the legend.} -} -\value{ -ggplot2 plot object. -} -\description{ -Creates a generic plot of a \code{column} from a JACUSA2 \code{result} object and display -verbose data descripton \code{data_desc}. -} -\details{ -The user must make sure that dimensions (rows and length) of: -\code{result} and \code{data_desc} match and that \code{column} exists -in \code{result}. Furthermore, the user is responsible for correct filtering and grouping of the -\code{result} object. -\code{data_desc} is used to provide a descriptive name for each observation. -} diff --git a/man/read_results.Rd b/man/read_results.Rd index 8238c52..eddff57 100644 --- a/man/read_results.Rd +++ b/man/read_results.Rd @@ -4,13 +4,15 @@ \alias{read_results} \title{Read multiple related JACUSA2 results} \usage{ -read_results(files, meta_conds, unpack = FALSE, cores = 1) +read_results(files, meta_conds, cond_descs, unpack = FALSE, cores = 1) } \arguments{ \item{files}{Vector of files.} \item{meta_conds}{Vector of string vectors that explain files.} +\item{cond_descs}{Vector of strings that represent names/descriptions for conditions.} + \item{unpack}{Boolean indicates if info column should be processed.} \item{cores}{Integer defines how many cores to use.} diff --git a/man/robust.Rd b/man/robust.Rd index d3d585f..fac1c5a 100644 --- a/man/robust.Rd +++ b/man/robust.Rd @@ -4,10 +4,10 @@ \alias{robust} \title{Check if data is robust} \usage{ -robust(data) +robust(x) } \arguments{ -\item{data}{tibble of numeric.} +\item{x}{tibble of numeric.} } \value{ logical vector indicating robust rows. diff --git a/man/sub_ratio.Rd b/man/sub_ratio.Rd index c4907d7..0fdcf47 100644 --- a/man/sub_ratio.Rd +++ b/man/sub_ratio.Rd @@ -11,7 +11,7 @@ sub_ratio(ref, bases, bc = NULL) \item{bases}{matrix of observed base call counts.} -\item{bc}{vector strings that represents observed base calls. Default: "bases".} +\item{bc}{vector strings that represent observed base calls. Default: "bases".} } \value{ vector of base callsubstitution ratios. 
diff --git a/tests/testthat/test-bc-ratio.R b/tests/testthat/test-base-ratio.R similarity index 79% rename from tests/testthat/test-bc-ratio.R rename to tests/testthat/test-base-ratio.R index 882b5c2..fb6421e 100644 --- a/tests/testthat/test-bc-ratio.R +++ b/tests/testthat/test-base-ratio.R @@ -1,6 +1,6 @@ -context("bc_ratio") +context("base_ratio") -test_that("bc_ratio works as expected", { +test_that("base_ratio works as expected", { bases <- tidyr::tribble( ~A, ~C, ~G, ~T, 1, 0, 0, 0, @@ -20,7 +20,7 @@ test_that("bc_ratio works as expected", { ) expect_equal( - bc_ratio(bases), + base_ratio(bases), expected ) }) diff --git a/tests/testthat/test-cov.R b/tests/testthat/test-coverage.R similarity index 60% rename from tests/testthat/test-cov.R rename to tests/testthat/test-coverage.R index 126663d..bc6d162 100644 --- a/tests/testthat/test-cov.R +++ b/tests/testthat/test-coverage.R @@ -1,7 +1,7 @@ context("coverage") -test_that(".cov works as expected", { - bases <- tidyr::tibble( +test_that("coverageworks as expected", { + cond1 <- tidyr::tibble( bases1 = tidyr::tribble( ~A, ~C, ~G, ~T, 1, 0, 0, 0, @@ -21,12 +21,16 @@ test_that(".cov works as expected", { ) expected <- tidyr::tibble( - bases1 = c(1, 1, 1, 1, 4), - bases2 = c(1, 1, 1, 1, 4), + cond1=tidyr::tibble( + bases1 = c(1, 1, 1, 1, 4), + bases2 = c(1, 1, 1, 1, 4), + ) ) + actual <- tidyr::tibble(cond1=cond1) + expect_equal( - .cov(bases), - expected + as.matrix(coverage(actual)), + as.matrix(expected) ) }) diff --git a/tests/testthat/test-filter-by-filter-info.R b/tests/testthat/test-filter-by-filter-info.R index 14faa36..af9fd7d 100644 --- a/tests/testthat/test-filter-by-filter-info.R +++ b/tests/testthat/test-filter-by-filter-info.R @@ -1,12 +1,5 @@ context("helper_filter") -test_that("filter_artefact fails on missing option", { - expect_error( - filter_artefact(c(.EMPTY, .EMPTY, .EMPTY), ""), - "artefacts cannot be 0" - ) -}) - test_that("filter_artefact works as expected", { expect_equal( filter_artefact(c(.EMPTY, .EMPTY, .EMPTY), "D"), diff --git a/tests/testthat/test-guess-cond-count.R b/tests/testthat/test-guess-cond-count.R index 00a5334..0d9eae8 100644 --- a/tests/testthat/test-guess-cond-count.R +++ b/tests/testthat/test-guess-cond-count.R @@ -37,10 +37,10 @@ test_that(".guess_cond_count works as expected on rt-arrest", { .guess_cond_count( .RT_ARREST, c( - create_header_names(.ARREST_DATA_COL, 1, 3), - create_header_names(.THROUGH_DATA_COL, 1, 3), - create_header_names(.ARREST_DATA_COL, 2, 3), - create_header_names(.THROUGH_DATA_COL, 2, 3) + create_header_names(.ARREST_COL, 1, 3), + create_header_names(.THROUGH_COL, 1, 3), + create_header_names(.ARREST_COL, 2, 3), + create_header_names(.THROUGH_COL, 2, 3) ) ), 2 @@ -52,10 +52,10 @@ test_that(".guess_cond_count works as expected on lrt-arrest", { .guess_cond_count( .LRT_ARREST, c( - create_header_names(.ARREST_DATA_COL, 1, 3), - create_header_names(.THROUGH_DATA_COL, 1, 3), - create_header_names(.ARREST_DATA_COL, 2, 3), - create_header_names(.THROUGH_DATA_COL, 2, 3) + create_header_names(.ARREST_COL, 1, 3), + create_header_names(.THROUGH_COL, 1, 3), + create_header_names(.ARREST_COL, 2, 3), + create_header_names(.THROUGH_COL, 2, 3) ) ), 2 diff --git a/tests/testthat/test-guess-file-type.R b/tests/testthat/test-guess-file-type.R index ab8b797..97a69d8 100644 --- a/tests/testthat/test-guess-file-type.R +++ b/tests/testthat/test-guess-file-type.R @@ -50,8 +50,8 @@ create_condition <-function(condition, replicates, arrest, through) { test_that(".guess_file_type recognizes 
rt-arrest", { line <- paste( TEST_HEADER_COORD, - create_condition(1, 3, .ARREST_DATA_COL, .THROUGH_DATA_COL), - create_condition(2, 3, .ARREST_DATA_COL, .THROUGH_DATA_COL), + create_condition(1, 3, .ARREST_COL, .THROUGH_COL), + create_condition(2, 3, .ARREST_COL, .THROUGH_COL), TEST_HEADER_INFO, sep = "\t", collapse = "" ) @@ -62,8 +62,8 @@ test_that(".guess_file_type recognizes lrt-arrest", { line <- paste( TEST_HEADER_COORD, .LRT_ARREST_POS_COL, - create_condition(1, 3, .ARREST_DATA_COL, .THROUGH_DATA_COL), - create_condition(2, 3, .ARREST_DATA_COL, .THROUGH_DATA_COL), + create_condition(1, 3, .ARREST_COL, .THROUGH_COL), + create_condition(2, 3, .ARREST_COL, .THROUGH_COL), TEST_HEADER_INFO, sep = "\t", collapse = "" ) diff --git a/tests/testthat/test-io.R b/tests/testthat/test-io.R index 22597e4..45035ef 100644 --- a/tests/testthat/test-io.R +++ b/tests/testthat/test-io.R @@ -40,7 +40,7 @@ test_that(".fill_empty works as expected", { }) -test_that(".base_call works as expected", { +test_that("base_call works as expected", { bases <- tidyr::tribble( ~A, ~C, ~G, ~T, 1, 0, 0, 0, @@ -59,7 +59,7 @@ test_that(".base_call works as expected", { ) expect_equal( - .base_call(bases), + base_call(bases), expected ) }) \ No newline at end of file diff --git a/vignettes/JACUSA2helper-meta-condition.bib b/vignettes/JACUSA2helper-meta-condition.bib deleted file mode 100644 index 2c77bab..0000000 --- a/vignettes/JACUSA2helper-meta-condition.bib +++ /dev/null @@ -1,37 +0,0 @@ -@article{PieknaPrzybylska2007, - doi = {10.1093/nar/gkm855}, - url = {https://doi.org/10.1093/nar/gkm855}, - year = {2007}, - month = dec, - publisher = {Oxford University Press ({OUP})}, - volume = {36}, - number = {Database}, - pages = {D178--D183}, - author = {D. Piekna-Przybylska and W. A. Decatur and M. J. Fournier}, - title = {The 3D {rRNA} modification maps database: with interactive tools for ribosome analysis}, - journal = {Nucleic Acids Research} -} -@Article{Piechotta2017, - author = {Michael Piechotta and Emanuel Wyler and Uwe Ohler and Markus Landthaler and Christoph Dieterich}, - title = {{JACUSA}: site-specific identification of {RNA} editing events from replicate sequencing data}, - journal = {{BMC} Bioinformatics}, - year = {2017}, - volume = {18}, - number = {1}, - month = {jan}, - doi = {10.1186/s12859-016-1432-8}, - publisher = {Springer Nature}, -} -@article{Zhou2018, - doi = {10.1080/15476286.2018.1462654}, - url = {https://doi.org/10.1080/15476286.2018.1462654}, - year = {2018}, - month = may, - publisher = {Informa {UK} Limited}, - volume = {15}, - number = {7}, - pages = {892--900}, - author = {Katherine I. Zhou and Wesley C. Clark and David W. Pan and Matthew J. Eckwahl and Qing Dai and Tao Pan}, - title = {Pseudouridines have context-dependent mutation and stop rates in high-throughput sequencing}, - journal = {{RNA} Biology} -} \ No newline at end of file diff --git a/vignettes/JACUSA2helper-meta-condtions.Rmd b/vignettes/JACUSA2helper-meta-condtions.Rmd index 8a22a49..a6993cd 100644 --- a/vignettes/JACUSA2helper-meta-condtions.Rmd +++ b/vignettes/JACUSA2helper-meta-condtions.Rmd @@ -45,7 +45,7 @@ unique(Zhou2018$meta_cond) When manipulating a multi `results` object created by `read_results()`, it is crucial to distinguish the following files in an analysis pipeline: * `results %>% ... other functions()` -* `results %>% dplyr:group_by(results, meta_condition) %>% ... other functions()` +* `results %>% dplyr:group_by(results, meta_cond) %>% ... 
other functions()` The first statement will apply any subsequent functions to ALL sites regardless of the meta condition while the last statement will apply to sites of EACH meta condition! @@ -74,7 +74,7 @@ Filter by coverage regardless of the meta condition. ```{r} filtered_all <- Zhou2018 %>% - dplyr::filter(All(cov >= 20000)) %>% + dplyr::filter(All(cov$cond1 >= 20000) & All(cov$cond2 >= 20000)) %>% dplyr::group_by(contig, meta_cond) %>% dplyr::count() # count in a group filtered_all @@ -85,96 +85,107 @@ Filter by coverage of each meta condition. ```{r} filtered_meta <- Zhou2018 %>% dplyr::group_by(meta_cond) %>% - dplyr::filter(All(cov >= 20000)) %>% + dplyr::filter(All(cov$cond1 >= 20000) & All(cov$cond2 >= 20000)) %>% dplyr::group_by(contig, meta_cond) %>% dplyr::count() # count in a group filtered_meta ``` # Plot - -First, we add a description `cond_desc` of the conditions to the result object. The data sets of `Zhou2018` have been layout out in such a way that condition 1 and 2 correspond to carbodiimide (+CMC) treatment and control (-CMC), respectively. +First, we add a description `data_desc` of the conditions to the result object. +The data sets of `Zhou2018` have been layout out in such a way that condition 1 +and 2 correspond to carbodiimide (+CMC) treatment and control (-CMC), respectively. ```{r} data(Zhou2018) -result <- Zhou2018 %>% - dplyr::filter(strand == "+") - -# create nice label -#result$cond_desc <- "" -#result$cond_desc[result$cond == 1] <- "+CMC" -#result$cond_desc[result$cond == 2] <- "-CMC" -#result$group <- do.call(interaction, result[c("cond", "repl")]) +result <- gather_repl(Zhou2018$id, Zhou2018$arrest_rate) %>% + dplyr::inner_join(Zhou2018 %>% dplyr::select(id, pvalue, meta_cond), by = "id") %>% + dplyr::mutate( + data_desc = dplyr::case_when( + cond == "cond1" ~ "+CMC", + cond == "cond2" ~ "-CMC", + ) + ) + +# grouping enables nice legends in plot +group <- do.call(interaction, result[c("cond", "repl")]) +result$group <- group # relate group={cond(ition), repl(icat)}, and nice label -#meta_desc <- dplyr::distinct(result, cond, repl, group, cond_desc) -# map group values to nice labels (cond_desc) -#limits <- as.vector(meta_desc[["group"]]) -#labels <- meta_desc[["cond_desc"]] +meta_desc <- dplyr::distinct(result, cond, repl, group, data_desc) +# map group values to nice labels (data_desc) +limits <- as.vector(meta_desc[["group"]]) +labels <- meta_desc[["data_desc"]] ``` -Next, we define a ggplot2 object that allows to merge legend for different scales. Check [combine legends]{https://ozh2014.wordpress.com/2017/06/26/combine-related-legends-into-one-in-ggplot2/} for details. -In brief, we use `colour` to represent cond(ition) and `linetype` to represent repl(icate) an relate their possible combinations to descriptive `labels`. +Next, we define a ggplot2 object that allows to merge legend for different scales. +Check [combine legends]{https://ozh2014.wordpress.com/2017/06/26/combine-related-legends-into-one-in-ggplot2/} for details. +In brief, we use `colour` to represent cond(ition) and `linetype` to represent +repl(icate) an relate their possible combinations to descriptive `labels`. 
```{r}
-#colour_scale <- ggplot2::scale_colour_manual(
-# name = "Treatment",
-# labels = labels,
-# limits = limits,
-# values = factor(meta_desc[["cond"]]) %>% as.integer()
-# )
-#linetype_scale <- ggplot2::scale_linetype_manual(
-# name = "Treatment",
-# labels = labels,
-# limits = limits,
-# values = factor(meta_desc[["repl"]]) %>% as.integer()
-# )
+colour_scale <- ggplot2::scale_colour_manual(
+  name = "Treatment",
+  labels = labels,
+  limits = limits,
+  values = factor(meta_desc[["cond"]]) %>% as.integer()
+)
+linetype_scale <- ggplot2::scale_linetype_manual(
+  name = "Treatment",
+  labels = labels,
+  limits = limits,
+  values = factor(meta_desc[["repl"]]) %>% as.integer()
+)
 ```
 
 ## Pvalue distribution
-
 Plot of the empirical cumulative distributions of pvalues for all meta conditions.
 
 ```{r}
-#result %>%
-# dplyr::distinct(contig, start, end, strand, meta_cond, pvalue) %>%
-# ggplot2::ggplot(ggplot2::aes(x = pvalue, colour = meta_cond)) +
-# ggplot2::labs(colour = "Meta condition") +
-# ggplot2::ylab("Density") +
-# ggplot2::stat_ecdf(geom = "step") +
-# ggplot2::xlab("pvalue") +
-# ggplot2::theme(legend.position = "bottom")
+result %>%
+  ggplot2::ggplot(ggplot2::aes(x = pvalue, colour = meta_cond)) +
+  ggplot2::labs(colour = "Meta condition") +
+  ggplot2::ylab("Density") +
+  ggplot2::stat_ecdf(geom = "step") +
+  ggplot2::xlab("pvalue") +
+  ggplot2::theme(legend.position = "bottom")
 ```
 
-## Empirical cumulative arrest rate distribution
+## Empirical cumulative arrest rate distribution
 ```{r}
-#result %>%
-# ggplot2::ggplot(ggplot2::aes(x = arrest_rate, colour = group, linetype = group)) +
-# ggplot2::stat_ecdf(geom = "step") +
-# colour_scale +
-# linetype_scale +
-# ggplot2::xlab("Density") +
-# ggplot2::xlab("Arrest rate") +
-# ggplot2::theme(legend.position = "bottom") +
-# ggplot2::facet_grid(meta_cond ~ .)
+result %>%
+  ggplot2::ggplot(ggplot2::aes(x = value, colour = group, linetype = group)) +
+  ggplot2::stat_ecdf(geom = "step") +
+  colour_scale +
+  linetype_scale +
+  ggplot2::ylab("Density") +
+  ggplot2::xlab("Arrest rate") +
+  ggplot2::theme(legend.position = "bottom") +
+  ggplot2::facet_grid(meta_cond ~ .)
 ```
 
 ## Compare coverage
-
 ```{r}
-#result %>%
-# ggplot2::ggplot(ggplot2::aes(x = cov, colour = group, linetype = group)) +
-# ggplot2::stat_ecdf(geom = "step") +
-# colour_scale +
-# linetype_scale +
-# ggplot2::scale_x_log10(
-# breaks = scales::trans_breaks("log10", function(x) 10^x),
-# labels = scales::trans_format("log10", scales::math_format(10^.x))
-# ) +
-# ggplot2::xlab("Read Coverage") +
-# ggplot2::theme(legend.position = "bottom") +
-# ggplot2::facet_grid(meta_cond ~ .)
+gather_repl(Zhou2018$id, Zhou2018$cov) %>%
+  dplyr::inner_join(dplyr::select(Zhou2018, id, meta_cond), by = "id") %>%
+  dplyr::mutate(
+    data_desc = dplyr::case_when(
+      cond == "cond1" ~ "+CMC",
+      cond == "cond2" ~ "-CMC"
+    ),
+    # group is required by colour_scale/linetype_scale defined above
+    group = interaction(cond, repl)
+  ) %>%
+  ggplot2::ggplot(ggplot2::aes(x = value, colour = group, linetype = group)) +
+  ggplot2::stat_ecdf(geom = "step") +
+  colour_scale +
+  linetype_scale +
+  ggplot2::scale_x_log10(
+    breaks = scales::trans_breaks("log10", function(x) 10^x),
+    labels = scales::trans_format("log10", scales::math_format(10^.x))
+  ) +
+  ggplot2::xlab("Read Coverage") +
+  ggplot2::theme(legend.position = "bottom") +
+  ggplot2::facet_grid(meta_cond ~ .)
 ```
 
 
@@ -183,35 +194,34 @@ We plot the arrest rate for all three meta conditions and +CMC and -CMC treatment.
```{r}
-result %>%
+Zhou2018 %>%
   dplyr::filter(pvalue <= 0.00001) %>%
   dplyr::group_by(meta_cond) %>%
-  dplyr::filter_by(All(cov >= 20000)) %>%
-  dplyr::ungroup() %>%
-  dplyr::select(contig, start, end, strand, meta_cond, arrest_rate) %>%
-#  tidyr::spread(cond, arrest_rate) %>%
-#  ggplot2::ggplot(ggplot2::aes(x = arrest_rate1, y = arrest_rate2, colour = meta_cond)) +
-#  ggplot2::geom_point() +
-#  ggplot2::ylab("arrest rate -CMC") +
-#  ggplot2::xlab("arrest rate +CMC") +
-#  ggplot2::facet_grid(. ~ meta_cond) +
-#  ggplot2::geom_abline(linetype = "dashed", colour = "gray") +
-#  ggplot2::labs(colour = "Meta condition") +
-#  ggplot2::theme(legend.position = "bottom")
+  dplyr::filter(All(cov$cond1 >= 20000) & All(cov$cond2 >= 20000)) %>%
+  ggplot2::ggplot(ggplot2::aes(x = arrest_rate$cond1$rep1, y = arrest_rate$cond2$rep1, colour = meta_cond)) +
+  ggplot2::geom_point() +
+  ggplot2::ylab("arrest rate -CMC") +
+  ggplot2::xlab("arrest rate +CMC") +
+  ggplot2::facet_grid(. ~ meta_cond) +
+  ggplot2::geom_abline(linetype = "dashed", colour = "gray") +
+  ggplot2::labs(colour = "Meta condition") +
+  ggplot2::theme(legend.position = "bottom")
 ```
 
-We plot the arrest rate delta of +CMC and -CMC treatment for all three meta conditions and add Pseudouridine sites (black points) from [3Dmodmap]{https://people.biochem.umass.edu/fournierlab/3dmodmap/main.php}.
+We plot the arrest rate delta of +CMC and -CMC treatment for all three meta
+conditions and add pseudouridine sites (black points) from
+[3Dmodmap](https://people.biochem.umass.edu/fournierlab/3dmodmap/main.php).
 
 ```{r}
 data(modmap)
 mod <- modmap %>% dplyr::filter(rrna == "NR_003286_RNA18SN5", base == "PseudoU")
 
-result %>%
+Zhou2018 %>%
   dplyr::filter(contig == "NR_003286_RNA18SN5") %>%
   dplyr::filter(pvalue <= 0.00001) %>%
   dplyr::group_by(meta_cond) %>%
-  dplyr::filter(All(cov >= 20000)) %>%
+  dplyr::filter(All(cov$cond1 >= 20000) & All(cov$cond2 >= 20000)) %>%
   dplyr::mutate(arrest_rate_delta = arrest_rate$cond1$rep1 - arrest_rate$cond2$rep1) %>%
   ggplot2::ggplot(ggplot2::aes(x = end, y = arrest_rate_delta, fill = meta_cond)) +
   ggplot2::geom_bar(stat = "identity") +
diff --git a/vignettes/JACUSA2helper.Rmd b/vignettes/JACUSA2helper.Rmd
index 080d835..1382380 100644
--- a/vignettes/JACUSA2helper.Rmd
+++ b/vignettes/JACUSA2helper.Rmd
@@ -23,101 +23,186 @@ output file(s) with [JACUSA2helper](https://github.com/dieterich-lab/JACUSA2help
 consists of:
 
 * Read JACUSA2 output file(s) into result object.
-* Add optional data, depending on the result file, e.g.: `base_sub()` for base substitutions.
-* Filter result object by some criteria, e.g.: `dplyr::filter(All(cov >= 10))` and
-  use `All()` or `Any` on combined data (conditions and replicates).
+* Add optional data, e.g.: `base_sub()` - add base substitutions.
+* Filter result object by some criteria, e.g.: `dplyr::filter(All(cov$cond1 >= 10))` -
+  retain sites with read coverage >= 10, and use `All()` or `Any()` on structured
+  columns (conditions and replicates).
* Plot filtered result object.
 
-JACUSA2helper supports the analysis of the following methods from JACUSA2: *call-{1,2}*, *pileup*, *rt-arrest*, and *lrt-arrest*(experimental).
-Check the [JACUSA2 manual](https://github.com/dieterich-lab/JACUSA2/manual/manual.pdf) for more details.
+JACUSA2helper supports the analysis of the following methods from JACUSA2:
+*call-{1,2}*, *pileup*, *rt-arrest*, and *lrt-arrest* (experimental).
+Check the [JACUSA2 manual](https://github.com/dieterich-lab/JACUSA2/manual/manual.pdf)
+for more details.
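+
+The following is a minimal sketch of this workflow. It is not evaluated
+(`eval=FALSE`) and assumes a two-condition run written to a file named
+`JACUSA2.out`; the file name, the thresholds, and the replicate picked for
+`base_sub()` are placeholders for illustration only:
+
+```{r, eval=FALSE}
+library(JACUSA2helper)
+library(magrittr) # provides %>%
+
+# read JACUSA2 output into a result object
+result <- read_result("JACUSA2.out")
+
+# retain robust, sufficiently covered sites and add base substitutions
+filtered <- result %>%
+  dplyr::filter(All(cov$cond1 >= 10) & All(cov$cond2 >= 10)) %>%
+  dplyr::filter(robust(bases)) %>%
+  dplyr::mutate(sub = base_sub(ref, bases$cond2$rep1))
+
+# plot the distribution of base substitutions
+ggplot2::ggplot(filtered, ggplot2::aes(x = sub)) +
+  ggplot2::geom_bar() +
+  ggplot2::xlab("Base substitution")
+```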
-The main data structure is the result object that is implemented by following the tidy data approach from @Wickham2014 to feature easy interaction with [dplyr](https://github.com/tidyverse/dplyr) and [ggplot2](https://github.com/tidyverse/ggplot2).
+The main data structure is the result object that is implemented by following
+the tidy data approach from @Wickham2014 and enables easy interaction with
+[dplyr](https://github.com/tidyverse/dplyr) and [ggplot2](https://github.com/tidyverse/ggplot2).
 
-In the following, we will focus on single pairwise comparisons of two conditions of one output file.
-If you want to analyze multiple files with similar pairwise comparisons, read `vignette("JACUSA2helper-meta-conditions")`.
+In the following, we will focus on single pairwise comparisons of two conditions
+of one output file. If you want to analyze multiple files with similar pairwise
+comparisons, read `vignette("JACUSA2helper-meta-conditions")`.
 
-## Multi line site
-
-While previously in JACUSA*1*, a site could be uniquely identified as one line in the output by the coordinate columns: 'contig', 'start', 'end', and 'strand', JACUSA2 features more complex data structures to store new features such as arrest positions of arrest events, read/base stratification and INDEL counts. Therefore, sites can now cover multiple lines of output - check Section "Introduction" of [JACUSA2 manual](https://github.com/dieterich-lab/JACUSA2/manual/manual.pdf) for details.
+## Multi-line sites and new features
+While previously in JACUSA*1*, a site could be uniquely identified as one line
+in the output by the coordinate columns: 'contig', 'start', 'end', and 'strand',
+JACUSA*2* features more complex data structures to store new features such as
+arrest positions of arrest events, variant tagging, and INDEL counts.
+Therefore, sites can now cover multiple lines of output - check Section
+"Introduction" of the [JACUSA2 manual](https://github.com/dieterich-lab/JACUSA2/manual/manual.pdf)
+for details.
 
 ## Variant calling - Data: rdd (RNA-DNA-differences)
+To introduce the basic verbs for manipulating JACUSA2 output, we'll use
+`JACUSA2helper::rdd`. This data set contains a subset from @Piechotta2017 and is
+documented in `?rdd`. In brief, the sample data set consists of 10,000 sites of
+1x DNA and 2x stranded RNA sequencing libraries that can be used to define
+RNA-DNA-differences (RDDs).
+
+```{r}
+library(JACUSA2helper)
+data(rdd)
+```
 
-To introduce the basic verbs for manipulating JAUCSA2 output, we'll use `JACUSA2helper:rdd`. This data set contains a subset from @Piechotta2017 and is documented in `?rdd`. In brief, the sample data set consists of 10.000 sites of 1x DNA and 2x stranded RNA sequencing libraries that allow to define RNA-DNA-differences (RDDs).
+Note that `rdd` is a tibble - check [tibble](http://tibble.tidyverse.org) for
+more details.
+
+Use dplyr to manipulate the result object `rdd`. For details, check
+[dplyr](https://github.com/tidyverse/dplyr) and read `vignette("dplyr")`.
+
+### Structured columns
+JACUSA2 features structured columns (nested tibbles) where condition- and
+replicate-specific data is stored, e.g.: `bases`, `cov`, etc.
 
 ```{r}
-library(JACUSA2helper)
-dplyr::arrange(rdd, contig, start, strand)
+rdd[c("id", "bases", "cov")]
 ```
 
-Note that `rdd` is a tibble - check [tibble](http://tibble.tidyverse.org) for more details.
+Check the structure of a column with:
+```{r}
+names(rdd$cov)
+```
+To access specific coverage information for condition 2, use:
+```{r}
+rdd$cov$cond2
+```
+In summary, all structured/nested columns hold:
+* data of a condition "cond*I*" on the first level, and
+* data of a replicate "rep*J*" on the second level.
 
-Use dplyr to manipulate the result object `rdd`. Check details on [dplyr](https://github.com/tidyverse/dplyr) and read `vignette("dplyr")`.
+Coverage information for condition 2 and replicate 1 can be accessed with:
+```{r}
+rdd$cov$cond2$rep1
+```
 
-Once the result object is available, you can start to manipulate it. In a first step, we will retain sites with score >= 2.
+#### Wrappers for `lapply()` and `mapply()`
+For accessing and manipulating structured columns there are JACUSA2-specific
+wrappers for `lapply()` and `mapply()`:
+* `lapply_cond(x, f)` operates on condition data (list of replicate data),
+* `lapply_repl(x, f)` operates on replicate data of each condition, and
+* `mapply_repl(f, ...)` operates on replicate data and takes multiple inputs.
+
+To get the total coverage for each condition, use:
+```{r}
+lapply_cond(rdd$cov, rowSums)
+```
+
+The following gives you the observed base calls per replicate:
+```{r}
+base_calls <- lapply_repl(rdd$bases, base_call)
+base_calls$cond2
+```
+
+### Filtering with `dplyr::filter()`
+dplyr and `%>%` from magrittr allow you to formulate compact analysis pipelines.
+
+In a first step, we will retain sites with score >= 2.
 ```{r}
 # before filtering
 dim(rdd)
 result <- dplyr::filter(rdd, score >= 2)
+print("After filtering")
 # after filtering
 dim(result)
 ```
 
-dplyr and `%>%` from magrittr allow to formulate compact analysis pipelines. In the following, we will:
+In order to formulate complicated statements with `dplyr::filter()` featuring
+arbitrary combinations of conditions and/or replicates, use the following
+convenience functions:
+* `All()` and
+* `Any()`.
+
+The following statement retains sites where all replicates of condition 1 have
+coverage >= 50 and at least one replicate of condition 2 has at least 10 reads:
+```{r}
+rdd %>%
+  dplyr::filter(All(cov$cond1 >= 50) & Any(cov$cond2 >= 10)) %>%
+  dplyr::select(id, cov)
+```
+
+In the following, we will:
 
 * remove sites with score < 2,
 * retain sites with coverage >= 10 for all replicates,
-* remove sites with > 2 observed bases (including reference base),
-* apply a filter that retains only robust sites,
-* use the observed base calls from one condition to redefine the reference (useful in DNA vs. RND comparisons),
-* add a field that shows base substitution (here RNA editing sites ) per data sample.
+* remove sites with > 2 observed bases (excluding reference base),
+* apply a filter that retains only robust sites (RNA editing must be present in
+  all/both replicates - see TODO),
+* finally, compute the base substitution (here RNA editing sites against the FASTA
+  reference) for the pooled RNA replicates.
```{r}
-# filter data
 filtered <- rdd %>%
   dplyr::filter(score >= 2) %>%
-  dplyr::filter(All(cov$cond1 >= 10) & All(cov$cond2 > 10))
-  dplyr::filter(base_count(bc, ref) <= 2) %>%
-  dplyr::filter(robust(bases)) %>%
-  dplyr::mutate(base_sub = base_sub(bc$cond1, bc$cond2)) # add base substitutions
+  dplyr::filter(All(cov$cond1 >= 10) & All(cov$cond2 >= 10)) %>%
+  dplyr::filter(base_count(bc) <= 2) %>%
+  dplyr::filter(robust(bases))
+
+# sum base call counts of condition 2 / RNA replicates
+rna_bases <- Reduce("+", filtered$bases$cond2)
+
+# we don't need lapply_repl, because we don't operate on all replicates from all
+# conditions - only condition 2 / RNA
+ref2rna <- base_sub(filtered$ref, rna_bases)
+table(ref2rna)
+
 ```
 
-Finally, we will add a summary of base substitutions for a site by using `merge_sub()`:
+Instead of using DNA information from the reference FASTA sequence via column "ref",
+we could use the actual data from condition 1 / DNA.
+
+TODO Robust requires observations (here base calls) to be present in all replicates of at least one condition.
 
 ```{r}
-# summarise data
-#summary_site_sub <- filtered %>%
-#  group_by_site() %>% # following function will be applied on rows group by site(s)
-#  dplyr::summarise(site_sub = merge_sub(sub)) # adds new column site_sub
-#summary_site_sub
+cond1_ref <- base_call(filtered$bases$cond1$rep1)
+dna2rna <- base_sub(cond1_ref, rna_bases)
+table(dna2rna)
 ```
 
-There is more than one way to arrive at this summary of the data. In another approach `dplry::summarise(site_sub = merge_sub(sub))` could be replaced with `dplyr::mutate(site_sub = merge_sub(sub)) %>% dplyr::distinct(contig, start, end, strand, site_sub)`.
+Using the two different sources of DNA information, `ref2rna` and `dna2rna`,
+we can conclude that there are no polymorphic positions in `filtered` because
+`ref2rna` and `dna2rna` are identical.
 
 ### Plot base substitutions
-
 We can now plot the site specific distribution of base substitutions using [ggplot2](https://github.com/tidyverse/ggplot2) with:
 
 ```{r}
-#summary_site_sub %>%
-#  ggplot2::ggplot(ggplot2::aes(x = site_sub, fill = site_sub)) +
-#  ggplot2::geom_bar() +
-#  ggplot2::xlab("Base substitution") +
-#  ggplot2::scale_fill_discrete(name = "Base substitution") +
-#  ggplot2::theme(
-#    legend.position = "bottom",
-#    axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)
-#  )
+tidyr::tibble(base_sub = dna2rna) %>% # ggplot requires a data frame
+  ggplot2::ggplot(ggplot2::aes(x = base_sub, fill = base_sub)) +
+  ggplot2::geom_bar() +
+  ggplot2::xlab("Base substitution") +
+  ggplot2::scale_fill_discrete(name = "Base substitution") +
+  ggplot2::theme(
+    legend.position = "bottom",
+    axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)
+  )
 ```
 
 ### Plot coverage distribution
-To compare the coverage distribution of each data sample, we create a simple plot of the empirical coverage distribution, where we map cond(ition) to `colour` and repl(icate) to `linetype`.
+To compare the coverage distribution of each data sample, we create a simple plot of the empirical coverage distribution, where we map cond(ition) to `colour` and rep(licate) to `linetype`.
```{r}
-result %>%
-  ggplot2::ggplot(ggplot2::aes(x = cov, colour = cond, linetype = repl)) +
+gather_repl(result$id, result$cov) %>%
+  ggplot2::ggplot(ggplot2::aes(x = value, colour = cond, linetype = repl)) +
   ggplot2::stat_ecdf(geom = "step")
 ```
 
@@ -131,16 +216,18 @@ In order to improve the preliminary plot, we need to modify the following aspect
 
 First, a data description is added to the result object:
 
-# TODO
 ```{r}
-data(rdd)
-result <- rdd
-# create nice data description
-# DNA RNA
-# data_desc for cond and repl in result:
-dplyr::distinct(result, cond, repl, data_desc)
+result <- gather_repl(rdd$id, rdd$cov) %>%
+  dplyr::mutate(
+    data_desc = dplyr::case_when(
+      cond == "cond1" ~ "DNA",
+      cond == "cond2" & repl == "rep1" ~ "RNA 1",
+      cond == "cond2" & repl == "rep2" ~ "RNA 2"
+    )
+  )
 ```
+
 Supplementary data `limits` and `labels` are created to enable sleek legend by relating cond(ition) and repl(icate) to data description. They will be mapped to `colour` and `linetype`.
 
@@ -159,7 +246,7 @@ Finally, we combine the previous snippets and arrive at the final plot:
 
 ```{r}
 name <- "Data description"
 result %>%
-  ggplot2::ggplot(ggplot2::aes(x = cov, colour = group, linetype = group)) +
+  ggplot2::ggplot(ggplot2::aes(x = value, colour = group, linetype = group)) +
   # map values for cond to colour and use nice labels
   ggplot2::scale_colour_manual(
     name = name,
@@ -183,26 +270,35 @@ result %>%
     labels = scales::trans_format("log10", scales::math_format(10^.x))
   ) +
   ggplot2::xlab("Read Coverage") +
-  ggplot2::theme(legend.position = "bottom")
+  ggplot2::theme(legend.position = "bottom")
 ```
 
-From the plot, we can deduce that RNA samples have similar coverage distribution and are higher covered than the DNA data sample. This code serves as a blueprint for other plots, e.g.:
+From the plot, we can deduce that the RNA samples have similar coverage distributions
+and are covered more deeply than the DNA sample. This code serves as a blueprint
+for other plots, e.g.:
 
 * empirical cumulative distribution of arrest rate or
 * number of observed bases.
 
 ## Arrest events
-@Zhou2018 map RNA modification of pseudouridine ($\Psi$) by chemically modifying
-pseudouridines with carbodiimide (+CMC) and detecting arrest events that are induced by reverse
-transcription stops in high-throughput sequencing under 3 different conditions: HIVRT, SIIIRTMn, and
-SIIIRTMg. The result of JACUSA2 are available by using `data()`. Read [JACUSA2 manual] for details on how data has been processed from the original publication by @Zhou2018. In brief, the data has been filtered to contain the following rRNAs: RNA18SN5 and RNA28SN5.
+In @Zhou2018, the authors map the RNA modification pseudouridine ($\Psi$) by
+chemically modifying pseudouridines with carbodiimide (+CMC) and detecting
+arrest events that are induced by reverse transcription stops in high-throughput
+sequencing under 3 different conditions: HIVRT, SIIIRTMn, and SIIIRTMg.
+The results of JACUSA2 are available via `data()`.
+Read the [JACUSA2 manual](https://github.com/dieterich-lab/JACUSA2/manual/manual.pdf)
+for details on how the data has been processed from the
+original publication by @Zhou2018. In brief, the data has been filtered to
+contain the following rRNAs: RNA18SN5 and RNA28SN5.
 
-In the following, we will be looking at the results for `data(HIVRT)`. By default JACUSA2 output from *rt-arrest* will contain the following columns:
+In the following, we will be looking at the results for `data(HIVRT)`.
+By default JACUSA2 output from *rt-arrest* will contain the following
+structured/nested columns:
 
-* `arrest` and `through` base count columns.
-* `bases` = `arrest` + `through`. Total base counts.
-* TODO Columns prefixed with `bc_`, `cov_` will hold observed bases and coverage for the respective base counts, respectively.
+* `arrest` and `through`: arrest and read-through base count columns.
+* `bases` = `arrest` + `through`: total base counts.
+* `arrest_rate`: arrest rate calculated from `arrest` and `through`.
+* `cov` holds coverage for total base counts.
 
 ```{r}
 data(HIVRT)
@@ -214,87 +310,55 @@ We investigate the strand specific coverage distribution of reads and discover t
 The other mappings are probably artefacts. But this discussion is beyond the scope of this vignette.
 
 ```{r}
-HIVRT %>%
-  ggplot2::ggplot(ggplot2::aes(x = cov, colour = strand)) +
-  ggplot2::geom_density() + ggplot2::scale_x_log10(
-    breaks = scales::trans_breaks("log10", function(x) 10^x),
-    labels = scales::trans_format("log10", scales::math_format(10^.x))
-  ) +
-  ggplot2::xlab("Read Coverage") +
-  ggplot2::theme(legend.position = "bottom") +
-  ggplot2::facet_grid(contig ~ .)
+gather_repl(HIVRT$id, HIVRT$cov) %>% # extract coverages
+  dplyr::inner_join(dplyr::select(HIVRT, id, contig, strand), by = "id") %>% # add contig and strand
+  ggplot2::ggplot(ggplot2::aes(x = value, colour = strand)) +
+  ggplot2::geom_density() + ggplot2::scale_x_log10(
+    breaks = scales::trans_breaks("log10", function(x) 10^x),
+    labels = scales::trans_format("log10", scales::math_format(10^.x))
+  ) +
+  ggplot2::xlab("Read Coverage") +
+  ggplot2::theme(legend.position = "bottom") +
+  ggplot2::facet_grid(contig ~ .)
 ```
 
 We filter by pvalue, coverage, strand, and only retain robust arrest events:
 
 ```{r}
-result <- HIVRT
-# result$cond_desc <- ""
-# TODO
-#result$cond_desc[result$cond == 1] <- "+CMC"
-#result$cond_desc[result$cond == 2] <- "-CMC"
-# TODO
-filtered <- result %>%
+dim(HIVRT)
+filtered <- HIVRT %>%
   dplyr::filter(strand == "+") %>%
-  rt-arrest has pvalue instead of score
-  dplyr::filter(pvalue = 0.00001) %>%
-  dplyr::filter(All(cov$cond1 >= 100) & All(cov$cond21 >= 100)) %>%
-  dplyr::filter(robust(arrest)) %>%
+  dplyr::filter(pvalue <= 0.00001) %>%
+  dplyr::filter(All(cov$cond1 >= 100) & All(cov$cond2 >= 100)) %>%
+  dplyr::filter(robust(arrest))
 dim(filtered)
 ```
-
-We use the `peak()` function to transform the filtered result object and plot the arrest rate from each condition in a scatter plot.
+We plot the arrest rate of each condition in a scatter plot by accessing the
+structured `arrest_rate` column directly.
 
 ```{r}
-# TODO
-#peak(filtered, peak_cols = c("arrest_rate")) %>%
-#  ggplot2::ggplot(ggplot2::aes(x = arrest_rate11, y = arrest_rate21)) +
-#  ggplot2::geom_point() +
-#  ggplot2::facet_grid(. ~ contig) +
-#  ggplot2::geom_abline(colour = "red") +
-#  ggplot2::xlab("+CMC arrest rate") +
-#  ggplot2::ylab("-CMC arrest rate")
+filtered %>%
+  ggplot2::ggplot(ggplot2::aes(x = arrest_rate$cond1$rep1, y = arrest_rate$cond2$rep1)) +
+  ggplot2::geom_point() +
+  ggplot2::facet_grid(. ~ contig) +
+  ggplot2::geom_abline(colour = "red") +
+  ggplot2::xlab("+CMC arrest rate") +
+  ggplot2::ylab("-CMC arrest rate")
 ```
 
 From the above plot, we can deduce that the arrest rate is higher in the +CMC condition for both rRNAs when compared against -CMC. If you want to look simultaneously at all conditions (HIVRT, SIIIRTMn, and SIIIRTMg), checkout `vignette("JACUSA2helper-meta-conditions")`.
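+
+The `arrest_rate` column can also be recomputed from the `arrest` and `through`
+counts. The following sketch is left unevaluated and assumes that the arrest
+rate is defined as arrest coverage divided by total coverage and that
+`mapply_repl()` returns the same nested cond/repl structure (check
+`?arrest_rate` and `?mapply_repl`):
+
+```{r, eval=FALSE}
+# per-replicate coverages derived from the structured count columns
+arrest_cov <- lapply_repl(HIVRT$arrest, rowSums)
+through_cov <- lapply_repl(HIVRT$through, rowSums)
+
+# combine both inputs replicate-wise
+manual_rate <- mapply_repl(function(a, t) a / (a + t), arrest_cov, through_cov)
+
+# compare with the precomputed column for condition 1, replicate 1
+all.equal(manual_rate$cond1$rep1, HIVRT$arrest_rate$cond1$rep1)
+```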
-### Plot base substitutions - -# TODO -```{r} -#filtered %>% -# add_bc_ratio() %>% -# add_non_ref_ratio() %>% -# dplyr::select(contig, start, end, strand, cond_desc, cond, repl, bc_ratio, ref, non_ref_ratio) %>% -# group_by_site() %>% -# filter_by(any(non_ref_ratio >= 0.05)) %>% -# dplyr::ungroup() %>% -# dplyr::mutate(bc_max = base_call_non_ref(ref, bc_ratio), sub = base_sub(ref, bc_max)) %>% -# ggplot2::ggplot(ggplot2::aes(x = sub, fill = sub)) + -# ggplot2::geom_bar() + -# ggplot2::xlab("Base substitution") + -# ggplot2::scale_fill_discrete(name = "Base substitution") + -# ggplot2::theme( -# legend.position = "bottom", -# axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1) -# ) + -# ggplot2::facet_grid(cond_desc ~ .) -``` - ## Stranded data - When working with stranded RNA-Seq data, inverting base calls is not necessary because JACUSA2 will automatically invert Single End (SE) and Paired End (PE) depending on the provided library type option "-P" UNSTRANDED|FR_FIRSTSTRAND|RF_SECONDSTRAND". # Input/Output - -There are two functions to read JACUSA2 output `read_result(file)` and `read_results(files, meta_conditions)` which is explained in `vignette("JACUSA2helper-meta-conditions")`. +There are two functions to read JACUSA2 output `read_result(file)` and `read_results(files, meta_conds)` which is explained in `vignette("JACUSA2helper-meta-conditions")`. Use `result <- read_result("JACUSA2.out")` to read and create a JACUSA2 result object from a JACUSA2 output file `JACUSA2.out`. By default, variables stored in the **info** column will be processed and unpacked making them available for further manipulation. - Summary of input/output methods: @@ -303,52 +367,24 @@ Summary of input/output methods: * `write_bedGraph()` Writes a vector of values as bedGraph file. # General function layout - In the following the core functions of JACUSA2helper are presented. Check the respective help page, e.g.: `?bc_ratio` to get more details. `arrest_rate` Calculate arrest rate from arrest and through reads. -`bc` Calculates observed bases from base counts. In most cases this will be called automatically in `read_result()` -`bc_ratio` Calculates a ratio matrix from a base count matrix. +`base_call` Calculates observed bases from base counts. +`base_ratio` Calculates a ratio matrix from a base count matrix. `coverage` Calculates coverage from base counts. In most cases this will be called automatically in `read_result()` `non_ref_ratio` Calculates non reference base ratio for base counts of some base type. `base_sub` Calculates base substitution from reference and base counts, e.g.: A->G. `sub_ratio` Calculates base substitution ratio from reference and observed bases, e.g.: 25% A->G. -All of the core functions mentioned above have wrappers and additional function with the following naming name scheme. -For the core function `bc()` which calculates observed base, there exist the following functions: - -* `add_bc(result, ...)` Calls `bc()` and adds the outcome to the `result` object to the column specified by `bc_col()`. -* `bc_col(base_type = "bases")` Defines the field to be used for the provided `base_type`. Supported values for `base_type` are: -*bases*, *arrest*, or *through*. *bases* is the default value. -* `get_bc(result, ...)` Retrieves the value stored under `bc_col()` in the `result` object. - -For all core function *F*, there exist the following functions: `add_F()`, `get_F()`, and `F_col()`. The exceptions to this rule are: `coverage()` and `base_sub()`. 
In order to maintain short columns names and prevent from overwriting R functions, core JACUSA2 function have the long form and `add_()`, `get_()`, `_col()` the short form (cov, and sub).
-
 # Filter result object
+* `robust()` retains sites where the observation (e.g. base calls) is present in all replicates of at least one condition.
+* `filter_artefact()` removes sites that have been marked as artefacts.
+* `All()`, `Any()` combine conditions on structured columns within `dplyr::filter()`.
 
-This function set enables filtering of sites. Make sure to use the correct parameters in `group_by_site()`.
-
-* `filter_by_robust_arrest_events()` Arrest event specific filter that will remove non robust results. Uses `filter_by()` internally.
-* `filter_by_robust_variants()` Variant specific filter that will remove non robust results. Uses `filter_by()` internally.
-
-# Peak - change data layout
-
-Sometimes it is useful and desired to have observations of some variable(s) in columns. This enables a direct comparison.
-Use `peak(result, peak_cols)` to transform a result object and transform variables defined in `peak_cols` to show them side-by-side.
-In the following, we will transform `rdd` result object to show coverage information.
-
-```{r}
-# TODO
-#data(rdd)
-#peak(rdd, peak_cols = "cov")
-```
-
-`peak()` uses "contig", "start", "end", "strand" columns to identify each site. Change parameter `id_cols` to change this.
-
+# Variant tagging (Base Stratification)
 
 ## References