diff --git a/NAMESPACE b/NAMESPACE index 4cf547e..6239a8d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,30 +4,19 @@ S3method(read,File) export(BcftoolsStatsFile) export(BclconvertReports) export(BclconvertReports375) -export(FastqcMetricsFile) export(File) -export(FragmentLengthHistFile) -export(MappingMetricsFile) export(MultiqcFile) export(PcgrJsonFile) export(PcgrTiersFile) export(PloidyEstimationMetricsFile) -export(ReplayFile) -export(SvMetricsFile) -export(TimeMetricsFile) -export(TrimmerMetricsFile) -export(VCMetricsFile) export(Wf) +export(Wf_dragen) export(Wf_sash) export(Wf_sash_download_tidy_write) export(Wf_tso_ctdna_tumor_only) export(Wf_tso_ctdna_tumor_only_v2) export(Wf_umccrise) export(Wf_umccrise_download_tidy_write) -export(WgsContigMeanCovFile) -export(WgsCoverageMetricsFile) -export(WgsFineHistFile) -export(WgsHistFile) export(bcftools_parse_vcf) export(bcftools_parse_vcf_regions) export(date_log) @@ -35,6 +24,15 @@ export(dr_func_eval) export(dr_gds_download) export(dr_output_format_valid) export(dr_s3_download) +export(dragen_cnv_metrics_read) +export(dragen_coverage_metrics_read) +export(dragen_fastqc_metrics_read) +export(dragen_gc_metrics_read) +export(dragen_mapping_metrics_read) +export(dragen_sv_metrics_read) +export(dragen_trimmer_metrics_read) +export(dragen_umi_metrics_read) +export(dragen_vc_metrics_read) export(dtw_Wf_tso_ctdna_tumor_only) export(dtw_Wf_tso_ctdna_tumor_only_v2) export(empty_tbl) @@ -69,7 +67,6 @@ export(s3_list_files_filter_relevant) export(s3_search) export(session_info_kable) export(tidy_files) -export(time_metrics_process) export(tso_acfc_plot) export(tso_acfc_read) export(tso_combinedvaro_smallv_read) diff --git a/R/Wf.R b/R/Wf.R index 7c2edab..9d35956 100644 --- a/R/Wf.R +++ b/R/Wf.R @@ -92,7 +92,8 @@ Wf <- R6::R6Class( "oncoanalyser_wgts_existing_both", "sash" ) - assertthat::assert_that(wname %in% wnames) + subwnames <- c("dragen") + assertthat::assert_that(wname %in% c(wnames, subwnames)) self$path <- sub("/$", "", path) # remove potential trailing slash self$wname <- wname self$filesystem <- dplyr::case_when( @@ -208,6 +209,7 @@ Wf <- R6::R6Class( #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). write = function(x, outdir = NULL, prefix = NULL, format = "tsv", drid = NULL) { assertthat::assert_that(!is.null(prefix)) + assertthat::assert_that(all(c("name", "data") %in% colnames(x))) if (!is.null(outdir)) { prefix <- file.path(outdir, prefix) } @@ -218,7 +220,7 @@ Wf <- R6::R6Class( out = list(write_dracarys(obj = .data$data, prefix = .data$p, out_format = format, drid = drid)) ) |> dplyr::ungroup() |> - dplyr::select("name", "data") + dplyr::select("name", "data", prefix = "p") invisible(d_write) } ) # end public diff --git a/R/dragen.R b/R/dragen.R index 7e7459c..2e6cbbd 100644 --- a/R/dragen.R +++ b/R/dragen.R @@ -1,574 +1,770 @@ -#' WgsContigMeanCovFile R6 Class +#' DRAGEN Fragment Length Hist Plot +#' +#' Plots the fragment length distributions as given in the +#' `fragment_length_hist` file. +#' +#' @param d Parsed tibble. +#' @param min_count Minimum read count to be plotted (def: 10). +#' +#' @return A ggplot2 plot containing fragment lengths on X axis and read counts +#' on Y axis for each sample. 
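A minimal usage sketch (annotation, not part of the diff) for the new plotting helper defined just below, assuming a parsed tibble with `FragmentLength` and `Count` columns; the toy data is made up, and since the helper is not exported in this diff it is called with `:::` here.

d <- tibble::tibble(
  FragmentLength = seq(50, 600, by = 10),
  Count = round(5000 * stats::dnorm(seq(50, 600, by = 10), mean = 300, sd = 80))
)
dracarys:::dragen_fraglenhist_plot(d, min_count = 10)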
+dragen_fraglenhist_plot <- function(d, min_count = 10) { + assertthat::assert_that(is.numeric(min_count), min_count >= 0) + d |> + dplyr::filter(.data$Count >= min_count) |> + ggplot2::ggplot(ggplot2::aes(x = .data$FragmentLength, y = .data$Count)) + + ggplot2::geom_line() + + ggplot2::labs(title = "Fragment Length Distribution") + + ggplot2::xlab("Fragment Length (bp)") + + ggplot2::ylab(glue("Read Count (min: {min_count})")) + + ggplot2::theme_minimal() + + ggplot2::theme( + legend.position = c(0.9, 0.9), + legend.justification = c(1, 1), + panel.grid.minor = ggplot2::element_blank(), + panel.grid.major = ggplot2::element_blank(), + plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold") + ) +} + +#' Read DRAGEN UMI Metrics #' -#' @description -#' Contains methods for reading and displaying contents of -#' the `wgs_contig_mean_cov_.csv` file output from DRAGEN. -#' This file contains the estimated coverage for all contigs, and an autosomal -#' estimated coverage. +#' Reads the `umi_metrics.csv` file output from DRAGEN. +#' +#' @param x Path to file. #' +#' @return Tibble with metrics. #' @examples -#' x1 <- system.file("extdata/wgs/SEQC-II.wgs_contig_mean_cov_normal.csv.gz", package = "dracarys") -#' x2 <- system.file("extdata/wgs/SEQC-II.wgs_contig_mean_cov_tumor.csv.gz", package = "dracarys") -#' cc1 <- WgsContigMeanCovFile$new(x1) -#' cc2 <- WgsContigMeanCovFile$new(x2) -#' d1 <- cc1$read() -#' d2 <- cc2$read() -#' cc1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv") -#' cc2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv") +#' \dontrun{ +#' dragen_umi_metrics_read(x) +#' } +#' @export +dragen_umi_metrics_read <- function(x) { + d0 <- readr::read_lines(x) + assertthat::assert_that(grepl("UMI STATISTICS", d0[1])) + abbrev_nm <- tibble::tribble( + ~raw, ~clean, ~target, + "number of reads", "reads_tot", TRUE, + "number of reads with valid or correctable umis", "reads_umi_valid_correctable", TRUE, + "number of reads in discarded families", "reads_discarded_families", TRUE, + "reads filtered out", "reads_filtered_out", FALSE, + "reads with all-g umis filtered out", "reads_filtered_out_all_g_umis", FALSE, + "reads with uncorrectable umis", "reads_uncorrectable_umis", FALSE, + "total number of families", "families_tot", FALSE, + "families contextually corrected", "families_contextually_corrected", FALSE, + "families shifted", "families_shifted", FALSE, + "families discarded", "families_discarded_tot", TRUE, + "families discarded by min-support-reads", "families_discarded_minsupportreads", TRUE, + "families discarded by duplex/simplex", "families_discarded_duplexsimplex", TRUE, + "families with ambiguous correction", "families_ambiguous_correction", TRUE, + "duplex families", "duplex_families", TRUE, + "consensus pairs emitted", "consensus_pairs_emitted", FALSE, + "mean family depth", "avg_family_depth", TRUE, + "number of collapsible regions", "collapsible_regions_tot", FALSE, + "min collapsible region size (num reads)", "collapsible_region_size_min", FALSE, + "max collapsible region size (num reads)", "collapsible_region_size_max", FALSE, + "mean collapsible region size (num reads)", "collapsible_region_size_mean", FALSE, + "collapsible region size standard deviation", "collapsible_region_size_sd", FALSE, + "histogram of num supporting fragments", "histo_num_supporting_fragments", TRUE, + "histogram of unique umis per fragment position", "histo_unique_umis", FALSE + ) + abbrev_nm_target <- abbrev_nm |> + 
dplyr::filter(.data$target) |> + dplyr::mutate( + raw = as.character(glue("on target {.data$raw}")), + clean = as.character(glue("{.data$clean}_ontarget")) + ) + abbrev_nm <- abbrev_nm |> + dplyr::bind_rows(abbrev_nm_target) |> + dplyr::select("raw", "clean") |> + tibble::deframe() + d1 <- d0 |> + tibble::as_tibble_col(column_name = "value") |> + tidyr::separate_wider_delim( + "value", + names = c("category", "dummy1", "var", "count", "pct"), + delim = ",", too_few = "align_start" + ) |> + dplyr::mutate( + var = tolower(.data$var), + var = dplyr::recode(.data$var, !!!abbrev_nm) + ) + dirty_names_cleaned(d1$var, abbrev_nm, x) + hist <- d1 |> + dplyr::filter(grepl("histo", .data$var)) |> + dplyr::select(name = "var", "count") |> + dplyr::mutate( + count = gsub("\\{|\\}", "", .data$count), + count = strsplit(.data$count, "\\|") + ) |> + tidyr::unnest("count") |> + dplyr::mutate(count = as.numeric(.data$count)) |> + tidyr::nest(.by = "name") + d2 <- d1 |> + dplyr::filter(!grepl("histo", .data$var)) |> + dplyr::select("var", "count", "pct") |> + tidyr::pivot_longer(c("count", "pct"), names_to = "name", values_to = "value") |> + dplyr::mutate( + value = as.numeric(.data$value), + name = dplyr::if_else(.data$name == "count", "", "_pct"), + var = glue("{.data$var}{.data$name}") + ) |> + dplyr::select("var", "value") |> + dplyr::filter(!is.na(.data$value)) |> + tidyr::pivot_wider(names_from = "var", values_from = "value") + res <- list(metrics = d2) |> + tibble::enframe(name = "name", value = "data") |> + dplyr::bind_rows(hist) |> + dplyr::mutate(name = glue("umi_{.data$name}")) + res +} + +#' Read DRAGEN GC Metrics +#' +#' Reads the `gc_metrics.csv` file output from DRAGEN. #' -#' cc1$plot(d1) -#' cc2$plot(d2) +#' @param x Path to file. +#' +#' @return Tibble with metrics. +#' @examples +#' \dontrun{ +#' dragen_gc_metrics_read(x) +#' } #' #' @export -WgsContigMeanCovFile <- R6::R6Class( - "WgsContigMeanCovFile", - inherit = File, - public = list( - #' @description - #' Reads the `wgs_contig_mean_cov_.csv` file output from DRAGEN. - #' - #' @param keep_alt Keep the ALT + Mito chromosomes? - #' @return tibble with the following columns: - #' - label: file label. - #' - chrom: contig name. - #' - n_bases: number of bases aligned to contig (excludes bases from - #' duplicate marked reads, reads with MAPQ=0, and clipped bases). - #' - coverage: col2 / contig length - read = function(keep_alt = TRUE) { - x <- self$path - readr::read_csv(x, col_names = c("chrom", "n_bases", "coverage"), col_types = "cdd") |> - dplyr::filter( - if (!keep_alt) { - !grepl("chrM|MT|_|Autosomal|HLA-|EBV", .data$chrom) - } else { - TRUE - } - ) - }, - - #' @description - #' Writes a tidy version of the `wgs_contig_mean_cov_.csv` file output - #' from DRAGEN. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). - write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - }, - - - #' @description Plots the `wgs_contig_mean_cov_.csv` files. - #' @param d Parsed object from `self$read()`. - #' @param top_alt_n Number of top covered alt contigs to plot per phenotype. - #' @return A ggplot2 object with chromosomes on X axis, and coverage on Y axis. 
- plot = function(d, top_alt_n = 15) { - assertthat::assert_that(top_alt_n >= 0) - - # Display chr1-22, X, Y at top (M goes to bottom). - # Display top 20 of the rest, plus rest as 'other', at bottom - main_chrom1 <- c(1:22, "X", "Y") - main_chrom2 <- c(paste0("chr", main_chrom1)) - main_chrom <- c(main_chrom1, main_chrom2, "Autosomal regions") - min_cvg <- 0.000001 - - d <- d |> - dplyr::mutate( - panel = dplyr::if_else(.data$chrom %in% main_chrom, "main", "alt"), - panel = factor(.data$panel, levels = c("main", "alt")) - ) |> - dplyr::select("chrom", "coverage", "panel") - - main_panel <- d |> dplyr::filter(.data$panel == "main") - alt_panel <- d |> dplyr::filter(.data$panel == "alt") - top_alt <- alt_panel |> - dplyr::top_n(top_alt_n, wt = .data$coverage) |> - dplyr::arrange(dplyr::desc(.data$coverage)) |> - dplyr::pull(.data$chrom) |> - unique() - - alt_panel2 <- alt_panel |> - dplyr::mutate(alt_group = dplyr::if_else(.data$chrom %in% top_alt, "top", "bottom")) - - alt_panel_final <- alt_panel2 |> - dplyr::group_by(.data$alt_group) |> - dplyr::summarise(mean_cov = mean(.data$coverage)) |> - dplyr::inner_join(alt_panel2, by = c("alt_group")) |> - dplyr::mutate( - chrom = dplyr::if_else(.data$alt_group == "bottom", "OTHER", .data$chrom), - coverage = dplyr::if_else(.data$alt_group == "bottom", .data$mean_cov, .data$coverage) - ) |> - dplyr::distinct() |> - dplyr::filter(coverage > min_cvg) |> - dplyr::ungroup() |> - dplyr::select("chrom", "coverage", "panel") - - chrom_fac_levels <- c(main_chrom, "chrM", "MT", top_alt[!top_alt %in% c("chrM", "MT")], "OTHER") - d <- dplyr::bind_rows(main_panel, alt_panel_final) |> - dplyr::mutate(chrom = factor(.data$chrom, levels = chrom_fac_levels)) - - d |> - dplyr::mutate(label = "sampleA") |> - ggplot2::ggplot( - ggplot2::aes( - x = .data$chrom, y = .data$coverage, group = .data$label, - ) - ) + - ggplot2::geom_point() + - ggplot2::geom_line() + - ggplot2::scale_y_continuous( - limits = c(0, NA), expand = c(0, 0), labels = scales::comma, - breaks = scales::pretty_breaks(n = 8) - ) + - ggplot2::theme_minimal() + - ggplot2::labs(title = "Mean Coverage Per Chromosome", colour = "Label") + - ggplot2::xlab("Chromosome") + - ggplot2::ylab("Coverage") + - ggplot2::theme( - legend.position = "top", - panel.grid.minor = ggplot2::element_blank(), - panel.grid.major.y = ggplot2::element_blank(), - strip.background = ggplot2::element_blank(), - strip.text.x = ggplot2::element_blank(), - axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust = 1, size = 6), - plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold"), - panel.spacing = ggplot2::unit(2, "lines") - ) + - ggplot2::facet_wrap(ggplot2::vars(.data$panel), nrow = 2, scales = "free") - } +dragen_gc_metrics_read <- function(x) { + d0 <- readr::read_lines(x) + assertthat::assert_that(grepl("GC BIAS DETAILS", d0[1])) + d1 <- d0 |> + tibble::as_tibble_col(column_name = "value") |> + tidyr::separate_wider_delim( + "value", + names = c("category", "dummy1", "name", "value", "fraction"), + delim = ",", too_few = "align_start" + ) + abbrev_nm <- c( + "Window size" = "window_size", + "Number of valid windows" = "n_windows_valid", + "Number of discarded windows" = "n_windows_discard", + "Average reference GC" = "gc_avg_reference", + "Mean global coverage" = "cov_avg_global", + "Normalized coverage at GCs 0-19" = "cov_norm_0to19", + "Normalized coverage at GCs 20-39" = "cov_norm_20to39", + "Normalized coverage at GCs 40-59" = "cov_norm_40to59", + "Normalized coverage at GCs 60-79" = 
"cov_norm_60to79", + "Normalized coverage at GCs 80-100" = "cov_norm_80to100", + "AT Dropout" = "dropout_at", + "GC Dropout" = "dropout_gc" ) -) + # GC METRICS SUMMARY + summary <- d1 |> + dplyr::filter(.data$category == "GC METRICS SUMMARY") |> + dplyr::select("name", "value") |> + dplyr::mutate( + value = as.numeric(.data$value), + name = dplyr::recode(.data$name, !!!abbrev_nm) + ) |> + tidyr::pivot_wider(names_from = "name", values_from = "value") + dirty_names_cleaned(colnames(summary), abbrev_nm, x) + + # GC BIAS DETAILS + details <- d1 |> + dplyr::filter(.data$category == "GC BIAS DETAILS") |> + dplyr::mutate( + gc = sub(".* at GC (.*)", "\\1", .data$name), + name = sub("(.*) at GC .*", "\\1", .data$name), + name = tolower(.data$name), + name = sub(" ", "", .data$name), + value = as.numeric(.data$value), + fraction = as.numeric(.data$fraction) + ) + details_wind <- details |> + dplyr::filter(.data$name == "windows") |> + dplyr::select("gc", "value") + + details_cov <- details |> + dplyr::filter(.data$name == "normalizedcoverage") |> + dplyr::select("gc", "value") + + list( + gcmetrics_summary = summary, + gcmetrics_windows = details_wind, + gcmetrics_coverage = details_cov + ) |> + tibble::enframe(name = "name", value = "data") +} -#' WgsCoverageMetricsFile R6 Class +#' Read DRAGEN CNV Metrics #' -#' @description -#' Contains methods for reading and displaying contents of -#' the `wgs_coverage_metrics_.csv` file output from DRAGEN. -#' This file contains read depth of coverage metrics. +#' Reads the `cnv_metrics.csv` file output from DRAGEN. +#' +#' @param x Path to file. #' +#' @return Tibble with metrics. #' @examples -#' x1 <- system.file("extdata/wgs/SEQC-II.wgs_coverage_metrics_normal.csv.gz", package = "dracarys") -#' x2 <- system.file("extdata/wgs/SEQC-II.wgs_coverage_metrics_tumor.csv.gz", package = "dracarys") -#' cm1 <- WgsCoverageMetricsFile$new(x1) -#' cm2 <- WgsCoverageMetricsFile$new(x2) -#' d1 <- read(cm1) -#' d2 <- read(cm2) -#' cm1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv") -#' cm2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv") +#' \dontrun{ +#' dragen_cnv_metrics_read(x) +#' } #' #' @export -WgsCoverageMetricsFile <- R6::R6Class( - "WgsCoverageMetricsFile", - inherit = File, - public = list( - #' @description - #' Reads the `wgs_coverage_metrics_.csv` file output from DRAGEN. - #' - #' @return tibble with one row and metrics spread across individual columns. 
- read = function() { - abbrev_nm <- c( - "Aligned bases" = "bases_aligned_dragen", - "Aligned bases in genome" = "bases_aligned_in_genome_dragen", - "Average alignment coverage over genome" = "cov_alignment_avg_over_genome_dragen", - "Uniformity of coverage (PCT > 0.2*mean) over genome" = "cov_uniformity_over_genome_pct_gt02mean_dragen", - "Uniformity of coverage (PCT > 0.4*mean) over genome" = "cov_uniformity_over_genome_pct_gt04mean_dragen", - "Average chr X coverage over genome" = "cov_avg_x_over_genome_dragen", - "Average chr Y coverage over genome" = "cov_avg_y_over_genome_dragen", - "Average mitochondrial coverage over genome" = "cov_avg_mt_over_genome_dragen", - "Average autosomal coverage over genome" = "cov_avg_auto_over_genome_dragen", - "Median autosomal coverage over genome" = "cov_median_auto_over_genome_dragen", - "Mean/Median autosomal coverage ratio over genome" = "cov_mean_median_auto_ratio_over_genome_dragen", - "Aligned reads" = "reads_aligned_dragen", - "Aligned reads in genome" = "reads_aligned_in_genome_dragen" - ) - - x <- self$path - raw <- readr::read_lines(x) - assertthat::assert_that(grepl("COVERAGE SUMMARY", raw[1])) - - res <- raw |> - tibble::as_tibble_col(column_name = "value") |> - tidyr::separate_wider_delim( - "value", - delim = ",", too_few = "align_start", - names = c("category", "dummy1", "var", "value", "pct") - ) - # split to rename the - # "PCT of genome with coverage [100x: inf)" values - res1 <- res |> - # pct just shows 100% for a couple rows - dplyr::filter(!grepl("PCT of genome with coverage", .data$var)) |> - dplyr::select("var", "value") - res2 <- res |> - dplyr::filter(grepl("PCT of genome with coverage", .data$var)) |> - dplyr::mutate( - var = sub("PCT of genome with coverage ", "", .data$var), - var = gsub("\\[|\\]|\\(|\\)| ", "", .data$var), - var = gsub("x", "", .data$var), - var = gsub("inf", "Inf", .data$var) - ) |> - tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |> - dplyr::mutate(var = as.character(glue("cov_genome_pct_{start}_{end}_dragen"))) |> - dplyr::select("var", "value") - res <- dplyr::bind_rows(res1, res2) |> - dplyr::mutate( - value = dplyr::na_if(.data$value, "NA"), - value = as.numeric(.data$value), - var = dplyr::recode(.data$var, !!!abbrev_nm) - ) |> - tidyr::pivot_wider(names_from = "var", values_from = "value") - }, - #' @description - #' Writes a tidy version of the `wgs_coverage_metrics_.csv` file output - #' from DRAGEN - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). 
- write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - } +dragen_cnv_metrics_read <- function(x) { + d0 <- readr::read_lines(x) + assertthat::assert_that(grepl("SEX GENOTYPER", d0[1])) + abbrev_nm <- c( + "Bases in reference genome" = "bases_in_ref_genome", + "Average alignment coverage over genome" = "cov_alignment_avg_over_genome", + "Number of alignment records" = "n_alignment_records", + "Number of filtered records (total)" = "n_filtered_records_tot", + "Number of filtered records (duplicates)" = "n_filtered_records_dup", + "Number of filtered records (MAPQ)" = "n_filtered_records_mapq", + "Number of filtered records (unmapped)" = "n_filtered_records_unmap", + "Coverage MAD" = "coverage_mad", + "Gene Scaled MAD" = "gene_scaled_mad", + "Median Bin Count" = "median_bin_count", + "Number of target intervals" = "n_target_intervals", + "Number of normal samples" = "n_samp_norm", + "Number of segments" = "n_seg", + "Number of amplifications" = "n_amp", + "Number of deletions" = "n_del", + "Number of passing amplifications" = "n_amp_pass", + "Number of passing deletions" = "n_del_pass" ) -) + d1 <- d0 |> + tibble::as_tibble_col(column_name = "value") |> + tidyr::separate_wider_delim( + "value", + names = c("category", "extra", "var", "count", "pct"), + delim = ",", too_few = "align_start" + ) + sexgt <- d1 |> + dplyr::filter(.data$category == "SEX GENOTYPER") |> + dplyr::select(sexgt = "count", sexgt_pct = "pct") + + d2 <- d1 |> + dplyr::filter(!.data$category == "SEX GENOTYPER") |> + dplyr::mutate( + count = as.numeric(.data$count), + pct = round(as.numeric(.data$pct), 2), + var = dplyr::recode(.data$var, !!!abbrev_nm) + ) + dirty_names_cleaned(d2$var, abbrev_nm, x) + d2 <- d2 |> + dplyr::select("var", "count", "pct") |> + tidyr::pivot_longer(c("count", "pct")) |> + dplyr::filter(!is.na(.data$value)) |> + dplyr::mutate( + name = dplyr::if_else(.data$name == "count", "", "_pct"), + var = glue("{.data$var}{.data$name}") + ) |> + dplyr::select("var", "value") |> + tidyr::pivot_wider(names_from = "var", values_from = "value") + res <- dplyr::bind_cols(sexgt, d2) + return(res) +} -#' WgsFineHistFile R6 Class +#' Read DRAGEN SV Metrics #' -#' @description -#' Contains methods for reading and displaying contents of -#' the `wgs_fine_hist_.csv` file output from DRAGEN. -#' This file contains two columns: Depth and Overall. -#' The value in the Depth column ranges from 0 to 1000+ and the Overall -#' column indicates the number of loci covered at the corresponding depth. +#' Reads the `sv_metrics.csv` file output from DRAGEN. +#' +#' @param x Path to file. #' +#' @return Tibble with metrics. #' @examples -#' x1 <- system.file("extdata/wgs/SEQC-II.wgs_fine_hist_normal.csv.gz", package = "dracarys") -#' x2 <- system.file("extdata/wgs/SEQC-II.wgs_fine_hist_tumor.csv.gz", package = "dracarys") -#' ch1 <- WgsFineHistFile$new(x1) -#' ch2 <- WgsFineHistFile$new(x2) -#' d1 <- read(ch1) -#' d2 <- read(ch2) -#' ch1$plot(d1) -#' ch2$plot(d2) -#' ch1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv") -#' ch2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv") +#' \dontrun{ +#' dragen_sv_metrics_read(x) +#' } #' @export -WgsFineHistFile <- R6::R6Class( - "WgsFineHistFile", - inherit = File, - public = list( - #' @description - #' Reads the `wgs_fine_hist_.csv` file output from DRAGEN. 
- #' @return tibble with following columns: - #' - depth - #' - number of loci with given depth - read = function() { - x <- self$path - d <- readr::read_csv(x, col_types = "cd") - assertthat::assert_that(all(colnames(d) == c("Depth", "Overall"))) - - # there's a max Depth of 2000+, so convert to numeric for easier plotting - d |> - dplyr::mutate( - Depth = ifelse(grepl("+", .data$Depth), sub("(\\d*)\\+", "\\1", .data$Depth), .data$Depth), - Depth = as.integer(.data$Depth) - ) |> - dplyr::select(depth = "Depth", n_loci = "Overall") - }, - #' @description - #' Writes a tidy version of the `wgs_fine_hist_.csv` file output - #' from DRAGEN - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). - write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - }, - - #' @description Plots the `wgs_fine_hist_.csv` files. - #' @param d Parsed object from `self$read()`. - #' @param x_lim X axis range to plot. - #' @return A ggplot2 object with depth of coverage on X axis, - #' and number of loci with that depth on Y axis. - plot = function(d, x_lim = c(0, 300)) { - assertthat::assert_that(length(x_lim) == 2) - d |> - ggplot2::ggplot(ggplot2::aes(x = .data$depth, y = .data$n_loci)) + - ggplot2::geom_line() + - ggplot2::coord_cartesian(xlim = x_lim) + - ggplot2::scale_y_continuous(labels = scales::label_comma()) + - ggplot2::scale_x_continuous(n.breaks = 8) + - ggplot2::labs(title = "Coverage Distribution", colour = "Label") + - ggplot2::xlab("Depth of Coverage") + - ggplot2::ylab("Number of Loci with Given Coverage") + - ggplot2::theme_minimal() + - ggplot2::theme( - legend.position = c(0.9, 0.9), - legend.justification = c(1, 1), - panel.grid.minor = ggplot2::element_blank(), - panel.grid.major.y = ggplot2::element_blank(), - plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold") - ) - } +dragen_sv_metrics_read <- function(x) { + d <- readr::read_lines(x) + assertthat::assert_that(grepl("SV SUMMARY", d[1])) + abbrev_nm <- c( + "Number of deletions (PASS)" = "del", + "Number of insertions (PASS)" = "ins", + "Number of duplications (PASS)" = "dup", + "Number of breakend pairs (PASS)" = "bnd" ) -) + res <- d |> + tibble::as_tibble_col(column_name = "value") |> + dplyr::filter(!grepl("Total number of structural variants", .data$value)) |> + tidyr::separate_wider_delim( + "value", + names = c("svsum", "sample", "var", "count", "pct"), delim = ",", + too_few = "align_start" + ) |> + dplyr::mutate( + count = as.numeric(.data$count), + var = dplyr::recode(.data$var, !!!abbrev_nm) + ) |> + dplyr::select("var", "count") |> + tidyr::pivot_wider(names_from = "var", values_from = "count") + dirty_names_cleaned(colnames(res), abbrev_nm, x) + res +} -#' FragmentLengthHistFile R6 Class +#' Read DRAGEN Trimmer Metrics #' -#' @description -#' Contains methods for reading and plotting contents of -#' the `fragment_length_hist.csv` file output from DRAGEN. +#' Reads the `trimmer_metrics.csv` file output from DRAGEN. #' +#' @param x Path to file. +#' +#' @return Tibble with metrics. 
#' @examples -#' x <- system.file("extdata/wgs/SEQC-II.fragment_length_hist.csv.gz", package = "dracarys") -#' fl <- FragmentLengthHistFile$new(x) -#' d <- fl$read() # or read(fl) -#' fl$plot(d) # or plot(fl) -#' fl$write(d |> dplyr::filter(count > 10), out_dir = tempdir(), prefix = "seqc_fl") +#' \dontrun{ +#' dragen_trimmer_metrics_read(x) +#' } #' @export -FragmentLengthHistFile <- R6::R6Class( - "FragmentLengthHistFile", - inherit = File, - public = list( - #' @description Reads the `fragment_length_hist.csv` file, which contains the - #' fragment length distribution for each sample. - #' @return A tibble with the following columns: - #' - sample: name of sample - #' - fragmentLength: estimated fragment length - #' - count: number of reads with estimated fragment length - read = function() { - x <- self$path - d <- readr::read_lines(x) - assertthat::assert_that(grepl("#Sample", d[1])) - - d |> - tibble::enframe() |> - dplyr::mutate( - sample = dplyr::if_else( - grepl("#Sample", .data$value), - sub("#Sample: (.*)", "\\1", .data$value), - NA_character_ - ) - ) |> - tidyr::fill("sample", .direction = "down") |> - dplyr::filter(!grepl("#Sample: |FragmentLength,Count", .data$value)) |> - tidyr::separate_wider_delim(cols = "value", names = c("fragmentLength", "count"), delim = ",") |> - dplyr::mutate( - count = as.numeric(.data$count), - fragmentLength = as.numeric(.data$fragmentLength) - ) |> - dplyr::select("sample", "fragmentLength", "count") - }, - #' @description - #' Writes a tidy version of the `fragment_length_hist.csv` file output - #' from DRAGEN. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). 
- write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - }, - +dragen_trimmer_metrics_read <- function(x) { + d0 <- readr::read_lines(x) + assertthat::assert_that(grepl("TRIMMER STATISTICS", d0[1])) + abbrev_nm <- c( + "Total input reads" = "reads_tot_input", + "Total input bases" = "bases_tot", + "Total input bases R1" = "bases_r1", + "Total input bases R2" = "bases_r2", + "Average input read length" = "read_len_avg", + "Total trimmed reads" = "reads_trimmed_tot", + "Total trimmed bases" = "bases_trimmed_tot", + "Average bases trimmed per read" = "bases_trimmed_avg_per_read", + "Average bases trimmed per trimmed read" = "bases_trimmed_avg_per_trimmedread", + "Remaining poly-G K-mers R1 3prime" = "polygkmers3r1_remaining", + "Remaining poly-G K-mers R2 3prime" = "polygkmers3r2_remaining", + "Poly-G soft trimmed reads unfiltered R1 3prime" = "polyg_soft_trimmed_reads_unfilt_3r1", + "Poly-G soft trimmed reads unfiltered R2 3prime" = "polyg_soft_trimmed_reads_unfilt_3r2", + "Poly-G soft trimmed reads filtered R1 3prime" = "polyg_soft_trimmed_reads_filt_3r1", + "Poly-G soft trimmed reads filtered R2 3prime" = "polyg_soft_trimmed_reads_filt_3r2", + "Poly-G soft trimmed bases unfiltered R1 3prime" = "polyg_soft_trimmed_bases_unfilt_3r1", + "Poly-G soft trimmed bases unfiltered R2 3prime" = "polyg_soft_trimmed_bases_unfilt_3r2", + "Poly-G soft trimmed bases filtered R1 3prime" = "polyg_soft_trimmed_bases_filt_3r1", + "Poly-G soft trimmed bases filtered R2 3prime" = "polyg_soft_trimmed_bases_filt_3r2", + "Total filtered reads" = "reads_tot_filt", + "Reads filtered for minimum read length R1" = "reads_filt_minreadlenr1", + "Reads filtered for minimum read length R2" = "reads_filt_minreadlenr2" + ) - #' @description Plots the fragment length distributions as given in the - #' `fragment_length_hist.csv` file. - #' - #' @param d Parsed object from `self$read()`. - #' @param min_count Minimum read count to be plotted (Default: 10). - #' @return A ggplot2 plot containing fragment lengths on X axis and read counts - #' on Y axis for each sample. 
- plot = function(d, min_count = 10) { - assertthat::assert_that(min_count >= 0) - d <- d |> - dplyr::filter(.data$count >= min_count) + d1 <- d0 |> + tibble::as_tibble_col(column_name = "value") |> + tidyr::separate_wider_delim("value", names = c("category", "extra", "var", "count", "pct"), delim = ",", too_few = "align_start") |> + dplyr::mutate( + count = as.numeric(.data$count), + pct = round(as.numeric(.data$pct), 2), + var = dplyr::recode(.data$var, !!!abbrev_nm) + ) |> + dplyr::select("var", "count", "pct") + dirty_names_cleaned(d1$var, abbrev_nm, x) + d1 |> + tidyr::pivot_longer(c("count", "pct")) |> + dplyr::filter(!is.na(.data$value)) |> + dplyr::mutate( + name = dplyr::if_else(.data$name == "count", "", "_pct"), + var = glue("{.data$var}{.data$name}") + ) |> + dplyr::select("var", "value") |> + tidyr::pivot_wider(names_from = "var", values_from = "value") +} - d |> - ggplot2::ggplot(ggplot2::aes(x = .data$fragmentLength, y = .data$count, colour = sample)) + - ggplot2::geom_line() + - ggplot2::labs(title = "Fragment Length Distribution") + - ggplot2::xlab("Fragment Length (bp)") + - ggplot2::ylab(glue("Read Count (min: {min_count})")) + - ggplot2::theme_minimal() + - ggplot2::theme( - legend.position = c(0.9, 0.9), - legend.justification = c(1, 1), - panel.grid.minor = ggplot2::element_blank(), - panel.grid.major = ggplot2::element_blank(), - plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold") - ) - } +#' Read DRAGEN VariantCall Metrics +#' +#' Reads the `vc_metrics.csv` file output from DRAGEN. +#' +#' @param x Path to file. +#' +#' @return Tibble with metrics. +#' @examples +#' \dontrun{ +#' dragen_vc_metrics_read(x) +#' } +#' @export +dragen_vc_metrics_read <- function(x) { + abbrev_nm1 <- tibble::tribble( + ~raw, ~clean, ~region, + "Total", "var_tot", FALSE, + "Biallelic", "var_biallelic", FALSE, + "Multiallelic", "var_multiallelic", FALSE, + "SNPs", "var_snp", FALSE, + "Insertions (Hom)", "var_ins_hom", FALSE, + "Insertions (Het)", "var_ins_het", FALSE, + "Deletions (Hom)", "var_del_hom", FALSE, + "Deletions (Het)", "var_del_het", FALSE, + "Indels (Het)", "var_indel_het", FALSE, + "Chr X number of SNPs over ", "var_snp_x_over_", TRUE, + "Chr Y number of SNPs over ", "var_snp_y_over_", TRUE, + "(Chr X SNPs)/(chr Y SNPs) ratio over ", "var_x_over_y_snp_ratio_over_", TRUE, + "SNP Transitions", "var_snp_transitions", FALSE, + "SNP Transversions", "var_snp_transversions", FALSE, + "Ti/Tv ratio", "var_ti_tv_ratio", FALSE, + "Heterozygous", "var_heterozygous", FALSE, + "Homozygous", "var_homozygous", FALSE, + "Het/Hom ratio", "var_het_hom_ratio", FALSE, + "In dbSNP", "var_in_dbsnp", FALSE, + "Not in dbSNP", "var_nin_dbsnp", FALSE, + "Percent Callability", "callability_pct", FALSE, + "Percent Autosome Callability", "callability_auto_pct", FALSE, + "Number of samples", "sample_num", FALSE, + "Reads Processed", "reads_processed", FALSE, + "Child Sample", "sample_child", FALSE, + "Percent QC Region Callability in Region 1", "qc_region_callability_pct_region1", FALSE, + "Percent QC Region Callability in Region 2", "qc_region_callability_pct_region2", FALSE ) -) + d0 <- readr::read_lines(x) + assertthat::assert_that(grepl("VARIANT CALLER", d0[1])) + # first detect if this is genome or target region + d1 <- d0 |> + tibble::as_tibble_col(column_name = "value") |> + tidyr::separate_wider_delim( + "value", + names = c("category", "sample", "var", "count", "pct"), + delim = ",", too_few = "align_start" + ) + reg1 <- NULL + str1 <- NULL + tmp <- d1 |> + 
dplyr::filter(grepl("Chr X number of SNPs over ", .data$var)) |> + dplyr::slice_head(n = 1) |> + dplyr::pull("var") + assertthat::assert_that(length(tmp) == 1) + if (grepl("genome", tmp)) { + str1 <- "genome" + reg1 <- "genome" + } else if (grepl("QC coverage region", tmp)) { + str1 <- "QC coverage region" + reg1 <- "qccovreg" + } else if (grepl("target region", tmp)) { + str1 <- "target region" + reg1 <- "targetreg" + } else { + cli::cli_abort("Cannot determine the varcall region from: {x}") + } + abbrev_nm <- abbrev_nm1 |> + dplyr::mutate( + raw = ifelse(.data$region, glue("{.data$raw}{str1}"), .data$raw), + clean = ifelse(.data$region, glue("{.data$clean}{reg1}"), .data$clean) + ) |> + dplyr::select("raw", "clean") |> + tibble::deframe() + + d <- d1 |> + dplyr::mutate( + var = dplyr::recode(.data$var, !!!abbrev_nm), + count = dplyr::na_if(.data$count, "NA"), + count = as.numeric(.data$count), + pct = round(as.numeric(.data$pct), 2), + category = dplyr::case_when( + grepl("SUMMARY", .data$category) ~ "summary", + grepl("PREFILTER", .data$category) ~ "prefilter", + grepl("POSTFILTER", .data$category) ~ "postfilter", + TRUE ~ "unknown" + ) + ) |> + dplyr::filter(.data$category != "summary") |> + dplyr::select("category", "sample", "var", "count", "pct") + dirty_names_cleaned(d$var, abbrev_nm, x) + # pivot + d |> + tidyr::pivot_longer(c("count", "pct")) |> + dplyr::mutate( + name = dplyr::if_else(.data$name == "count", "", "_pct"), + var = glue("{.data$var}{.data$name}") + ) |> + dplyr::select("category", "sample", "var", "value") |> + dplyr::filter(!is.na(.data$value)) |> + tidyr::pivot_wider(names_from = "var", values_from = "value") +} -#' MappingMetricsFile R6 Class +#' Read DRAGEN Mapping Metrics #' -#' @description -#' Contains methods for reading and displaying contents of -#' the `mapping_metrics.csv` file output from DRAGEN. -#' This file contains mapping and aligning metrics, like the metrics computed by -#' the Samtools Flagstat command. These metrics are available on an aggregate -#' level (over all input data), and on a per read group level. NOTE: we are -#' keeping only the read group level metrics (i.e. removing the aggregate data). -#' Unless explicitly stated, the metrics units are in reads (i.e., not in -#' terms of pairs or alignments). +#' Reads the `mapping_metrics.csv` file output from DRAGEN. +#' +#' @param x Path to file. #' +#' @return Tibble with metrics. #' @examples -#' x <- system.file("extdata/wgs/SEQC-II.mapping_metrics.csv.gz", package = "dracarys") -#' mm <- MappingMetricsFile$new(x) -#' d <- mm$read() # or read(mm) -#' mm$write(d, out_dir = tempdir(), prefix = "seqc_mm", out_format = "tsv") +#' \dontrun{ +#' dragen_mapping_metrics_read(x) +#' } +#' @export +dragen_mapping_metrics_read <- function(x) { + abbrev_nm <- c( + "Total input reads" = "reads_tot_input", + "Number of duplicate marked reads" = "reads_num_dupmarked", + "Number of duplicate marked and mate reads removed" = "reads_num_dupmarked_mate_reads_removed", + "Number of unique reads (excl. duplicate marked reads)" = "reads_num_uniq", + "Reads with mate sequenced" = "reads_w_mate_seq", + "Reads without mate sequenced" = "reads_wo_mate_seq", + "QC-failed reads" = "reads_qcfail", + "Mapped reads" = "reads_mapped", + "Mapped reads adjusted for filtered mapping" = "reads_mapped_adjfilt", + "Mapped reads R1" = "reads_mapped_r1", + "Mapped reads R2" = "reads_mapped_r2", + "Number of unique & mapped reads (excl. 
duplicate marked reads)" = "reads_num_uniq_mapped", + "Unmapped reads" = "reads_unmapped", + "Unmapped reads adjusted for filtered mapping" = "reads_unmapped_adjfilt", + "Adjustment of reads matching non-reference decoys" = "reads_match_nonref_decoys_adj", + "Singleton reads (itself mapped; mate unmapped)" = "reads_singleton", + "Paired reads (itself & mate mapped)" = "reads_paired", + "Properly paired reads" = "reads_paired_proper", + "Not properly paired reads (discordant)" = "reads_discordant", + "Paired reads mapped to different chromosomes" = "reads_paired_mapped_diff_chrom", + "Paired reads mapped to different chromosomes (MAPQ>=10)" = "reads_paired_mapped_diff_chrom_mapq10", + "Reads with MAPQ [40:inf)" = "reads_mapq_40_inf", + "Reads with MAPQ [30:40)" = "reads_mapq_30_40", + "Reads with MAPQ [20:30)" = "reads_mapq_20_30", + "Reads with MAPQ [10:20)" = "reads_mapq_10_20", + "Reads with MAPQ [ 0:10)" = "reads_mapq_0_10", + "Reads with MAPQ NA (Unmapped reads)" = "reads_mapq_na_unmapped", + "Reads with indel R1" = "reads_indel_r1", + "Reads with indel R2" = "reads_indel_r2", + "Total bases" = "bases_tot", + "Total bases R1" = "bases_tot_r1", + "Total bases R2" = "bases_tot_r2", + "Mapped bases" = "bases_mapped", + "Mapped bases R1" = "bases_mapped_r1", + "Mapped bases R2" = "bases_mapped_r2", + "Soft-clipped bases" = "bases_softclip", + "Soft-clipped bases R1" = "bases_softclip_r1", + "Soft-clipped bases R2" = "bases_softclip_r2", + "Hard-clipped bases" = "bases_hardclip", + "Hard-clipped bases R1" = "bases_hardclip_r1", + "Hard-clipped bases R2" = "bases_hardclip_r2", + "Mismatched bases R1" = "bases_mismatched_r1", + "Mismatched bases R2" = "bases_mismatched_r2", + "Mismatched bases R1 (excl. indels)" = "bases_mismatched_r1_noindels", + "Mismatched bases R2 (excl. indels)" = "bases_mismatched_r2_noindels", + "Q30 bases" = "bases_q30", + "Q30 bases R1" = "bases_q30_r1", + "Q30 bases R2" = "bases_q30_r2", + "Q30 bases (excl. dups & clipped bases)" = "bases_q30_nodups_noclipped", + "Total alignments" = "alignments_tot", + "Secondary alignments" = "alignments_secondary", + "Supplementary (chimeric) alignments" = "alignments_chimeric", + "Estimated read length" = "read_len", + "Bases in reference genome" = "bases_in_ref_genome", + "Bases in target bed [% of genome]" = "bases_in_target_bed_genome_pct", + "Insert length: mean" = "insert_len_mean", + "Insert length: median" = "insert_len_median", + "Insert length: standard deviation" = "insert_len_std_dev", + "Provided sex chromosome ploidy" = "ploidy_sex_chrom_provided", + "Estimated sample contamination" = "contamination_est", + "Estimated sample contamination standard error" = "contamination_stderr_est", + "DRAGEN mapping rate [mil. 
reads/second]" = "mapping_rate_dragen_milreads_per_sec", + "Total reads in RG" = "reads_tot_rg", + "Mapped reads adjusted for excluded mapping" = "reads_mapped_adjexcl", + "Mapped reads adjusted for filtered and excluded mapping" = "reads_mapped_adjfiltexcl", + "Unmapped reads adjusted for excluded mapping" = "reads_unmapped_adjexcl", + "Unmapped reads adjusted for filtered and excluded mapping" = "reads_unmapped_adjfiltexcl", + "Reads mapping to multiple locations" = "reads_map_multiloc", + "Adjustment of reads matching filter contigs" = "reads_match_filt_contig_adj", + "Reads with splice junction" = "reads_splicejunc", + "Average sequenced coverage over genome" = "cov_avg_seq_over_genome", + "Filtered rRNA reads" = "reads_rrna_filtered" + ) + d0 <- readr::read_lines(x) + assertthat::assert_that(grepl("MAPPING/ALIGNING", d0[1])) + # split by RG and non-RG + # tidy + d <- d0 |> + tibble::as_tibble_col(column_name = "value") |> + tidyr::separate_wider_delim( + "value", + names = c("category", "RG", "var", "count", "pct"), + delim = ",", too_few = "align_start" + ) |> + dplyr::mutate( + count = dplyr::na_if(.data$count, "NA"), + count = as.numeric(.data$count), + pct = as.numeric(.data$pct), + var = dplyr::recode(.data$var, !!!abbrev_nm), + RG = dplyr::if_else(.data$RG == "", "Total", .data$RG) + ) |> + dplyr::select("RG", "var", "count", "pct") + dirty_names_cleaned(unique(d$var), abbrev_nm, x) + # pivot + d |> + tidyr::pivot_longer(c("count", "pct")) |> + dplyr::mutate( + name = dplyr::if_else(.data$name == "count", "", "_pct"), + var = glue("{.data$var}{.data$name}") + ) |> + dplyr::select("RG", "var", "value") |> + dplyr::filter(!is.na(.data$value)) |> + tidyr::pivot_wider(names_from = "var", values_from = "value") +} + +#' Read DRAGEN Coverage Metrics +#' +#' Reads the `coverage_metrics.csv` file generated by DRAGEN. +#' @param x Path to file. #' +#' @return Tibble with metrics. +#' @examples +#' \dontrun{ +#' dragen_coverage_metrics_read(x) +#' } #' @export -MappingMetricsFile <- R6::R6Class( - "MappingMetricsFile", - inherit = File, - public = list( - #' @description - #' Reads the `mapping_metrics.csv` file output from DRAGEN. - #' - #' @return tibble with one row of X metrics per read group. - read = function() { - abbrev_nm <- c( - "Total input reads" = "reads_tot_input_dragen", - "Number of duplicate marked reads" = "reads_num_dupmarked_dragen", - "Number of unique reads (excl. duplicate marked reads)" = "reads_num_uniq_dragen", - "Reads with mate sequenced" = "reads_w_mate_seq_dragen", - "Reads without mate sequenced" = "reads_wo_mate_seq_dragen", - "QC-failed reads" = "reads_qcfail_dragen", - "Mapped reads adjusted for excluded mapping" = "reads_mapped_adjexcl_dragen", - "Mapped reads adjusted for filtered and excluded mapping" = "reads_mapped_adjfiltexcl_dragen", - "Unmapped reads adjusted for excluded mapping" = "reads_unmapped_adjexcl_dragen", - "Unmapped reads adjusted for filtered and excluded mapping" = "reads_unmapped_adjfiltexcl_dragen", - "Reads mapping to multiple locations" = "reads_map_multiloc_dragen", - "Hard-clipped bases R1" = "bases_hardclip_r1_dragen", - "Hard-clipped bases R2" = "bases_hardclip_r2_dragen", - "Soft-clipped bases" = "bases_softclip_dragen", - "Hard-clipped bases" = "bases_hardclip_dragen", - "Mapped reads" = "reads_mapped_dragen", - "Mapped reads adjusted for filtered mapping" = "reads_mapped_adjfilt_dragen", - "Mapped reads R1" = "reads_mapped_r1_dragen", - "Mapped reads R2" = "reads_mapped_r2_dragen", - "Number of unique & mapped reads (excl. 
duplicate marked reads)" = "reads_num_uniq_mapped_dragen", - "Unmapped reads" = "reads_unmapped_dragen", - "Unmapped reads adjusted for filtered mapping" = "reads_unmapped_adjfilt_dragen", - "Adjustment of reads matching non-reference decoys" = "reads_match_nonref_decoys_adj_dragen", - "Adjustment of reads matching filter contigs" = "reads_match_filt_contig_adj_dragen", - "Singleton reads (itself mapped; mate unmapped)" = "reads_singleton_dragen", - "Paired reads (itself & mate mapped)" = "reads_paired_dragen", - "Properly paired reads" = "reads_paired_proper_dragen", - "Not properly paired reads (discordant)" = "reads_discordant_dragen", - "Paired reads mapped to different chromosomes" = "reads_paired_mapped_diff_chrom_dragen", - "Paired reads mapped to different chromosomes (MAPQ>=10)" = "reads_paired_mapped_diff_chrom_mapq10_dragen", - "Reads with MAPQ [40:inf)" = "reads_mapq_40_inf_dragen", - "Reads with MAPQ [30:40)" = "reads_mapq_30_40_dragen", - "Reads with MAPQ [20:30)" = "reads_mapq_20_30_dragen", - "Reads with MAPQ [10:20)" = "reads_mapq_10_20_dragen", - "Reads with MAPQ [ 0:10)" = "reads_mapq_0_10_dragen", - "Reads with MAPQ NA (Unmapped reads)" = "reads_mapq_na_unmapped_dragen", - "Reads with indel R1" = "reads_indel_r1_dragen", - "Reads with indel R2" = "reads_indel_r2_dragen", - "Reads with splice junction" = "reads_splicejunc_dragen", - "Total bases" = "bases_tot_dragen", - "Total bases R1" = "bases_tot_r1_dragen", - "Total bases R2" = "bases_tot_r2_dragen", - "Mapped bases" = "bases_mapped_dragen", - "Mapped bases R1" = "bases_mapped_r1_dragen", - "Mapped bases R2" = "bases_mapped_r2_dragen", - "Soft-clipped bases R1" = "bases_softclip_r1_dragen", - "Soft-clipped bases R2" = "bases_softclip_r2_dragen", - "Mismatched bases R1" = "bases_mismatched_r1_dragen", - "Mismatched bases R2" = "bases_mismatched_r2_dragen", - "Mismatched bases R1 (excl. indels)" = "bases_mismatched_r1_noindels_dragen", - "Mismatched bases R2 (excl. indels)" = "bases_mismatched_r2_noindels_dragen", - "Q30 bases" = "bases_q30_dragen", - "Q30 bases R1" = "bases_q30_r1_dragen", - "Q30 bases R2" = "bases_q30_r2_dragen", - "Q30 bases (excl. dups & clipped bases)" = "bases_q30_nodups_noclipped_dragen", - "Total alignments" = "alignments_tot_dragen", - "Secondary alignments" = "alignments_secondary_dragen", - "Supplementary (chimeric) alignments" = "alignments_chimeric_dragen", - "Estimated read length" = "read_len_dragen", - "Bases in reference genome" = "bases_in_ref_genome_dragen", - "Bases in target bed [% of genome]" = "bases_in_target_bed_genome_pct_dragen", - "Average sequenced coverage over genome" = "cov_avg_seq_over_genome_dragen", - "Insert length: mean" = "insert_len_mean_dragen", - "Insert length: median" = "insert_len_median_dragen", - "Insert length: standard deviation" = "insert_len_std_dev_dragen", - "Provided sex chromosome ploidy" = "ploidy_sex_chrom_provided_dragen", - "Estimated sample contamination" = "contamination_est_dragen", - "DRAGEN mapping rate [mil. 
reads/second]" = "mapping_rate_dragen_milreads_per_sec_dragen", - "Number of duplicate marked and mate reads removed" = "reads_num_dupmarked_mate_reads_removed_dragen", - "Total reads in RG" = "reads_tot_rg_dragen", - "Filtered rRNA reads" = "reads_rrna_filtered_dragen" - ) - x <- self$path - raw <- readr::read_lines(x) - assertthat::assert_that(grepl("MAPPING/ALIGNING", raw[1])) - # tidy - d <- raw |> - tibble::as_tibble_col(column_name = "value") |> - tidyr::separate_wider_delim( - "value", - names = c("category", "RG", "var", "count", "pct"), - delim = ",", too_few = "align_start" - ) |> - dplyr::filter(.data$RG != "") |> - dplyr::mutate( - count = dplyr::na_if(.data$count, "NA"), - count = as.numeric(.data$count), - pct = as.numeric(.data$pct), - var = dplyr::recode(.data$var, !!!abbrev_nm) - ) |> - dplyr::select("RG", "var", "count", "pct") - # pivot - d |> - tidyr::pivot_longer(c("count", "pct")) |> - dplyr::mutate( - name = dplyr::if_else(.data$name == "count", "", "_pct"), - var = glue("{.data$var}{.data$name}") - ) |> - dplyr::select("RG", "var", "value") |> - dplyr::filter(!is.na(.data$value)) |> - tidyr::pivot_wider(names_from = "var", values_from = "value") - }, - #' @description - #' Writes a tidy version of the `mapping_metrics.csv` file output - #' from DRAGEN. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). - write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - } +dragen_coverage_metrics_read <- function(x) { + # all rows except 'Aligned bases' and 'Aligned reads' refer to the region + abbrev_nm <- tibble::tribble( + ~raw, ~clean, ~region, + "Aligned bases", "bases_aligned_tot", FALSE, + "Aligned reads", "reads_aligned_tot", FALSE, + "Aligned bases in ", "bases_aligned_", TRUE, + "Average alignment coverage over ", "cov_alignment_avg_over_", TRUE, + "Uniformity of coverage (PCT > 0.2*mean) over ", "cov_uniformity_pct_gt02mean_", TRUE, + "Uniformity of coverage (PCT > 0.4*mean) over ", "cov_uniformity_pct_gt04mean_", TRUE, + "Average chr X coverage over ", "cov_avg_x_over_", TRUE, + "Average chr Y coverage over ", "cov_avg_y_over_", TRUE, + "Average mitochondrial coverage over ", "cov_avg_mt_over_", TRUE, + "Average autosomal coverage over ", "cov_avg_auto_over_", TRUE, + "Median autosomal coverage over ", "cov_median_auto_over_", TRUE, + "Mean/Median autosomal coverage ratio over ", "cov_mean_median_auto_ratio_over_", TRUE, + "Aligned reads in ", "reads_aligned_in_", TRUE ) -) + d0 <- readr::read_lines(x) + assertthat::assert_that(grepl("COVERAGE SUMMARY", d0[1])) + # first detect if this is genome, QC coverage region, or target region + d1 <- d0 |> + tibble::as_tibble_col(column_name = "value") |> + tidyr::separate_wider_delim( + "value", + delim = ",", too_few = "align_start", + names = c("category", "dummy1", "var", "value", "pct") + ) + reg1 <- NULL + str1 <- NULL + tmp <- d1 |> + dplyr::filter(grepl("PCT of .* with coverage ", .data$var)) |> + dplyr::slice_head(n = 1) |> + dplyr::pull("var") + assertthat::assert_that(length(tmp) == 1) + if (grepl("genome", tmp)) { + str1 <- "genome" + reg1 <- "genome" + } else if (grepl("QC coverage region", tmp)) { + str1 <- "QC coverage 
region" + reg1 <- "qccovreg" + } else if (grepl("target region", tmp)) { + str1 <- "target region" + reg1 <- "targetreg" + } else { + cli::cli_abort("Cannot determine the coverage region from: {x}") + } + abbrev_nm <- abbrev_nm |> + dplyr::mutate( + raw = ifelse(.data$region, glue("{.data$raw}{str1}"), .data$raw), + clean = ifelse(.data$region, glue("{.data$clean}{reg1}"), .data$clean) + ) |> + dplyr::select("raw", "clean") |> + tibble::deframe() + # split to rename the + # "PCT of genome with coverage [100x: inf)" values + pat <- glue("PCT of {str1} with coverage ") + res1 <- d1 |> + # pct just shows % for a couple rows which can be + # calculated from their above values + dplyr::filter(!grepl(pat, .data$var)) |> + dplyr::select("var", "value") + dirty_names_cleaned(res1$var, names(abbrev_nm), x) + res2 <- d1 |> + dplyr::filter(grepl(pat, .data$var)) |> + dplyr::mutate( + var = sub(pat, "", .data$var), + var = gsub("\\[|\\]|\\(|\\)| ", "", .data$var), + var = gsub("x", "", .data$var), + var = gsub("inf", "Inf", .data$var) + ) |> + tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |> + dplyr::mutate(var = as.character(glue("cov_pct_{start}_{end}_{reg1}"))) |> + dplyr::select("var", "value") + res <- dplyr::bind_rows(res1, res2) |> + dplyr::mutate( + value = dplyr::na_if(.data$value, "NA"), + value = as.numeric(.data$value), + var = dplyr::recode(.data$var, !!!abbrev_nm) + ) |> + tidyr::pivot_wider(names_from = "var", values_from = "value") + return(res) +} + +#' Plot DRAGEN Contig Mean Coverage +#' TODO +#' Plots the `wgs_contig_mean_cov_.csv` files. +#' +#' @param d Parsed tibble. +#' @param top_alt_n Number of top covered alt contigs to plot per phenotype. +#' @return A ggplot2 object with chromosomes on X axis, and coverage on Y axis. +dragen_contig_mean_coverage_plot <- function(d, top_alt_n = 15) { + assertthat::assert_that(top_alt_n >= 0) + + # Display chr1-22, X, Y at top (M goes to bottom). 
+ # Display top 20 of the rest, plus rest as 'other', at bottom + main_chrom1 <- c(1:22, "X", "Y") + main_chrom2 <- c(paste0("chr", main_chrom1)) + main_chrom <- c(main_chrom1, main_chrom2, "Autosomal regions") + min_cvg <- 0.000001 + + d <- d |> + dplyr::mutate( + panel = dplyr::if_else(.data$chrom %in% main_chrom, "main", "alt"), + panel = factor(.data$panel, levels = c("main", "alt")) + ) |> + dplyr::select("chrom", "coverage", "panel") + + main_panel <- d |> dplyr::filter(.data$panel == "main") + alt_panel <- d |> dplyr::filter(.data$panel == "alt") + top_alt <- alt_panel |> + dplyr::top_n(top_alt_n, wt = .data$coverage) |> + dplyr::arrange(dplyr::desc(.data$coverage)) |> + dplyr::pull(.data$chrom) |> + unique() + + alt_panel2 <- alt_panel |> + dplyr::mutate(alt_group = dplyr::if_else(.data$chrom %in% top_alt, "top", "bottom")) + + alt_panel_final <- alt_panel2 |> + dplyr::group_by(.data$alt_group) |> + dplyr::summarise(mean_cov = mean(.data$coverage)) |> + dplyr::inner_join(alt_panel2, by = c("alt_group")) |> + dplyr::mutate( + chrom = dplyr::if_else(.data$alt_group == "bottom", "OTHER", .data$chrom), + coverage = dplyr::if_else(.data$alt_group == "bottom", .data$mean_cov, .data$coverage) + ) |> + dplyr::distinct() |> + dplyr::filter(.data$coverage > min_cvg) |> + dplyr::ungroup() |> + dplyr::select("chrom", "coverage", "panel") + + chrom_fac_levels <- c(main_chrom, "chrM", "MT", top_alt[!top_alt %in% c("chrM", "MT")], "OTHER") + d <- dplyr::bind_rows(main_panel, alt_panel_final) |> + dplyr::mutate(chrom = factor(.data$chrom, levels = chrom_fac_levels)) + + d |> + dplyr::mutate(label = "sampleA") |> + ggplot2::ggplot( + ggplot2::aes( + x = .data$chrom, y = .data$coverage, group = .data$label, + ) + ) + + ggplot2::geom_point() + + ggplot2::geom_line() + + ggplot2::scale_y_continuous( + limits = c(0, NA), expand = c(0, 0), labels = scales::comma, + breaks = scales::pretty_breaks(n = 8) + ) + + ggplot2::theme_minimal() + + ggplot2::labs(title = "Mean Coverage Per Chromosome", colour = "Label") + + ggplot2::xlab("Chromosome") + + ggplot2::ylab("Coverage") + + ggplot2::theme( + legend.position = "top", + panel.grid.minor = ggplot2::element_blank(), + panel.grid.major.y = ggplot2::element_blank(), + strip.background = ggplot2::element_blank(), + strip.text.x = ggplot2::element_blank(), + axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust = 1, size = 6), + plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold"), + panel.spacing = ggplot2::unit(2, "lines") + ) + + ggplot2::facet_wrap(ggplot2::vars(.data$panel), nrow = 2, scales = "free") +} #' PloidyEstimationMetricsFile R6 Class #' @@ -596,34 +792,34 @@ PloidyEstimationMetricsFile <- R6::R6Class( raw <- readr::read_lines(x) assertthat::assert_that(grepl("PLOIDY ESTIMATION", raw[1])) abbrev_nm <- c( - "Autosomal median coverage" = "cov_auto_median_dragen", - "X median coverage" = "cov_x_median_dragen", - "Y median coverage" = "cov_y_median_dragen", - "1 median / Autosomal median" = "cov_1_div_auto_median_dragen", - "2 median / Autosomal median" = "cov_2_div_auto_median_dragen", - "3 median / Autosomal median" = "cov_3_div_auto_median_dragen", - "4 median / Autosomal median" = "cov_4_div_auto_median_dragen", - "5 median / Autosomal median" = "cov_5_div_auto_median_dragen", - "6 median / Autosomal median" = "cov_6_div_auto_median_dragen", - "7 median / Autosomal median" = "cov_7_div_auto_median_dragen", - "8 median / Autosomal median" = "cov_8_div_auto_median_dragen", - "9 median / Autosomal median" = 
"cov_9_div_auto_median_dragen", - "10 median / Autosomal median" = "cov_10_div_auto_median_dragen", - "11 median / Autosomal median" = "cov_11_div_auto_median_dragen", - "12 median / Autosomal median" = "cov_12_div_auto_median_dragen", - "13 median / Autosomal median" = "cov_13_div_auto_median_dragen", - "14 median / Autosomal median" = "cov_14_div_auto_median_dragen", - "15 median / Autosomal median" = "cov_15_div_auto_median_dragen", - "16 median / Autosomal median" = "cov_16_div_auto_median_dragen", - "17 median / Autosomal median" = "cov_17_div_auto_median_dragen", - "18 median / Autosomal median" = "cov_18_div_auto_median_dragen", - "19 median / Autosomal median" = "cov_19_div_auto_median_dragen", - "20 median / Autosomal median" = "cov_20_div_auto_median_dragen", - "21 median / Autosomal median" = "cov_21_div_auto_median_dragen", - "22 median / Autosomal median" = "cov_22_div_auto_median_dragen", - "X median / Autosomal median" = "cov_x_div_auto_median_dragen", - "Y median / Autosomal median" = "cov_y_div_auto_median_dragen", - "Ploidy estimation" = "ploidy_est_dragen" + "Autosomal median coverage" = "cov_auto_median", + "X median coverage" = "cov_x_median", + "Y median coverage" = "cov_y_median", + "1 median / Autosomal median" = "cov_1_div_auto_median", + "2 median / Autosomal median" = "cov_2_div_auto_median", + "3 median / Autosomal median" = "cov_3_div_auto_median", + "4 median / Autosomal median" = "cov_4_div_auto_median", + "5 median / Autosomal median" = "cov_5_div_auto_median", + "6 median / Autosomal median" = "cov_6_div_auto_median", + "7 median / Autosomal median" = "cov_7_div_auto_median", + "8 median / Autosomal median" = "cov_8_div_auto_median", + "9 median / Autosomal median" = "cov_9_div_auto_median", + "10 median / Autosomal median" = "cov_10_div_auto_median", + "11 median / Autosomal median" = "cov_11_div_auto_median", + "12 median / Autosomal median" = "cov_12_div_auto_median", + "13 median / Autosomal median" = "cov_13_div_auto_median", + "14 median / Autosomal median" = "cov_14_div_auto_median", + "15 median / Autosomal median" = "cov_15_div_auto_median", + "16 median / Autosomal median" = "cov_16_div_auto_median", + "17 median / Autosomal median" = "cov_17_div_auto_median", + "18 median / Autosomal median" = "cov_18_div_auto_median", + "19 median / Autosomal median" = "cov_19_div_auto_median", + "20 median / Autosomal median" = "cov_20_div_auto_median", + "21 median / Autosomal median" = "cov_21_div_auto_median", + "22 median / Autosomal median" = "cov_22_div_auto_median", + "X median / Autosomal median" = "cov_x_div_auto_median", + "Y median / Autosomal median" = "cov_y_div_auto_median", + "Ploidy estimation" = "ploidy_est" ) d <- raw |> @@ -635,7 +831,7 @@ PloidyEstimationMetricsFile <- R6::R6Class( ) |> tidyr::pivot_wider(names_from = "var", values_from = "value") # now convert all except 'Ploidy estimation' to numeric - cols1 <- colnames(d)[colnames(d) != "ploidy_est_dragen"] + cols1 <- colnames(d)[colnames(d) != "ploidy_est"] d |> dplyr::mutate(dplyr::across(dplyr::all_of(cols1), as.numeric)) }, @@ -657,456 +853,13 @@ PloidyEstimationMetricsFile <- R6::R6Class( ) ) -#' ReplayFile R6 Class -#' -#' @description -#' Contains methods for reading contents of -#' the `replay.json` file output from DRAGEN, which contains the DRAGEN command -#' line, parameters and version for the specific run. 
-#' -#' @examples -#' x <- system.file("extdata/wgs/SEQC-II-replay.json.gz", package = "dracarys") -#' r <- ReplayFile$new(x) -#' d <- r$read() # or read(r) -#' r$write(d, out_dir = tempdir(), prefix = "seqc_replay", out_format = "tsv") -#' @export -ReplayFile <- R6::R6Class( - "ReplayFile", - inherit = File, - public = list( - #' @description Reads the `replay.json` file. - #' @return tibble with one row and metrics spread across individual columns. - read = function() { - x <- self$path - res <- x |> - jsonlite::read_json(simplifyVector = TRUE) |> - purrr::map_if(is.data.frame, tibble::as_tibble) - - req_elements <- c("command_line", "hash_table_build", "dragen_config", "system") - assertthat::assert_that(all(names(res) %in% req_elements)) - res[["system"]] <- res[["system"]] |> - tibble::as_tibble_row() - res[["hash_table_build"]] <- res[["hash_table_build"]] |> - tibble::as_tibble_row() - # we don't care if the columns are characters, no analysis likely to be done on dragen options - # (though never say never!) - res[["dragen_config"]] <- res[["dragen_config"]] |> - tidyr::pivot_wider(names_from = "name", values_from = "value") - - dplyr::bind_cols(res) - }, - #' @description - #' Writes a tidy version of the `replay.json` file output - #' from DRAGEN. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). - write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - } - ) -) - -#' TimeMetricsFile R6 Class -#' -#' @description -#' Contains methods for reading contents of -#' the `time_metrics.csv` file output from DRAGEN, which contains -#' a breakdown of the run duration for each DRAGEN process. -#' -#' @examples -#' x <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys") -#' tm <- TimeMetricsFile$new(x) -#' d <- tm$read() # or read(tm) -#' tm$write(d, out_dir = tempdir(), prefix = "seqc_time", out_format = "tsv") -#' @export -TimeMetricsFile <- R6::R6Class( - "TimeMetricsFile", - inherit = File, - public = list( - #' @description Reads the `time_metrics.csv` file. - #' @return tibble with one row and metrics spread across individual columns. - read = function() { - x <- self$path - cn <- c("dummy1", "dummy2", "Step", "time_hrs", "time_sec") - ct <- readr::cols(.default = "c", time_hrs = readr::col_time(format = "%T"), time_sec = "d") - d <- readr::read_csv(x, col_names = cn, col_types = ct) - assertthat::assert_that(d$dummy1[1] == "RUN TIME", is.na(d$dummy2[1])) - assertthat::assert_that(inherits(d$time_hrs, "hms")) - d |> - dplyr::mutate( - Step = tools::toTitleCase(sub("Time ", "", .data$Step)), - Time = substr(.data$time_hrs, 1, 5) - ) |> - dplyr::select("Step", "Time") |> - tidyr::pivot_wider(names_from = "Step", values_from = "Time") |> - dplyr::relocate("Total Runtime") - }, - #' @description - #' Writes a tidy version of the `time_metrics.csv` file output - #' from DRAGEN. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). 
- write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - } - ) -) - -#' Process Multiple TimeMetricsFile Objects -#' -#' Processes multiple TimeMetricsFile objects. -#' -#' @param x Atomic vector with one or more TimeMetricsFile objects. -#' @param id ID for each input, which is used to disambiguate files -#' generated from same samples. Default: index from 1 to length of `x`. -#' @return tibble with the following columns: -#' - Step: DRAGEN step -#' - Time: time in HH:MM -#' -#' @examples -#' p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys") -#' x <- TimeMetricsFile$new(p) -#' (tm <- time_metrics_process(c(x, x), id = c("run1", "run2"))) -#' -#' @testexamples -#' expect_equal(nrow(tm), 2) -#' -#' @export -time_metrics_process <- function(x, id = seq_len(length(x))) { - assertthat::assert_that(all(purrr::map_lgl(x, ~ inherits(.x, "TimeMetricsFile")))) - x |> - purrr::map(read) |> - purrr::set_names(id) |> - dplyr::bind_rows(.id = "ID") +dragen_subprefix <- function(x, suffix) { + # L2401290.exon_contig_mean_cov.csv -> exon + # L2401290.target_bed_contig_mean_cov.csv -> target_bed + # L2401290.tmb_contig_mean_cov.csv -> tmb + # L2401290.wgs_contig_mean_cov.csv -> wgs + bname <- basename(x) + s1 <- tools::file_path_sans_ext(bname) + s2 <- sub(".*\\.(.*)", "\\1", s1) + sub(suffix, "", s2) } - -#' VCMetricsFile R6 Class -#' -#' @description -#' Contains methods for reading and displaying contents of -#' the `vc_metrics.csv` file output from DRAGEN, which contains variant calling metrics. -#' @examples -#' x <- system.file("extdata/wgs/SEQC-II.vc_metrics.csv.gz", package = "dracarys") -#' vm <- VCMetricsFile$new(x) -#' d <- vm$read() # or read(vm) -#' vm$write(d, out_dir = tempdir(), prefix = "seqc_vc", out_format = "tsv") -#' -#' @export -VCMetricsFile <- R6::R6Class( - "VCMetricsFile", - inherit = File, - public = list( - #' @description - #' Reads the `vc_metrics.csv` file output from DRAGEN. - #' - #' @return tibble with one row and metrics spread across individual columns. 
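(For reference: a quick illustration of the new dragen_subprefix() helper added above. The file names are hypothetical and the expected results simply follow the mapping spelled out in its comments.)

# Illustrative only -- hypothetical file names:
dragen_subprefix("L2401290.wgs_contig_mean_cov.csv", "_contig_mean_cov")          # "wgs"
dragen_subprefix("L2401290.target_bed_coverage_metrics.csv", "_coverage_metrics") # "target_bed"
dragen_subprefix("L2401290.gvcf_metrics.csv", "_metrics")                         # "gvcf"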
- read = function() { - abbrev_nm <- c( - "Total" = "var_tot_dragen", - "Biallelic" = "var_biallelic_dragen", - "Multiallelic" = "var_multiallelic_dragen", - "SNPs" = "var_snp_dragen", - "Insertions (Hom)" = "var_ins_hom_dragen", - "Insertions (Het)" = "var_ins_het_dragen", - "Deletions (Hom)" = "var_del_hom_dragen", - "Deletions (Het)" = "var_del_het_dragen", - "Indels (Het)" = "var_indel_het_dragen", - "Chr X number of SNPs over genome" = "var_snp_x_over_genome_dragen", - "Chr Y number of SNPs over genome" = "var_snp_y_over_genome_dragen", - "(Chr X SNPs)/(chr Y SNPs) ratio over genome" = "var_x_over_y_snp_ratio_over_genome_dragen", - "SNP Transitions" = "var_snp_transitions_dragen", - "SNP Transversions" = "var_snp_transversions_dragen", - "Ti/Tv ratio" = "var_ti_tv_ratio_dragen", - "Heterozygous" = "var_heterozygous_dragen", - "Homozygous" = "var_homozygous_dragen", - "Het/Hom ratio" = "var_het_hom_ratio_dragen", - "In dbSNP" = "var_in_dbsnp_dragen", - "Not in dbSNP" = "var_nin_dbsnp_dragen", - "Percent Callability" = "callability_pct_dragen", - "Percent Autosome Callability" = "callability_auto_pct_dragen", - "Number of samples" = "sample_num_dragen", - "Reads Processed" = "reads_processed_dragen", - "Child Sample" = "sample_child_dragen" - ) - x <- self$path - raw <- readr::read_lines(x) - assertthat::assert_that(grepl("VARIANT CALLER", raw[1])) - # tidy - d <- raw |> - tibble::as_tibble_col(column_name = "value") |> - tidyr::separate_wider_delim( - "value", - names = c("category", "sample", "var", "count", "pct"), - delim = ",", too_few = "align_start" - ) |> - dplyr::mutate( - var = dplyr::recode(.data$var, !!!abbrev_nm), - count = dplyr::na_if(.data$count, "NA"), - count = as.numeric(.data$count), - pct = round(as.numeric(.data$pct), 2), - category = dplyr::case_when( - grepl("SUMMARY", .data$category) ~ "summary", - grepl("PREFILTER", .data$category) ~ "prefilter", - grepl("POSTFILTER", .data$category) ~ "postfilter", - TRUE ~ "unknown" - ) - ) |> - dplyr::filter(.data$category != "summary") |> - dplyr::select("category", "sample", "var", "count", "pct") - # pivot - d |> - tidyr::pivot_longer(c("count", "pct")) |> - dplyr::mutate( - name = dplyr::if_else(.data$name == "count", "", "_pct"), - var = glue("{.data$var}{.data$name}") - ) |> - dplyr::select("category", "sample", "var", "value") |> - dplyr::filter(!is.na(.data$value)) |> - tidyr::pivot_wider(names_from = "var", values_from = "value") - }, - #' @description - #' Writes a tidy version of the `vc_metrics.csv` file output - #' from DRAGEN. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). 
- write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - } - ) -) - -#' TrimmerMetricsFile R6 Class -#' -#' @description -#' Contains methods for reading and displaying contents of -#' the `trimmer_metrics.csv` file output from DRAGEN -#' -#' @examples -#' x <- system.file("extdata/wgs/SEQC-II.trimmer_metrics.csv.gz", package = "dracarys") -#' tm <- TrimmerMetricsFile$new(x) -#' d <- tm$read() -#' tm$write(d, out_dir = tempdir(), prefix = "seqc_tm", out_format = "tsv") -#' -#' @export -TrimmerMetricsFile <- R6::R6Class( - "TrimmerMetricsFile", - inherit = File, - public = list( - #' @description - #' Reads the `trimmer_metrics.csv` file output from DRAGEN. - #' - #' @return tibble with one row and metrics spread across individual columns. - read = function() { - x <- self$path - d <- readr::read_lines(x) - assertthat::assert_that(grepl("TRIMMER STATISTICS", d[1])) - abbrev_nm <- c( - "Total input reads" = "reads_tot_input_dragen", - "Total input bases" = "bases_tot_dragen", - "Total input bases R1" = "bases_r1_dragen", - "Total input bases R2" = "bases_r2_dragen", - "Average input read length" = "read_len_avg_dragen", - "Total trimmed reads" = "reads_trimmed_tot_dragen", - "Total trimmed bases" = "bases_trimmed_tot_dragen", - "Average bases trimmed per read" = "bases_trimmed_avg_per_read_dragen", - "Average bases trimmed per trimmed read" = "bases_trimmed_avg_per_trimmedread_dragen", - "Remaining poly-G K-mers R1 3prime" = "polygkmers3r1_remaining_dragen", - "Remaining poly-G K-mers R2 3prime" = "polygkmers3r2_remaining_dragen", - "Poly-G soft trimmed reads unfiltered R1 3prime" = "polyg_soft_trimmed_reads_unfilt_3r1_dragen", - "Poly-G soft trimmed reads unfiltered R2 3prime" = "polyg_soft_trimmed_reads_unfilt_3r2_dragen", - "Poly-G soft trimmed reads filtered R1 3prime" = "polyg_soft_trimmed_reads_filt_3r1_dragen", - "Poly-G soft trimmed reads filtered R2 3prime" = "polyg_soft_trimmed_reads_filt_3r2_dragen", - "Poly-G soft trimmed bases unfiltered R1 3prime" = "polyg_soft_trimmed_bases_unfilt_3r1_dragen", - "Poly-G soft trimmed bases unfiltered R2 3prime" = "polyg_soft_trimmed_bases_unfilt_3r2_dragen", - "Poly-G soft trimmed bases filtered R1 3prime" = "polyg_soft_trimmed_bases_filt_3r1_dragen", - "Poly-G soft trimmed bases filtered R2 3prime" = "polyg_soft_trimmed_bases_filt_3r2_dragen", - "Total filtered reads" = "reads_tot_filt_dragen", - "Reads filtered for minimum read length R1" = "reads_filt_minreadlenr1_dragen", - "Reads filtered for minimum read length R2" = "reads_filt_minreadlenr2_dragen" - ) - - d |> - tibble::as_tibble_col(column_name = "value") |> - tidyr::separate_wider_delim("value", names = c("category", "extra", "var", "count", "pct"), delim = ",", too_few = "align_start") |> - dplyr::mutate( - count = as.numeric(.data$count), - pct = round(as.numeric(.data$pct), 2), - var = dplyr::recode(.data$var, !!!abbrev_nm) - ) |> - dplyr::select("var", "count", "pct") |> - tidyr::pivot_longer(c("count", "pct")) |> - dplyr::filter(!is.na(.data$value)) |> - dplyr::mutate( - name = dplyr::if_else(.data$name == "count", "", "_pct"), - var = glue("{.data$var}{.data$name}") - ) |> - dplyr::select("var", "value") |> - tidyr::pivot_wider(names_from = "var", values_from = "value") - }, - #' @description - #' Writes a tidy version of the `trimmer_metrics.csv` file output - #' from DRAGEN. 
- #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). - write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - } - ) -) - -#' SvMetricsFile R6 Class -#' -#' @description -#' Contains methods for reading and displaying contents of -#' the `sv_metrics.csv` file output from DRAGEN -#' -#' @examples -#' x <- system.file("extdata/wgs/SEQC-II.sv_metrics.csv.gz", package = "dracarys") -#' sv <- SvMetricsFile$new(x) -#' d <- sv$read() -#' sv$write(d, out_dir = tempdir(), prefix = "seqc_sv", out_format = "tsv") -#' -#' @export -SvMetricsFile <- R6::R6Class( - "SvMetricsFile", - inherit = File, - public = list( - #' @description - #' Reads the `sv_metrics.csv` file output from DRAGEN. - #' - #' @return tibble with one row and metrics spread across individual columns. - read = function() { - x <- self$path - d <- readr::read_lines(x) - assertthat::assert_that(grepl("SV SUMMARY", d[1])) - abbrev_nm <- c( - "Number of deletions (PASS)" = "n_del", - "Number of insertions (PASS)" = "n_ins", - "Number of duplications (PASS)" = "n_dup", - "Number of breakend pairs (PASS)" = "n_bnd" - ) - d |> - tibble::as_tibble_col(column_name = "value") |> - dplyr::filter(!grepl("Total number of structural variants", .data$value)) |> - tidyr::separate_wider_delim( - "value", - names = c("svsum", "sample", "var", "count", "pct"), delim = ",", - too_few = "align_start" - ) |> - dplyr::mutate( - count = as.numeric(.data$count), - pct = round(as.numeric(.data$pct), 2), - var = dplyr::recode(.data$var, !!!abbrev_nm) - ) |> - dplyr::select("var", "count", "pct") |> - tidyr::pivot_longer(c("count", "pct")) |> - dplyr::mutate( - name = dplyr::if_else(.data$name == "count", "", "_pct"), - var = glue("{.data$var}{.data$name}") - ) |> - dplyr::arrange(.data$name, .data$var) |> - dplyr::select("var", "value") |> - tidyr::pivot_wider(names_from = "var", values_from = "value") - }, - #' @description - #' Writes a tidy version of the `sv_metrics.csv` file output - #' from DRAGEN. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). - write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - } - ) -) - -#' WgsHistFile R6 Class -#' -#' @description -#' Contains methods for reading and displaying contents of -#' the `wgs_hist.csv` file output from DRAGEN -#' -#' @examples -#' x <- system.file("extdata/wgs/SEQC-II.wgs_hist.csv.gz", package = "dracarys") -#' h <- WgsHistFile$new(x) -#' d <- h$read() -#' h$write(d, out_dir = tempdir(), prefix = "seqc_sv", out_format = "tsv") -#' -#' @export -WgsHistFile <- R6::R6Class( - "WgsHistFile", - inherit = File, - public = list( - #' @description - #' Reads the `wgs_hist.csv` file output from DRAGEN. - #' - #' @return tibble with one row and metrics spread across individual columns. 
- read = function() { - x <- self$path - d <- readr::read_csv(x, col_names = c("var", "pct"), col_types = "cd") - d |> - dplyr::mutate( - var = sub("PCT of bases in wgs with coverage ", "", .data$var), - var = gsub("\\[|\\]|\\(|\\)", "", .data$var), - var = gsub("x", "", .data$var), - var = gsub("inf", "Inf", .data$var) - ) |> - tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |> - dplyr::mutate( - start = as.numeric(.data$start), - end = as.numeric(.data$end), - pct = round(.data$pct, 2), - cumsum = cumsum(.data$pct) - ) - }, - #' @description - #' Writes a tidy version of the `wgs_hist.csv` file output - #' from DRAGEN. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). - write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid) - } - ) -) diff --git a/R/dragen_fastqc.R b/R/dragen_fastqc.R index c272e20..ceb9ce3 100644 --- a/R/dragen_fastqc.R +++ b/R/dragen_fastqc.R @@ -1,59 +1,12 @@ -#' FastqcMetrics R6 Class +#' DRAGEN FASTQC Metrics #' -#' @description -#' Contains methods for reading and displaying contents of -#' the `fastqc_metrics.csv` file output from DRAGEN. +#' Read DRAGEN `fastqc_metrics.csv` file. #' -#' @examples -#' x <- system.file("extdata/wgs/SEQC-II.fastqc_metrics.csv.gz", package = "dracarys") -#' fq <- FastqcMetricsFile$new(x) -#' d <- fq$read() -#' fq$write(d, out_dir = tempdir(), prefix = "seqc_fq", out_format = "tsv") -#' # fq$plot(d) +#' @param x Path to file. #' +#' @return Tibble with metrics. #' @export -FastqcMetricsFile <- R6::R6Class( - "FastqcMetricsFile", - inherit = File, - public = list( - #' @description - #' Reads the `fastqc_metrics.csv` file output from DRAGEN. - #' - #' @return tibble. TODO. - read = function() { - x <- self$path - res <- read_fastqc_metrics(x) - }, - #' @description - #' Writes a tidy version of the `fastqc_metrics.csv` file output - #' from DRAGEN. - #' - #' @param d Parsed object from `self$read()`. - #' @param prefix Prefix of output file(s). - #' @param out_dir Output directory. - #' @param out_format Format of output file(s). - #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). - write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { - if (!is.null(out_dir)) { - prefix <- file.path(out_dir, prefix) - } - d_write <- d |> - tibble::enframe(name = "section") |> - dplyr::rowwise() |> - dplyr::mutate( - section_low = tolower(.data$section), - p = glue("{prefix}_{.data$section_low}"), - out = list(write_dracarys(obj = .data$value, prefix = .data$p, out_format = out_format, drid = drid)) - ) |> - dplyr::ungroup() |> - dplyr::select("section", "value") |> - tibble::deframe() - invisible(d_write) - } - ) -) - -read_fastqc_metrics <- function(x) { +dragen_fastqc_metrics_read <- function(x) { # 'SEQUENCE POSITIONS' has an extra field for 'Total Sequence Starts' which # can be filtered out since that can be computed by the rest of that section. 
raw <- readr::read_lines(x) |> @@ -70,6 +23,15 @@ read_fastqc_metrics <- function(x) { value = as.numeric(.data$value) ) + # 1 POSITIONAL BASE CONTENT + # 2 POSITIONAL BASE MEAN QUALITY + # 3 POSITIONAL QUALITY + # 4 READ GC CONTENT + # 5 READ GC CONTENT QUALITY + # 6 READ LENGTHS + # 7 READ MEAN QUALITY + # 8 SEQUENCE POSITIONS + # there are binned pos e.g. "149-150" # the value is an accumulation, so divide by number of grains pos_base_cont <- d |> @@ -158,13 +120,14 @@ read_fastqc_metrics <- function(x) { dplyr::select("mate", "seq", "bp", "value") list( - positional_base_content = pos_base_cont, - positional_base_mean_quality = pos_base_mean_qual, - positional_quality = pos_qual, - read_gc_content = gc_cont, - read_gc_content_quality = gc_cont_qual, - read_lengths = read_len, - read_mean_quality = read_mean_qual, - sequence_positions = seq_pos - ) + fqc_positionalBaseContent = pos_base_cont, + fqc_positionalBaseMeanQuality = pos_base_mean_qual, + fqc_positionalQuality = pos_qual, + fqc_readGCContent = gc_cont, + fqc_readGCContentQuality = gc_cont_qual, + fqc_readLengths = read_len, + fqc_readMeanQuality = read_mean_qual, + fqc_sequencePositions = seq_pos + ) |> + tibble::enframe(name = "name", value = "data") } diff --git a/R/tso.R b/R/tso.R index 048e993..6df0cde 100644 --- a/R/tso.R +++ b/R/tso.R @@ -119,13 +119,13 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class( #' @description Read `MergedSmallVariants.vcf.gz` file. #' @param x Path to file. read_msv = function(x) { - dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = FALSE) + dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = TRUE) tibble::tibble(name = "mergedsmallv", data = list(dat)) }, #' @description Read `MergedSmallVariants.genome.vcf.gz` file. #' @param x Path to file. read_msvg = function(x) { - dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = FALSE) + dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = TRUE) tibble::tibble(name = "mergedsmallvg", data = list(dat)) }, #' @description Read `CombinedVariantOutput.tsv` file. @@ -137,7 +137,7 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class( #' @description Read `CopyNumberVariants.vcf.gz` file. #' @param x Path to file. read_cnv = function(x) { - dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = FALSE) + dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = TRUE) tibble::tibble(name = "cnv", data = list(dat)) }, #' @description Read `fragment_length_hist.json.gz` file. @@ -325,34 +325,6 @@ tso_tmbt_read <- function(x) { d[] } -#' Plot Fragment Length Hist -#' -#' Plots the fragment length distributions as given in the -#' `fragment_length_hist` file. -#' -#' @param d Parsed tibble. -#' @param min_count Minimum read count to be plotted (def: 10). -#' -#' @return A ggplot2 plot containing fragment lengths on X axis and read counts -#' on Y axis for each sample. 
-tso_fraglenhist_plot <- function(d, min_count = 10) {
-  assertthat::assert_that(is.numeric(min_count), min_count >= 0)
-  d |>
-    dplyr::filter(.data$Count >= min_count) |>
-    ggplot2::ggplot(ggplot2::aes(x = .data$FragmentLength, y = .data$Count)) +
-    ggplot2::geom_line() +
-    ggplot2::labs(title = "Fragment Length Distribution") +
-    ggplot2::xlab("Fragment Length (bp)") +
-    ggplot2::ylab(glue("Read Count (min: {min_count})")) +
-    ggplot2::theme_minimal() +
-    ggplot2::theme(
-      legend.position = c(0.9, 0.9),
-      legend.justification = c(1, 1),
-      panel.grid.minor = ggplot2::element_blank(),
-      panel.grid.major = ggplot2::element_blank(),
-      plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold")
-    )
-}

 #' Read TSO TargetRegionCoverage File
 #'
@@ -471,6 +443,13 @@ tso_msi_read <- function(x) {
   if (j[["PercentageUnstableSites"]] == "NaN") {
     j[["PercentageUnstableSites"]] <- NA_real_
   }
+  num_cols <- c(
+    "TotalMicrosatelliteSitesAssessed", "TotalMicrosatelliteSitesUnstable",
+    "PercentageUnstableSites", "SumDistance", "SumJsd"
+  )
   tibble::as_tibble_row(j) |>
-    dplyr::mutate(ResultIsValid = as.character(.data$ResultIsValid))
+    dplyr::mutate(
+      dplyr::across(dplyr::any_of(num_cols), as.numeric),
+      ResultIsValid = as.character(.data$ResultIsValid),
+    )
 }
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
new file mode 100644
index 0000000..dc35091
--- /dev/null
+++ b/R/tso_dragen.R
@@ -0,0 +1,272 @@
+#' Wf_dragen R6 Class
+#'
+#' @description
+#' Reads and writes tidy versions of files from the `dragen` workflow.
+#'
+#' @examples
+#' \dontrun{
+#'
+#' #---- Local ----#
+#' prefix <- "L2401290"
+#' p <- file.path(
+#'   "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production",
+#'   "analysis/cttsov2/20240915ff0295ed/Logs_Intermediates/DragenCaller",
+#'   prefix
+#' )
+#' d1 <- Wf_dragen$new(path = p, prefix = prefix)
+#' d1$list_files(max_files = 100)
+#' d1$list_files_filter_relevant(max_files = 300)
+#' d <- d1$download_files(max_files = 100, dryrun = F)
+#' d_tidy <- d1$tidy_files(d)
+#' d_write <- d1$write(
+#'   d_tidy,
+#'   outdir = file.path(p, "dracarys_tidy"),
+#'   prefix = prefix,
+#'   format = "tsv"
+#' )
+#' }
+#' @export
+Wf_dragen <- R6::R6Class(
+  "Wf_dragen",
+  inherit = Wf,
+  public = list(
+    #' @field prefix The LibraryID prefix of the sample (needed for path lookup).
+    prefix = NULL,
+    #' @description Create a new Wf_dragen object.
+    #' @param path Path to directory with raw workflow results (from S3 or
+    #' local filesystem).
+    #' @param prefix The LibraryID prefix of the sample (needed for path lookup).
+ initialize = function(path = NULL, prefix = NULL) { + wname <- "dragen" + pref <- prefix + reg1 <- tibble::tribble( + ~regex, ~fun, + glue("{pref}\\-replay\\.json$"), "replay", + glue("{pref}\\.cnv_metrics.csv$"), "cnvMetrics", + glue("{pref}\\.exon_contig_mean_cov\\.csv$"), "contigMeanCov", + glue("{pref}\\.target_bed_contig_mean_cov\\.csv$"), "contigMeanCov", + glue("{pref}\\.tmb_contig_mean_cov\\.csv$"), "contigMeanCov", + glue("{pref}\\.wgs_contig_mean_cov\\.csv$"), "contigMeanCov", + glue("{pref}\\.exon_coverage_metrics\\.csv$"), "coverageMetrics", + glue("{pref}\\.target_bed_coverage_metrics\\.csv$"), "coverageMetrics", + glue("{pref}\\.tmb_coverage_metrics\\.csv$"), "coverageMetrics", + glue("{pref}\\.wgs_coverage_metrics\\.csv$"), "coverageMetrics", + glue("{pref}\\.exon_fine_hist\\.csv$"), "fineHist", + glue("{pref}\\.target_bed_fine_hist\\.csv$"), "fineHist", + glue("{pref}\\.tmb_fine_hist\\.csv$"), "fineHist", + glue("{pref}\\.wgs_fine_hist\\.csv$"), "fineHist", + glue("{pref}\\.exon_hist\\.csv$"), "hist", + glue("{pref}\\.target_bed_hist\\.csv$"), "hist", + glue("{pref}\\.tmb_hist\\.csv$"), "hist", + glue("{pref}\\.wgs_hist\\.csv$"), "hist", + glue("{pref}\\.fastqc_metrics\\.csv$"), "fastqcMetrics", + glue("{pref}\\.fragment_length_hist\\.csv$"), "fragmentLengthHist", + glue("{pref}\\.gc_metrics\\.csv$"), "gcMetrics", + glue("{pref}\\.gvcf_metrics\\.csv$"), "vcMetrics", + glue("{pref}\\.mapping_metrics\\.csv$"), "mappingMetrics", + glue("{pref}\\.microsat_diffs\\.txt$"), "msiDiffs", + glue("{pref}\\.microsat_output\\.json$"), "msi", + glue("{pref}\\.sv_metrics\\.csv$"), "svMetrics", + glue("{pref}\\.time_metrics\\.csv$"), "timeMetrics", + glue("{pref}\\.trimmer_metrics\\.csv$"), "trimmerMetrics", + glue("{pref}\\.umi_metrics\\.csv$"), "umiMetrics", + glue("{pref}\\.vc_metrics\\.csv$"), "vcMetrics" + ) + regexes <- reg1 |> + dplyr::mutate( + fun = paste0("read_", .data$fun), + fun = ifelse(.data$fun == "read_DOWNLOAD_ONLY", "DOWNLOAD_ONLY", .data$fun) + ) + + super$initialize(path = path, wname = wname, regexes = regexes) + self$prefix <- prefix + }, + #' @description Print details about the Workflow. + #' @param ... (ignored). + print = function(...) { + res <- tibble::tribble( + ~var, ~value, + "path", self$path, + "wname", self$wname, + "filesystem", self$filesystem, + "prefix", self$prefix + ) + print(res) + invisible(self) + }, + #' @description Read `replay.json` file. + #' @param x Path to file. + read_replay = function(x) { + res <- x |> + jsonlite::read_json(simplifyVector = TRUE) |> + purrr::map_if(is.data.frame, tibble::as_tibble) + req_elements <- c("command_line", "hash_table_build", "dragen_config", "system") + assertthat::assert_that(all(names(res) %in% req_elements)) + res[["system"]] <- res[["system"]] |> + tibble::as_tibble_row() + res[["hash_table_build"]] <- res[["hash_table_build"]] |> + tibble::as_tibble_row() + # we don't care if the columns are characters, no analysis likely to be done on dragen options + # (though never say never!) + res[["dragen_config"]] <- res[["dragen_config"]] |> + tidyr::pivot_wider(names_from = "name", values_from = "value") + dat <- dplyr::bind_cols(res) + tibble::tibble(name = "replay", data = list(dat)) + }, + #' @description Read `contig_mean_cov.csv` file. + #' @param x Path to file. + #' @param keep_alt Keep ALT contigs. 
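(To make the regex table in the constructor above concrete, here is a rough standalone sketch of the file-name-to-reader dispatch it encodes, after the paste0("read_", ...) step. This is illustrative only: the two regexes are copied from the table, pick_reader() is a hypothetical helper, and the real matching is handled by the Wf superclass.)

pref <- "L2401290"
regexes <- tibble::tibble(
  regex = c(glue::glue("{pref}\\.wgs_fine_hist\\.csv$"), glue::glue("{pref}\\.sv_metrics\\.csv$")),
  fun = c("read_fineHist", "read_svMetrics")
)
pick_reader <- function(fname) {
  # return the read_* method name whose regex matches the file name
  hit <- regexes$fun[purrr::map_lgl(regexes$regex, ~ grepl(.x, fname))]
  if (length(hit) == 1) hit else NA_character_
}
pick_reader("L2401290.wgs_fine_hist.csv")  # "read_fineHist"
pick_reader("L2401290.unrelated_file.txt") # NA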
+    read_contigMeanCov = function(x, keep_alt = FALSE) {
+      subprefix <- dragen_subprefix(x, "_contig_mean_cov")
+      dat <- readr::read_csv(x, col_names = c("chrom", "n_bases", "coverage"), col_types = "cdd") |>
+        dplyr::filter(
+          if (!keep_alt) {
+            !grepl("chrM|MT|_|Autosomal|HLA-|EBV|GL|hs37d5", .data$chrom)
+          } else {
+            TRUE
+          }
+        )
+      tibble::tibble(name = glue("contigmeancov_{subprefix}"), data = list(dat[]))
+    },
+    #' @description Read `coverage_metrics.csv` file.
+    #' @param x Path to file.
+    read_coverageMetrics = function(x) {
+      subprefix <- dragen_subprefix(x, "_coverage_metrics")
+      dat <- dragen_coverage_metrics_read(x)
+      tibble::tibble(name = glue("covmetrics_{subprefix}"), data = list(dat))
+    },
+    #' @description Read `fine_hist.csv` file.
+    #' @param x Path to file.
+    read_fineHist = function(x) {
+      subprefix <- dragen_subprefix(x, "_fine_hist")
+      d <- readr::read_csv(x, col_types = "cd")
+      assertthat::assert_that(all(colnames(d) == c("Depth", "Overall")))
+      # there's a max Depth of 2000+, so convert to numeric for easier plotting
+      dat <- d |>
+        dplyr::mutate(
+          Depth = ifelse(grepl("+", .data$Depth, fixed = TRUE), sub("(\\d*)\\+", "\\1", .data$Depth), .data$Depth),
+          Depth = as.integer(.data$Depth)
+        ) |>
+        dplyr::select(depth = "Depth", n_loci = "Overall")
+      tibble::tibble(name = glue("finehist_{subprefix}"), data = list(dat))
+    },
+    #' @description Read `fragment_length_hist.csv` file.
+    #' @param x Path to file.
+    read_fragmentLengthHist = function(x) {
+      d <- readr::read_lines(x)
+      assertthat::assert_that(grepl("#Sample", d[1]))
+      dat <- d |>
+        tibble::enframe(name = "name", value = "value") |>
+        dplyr::filter(!grepl("#Sample: |FragmentLength,Count", .data$value)) |>
+        tidyr::separate_wider_delim(cols = "value", names = c("fragmentLength", "count"), delim = ",") |>
+        dplyr::mutate(
+          count = as.numeric(.data$count),
+          fragmentLength = as.numeric(.data$fragmentLength)
+        ) |>
+        dplyr::select("fragmentLength", "count")
+      tibble::tibble(name = "fraglen", data = list(dat))
+    },
+    #' @description Read `mapping_metrics.csv` file.
+    #' @param x Path to file.
+    read_mappingMetrics = function(x) {
+      dat <- dragen_mapping_metrics_read(x)
+      tibble::tibble(name = "mapmetrics", data = list(dat))
+    },
+    #' @description Read `hist.csv` (not `fine_hist.csv`!) file.
+    #' @param x Path to file.
+    read_hist = function(x) {
+      subprefix <- dragen_subprefix(x, "_hist")
+      d <- readr::read_csv(x, col_names = c("var", "pct"), col_types = "cd")
+      dat <- d |>
+        dplyr::mutate(
+          var = sub("PCT of bases in .* with coverage ", "", .data$var),
+          var = gsub("\\[|\\]|\\(|\\)", "", .data$var),
+          var = gsub("x", "", .data$var),
+          var = gsub("inf", "Inf", .data$var)
+        ) |>
+        tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
+        dplyr::mutate(
+          start = as.numeric(.data$start),
+          end = as.numeric(.data$end),
+          pct = round(.data$pct, 2),
+          cumsum = cumsum(.data$pct)
+        )
+      tibble::tibble(name = glue("hist_{subprefix}"), data = list(dat))
+    },
+    #' @description Read `time_metrics.csv` file.
+    #' @param x Path to file.
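(A tiny worked example of the read_fineHist() tidying above, using made-up values and readr's I() to read from a literal string; only the open-ended "2000+" bin needs the conversion to a plain integer.)

x <- I("Depth,Overall\n0,1000\n1,2500\n2000+,42\n")
d <- readr::read_csv(x, col_types = "cd")
d |>
  dplyr::mutate(
    Depth = ifelse(grepl("+", Depth, fixed = TRUE), sub("(\\d*)\\+", "\\1", Depth), Depth),
    Depth = as.integer(Depth)
  ) |>
  dplyr::select(depth = "Depth", n_loci = "Overall")
#> depth: 0, 1, 2000; n_loci: 1000, 2500, 42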
+ read_timeMetrics = function(x) { + cn <- c("dummy1", "dummy2", "Step", "time_hrs", "time_sec") + ct <- readr::cols(.default = "c", time_hrs = readr::col_time(format = "%T"), time_sec = "d") + d <- readr::read_csv(x, col_names = cn, col_types = ct) + assertthat::assert_that(d$dummy1[1] == "RUN TIME", is.na(d$dummy2[1])) + assertthat::assert_that(inherits(d$time_hrs, "hms")) + dat <- d |> + dplyr::mutate( + Step = tools::toTitleCase(sub("Time ", "", .data$Step)), + Step = gsub(" |/", "", .data$Step), + Time = substr(.data$time_hrs, 1, 5) + ) |> + dplyr::select("Step", "Time") |> + tidyr::pivot_wider(names_from = "Step", values_from = "Time") |> + dplyr::relocate("TotalRuntime") + tibble::tibble(name = "timemetrics", data = list(dat)) + }, + #' @description Read `vc_metrics.csv`/`gvcf_metrics.csv` file. + #' @param x Path to file. + read_vcMetrics = function(x) { + subprefix <- dragen_subprefix(x, "_metrics") + dat <- dragen_vc_metrics_read(x) + tibble::tibble(name = glue("vcmetrics_{subprefix}"), data = list(dat[])) + }, + #' @description Read `trimmer_metrics.csv` file. + #' @param x Path to file. + read_trimmerMetrics = function(x) { + dat <- dragen_trimmer_metrics_read(x) + tibble::tibble(name = "trimmermetrics", data = list(dat[])) + }, + #' @description Read `sv_metrics.csv` file. + #' @param x Path to file. + read_svMetrics = function(x) { + dat <- dragen_sv_metrics_read(x) + tibble::tibble(name = "svmetrics", data = list(dat[])) + }, + #' @description Read `cnv_metrics.csv` file. + #' @param x Path to file. + read_cnvMetrics = function(x) { + dat <- dragen_cnv_metrics_read(x) + tibble::tibble(name = "cnvmetrics", data = list(dat[])) + }, + #' @description Read `fastqc_metrics.csv` file. + #' @param x Path to file. + read_fastqcMetrics = function(x) { + dat <- dragen_fastqc_metrics_read(x) + dat + }, + #' @description Read `gc_metrics.csv` file. + #' @param x Path to file. + read_gcMetrics = function(x) { + dat <- dragen_gc_metrics_read(x) + dat + }, + #' @description Read `umi_metrics.csv` file. + #' @param x Path to file. + read_umiMetrics = function(x) { + dat <- dragen_umi_metrics_read(x) + dat + }, + #' @description Read `microsat_output.json` file. + #' @param x Path to file. + read_msi = function(x) { + dat <- tso_msi_read(x) + tibble::tibble(name = "msi", data = list(dat[])) + }, + #' @description Read `microsat_diffs.txt` file. + #' @param x Path to file. 
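(Note the return convention shared by the read_* methods above: each yields a one-row tibble with a name column and a list-column data, which downstream tidying/writing code can unnest. A minimal sketch with made-up values:)

dat <- tibble::tibble(fragmentLength = c(100, 101), count = c(5, 7)) # made-up values
res <- tibble::tibble(name = "fraglen", data = list(dat))
tidyr::unnest(res, cols = "data")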
+ read_msiDiffs = function(x) { + dat <- readr::read_tsv(x, col_types = "cdccddc") |> + dplyr::rename(Chromosome = "#Chromosome") + tibble::tibble(name = "msidiffs", data = list(dat[])) + } + ) # end public +) # end Wf_dragen diff --git a/R/tsov2.R b/R/tsov2.R index 8ec5175..1a08dd9 100644 --- a/R/tsov2.R +++ b/R/tsov2.R @@ -14,11 +14,21 @@ #' prefix <- "L2401290" #' t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix) #' t1$list_files(max_files = 100) +#' t1$dragenObj$list_files(max_files = 100) #' t1$list_files_filter_relevant(max_files = 300) -#' d <- t1$download_files(max_files = 100, dryrun = F) -#' d_tidy <- t1$tidy_files(d) -#' d_write <- t1$write( -#' d_tidy, +#' t1$dragenObj$list_files_filter_relevant(max_files = 300) +#' d1 <- t1$download_files(max_files = 100, dryrun = F) +#' d2 <- t1$dragenObj$download_files(max_files = 100, dryrun = F) +#' d1_tidy <- t1$tidy_files(d1) +#' d2_tidy <- t1$dragenObj$tidy_files(d2) +#' d_write1 <- t1$write( +#' d1_tidy, +#' outdir = file.path(p, "dracarys_tidy"), +#' prefix = prefix, +#' format = "tsv" +#' ) +#' d_write2 <- t1$dragenObj$write( +#' d2_tidy, #' outdir = file.path(p, "dracarys_tidy"), #' prefix = prefix, #' format = "tsv" @@ -54,6 +64,8 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( public = list( #' @field prefix The LibraryID prefix of the tumor sample (needed for path lookup). prefix = NULL, + #' @field dragenObj dragen object. + dragenObj = NULL, #' @description Create a new Wf_tso_ctdna_tumor_only_v2 object. #' @param path Path to directory with raw workflow results (from S3 or #' local filesystem). @@ -64,6 +76,7 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( res <- glue("Results/{pref}") li <- "Logs_Intermediates" dc <- glue("{li}/DragenCaller/{pref}") + self$dragenObj <- Wf_dragen$new(path = file.path(path, dc), prefix = glue("{dc}/{prefix}")) # Results reg1 <- tibble::tribble( ~regex, ~fun, @@ -73,7 +86,7 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( glue("{res}/{pref}\\.gene_cov_report\\.tsv$"), "cvgrepg", glue("{res}/{pref}\\.hard-filtered\\.vcf\\.gz$"), "hardfilt", glue("{res}/{pref}\\.hard-filtered\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY", - glue("{res}/{pref}\\.microsat_output\\.json$"), "msi", + # glue("{res}/{pref}\\.microsat_output\\.json$"), "msi", # in DragenCaller glue("{res}/{pref}\\.tmb.trace\\.tsv$"), "tmbt", glue("{res}/{pref}_CombinedVariantOutput\\.tsv$"), "cvo", glue("{res}/{pref}_Fusions\\.csv$"), "fus", @@ -81,50 +94,11 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( # glue("{res}/{pref}_SmallVariants_Annotated\\.json\\.gz$"), "DOWNLOAD_ONLY", glue("{li}/SampleAnalysisResults/{pref}_SampleAnalysisResults\\.json$"), "sar" ) - # DragenCaller - reg2 <- tibble::tribble( - ~regex, ~fun, - glue("{dc}/{pref}\\-replay\\.json$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.cnv_metrics.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.exon_contig_mean_cov\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.exon_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.exon_fine_hist\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.exon_hist\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.exon_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.fastqc_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.fragment_length_hist\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.gvcf_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.mapping_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY", - 
glue("{dc}/{pref}\\.microsat_output\\.json$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.sv_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.target_bed_contig_mean_cov\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.target_bed_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.target_bed_fine_hist\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.target_bed_hist\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.target_bed_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.time_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.tmb_contig_mean_cov\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.tmb_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.tmb_fine_hist\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.tmb_hist\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.tmb_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.trimmer_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.umi_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.vc_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.wgs_contig_mean_cov\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.wgs_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.wgs_fine_hist\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.wgs_hist\\.csv$"), "DOWNLOAD_ONLY", - glue("{dc}/{pref}\\.wgs_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY" - ) - regexes <- dplyr::bind_rows(reg1, reg2) |> + regexes <- reg1 |> dplyr::mutate( fun = paste0("read_", .data$fun), fun = ifelse(.data$fun == "read_DOWNLOAD_ONLY", "DOWNLOAD_ONLY", .data$fun) ) - super$initialize(path = path, wname = wname, regexes = regexes) self$prefix <- prefix }, @@ -225,6 +199,19 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( #' #' @examples #' \dontrun{ +#' #---- Local ----# +#' p <- file.path( +#' "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", +#' "analysis/cttsov2/20240915ff0295ed" +#' ) +#' prefix <- "L2401290" +#' outdir <- file.path(p, "dracarys_tidy") +#' d <- dtw_Wf_tso_ctdna_tumor_only_v2( +#' path = p, prefix = prefix, outdir = outdir, +#' format = "tsv", +#' dryrun = F +#' ) +#' #' p <- file.path( #' "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", #' "analysis/cttsov2/20240915ff0295ed" @@ -244,18 +231,28 @@ dtw_Wf_tso_ctdna_tumor_only_v2 <- function(path, prefix, outdir, max_files = 1000, dryrun = FALSE) { obj <- Wf_tso_ctdna_tumor_only_v2$new(path = path, prefix = prefix) - d_dl <- obj$download_files( + d_dl1 <- obj$download_files( + outdir = outdir, max_files = max_files, dryrun = dryrun + ) + d_dl2 <- obj$dragenObj$download_files( outdir = outdir, max_files = max_files, dryrun = dryrun ) if (!dryrun) { - d_tidy <- obj$tidy_files(d_dl) - d_write <- obj$write( - d_tidy, + d_tidy1 <- obj$tidy_files(d_dl1) + d_tidy2 <- obj$dragenObj$tidy_files(d_dl2) + d_write1 <- obj$write( + d_tidy1, + outdir = outdir_tidy, + prefix = prefix, + format = format + ) + d_write2 <- obj$dragenObj$write( + d_tidy2, outdir = outdir_tidy, prefix = prefix, format = format ) - return(d_write) + return(dplyr::bind_rows(d_write1, d_write2)) } - return(d_dl) + return(dplyr::bind_rows(d_dl1, d_dl2)) } diff --git a/R/utils.R b/R/utils.R index 8691826..c2bc44c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,18 @@ +dirty_names_cleaned <- function(dirty, clean, fname = NULL) { + assertthat::assert_that( + rlang::is_atomic(dirty), + rlang::is_atomic(clean) + ) + are_dirty <- dirty[!dirty %in% clean] + msg <- paste( + "Following columns are dirty:", + 
paste(are_dirty, collapse = ", "), + ifelse(!is.null(fname), paste("Filename:", fname), ""), + sep = "\n" + ) + assertthat::assert_that(length(are_dirty) == 0, msg = msg) +} + #' Print current timestamp for logging #' #' @return Current timestamp as character. diff --git a/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R b/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R index f5ed36b..9fbfa12 100755 --- a/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R +++ b/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R @@ -14,8 +14,6 @@ c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION") |> stopifnot() icav1_token <- Sys.getenv("ICA_ACCESS_TOKEN") |> dracarys::ica_token_validate() -# this helps keep annoying reticulate prompt away -Sys.setenv(RETICULATE_PYTHON = Sys.getenv("CONDA_PYTHON_EXE")) query_workflow_alignqc <- function(start_date) { wfs <- c("wgs_alignment_qc", "wts_alignment_qc") |> @@ -37,7 +35,7 @@ query_limsrow_libids <- function(libids) { } # first read in the workflows table, extract metadata, then join with lims -start_date <- "2024-09-27" +start_date <- "2024-10-11" p_raw <- query_workflow_alignqc(start_date) wgs <- p_raw |> diff --git a/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd b/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd index b40fbe7..95807e2 100644 --- a/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd +++ b/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd @@ -9,7 +9,7 @@ output: highlight: kate params: title: "UMCCR Alignment QC Summary Report" - meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-09-27_wgts.rds") + meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-10-11_wgts.rds") description: "UMCCR Alignment QC Summary Report" title: "`r params$title`" --- @@ -239,7 +239,14 @@ conf <- list( pink_range = c(8, 20) ) d_map |> - dt_view() |> + left_join( + meta |> + mutate(umccrid = glue("{.data$SubjectID}_{.data$LibraryID}_LN{.data$lane}")) |> + select(umccrid, assay, workflow, project_name, project_owner), + by = "umccrid" + ) |> + select(umccrid, phenotype, type, source, assay, workflow, project_name, project_owner, everything()) |> + dt_view(scroll_y = 1500) |> DT::formatCurrency(columns = names(d_map)[num_pct_cols], currency = "", digits = 1) |> DT::formatCurrency(columns = names(d_map)[no_numpct_cols], currency = "", digits = 0) |> DT::formatStyle( @@ -304,7 +311,14 @@ d_cvg <- dr_unnest("WgsCoverageMetricsFile") |> ) num_cols <- names(d_cvg)[purrr::map_lgl(d_cvg, is.numeric)] d_cvg |> - dt_view() |> + left_join( + meta |> + mutate(umccrid = glue("{.data$SubjectID}_{.data$LibraryID}_LN{.data$lane}")) |> + select(umccrid, assay, workflow, project_name, project_owner), + by = "umccrid" + ) |> + select(umccrid, phenotype, type, source, assay, workflow, project_name, project_owner, everything()) |> + dt_view(scroll_y = 1500) |> DT::formatCurrency(columns = num_cols, currency = "", digits = 1) |> DT::formatStyle( "ploidy", @@ -392,7 +406,7 @@ f1_plot <- ggplot() + fill = rep(fqc_colours1$col, length(unique(f1$type))), alpha = 0.7 ) + - geom_line(data = f1, aes(x = q, y = prop, colour = umccrid, linetype = mate), linewidth = 1) + + geom_line(data = f1, aes(x = q, y = prop, colour = umccrid, linetype = mate), linewidth = 1, show.legend = FALSE) + scale_y_continuous(labels = scales::label_comma()) + theme(panel.grid.major = element_blank()) + facet_wrap(~type, ncol = 1) + @@ -401,8 +415,8 @@ f1_plot <- ggplot() + subtitle = glue("Percentage of reads with average quality scores. 
Shows if\na subset of reads has poor quality.") ) -# plotly::ggplotly(f1_plot) -f1_plot +plotly::ggplotly(f1_plot) +# f1_plot ``` ### GC Content ('Per-Sequence GC Content') @@ -427,8 +441,8 @@ gc_data_plot <- gc_data |> title = "Read GC Content", subtitle = glue("Total number of reads with each GC content\npercentile between 0% and 100%") ) -# plotly::ggplotly(gc_data_plot) -gc_data_plot +plotly::ggplotly(gc_data_plot) +# gc_data_plot ``` ### GC Content Quality ('GC Content Mean Quality Scores') @@ -454,15 +468,15 @@ f1_plot <- ggplot() + title = "GC Content Quality", subtitle = glue("Average Phred-scale read mean quality for reads with\neach GC content percentile between 0% and 100%.") ) -# plotly::ggplotly(f1_plot) -f1_plot +plotly::ggplotly(f1_plot) +# f1_plot ``` ### Positional Base Content ('Per-Position Sequence Content') - TODO: create heatmap instead -```{r fqc_pbc, eval=T, fig.height=42} +```{r fqc_pbc, eval=F, fig.height=42} f1 <- dr_unnest("FastqcMetricsFile_positional_base_content") f1 |> filter(base != "N") |> @@ -484,7 +498,7 @@ f1 |> ### Positional Base Mean Quality ('Per-Position Mean Quality Scores') -```{r fqc_bmq, eval=T, fig.height=80} +```{r fqc_bmq, eval=F, fig.height=80} f1 <- dr_unnest("FastqcMetricsFile_positional_base_mean_quality") ggplot() + geom_rect( @@ -508,7 +522,7 @@ ggplot() + ### Positional Quality ('Per-Position Quality Score Ranges') -```{r fqc_pq, eval=T, fig.width=13} +```{r fqc_pq, eval=F, fig.width=13} # TODO: use boxplot instead of point f1 <- dr_unnest("FastqcMetricsFile_positional_quality") quants <- c(25, 50, 75) @@ -546,8 +560,8 @@ read_len_plot <- read_len |> title = "Read Lengths", subtitle = glue("Read percentage with each observed length.") ) -# plotly::ggplotly(read_len_plot) -read_len_plot +plotly::ggplotly(read_len_plot) +# read_len_plot ``` ### Sequence Positions ('Adapter Content') @@ -569,7 +583,7 @@ f1 |> ## Coverage {.tabset .tabset-pills} -```{r contig_cvg, eval=T, results='asis', fig.height=5} +```{r contig_cvg, eval=F, results='asis', fig.height=5} d1 <- dr_unnest("WgsContigMeanCovFile") |> arrange(desc("umccrid")) for (type1 in sort(unique(d1$type), decreasing = FALSE)) { @@ -608,13 +622,13 @@ flp <- fl1 |> xlab("Fragment Length (bp)") + ylab(glue("Read Count (min: {min_count})")) + theme( - legend.position = c(0.9, 0.9), + legend.position.inside = c(0.9, 0.9), legend.justification = c(1, 1), panel.grid.minor = ggplot2::element_blank(), plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold") ) -# plotly::ggplotly(flp) -flp +plotly::ggplotly(flp) +# flp ``` --- @@ -638,8 +652,8 @@ d_pl_plot <- d_pl_plot_data |> geom_line(aes(colour = umccrid, group = umccrid), na.rm = TRUE) + geom_point(aes(colour = umccrid), na.rm = TRUE) + labs(title = "Chromosome Median / Autosomal Median") -# plotly::ggplotly(d_pl_plot) -d_pl_plot +plotly::ggplotly(d_pl_plot) +# d_pl_plot ``` @@ -671,12 +685,12 @@ d_hist2 <- d_hist |> x = "Coverage", y = "PCT Cumsum", title = "Percentage of sites in genome with given coverage" ) -plotly::subplot(d_hist1, d_hist2, shareY = TRUE, titleY = TRUE, titleX = TRUE, nrows = 2) +# plotly::subplot(d_hist1, d_hist2, shareY = TRUE, titleY = TRUE, titleX = TRUE, nrows = 2) ``` ## FineHist -```{r finehist, eval=T, fig.height=10, fig.width=12} +```{r finehist, eval=F, fig.height=10, fig.width=12} d_fhist <- dr_unnest("WgsFineHistFile") d_fhist |> dracarys::WgsFineHistFile$public_methods$plot(c(0, 150)) + diff --git a/man/FastqcMetricsFile.Rd b/man/FastqcMetricsFile.Rd deleted file mode 100644 index 
ec13b16..0000000
--- a/man/FastqcMetricsFile.Rd
+++ /dev/null
@@ -1,101 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen_fastqc.R
-\name{FastqcMetricsFile}
-\alias{FastqcMetricsFile}
-\title{FastqcMetrics R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \code{fastqc_metrics.csv} file output from DRAGEN.
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.fastqc_metrics.csv.gz", package = "dracarys")
-fq <- FastqcMetricsFile$new(x)
-d <- fq$read()
-fq$write(d, out_dir = tempdir(), prefix = "seqc_fq", out_format = "tsv")
-# fq$plot(d)
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{FastqcMetricsFile}
-}
[remaining generated sections (Methods: read(), write(), clone(); inherited-method HTML scaffolding) omitted]
diff --git a/man/FragmentLengthHistFile.Rd b/man/FragmentLengthHistFile.Rd
deleted file mode 100644
index 07c9081..0000000
--- a/man/FragmentLengthHistFile.Rd
+++ /dev/null
@@ -1,131 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{FragmentLengthHistFile}
-\alias{FragmentLengthHistFile}
-\title{FragmentLengthHistFile R6 Class}
-\description{
-Contains methods for reading and plotting contents of
-the \code{fragment_length_hist.csv} file output from DRAGEN.
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.fragment_length_hist.csv.gz", package = "dracarys")
-fl <- FragmentLengthHistFile$new(x)
-d <- fl$read() # or read(fl)
-fl$plot(d) # or plot(fl)
-fl$write(d |> dplyr::filter(count > 10), out_dir = tempdir(), prefix = "seqc_fl")
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{FragmentLengthHistFile}
-}
[remaining generated sections (Methods: read(), write(), plot(), clone(); inherited-method HTML scaffolding) omitted]
diff --git a/man/MappingMetricsFile.Rd b/man/MappingMetricsFile.Rd
deleted file mode 100644
index 62d5cfb..0000000
--- a/man/MappingMetricsFile.Rd
+++ /dev/null
@@ -1,106 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{MappingMetricsFile}
-\alias{MappingMetricsFile}
-\title{MappingMetricsFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \code{mapping_metrics.csv} file output from DRAGEN.
-This file contains mapping and aligning metrics, like the metrics computed by
-the Samtools Flagstat command. These metrics are available on an aggregate
-level (over all input data), and on a per read group level. NOTE: we are
-keeping only the read group level metrics (i.e. removing the aggregate data).
-Unless explicitly stated, the metrics units are in reads (i.e., not in
-terms of pairs or alignments).
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.mapping_metrics.csv.gz", package = "dracarys")
-mm <- MappingMetricsFile$new(x)
-d <- mm$read() # or read(mm)
-mm$write(d, out_dir = tempdir(), prefix = "seqc_mm", out_format = "tsv")
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{MappingMetricsFile}
-}
[remaining generated sections (Methods: read(), write(), clone(); inherited-method HTML scaffolding) omitted]
diff --git a/man/ReplayFile.Rd b/man/ReplayFile.Rd
deleted file mode 100644
index c2c34eb..0000000
--- a/man/ReplayFile.Rd
+++ /dev/null
@@ -1,94 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{ReplayFile}
-\alias{ReplayFile}
-\title{ReplayFile R6 Class}
-\description{
-Contains methods for reading contents of
-the \code{replay.json} file output from DRAGEN, which contains the DRAGEN command
-line, parameters and version for the specific run.
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II-replay.json.gz", package = "dracarys")
-r <- ReplayFile$new(x)
-d <- r$read() # or read(r)
-r$write(d, out_dir = tempdir(), prefix = "seqc_replay", out_format = "tsv")
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{ReplayFile}
-}
[remaining generated sections (Methods: read(), write(), clone(); inherited-method HTML scaffolding) omitted]
diff --git a/man/SvMetricsFile.Rd b/man/SvMetricsFile.Rd
deleted file mode 100644
index e5c775d..0000000
--- a/man/SvMetricsFile.Rd
+++ /dev/null
@@ -1,94 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{SvMetricsFile}
-\alias{SvMetricsFile}
-\title{SvMetricsFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \code{sv_metrics.csv} file output from DRAGEN
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.sv_metrics.csv.gz", package = "dracarys")
-sv <- SvMetricsFile$new(x)
-d <- sv$read()
-sv$write(d, out_dir = tempdir(), prefix = "seqc_sv", out_format = "tsv")
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{SvMetricsFile}
-}
[remaining generated sections (Methods: read(), write(), clone(); inherited-method HTML scaffolding) omitted]
Inherited methods - -
-}} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SvMetricsFile-read}{}}} -\subsection{Method \code{read()}}{ -Reads the \code{sv_metrics.csv} file output from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{SvMetricsFile$read()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -tibble with one row and metrics spread across individual columns. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SvMetricsFile-write}{}}} -\subsection{Method \code{write()}}{ -Writes a tidy version of the \code{sv_metrics.csv} file output -from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{SvMetricsFile$write(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{out_dir}}{Output directory.} - -\item{\code{prefix}}{Prefix of output file(s).} - -\item{\code{out_format}}{Format of output file(s).} - -\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SvMetricsFile-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{SvMetricsFile$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/TimeMetricsFile.Rd b/man/TimeMetricsFile.Rd deleted file mode 100644 index 43b463d..0000000 --- a/man/TimeMetricsFile.Rd +++ /dev/null @@ -1,100 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dragen.R -\name{TimeMetricsFile} -\alias{TimeMetricsFile} -\title{TimeMetricsFile R6 Class} -\description{ -Contains methods for reading contents of -the \code{time_metrics.csv} file output from DRAGEN, which contains -a breakdown of the run duration for each DRAGEN process. -} -\examples{ -x <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys") -tm <- TimeMetricsFile$new(x) -d <- tm$read() # or read(tm) -tm$write(d, out_dir = tempdir(), prefix = "seqc_time", out_format = "tsv") -} -\section{Super class}{ -\code{\link[dracarys:File]{dracarys::File}} -> \code{TimeMetricsFile} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-TimeMetricsFile-read}{\code{TimeMetricsFile$read()}} -\item \href{#method-TimeMetricsFile-write}{\code{TimeMetricsFile$write()}} -\item \href{#method-TimeMetricsFile-clone}{\code{TimeMetricsFile$clone()}} -} -} -\if{html}{\out{ -
Inherited methods - -
-}} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TimeMetricsFile-read}{}}} -\subsection{Method \code{read()}}{ -Reads the \code{time_metrics.csv} file. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TimeMetricsFile$read()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -tibble with one row and metrics spread across individual columns. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TimeMetricsFile-write}{}}} -\subsection{Method \code{write()}}{ -Writes a tidy version of the \code{time_metrics.csv} file output -from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TimeMetricsFile$write( - d, - out_dir = NULL, - prefix, - out_format = "tsv", - drid = NULL -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{out_dir}}{Output directory.} - -\item{\code{prefix}}{Prefix of output file(s).} - -\item{\code{out_format}}{Format of output file(s).} - -\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TimeMetricsFile-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TimeMetricsFile$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/TrimmerMetricsFile.Rd b/man/TrimmerMetricsFile.Rd deleted file mode 100644 index a3b4fe8..0000000 --- a/man/TrimmerMetricsFile.Rd +++ /dev/null @@ -1,100 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dragen.R -\name{TrimmerMetricsFile} -\alias{TrimmerMetricsFile} -\title{TrimmerMetricsFile R6 Class} -\description{ -Contains methods for reading and displaying contents of -the \code{trimmer_metrics.csv} file output from DRAGEN -} -\examples{ -x <- system.file("extdata/wgs/SEQC-II.trimmer_metrics.csv.gz", package = "dracarys") -tm <- TrimmerMetricsFile$new(x) -d <- tm$read() -tm$write(d, out_dir = tempdir(), prefix = "seqc_tm", out_format = "tsv") - -} -\section{Super class}{ -\code{\link[dracarys:File]{dracarys::File}} -> \code{TrimmerMetricsFile} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-TrimmerMetricsFile-read}{\code{TrimmerMetricsFile$read()}} -\item \href{#method-TrimmerMetricsFile-write}{\code{TrimmerMetricsFile$write()}} -\item \href{#method-TrimmerMetricsFile-clone}{\code{TrimmerMetricsFile$clone()}} -} -} -\if{html}{\out{ -
Inherited methods - -
-}} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TrimmerMetricsFile-read}{}}} -\subsection{Method \code{read()}}{ -Reads the \code{trimmer_metrics.csv} file output from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TrimmerMetricsFile$read()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -tibble with one row and metrics spread across individual columns. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TrimmerMetricsFile-write}{}}} -\subsection{Method \code{write()}}{ -Writes a tidy version of the \code{trimmer_metrics.csv} file output -from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TrimmerMetricsFile$write( - d, - out_dir = NULL, - prefix, - out_format = "tsv", - drid = NULL -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{out_dir}}{Output directory.} - -\item{\code{prefix}}{Prefix of output file(s).} - -\item{\code{out_format}}{Format of output file(s).} - -\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TrimmerMetricsFile-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TrimmerMetricsFile$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/VCMetricsFile.Rd b/man/VCMetricsFile.Rd deleted file mode 100644 index 3011160..0000000 --- a/man/VCMetricsFile.Rd +++ /dev/null @@ -1,94 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dragen.R -\name{VCMetricsFile} -\alias{VCMetricsFile} -\title{VCMetricsFile R6 Class} -\description{ -Contains methods for reading and displaying contents of -the \code{vc_metrics.csv} file output from DRAGEN, which contains variant calling metrics. -} -\examples{ -x <- system.file("extdata/wgs/SEQC-II.vc_metrics.csv.gz", package = "dracarys") -vm <- VCMetricsFile$new(x) -d <- vm$read() # or read(vm) -vm$write(d, out_dir = tempdir(), prefix = "seqc_vc", out_format = "tsv") - -} -\section{Super class}{ -\code{\link[dracarys:File]{dracarys::File}} -> \code{VCMetricsFile} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-VCMetricsFile-read}{\code{VCMetricsFile$read()}} -\item \href{#method-VCMetricsFile-write}{\code{VCMetricsFile$write()}} -\item \href{#method-VCMetricsFile-clone}{\code{VCMetricsFile$clone()}} -} -} -\if{html}{\out{ -
Inherited methods - -
-}} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-VCMetricsFile-read}{}}} -\subsection{Method \code{read()}}{ -Reads the \code{vc_metrics.csv} file output from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{VCMetricsFile$read()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -tibble with one row and metrics spread across individual columns. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-VCMetricsFile-write}{}}} -\subsection{Method \code{write()}}{ -Writes a tidy version of the \code{vc_metrics.csv} file output -from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{VCMetricsFile$write(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{out_dir}}{Output directory.} - -\item{\code{prefix}}{Prefix of output file(s).} - -\item{\code{out_format}}{Format of output file(s).} - -\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-VCMetricsFile-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{VCMetricsFile$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd new file mode 100644 index 0000000..141e855 --- /dev/null +++ b/man/Wf_dragen.Rd @@ -0,0 +1,423 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tso_dragen.R
+\name{Wf_dragen}
+\alias{Wf_dragen}
+\title{Wf_dragen R6 Class}
+\description{
+Reads and writes tidy versions of files from the \code{dragen} workflow.
+}
+\examples{
+\dontrun{
+
+#---- Local ----#
+prefix <- "L2401290"
+p <- file.path(
+  "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production",
+  "analysis/cttsov2/20240915ff0295ed/Logs_Intermediates/DragenCaller",
+  prefix
+)
+d1 <- Wf_dragen$new(path = p, prefix = prefix)
+d1$list_files(max_files = 100)
+d1$list_files_filter_relevant(max_files = 300)
+d <- d1$download_files(max_files = 100, dryrun = F)
+d_tidy <- d1$tidy_files(d)
+d_write <- d1$write(
+  d_tidy,
+  outdir = file.path(p, "dracarys_tidy"),
+  prefix = prefix,
+  format = "tsv"
+)
+}
+}
+\section{Super class}{
+\code{\link[dracarys:Wf]{dracarys::Wf}} -> \code{Wf_dragen}
+}
+\section{Public fields}{
+\if{html}{\out{
}} +\describe{ +\item{\code{prefix}}{The LibraryID prefix of the sample (needed for path lookup).} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-Wf_dragen-new}{\code{Wf_dragen$new()}} +\item \href{#method-Wf_dragen-print}{\code{Wf_dragen$print()}} +\item \href{#method-Wf_dragen-read_replay}{\code{Wf_dragen$read_replay()}} +\item \href{#method-Wf_dragen-read_contigMeanCov}{\code{Wf_dragen$read_contigMeanCov()}} +\item \href{#method-Wf_dragen-read_coverageMetrics}{\code{Wf_dragen$read_coverageMetrics()}} +\item \href{#method-Wf_dragen-read_fineHist}{\code{Wf_dragen$read_fineHist()}} +\item \href{#method-Wf_dragen-read_fragmentLengthHist}{\code{Wf_dragen$read_fragmentLengthHist()}} +\item \href{#method-Wf_dragen-read_mappingMetrics}{\code{Wf_dragen$read_mappingMetrics()}} +\item \href{#method-Wf_dragen-read_hist}{\code{Wf_dragen$read_hist()}} +\item \href{#method-Wf_dragen-read_timeMetrics}{\code{Wf_dragen$read_timeMetrics()}} +\item \href{#method-Wf_dragen-read_vcMetrics}{\code{Wf_dragen$read_vcMetrics()}} +\item \href{#method-Wf_dragen-read_trimmerMetrics}{\code{Wf_dragen$read_trimmerMetrics()}} +\item \href{#method-Wf_dragen-read_svMetrics}{\code{Wf_dragen$read_svMetrics()}} +\item \href{#method-Wf_dragen-read_cnvMetrics}{\code{Wf_dragen$read_cnvMetrics()}} +\item \href{#method-Wf_dragen-read_fastqcMetrics}{\code{Wf_dragen$read_fastqcMetrics()}} +\item \href{#method-Wf_dragen-read_gcMetrics}{\code{Wf_dragen$read_gcMetrics()}} +\item \href{#method-Wf_dragen-read_umiMetrics}{\code{Wf_dragen$read_umiMetrics()}} +\item \href{#method-Wf_dragen-read_msi}{\code{Wf_dragen$read_msi()}} +\item \href{#method-Wf_dragen-read_msiDiffs}{\code{Wf_dragen$read_msiDiffs()}} +\item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-new}{}}} +\subsection{Method \code{new()}}{ +Create a new Wf_dragen object. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$new(path = NULL, prefix = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{path}}{Path to directory with raw workflow results (from S3 or +local filesystem).} + +\item{\code{prefix}}{The LibraryID prefix of the sample (needed for path lookup).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-print}{}}} +\subsection{Method \code{print()}}{ +Print details about the Workflow. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_replay}{}}} +\subsection{Method \code{read_replay()}}{ +Read \code{replay.json} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_replay(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_contigMeanCov}{}}} +\subsection{Method \code{read_contigMeanCov()}}{ +Read \code{contig_mean_cov.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_contigMeanCov(x, keep_alt = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} + +\item{\code{keep_alt}}{Keep ALT contigs.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_coverageMetrics}{}}} +\subsection{Method \code{read_coverageMetrics()}}{ +Read \code{coverage_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_coverageMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_fineHist}{}}} +\subsection{Method \code{read_fineHist()}}{ +Read \code{fine_hist.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_fineHist(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_fragmentLengthHist}{}}} +\subsection{Method \code{read_fragmentLengthHist()}}{ +Read \code{fragment_length_hist.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_fragmentLengthHist(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_mappingMetrics}{}}} +\subsection{Method \code{read_mappingMetrics()}}{ +Read \code{mapping_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_mappingMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_hist}{}}} +\subsection{Method \code{read_hist()}}{ +Read \code{hist.csv} (not \code{fine_hist.csv}!) file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_hist(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_timeMetrics}{}}} +\subsection{Method \code{read_timeMetrics()}}{ +Read \code{time_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_timeMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_vcMetrics}{}}} +\subsection{Method \code{read_vcMetrics()}}{ +Read \code{vc_metrics.csv}/\code{gvcf_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_vcMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_trimmerMetrics}{}}} +\subsection{Method \code{read_trimmerMetrics()}}{ +Read \code{trimmer_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_trimmerMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_svMetrics}{}}} +\subsection{Method \code{read_svMetrics()}}{ +Read \code{sv_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_svMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_cnvMetrics}{}}} +\subsection{Method \code{read_cnvMetrics()}}{ +Read \code{cnv_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_cnvMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_fastqcMetrics}{}}} +\subsection{Method \code{read_fastqcMetrics()}}{ +Read \code{fastqc_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_fastqcMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_gcMetrics}{}}} +\subsection{Method \code{read_gcMetrics()}}{ +Read \code{gc_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_gcMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_umiMetrics}{}}} +\subsection{Method \code{read_umiMetrics()}}{ +Read \code{umi_metrics.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_umiMetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_msi}{}}} +\subsection{Method \code{read_msi()}}{ +Read \code{microsat_output.json} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_msi(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-read_msiDiffs}{}}} +\subsection{Method \code{read_msiDiffs()}}{ +Read \code{microsat_diffs.txt} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$read_msiDiffs(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_dragen-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_dragen$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/Wf_tso_ctdna_tumor_only_v2.Rd b/man/Wf_tso_ctdna_tumor_only_v2.Rd index 55e998d..fb84a29 100644 --- a/man/Wf_tso_ctdna_tumor_only_v2.Rd +++ b/man/Wf_tso_ctdna_tumor_only_v2.Rd @@ -17,11 +17,21 @@ p <- file.path( prefix <- "L2401290" t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix) t1$list_files(max_files = 100) +t1$dragenObj$list_files(max_files = 100) t1$list_files_filter_relevant(max_files = 300) -d <- t1$download_files(max_files = 100, dryrun = F) -d_tidy <- t1$tidy_files(d) -d_write <- t1$write( - d_tidy, +t1$dragenObj$list_files_filter_relevant(max_files = 300) +d1 <- t1$download_files(max_files = 100, dryrun = F) +d2 <- t1$dragenObj$download_files(max_files = 100, dryrun = F) +d1_tidy <- t1$tidy_files(d1) +d2_tidy <- t1$dragenObj$tidy_files(d2) +d_write1 <- t1$write( + d1_tidy, + outdir = file.path(p, "dracarys_tidy"), + prefix = prefix, + format = "tsv" +) +d_write2 <- t1$dragenObj$write( + d2_tidy, outdir = file.path(p, "dracarys_tidy"), prefix = prefix, format = "tsv" @@ -58,6 +68,8 @@ d_write <- t2$write( \if{html}{\out{
}}
 \describe{
 \item{\code{prefix}}{The LibraryID prefix of the tumor sample (needed for path lookup).}
+
+\item{\code{dragenObj}}{A \code{Wf_dragen} object used to handle the DRAGEN outputs of this workflow.}
 }
 \if{html}{\out{
}} } diff --git a/man/WgsContigMeanCovFile.Rd b/man/WgsContigMeanCovFile.Rd deleted file mode 100644 index 49b974b..0000000 --- a/man/WgsContigMeanCovFile.Rd +++ /dev/null @@ -1,146 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dragen.R -\name{WgsContigMeanCovFile} -\alias{WgsContigMeanCovFile} -\title{WgsContigMeanCovFile R6 Class} -\description{ -Contains methods for reading and displaying contents of -the \verb{wgs_contig_mean_cov_.csv} file output from DRAGEN. -This file contains the estimated coverage for all contigs, and an autosomal -estimated coverage. -} -\examples{ -x1 <- system.file("extdata/wgs/SEQC-II.wgs_contig_mean_cov_normal.csv.gz", package = "dracarys") -x2 <- system.file("extdata/wgs/SEQC-II.wgs_contig_mean_cov_tumor.csv.gz", package = "dracarys") -cc1 <- WgsContigMeanCovFile$new(x1) -cc2 <- WgsContigMeanCovFile$new(x2) -d1 <- cc1$read() -d2 <- cc2$read() -cc1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv") -cc2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv") - -cc1$plot(d1) -cc2$plot(d2) - -} -\section{Super class}{ -\code{\link[dracarys:File]{dracarys::File}} -> \code{WgsContigMeanCovFile} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-WgsContigMeanCovFile-read}{\code{WgsContigMeanCovFile$read()}} -\item \href{#method-WgsContigMeanCovFile-write}{\code{WgsContigMeanCovFile$write()}} -\item \href{#method-WgsContigMeanCovFile-plot}{\code{WgsContigMeanCovFile$plot()}} -\item \href{#method-WgsContigMeanCovFile-clone}{\code{WgsContigMeanCovFile$clone()}} -} -} -\if{html}{\out{ -
Inherited methods - -
-}} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsContigMeanCovFile-read}{}}} -\subsection{Method \code{read()}}{ -Reads the \verb{wgs_contig_mean_cov_.csv} file output from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsContigMeanCovFile$read(keep_alt = TRUE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{keep_alt}}{Keep the ALT + Mito chromosomes?} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -tibble with the following columns: -\itemize{ -\item label: file label. -\item chrom: contig name. -\item n_bases: number of bases aligned to contig (excludes bases from -duplicate marked reads, reads with MAPQ=0, and clipped bases). -\item coverage: col2 / contig length -} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsContigMeanCovFile-write}{}}} -\subsection{Method \code{write()}}{ -Writes a tidy version of the \verb{wgs_contig_mean_cov_.csv} file output -from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsContigMeanCovFile$write( - d, - out_dir = NULL, - prefix, - out_format = "tsv", - drid = NULL -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{out_dir}}{Output directory.} - -\item{\code{prefix}}{Prefix of output file(s).} - -\item{\code{out_format}}{Format of output file(s).} - -\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsContigMeanCovFile-plot}{}}} -\subsection{Method \code{plot()}}{ -Plots the \verb{wgs_contig_mean_cov_.csv} files. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsContigMeanCovFile$plot(d, top_alt_n = 15)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{top_alt_n}}{Number of top covered alt contigs to plot per phenotype.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -A ggplot2 object with chromosomes on X axis, and coverage on Y axis. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsContigMeanCovFile-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsContigMeanCovFile$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/WgsCoverageMetricsFile.Rd b/man/WgsCoverageMetricsFile.Rd deleted file mode 100644 index 322ce83..0000000 --- a/man/WgsCoverageMetricsFile.Rd +++ /dev/null @@ -1,105 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dragen.R -\name{WgsCoverageMetricsFile} -\alias{WgsCoverageMetricsFile} -\title{WgsCoverageMetricsFile R6 Class} -\description{ -Contains methods for reading and displaying contents of -the \verb{wgs_coverage_metrics_.csv} file output from DRAGEN. -This file contains read depth of coverage metrics. -} -\examples{ -x1 <- system.file("extdata/wgs/SEQC-II.wgs_coverage_metrics_normal.csv.gz", package = "dracarys") -x2 <- system.file("extdata/wgs/SEQC-II.wgs_coverage_metrics_tumor.csv.gz", package = "dracarys") -cm1 <- WgsCoverageMetricsFile$new(x1) -cm2 <- WgsCoverageMetricsFile$new(x2) -d1 <- read(cm1) -d2 <- read(cm2) -cm1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv") -cm2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv") - -} -\section{Super class}{ -\code{\link[dracarys:File]{dracarys::File}} -> \code{WgsCoverageMetricsFile} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-WgsCoverageMetricsFile-read}{\code{WgsCoverageMetricsFile$read()}} -\item \href{#method-WgsCoverageMetricsFile-write}{\code{WgsCoverageMetricsFile$write()}} -\item \href{#method-WgsCoverageMetricsFile-clone}{\code{WgsCoverageMetricsFile$clone()}} -} -} -\if{html}{\out{ -
Inherited methods - -
-}} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsCoverageMetricsFile-read}{}}} -\subsection{Method \code{read()}}{ -Reads the \verb{wgs_coverage_metrics_.csv} file output from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsCoverageMetricsFile$read()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -tibble with one row and metrics spread across individual columns. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsCoverageMetricsFile-write}{}}} -\subsection{Method \code{write()}}{ -Writes a tidy version of the \verb{wgs_coverage_metrics_.csv} file output -from DRAGEN -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsCoverageMetricsFile$write( - d, - out_dir = NULL, - prefix, - out_format = "tsv", - drid = NULL -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{out_dir}}{Output directory.} - -\item{\code{prefix}}{Prefix of output file(s).} - -\item{\code{out_format}}{Format of output file(s).} - -\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsCoverageMetricsFile-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsCoverageMetricsFile$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/WgsFineHistFile.Rd b/man/WgsFineHistFile.Rd deleted file mode 100644 index b3f5407..0000000 --- a/man/WgsFineHistFile.Rd +++ /dev/null @@ -1,136 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dragen.R -\name{WgsFineHistFile} -\alias{WgsFineHistFile} -\title{WgsFineHistFile R6 Class} -\description{ -Contains methods for reading and displaying contents of -the \verb{wgs_fine_hist_.csv} file output from DRAGEN. -This file contains two columns: Depth and Overall. -The value in the Depth column ranges from 0 to 1000+ and the Overall -column indicates the number of loci covered at the corresponding depth. -} -\examples{ -x1 <- system.file("extdata/wgs/SEQC-II.wgs_fine_hist_normal.csv.gz", package = "dracarys") -x2 <- system.file("extdata/wgs/SEQC-II.wgs_fine_hist_tumor.csv.gz", package = "dracarys") -ch1 <- WgsFineHistFile$new(x1) -ch2 <- WgsFineHistFile$new(x2) -d1 <- read(ch1) -d2 <- read(ch2) -ch1$plot(d1) -ch2$plot(d2) -ch1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv") -ch2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv") -} -\section{Super class}{ -\code{\link[dracarys:File]{dracarys::File}} -> \code{WgsFineHistFile} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-WgsFineHistFile-read}{\code{WgsFineHistFile$read()}} -\item \href{#method-WgsFineHistFile-write}{\code{WgsFineHistFile$write()}} -\item \href{#method-WgsFineHistFile-plot}{\code{WgsFineHistFile$plot()}} -\item \href{#method-WgsFineHistFile-clone}{\code{WgsFineHistFile$clone()}} -} -} -\if{html}{\out{ -
Inherited methods - -
-}} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsFineHistFile-read}{}}} -\subsection{Method \code{read()}}{ -Reads the \verb{wgs_fine_hist_.csv} file output from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsFineHistFile$read()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -tibble with following columns: -\itemize{ -\item depth -\item number of loci with given depth -} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsFineHistFile-write}{}}} -\subsection{Method \code{write()}}{ -Writes a tidy version of the \verb{wgs_fine_hist_.csv} file output -from DRAGEN -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsFineHistFile$write( - d, - out_dir = NULL, - prefix, - out_format = "tsv", - drid = NULL -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{out_dir}}{Output directory.} - -\item{\code{prefix}}{Prefix of output file(s).} - -\item{\code{out_format}}{Format of output file(s).} - -\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsFineHistFile-plot}{}}} -\subsection{Method \code{plot()}}{ -Plots the \verb{wgs_fine_hist_.csv} files. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsFineHistFile$plot(d, x_lim = c(0, 300))}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{x_lim}}{X axis range to plot.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -A ggplot2 object with depth of coverage on X axis, -and number of loci with that depth on Y axis. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsFineHistFile-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsFineHistFile$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/WgsHistFile.Rd b/man/WgsHistFile.Rd deleted file mode 100644 index 0fecd69..0000000 --- a/man/WgsHistFile.Rd +++ /dev/null @@ -1,94 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dragen.R -\name{WgsHistFile} -\alias{WgsHistFile} -\title{WgsHistFile R6 Class} -\description{ -Contains methods for reading and displaying contents of -the \code{wgs_hist.csv} file output from DRAGEN -} -\examples{ -x <- system.file("extdata/wgs/SEQC-II.wgs_hist.csv.gz", package = "dracarys") -h <- WgsHistFile$new(x) -d <- h$read() -h$write(d, out_dir = tempdir(), prefix = "seqc_sv", out_format = "tsv") - -} -\section{Super class}{ -\code{\link[dracarys:File]{dracarys::File}} -> \code{WgsHistFile} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-WgsHistFile-read}{\code{WgsHistFile$read()}} -\item \href{#method-WgsHistFile-write}{\code{WgsHistFile$write()}} -\item \href{#method-WgsHistFile-clone}{\code{WgsHistFile$clone()}} -} -} -\if{html}{\out{ -
Inherited methods - -
-}} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsHistFile-read}{}}} -\subsection{Method \code{read()}}{ -Reads the \code{wgs_hist.csv} file output from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsHistFile$read()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -tibble with one row and metrics spread across individual columns. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsHistFile-write}{}}} -\subsection{Method \code{write()}}{ -Writes a tidy version of the \code{wgs_hist.csv} file output -from DRAGEN. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsHistFile$write(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{d}}{Parsed object from \code{self$read()}.} - -\item{\code{out_dir}}{Output directory.} - -\item{\code{prefix}}{Prefix of output file(s).} - -\item{\code{out_format}}{Format of output file(s).} - -\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-WgsHistFile-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{WgsHistFile$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/dragen_cnv_metrics_read.Rd b/man/dragen_cnv_metrics_read.Rd new file mode 100644 index 0000000..c347067 --- /dev/null +++ b/man/dragen_cnv_metrics_read.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen.R +\name{dragen_cnv_metrics_read} +\alias{dragen_cnv_metrics_read} +\title{Read DRAGEN CNV Metrics} +\usage{ +dragen_cnv_metrics_read(x) +} +\arguments{ +\item{x}{Path to file.} +} +\value{ +Tibble with metrics. +} +\description{ +Reads the \code{cnv_metrics.csv} file output from DRAGEN. +} +\examples{ +\dontrun{ +dragen_cnv_metrics_read(x) +} + +} diff --git a/man/dragen_contig_mean_coverage_plot.Rd b/man/dragen_contig_mean_coverage_plot.Rd new file mode 100644 index 0000000..2a98030 --- /dev/null +++ b/man/dragen_contig_mean_coverage_plot.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen.R +\name{dragen_contig_mean_coverage_plot} +\alias{dragen_contig_mean_coverage_plot} +\title{Plot DRAGEN Contig Mean Coverage +TODO +Plots the \verb{wgs_contig_mean_cov_.csv} files.} +\usage{ +dragen_contig_mean_coverage_plot(d, top_alt_n = 15) +} +\arguments{ +\item{d}{Parsed tibble.} + +\item{top_alt_n}{Number of top covered alt contigs to plot per phenotype.} +} +\value{ +A ggplot2 object with chromosomes on X axis, and coverage on Y axis. +} +\description{ +Plot DRAGEN Contig Mean Coverage +TODO +Plots the \verb{wgs_contig_mean_cov_.csv} files. +} diff --git a/man/dragen_coverage_metrics_read.Rd b/man/dragen_coverage_metrics_read.Rd new file mode 100644 index 0000000..dc88de2 --- /dev/null +++ b/man/dragen_coverage_metrics_read.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen.R +\name{dragen_coverage_metrics_read} +\alias{dragen_coverage_metrics_read} +\title{Read DRAGEN Coverage Metrics} +\usage{ +dragen_coverage_metrics_read(x) +} +\arguments{ +\item{x}{Path to file.} +} +\value{ +Tibble with metrics. +} +\description{ +Reads the \code{coverage_metrics.csv} file generated by DRAGEN. +} +\examples{ +\dontrun{ +dragen_coverage_metrics_read(x) +} +} diff --git a/man/dragen_fastqc_metrics_read.Rd b/man/dragen_fastqc_metrics_read.Rd new file mode 100644 index 0000000..018a75d --- /dev/null +++ b/man/dragen_fastqc_metrics_read.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen_fastqc.R +\name{dragen_fastqc_metrics_read} +\alias{dragen_fastqc_metrics_read} +\title{DRAGEN FASTQC Metrics} +\usage{ +dragen_fastqc_metrics_read(x) +} +\arguments{ +\item{x}{Path to file.} +} +\value{ +Tibble with metrics. +} +\description{ +Read DRAGEN \code{fastqc_metrics.csv} file. 
+} diff --git a/man/tso_fraglenhist_plot.Rd b/man/dragen_fraglenhist_plot.Rd similarity index 66% rename from man/tso_fraglenhist_plot.Rd rename to man/dragen_fraglenhist_plot.Rd index 7a6daad..e046df2 100644 --- a/man/tso_fraglenhist_plot.Rd +++ b/man/dragen_fraglenhist_plot.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tso.R -\name{tso_fraglenhist_plot} -\alias{tso_fraglenhist_plot} -\title{Plot Fragment Length Hist} +% Please edit documentation in R/dragen.R +\name{dragen_fraglenhist_plot} +\alias{dragen_fraglenhist_plot} +\title{DRAGEN Fragment Length Hist Plot} \usage{ -tso_fraglenhist_plot(d, min_count = 10) +dragen_fraglenhist_plot(d, min_count = 10) } \arguments{ \item{d}{Parsed tibble.} diff --git a/man/dragen_gc_metrics_read.Rd b/man/dragen_gc_metrics_read.Rd new file mode 100644 index 0000000..f68f4a0 --- /dev/null +++ b/man/dragen_gc_metrics_read.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen.R +\name{dragen_gc_metrics_read} +\alias{dragen_gc_metrics_read} +\title{Read DRAGEN GC Metrics} +\usage{ +dragen_gc_metrics_read(x) +} +\arguments{ +\item{x}{Path to file.} +} +\value{ +Tibble with metrics. +} +\description{ +Reads the \code{gc_metrics.csv} file output from DRAGEN. +} +\examples{ +\dontrun{ +dragen_gc_metrics_read(x) +} + +} diff --git a/man/dragen_mapping_metrics_read.Rd b/man/dragen_mapping_metrics_read.Rd new file mode 100644 index 0000000..a2ff466 --- /dev/null +++ b/man/dragen_mapping_metrics_read.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen.R +\name{dragen_mapping_metrics_read} +\alias{dragen_mapping_metrics_read} +\title{Read DRAGEN Mapping Metrics} +\usage{ +dragen_mapping_metrics_read(x) +} +\arguments{ +\item{x}{Path to file.} +} +\value{ +Tibble with metrics. +} +\description{ +Reads the \code{mapping_metrics.csv} file output from DRAGEN. +} +\examples{ +\dontrun{ +dragen_mapping_metrics_read(x) +} +} diff --git a/man/dragen_sv_metrics_read.Rd b/man/dragen_sv_metrics_read.Rd new file mode 100644 index 0000000..0150f5d --- /dev/null +++ b/man/dragen_sv_metrics_read.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen.R +\name{dragen_sv_metrics_read} +\alias{dragen_sv_metrics_read} +\title{Read DRAGEN SV Metrics} +\usage{ +dragen_sv_metrics_read(x) +} +\arguments{ +\item{x}{Path to file.} +} +\value{ +Tibble with metrics. +} +\description{ +Reads the \code{sv_metrics.csv} file output from DRAGEN. +} +\examples{ +\dontrun{ +dragen_sv_metrics_read(x) +} +} diff --git a/man/dragen_trimmer_metrics_read.Rd b/man/dragen_trimmer_metrics_read.Rd new file mode 100644 index 0000000..8a58398 --- /dev/null +++ b/man/dragen_trimmer_metrics_read.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen.R +\name{dragen_trimmer_metrics_read} +\alias{dragen_trimmer_metrics_read} +\title{Read DRAGEN Trimmer Metrics} +\usage{ +dragen_trimmer_metrics_read(x) +} +\arguments{ +\item{x}{Path to file.} +} +\value{ +Tibble with metrics. +} +\description{ +Reads the \code{trimmer_metrics.csv} file output from DRAGEN. 
+} +\examples{ +\dontrun{ +dragen_trimmer_metrics_read(x) +} +} diff --git a/man/dragen_umi_metrics_read.Rd b/man/dragen_umi_metrics_read.Rd new file mode 100644 index 0000000..16f57af --- /dev/null +++ b/man/dragen_umi_metrics_read.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen.R +\name{dragen_umi_metrics_read} +\alias{dragen_umi_metrics_read} +\title{Read DRAGEN UMI Metrics} +\usage{ +dragen_umi_metrics_read(x) +} +\arguments{ +\item{x}{Path to file.} +} +\value{ +Tibble with metrics. +} +\description{ +Reads the \code{umi_metrics.csv} file output from DRAGEN. +} +\examples{ +\dontrun{ +dragen_umi_metrics_read(x) +} +} diff --git a/man/dragen_vc_metrics_read.Rd b/man/dragen_vc_metrics_read.Rd new file mode 100644 index 0000000..e0e904e --- /dev/null +++ b/man/dragen_vc_metrics_read.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dragen.R +\name{dragen_vc_metrics_read} +\alias{dragen_vc_metrics_read} +\title{Read DRAGEN VariantCall Metrics} +\usage{ +dragen_vc_metrics_read(x) +} +\arguments{ +\item{x}{Path to file.} +} +\value{ +Tibble with metrics. +} +\description{ +Reads the \code{vc_metrics.csv} file output from DRAGEN. +} +\examples{ +\dontrun{ +dragen_vc_metrics_read(x) +} +} diff --git a/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd b/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd index 2a8ebf7..5d9423f 100644 --- a/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd +++ b/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd @@ -39,6 +39,19 @@ Downloads files from the \code{tso_ctdna_tumor_only_v2} workflow and writes them } \examples{ \dontrun{ +#---- Local ----# +p <- file.path( + "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", + "analysis/cttsov2/20240915ff0295ed" +) +prefix <- "L2401290" +outdir <- file.path(p, "dracarys_tidy") +d <- dtw_Wf_tso_ctdna_tumor_only_v2( + path = p, prefix = prefix, outdir = outdir, + format = "tsv", + dryrun = F +) + p <- file.path( "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", "analysis/cttsov2/20240915ff0295ed" diff --git a/man/time_metrics_process.Rd b/man/time_metrics_process.Rd deleted file mode 100644 index 91b6b20..0000000 --- a/man/time_metrics_process.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dragen.R -\name{time_metrics_process} -\alias{time_metrics_process} -\title{Process Multiple TimeMetricsFile Objects} -\usage{ -time_metrics_process(x, id = seq_len(length(x))) -} -\arguments{ -\item{x}{Atomic vector with one or more TimeMetricsFile objects.} - -\item{id}{ID for each input, which is used to disambiguate files -generated from same samples. Default: index from 1 to length of \code{x}.} -} -\value{ -tibble with the following columns: -\itemize{ -\item Step: DRAGEN step -\item Time: time in HH:MM -} -} -\description{ -Processes multiple TimeMetricsFile objects. -} -\examples{ -p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys") -x <- TimeMetricsFile$new(p) -(tm <- time_metrics_process(c(x, x), id = c("run1", "run2"))) - -} diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R deleted file mode 100644 index 1caa4ee..0000000 --- a/tests/testthat/test-roxytest-testexamples-dragen.R +++ /dev/null @@ -1,13 +0,0 @@ -# Generated by roxytest: do not edit by hand! 
- -# File R/dragen.R: @testexamples - -test_that("Function time_metrics_process() @ L789", { - - p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys") - x <- TimeMetricsFile$new(p) - (tm <- time_metrics_process(c(x, x), id = c("run1", "run2"))) - - expect_equal(nrow(tm), 2) -}) -
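
Note: the deleted roxytest above covered time_metrics_process(), which this changeset removes without a direct replacement. A minimal sketch of an equivalent testthat check against the new Wf_dragen reader follows; it assumes the bundled SEQC-II time_metrics example file still ships under extdata and that read_timeMetrics() returns a data-frame-like object — neither is confirmed by this diff.

# Hypothetical replacement test (sketch only, not part of this changeset).
# Assumes extdata/wgs/SEQC-II.time_metrics.csv.gz is still bundled with dracarys
# and that Wf_dragen$read_timeMetrics() returns a data-frame-like object.
test_that("Wf_dragen reads the bundled SEQC-II time metrics", {
  p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
  # Point the workflow object at the directory holding the example file.
  w <- Wf_dragen$new(path = dirname(p), prefix = "SEQC-II")
  tm <- w$read_timeMetrics(p)
  expect_true(is.data.frame(tm))
  expect_gt(nrow(tm), 0)
})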