Skip to content

Commit

Permalink
Integrate GUNC in BIN_QC subworkflow
Browse files Browse the repository at this point in the history
  • Loading branch information
dialvarezs committed Nov 1, 2024
1 parent 3702329 commit 0eb167a
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 111 deletions.
11 changes: 6 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- [#707](https://github.com/nf-core/mag/pull/707) - Make Bin QC a subworkflow (added by @dialvarezs)
- [#707](https://github.com/nf-core/mag/pull/707) - Added CheckM2 as an alternative bin completeness and QC tool (added by @dialvarezs)
- [#708](https://github.com/nf-core/mag/pull/708) - Added `--exclude_unbins_from_postbinning` parameter to exclude unbinned contigs from post-binning processes, speeding up Prokka in some cases (added by @dialvarezs)

### `Changed`

### `Fixed`

- [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel passed as GUNC input (added by @dialvarezs)
- [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel passed as GUNC input (added by @dialvarezs)

### `Dependencies`

| Tool | Previous version | New version |
| ------- | ---------------- | ----------- |
| CheckM2 | | 1.0.2 |

### `Deprecated`

## 3.2.1 [2024-10-30]
Expand All @@ -32,10 +37,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Dependencies`

| Tool | Previous version | New version |
| ------- | ---------------- | ----------- |
| CheckM2 | | 1.0.2 |

### `Deprecated`

## 3.2.0 [2024-10-27]
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ params {
refine_bins_dastool = false
refine_bins_dastool_threshold = 0.5
postbinning_input = 'raw_bins_only'
exclude_unbins_from_postbinning = false
exclude_unbins_from_postbinning = false

// Bin QC
skip_binqc = false
Expand Down
114 changes: 84 additions & 30 deletions subworkflows/local/bin_qc.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* BUSCO/CheckM/CheckM2: Quantitative measures for the assessment of genome assembly
* BUSCO/CheckM/CheckM2/GUNC: Quantitative measures for the assessment of genome assembly
*/

include { BUSCO_DB_PREPARATION } from '../../modules/local/busco_db_preparation'
Expand All @@ -10,33 +10,42 @@ include { CHECKM_QA } from '../../modules/nf-core/checkm
include { CHECKM_LINEAGEWF } from '../../modules/nf-core/checkm/lineagewf/main'
include { CHECKM2_PREDICT } from '../../modules/nf-core/checkm2/predict/main'
include { COMBINE_TSV as COMBINE_BINQC_TSV } from '../../modules/local/combine_tsv'
include { GUNC_DOWNLOADDB } from '../../modules/nf-core/gunc/downloaddb/main'
include { GUNC_RUN } from '../../modules/nf-core/gunc/run/main'
include { GUNC_MERGECHECKM } from '../../modules/nf-core/gunc/mergecheckm/main'


workflow BIN_QC {
take:
bins // channel: [ val(meta), path(bin) ]
checkm_db
checkm2_db
busco_db
ch_bins // [ [ meta] , fasta ], input bins (mandatory)
ch_checkm_db // [ db ], presupplied CheckM database (optional)
ch_checkm2_db // [ [meta] , db ], presupplied CheckM2 database (optional)
ch_busco_db // [ [meta] , db ], presupplied BUSCO database (optional)
ch_gunc_db // [ db ], presupplied GUNC database (optional)

main:
qc_summary = []
ch_input_bins_for_qc = ch_bins.transpose()
ch_versions = Channel.empty()
multiqc_reports = []
checkm_tsv = []
ch_multiqc_files = Channel.empty()


if (params.binqc_tool == "busco") {
// BUSCO workflow
if (!busco_db.isEmpty()) {
if (busco_db.extension in ['gz', 'tgz']) {
/*
* BUSCO
*/
if (!ch_busco_db.isEmpty()) {
if (ch_busco_db.extension in ['gz', 'tgz']) {
// Expects to be tar.gz!
BUSCO_DB_PREPARATION(busco_db)
BUSCO_DB_PREPARATION(ch_busco_db)
ch_db_for_busco = BUSCO_DB_PREPARATION.out.db.map { meta, db ->
[[id: meta, lineage: 'Y'], db]
}
}
else if (busco_db.isDirectory()) {
else if (ch_busco_db.isDirectory()) {
// Set meta to match expected channel cardinality for BUSCO
ch_db_for_busco = Channel
.of(busco_db)
.of(ch_busco_db)
.map { db ->
def basename = db.getBaseName()
def lineage = basename.contains('odb10') ? 'Y' : 'N'
Expand All @@ -48,8 +57,7 @@ workflow BIN_QC {
else {
// Set BUSCO database to empty to allow for --auto-lineage
ch_db_for_busco = Channel
.of([])
.map { _empty_db -> [[lineage: ''], []] }
.of([[lineage: ''], []])
.collect()
}

Expand All @@ -63,21 +71,25 @@ workflow BIN_QC {
BUSCO_SAVE_DOWNLOAD(ch_downloads)
}

BUSCO(bins, ch_db_for_busco)
BUSCO(ch_input_bins_for_qc, ch_db_for_busco)

BUSCO_SUMMARY(
BUSCO.out.summary_domain.map { it[1] }.collect().ifEmpty([]),
BUSCO.out.summary_specific.map { it[1] }.collect().ifEmpty([]),
BUSCO.out.failed_bin.map { it[1] }.collect().ifEmpty([])
)

multiqc_reports = BUSCO.out.summary_domain.mix(BUSCO.out.summary_specific).map{ it[1] }
summary = BUSCO_SUMMARY.out.summary
ch_multiqc_files = ch_multiqc_files.mix(
BUSCO.out.summary_domain.mix(BUSCO.out.summary_specific).map{ it[1] }
)
qc_summary = BUSCO_SUMMARY.out.summary
ch_versions = ch_versions.mix(BUSCO.out.versions.first())
}
else if (params.binqc_tool == "checkm") {
// CheckM workflow
ch_bins_for_checkmlineagewf = bins
/*
* CheckM
*/
ch_bins_for_checkmlineagewf = ch_input_bins_for_qc
.groupTuple()
.filter { meta, _bins ->
meta.domain != "eukarya"
Expand All @@ -87,7 +99,7 @@ workflow BIN_QC {
ext: fa.extension.unique().join("") // the pipeline ensures that all bins will have the same extension
}

CHECKM_LINEAGEWF(ch_bins_for_checkmlineagewf.reads, ch_bins_for_checkmlineagewf.ext, checkm_db)
CHECKM_LINEAGEWF(ch_bins_for_checkmlineagewf.reads, ch_bins_for_checkmlineagewf.ext, ch_checkm_db)
ch_versions = ch_versions.mix(CHECKM_LINEAGEWF.out.versions.first())

ch_checkmqa_input = CHECKM_LINEAGEWF.out.checkm_output
Expand All @@ -100,23 +112,65 @@ workflow BIN_QC {

COMBINE_BINQC_TSV(CHECKM_QA.out.output.map { it[1] }.collect())

summary = COMBINE_BINQC_TSV.out.combined
qc_summary = COMBINE_BINQC_TSV.out.combined
ch_versions = ch_versions.mix(CHECKM_QA.out.versions.first())
checkm_tsv = CHECKM_QA.out.output
}
else if (params.binqc_tool == "checkm2") {
// CheckM2 workflow
CHECKM2_PREDICT(bins.groupTuple(), checkm2_db)
/*
* CheckM2
*/
CHECKM2_PREDICT(ch_input_bins_for_qc.groupTuple(), ch_checkm2_db)

COMBINE_BINQC_TSV(CHECKM2_PREDICT.out.checkm2_tsv.map { it[1] }.collect())

summary = COMBINE_BINQC_TSV.out.combined
qc_summary = COMBINE_BINQC_TSV.out.combined
ch_versions = ch_versions.mix(CHECKM2_PREDICT.out.versions.first())
}

if (params.run_gunc) {
    /*
     * GUNC
     */
    // Drop entries whose meta.domain is "eukarya", then emit one [meta, bin]
    // tuple per bin file so GUNC_RUN receives bins individually.
    ch_input_bins_for_gunc = ch_bins
        .filter { meta, _bins ->
            meta.domain != "eukarya"
        }
        .flatMap { meta, bins ->
            bins.collect { bin -> [meta, bin] }
        }

    // Use the presupplied database when --gunc_db is set; otherwise download
    // the database type selected by params.gunc_database_type.
    if ( params.gunc_db ) {
        ch_db_for_gunc = ch_gunc_db
    }
    else {
        ch_db_for_gunc = GUNC_DOWNLOADDB(params.gunc_database_type).db
        // `mix` returns a new channel rather than modifying its receiver, so the
        // result has to be assigned back or the versions are silently dropped.
        ch_versions = ch_versions.mix(GUNC_DOWNLOADDB.out.versions)
    }

    GUNC_RUN(ch_input_bins_for_gunc, ch_db_for_gunc)
    ch_versions = ch_versions.mix(GUNC_RUN.out.versions)

    // Concatenate the per-bin GUNC tables into a single summary file.
    // Make sure to keep directory in sync with modules.conf
    GUNC_RUN.out.maxcss_level_tsv
        .map{it[1]}
        .collectFile(name: "gunc_summary.tsv", keepHeader: true, storeDir: "${params.outdir}/GenomeBinning/QC/")

    // Merging GUNC and CheckM results is only possible when CheckM was the
    // selected bin QC tool, since CHECKM_QA is only invoked in that branch.
    if ( params.binqc_tool == 'checkm' ) {
        // Pair GUNC and CheckM outputs on their first element (the meta map).
        ch_input_to_mergecheckm = GUNC_RUN.out.maxcss_level_tsv.combine(CHECKM_QA.out.output, by: 0)

        GUNC_MERGECHECKM(ch_input_to_mergecheckm)
        ch_versions = ch_versions.mix(GUNC_MERGECHECKM.out.versions)

        // Make sure to keep directory in sync with modules.conf
        GUNC_MERGECHECKM.out.tsv
            .map{it[1]}
            .collectFile(name: "gunc_checkm_summary.tsv", keepHeader: true, storeDir: "${params.outdir}/GenomeBinning/QC/")
    }
}


emit:
summary = summary
checkm_tsv = checkm_tsv
multiqc = multiqc_reports
versions = ch_versions
qc_summary = qc_summary
multiqc_files = ch_multiqc_files
versions = ch_versions
}
51 changes: 0 additions & 51 deletions subworkflows/local/gunc_qc.nf

This file was deleted.

32 changes: 8 additions & 24 deletions workflows/mag.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ include { BINNING } from '../subwo
include { BIN_QC } from '../subworkflows/local/bin_qc'
include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement'
include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification'
include { GUNC_QC } from '../subworkflows/local/gunc_qc'
include { GTDBTK } from '../subworkflows/local/gtdbtk'
include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna'
include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification'
Expand Down Expand Up @@ -649,7 +648,7 @@ workflow MAG {
================================================================================
*/

bin_qc_summary = Channel.empty()
ch_bin_qc_summary = Channel.empty()

if (!params.skip_binning || params.ancient_dna) {
BINNING_PREPARATION(
Expand Down Expand Up @@ -792,32 +791,17 @@ workflow MAG {
* Bin QC subworkflows: for checking bin completeness with BUSCO, CHECKM, or CHECKM2, and/or GUNC
*/

ch_input_bins_for_qc = ch_input_for_postbinning.transpose()

BIN_QC(
ch_input_bins_for_qc,
ch_input_for_postbinning,
ch_checkm_db,
ch_checkm2_db,
ch_busco_db
ch_busco_db,
ch_gunc_db
)

bin_qc_summary = BIN_QC.out.summary
ch_bin_qc_summary = BIN_QC.out.qc_summary
ch_versions = ch_versions.mix(BIN_QC.out.versions)

if (params.run_gunc) {
ch_input_bins_for_gunc = ch_input_for_postbinning.filter { meta, _bins ->
meta.domain != "eukarya"
}

GUNC_QC(
ch_input_bins_for_gunc,
ch_gunc_db,
params.binqc_tool == 'checkm' ? BIN_QC.out.checkm_tsv : []
)

ch_versions = ch_versions.mix(GUNC_QC.out.versions)
}

ch_quast_bins_summary = Channel.empty()
if (!params.skip_quast) {
ch_input_for_quast_bins = ch_input_for_postbinning
Expand Down Expand Up @@ -886,7 +870,7 @@ workflow MAG {

GTDBTK(
ch_gtdb_bins,
bin_qc_summary,
ch_bin_qc_summary,
gtdb,
gtdb_mash
)
Expand All @@ -901,7 +885,7 @@ workflow MAG {
if ((!params.skip_binqc) || !params.skip_quast || !params.skip_gtdbtk) {
BIN_SUMMARY(
ch_input_for_binsummary,
bin_qc_summary.ifEmpty([]),
ch_bin_qc_summary.ifEmpty([]),
ch_quast_bins_summary.ifEmpty([]),
ch_gtdbtk_summary.ifEmpty([]),
ch_cat_global_summary.ifEmpty([])
Expand Down Expand Up @@ -1043,7 +1027,7 @@ workflow MAG {
}

if (!params.skip_binning && !params.skip_binqc && params.binqc_tool == 'busco') {
ch_multiqc_files = ch_multiqc_files.mix(BIN_QC.out.multiqc.collect().ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(BIN_QC.out.multiqc_files.collect().ifEmpty([]))
}


Expand Down

0 comments on commit 0eb167a

Please sign in to comment.