From c3a1f33051f785c084f204aa257abdee64f2fdf1 Mon Sep 17 00:00:00 2001 From: tives82 Date: Wed, 27 Dec 2023 12:17:23 -0700 Subject: [PATCH 1/5] added BBMAP_BBDUK.out.clean_reads as input to kraken2 and PREPROCESSING_READ_QC.out.clean_reads as input in walkercreek.nf workflow to only run cleaned reads through IRMA --- subworkflows/local/preprocessing_read_qc.nf | 2 +- workflows/walkercreek.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/preprocessing_read_qc.nf b/subworkflows/local/preprocessing_read_qc.nf index a3d2b5e..eb41d25 100755 --- a/subworkflows/local/preprocessing_read_qc.nf +++ b/subworkflows/local/preprocessing_read_qc.nf @@ -51,7 +51,7 @@ workflow PREPROCESSING_READ_QC { ch_versions = ch_versions.mix(QC_REPORT.out.versions) if ( !params.skip_kraken2 ) { - KRAKEN2_KRAKEN2(reads, db, false, true) + KRAKEN2_KRAKEN2(BBMAP_BBDUK.out.clean_reads, db, false, true) ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions) ch_kraken2report_summary_input = KRAKEN2_KRAKEN2.out.txt diff --git a/workflows/walkercreek.nf b/workflows/walkercreek.nf index 3705992..84b3b4d 100644 --- a/workflows/walkercreek.nf +++ b/workflows/walkercreek.nf @@ -206,7 +206,7 @@ workflow WALKERCREEK { /* SUBWORKFLOW: ASSEMBLY_TYPING_CLADE_VARIABLES - assembly, flu typing/subtyping, and Nextclade variable determination based upon flu 'abricate_subtype' */ - ASSEMBLY_TYPING_CLADE_VARIABLES(ch_all_reads) + ASSEMBLY_TYPING_CLADE_VARIABLES(PREPROCESSING_READ_QC.out.clean_reads) ch_assembly = ASSEMBLY_TYPING_CLADE_VARIABLES.out.assembly ch_HA = ASSEMBLY_TYPING_CLADE_VARIABLES.out.HA ch_NA = ASSEMBLY_TYPING_CLADE_VARIABLES.out.NA @@ -229,7 +229,7 @@ workflow WALKERCREEK { // // MODULE: Run FastQC // - FASTQC (ch_all_reads) + FASTQC (PREPROCESSING_READ_QC.out.clean_reads) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // From a062d9ecb6eb286fbd3817b22ac08b1bc3c34f3f Mon Sep 17 00:00:00 2001 From: tives82 Date: Fri, 12 Jan 2024 14:47:21 
-0700
Subject: [PATCH 2/5] Adding vadr module to workflow

---
Review notes (text between the three-dash line and the first diff is not part
of the commit message):
  * modules/local/vadr.nf: versions.yml is now declared as an output
    (emit: versions) so callers can mix it into their versions channel; the
    existing `vadr` and `log` outputs keep their names and positions.
  * modules/local/vadr.nf: the tool version is read from the v-annotate.pl
    help banner instead of calling `vadr --version`.
  * modules/local/vadr.nf: dropped the unused local `prefix`.
  * The hunk length, diffstat and summary below were updated for the new file
    length (46 lines). The `index` blob id of vadr.nf was NOT recomputed.
  * NOTE(review): line breaks and indentation were re-created from a
    whitespace-collapsed copy of this patch; the leading whitespace of the
    context lines in conf/modules.config is assumed (4-space nesting) and must
    be confirmed against the target file before applying.
  * NOTE(review): ext.args2 pins `--cpu 8` independently of the CPUs granted
    by label 'process_medium' -- TODO confirm the two agree.

 conf/modules.config                           | 11 +++++
 modules/local/vadr.nf                         | 46 +++++++++++++++++++++
 .../local/assembly_typing_clade_variables.nf  | 26 ++++++------
 3 files changed, 71 insertions(+), 12 deletions(-)
 create mode 100755 modules/local/vadr.nf

diff --git a/conf/modules.config b/conf/modules.config
index 0164c0c..5984793 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -156,6 +156,17 @@ process {
             pattern: "*"
         ]
     }
+    withName: 'VADR' {
+        ext.args = '--minlen 60'
+        ext.args2 = '--split --cpu 8 -r --atgonly --xnocomp --nomisc --alt_fail extrant5,extrant3 --mkey flu'
+        ext.when = { }
+        publishDir = [
+            enabled: true,
+            mode: "${params.publish_dir_mode}",
+            path: { "${params.outdir}/vadr"},
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
     withName: ABRICATE_FLU {
         ext.args = '--db insaflu --minid 70 --mincov 60'
         ext.when = { }
diff --git a/modules/local/vadr.nf b/modules/local/vadr.nf
new file mode 100755
index 0000000..2dff676
--- /dev/null
+++ b/modules/local/vadr.nf
@@ -0,0 +1,46 @@
+// Trims terminal ambiguous bases from an assembly, then annotates the trimmed FASTA with VADR.
+// Extra command-line options are read from task.ext:
+//   ext.args -> fasta-trim-terminal-ambigs.pl, ext.args2 -> v-annotate.pl
+process VADR {
+    tag "$meta.id"
+    label 'process_medium'
+
+    container 'quay.io/staphb/vadr:1.6.3'
+
+    input:
+    tuple val(meta), path(assembly)
+
+    output:
+    // v-annotate.pl output directory; optional, so a missing directory is not treated as an error
+    tuple val(meta), path("${meta.id}/") , optional:true, emit: vadr
+    path "*.vadr.log"                    , emit: log
+    path "versions.yml"                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ""
+    def vadr_log = "${meta.id}.vadr.log"
+
+    """
+    fasta-trim-terminal-ambigs.pl \\
+        $args \\
+        $assembly > ${meta.id}.vadr_trimmed.fasta
+
+    v-annotate.pl \\
+        $args2 \\
+        ${meta.id}.vadr_trimmed.fasta \\
+        $meta.id
+
+    # Soft link for traceability
+    ln -s .command.log $vadr_log
+
+    # Version is taken from the "# VADR <version> (<date>)" banner of v-annotate.pl -h
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        vadr: \$(v-annotate.pl -h 2>&1 | grep -m 1 '^# VADR' | cut -d ' ' -f 3)
+    END_VERSIONS
+    """
+}
diff --git
a/subworkflows/local/assembly_typing_clade_variables.nf b/subworkflows/local/assembly_typing_clade_variables.nf index a8b57b4..f8818d5 100755 --- a/subworkflows/local/assembly_typing_clade_variables.nf +++ b/subworkflows/local/assembly_typing_clade_variables.nf @@ -7,6 +7,7 @@ include { IRMA } from '../../modules/local/irma.nf' include { IRMA_CONSENSUS_QC } from '../../modules/local/irma_consensus_qc.nf' include { IRMA_CONSENSUS_QC_REPORTSHEET } from '../../modules/local/irma_consensus_qc_reportsheet.nf' +include { VADR } from '../../modules/local/vadr.nf' include { ABRICATE_FLU } from '../../modules/local/abricate_flu.nf' include { IRMA_ABRICATE_REPORT } from '../../modules/local/irma_abricate_report' include { IRMA_ABRICATE_REPORTSHEET } from '../../modules/local/irma_abricate_reportsheet.nf' @@ -34,11 +35,10 @@ workflow ASSEMBLY_TYPING_CLADE_VARIABLES { clean_reads // file: /path/to/BBMAP_BBDUK/'*.clean*.fastq.gz' main: - ch_versions = Channel.empty() - ch_assembly = Channel.empty() - ch_HA = Channel.empty() - ch_NA = Channel.empty() - ch_dataset = Channel.empty() + ch_versions = Channel.empty() + ch_assembly = Channel.empty() + ch_HA = Channel.empty() + ch_NA = Channel.empty() IRMA(clean_reads, irma_module) ch_assembly = IRMA.out.assembly @@ -66,6 +66,8 @@ workflow ASSEMBLY_TYPING_CLADE_VARIABLES { IRMA_CONSENSUS_QC_REPORTSHEET(ch_irma_consensus_qc_results) irma_consensus_qc_tsv = IRMA_CONSENSUS_QC_REPORTSHEET.out.irma_consensus_qc_tsv + VADR(IRMA.out.assembly) + ABRICATE_FLU(IRMA.out.assembly) ch_versions = ch_versions.mix(ABRICATE_FLU.out.versions) @@ -104,12 +106,12 @@ workflow ASSEMBLY_TYPING_CLADE_VARIABLES { ) emit: - HA = IRMA.out.HA - NA = IRMA.out.NA - typing_report_tsv = IRMA_ABRICATE_REPORTSHEET.out.typing_report_tsv - irma_consensus_qc_tsv = IRMA_CONSENSUS_QC_REPORTSHEET.out.irma_consensus_qc_tsv - assembly = ch_assembly - dataset = ch_dataset - versions = ch_versions + HA = IRMA.out.HA + NA = IRMA.out.NA + typing_report_tsv = 
IRMA_ABRICATE_REPORTSHEET.out.typing_report_tsv + irma_consensus_qc_tsv = IRMA_CONSENSUS_QC_REPORTSHEET.out.irma_consensus_qc_tsv + assembly = ch_assembly + dataset = ch_dataset + versions = ch_versions } From 0a7115cb99893b209e6bfa257ad5bf0b426f4f18 Mon Sep 17 00:00:00 2001 From: tives82 Date: Tue, 16 Jan 2024 15:04:24 -0700 Subject: [PATCH 3/5] added vadr module --- README.md | 3 ++- docs/output.md | 4 +++- nextflow.config | 1 + nextflow_schema.json | 5 +++++ subworkflows/local/assembly_typing_clade_variables.nf | 2 +- subworkflows/local/preprocessing_read_qc.nf | 6 ++++-- 6 files changed, 16 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a325f09..af42fd8 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool > **Currently prepares influenza samples (paired-end FASTQ files) for assembly. These steps also provide different quality reports for sample evaluation.** * Combine FASTQ file lanes, if they were provided with multiple lanes, into unified FASTQ files to ensure they are organized and named consistently (`Lane_Merge`). -* Remove human read data with the ([`NCBI_SRA_Human_Scrubber`](https://github.com/ncbi/sra-human-scrubber) for uploading reads to to public repositories for DNA sequencing data. +* Remove human read data with the [`NCBI_SRA_Human_Scrubber`](https://github.com/ncbi/sra-human-scrubber) for uploading reads to to public repositories for DNA sequencing data. * Filter unpaired reads from FASTQ files (`SeqKit_Pair`). * Trim reads and assess quality (`FaQCs`). * Remove adapter sequences and phix reference with (`BBMap_BBDuk`). @@ -51,6 +51,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool * Assembly of influenza gene segments with (`IRMA`) using the built-in FLU module. Also, influenza typing and H/N subtype classifications are made. * QC of consensus assembly (`IRMA_Consensus_QC`). 
* Generate IRMA consensus QC report (`IRMA_Consensus_QC_Reportsheet`) +* Annotation of IRMA consensus sequences with (`VADR`) * Influenza A type and H/N subtype classification as well as influenza B type and lineage classification using (`Abricate_Flu`). The database used in this task is [InsaFlu](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-018-0555-0). * Generate a summary report for influenza classification results (`IMRA_Abricate_Reportsheet`). * Gather corresponding Nextclade dataset using the Abricate_Flu classifcation results (`Nextclade_Variables`). diff --git a/docs/output.md b/docs/output.md index 4f8afa4..cbc02fb 100644 --- a/docs/output.md +++ b/docs/output.md @@ -31,7 +31,8 @@ results/ ├── pipeline_info ├── qc_report ├── reports -└── SUMMARY_REPORT +├── SUMMARY_REPORT +└── vadr ``` ## Pipeline overview @@ -92,6 +93,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d * Assembly of influenza gene segments with (`IRMA`) using the built-in FLU module. Also, influenza typing and H/N subtype classifications are made. * QC of consensus assembly (`IRMA_Consensus_QC`). * Generate IRMA consensus QC report (`IRMA_Consensus_QC_Reportsheet`) +* Annotation of IRMA consensus sequences with (`VADR`) * Influenza A type and H/N subtype classification as well as influenza B type and lineage classification using (`Abricate_Flu`). The database used in this task is [InsaFlu](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-018-0555-0). * Generate a summary report for influenza classification results (`IMRA_Abricate_Reportsheet`). * Gather corresponding Nextclade dataset using the Abricate_Flu classifcation results (`Nextclade_Variables`). 
diff --git a/nextflow.config b/nextflow.config index 61d5ecd..325a7a5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,6 +59,7 @@ params { irma_module = "FLU" genome_length = 13500 keep_ref_deletions = true + skip_ncbi_sra_human_scrubber = false skip_kraken2 = false skip_nextclade = false adapters_fasta = 'https://raw.githubusercontent.com/BioInfoTools/BBMap/master/resources/adapters.fa' diff --git a/nextflow_schema.json b/nextflow_schema.json index 887fcb7..1b70f52 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -315,6 +315,11 @@ "default": false, "description": "Skip Kraken2 option." }, + "skip_ncbi_sra_human_scrubber": { + "type": "boolean", + "default": false, + "description": "Skip NCBI SRA human scrubber option." + }, "skip_nextclade": { "type": "boolean", "default": false, diff --git a/subworkflows/local/assembly_typing_clade_variables.nf b/subworkflows/local/assembly_typing_clade_variables.nf index f8818d5..6df7232 100755 --- a/subworkflows/local/assembly_typing_clade_variables.nf +++ b/subworkflows/local/assembly_typing_clade_variables.nf @@ -19,7 +19,7 @@ include { NEXTCLADE_VARIABLES } from '../../modules/local/nextc ============================================================================================================ */ -def irma_module = 'FLU' +def irma_module = '' if (params.irma_module) { irma_module = params.irma_module } diff --git a/subworkflows/local/preprocessing_read_qc.nf b/subworkflows/local/preprocessing_read_qc.nf index eb41d25..44a12af 100755 --- a/subworkflows/local/preprocessing_read_qc.nf +++ b/subworkflows/local/preprocessing_read_qc.nf @@ -32,8 +32,10 @@ workflow PREPROCESSING_READ_QC { ch_kraken2reportsheet = Channel.empty() ch_kraken2_reportsheet_tsv = Channel.empty() - NCBI_SRA_HUMAN_SCRUBBER(reads) - ch_versions = ch_versions.mix(NCBI_SRA_HUMAN_SCRUBBER.out.versions) + if ( !params.skip_ncbi_sra_human_scrubber ) { + NCBI_SRA_HUMAN_SCRUBBER(reads) + ch_versions = 
ch_versions.mix(NCBI_SRA_HUMAN_SCRUBBER.out.versions) + } SEQKIT_PAIR(reads) ch_versions = ch_versions.mix(SEQKIT_PAIR.out.versions) From e4323e9ccea3b7de31dcc83453215dcdf3aa3848 Mon Sep 17 00:00:00 2001 From: tives82 Date: Fri, 19 Jan 2024 10:20:04 -0700 Subject: [PATCH 4/5] add skip_vadr param option --- nextflow.config | 1 + nextflow_schema.json | 5 +++++ subworkflows/local/assembly_typing_clade_variables.nf | 4 +++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 325a7a5..0887d3a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,6 +60,7 @@ params { genome_length = 13500 keep_ref_deletions = true skip_ncbi_sra_human_scrubber = false + skip_vadr = false skip_kraken2 = false skip_nextclade = false adapters_fasta = 'https://raw.githubusercontent.com/BioInfoTools/BBMap/master/resources/adapters.fa' diff --git a/nextflow_schema.json b/nextflow_schema.json index 1b70f52..80e7167 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -320,6 +320,11 @@ "default": false, "description": "Skip NCBI SRA human scrubber option." }, + "skip_vadr": { + "type": "boolean", + "default": false, + "description": "Skip the VADR module option." 
+ }, "skip_nextclade": { "type": "boolean", "default": false, diff --git a/subworkflows/local/assembly_typing_clade_variables.nf b/subworkflows/local/assembly_typing_clade_variables.nf index 6df7232..6650dd4 100755 --- a/subworkflows/local/assembly_typing_clade_variables.nf +++ b/subworkflows/local/assembly_typing_clade_variables.nf @@ -66,7 +66,9 @@ workflow ASSEMBLY_TYPING_CLADE_VARIABLES { IRMA_CONSENSUS_QC_REPORTSHEET(ch_irma_consensus_qc_results) irma_consensus_qc_tsv = IRMA_CONSENSUS_QC_REPORTSHEET.out.irma_consensus_qc_tsv - VADR(IRMA.out.assembly) + if ( !params.skip_vadr ) { + VADR(IRMA.out.assembly) + } ABRICATE_FLU(IRMA.out.assembly) ch_versions = ch_versions.mix(ABRICATE_FLU.out.versions) From 9a03aadb411fab69b0fe6affa168ab99d23ba896 Mon Sep 17 00:00:00 2001 From: tives82 Date: Wed, 24 Jan 2024 10:14:31 -0700 Subject: [PATCH 5/5] Format devcontainer.json with Prettier --- .devcontainer/devcontainer.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a58..8f8680c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -17,11 +17,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.linting.pylintPath": "/opt/conda/bin/pylint", }, // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, }