From c3a1f33051f785c084f204aa257abdee64f2fdf1 Mon Sep 17 00:00:00 2001
From: tives82 <tiverson@utah.gov>
Date: Wed, 27 Dec 2023 12:17:23 -0700
Subject: [PATCH 1/5] added BBMAP_BBDUK.out.clean_reads as input to kraken2 and
 PREPROCESSING_READ_QC.out.clean_reads as input in walkercreek.nf workflow to
 only run cleaned reads through IRMA

---
 subworkflows/local/preprocessing_read_qc.nf | 2 +-
 workflows/walkercreek.nf                    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/preprocessing_read_qc.nf b/subworkflows/local/preprocessing_read_qc.nf
index a3d2b5e..eb41d25 100755
--- a/subworkflows/local/preprocessing_read_qc.nf
+++ b/subworkflows/local/preprocessing_read_qc.nf
@@ -51,7 +51,7 @@ workflow PREPROCESSING_READ_QC {
     ch_versions = ch_versions.mix(QC_REPORT.out.versions)
 
     if ( !params.skip_kraken2 ) {
-        KRAKEN2_KRAKEN2(reads, db, false, true)
+        KRAKEN2_KRAKEN2(BBMAP_BBDUK.out.clean_reads, db, false, true)
         ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions)
 
         ch_kraken2report_summary_input = KRAKEN2_KRAKEN2.out.txt
diff --git a/workflows/walkercreek.nf b/workflows/walkercreek.nf
index 3705992..84b3b4d 100644
--- a/workflows/walkercreek.nf
+++ b/workflows/walkercreek.nf
@@ -206,7 +206,7 @@ workflow WALKERCREEK {
     /*
         SUBWORKFLOW: ASSEMBLY_TYPING_CLADE_VARIABLES - assembly, flu typing/subtyping, and Nextclade variable determination based upon flu 'abricate_subtype'
     */
-    ASSEMBLY_TYPING_CLADE_VARIABLES(ch_all_reads)
+    ASSEMBLY_TYPING_CLADE_VARIABLES(PREPROCESSING_READ_QC.out.clean_reads)
     ch_assembly = ASSEMBLY_TYPING_CLADE_VARIABLES.out.assembly
     ch_HA = ASSEMBLY_TYPING_CLADE_VARIABLES.out.HA
     ch_NA = ASSEMBLY_TYPING_CLADE_VARIABLES.out.NA
@@ -229,7 +229,7 @@ workflow WALKERCREEK {
     //
     // MODULE: Run FastQC
     //
-    FASTQC (ch_all_reads)
+    FASTQC (PREPROCESSING_READ_QC.out.clean_reads)
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())
 
     //

From a062d9ecb6eb286fbd3817b22ac08b1bc3c34f3f Mon Sep 17 00:00:00 2001
From: tives82 <tiverson@utah.gov>
Date: Fri, 12 Jan 2024 14:47:21 -0700
Subject: [PATCH 2/5] Adding vadr module to workflow

---
 conf/modules.config                           | 11 +++++
 modules/local/vadr.nf                         | 41 +++++++++++++++++++
 .../local/assembly_typing_clade_variables.nf  | 26 ++++++------
 3 files changed, 66 insertions(+), 12 deletions(-)
 create mode 100755 modules/local/vadr.nf

diff --git a/conf/modules.config b/conf/modules.config
index 0164c0c..5984793 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -156,6 +156,17 @@ process {
             pattern: "*"
         ]
     }
+    withName: 'VADR' {  // settings applied to the local VADR process
+        ext.args   = '--minlen 60'  // forwarded to fasta-trim-terminal-ambigs.pl by the VADR module
+        ext.args2  = '--split --cpu 8 -r --atgonly --xnocomp --nomisc --alt_fail extrant5,extrant3 --mkey flu'  // forwarded to v-annotate.pl; NOTE(review): --cpu is fixed at 8 regardless of the CPUs the task requests — confirm it matches the process label
+        ext.when         = {  }  // empty closure, same form as ABRICATE_FLU below; presumably leaves the process enabled — TODO confirm
+        publishDir       = [
+            enabled: true,
+            mode: "${params.publish_dir_mode}",
+            path: { "${params.outdir}/vadr"},  // published under <outdir>/vadr
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }  // maps versions.yml to null; every other file keeps its own name
+        ]
+    }
     withName: ABRICATE_FLU {
         ext.args = '--db insaflu --minid 70 --mincov 60'
         ext.when   = {  }
diff --git a/modules/local/vadr.nf b/modules/local/vadr.nf
new file mode 100755
index 0000000..2dff676
--- /dev/null
+++ b/modules/local/vadr.nf
@@ -0,0 +1,41 @@
+// Trims terminal ambiguous bases from an assembly FASTA, then annotates the trimmed sequences with v-annotate.pl.
+process VADR {
+    tag "$meta.id"
+    label 'process_medium'
+    container 'quay.io/staphb/vadr:1.6.3'
+
+    input:
+    tuple val(meta), path(assembly)      // meta.id names every output; assembly is the FASTA to annotate
+
+    output:
+    tuple val(meta), path("${meta.id}/") , optional:true, emit: vadr       // v-annotate.pl output directory
+    path "*.vadr.log"                    , emit: log
+    path "versions.yml"                  , emit: versions                  // written by the script below
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args     = task.ext.args  ?: ''  // options for fasta-trim-terminal-ambigs.pl
+    def args2    = task.ext.args2 ?: ''  // options for v-annotate.pl
+    def vadr_log = "${meta.id}.vadr.log"
+
+    """
+    fasta-trim-terminal-ambigs.pl \\
+    $args \\
+    $assembly > ${meta.id}.vadr_trimmed.fasta
+
+    v-annotate.pl \\
+    $args2 \\
+    ${meta.id}.vadr_trimmed.fasta \\
+    $meta.id
+
+    # Soft link for traceability
+    ln -s .command.log $vadr_log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        vadr: \$(v-annotate.pl -h | grep '^# VADR' | sed 's/^# VADR //; s/ .*//')
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/local/assembly_typing_clade_variables.nf b/subworkflows/local/assembly_typing_clade_variables.nf
index a8b57b4..f8818d5 100755
--- a/subworkflows/local/assembly_typing_clade_variables.nf
+++ b/subworkflows/local/assembly_typing_clade_variables.nf
@@ -7,6 +7,7 @@
 include { IRMA                                 } from '../../modules/local/irma.nf'
 include { IRMA_CONSENSUS_QC                    } from '../../modules/local/irma_consensus_qc.nf'
 include { IRMA_CONSENSUS_QC_REPORTSHEET        } from '../../modules/local/irma_consensus_qc_reportsheet.nf'
+include { VADR                                 } from '../../modules/local/vadr.nf'
 include { ABRICATE_FLU                         } from '../../modules/local/abricate_flu.nf'
 include { IRMA_ABRICATE_REPORT                 } from '../../modules/local/irma_abricate_report'
 include { IRMA_ABRICATE_REPORTSHEET            } from '../../modules/local/irma_abricate_reportsheet.nf'
@@ -34,11 +35,10 @@ workflow ASSEMBLY_TYPING_CLADE_VARIABLES {
     clean_reads // file: /path/to/BBMAP_BBDUK/'*.clean*.fastq.gz'
 
     main:
-    ch_versions            = Channel.empty()
-    ch_assembly            = Channel.empty()
-    ch_HA                  = Channel.empty()
-    ch_NA                  = Channel.empty()
-    ch_dataset             = Channel.empty()
+    ch_versions                        = Channel.empty()
+    ch_assembly                        = Channel.empty()
+    ch_HA                              = Channel.empty()
+    ch_NA                              = Channel.empty()
 
     IRMA(clean_reads, irma_module)
     ch_assembly = IRMA.out.assembly
@@ -66,6 +66,8 @@ workflow ASSEMBLY_TYPING_CLADE_VARIABLES {
     IRMA_CONSENSUS_QC_REPORTSHEET(ch_irma_consensus_qc_results)
     irma_consensus_qc_tsv = IRMA_CONSENSUS_QC_REPORTSHEET.out.irma_consensus_qc_tsv
 
+    VADR(IRMA.out.assembly)
+
     ABRICATE_FLU(IRMA.out.assembly)
     ch_versions = ch_versions.mix(ABRICATE_FLU.out.versions)
 
@@ -104,12 +106,12 @@ workflow ASSEMBLY_TYPING_CLADE_VARIABLES {
                                     )
 
     emit:
-    HA                         = IRMA.out.HA
-    NA                         = IRMA.out.NA
-    typing_report_tsv          = IRMA_ABRICATE_REPORTSHEET.out.typing_report_tsv
-    irma_consensus_qc_tsv      = IRMA_CONSENSUS_QC_REPORTSHEET.out.irma_consensus_qc_tsv
-    assembly                   = ch_assembly
-    dataset                    = ch_dataset
-    versions                   = ch_versions
+    HA                              = IRMA.out.HA
+    NA                              = IRMA.out.NA
+    typing_report_tsv               = IRMA_ABRICATE_REPORTSHEET.out.typing_report_tsv
+    irma_consensus_qc_tsv           = IRMA_CONSENSUS_QC_REPORTSHEET.out.irma_consensus_qc_tsv
+    assembly                        = ch_assembly
+    dataset                         = ch_dataset
+    versions                        = ch_versions
 
 }

From 0a7115cb99893b209e6bfa257ad5bf0b426f4f18 Mon Sep 17 00:00:00 2001
From: tives82 <tiverson@utah.gov>
Date: Tue, 16 Jan 2024 15:04:24 -0700
Subject: [PATCH 3/5] document VADR step and add skip_ncbi_sra_human_scrubber param

---
 README.md                                             | 3 ++-
 docs/output.md                                        | 4 +++-
 nextflow.config                                       | 1 +
 nextflow_schema.json                                  | 5 +++++
 subworkflows/local/assembly_typing_clade_variables.nf | 2 +-
 subworkflows/local/preprocessing_read_qc.nf           | 6 ++++--
 6 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index a325f09..af42fd8 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
 > **Currently prepares influenza samples (paired-end FASTQ files) for assembly. These steps also provide different quality reports for sample evaluation.**
 
 * Combine FASTQ file lanes, if they were provided with multiple lanes, into unified FASTQ files to ensure they are organized and named consistently (`Lane_Merge`).
-* Remove human read data with the ([`NCBI_SRA_Human_Scrubber`](https://github.com/ncbi/sra-human-scrubber) for uploading reads to to public repositories for DNA sequencing data.
+* Remove human read data with the [`NCBI_SRA_Human_Scrubber`](https://github.com/ncbi/sra-human-scrubber) for uploading reads to public repositories for DNA sequencing data.
 * Filter unpaired reads from FASTQ files (`SeqKit_Pair`).
 * Trim reads and assess quality (`FaQCs`).
 * Remove adapter sequences and phix reference with (`BBMap_BBDuk`).
@@ -51,6 +51,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
 * Assembly of influenza gene segments with (`IRMA`) using the built-in FLU module. Also, influenza typing and H/N subtype classifications are made.
 * QC of consensus assembly (`IRMA_Consensus_QC`).
 * Generate IRMA consensus QC report (`IRMA_Consensus_QC_Reportsheet`)
+* Annotation of IRMA consensus sequences with (`VADR`)
 * Influenza A type and H/N subtype classification as well as influenza B type and lineage classification using (`Abricate_Flu`). The database used in this task is [InsaFlu](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-018-0555-0).
 * Generate a summary report for influenza classification results (`IMRA_Abricate_Reportsheet`).
 * Gather corresponding Nextclade dataset using the Abricate_Flu classifcation results (`Nextclade_Variables`).
diff --git a/docs/output.md b/docs/output.md
index 4f8afa4..cbc02fb 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -31,7 +31,8 @@ results/
 ├── pipeline_info
 ├── qc_report
 ├── reports
-└── SUMMARY_REPORT
+├── SUMMARY_REPORT
+└── vadr
 ```
 
 ## Pipeline overview
@@ -92,6 +93,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 * Assembly of influenza gene segments with (`IRMA`) using the built-in FLU module. Also, influenza typing and H/N subtype classifications are made.
 * QC of consensus assembly (`IRMA_Consensus_QC`).
 * Generate IRMA consensus QC report (`IRMA_Consensus_QC_Reportsheet`)
+* Annotation of IRMA consensus sequences with (`VADR`)
 * Influenza A type and H/N subtype classification as well as influenza B type and lineage classification using (`Abricate_Flu`). The database used in this task is [InsaFlu](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-018-0555-0).
 * Generate a summary report for influenza classification results (`IMRA_Abricate_Reportsheet`).
 * Gather corresponding Nextclade dataset using the Abricate_Flu classifcation results (`Nextclade_Variables`).
diff --git a/nextflow.config b/nextflow.config
index 61d5ecd..325a7a5 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -59,6 +59,7 @@ params {
     irma_module                  = "FLU"
     genome_length                = 13500
     keep_ref_deletions           = true
+    skip_ncbi_sra_human_scrubber = false
     skip_kraken2                 = false
     skip_nextclade               = false
     adapters_fasta               = 'https://raw.githubusercontent.com/BioInfoTools/BBMap/master/resources/adapters.fa'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 887fcb7..1b70f52 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -315,6 +315,11 @@
                     "default": false,
                     "description": "Skip Kraken2 option."
                 },
+                "skip_ncbi_sra_human_scrubber": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "Skip NCBI SRA human scrubber option."
+                },
                 "skip_nextclade": {
                     "type": "boolean",
                     "default": false,
diff --git a/subworkflows/local/assembly_typing_clade_variables.nf b/subworkflows/local/assembly_typing_clade_variables.nf
index f8818d5..6df7232 100755
--- a/subworkflows/local/assembly_typing_clade_variables.nf
+++ b/subworkflows/local/assembly_typing_clade_variables.nf
@@ -19,7 +19,7 @@ include { NEXTCLADE_VARIABLES                  } from '../../modules/local/nextc
 ============================================================================================================
 */
 
-def irma_module = 'FLU'
+def irma_module = ''
 if (params.irma_module) {
     irma_module = params.irma_module
 }
diff --git a/subworkflows/local/preprocessing_read_qc.nf b/subworkflows/local/preprocessing_read_qc.nf
index eb41d25..44a12af 100755
--- a/subworkflows/local/preprocessing_read_qc.nf
+++ b/subworkflows/local/preprocessing_read_qc.nf
@@ -32,8 +32,10 @@ workflow PREPROCESSING_READ_QC {
     ch_kraken2reportsheet      = Channel.empty()
     ch_kraken2_reportsheet_tsv = Channel.empty()
 
-    NCBI_SRA_HUMAN_SCRUBBER(reads)
-    ch_versions = ch_versions.mix(NCBI_SRA_HUMAN_SCRUBBER.out.versions)
+    if ( !params.skip_ncbi_sra_human_scrubber ) {
+        NCBI_SRA_HUMAN_SCRUBBER(reads)
+        ch_versions = ch_versions.mix(NCBI_SRA_HUMAN_SCRUBBER.out.versions)
+    }
 
     SEQKIT_PAIR(reads)
     ch_versions = ch_versions.mix(SEQKIT_PAIR.out.versions)

From e4323e9ccea3b7de31dcc83453215dcdf3aa3848 Mon Sep 17 00:00:00 2001
From: tives82 <tiverson@utah.gov>
Date: Fri, 19 Jan 2024 10:20:04 -0700
Subject: [PATCH 4/5] add skip_vadr param option

---
 nextflow.config                                       | 1 +
 nextflow_schema.json                                  | 5 +++++
 subworkflows/local/assembly_typing_clade_variables.nf | 4 +++-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 325a7a5..0887d3a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -60,6 +60,7 @@ params {
     genome_length                = 13500
     keep_ref_deletions           = true
     skip_ncbi_sra_human_scrubber = false
+    skip_vadr                    = false
     skip_kraken2                 = false
     skip_nextclade               = false
     adapters_fasta               = 'https://raw.githubusercontent.com/BioInfoTools/BBMap/master/resources/adapters.fa'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 1b70f52..80e7167 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -320,6 +320,11 @@
                     "default": false,
                     "description": "Skip NCBI SRA human scrubber option."
                 },
+                "skip_vadr": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "Skip the VADR module option."
+                },
                 "skip_nextclade": {
                     "type": "boolean",
                     "default": false,
diff --git a/subworkflows/local/assembly_typing_clade_variables.nf b/subworkflows/local/assembly_typing_clade_variables.nf
index 6df7232..6650dd4 100755
--- a/subworkflows/local/assembly_typing_clade_variables.nf
+++ b/subworkflows/local/assembly_typing_clade_variables.nf
@@ -66,7 +66,9 @@ workflow ASSEMBLY_TYPING_CLADE_VARIABLES {
     IRMA_CONSENSUS_QC_REPORTSHEET(ch_irma_consensus_qc_results)
     irma_consensus_qc_tsv = IRMA_CONSENSUS_QC_REPORTSHEET.out.irma_consensus_qc_tsv
 
-    VADR(IRMA.out.assembly)
+    if ( !params.skip_vadr ) {
+        VADR(IRMA.out.assembly)
+    }
 
     ABRICATE_FLU(IRMA.out.assembly)
     ch_versions = ch_versions.mix(ABRICATE_FLU.out.versions)

From 9a03aadb411fab69b0fe6affa168ab99d23ba896 Mon Sep 17 00:00:00 2001
From: tives82 <tiverson@utah.gov>
Date: Wed, 24 Jan 2024 10:14:31 -0700
Subject: [PATCH 5/5] Format devcontainer.json with Prettier

---
 .devcontainer/devcontainer.json | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index ea27a58..8f8680c 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -17,11 +17,11 @@
                 "python.linting.flake8Path": "/opt/conda/bin/flake8",
                 "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
                 "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
-                "python.linting.pylintPath": "/opt/conda/bin/pylint"
+                "python.linting.pylintPath": "/opt/conda/bin/pylint",
             },
 
             // Add the IDs of extensions you want installed when the container is created.
-            "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
-        }
-    }
+            "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"],
+        },
+    },
 }