From b668869947da5337a9c0ae98ede932d3b9a455b7 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Wed, 29 Mar 2023 07:48:50 +0000 Subject: [PATCH 01/17] installing freyja using nf-core --- modules.json | 25 ++++++ modules/nf-core/freyja/boot/main.nf | 56 ++++++++++++ modules/nf-core/freyja/boot/meta.yml | 83 ++++++++++++++++++ modules/nf-core/freyja/demix/main.nf | 52 +++++++++++ modules/nf-core/freyja/demix/meta.yml | 74 ++++++++++++++++ modules/nf-core/freyja/update/main.nf | 49 +++++++++++ modules/nf-core/freyja/update/meta.yml | 40 +++++++++ modules/nf-core/freyja/variants/main.nf | 50 +++++++++++ modules/nf-core/freyja/variants/meta.yml | 52 +++++++++++ .../bam_variant_demix_boot_freyja/main.nf | 77 +++++++++++++++++ .../bam_variant_demix_boot_freyja/meta.yml | 86 +++++++++++++++++++ 11 files changed, 644 insertions(+) create mode 100644 modules/nf-core/freyja/boot/main.nf create mode 100644 modules/nf-core/freyja/boot/meta.yml create mode 100644 modules/nf-core/freyja/demix/main.nf create mode 100644 modules/nf-core/freyja/demix/meta.yml create mode 100644 modules/nf-core/freyja/update/main.nf create mode 100644 modules/nf-core/freyja/update/meta.yml create mode 100644 modules/nf-core/freyja/variants/main.nf create mode 100644 modules/nf-core/freyja/variants/meta.yml create mode 100644 subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf create mode 100644 subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml diff --git a/modules.json b/modules.json index 4e7dec6f..31b725ab 100644 --- a/modules.json +++ b/modules.json @@ -120,6 +120,26 @@ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, + "freyja/boot": { + "branch": "master", + "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "freyja/demix": { + "branch": "master", + "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "freyja/update": { + 
"branch": "master", + "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "freyja/variants": { + "branch": "master", + "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", @@ -279,6 +299,11 @@ "git_sha": "b4b7f89e7fd6d2293f0c176213f710e0bcdaf19e", "installed_by": ["bam_sort_stats_samtools", "bam_markduplicates_picard"] }, + "bam_variant_demix_boot_freyja": { + "branch": "master", + "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "installed_by": ["subworkflows"] + }, "fastq_align_bowtie2": { "branch": "master", "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", diff --git a/modules/nf-core/freyja/boot/main.nf b/modules/nf-core/freyja/boot/main.nf new file mode 100644 index 00000000..c11d9953 --- /dev/null +++ b/modules/nf-core/freyja/boot/main.nf @@ -0,0 +1,56 @@ +process FREYJA_BOOT { + tag "$meta.id" + label 'process_long' + + conda "bioconda::freyja=1.3.12" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/freyja:1.3.12--pyhdfd78af_0': + 'quay.io/biocontainers/freyja:1.3.12--pyhdfd78af_0' }" + + input: + tuple val(meta), path(variants), path(depths) + val repeats + path barcodes + path lineages_meta + + output: + tuple val(meta), path("*lineages.csv") , emit: lineages + tuple val(meta), path("*summarized.csv"), emit: summarized + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + freyja \\ + boot \\ + $args \\ + --nt $task.cpus \\ + --nb $repeats \\ + --output_base $prefix \\ + --barcodes $barcodes \\ + --meta $lineages_meta \\ + $variants \\ + $depths + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_lineages.csv + touch ${prefix}_summarized.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freyja/boot/meta.yml b/modules/nf-core/freyja/boot/meta.yml new file mode 100644 index 00000000..3eca1568 --- /dev/null +++ b/modules/nf-core/freyja/boot/meta.yml @@ -0,0 +1,83 @@ +name: "freyja_boot" +description: Bootstrap sample demixing by resampling each site based on a multinomial distribution of read depth across all sites, where the event probabilities were determined by the fraction of the total sample reads found at each site, followed by a secondary resampling at each site according to a multinomial distribution (that is, binomial when there was only one SNV at a site), where event probabilities were determined by the frequencies of each base at the site, and the number of trials is given by the sequencing depth.
+keywords: + - variants + - fasta + - deconvolution + - wastewater + - bootstrapping +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "http://dx.doi.org/10.1038/s41586-022-05049-6" + licence: "['BSD-2-Clause']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variants: + type: file + description: File containing identified variants in a gff-like format + pattern: "*.variants.tsv" + + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - depths: + type: file + description: File containing depth of the variants + pattern: "*.depth.tsv" + + - repeats: + type: value (int) + description: Number of bootstrap repeats to perform + + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - barcodes: + type: file + description: File containing lineage defining barcodes + pattern: "*barcodes.csv" + + - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - lineages_meta: + type: file + description: File containing lineage metadata that correspond to barcodes + pattern: "*lineages.json" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - lineages: + type: file + description: a csv file that includes the lineages present and their corresponding abundances + pattern: "*lineages.csv" + - summarized: + type: file + description: a csv file that includes the lineages present but summarized by constellation and their corresponding abundances + pattern: "*summarized.csv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/demix/main.nf b/modules/nf-core/freyja/demix/main.nf new file mode 100644 index 00000000..6587cb3c --- /dev/null +++ b/modules/nf-core/freyja/demix/main.nf @@ -0,0 +1,52 @@ +process FREYJA_DEMIX { + tag "$meta.id" + label 'process_low' + + conda "bioconda::freyja=1.3.12" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/freyja:1.3.12--pyhdfd78af_0': + 'quay.io/biocontainers/freyja:1.3.12--pyhdfd78af_0' }" + + input: + tuple val(meta), path(variants), path(depths) + path barcodes + path lineages_meta + + output: + tuple val(meta), path("*.tsv"), emit: demix + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + freyja \\ + demix \\ + $args \\ + --output ${prefix}.tsv \\ + --barcodes $barcodes \\ + --meta $lineages_meta \\ + $variants \\ + $depths + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/freyja/demix/meta.yml 
b/modules/nf-core/freyja/demix/meta.yml new file mode 100644 index 00000000..a47663df --- /dev/null +++ b/modules/nf-core/freyja/demix/meta.yml @@ -0,0 +1,74 @@ +name: "freyja_demix" +description: specify the relative abundance of each known haplotype +keywords: + - variants + - fasta + - deconvolution + - wastewater +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "http://dx.doi.org/10.1038/s41586-022-05049-6" + licence: "['BSD-2-Clause']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variants: + type: file + description: File containing identified variants in a gff-like format + pattern: "*.variants.tsv" + + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - depths: + type: file + description: File containing depth of the variants + pattern: "*.depth.tsv" + + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - barcodes: + type: file + description: File containing lineage defining barcodes + pattern: "*barcodes.csv" + + - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - lineages_meta: + type: file + description: File containing lineage metadata that correspond to barcodes + pattern: "*lineages.json" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - demix: + type: file + description: a tsv file that includes the lineages present, their corresponding abundances, and summarization by constellation + pattern: "*.demix.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/update/main.nf b/modules/nf-core/freyja/update/main.nf new file mode 100644 index 00000000..1711b14c --- /dev/null +++ b/modules/nf-core/freyja/update/main.nf @@ -0,0 +1,49 @@ +process FREYJA_UPDATE { + tag "$db_name" + label 'process_single' + + conda "bioconda::freyja=1.3.12" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/freyja:1.3.12--pyhdfd78af_0': + 'quay.io/biocontainers/freyja:1.3.12--pyhdfd78af_0' }" + + input: + val db_name + + output: + path "${db_name}/usher_barcodes.csv" , emit: barcodes + path "${db_name}/lineages.yml" , emit: lineages_topology + path "${db_name}/curated_lineages.json", emit: lineages_meta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir -p $db_name + freyja \\ + update \\ + --outdir $db_name + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: + """ + mkdir $db_name + + touch "${db_name}/usher_barcodes.csv" + touch "${db_name}/lineages.yml" + touch "${db_name}/curated_lineages.json" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freyja/update/meta.yml b/modules/nf-core/freyja/update/meta.yml new file mode 100644 index 00000000..2af5d662 --- /dev/null +++ b/modules/nf-core/freyja/update/meta.yml @@ -0,0 +1,40 @@ +name: 
"freyja_update" +description: downloads new versions of the curated SARS-CoV-2 lineage file and barcodes +keywords: + - database + - variants + - UShER +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "http://dx.doi.org/10.1038/s41586-022-05049-6" + licence: "['BSD-2-Clause']" + +input: + - db_name: + type: string + description: "The name of the database directory" + +output: + - barcodes: + type: file + description: File containing lineage defining barcodes + pattern: "*barcodes.csv" + - lineages_topology: + type: file + description: File containing the lineage topology + pattern: "*lineages.yml" + - lineages_meta: + type: file + description: File containing lineage metadata that correspond to barcodes + pattern: "*lineages.json" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/variants/main.nf b/modules/nf-core/freyja/variants/main.nf new file mode 100644 index 00000000..cdc6d788 --- /dev/null +++ b/modules/nf-core/freyja/variants/main.nf @@ -0,0 +1,50 @@ +process FREYJA_VARIANTS { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::freyja=1.3.12" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/freyja:1.3.12--pyhdfd78af_0': + 'quay.io/biocontainers/freyja:1.3.12--pyhdfd78af_0' }" + + input: + tuple val(meta), path(bam) + path fasta + + output: + tuple val(meta), path("*.variants.tsv"), path("*.depth.tsv"), emit: variants + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + freyja \\ + variants \\ + $args \\ + --ref $fasta \\ + --variants ${prefix}.variants.tsv \\ + --depths ${prefix}.depth.tsv \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.variants.tsv + touch ${prefix}.depth.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freyja/variants/meta.yml b/modules/nf-core/freyja/variants/meta.yml new file mode 100644 index 00000000..13ddd377 --- /dev/null +++ b/modules/nf-core/freyja/variants/meta.yml @@ -0,0 +1,52 @@ +name: "freyja_variants" +description: call variant and sequencing depth information of the variant +keywords: + - variants + - fasta + - wastewater +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "http://dx.doi.org/10.1038/s41586-022-05049-6" + licence: "['BSD-2-Clause']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.bam" + + - fasta: + type: file + description: The reference sequence used for mapping and generating the BAM file + pattern: "*.fa" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variants: + type: file + description: File containing identified variants in a gff-like format + pattern: "*.variants.tsv" + - depths: + type: file + description: File containing depth of the variants + pattern: "*.depth.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Joon-Klaps" diff --git a/subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf b/subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf new file mode 100644 index 00000000..12c967ed --- /dev/null +++ b/subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf @@ -0,0 +1,77 @@ +include { FREYJA_VARIANTS } from '../../../modules/nf-core/freyja/variants' +include { FREYJA_UPDATE } from '../../../modules/nf-core/freyja/update' +include { FREYJA_DEMIX } from '../../../modules/nf-core/freyja/demix' +include { FREYJA_BOOT } from '../../../modules/nf-core/freyja/boot' + +workflow BAM_VARIANT_DEMIX_BOOT_FREYJA { + + take: + ch_bam // channel: [ val(meta), path(bam) ] + ch_fasta // channel: [ path(fasta) ] + val_repeats // value repeats + val_db_name // string db_name + ch_barcodes // channel: [ path(barcodes)] + ch_lineages_meta // channel: [ path(lineages_meta)] + + main: + ch_versions = Channel.empty() + + // + // Variant calling + // + FREYJA_VARIANTS ( + ch_bam, + ch_fasta + ) + ch_freyja_variants = FREYJA_VARIANTS.out.variants + + ch_versions = ch_versions.mix(FREYJA_VARIANTS.out.versions.first()) + + // + // Update the database if none are given. 
+ // + if (!ch_barcodes || !ch_lineages_meta) { + FREYJA_UPDATE ( + val_db_name + ) + + ch_barcodes = FREYJA_UPDATE.out.barcodes + ch_lineages_meta = FREYJA_UPDATE.out.lineages_meta + + ch_versions = ch_versions.mix(FREYJA_UPDATE.out.versions.first()) + } + + + // + // demix and define minimum variant abundances + // + FREYJA_DEMIX ( + ch_freyja_variants, + ch_barcodes, + ch_lineages_meta + ) + ch_freyja_demix = FREYJA_DEMIX.out.demix + ch_versions = ch_versions.mix(FREYJA_DEMIX.out.versions.first()) + + + // + // Perform bootstrapping to get more accurate estimates of abundancies + // + FREYJA_BOOT ( + ch_freyja_variants, + val_repeats, + ch_barcodes, + ch_lineages_meta + ) + ch_versions = ch_versions.mix(FREYJA_BOOT.out.versions.first()) + + emit: + variants = FREYJA_VARIANTS.out.variants // channel: [ val(meta), path(variants_tsv), path(depths_tsv) ] + demix = FREYJA_DEMIX.out.demix // channel: [ val(meta), path(demix_tsv) ] + lineages = FREYJA_BOOT.out.lineages // channel: [ val(meta), path(lineages_csv) ] + summarized = FREYJA_BOOT.out.summarized // channel: [ val(meta), path(summarized_csv) ] + barcodes = ch_barcodes // channel: [ path(barcodes) ] + lineages_meta = ch_lineages_meta // channel: [ path(lineages_meta) ] + versions = ch_versions // channel: [ path(versions.yml) ] + } + diff --git a/subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml b/subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml new file mode 100644 index 00000000..2515b4fb --- /dev/null +++ b/subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml @@ -0,0 +1,86 @@ +name: "bam_variant_demix_boot_freyja" +description: Recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference) +keywords: + - bam + - variants + - cram + +modules: + - freyja/variants + - freyja/demix + - freyja/update + - freyja/boot + +input: + - ch_bam: + type: file + description: | + Structure: [ val(meta), path(bam) ] + Groovy Map 
containing sample information e.g. [ id:'test', single_end:false ] and sorted BAM file + - ch_fasta: + type: file + description: | + Structure: [ path(fasta) ] + The fasta reference used for mapping and generating the sorted BAM file + - val_repeats: + type: value (int) + description: Number of bootstrap repeats to perform + - val_db_name: + type: value (string) + description: Name of the dir where UShER's files will be stored + - ch_barcodes: + type: file + description: | + Structure: path(barcodes) + File containing lineage defining barcodes + - ch_lineages_meta: + type: file + description: | + Structure: path(lineages_meta) + File containing lineage metadata that correspond to barcodes + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - variants: + type: file + description: | + Structure: [ val(meta), path(variants) ] + File containing identified variants in a gff-like format + - depths: + type: file + description: | + Structure: [ val(meta), path(depths) ] + File containing depth of the variants + - demix: + type: file + description: | + Structure: [ val(meta), path(demix) ] + a tsv file that includes the lineages present, their corresponding abundances, and summarization by constellation + - lineages: + type: file + description: | + Structure: [ val(meta), path(lineages) ] + a csv file that includes the lineages present and their corresponding abundances + - summarized: + type: file + description: | + Structure: [ val(meta), path(summarized) ] + a csv file that includes the lineages present but summarized by constellation and their corresponding abundances + - barcodes: + type: file + description: path(barcodes) + File containing lineage defining barcodes + - lineages_meta: + type: file + description: path(lineages_meta) + a csv file that includes the
lineages present but summarized by constellation and their corresponding abundances + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" From 2443ca80066b22a8841793e4b83435689b3b5063 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Wed, 29 Mar 2023 08:47:11 +0000 Subject: [PATCH 02/17] further setup of freyja --- conf/modules_illumina.config | 35 +++++++++++++++++++++++++++++++++++ conf/modules_nanopore.config | 35 +++++++++++++++++++++++++++++++++++ nextflow.config | 5 +++++ nextflow_schema.json | 34 ++++++++++++++++++++++++++++++++++ workflows/illumina.nf | 31 ++++++++++++++++++++++++------- workflows/nanopore.nf | 32 +++++++++++++++++--------------- 6 files changed, 150 insertions(+), 22 deletions(-) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 05424060..ffc3a537 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -191,6 +191,41 @@ if (!params.skip_variants) { } } + if (!params.skip_freyja) { + process { + withName: 'FREYJA_VARIANTS' { + publishDir = [ + path: { "${params.outdir}/variants/freyja/variants" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + withName: 'FREYJA_DEMIX' { + publishDir = [ + path: { "${params.outdir}/variants/freyja/demix" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + withName: 'FREYJA_BOOT' { + ext.args = [ + '--boxplot PDF' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/variants/freyja/bootstrap" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv,pdf}" + ] + } + withName: 'FREYJA_UPDATE' { + publishDir = [ + path: { "${params.outdir}/variants/freyja/" }, + mode: params.publish_dir_mode, + ] + } + } + } + if (!params.skip_ivar_trim && params.protocol == 'amplicon') { process { withName: 'IVAR_TRIM' { diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 5a4a277f..fda49b53 100644 ---
a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -229,6 +229,41 @@ if (!params.skip_nextclade) { } } +if (!params.skip_freyja) { + process { + withName: 'FREYJA_VARIANTS' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/variants" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + withName: 'FREYJA_DEMIX' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/demix" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + withName: 'FREYJA_BOOT' { + ext.args = [ + '--boxplot PDF' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/bootstrap" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv,pdf}" + ] + } + withName: 'FREYJA_UPDATE' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/" }, + mode: params.publish_dir_mode, + ] + } + } + } + if (!params.skip_variants_quast) { process { withName: 'QUAST' { diff --git a/nextflow.config b/nextflow.config index 361c3dd7..7b77f5c7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,6 +67,11 @@ params { min_mapped_reads = 1000 ivar_trim_noprimer = false ivar_trim_offset = null + skip_freyja = false + freyja_repeats = 1000 + freyja_db_name = 'freyja_db' + freyja_barcodes = null + freyja_lineages = null filter_duplicates = false save_unaligned = false save_mpileup = false diff --git a/nextflow_schema.json b/nextflow_schema.json index ff58253a..476d6848 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -251,6 +251,40 @@ "description": "Maximum window size before and after variant locii used to generate ASCIIGenome screenshots.", "fa_icon": "fab fa-windows" }, + "skip_freyja": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip freyja deep SARS-CoV-2 variant analysis using a depth weighted approach."
+ }, + "freyja_db_name": { + "type": "string", + "default": "freyja_db", + "hidden": true, + "fa_icon": "fas fa-folder-open", + "description": "Specify the name where to store UShER database (default: 'freyja_db')." + }, + "freyja_repeats": { + "type": "integer", + "default": 1000, + "fa_icon": "fas fa-hand-paper", + "description": "Specify the number of bootstrap repeats to do." + }, + "freyja_barcodes": { + "type": "string", + "format": "path", + "hidden": true, + "default": "[]", + "fa_icon": "fas fa-file", + "description": "Lineage defining barcodes, default is most recent from UShER database." + }, + "freyja_lineages": { + "type": "string", + "format": "path", + "default": "[]", + "hidden": true, + "fa_icon": "fas fa-file", + "description": "Metadata of lineages that match barcode, default is most recent from UShER database." + }, "multiqc_title": { "type": "string", "hidden": true, diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 7d44924f..8f8cddf6 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -21,7 +21,8 @@ WorkflowIllumina.initialise(params, log, valid_params) def checkPathParamList = [ params.input, params.fasta, params.gff, params.bowtie2_index, params.kraken2_db, params.primer_bed, params.primer_fasta, - params.blast_db, params.spades_hmm, params.multiqc_config + params.blast_db, params.spades_hmm, params.multiqc_config, + params.freyja_barcodes, params.freyja_lineages ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -96,8 +97,9 @@ include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/mosdepth/main // // SUBWORKFLOW: Consisting entirely of nf-core/modules // -include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' -include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard/main' +include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' +include { BAM_MARKDUPLICATES_PICARD }
from '../subworkflows/nf-core/bam_markduplicates_picard/main' +include { BAM_VARIANT_DEMIX_BOOT_FREYJA } from '../subworkflows/nf-core/bam_variant_demix_boot_freyja/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -232,7 +234,7 @@ workflow ILLUMINA { } } .collect() - .map { + .map { tsv_data -> def header = ['Sample', 'Reads before trimming'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -319,7 +321,7 @@ workflow ILLUMINA { ch_pass_fail_mapped .fail .collect() - .map { + .map { tsv_data -> def header = ['Sample', 'Mapped reads'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -457,6 +459,21 @@ workflow ILLUMINA { ch_versions = ch_versions.mix(VARIANTS_BCFTOOLS.out.versions) } + // + // SUBWORKFLOW: Determine variants with Freyja + // + if (!params.skip_variants && !params.skip_freyja) { + BAM_VARIANT_DEMIX_BOOT_FREYJA( + ch_bam, + PREPARE_GENOME.out.fasta, + params.freyja_repeats, + params.freyja_db_name, + params.freyja_barcodes, + params.freyja_lineages, + ) + ch_versions= ch_versions.mix(BAM_VARIANT_DEMIX_BOOT_FREYJA.out.versions) + } + // // SUBWORKFLOW: Call consensus with iVar and downstream QC // @@ -506,8 +523,8 @@ workflow ILLUMINA { def clade = WorkflowCommons.getNextcladeFieldMapFromCsv(csv)['clade'] return [ "$meta.id\t$clade" ] } - .collect() - .map { + .collect() + .map { tsv_data -> def header = ['Sample', 'clade'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index 69854472..5e0dceff 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -16,7 +16,8 @@ WorkflowNanopore.initialise(params, log, valid_params) def checkPathParamList = [ params.input, params.fastq_dir, params.fast5_dir, - params.sequencing_summary, params.gff + params.sequencing_summary, params.gff, + params.freyja_barcodes, params.freyja_lineages ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -58,11 +59,12 @@ include 
{ PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../mod // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_nanopore' -include { SNPEFF_SNPSIFT } from '../subworkflows/local/snpeff_snpsift' -include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table' -include { FILTER_BAM_SAMTOOLS } from '../subworkflows/local/filter_bam_samtools' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_nanopore' +include { SNPEFF_SNPSIFT } from '../subworkflows/local/snpeff_snpsift' +include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table' +include { FILTER_BAM_SAMTOOLS } from '../subworkflows/local/filter_bam_samtools' +include { BAM_VARIANT_DEMIX_BOOT_FREYJA } from '../subworkflows/nf-core/bam_variant_demix_boot_freyja/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -180,8 +182,8 @@ workflow NANOPORE { .filter { it[1] == null } .filter { it[-1] >= params.min_barcode_reads } .map { it -> [ "${it[0]}\t${it[-1]}" ] } - .collect() - .map { + .collect() + .map { tsv_data -> def header = ['Barcode', 'Read count'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -194,8 +196,8 @@ workflow NANOPORE { ch_fastq_dirs .filter { it[-1] == null } .map { it -> [ "${it[1]}\t${it[0]}" ] } - .collect() - .map { + .collect() + .map { tsv_data -> def header = ['Sample', 'Missing barcode'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -239,7 +241,7 @@ workflow NANOPORE { ch_pass_fail_barcode_count .fail .collect() - .map { + .map { tsv_data -> def header = ['Sample', 'Barcode count'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -278,7 +280,7 @@ workflow NANOPORE { ch_pass_fail_guppyplex_count .fail .collect() - .map { + .map { tsv_data -> def 
header = ['Sample', 'Read count'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -410,13 +412,13 @@ workflow NANOPORE { NEXTCLADE_RUN .out .csv - .map { + .map { meta, csv -> def clade = WorkflowCommons.getNextcladeFieldMapFromCsv(csv)['clade'] return [ "$meta.id\t$clade" ] } - .collect() - .map { + .collect() + .map { tsv_data -> def header = ['Sample', 'clade'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) From 00c76e57081c8f6edc0c18b99d050f4a9b166719 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Wed, 29 Mar 2023 09:47:43 +0000 Subject: [PATCH 03/17] fixing config and naming --- conf/modules_illumina.config | 4 +--- conf/modules_nanopore.config | 4 +--- nextflow.config | 10 +++++----- nextflow_schema.json | 2 +- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index ffc3a537..694c62ce 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -208,9 +208,7 @@ if (!params.skip_variants) { ] } withName: 'FREYJA_BOOT' { - ext.args = [ - '--boxplot PDF' - ].join(' ').trim(), + ext.args = '--boxplot PDF' publishDir = [ path: { "${params.outdir}/variants/freyja/bootstrap" }, mode: params.publish_dir_mode, diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index fda49b53..15c2e7c6 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -246,9 +246,7 @@ if (!params.skip_freyja) { ] } withName: 'FREYJA_BOOT' { - ext.args = [ - '--boxplot PDF' - ].join(' ').trim(), + ext.args = '--boxplot PDF' publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/freyja/bootstrap" }, mode: params.publish_dir_mode, diff --git a/nextflow.config b/nextflow.config index 7b77f5c7..a8863319 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,6 +41,11 @@ params { multiqc_title = null multiqc_config = null max_multiqc_email_size = '25.MB' + skip_freyja = false + freyja_repeats = 1000 + freyja_db_name = 'freyja_db' + 
freyja_barcodes = null + freyja_lineages = null skip_mosdepth = false skip_pangolin = false skip_nextclade = false @@ -67,11 +72,6 @@ params { min_mapped_reads = 1000 ivar_trim_noprimer = false ivar_trim_offset = null - skip_freyja = false - freyja_repeats = 1000 - freyja_db_name = 'freyja_db' - freyja_barcodes = null - freyja_lineages = null filter_duplicates = false save_unaligned = false save_mpileup = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 476d6848..1602fad5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -256,7 +256,7 @@ "fa_icon": "fas fast-forward", "description": "Skip freyja deep SARS-CoV-2 variant analysis using a depth weighted approach." }, - "freyja_db": { + "freyja_db_name": { "type": "string", "default": "freyja_db", "hidden": true, From 47087b21d9ed483b05091634d2dbb06fd2786ae8 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Wed, 29 Mar 2023 14:17:07 +0000 Subject: [PATCH 04/17] Test finished & compling with PR requirements --- CHANGELOG.md | 2 +- README.md | 4 ++- conf/test.config | 1 + conf/test_full.config | 1 + conf/test_full_nanopore.config | 3 ++ conf/test_full_sispa.config | 1 + conf/test_nanopore.config | 3 ++ conf/test_sispa.config | 1 + docs/output.md | 45 +++++++++++++++++++++++++++++ docs/usage.md | 4 +++ modules/nf-core/freyja/boot/main.nf | 2 +- nextflow.config | 2 +- 12 files changed, 65 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fbaac5c7..604f76c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unpublished Version / DEV] ### Enhancements & fixes - +- [[#299](https://github.com/nf-core/viralrecon/issues/299)] - Add the freyja pipeline as a subworkflow ## [[2.6.0](https://github.com/nf-core/viralrecon/releases/tag/2.6.0)] - 2023-03-23 ### Credits diff --git a/README.md b/README.md index c50924a5..bffdeb2b 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,8 @@ 
A number of improvements were made to the pipeline recently, mainly with regard - Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) - Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) - Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) - 9. Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) + 9. Recover relative lineage abundances from mixed SARS-CoV-2 samples ([`Freyja`](https://github.com/andersen-lab/Freyja)) + 10. Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) 6. _De novo_ assembly 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); _amplicon data only_) 2. 
Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) _||_ [`Unicycler`](https://github.com/rrwick/Unicycler) _||_ [`minia`](https://github.com/GATB/minia)) @@ -78,6 +79,7 @@ A number of improvements were made to the pipeline recently, mainly with regard - Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) - Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) - Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) + - Recover relative lineage abundances from mixed SARS-CoV-2 samples ([`Freyja`](https://github.com/andersen-lab/Freyja)) - Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) 8. 
Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results ([`MultiQC`](http://multiqc.info/)) diff --git a/conf/test.config b/conf/test.config index 15e93a4b..b6b5b783 100644 --- a/conf/test.config +++ b/conf/test.config @@ -32,6 +32,7 @@ params { // Variant calling options variant_caller = 'ivar' + freyja_repeats = 10 // Assembly options assemblers = 'spades,unicycler,minia' diff --git a/conf/test_full.config b/conf/test_full.config index 3c9b7ec4..3703af44 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -26,6 +26,7 @@ params { // Variant calling options variant_caller = 'ivar' + freyja_repeats = 10 // Assembly options assemblers = 'spades,unicycler,minia' diff --git a/conf/test_full_nanopore.config b/conf/test_full_nanopore.config index 2536ea4b..ec041051 100644 --- a/conf/test_full_nanopore.config +++ b/conf/test_full_nanopore.config @@ -25,6 +25,9 @@ params { genome = 'MN908947.3' primer_set_version = 3 + // variant calling options + freyja_repeats = 10 + // Other parameters artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5' } diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config index 90bc0842..54e0235f 100644 --- a/conf/test_full_sispa.config +++ b/conf/test_full_sispa.config @@ -24,6 +24,7 @@ params { // Variant calling options variant_caller = 'bcftools' + freyja_repeats = 10 // Assembly options assemblers = 'spades,unicycler,minia' diff --git a/conf/test_nanopore.config b/conf/test_nanopore.config index 406c8c3e..0674e8ab 100644 --- a/conf/test_nanopore.config +++ b/conf/test_nanopore.config @@ -30,6 +30,9 @@ params { genome = 'MN908947.3' primer_set_version = 3 + // variant calling options + freyja_repeats = 10 + // Other parameters artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/minion_test/r941_min_high_g360_model.hdf5' } diff --git a/conf/test_sispa.config 
b/conf/test_sispa.config index d3e39be8..0ec450bb 100644 --- a/conf/test_sispa.config +++ b/conf/test_sispa.config @@ -30,6 +30,7 @@ params { // Variant calling options variant_caller = 'bcftools' + freyja_repeats = 10 // Assembly options assemblers = 'spades,unicycler,minia' diff --git a/docs/output.md b/docs/output.md index 610e8a15..c3ec28a1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -21,6 +21,7 @@ The directories listed below will be created in the results directory after the - [Pangolin](#nanopore-pangolin) - Lineage analysis - [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks - [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks + - [Freyja](#fre) - [Variants long table](#nanopore-variants-long-table) - Collate per-sample information for individual variants, functional effect prediction and lineage analysis - [Workflow reporting](#nanopore-workflow-reporting) - [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results @@ -241,6 +242,29 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi [Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. +### Nanopore: Freyja +
+Output files + +- `<CALLER>/freyja/demix` + - `*.tsv`: Analysis results including the lineages present, their corresponding abundances, and summarization by constellation +- `<CALLER>/freyja/freyja_db` + - `.json`: dataset containing lineage metadata that correspond to barcodes. + - `.yml`: dataset containing the lineage topology. + - `.csv`: dataset containing lineage-defining barcodes. +- `<CALLER>/freyja/variants` + - `*.variants.tsv`: Analysis results including identified variants in a gff-like format + - `*.depth.tsv`: Analysis results including the depth of the identified variants +- `<CALLER>/freyja/bootstrap` + - `*lineages.csv`: Analysis results including lineages present and their corresponding abundances with variation identified through bootstrapping + - `*summarized.csv`: Analysis results including lineages present but summarized by constellation and their corresponding abundances with variation identified through bootstrapping + +**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[Freyja](https://github.com/andersen-lab/Freyja) is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the [UShER](https://usher-wiki.readthedocs.io/en/latest/#) global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + ### Nanopore: ASCIIGenome
@@ -697,6 +721,27 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi [Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. +### Freyja +
+Output files + +- `/freyja/demix` + - `*.tsv`: Analysis results including the lineages present, their corresponding abundances, and summarization by constellation +- `/freyja/freyja_db` + - `.json`: dataset containing lineage metadata that correspond to barcodes. + - `.yml`: dataset containing the lineage topology. + - `.csv`: dataset containing lineage defining barcodes. +- `/freyja/variants` + - `*.variants.tsv`: Analysis results including identified variants in a gff-like format + - `*.depth.tsv`: Analysis results including the depth of the identified variants +- `/freyja/boot` + - `*lineages.csv` Analysis results inculding lineages present and their corresponding abundances with variation identified through bootstrapping + - `*summarized.csv`Analysis results inculding lineages present but summarized by constellation and their corresponding abundances with variation identified through bootstrapping + +
+ +[Freyja](https://github.com/andersen-lab/Freyja) is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the [UShER](https://usher-wiki.readthedocs.io/en/latest/#) global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + ### Variants long table
diff --git a/docs/usage.md b/docs/usage.md index 1f27af21..f7b790e8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -416,6 +416,10 @@ If the `--save_reference` parameter is provided then the Nextclade dataset gener > **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +#### Freyja +[Freyja](https://github.com/andersen-lab/Freyja) depends on a dataset of barcodes that uses lineage defening mutations (provide by [UShER](https://usher-wiki.readthedocs.io/en/latest/#)), by default the most recent barcodes will be downloaded. However, when running analyses across large time windows and these analyses need to be compared, it might be of interest to keep the constant barcodes (or rerun all freyja analyses with the most recent dataset). To do this specify the barcodes and lineages using the variables `freyja_barcodes`, `freyja_lineages` respectivly. + + ### nf-core/configs In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. 
diff --git a/modules/nf-core/freyja/boot/main.nf b/modules/nf-core/freyja/boot/main.nf index c11d9953..10e39d02 100644 --- a/modules/nf-core/freyja/boot/main.nf +++ b/modules/nf-core/freyja/boot/main.nf @@ -1,6 +1,6 @@ process FREYJA_BOOT { tag "$meta.id" - label 'process_long' + label 'process_high' conda "bioconda::freyja=1.3.12" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/nextflow.config b/nextflow.config index a8863319..e4a4a4be 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,7 @@ params { multiqc_config = null max_multiqc_email_size = '25.MB' skip_freyja = false - freyja_repeats = 1000 + freyja_repeats = 100 freyja_db_name = 'freyja_db' freyja_barcodes = null freyja_lineages = null From b6e3401519711d233ee51cf28b7ec4b401cc125c Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Thu, 13 Apr 2023 11:10:35 +0000 Subject: [PATCH 05/17] updating boot label --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 31b725ab..1d79b252 100644 --- a/modules.json +++ b/modules.json @@ -122,7 +122,7 @@ }, "freyja/boot": { "branch": "master", - "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "git_sha": "281c744ed84352c24697f0916c7744853ce83927", "installed_by": ["bam_variant_demix_boot_freyja"] }, "freyja/demix": { From fd7a133395be8b055c050cc938770e27b221dafe Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Thu, 13 Apr 2023 11:29:18 +0000 Subject: [PATCH 06/17] Keeping prettier happy --- docs/output.md | 2 ++ docs/usage.md | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index c3ec28a1..6f86d608 100644 --- a/docs/output.md +++ b/docs/output.md @@ -243,6 +243,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi [Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality 
checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. ### Nanopore: Freyja +
Output files @@ -722,6 +723,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi [Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. ### Freyja +
Output files diff --git a/docs/usage.md b/docs/usage.md index f7b790e8..3941e4c0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -417,8 +417,8 @@ If the `--save_reference` parameter is provided then the Nextclade dataset gener > **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. #### Freyja -[Freyja](https://github.com/andersen-lab/Freyja) depends on a dataset of barcodes that uses lineage defening mutations (provide by [UShER](https://usher-wiki.readthedocs.io/en/latest/#)), by default the most recent barcodes will be downloaded. However, when running analyses across large time windows and these analyses need to be compared, it might be of interest to keep the constant barcodes (or rerun all freyja analyses with the most recent dataset). To do this specify the barcodes and lineages using the variables `freyja_barcodes`, `freyja_lineages` respectivly. +[Freyja](https://github.com/andersen-lab/Freyja) depends on a dataset of barcodes that uses lineage-defining mutations (provided by [UShER](https://usher-wiki.readthedocs.io/en/latest/#)). By default, the most recent barcodes will be downloaded. However, when running analyses across large time windows that need to be compared, it might be of interest to keep the barcodes constant (or to rerun all Freyja analyses with the most recent dataset). To do this, specify the barcodes and lineages using the parameters `--freyja_barcodes` and `--freyja_lineages`, respectively. ### nf-core/configs From 2e477785b069c0f72bc23e0104351ef80157c3cd Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Fri, 14 Apr 2023 09:25:39 +0200 Subject: [PATCH 07/17] am I prettie? 
--- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 604f76c6..f58f9774 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unpublished Version / DEV] ### Enhancements & fixes + - [[#299](https://github.com/nf-core/viralrecon/issues/299)] - Add the freyja pipeline as a subworkflow + ## [[2.6.0](https://github.com/nf-core/viralrecon/releases/tag/2.6.0)] - 2023-03-23 ### Credits From 133270a1d72d375914939ff50613c856e30eb121 Mon Sep 17 00:00:00 2001 From: Joon Klaps <61584065+Joon-Klaps@users.noreply.github.com> Date: Wed, 16 Aug 2023 08:09:21 +0000 Subject: [PATCH 08/17] Fixing output.md freyja, default bootstrap repeat --- README.md | 2 +- docs/output.md | 45 +++++++++++++++++++++++--------------------- nextflow_schema.json | 2 +- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index bffdeb2b..3af4d544 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ A number of improvements were made to the pipeline recently, mainly with regard - Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) - Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) - Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) - 9. Recover relative lineage abundances from mixed SARS-CoV-2 samples ([`Freyja`](https://github.com/andersen-lab/Freyja)) + 9. Relative lineage abundance analysis from mixed SARS-CoV-2 samples ([`Freyja`](https://github.com/andersen-lab/Freyja)) 10. Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) 6. 
_De novo_ assembly 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); _amplicon data only_) diff --git a/docs/output.md b/docs/output.md index 6f86d608..b2957be7 100644 --- a/docs/output.md +++ b/docs/output.md @@ -20,8 +20,8 @@ The directories listed below will be created in the results directory after the - [QUAST](#nanopore-quast) - Consensus assessment report - [Pangolin](#nanopore-pangolin) - Lineage analysis - [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks + - [Freyja](#nanopore-freyja) - Relative lineage abundance analysis from mixed SARS-CoV-2 samples (typically wastewater) - [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks - - [Freyja](#fre) - [Variants long table](#nanopore-variants-long-table) - Collate per-sample information for individual variants, functional effect prediction and lineage analysis - [Workflow reporting](#nanopore-workflow-reporting) - [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results @@ -346,6 +346,7 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t - [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics - [iVar variants](#ivar-variants) _||_ [BCFTools call](#bcftools-call) - Variant calling - [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + - [Freyja](#freyja) - Relative lineage abundance analysis from mixed SARS-CoV-2 samples (typically wastewater) - [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks - [iVar consensus](#ivar-consensus) _||_ [BCFTools and BEDTools](#bcftools-and-bedtools) - Consensus sequence generation - [QUAST](#quast) - Consensus assessment report @@ -618,6 +619,28 @@ iVar outputs a tsv format which is not compatible with downstream analysis such ![MultiQC - 
SnpEff annotation counts](images/mqc_snpeff_plot.png) +### Freyja + +
+Output files + +- `/freyja/demix` + - `*.tsv`: Analysis results including the lineages present, their corresponding abundances, and summarization by constellation +- `/freyja/freyja_db` + - `.json`: dataset containing lineage metadata that correspond to barcodes. + - `.yml`: dataset containing the lineage topology. + - `.csv`: dataset containing lineage-defining barcodes. +- `/freyja/variants` + - `*.variants.tsv`: Analysis results including identified variants in a gff-like format + - `*.depth.tsv`: Analysis results including the depth of the identified variants +- `/freyja/bootstrap` + - `*lineages.csv`: Analysis results including lineages present and their corresponding abundances with variation identified through bootstrapping + - `*summarized.csv`: Analysis results including lineages present but summarized by constellation and their corresponding abundances with variation identified through bootstrapping + +
+ +[Freyja](https://github.com/andersen-lab/Freyja) is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the [UShER](https://usher-wiki.readthedocs.io/en/latest/#) global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + ### ASCIIGenome
@@ -722,27 +745,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi [Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. -### Freyja - -
-Output files -- `/freyja/demix` - - `*.tsv`: Analysis results including the lineages present, their corresponding abundances, and summarization by constellation -- `/freyja/freyja_db` - - `.json`: dataset containing lineage metadata that correspond to barcodes. - - `.yml`: dataset containing the lineage topology. - - `.csv`: dataset containing lineage defining barcodes. -- `/freyja/variants` - - `*.variants.tsv`: Analysis results including identified variants in a gff-like format - - `*.depth.tsv`: Analysis results including the depth of the identified variants -- `/freyja/boot` - - `*lineages.csv` Analysis results inculding lineages present and their corresponding abundances with variation identified through bootstrapping - - `*summarized.csv`Analysis results inculding lineages present but summarized by constellation and their corresponding abundances with variation identified through bootstrapping - -
- -[Freyja](https://github.com/andersen-lab/Freyja) is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the [UShER](https://usher-wiki.readthedocs.io/en/latest/#) global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. ### Variants long table diff --git a/nextflow_schema.json b/nextflow_schema.json index 1602fad5..7848aa05 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -265,7 +265,7 @@ }, "freyja_repeats": { "type": "integer", - "default": 1000, + "default": 100, "fa_icon": "fas fa-hand-paper", "description": "Specify the number of bootstrap repeats to do." }, From 578372431d56b465d560dbbb70405ee537899965 Mon Sep 17 00:00:00 2001 From: Joon Klaps <61584065+Joon-Klaps@users.noreply.github.com> Date: Wed, 16 Aug 2023 08:14:22 +0000 Subject: [PATCH 09/17] woops prettier delete empty lines --- docs/output.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index b2957be7..536a20d3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -745,8 +745,6 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi [Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. - - ### Variants long table
From 1e7911c63774ffe844ec192e4e7e5512cc45ed6c Mon Sep 17 00:00:00 2001 From: Joon Klaps <61584065+Joon-Klaps@users.noreply.github.com> Date: Wed, 16 Aug 2023 08:31:48 +0000 Subject: [PATCH 10/17] Fixing freyja default variables in schema --- nextflow_schema.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 7848aa05..da4de01d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -273,14 +273,12 @@ "type": "string", "format": "path", "hidden": true, - "default": "[]", "fa_icon": "fas fa-file", "description": "Lineage defining barcodes, default is most recent from UShER database." }, "freyja_lineages": { "type": "string", "format": "path", - "default": "[]", "hidden": true, "fa_icon": "fas fa-file", "description": "Metadata of lineages that match barcode, default is most recent from UShER database." From 3b51111f573906b6e58c8860744195d15d205ffb Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 16 Aug 2023 10:50:41 +0200 Subject: [PATCH 11/17] Update CHANGELOG.md --- CHANGELOG.md | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index acc074d9..444e5a98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,21 +7,50 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Credits +Special thanks to the following for their code contributions to the release: + - [Adam Talbot](https://github.com/adamrtalbot) +- [Joon Klaps](https://github.com/Joon-Klaps) -### Software dependencies +Thank you to everyone else that has contributed by reporting bugs, enhancements or in any other way, shape or form. 
### Enhancements & fixes - [[#299](https://github.com/nf-core/viralrecon/issues/299)] - Add the freyja pipeline as a subworkflow - [[PR #387](https://github.com/nf-core/viralrecon/pull/387)] - Software closes gracefully when encountering an error -## [[2.6.0](https://github.com/nf-core/viralrecon/releases/tag/2.6.0)] - 2023-03-23 +### Parameters -### Credits + +| Old parameter | New parameter | +| ------------- | ------------------- | +| | `--skip_freyja` | +| | `--freyja_repeats` | +| | `--freyja_db_name` | +| | `--freyja_barcodes` | +| | `--freyja_lineages` | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. +> **NB:** Parameter has been **removed** if new parameter information isn't present. ### Software dependencies +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| `freyja` | | 1.3.12 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. 
+ +## [[2.6.0](https://github.com/nf-core/viralrecon/releases/tag/2.6.0)] - 2023-03-23 + +### Credits + Special thanks to the following for their code contributions to the release: - [Friederike Hanssen](https://github.com/FriederikeHanssen) From a0582c5327829ed78e5452856258a4a914c76040 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 16 Aug 2023 10:51:24 +0200 Subject: [PATCH 12/17] Update nextflow_schema.json --- nextflow_schema.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index da4de01d..3f5acf47 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -259,7 +259,6 @@ "freyja_db_name": { "type": "string", "default": "freyja_db", - "hidden": true, "fa_icon": "fas fa-folder-open", "description": "Specify the name where to store UShER database (default: 'freyja_db')." }, @@ -272,14 +271,12 @@ "freyja_barcodes": { "type": "string", "format": "path", - "hidden": true, "fa_icon": "fas fa-file", "description": "Lineage defining barcodes, default is most recent from UShER database." }, "freyja_lineages": { "type": "string", "format": "path", - "hidden": true, "fa_icon": "fas fa-file", "description": "Metadata of lineages that match barcode, default is most recent from UShER database." 
}, From 0f2388644b2619ef92bb188f4aa448304c04e69c Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 16 Aug 2023 10:51:44 +0200 Subject: [PATCH 13/17] Update modules_nanopore.config --- conf/modules_nanopore.config | 61 +++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 15c2e7c6..8f03b373 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -230,37 +230,40 @@ if (!params.skip_nextclade) { } if (!params.skip_freyja) { - process { - withName: 'FREYJA_VARIANTS' { - publishDir = [ - path: { "${params.outdir}/${params.artic_minion_caller}/freyja/variants" }, - mode: params.publish_dir_mode, - pattern: "*.{tsv,csv}" - ] - } - withName: 'FREYJA_DEMIX' { - publishDir = [ - path: { "${params.outdir}/${params.artic_minion_caller}/freyja/demix" }, - mode: params.publish_dir_mode, - pattern: "*.{tsv,csv}" - ] - } - withName: 'FREYJA_BOOT' { - ext.args = '--boxplot PDF' - publishDir = [ - path: { "${params.outdir}/${params.artic_minion_caller}/freyja/bootstrap" }, - mode: params.publish_dir_mode, - pattern: "*.{tsv,csv,pdf}" - ] - } - withName: 'FREYJA_UPDATE' { - publishDir = [ - path: { "${params.outdir}/${params.artic_minion_caller}/freyja/" }, - mode: params.publish_dir_mode, - ] - } + process { + withName: 'FREYJA_VARIANTS' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/variants" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + + withName: 'FREYJA_DEMIX' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/demix" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + + withName: 'FREYJA_BOOT' { + ext.args = '--boxplot PDF' + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/bootstrap" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv,pdf}" + ] + } + + withName: 'FREYJA_UPDATE' { + 
publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/" }, + mode: params.publish_dir_mode, + ] } } +} if (!params.skip_variants_quast) { process { From 6d5ee58a6f0821312c7170a57b19d24ac2bed013 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 16 Aug 2023 10:52:27 +0200 Subject: [PATCH 14/17] Update usage.md --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 3941e4c0..102e2b00 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -418,7 +418,7 @@ If the `--save_reference` parameter is provided then the Nextclade dataset gener #### Freyja -[Freyja](https://github.com/andersen-lab/Freyja) depends on a dataset of barcodes that uses lineage defening mutations (provide by [UShER](https://usher-wiki.readthedocs.io/en/latest/#)), by default the most recent barcodes will be downloaded. However, when running analyses across large time windows and these analyses need to be compared, it might be of interest to keep the constant barcodes (or rerun all freyja analyses with the most recent dataset). To do this specify the barcodes and lineages using the variables `freyja_barcodes`, `freyja_lineages` respectivly. +[Freyja](https://github.com/andersen-lab/Freyja) relies on a dataset of barcodes that use lineage defining mutations (see [UShER](https://usher-wiki.readthedocs.io/en/latest/#)). By default the most recent barcodes will be downloaded and used. However, if analyses need to be compared across multiple datasets, it might be of interest to re-use the same barcodes, or to rerun all Freyja analyses with the most recent dataset. To do this, specify the barcodes and lineages using the `--freyja_barcodes` and `--freyja_lineages` parameters, respectively. 
### nf-core/configs From e5266166cd8058d7858fb7f3ea6aa5021472941e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 16 Aug 2023 10:57:06 +0200 Subject: [PATCH 15/17] Update CHANGELOG.md --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 444e5a98..b8acc98a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,9 +37,9 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. -| Dependency | Old version | New version | -| ----------- | ----------- | ----------- | -| `freyja` | | 1.3.12 | +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `freyja` | | 1.3.12 | > **NB:** Dependency has been **updated** if both old and new version information is present. 
> From a768d5480d128ce0af8d25ddce6e7db84fc04f58 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Wed, 16 Aug 2023 09:41:30 +0000 Subject: [PATCH 16/17] Actually implementing freyja within nanopore --- workflows/nanopore.nf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index 5e0dceff..3c335e74 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -426,6 +426,21 @@ workflow NANOPORE { .set { ch_nextclade_multiqc } } + // + // SUBWORKFLOW: Determine variants with Freyja + // + if (!params.skip_freyja) { + BAM_VARIANT_DEMIX_BOOT_FREYJA( + ARTIC_MINION.out.bam_primertrimmed, + ARTIC_MINION.out.fasta, + params.freyja_repeats, + params.freyja_db_name, + params.freyja_barcodes, + params.freyja_lineages, + ) + ch_versions= ch_versions.mix(BAM_VARIANT_DEMIX_BOOT_FREYJA.out.versions) + } + // // MODULE: Consensus QC across all samples with QUAST // From 8c74f8f2f28bd8f97b577211994a39c5698b65a4 Mon Sep 17 00:00:00 2001 From: Joon-Klaps Date: Wed, 16 Aug 2023 11:42:00 +0000 Subject: [PATCH 17/17] fixing nanopore freyja error --- conf/modules_illumina.config | 2 +- conf/modules_nanopore.config | 2 +- workflows/nanopore.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 694c62ce..2cc48bea 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -208,7 +208,7 @@ if (!params.skip_variants) { ] } withName: 'FREYJA_BOOT' { - ext.args = '--boxplot PDF' + ext.args = '--boxplot pdf' publishDir = [ path: { "${params.outdir}/variants/freyja/bootstrap" }, mode: params.publish_dir_mode, diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 8f03b373..31ca44fb 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -248,7 +248,7 @@ if (!params.skip_freyja) { } withName: 'FREYJA_BOOT' { - ext.args = '--boxplot PDF' + ext.args = '--boxplot pdf' publishDir = [ 
path: { "${params.outdir}/${params.artic_minion_caller}/freyja/bootstrap" }, mode: params.publish_dir_mode, diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index 3c335e74..677634ab 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -432,7 +432,7 @@ workflow NANOPORE { if (!params.skip_freyja) { BAM_VARIANT_DEMIX_BOOT_FREYJA( ARTIC_MINION.out.bam_primertrimmed, - ARTIC_MINION.out.fasta, + PREPARE_GENOME.out.fasta, params.freyja_repeats, params.freyja_db_name, params.freyja_barcodes,