diff --git a/CHANGELOG.md b/CHANGELOG.md index dbaf239a..b8acc98a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,20 +7,50 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Credits +Special thanks to the following for their code contributions to the release: + - [Adam Talbot](https://github.com/adamrtalbot) +- [Joon Klaps](https://github.com/Joon-Klaps) -### Software dependencies +Thank you to everyone else that has contributed by reporting bugs, enhancements or in any other way, shape or form. ### Enhancements & fixes -- [[#387](https://github.com/nf-core/viralrecon/pull/387/files)] - Software closes gracefully when encountering an error +- [[#299](https://github.com/nf-core/viralrecon/issues/299)] - Add the freyja pipeline as a subworkflow +- [[PR #387](https://github.com/nf-core/viralrecon/pull/387)] - Software closes gracefully when encountering an error -## [[2.6.0](https://github.com/nf-core/viralrecon/releases/tag/2.6.0)] - 2023-03-23 +### Parameters -### Credits +| Old parameter | New parameter | +| ------------- | ------------------- | +| | `--skip_freyja` | +| | `--freyja_repeats` | +| | `--freyja_db_name` | +| | `--freyja_barcodes` | +| | `--freyja_lineages` | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. +> **NB:** Parameter has been **removed** if new parameter information isn't present. ### Software dependencies +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `freyja` | | 1.3.12 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. + +## [[2.6.0](https://github.com/nf-core/viralrecon/releases/tag/2.6.0)] - 2023-03-23 + +### Credits + Special thanks to the following for their code contributions to the release: - [Friederike Hanssen](https://github.com/FriederikeHanssen) diff --git a/README.md b/README.md index c50924a5..3af4d544 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,8 @@ A number of improvements were made to the pipeline recently, mainly with regard - Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) - Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) - Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) - 9. Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) + 9. Relative lineage abundance analysis from mixed SARS-CoV-2 samples ([`Freyja`](https://github.com/andersen-lab/Freyja)) + 10. Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) 6. _De novo_ assembly 1. 
Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); _amplicon data only_) 2. Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) _||_ [`Unicycler`](https://github.com/rrwick/Unicycler) _||_ [`minia`](https://github.com/GATB/minia)) @@ -78,6 +79,7 @@ A number of improvements were made to the pipeline recently, mainly with regard - Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) - Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) - Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) + - Recover relative lineage abundances from mixed SARS-CoV-2 samples ([`Freyja`](https://github.com/andersen-lab/Freyja)) - Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) 8. 
Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results ([`MultiQC`](http://multiqc.info/)) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 05424060..2cc48bea 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -191,6 +191,39 @@ if (!params.skip_variants) { } } + if (!params.skip_freyja) { /* Freyja publishing rules (Illumina): applied unless --skip_freyja is set; every Freyja process publishes under ${params.outdir}/variants/freyja/ */ + process { + withName: 'FREYJA_VARIANTS' { + publishDir = [ + path: { "${params.outdir}/variants/freyja/variants" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + withName: 'FREYJA_DEMIX' { + publishDir = [ + path: { "${params.outdir}/variants/freyja/demix" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + withName: 'FREYJA_BOOT' { /* NOTE(review): '--boxplot pdf' is handed to `freyja boot`, but FREYJA_BOOT only declares CSV outputs in this change, so the `pdf` part of the pattern presumably never matches a published file - confirm */ + ext.args = '--boxplot pdf' + publishDir = [ + path: { "${params.outdir}/variants/freyja/bootstrap" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv,pdf}" + ] + } + withName: 'FREYJA_UPDATE' { /* no `pattern` here, so no output of the process is filtered out by name; NOTE(review): this presumably also publishes versions.yml - confirm that is intended */ + publishDir = [ + path: { "${params.outdir}/variants/freyja/" }, + mode: params.publish_dir_mode, + ] + } + } + } + if (!params.skip_ivar_trim && params.protocol == 'amplicon') { process { withName: 'IVAR_TRIM' { diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 5a4a277f..31ca44fb 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -229,6 +229,42 @@ if (!params.skip_nextclade) { } } +if (!params.skip_freyja) { /* Freyja publishing rules (Nanopore): applied unless --skip_freyja is set; outputs go under ${params.outdir}/${params.artic_minion_caller}/freyja/ */ + process { + withName: 'FREYJA_VARIANTS' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/variants" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + + withName: 'FREYJA_DEMIX' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/demix" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + + withName: 'FREYJA_BOOT' { /* NOTE(review): same caveat as the Illumina config - the `pdf` pattern presumably matches nothing because the process declares only CSV outputs; confirm */ + ext.args = '--boxplot pdf' + publishDir = [ + path: { 
"${params.outdir}/${params.artic_minion_caller}/freyja/bootstrap" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv,pdf}" + ] + } + + withName: 'FREYJA_UPDATE' { /* no `pattern` here, so no output of the process is filtered out by name */ + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/" }, + mode: params.publish_dir_mode, + ] + } + } +} + if (!params.skip_variants_quast) { process { withName: 'QUAST' { diff --git a/conf/test.config b/conf/test.config index 15e93a4b..b6b5b783 100644 --- a/conf/test.config +++ b/conf/test.config @@ -32,6 +32,7 @@ params { // Variant calling options variant_caller = 'ivar' + freyja_repeats = 10 // Assembly options assemblers = 'spades,unicycler,minia' diff --git a/conf/test_full.config b/conf/test_full.config index 3c9b7ec4..3703af44 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -26,6 +26,7 @@ params { // Variant calling options variant_caller = 'ivar' + freyja_repeats = 10 // Assembly options assemblers = 'spades,unicycler,minia' diff --git a/conf/test_full_nanopore.config b/conf/test_full_nanopore.config index 2536ea4b..ec041051 100644 --- a/conf/test_full_nanopore.config +++ b/conf/test_full_nanopore.config @@ -25,6 +25,9 @@ params { genome = 'MN908947.3' primer_set_version = 3 + // variant calling options + freyja_repeats = 10 + // Other parameters artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5' } diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config index 90bc0842..54e0235f 100644 --- a/conf/test_full_sispa.config +++ b/conf/test_full_sispa.config @@ -24,6 +24,7 @@ params { // Variant calling options variant_caller = 'bcftools' + freyja_repeats = 10 // Assembly options assemblers = 'spades,unicycler,minia' diff --git a/conf/test_nanopore.config b/conf/test_nanopore.config index 406c8c3e..0674e8ab 100644 --- a/conf/test_nanopore.config +++ b/conf/test_nanopore.config @@ -30,6 +30,9 @@ params { genome = 'MN908947.3' primer_set_version = 3 + // 
variant calling options + freyja_repeats = 10 + // Other parameters artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/minion_test/r941_min_high_g360_model.hdf5' } diff --git a/conf/test_sispa.config b/conf/test_sispa.config index d3e39be8..0ec450bb 100644 --- a/conf/test_sispa.config +++ b/conf/test_sispa.config @@ -30,6 +30,7 @@ params { // Variant calling options variant_caller = 'bcftools' + freyja_repeats = 10 // Assembly options assemblers = 'spades,unicycler,minia' diff --git a/docs/output.md b/docs/output.md index 610e8a15..536a20d3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -20,6 +20,7 @@ The directories listed below will be created in the results directory after the - [QUAST](#nanopore-quast) - Consensus assessment report - [Pangolin](#nanopore-pangolin) - Lineage analysis - [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks + - [Freyja](#nanopore-freyja) - Relative lineage abundance analysis from mixed SARS-CoV-2 samples (typically wastewater) - [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks - [Variants long table](#nanopore-variants-long-table) - Collate per-sample information for individual variants, functional effect prediction and lineage analysis - [Workflow reporting](#nanopore-workflow-reporting) @@ -241,6 +242,30 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi [Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. +### Nanopore: Freyja + +
+Output files + +- `<CALLER>/freyja/demix` + - `*.tsv`: Analysis results including the lineages present, their corresponding abundances, and summarization by constellation +- `<CALLER>/freyja/freyja_db` + - `.json`: dataset containing lineage metadata that correspond to barcodes. + - `.yml`: dataset containing the lineage topology. + - `.csv`: dataset containing lineage defining barcodes. +- `<CALLER>/freyja/variants` + - `*.variants.tsv`: Analysis results including identified variants in a gff-like format + - `*.depth.tsv`: Analysis results including the depth of the identified variants +- `<CALLER>/freyja/bootstrap` + - `*lineages.csv`: Analysis results including lineages present and their corresponding abundances with variation identified through bootstrapping + - `*summarized.csv`: Analysis results including lineages present but summarized by constellation and their corresponding abundances with variation identified through bootstrapping + +**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[Freyja](https://github.com/andersen-lab/Freyja) is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the [UShER](https://usher-wiki.readthedocs.io/en/latest/#) global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + ### Nanopore: ASCIIGenome
@@ -321,6 +346,7 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t - [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics - [iVar variants](#ivar-variants) _||_ [BCFTools call](#bcftools-call) - Variant calling - [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + - [Freyja](#freyja) - Relative lineage abundance analysis from mixed SARS-CoV-2 samples (typically wastewater) - [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks - [iVar consensus](#ivar-consensus) _||_ [BCFTools and BEDTools](#bcftools-and-bedtools) - Consensus sequence generation - [QUAST](#quast) - Consensus assessment report @@ -593,6 +619,28 @@ iVar outputs a tsv format which is not compatible with downstream analysis such ![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) +### Freyja + +
+Output files + +- `variants/freyja/demix` + - `*.tsv`: Analysis results including the lineages present, their corresponding abundances, and summarization by constellation +- `variants/freyja/freyja_db` + - `.json`: dataset containing lineage metadata that correspond to barcodes. + - `.yml`: dataset containing the lineage topology. + - `.csv`: dataset containing lineage defining barcodes. +- `variants/freyja/variants` + - `*.variants.tsv`: Analysis results including identified variants in a gff-like format + - `*.depth.tsv`: Analysis results including the depth of the identified variants +- `variants/freyja/bootstrap` + - `*lineages.csv`: Analysis results including lineages present and their corresponding abundances with variation identified through bootstrapping + - `*summarized.csv`: Analysis results including lineages present but summarized by constellation and their corresponding abundances with variation identified through bootstrapping + +
+ +[Freyja](https://github.com/andersen-lab/Freyja) is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the [UShER](https://usher-wiki.readthedocs.io/en/latest/#) global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + ### ASCIIGenome
diff --git a/docs/usage.md b/docs/usage.md index 1f27af21..102e2b00 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -416,6 +416,10 @@ If the `--save_reference` parameter is provided then the Nextclade dataset gener > **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +#### Freyja + +[Freyja](https://github.com/andersen-lab/Freyja) relies on a dataset of barcodes that use lineage defining mutations (see [UShER](https://usher-wiki.readthedocs.io/en/latest/#)). By default the most recent barcodes will be downloaded and used. However, if analyses need to be compared across multiple datasets, it might be of interest to re-use the same barcodes, or to rerun all Freyja analyses with the most recent dataset. To do this, specify the barcodes and lineages using the `--freyja_barcodes` and `--freyja_lineages` parameters, respectively. + ### nf-core/configs In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. 
diff --git a/modules.json b/modules.json index 4e7dec6f..1d79b252 100644 --- a/modules.json +++ b/modules.json @@ -120,6 +120,26 @@ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, + "freyja/boot": { + "branch": "master", + "git_sha": "281c744ed84352c24697f0916c7744853ce83927", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "freyja/demix": { + "branch": "master", + "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "freyja/update": { + "branch": "master", + "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "freyja/variants": { + "branch": "master", + "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", @@ -279,6 +299,11 @@ "git_sha": "b4b7f89e7fd6d2293f0c176213f710e0bcdaf19e", "installed_by": ["bam_sort_stats_samtools", "bam_markduplicates_picard"] }, + "bam_variant_demix_boot_freyja": { + "branch": "master", + "git_sha": "4bb5cb441e89811385d18a90809ecf36c9daafd8", + "installed_by": ["subworkflows"] + }, "fastq_align_bowtie2": { "branch": "master", "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", diff --git a/modules/nf-core/freyja/boot/main.nf b/modules/nf-core/freyja/boot/main.nf new file mode 100644 index 00000000..10e39d02 --- /dev/null +++ b/modules/nf-core/freyja/boot/main.nf @@ -0,0 +1,56 @@ +process FREYJA_BOOT { /* Runs `freyja boot` on one sample's variants and depths tables with the given barcodes and lineage metadata; emits the *lineages.csv and *summarized.csv tables plus versions.yml. */ + tag "$meta.id" + label 'process_high' + + conda "bioconda::freyja=1.3.12" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/freyja:1.3.12--pyhdfd78af_0': + 'quay.io/biocontainers/freyja:1.3.12--pyhdfd78af_0' }" + + input: + tuple val(meta), path(variants), path(depths) + val repeats + path barcodes + path lineages_meta + + output: + tuple val(meta), path("*lineages.csv") , emit: lineages + tuple val(meta), path("*summarized.csv"), emit: summarized + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: /* task.ext.args is inserted verbatim; the output prefix defaults to meta.id; `repeats` is passed as --nb and task.cpus as --nt */ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + freyja \\ + boot \\ + $args \\ + --nt $task.cpus \\ + --nb $repeats \\ + --output_base $prefix \\ + --barcodes $barcodes \\ + --meta $lineages_meta \\ + $variants \\ + $depths + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: /* Placeholder names must satisfy the `*lineages.csv` and `*summarized.csv` output globs declared above, otherwise a -stub run fails on a missing output. */ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_lineages.csv + touch ${prefix}_summarized.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freyja/boot/meta.yml b/modules/nf-core/freyja/boot/meta.yml new file mode 100644 index 00000000..3eca1568 --- /dev/null +++ b/modules/nf-core/freyja/boot/meta.yml @@ -0,0 +1,83 @@ +name: "freyja_boot" +description: Bootstrap sample demixing by resampling each site based on a multinomial distribution of read depth across all sites, where the event probabilities were determined by the fraction of the total sample reads found at each site, followed by a secondary resampling at each site according to a multinomial distribution (that is, binomial when there was only one SNV at a site), where event probabilities were determined by the frequencies of each base at the site, and the number of trials is given by the sequencing depth. 
+keywords: + - variants + - fasta + - deconvolution + - wastewater + - bootstrapping +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "http://dx.doi.org/10.1038/s41586-022-05049-6" + licence: "['BSD-2-Clause']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variants: + type: file + description: File containing identified variants in a gff-like format + pattern: "*.variants.tsv" + + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - depths: + type: file + description: File containing depth of the variants + pattern: "*.depth.tsv" + + - repeats: + type: value (int) + description: Number of bootstrap repeats to perform + + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - barcodes: + type: file + description: File containing lineage defining barcodes + pattern: "*barcodes.csv" + + - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - lineages_meta: + type: file + description: File containing lineage metadata that correspond to barcodes + pattern: "*lineages.json" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - lineages: + type: file + description: a csv file that includes the lineages present and their corresponding abundances + pattern: "*lineages.csv" + - summarized: + type: file + description: a csv file that includes the lineages present but summarized by constellation and their corresponding abundances + pattern: "*summarized.csv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/demix/main.nf b/modules/nf-core/freyja/demix/main.nf new file mode 100644 index 00000000..6587cb3c --- /dev/null +++ b/modules/nf-core/freyja/demix/main.nf @@ -0,0 +1,52 @@ +process FREYJA_DEMIX { /* Runs `freyja demix` on one sample's variants and depths tables with the given barcodes and lineage metadata; emits every *.tsv produced (the command writes ${prefix}.tsv) plus versions.yml. */ + tag "$meta.id" + label 'process_low' + + conda "bioconda::freyja=1.3.12" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/freyja:1.3.12--pyhdfd78af_0': + 'quay.io/biocontainers/freyja:1.3.12--pyhdfd78af_0' }" + + input: + tuple val(meta), path(variants), path(depths) + path barcodes + path lineages_meta + + output: + tuple val(meta), path("*.tsv"), emit: demix + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: /* task.ext.args is inserted verbatim; the output prefix defaults to meta.id */ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + freyja \\ + demix \\ + $args \\ + --output ${prefix}.tsv \\ + --barcodes $barcodes \\ + --meta $lineages_meta \\ + $variants \\ + $depths + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: /* creates an empty ${prefix}.tsv placeholder and the versions file */ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/freyja/demix/meta.yml 
b/modules/nf-core/freyja/demix/meta.yml new file mode 100644 index 00000000..a47663df --- /dev/null +++ b/modules/nf-core/freyja/demix/meta.yml @@ -0,0 +1,74 @@ +name: "freyja_demix" +description: specify the relative abundance of each known haplotype +keywords: + - variants + - fasta + - deconvolution + - wastewater +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "http://dx.doi.org/10.1038/s41586-022-05049-6" + licence: "['BSD-2-Clause']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variants: + type: file + description: File containing identified variants in a gff-like format + pattern: "*.variants.tsv" + + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - depths: + type: file + description: File containing depth of the variants + pattern: "*.depth.tsv" + + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - barcodes: + type: file + description: File containing lineage defining barcodes + pattern: "*barcodes.csv" + + - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - lineages_meta: + type: file + description: File containing lineage metadata that correspond to barcodes + pattern: "*lineages.json" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - demix: + type: file + description: a tsv file that includes the lineages present, their corresponding abundances, and summarization by constellation + pattern: "*.demix.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/update/main.nf b/modules/nf-core/freyja/update/main.nf new file mode 100644 index 00000000..1711b14c --- /dev/null +++ b/modules/nf-core/freyja/update/main.nf @@ -0,0 +1,50 @@ +process FREYJA_UPDATE { /* Runs `freyja update --outdir <db_name>` and emits the barcodes CSV, lineage topology YAML and curated lineage JSON found in that directory, plus versions.yml. */ + tag "$db_name" + label 'process_single' + + conda "bioconda::freyja=1.3.12" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/freyja:1.3.12--pyhdfd78af_0': + 'quay.io/biocontainers/freyja:1.3.12--pyhdfd78af_0' }" + + input: + val db_name + + output: + path "${db_name}/usher_barcodes.csv" , emit: barcodes + path "${db_name}/lineages.yml" , emit: lineages_topology + path "${db_name}/curated_lineages.json", emit: lineages_meta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: /* task.ext.args (default '') is now forwarded to `freyja update`; previously it was read but never used, so configured options were silently dropped */ + def args = task.ext.args ?: '' + """ + mkdir -p $db_name + freyja \\ + update \\ + $args \\ + --outdir $db_name + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: /* creates empty placeholders for the three database files inside the db_name directory */ + """ + mkdir $db_name + + touch "${db_name}/usher_barcodes.csv" + touch "${db_name}/lineages.yml" + touch "${db_name}/curated_lineages.json" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freyja/update/meta.yml b/modules/nf-core/freyja/update/meta.yml new file mode 100644 index 00000000..2af5d662 --- /dev/null +++ b/modules/nf-core/freyja/update/meta.yml @@ -0,0 +1,40 @@ +name: 
"freyja_update" +description: downloads new versions of the curated SARS-CoV-2 lineage file and barcodes +keywords: + - database + - variants + - UShER +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "http://dx.doi.org/10.1038/s41586-022-05049-6" + licence: "['BSD-2-Clause']" + +input: + - db_name: + type: string + description: "The name of the database directory" + +output: + - barcodes: + type: file + description: File containing lineage defining barcodes + pattern: "*barcodes.csv" + - lineages_topology: + type: file + description: File containing the lineage topology + pattern: "*lineages.yml" + - lineages_meta: + type: file + description: File containing lineage metadata that correspond to barcodes + pattern: "*lineages.json" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/variants/main.nf b/modules/nf-core/freyja/variants/main.nf new file mode 100644 index 00000000..cdc6d788 --- /dev/null +++ b/modules/nf-core/freyja/variants/main.nf @@ -0,0 +1,50 @@ +process FREYJA_VARIANTS { /* Runs `freyja variants` on one BAM against the reference FASTA; emits ${prefix}.variants.tsv and ${prefix}.depth.tsv together in one tuple, plus versions.yml. */ + tag "$meta.id" + label 'process_medium' + + conda "bioconda::freyja=1.3.12" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/freyja:1.3.12--pyhdfd78af_0': + 'quay.io/biocontainers/freyja:1.3.12--pyhdfd78af_0' }" + + input: + tuple val(meta), path(bam) + path fasta + + output: + tuple val(meta), path("*.variants.tsv"), path("*.depth.tsv"), emit: variants + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: /* task.ext.args is inserted verbatim; the output prefix defaults to meta.id */ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + freyja \\ + variants \\ + $args \\ + --ref $fasta \\ + --variants ${prefix}.variants.tsv \\ + --depths ${prefix}.depth.tsv \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: /* creates empty placeholders matching both output globs */ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.variants.tsv + touch ${prefix}.depth.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freyja/variants/meta.yml b/modules/nf-core/freyja/variants/meta.yml new file mode 100644 index 00000000..13ddd377 --- /dev/null +++ b/modules/nf-core/freyja/variants/meta.yml @@ -0,0 +1,52 @@ +name: "freyja_variants" +description: call variant and sequencing depth information of the variant +keywords: + - variants + - fasta + - wastewater +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "http://dx.doi.org/10.1038/s41586-022-05049-6" + licence: "['BSD-2-Clause']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.bam" + + - fasta: + type: file + description: The reference sequence used for mapping and generating the BAM file + pattern: "*.fa" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variants: + type: file + description: File containing identified variants in a gff-like format + pattern: "*.variants.tsv" + - depths: + type: file + description: File containing depth of the variants + pattern: "*.depth.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Joon-Klaps" diff --git a/nextflow.config b/nextflow.config index 361c3dd7..e4a4a4be 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,6 +41,11 @@ params { multiqc_title = null multiqc_config = null max_multiqc_email_size = '25.MB' + skip_freyja = false + freyja_repeats = 100 + freyja_db_name = 'freyja_db' + freyja_barcodes = null + freyja_lineages = null skip_mosdepth = false skip_pangolin = false skip_nextclade = false diff --git a/nextflow_schema.json b/nextflow_schema.json index ff58253a..3f5acf47 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -251,6 +251,35 @@ "description": "Maximum window size before and after variant locii used to generate ASCIIGenome screenshots.", "fa_icon": "fab fa-windows" }, + "skip_freyja": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip freyja deep SARS-CoV-2 variant analysis using a depth weighted approach." + }, + "freyja_db_name": { + "type": "string", + "default": "freyja_db", + "fa_icon": "fas fa-folder-open", + "description": "Specify the name where to store UShER database (default: 'freyja_db')." + }, + "freyja_repeats": { + "type": "integer", + "default": 100, + "fa_icon": "fas fa-hand-paper", + "description": "Specify the number of bootstrap repeats to do." 
+ }, + "freyja_barcodes": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-file", + "description": "Lineage defining barcodes, default is most recent from UShER database." + }, + "freyja_lineages": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-file", + "description": "Metadata of lineages that match barcode, default is most recent from UShER database." + }, "multiqc_title": { "type": "string", "hidden": true, diff --git a/subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf b/subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf new file mode 100644 index 00000000..12c967ed --- /dev/null +++ b/subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf @@ -0,0 +1,77 @@ +include { FREYJA_VARIANTS } from '../../../modules/nf-core/freyja/variants' +include { FREYJA_UPDATE } from '../../../modules/nf-core/freyja/update' +include { FREYJA_DEMIX } from '../../../modules/nf-core/freyja/demix' +include { FREYJA_BOOT } from '../../../modules/nf-core/freyja/boot' + +workflow BAM_VARIANT_DEMIX_BOOT_FREYJA { + + take: + ch_bam // channel: [ val(meta), path(bam) ] + ch_fasta // channel: [ path(fasta) ] + val_repeats // value repeats + val_db_name // string db_name + ch_barcodes // channel: [ path(barcodes)] + ch_lineages_meta // channel: [ path(lineages_meta)] + + main: + ch_versions = Channel.empty() + + // + // Variant calling + // + FREYJA_VARIANTS ( + ch_bam, + ch_fasta + ) + ch_freyja_variants = FREYJA_VARIANTS.out.variants + + ch_versions = ch_versions.mix(FREYJA_VARIANTS.out.versions.first()) + + // + // Update the database if none are given. 
+ // + if (!ch_barcodes || !ch_lineages_meta) { + FREYJA_UPDATE ( + val_db_name + ) + + ch_barcodes = FREYJA_UPDATE.out.barcodes + ch_lineages_meta = FREYJA_UPDATE.out.lineages_meta + + ch_versions = ch_versions.mix(FREYJA_UPDATE.out.versions.first()) + } + + + // + // demix and define minimum variant abundances + // + FREYJA_DEMIX ( + ch_freyja_variants, + ch_barcodes, + ch_lineages_meta + ) + ch_freyja_demix = FREYJA_DEMIX.out.demix + ch_versions = ch_versions.mix(FREYJA_DEMIX.out.versions.first()) + + + // + // Perform bootstrapping to get more accurate estimates of abundances + // + FREYJA_BOOT ( + ch_freyja_variants, + val_repeats, + ch_barcodes, + ch_lineages_meta + ) + ch_versions = ch_versions.mix(FREYJA_BOOT.out.versions.first()) + + emit: + variants = FREYJA_VARIANTS.out.variants // channel: [ val(meta), path(variants_tsv), path(depths_tsv) ] + demix = FREYJA_DEMIX.out.demix // channel: [ val(meta), path(demix_tsv) ] + lineages = FREYJA_BOOT.out.lineages // channel: [ val(meta), path(lineages_csv) ] + summarized = FREYJA_BOOT.out.summarized // channel: [ val(meta), path(summarized_csv) ] + barcodes = ch_barcodes // channel: [ path(barcodes) ] + lineages_meta = ch_lineages_meta // channel: [ path(lineages_meta) ] + versions = ch_versions // channel: [ path(versions.yml) ] + } + + diff --git a/subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml b/subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml new file mode 100644 index 00000000..2515b4fb --- /dev/null +++ b/subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml @@ -0,0 +1,86 @@ +name: "bam_variant_demix_boot_freyja" +description: Recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference) +keywords: + - bam + - variants + - cram + +modules: + - freyja/variants + - freyja/demix + - freyja/update + - freyja/boot + +input: + - ch_bam: + type: file + description: | + Structure: [ val(meta), path(bam) ] + Groovy Map
containing sample information e.g. [ id:'test', single_end:false ] and sorted BAM file + - ch_fasta: + type: file + description: | + Structure: path(fasta) + The fasta reference used for mapping and generating the sorted BAM file + - val_repeats: + type: value (int) + description: Number of bootstrap repeats to perform + - val_db_name: + type: value (string) + description: Name of the dir where UShER's files will be stored + - ch_barcodes: + type: file + description: | + Structure: path(barcodes) + File containing lineage defining barcodes + - ch_lineages_meta: + type: file + description: | + Structure: path(lineages_meta) + File containing lineage metadata that correspond to barcodes + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - variants: + type: file + description: | + Structure: [ val(meta), path(variants) ] + File containing identified variants in a gff-like format + - depths: + type: file + description: | + Structure: [ val(meta), path(depths) ] + File containing depth of the variants + - demix: + type: file + description: | + Structure: [ val(meta), path(demix) ] + a tsv file that includes the lineages present, their corresponding abundances, and summarization by constellation + - lineages: + type: file + description: | + Structure: [ val(meta), path(lineages) ] + a csv file that includes the lineages present and their corresponding abundances + - summarized: + type: file + description: | + Structure: [ val(meta), path(summarized) ] + a csv file that includes the lineages present but summarized by constellation and their corresponding abundances + - barcodes: + type: file + description: path(barcodes) + File containing lineage defining barcodes + - lineages_meta: + type: file + description: path(lineages_meta) + a csv file that includes the
lineages present but summarized by constellation and their corresponding abundances + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 7d44924f..8f8cddf6 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -21,7 +21,8 @@ WorkflowIllumina.initialise(params, log, valid_params) def checkPathParamList = [ params.input, params.fasta, params.gff, params.bowtie2_index, params.kraken2_db, params.primer_bed, params.primer_fasta, - params.blast_db, params.spades_hmm, params.multiqc_config + params.blast_db, params.spades_hmm, params.multiqc_config, + params.freyja_barcodes, params.freyja_lineages ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -96,8 +97,9 @@ include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/mosdepth/main // // SUBWORKFLOW: Consisting entirely of nf-core/modules // -include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' -include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard/main' +include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' +include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard/main' +include { BAM_VARIANT_DEMIX_BOOT_FREYJA } from '../subworkflows/nf-core/bam_variant_demix_boot_freyja/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -232,7 +234,7 @@ workflow ILLUMINA { } } .collect() - .map { + .map { tsv_data -> def header = ['Sample', 'Reads before trimming'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -319,7 +321,7 @@ workflow ILLUMINA { ch_pass_fail_mapped .fail .collect() - .map { + .map { tsv_data -> def header = ['Sample', 'Mapped reads'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -457,6 +459,21 @@ workflow ILLUMINA { 
ch_versions = ch_versions.mix(VARIANTS_BCFTOOLS.out.versions) } + // + // SUBWORKFLOW: Determine variants with Freyja + // + if (!params.skip_variants && !params.skip_freyja) { + BAM_VARIANT_DEMIX_BOOT_FREYJA( + ch_bam, + PREPARE_GENOME.out.fasta, + params.freyja_repeats, + params.freyja_db_name, + params.freyja_barcodes, + params.freyja_lineages, + ) + ch_versions= ch_versions.mix(BAM_VARIANT_DEMIX_BOOT_FREYJA.out.versions) + } + // // SUBWORKFLOW: Call consensus with iVar and downstream QC // @@ -506,8 +523,8 @@ workflow ILLUMINA { def clade = WorkflowCommons.getNextcladeFieldMapFromCsv(csv)['clade'] return [ "$meta.id\t$clade" ] } - .collect() - .map { + .collect() + .map { tsv_data -> def header = ['Sample', 'clade'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index 69854472..677634ab 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -16,7 +16,8 @@ WorkflowNanopore.initialise(params, log, valid_params) def checkPathParamList = [ params.input, params.fastq_dir, params.fast5_dir, - params.sequencing_summary, params.gff + params.sequencing_summary, params.gff, + params.freyja_barcodes, params.freyja_lineages ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -58,11 +59,12 @@ include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../mod // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_nanopore' -include { SNPEFF_SNPSIFT } from '../subworkflows/local/snpeff_snpsift' -include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table' -include { FILTER_BAM_SAMTOOLS } from '../subworkflows/local/filter_bam_samtools' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from 
'../subworkflows/local/prepare_genome_nanopore' +include { SNPEFF_SNPSIFT } from '../subworkflows/local/snpeff_snpsift' +include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table' +include { FILTER_BAM_SAMTOOLS } from '../subworkflows/local/filter_bam_samtools' +include { BAM_VARIANT_DEMIX_BOOT_FREYJA } from '../subworkflows/nf-core/bam_variant_demix_boot_freyja/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -180,8 +182,8 @@ workflow NANOPORE { .filter { it[1] == null } .filter { it[-1] >= params.min_barcode_reads } .map { it -> [ "${it[0]}\t${it[-1]}" ] } - .collect() - .map { + .collect() + .map { tsv_data -> def header = ['Barcode', 'Read count'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -194,8 +196,8 @@ workflow NANOPORE { ch_fastq_dirs .filter { it[-1] == null } .map { it -> [ "${it[1]}\t${it[0]}" ] } - .collect() - .map { + .collect() + .map { tsv_data -> def header = ['Sample', 'Missing barcode'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -239,7 +241,7 @@ workflow NANOPORE { ch_pass_fail_barcode_count .fail .collect() - .map { + .map { tsv_data -> def header = ['Sample', 'Barcode count'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -278,7 +280,7 @@ workflow NANOPORE { ch_pass_fail_guppyplex_count .fail .collect() - .map { + .map { tsv_data -> def header = ['Sample', 'Read count'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -410,13 +412,13 @@ workflow NANOPORE { NEXTCLADE_RUN .out .csv - .map { + .map { meta, csv -> def clade = WorkflowCommons.getNextcladeFieldMapFromCsv(csv)['clade'] return [ "$meta.id\t$clade" ] } - .collect() - .map { + .collect() + .map { tsv_data -> def header = ['Sample', 'clade'] WorkflowCommons.multiqcTsvFromList(tsv_data, header) @@ -424,6 +426,21 @@ workflow NANOPORE { .set { ch_nextclade_multiqc } } + // + // SUBWORKFLOW: Determine variants with Freyja + // + if (!params.skip_freyja) { + BAM_VARIANT_DEMIX_BOOT_FREYJA( + 
ARTIC_MINION.out.bam_primertrimmed, + PREPARE_GENOME.out.fasta, + params.freyja_repeats, + params.freyja_db_name, + params.freyja_barcodes, + params.freyja_lineages, + ) + ch_versions= ch_versions.mix(BAM_VARIANT_DEMIX_BOOT_FREYJA.out.versions) + } + // // MODULE: Consensus QC across all samples with QUAST //